00001 /********************************* 00002 tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation. 00003 00004 Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France 00005 Contact: christophe.servan@lium.univ-lemans.fr 00006 00007 The tercpp tool and library are free software: you can redistribute it and/or modify it 00008 under the terms of the GNU Lesser General Public License as published by 00009 the Free Software Foundation, either version 2.1 of the licence, or 00010 (at your option) any later version. 00011 00012 This program and library are distributed in the hope that it will be useful, but WITHOUT 00013 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00014 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 00015 for more details. 00016 00017 You should have received a copy of the GNU Lesser General Public License 00018 along with this library; if not, write to the Free Software Foundation, 00019 Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00020 **********************************/ 00021 00022 #include "terAlignment.h" 00023 using namespace std; 00024 namespace TERCPPNS_TERCpp 00025 { 00026 00027 terAlignment::terAlignment() 00028 { 00029 // vector<string> ref; 00030 // vector<string> hyp; 00031 // vector<string> aftershift; 00032 00033 // TERshift[] allshifts = null; 00034 00035 numEdits=0; 00036 numWords=0; 00037 // bestRef=""; 00038 00039 numIns=0; 00040 numDel=0; 00041 numSub=0; 00042 numSft=0; 00043 numWsf=0; 00044 averageWords=0; 00045 00046 } 00047 void terAlignment::set(terAlignment& l_terAlignment) 00048 { 00049 numEdits=l_terAlignment.numEdits; 00050 numWords=l_terAlignment.numWords; 00051 bestRef=l_terAlignment.bestRef; 00052 numIns=l_terAlignment.numIns; 00053 numDel=l_terAlignment.numDel; 00054 numSub=l_terAlignment.numSub; 00055 numSft=l_terAlignment.numSft; 00056 numWsf=l_terAlignment.numWsf; 00057 averageWords=l_terAlignment.averageWords; 00058 ref=l_terAlignment.ref; 00059 hyp=l_terAlignment.hyp; 00060 aftershift=l_terAlignment.aftershift; 00061 // allshifts=l_terAlignment.allshifts; 00062 hyp_int=l_terAlignment.hyp_int; 00063 aftershift_int=l_terAlignment.aftershift_int; 00064 alignment=l_terAlignment.alignment; 00065 allshifts=(*(new vector<terShift>((int)l_terAlignment.allshifts.size()))); 00066 for (int l_i=0; l_i< (int)l_terAlignment.allshifts.size(); l_i++) { 00067 allshifts.at(l_i).set(l_terAlignment.allshifts.at(l_i)); 00068 } 00069 00070 } 00071 void terAlignment::set(terAlignment* l_terAlignment) 00072 { 00073 numEdits=l_terAlignment->numEdits; 00074 numWords=l_terAlignment->numWords; 00075 bestRef=l_terAlignment->bestRef; 00076 numIns=l_terAlignment->numIns; 00077 numDel=l_terAlignment->numDel; 00078 numSub=l_terAlignment->numSub; 00079 numSft=l_terAlignment->numSft; 00080 numWsf=l_terAlignment->numWsf; 00081 averageWords=l_terAlignment->averageWords; 00082 ref=l_terAlignment->ref; 00083 hyp=l_terAlignment->hyp; 00084 aftershift=l_terAlignment->aftershift; 00085 // allshifts=l_terAlignment->allshifts; 00086 hyp_int=l_terAlignment->hyp_int; 00087 aftershift_int=l_terAlignment->aftershift_int; 00088 alignment=l_terAlignment->alignment; 00089 allshifts=(*(new vector<terShift>((int)l_terAlignment->allshifts.size()))); 00090 for (int l_i=0; l_i< (int)l_terAlignment->allshifts.size(); l_i++) { 00091 allshifts.at(l_i).set(l_terAlignment->allshifts.at(l_i)); 00092 } 00093 00094 } 00095 00096 string terAlignment::toString() 00097 { 00098 stringstream s; 00099 s.str ( "" ); 00100 s << "Original Ref: \t" << join ( " ", ref ) << endl; 00101 s << "Original Hyp: \t" << join ( " ", hyp ) <<endl; 00102 s << "Hyp After Shift:\t" << join ( " ", aftershift ); 00103 // s << "Hyp After Shift: " << join ( " ", aftershift ); 00104 s << endl; 00105 // string s = "Original Ref: " + join(" ", ref) + "\nOriginal Hyp: " + join(" ", hyp) + "\nHyp After Shift: " + join(" ", aftershift); 00106 if ( ( int ) sizeof ( alignment ) >0 ) { 00107 s << "Alignment: ("; 00108 // s += "\nAlignment: ("; 00109 for ( int i = 0; i < ( int ) ( alignment.size() ); i++ ) { 00110 s << alignment[i]; 00111 // s+=alignment[i]; 00112 } 00113 // s += ")"; 00114 s << ")"; 00115 } 00116 s << endl; 00117 if ( ( int ) allshifts.size() == 0 ) { 00118 // s += "\nNumShifts: 0"; 00119 s << "NumShifts: 0"; 00120 } else { 00121 // s += "\nNumShifts: " + (int)allshifts.size(); 00122 s << "NumShifts: "<< ( int ) allshifts.size(); 00123 for ( int i = 0; i < ( int ) allshifts.size(); i++ ) { 00124 s << endl << " " ; 00125 s << ( ( terShift ) allshifts[i] ).toString(); 00126 // s += "\n " + allshifts[i]; 00127 } 00128 } 00129 s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")"; 00130 // s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")"; 00131 return s.str(); 00132 00133 } 00134 string terAlignment::join ( string delim, vector<string> arr ) 00135 { 00136 if ( ( int ) arr.size() == 0 ) return ""; 00137 // if ((int)delim.compare("") == 0) delim = new String(""); 00138 // String s = new String(""); 00139 stringstream s; 00140 s.str ( "" ); 00141 for ( int i = 0; i < ( int ) arr.size(); i++ ) { 00142 if ( i == 0 ) { 00143 s << arr.at ( i ); 00144 } else { 00145 s << delim << arr.at ( i ); 00146 } 00147 } 00148 return s.str(); 00149 // return ""; 00150 } 00151 double terAlignment::score() 00152 { 00153 if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) { 00154 return 1.0; 00155 } 00156 if ( numWords <= 0.0 ) { 00157 return 0.0; 00158 } 00159 return ( double ) numEdits / numWords; 00160 } 00161 double terAlignment::scoreAv() 00162 { 00163 if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) ) { 00164 return 1.0; 00165 } 00166 if ( averageWords <= 0.0 ) { 00167 return 0.0; 00168 } 00169 return ( double ) numEdits / averageWords; 00170 } 00171 00172 void terAlignment::scoreDetails() 00173 { 00174 numIns = numDel = numSub = numWsf = numSft = 0; 00175 if((int)allshifts.size()>0) { 00176 for(int i = 0; i < (int)allshifts.size(); ++i) { 00177 numWsf += allshifts[i].size(); 00178 } 00179 numSft = allshifts.size(); 00180 } 00181 00182 if((int)alignment.size()>0 ) { 00183 for(int i = 0; i < (int)alignment.size(); ++i) { 00184 switch (alignment[i]) { 00185 case 'S': 00186 case 'T': 00187 numSub++; 00188 break; 00189 case 'D': 00190 numDel++; 00191 break; 00192 case 'I': 00193 numIns++; 00194 break; 00195 } 00196 } 00197 } 00198 // if(numEdits != numSft + numDel + numIns + numSub) 00199 // System.out.println("** Error, unmatch edit erros " + numEdits + 00200 // " vs " + (numSft + numDel + numIns + numSub)); 00201 } 00202 string terAlignment::printAlignments() 00203 { 00204 stringstream to_return; 00205 for(int i = 0; i < (int)alignment.size(); ++i) { 00206 char alignInfo=alignment.at(i); 00207 if (alignInfo == 'A' ) { 00208 alignInfo='A'; 00209 } 00210 00211 if (i==0) { 00212 to_return << alignInfo; 00213 } else { 00214 to_return << " " << alignInfo; 00215 } 00216 } 00217 return to_return.str(); 00218 } 00219 string terAlignment::printAllShifts() 00220 { 00221 stringstream to_return; 00222 if ( ( int ) allshifts.size() == 0 ) { 00223 // s += "\nNumShifts: 0"; 00224 to_return << "NbrShifts: 0"; 00225 } else { 00226 // s += "\nNumShifts: " + (int)allshifts.size(); 00227 to_return << "NbrShifts: "<< ( int ) allshifts.size(); 00228 for ( int i = 0; i < ( int ) allshifts.size(); i++ ) { 00229 to_return << "\t" ; 00230 to_return << ( ( terShift ) allshifts[i] ).toString(); 00231 // s += "\n " + allshifts[i]; 00232 } 00233 } 00234 return to_return.str(); 00235 } 00236 00237 }