/********************************* tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation. Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 2.1 of the licence, or (at your option) any later version. This program and library are distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ #include "terAlignment.h" using namespace std; namespace TERCPPNS_TERCpp { terAlignment::terAlignment() { // vector ref; // vector hyp; // vector aftershift; // TERshift[] allshifts = null; numEdits=0; numWords=0; // bestRef=""; numIns=0; numDel=0; numSub=0; numSft=0; numWsf=0; averageWords=0; } void terAlignment::set(terAlignment& l_terAlignment) { numEdits=l_terAlignment.numEdits; numWords=l_terAlignment.numWords; bestRef=l_terAlignment.bestRef; numIns=l_terAlignment.numIns; numDel=l_terAlignment.numDel; numSub=l_terAlignment.numSub; numSft=l_terAlignment.numSft; numWsf=l_terAlignment.numWsf; averageWords=l_terAlignment.averageWords; ref=l_terAlignment.ref; hyp=l_terAlignment.hyp; aftershift=l_terAlignment.aftershift; // allshifts=l_terAlignment.allshifts; hyp_int=l_terAlignment.hyp_int; aftershift_int=l_terAlignment.aftershift_int; alignment=l_terAlignment.alignment; allshifts=(*(new vector((int)l_terAlignment.allshifts.size()))); for (int l_i=0; l_i< (int)l_terAlignment.allshifts.size(); l_i++) { allshifts.at(l_i).set(l_terAlignment.allshifts.at(l_i)); } } void terAlignment::set(terAlignment* l_terAlignment) { numEdits=l_terAlignment->numEdits; numWords=l_terAlignment->numWords; bestRef=l_terAlignment->bestRef; numIns=l_terAlignment->numIns; numDel=l_terAlignment->numDel; numSub=l_terAlignment->numSub; numSft=l_terAlignment->numSft; numWsf=l_terAlignment->numWsf; averageWords=l_terAlignment->averageWords; ref=l_terAlignment->ref; hyp=l_terAlignment->hyp; aftershift=l_terAlignment->aftershift; // allshifts=l_terAlignment->allshifts; hyp_int=l_terAlignment->hyp_int; aftershift_int=l_terAlignment->aftershift_int; alignment=l_terAlignment->alignment; allshifts=(*(new vector((int)l_terAlignment->allshifts.size()))); for (int l_i=0; l_i< (int)l_terAlignment->allshifts.size(); l_i++) { allshifts.at(l_i).set(l_terAlignment->allshifts.at(l_i)); } } string terAlignment::toString() { stringstream s; s.str ( "" ); s << "Original Ref: \t" << join ( " ", ref ) << endl; s << "Original Hyp: \t" << join ( " ", hyp ) <0 ) { s << "Alignment: ("; // s += "\nAlignment: ("; for ( int i = 0; i < ( int ) ( alignment.size() ); i++ ) { s << alignment[i]; // s+=alignment[i]; } // s += ")"; s << ")"; } s << endl; if ( ( int ) allshifts.size() == 0 ) { // s += "\nNumShifts: 0"; s << "NumShifts: 0"; } else { // s += "\nNumShifts: " + (int)allshifts.size(); s << "NumShifts: "<< ( int ) allshifts.size(); for ( int i = 0; i < ( int ) allshifts.size(); i++ ) { s << endl << " " ; s << ( ( terShift ) allshifts[i] ).toString(); // s += "\n " + allshifts[i]; } } s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")"; // s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")"; return s.str(); } string terAlignment::join ( string delim, vector arr ) { if ( ( int ) arr.size() == 0 ) return ""; // if ((int)delim.compare("") == 0) delim = new String(""); // String s = new String(""); stringstream s; s.str ( "" ); for ( int i = 0; i < ( int ) arr.size(); i++ ) { if ( i == 0 ) { s << arr.at ( i ); } else { s << delim << arr.at ( i ); } } return s.str(); // return ""; } double terAlignment::score() { if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) { return 1.0; } if ( numWords <= 0.0 ) { return 0.0; } return ( double ) numEdits / numWords; } double terAlignment::scoreAv() { if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) ) { return 1.0; } if ( averageWords <= 0.0 ) { return 0.0; } return ( double ) numEdits / averageWords; } void terAlignment::scoreDetails() { numIns = numDel = numSub = numWsf = numSft = 0; if((int)allshifts.size()>0) { for(int i = 0; i < (int)allshifts.size(); ++i) { numWsf += allshifts[i].size(); } numSft = allshifts.size(); } if((int)alignment.size()>0 ) { for(int i = 0; i < (int)alignment.size(); ++i) { switch (alignment[i]) { case 'S': case 'T': numSub++; break; case 'D': numDel++; break; case 'I': numIns++; break; } } } // if(numEdits != numSft + numDel + numIns + numSub) // System.out.println("** Error, unmatch edit erros " + numEdits + // " vs " + (numSft + numDel + numIns + numSub)); } string terAlignment::printAlignments() { stringstream to_return; for(int i = 0; i < (int)alignment.size(); ++i) { char alignInfo=alignment.at(i); if (alignInfo == 'A' ) { alignInfo='A'; } if (i==0) { to_return << alignInfo; } else { to_return << " " << alignInfo; } } return to_return.str(); } string terAlignment::printAllShifts() { stringstream to_return; if ( ( int ) allshifts.size() == 0 ) { // s += "\nNumShifts: 0"; to_return << "NbrShifts: 0"; } else { // s += "\nNumShifts: " + (int)allshifts.size(); to_return << "NbrShifts: "<< ( int ) allshifts.size(); for ( int i = 0; i < ( int ) allshifts.size(); i++ ) { to_return << "\t" ; to_return << ( ( terShift ) allshifts[i] ).toString(); // s += "\n " + allshifts[i]; } } return to_return.str(); } }