// mosesdecoder/mert/TER/terAlignment.cpp
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2.1 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#include "terAlignment.h"
using namespace std;
2015-02-16 21:34:41 +03:00
namespace TERCPPNS_TERCpp
{
2015-02-19 15:27:23 +03:00
/**
 * Default constructor: resets every numeric counter of the alignment to zero.
 *
 * Members that are not assigned here (bestRef, ref, hyp, aftershift,
 * allshifts, ...) keep whatever their own default initialisation provides.
 */
terAlignment::terAlignment()
{
  // Per-operation counters (insertions, deletions, substitutions, shifts,
  // shifted words); scoreDetails() recomputes these later.
  numIns = numDel = numSub = numSft = numWsf = 0;

  // Totals read by score() and scoreAv().
  numEdits = 0;
  numWords = 0;
  averageWords = 0;
}
/**
 * Copies the complete state of another alignment into this one.
 *
 * Scalar counters, bestRef, the word vectors (ref, hyp, aftershift), their
 * integer counterparts and the alignment vector are copied by assignment.
 * The shift list is rebuilt to the same length and each element is filled
 * through terShift::set().
 *
 * @param l_terAlignment alignment to copy from; passing this object itself
 *                       is a no-op.
 */
void terAlignment::set(terAlignment& l_terAlignment)
{
  // Guard against self-assignment: resetting allshifts below would otherwise
  // destroy the very shifts we are about to read.
  if (this == &l_terAlignment) {
    return;
  }

  numEdits = l_terAlignment.numEdits;
  numWords = l_terAlignment.numWords;
  bestRef = l_terAlignment.bestRef;
  numIns = l_terAlignment.numIns;
  numDel = l_terAlignment.numDel;
  numSub = l_terAlignment.numSub;
  numSft = l_terAlignment.numSft;
  numWsf = l_terAlignment.numWsf;
  averageWords = l_terAlignment.averageWords;
  ref = l_terAlignment.ref;
  hyp = l_terAlignment.hyp;
  aftershift = l_terAlignment.aftershift;
  hyp_int = l_terAlignment.hyp_int;
  aftershift_int = l_terAlignment.aftershift_int;
  alignment = l_terAlignment.alignment;

  // Start from default-constructed shifts held in a temporary vector; the
  // temporary is destroyed automatically, so nothing is leaked (the former
  // "*(new vector<terShift>(n))" was never deleted).
  allshifts = vector<terShift>(l_terAlignment.allshifts.size());
  for (size_t l_i = 0; l_i < l_terAlignment.allshifts.size(); ++l_i) {
    allshifts.at(l_i).set(l_terAlignment.allshifts.at(l_i));
  }
}
/**
 * Pointer overload: copies the complete state of *l_terAlignment into this
 * object.
 *
 * Scalar counters, bestRef, the word vectors (ref, hyp, aftershift), their
 * integer counterparts and the alignment vector are copied by assignment.
 * The shift list is rebuilt to the same length and each element is filled
 * through terShift::set().
 *
 * @param l_terAlignment alignment to copy from. It is dereferenced
 *                       unconditionally, so it must not be null. Passing a
 *                       pointer to this object itself is a no-op.
 */
void terAlignment::set(terAlignment* l_terAlignment)
{
  // Guard against self-assignment: resetting allshifts below would otherwise
  // destroy the very shifts we are about to read.
  if (this == l_terAlignment) {
    return;
  }

  numEdits = l_terAlignment->numEdits;
  numWords = l_terAlignment->numWords;
  bestRef = l_terAlignment->bestRef;
  numIns = l_terAlignment->numIns;
  numDel = l_terAlignment->numDel;
  numSub = l_terAlignment->numSub;
  numSft = l_terAlignment->numSft;
  numWsf = l_terAlignment->numWsf;
  averageWords = l_terAlignment->averageWords;
  ref = l_terAlignment->ref;
  hyp = l_terAlignment->hyp;
  aftershift = l_terAlignment->aftershift;
  hyp_int = l_terAlignment->hyp_int;
  aftershift_int = l_terAlignment->aftershift_int;
  alignment = l_terAlignment->alignment;

  // Start from default-constructed shifts held in a temporary vector; the
  // temporary is destroyed automatically, so nothing is leaked (the former
  // "*(new vector<terShift>(n))" was never deleted).
  allshifts = vector<terShift>(l_terAlignment->allshifts.size());
  for (size_t l_i = 0; l_i < l_terAlignment->allshifts.size(); ++l_i) {
    allshifts.at(l_i).set(l_terAlignment->allshifts.at(l_i));
  }
}
/**
 * Builds a multi-line, human-readable dump of this alignment.
 *
 * Layout, in order:
 *   - the reference, the hypothesis and the shifted hypothesis, each
 *     space-joined on its own line;
 *   - the alignment characters wrapped in "Alignment: (...)"; this text is
 *     emitted only when the alignment is non-empty, the line break after it
 *     is always emitted;
 *   - "NumShifts: N" followed by one line per shift (terShift::toString());
 *   - "Score: " with the value of scoreAv() and the numEdits/averageWords
 *     pair it is derived from.
 *
 * @return the formatted report; the final line has no trailing newline.
 */
string terAlignment::toString()
{
  stringstream report;
  report << "Original Ref: \t" << join ( " ", ref ) << "\n";
  report << "Original Hyp: \t" << join ( " ", hyp ) << "\n";
  report << "Hyp After Shift:\t" << join ( " ", aftershift ) << "\n";

  // Test the element count: sizeof(alignment) is the byte size of the
  // container object itself and is therefore always positive.
  if ( !alignment.empty() ) {
    report << "Alignment: (";
    for ( size_t i = 0; i < alignment.size(); ++i ) {
      report << alignment[i];
    }
    report << ")";
  }
  report << "\n";

  // Prints "NumShifts: 0" when there are no shifts; the loop then adds nothing.
  report << "NumShifts: " << ( int ) allshifts.size();
  for ( size_t i = 0; i < allshifts.size(); ++i ) {
    // Call toString() on the stored element directly instead of on a copy.
    report << "\n" << " " << allshifts[i].toString();
  }

  report << "\n" << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
  return report.str();
}
/**
 * Concatenates the elements of arr, inserting delim between consecutive
 * elements (never before the first or after the last).
 *
 * @param delim separator text
 * @param arr   strings to concatenate
 * @return the joined string; empty when arr has no elements
 */
string terAlignment::join ( string delim, vector<string> arr )
{
  stringstream joined;
  for ( vector<string>::const_iterator word = arr.begin(); word != arr.end(); ++word ) {
    if ( word != arr.begin() ) {
      joined << delim;
    }
    joined << *word;
  }
  // An empty input leaves the stream untouched, which yields "".
  return joined.str();
}
/**
 * Ratio of edits to words: numEdits / numWords.
 *
 * @return 1.0 when numWords is not positive but edits were counted,
 *         0.0 when numWords is not positive and there are no edits,
 *         otherwise numEdits divided by numWords.
 */
double terAlignment::score()
{
  if ( numWords <= 0.0 ) {
    // No usable denominator: fall back to a fixed score.
    return ( numEdits > 0.0 ) ? 1.0 : 0.0;
  }
  return static_cast<double> ( numEdits ) / numWords;
}
/**
 * Ratio of edits to the average word count: numEdits / averageWords.
 *
 * @return 1.0 when averageWords is not positive but edits were counted,
 *         0.0 when averageWords is not positive and there are no edits,
 *         otherwise numEdits divided by averageWords.
 */
double terAlignment::scoreAv()
{
  if ( averageWords <= 0.0 ) {
    // No usable denominator: fall back to a fixed score.
    return ( numEdits > 0.0 ) ? 1.0 : 0.0;
  }
  return static_cast<double> ( numEdits ) / averageWords;
}
void terAlignment::scoreDetails()
{
numIns = numDel = numSub = numWsf = numSft = 0;
if((int)allshifts.size()>0) {
for(int i = 0; i < (int)allshifts.size(); ++i) {
numWsf += allshifts[i].size();
}
2015-02-19 15:27:23 +03:00
numSft = allshifts.size();
}
if((int)alignment.size()>0 ) {
for(int i = 0; i < (int)alignment.size(); ++i) {
switch (alignment[i]) {
case 'S':
case 'T':
numSub++;
break;
case 'D':
numDel++;
break;
case 'I':
numIns++;
break;
}
2014-08-29 16:46:56 +04:00
}
2015-02-19 15:27:23 +03:00
}
// if(numEdits != numSft + numDel + numIns + numSub)
// System.out.println("** Error, unmatch edit erros " + numEdits +
// " vs " + (numSft + numDel + numIns + numSub));
}
/**
 * Renders the alignment as its characters separated by single spaces.
 *
 * @return the space-separated alignment codes; empty when there are none
 */
string terAlignment::printAlignments()
{
  stringstream out;
  const size_t count = alignment.size();
  for ( size_t pos = 0; pos < count; ++pos ) {
    // Separator goes before every element except the first.
    if ( pos > 0 ) {
      out << " ";
    }
    const char code = alignment.at ( pos );
    out << code;
  }
  return out.str();
}
2014-08-29 16:46:56 +04:00
/**
 * Renders the shift list on a single line: "NbrShifts: N" followed, for each
 * shift, by a tab and the text returned by terShift::toString().
 *
 * @return the one-line summary; "NbrShifts: 0" when there are no shifts
 */
string terAlignment::printAllShifts()
{
  stringstream out;
  const int shiftCount = static_cast<int> ( allshifts.size() );

  // The count is printed the same way whether or not it is zero.
  out << "NbrShifts: " << shiftCount;
  for ( int idx = 0; idx < shiftCount; ++idx ) {
    // toString() is invoked on a copy of the stored shift.
    out << "\t" << static_cast<terShift> ( allshifts[idx] ).toString();
  }
  return out.str();
}
2015-09-26 00:20:09 +03:00
}