Fixed several bugs in LRscore-MERT. Namely: fixed a float-to-int conversion error; added a hypothesis counter to the scores file so that the average reordering score can be computed later; fixed the special case of a 1-word hypothesis; enabled reading of word-based alignments from the n-best list.

This commit is contained in:
Arianna Bisazza 2012-09-24 15:40:18 +02:00
parent 52cc46cebd
commit ff276e9911
4 changed files with 61 additions and 19 deletions

View File

@ -155,7 +155,10 @@ void Data::loadNBest(const string &file)
string temp; string temp;
getNextPound(line, temp, "|||"); //fourth field sentence score getNextPound(line, temp, "|||"); //fourth field sentence score
if (line.length() > 0) { if (line.length() > 0) {
getNextPound(line, alignment, "|||"); //fourth field only there if alignment scorer getNextPound(line, alignment, "|||"); //fifth field (if present) is either phrase or word alignment
if (line.length() > 0) {
getNextPound(line, alignment, "|||"); //sixth field (if present) is word alignment
}
} }
} }
//TODO check alignment exists if scorers need it //TODO check alignment exists if scorers need it

View File

@ -86,6 +86,7 @@ void Permutation::set(const string & alignment,const int sourceLength)
//cout << "SP:" << sourcePos << " TP:" << targetPos << endl; //cout << "SP:" << sourcePos << " TP:" << targetPos << endl;
if (sourcePos > sourceLength) { if (sourcePos > sourceLength) {
cerr << "Source sentence length:" << sourceLength << " is smaller than alignment source position:" << sourcePos << endl; cerr << "Source sentence length:" << sourceLength << " is smaller than alignment source position:" << sourcePos << endl;
cerr << "******** Permutation::set :" << alignment << ": len : " << sourceLength <<endl;
exit(1); exit(1);
} }
//If have multiple target pos aligned to one source, //If have multiple target pos aligned to one source,
@ -185,11 +186,15 @@ float Permutation::distance(const Permutation &permCompare, const distanceMetric
{ {
float score=0; float score=0;
//cout << "*****Permutation::distance" <<endl; //bool debug= (verboselevel()>3); // TODO: fix verboselevel()
//cout << "Ref:" << endl; bool debug=false;
//dump(); if (debug) {
//cout << "Comp:" << endl; cout << "*****Permutation::distance" <<endl;
//permCompare.dump(); cout << "Hypo:" << endl;
dump();
cout << "Ref: " << endl;
permCompare.dump();
}
if (type == HAMMING_DISTANCE) { if (type == HAMMING_DISTANCE) {
score = calculateHamming(permCompare); score = calculateHamming(permCompare);
@ -204,8 +209,10 @@ float Permutation::distance(const Permutation &permCompare, const distanceMetric
score = score * exp(brevityPenalty); score = score * exp(brevityPenalty);
} }
//cout << "Distance type:" << type << endl; if (debug) {
//cout << "Score: "<< score << endl; cout << "Distance type:" << type << endl;
cout << "Score: "<< score << endl;
}
return score; return score;
} }
@ -244,6 +251,10 @@ float Permutation::calculateKendall(const Permutation & compare) const
cerr << "Empty permutation" << endl; cerr << "Empty permutation" << endl;
return 0; return 0;
} }
if (getLength() == 1) {
cerr << "One-word sentence. Kendall score = 1" << endl;
return 1;
}
for (size_t i=0; i<getLength(); i++) { for (size_t i=0; i<getLength(); i++) {
for (size_t j=0; j<getLength(); j++) { for (size_t j=0; j<getLength(); j++) {
if ((m_array[i] < m_array[j]) && (compareArray[i] > compareArray[j])) { if ((m_array[i] < m_array[j]) && (compareArray[i] > compareArray[j])) {

View File

@ -8,6 +8,7 @@ namespace MosesTuning
const int PermutationScorer::SCORE_PRECISION = 5; const int PermutationScorer::SCORE_PRECISION = 5;
const int PermutationScorer::SCORE_MULTFACT = 100000; // 100000=10^SCORE_PRECISION
PermutationScorer::PermutationScorer(const string &distanceMetric, const string &config) PermutationScorer::PermutationScorer(const string &distanceMetric, const string &config)
:StatisticsBasedScorer(distanceMetric,config) :StatisticsBasedScorer(distanceMetric,config)
@ -145,12 +146,15 @@ int PermutationScorer::getNumberWords (const string& text) const
void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry) void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{ {
//cout << "*******prepareStats" ; //bool debug= (verboselevel()>3); // TODO: fix verboselevel()
//cout << text << endl; bool debug=false;
//cout << sid << endl; if (debug) {
//cout << "Reference0align:" << endl; cout << "*******prepareStats" ;
//m_referencePerms[0][sid].dump(); cout << text << endl;
cout << sid << endl;
cout << "Reference0align:" << endl;
m_referencePerms[0][sid].dump();
}
string sentence = ""; string sentence = "";
string align = text; string align = text;
@ -169,7 +173,25 @@ void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats&
float distanceValue; float distanceValue;
//need to create permutations for each nbest line //need to create permutations for each nbest line
string standardFormat = Permutation::convertMosesToStandard(align); //here we check if the alignments extracted from the nbest are phrase-based or word-based, in which case no conversion is needed
bool isWordAlignment=true;
string alignCopy = align;
string align1;
getNextPound(alignCopy,align1," ");
if (align1.length() > 0) {
size_t phraseDelimeter = align1.find("=");
if(phraseDelimeter!= string::npos)
isWordAlignment=false;
}
string standardFormat = align;
if(!isWordAlignment)
standardFormat= Permutation::convertMosesToStandard(align);
if (debug) {
cerr << "Nbest alignment: " << align << endl;
cerr << "-->std alignment: " << standardFormat << endl;
}
Permutation perm(standardFormat, m_sourceLengths[sid],translationLength); Permutation perm(standardFormat, m_sourceLengths[sid],translationLength);
//perm.dump(); //perm.dump();
@ -201,9 +223,10 @@ void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats&
} }
//SCOREROUT eg: 0.04546 //SCOREROUT eg: 0.04546
distanceValue*=SCORE_MULTFACT; //SCOREROUT eg: 4546 to transform float into integer
ostringstream tempStream; ostringstream tempStream;
tempStream.precision(SCORE_PRECISION); tempStream.precision(SCORE_PRECISION);
tempStream << distanceValue; tempStream << distanceValue << " 1"; //use for final normalization over the amount of test sentences
string str = tempStream.str(); string str = tempStream.str();
entry.set(str); entry.set(str);
@ -214,8 +237,8 @@ void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats&
statscore_t PermutationScorer::calculateScore(const vector<int>& comps) const statscore_t PermutationScorer::calculateScore(const vector<int>& comps) const
{ {
//cerr << "*******PermutationScorer::calculateScore" ; //cerr << "*******PermutationScorer::calculateScore" ;
//cerr << " " << comps[0] << endl; //cerr << " " << comps[0]/comps[1] << endl;
return comps[0]; return (((statscore_t) comps[0]) / comps[1]) / SCORE_MULTFACT;
} }
} }

View File

@ -32,10 +32,15 @@ public:
void setReferenceFiles(const std::vector<std::string>& referenceFiles); void setReferenceFiles(const std::vector<std::string>& referenceFiles);
void prepareStats(size_t sid, const std::string& text, ScoreStats& entry); void prepareStats(size_t sid, const std::string& text, ScoreStats& entry);
static const int SCORE_PRECISION; static const int SCORE_PRECISION;
static const int SCORE_MULTFACT;
size_t NumberOfScores() const { size_t NumberOfScores() const {
//cerr << "PermutationScorer number of scores: 1" << endl; //cerr << "PermutationScorer number of scores: 1" << endl;
return 1; //return 1;
//cerr << "PermutationScorer number of scores: 2" << endl;
//the second it is just a counter for the normalization of the amount of test sentences
return 2;
}; };
bool useAlignment() const { bool useAlignment() const {
//cout << "PermutationScorer::useAlignment returning true" << endl; //cout << "PermutationScorer::useAlignment returning true" << endl;