Fixed several bugs in LRscore-MERT: fixed a float-to-int conversion error; added a hypothesis counter to the scores file so that the average reordering score can be computed later; fixed the special case of one-word hypotheses; enabled reading of word-based alignments from the n-best list.

This commit is contained in:
Arianna Bisazza 2012-09-24 15:40:18 +02:00
parent 52cc46cebd
commit ff276e9911
4 changed files with 61 additions and 19 deletions

View File

@ -155,7 +155,10 @@ void Data::loadNBest(const string &file)
string temp;
getNextPound(line, temp, "|||"); //fourth field sentence score
if (line.length() > 0) {
getNextPound(line, alignment, "|||"); //fourth field only there if alignment scorer
getNextPound(line, alignment, "|||"); //fifth field (if present) is either phrase or word alignment
if (line.length() > 0) {
getNextPound(line, alignment, "|||"); //sixth field (if present) is word alignment
}
}
}
//TODO check alignment exists if scorers need it

View File

@ -86,6 +86,7 @@ void Permutation::set(const string & alignment,const int sourceLength)
//cout << "SP:" << sourcePos << " TP:" << targetPos << endl;
if (sourcePos > sourceLength) {
cerr << "Source sentence length:" << sourceLength << " is smaller than alignment source position:" << sourcePos << endl;
cerr << "******** Permutation::set :" << alignment << ": len : " << sourceLength <<endl;
exit(1);
}
//If have multiple target pos aligned to one source,
@ -185,11 +186,15 @@ float Permutation::distance(const Permutation &permCompare, const distanceMetric
{
float score=0;
//cout << "*****Permutation::distance" <<endl;
//cout << "Ref:" << endl;
//dump();
//cout << "Comp:" << endl;
//permCompare.dump();
//bool debug= (verboselevel()>3); // TODO: fix verboselevel()
bool debug=false;
if (debug) {
cout << "*****Permutation::distance" <<endl;
cout << "Hypo:" << endl;
dump();
cout << "Ref: " << endl;
permCompare.dump();
}
if (type == HAMMING_DISTANCE) {
score = calculateHamming(permCompare);
@ -204,8 +209,10 @@ float Permutation::distance(const Permutation &permCompare, const distanceMetric
score = score * exp(brevityPenalty);
}
//cout << "Distance type:" << type << endl;
//cout << "Score: "<< score << endl;
if (debug) {
cout << "Distance type:" << type << endl;
cout << "Score: "<< score << endl;
}
return score;
}
@ -244,6 +251,10 @@ float Permutation::calculateKendall(const Permutation & compare) const
cerr << "Empty permutation" << endl;
return 0;
}
if (getLength() == 1) {
cerr << "One-word sentence. Kendall score = 1" << endl;
return 1;
}
for (size_t i=0; i<getLength(); i++) {
for (size_t j=0; j<getLength(); j++) {
if ((m_array[i] < m_array[j]) && (compareArray[i] > compareArray[j])) {

View File

@ -8,6 +8,7 @@ namespace MosesTuning
const int PermutationScorer::SCORE_PRECISION = 5;
const int PermutationScorer::SCORE_MULTFACT = 100000; // 100000=10^SCORE_PRECISION
PermutationScorer::PermutationScorer(const string &distanceMetric, const string &config)
:StatisticsBasedScorer(distanceMetric,config)
@ -145,12 +146,15 @@ int PermutationScorer::getNumberWords (const string& text) const
void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
//cout << "*******prepareStats" ;
//cout << text << endl;
//cout << sid << endl;
//cout << "Reference0align:" << endl;
//m_referencePerms[0][sid].dump();
//bool debug= (verboselevel()>3); // TODO: fix verboselevel()
bool debug=false;
if (debug) {
cout << "*******prepareStats" ;
cout << text << endl;
cout << sid << endl;
cout << "Reference0align:" << endl;
m_referencePerms[0][sid].dump();
}
string sentence = "";
string align = text;
@ -169,7 +173,25 @@ void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats&
float distanceValue;
//need to create permutations for each nbest line
string standardFormat = Permutation::convertMosesToStandard(align);
//check whether the alignments extracted from the n-best list are phrase-based or word-based; word-based alignments need no conversion
bool isWordAlignment=true;
string alignCopy = align;
string align1;
getNextPound(alignCopy,align1," ");
if (align1.length() > 0) {
size_t phraseDelimeter = align1.find("=");
if(phraseDelimeter!= string::npos)
isWordAlignment=false;
}
string standardFormat = align;
if(!isWordAlignment)
standardFormat= Permutation::convertMosesToStandard(align);
if (debug) {
cerr << "Nbest alignment: " << align << endl;
cerr << "-->std alignment: " << standardFormat << endl;
}
Permutation perm(standardFormat, m_sourceLengths[sid],translationLength);
//perm.dump();
@ -201,9 +223,10 @@ void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats&
}
//SCOREROUT eg: 0.04546
distanceValue*=SCORE_MULTFACT; //SCOREROUT eg: 4546 to transform float into integer
ostringstream tempStream;
tempStream.precision(SCORE_PRECISION);
tempStream << distanceValue;
tempStream << distanceValue << " 1"; //use for final normalization over the amount of test sentences
string str = tempStream.str();
entry.set(str);
@ -214,8 +237,8 @@ void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats&
/**
 * Convert the accumulated statistics into the final permutation score.
 *
 * @param comps  comps[0] is the distance statistic, which prepareStats
 *               multiplies by SCORE_MULTFACT before storing so it survives
 *               as an integer; comps[1] is the hypothesis counter
 *               (prepareStats appends " 1" to every entry).
 *               NOTE(review): presumably both are summed over the test set
 *               by the base scorer before this is called -- confirm.
 * @return the statistic divided by the counter and scaled back down by
 *         SCORE_MULTFACT; 0 when the counter is missing or zero.
 */
statscore_t PermutationScorer::calculateScore(const vector<int>& comps) const
{
  // Without a counter there is nothing to normalise by: avoid reading past
  // the end of the vector and avoid dividing by zero.
  if (comps.size() < 2 || comps[1] == 0) {
    return 0;
  }
  // Promote to statscore_t before dividing so the average is not truncated
  // by integer division.
  return (static_cast<statscore_t>(comps[0]) / comps[1]) / SCORE_MULTFACT;
}
}

View File

@ -32,10 +32,15 @@ public:
void setReferenceFiles(const std::vector<std::string>& referenceFiles);
void prepareStats(size_t sid, const std::string& text, ScoreStats& entry);
static const int SCORE_PRECISION;
static const int SCORE_MULTFACT;
/**
 * Number of statistics stored per hypothesis.
 *
 * Two values are kept: the scaled distance score and a counter that is
 * later used to normalise over the number of test sentences.
 */
size_t NumberOfScores() const {
  return 2;
}
bool useAlignment() const {
//cout << "PermutationScorer::useAlignment returning true" << endl;