mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 05:55:02 +03:00
Fixed several bugs in LRscore-MERT. Namely, solved a float-to-int conversion; added hypothesis counter to the scores file to enable later computation of average reordering score; fixed special case of 1-word hypothesis; enabled reading of word-based alignments from n-best-list.
This commit is contained in:
parent
52cc46cebd
commit
ff276e9911
@ -155,7 +155,10 @@ void Data::loadNBest(const string &file)
|
||||
string temp;
|
||||
getNextPound(line, temp, "|||"); //fourth field sentence score
|
||||
if (line.length() > 0) {
|
||||
getNextPound(line, alignment, "|||"); //fourth field only there if alignment scorer
|
||||
getNextPound(line, alignment, "|||"); //fifth field (if present) is either phrase or word alignment
|
||||
if (line.length() > 0) {
|
||||
getNextPound(line, alignment, "|||"); //sixth field (if present) is word alignment
|
||||
}
|
||||
}
|
||||
}
|
||||
//TODO check alignment exists if scorers need it
|
||||
|
@ -86,6 +86,7 @@ void Permutation::set(const string & alignment,const int sourceLength)
|
||||
//cout << "SP:" << sourcePos << " TP:" << targetPos << endl;
|
||||
if (sourcePos > sourceLength) {
|
||||
cerr << "Source sentence length:" << sourceLength << " is smaller than alignment source position:" << sourcePos << endl;
|
||||
cerr << "******** Permutation::set :" << alignment << ": len : " << sourceLength <<endl;
|
||||
exit(1);
|
||||
}
|
||||
//If have multiple target pos aligned to one source,
|
||||
@ -185,11 +186,15 @@ float Permutation::distance(const Permutation &permCompare, const distanceMetric
|
||||
{
|
||||
float score=0;
|
||||
|
||||
//cout << "*****Permutation::distance" <<endl;
|
||||
//cout << "Ref:" << endl;
|
||||
//dump();
|
||||
//cout << "Comp:" << endl;
|
||||
//permCompare.dump();
|
||||
//bool debug= (verboselevel()>3); // TODO: fix verboselevel()
|
||||
bool debug=false;
|
||||
if (debug) {
|
||||
cout << "*****Permutation::distance" <<endl;
|
||||
cout << "Hypo:" << endl;
|
||||
dump();
|
||||
cout << "Ref: " << endl;
|
||||
permCompare.dump();
|
||||
}
|
||||
|
||||
if (type == HAMMING_DISTANCE) {
|
||||
score = calculateHamming(permCompare);
|
||||
@ -204,8 +209,10 @@ float Permutation::distance(const Permutation &permCompare, const distanceMetric
|
||||
score = score * exp(brevityPenalty);
|
||||
}
|
||||
|
||||
//cout << "Distance type:" << type << endl;
|
||||
//cout << "Score: "<< score << endl;
|
||||
if (debug) {
|
||||
cout << "Distance type:" << type << endl;
|
||||
cout << "Score: "<< score << endl;
|
||||
}
|
||||
return score;
|
||||
}
|
||||
|
||||
@ -244,6 +251,10 @@ float Permutation::calculateKendall(const Permutation & compare) const
|
||||
cerr << "Empty permutation" << endl;
|
||||
return 0;
|
||||
}
|
||||
if (getLength() == 1) {
|
||||
cerr << "One-word sentence. Kendall score = 1" << endl;
|
||||
return 1;
|
||||
}
|
||||
for (size_t i=0; i<getLength(); i++) {
|
||||
for (size_t j=0; j<getLength(); j++) {
|
||||
if ((m_array[i] < m_array[j]) && (compareArray[i] > compareArray[j])) {
|
||||
|
@ -8,6 +8,7 @@ namespace MosesTuning
|
||||
|
||||
|
||||
const int PermutationScorer::SCORE_PRECISION = 5;
|
||||
const int PermutationScorer::SCORE_MULTFACT = 100000; // 100000=10^SCORE_PRECISION
|
||||
|
||||
PermutationScorer::PermutationScorer(const string &distanceMetric, const string &config)
|
||||
:StatisticsBasedScorer(distanceMetric,config)
|
||||
@ -145,12 +146,15 @@ int PermutationScorer::getNumberWords (const string& text) const
|
||||
|
||||
void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
|
||||
{
|
||||
//cout << "*******prepareStats" ;
|
||||
//cout << text << endl;
|
||||
//cout << sid << endl;
|
||||
//cout << "Reference0align:" << endl;
|
||||
//m_referencePerms[0][sid].dump();
|
||||
|
||||
//bool debug= (verboselevel()>3); // TODO: fix verboselevel()
|
||||
bool debug=false;
|
||||
if (debug) {
|
||||
cout << "*******prepareStats" ;
|
||||
cout << text << endl;
|
||||
cout << sid << endl;
|
||||
cout << "Reference0align:" << endl;
|
||||
m_referencePerms[0][sid].dump();
|
||||
}
|
||||
|
||||
string sentence = "";
|
||||
string align = text;
|
||||
@ -169,7 +173,25 @@ void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats&
|
||||
float distanceValue;
|
||||
|
||||
//need to create permutations for each nbest line
|
||||
string standardFormat = Permutation::convertMosesToStandard(align);
|
||||
//here we check whether the alignments extracted from the nbest are phrase-based or word-based; word-based alignments need no conversion
|
||||
bool isWordAlignment=true;
|
||||
string alignCopy = align;
|
||||
string align1;
|
||||
getNextPound(alignCopy,align1," ");
|
||||
if (align1.length() > 0) {
|
||||
size_t phraseDelimeter = align1.find("=");
|
||||
if(phraseDelimeter!= string::npos)
|
||||
isWordAlignment=false;
|
||||
}
|
||||
string standardFormat = align;
|
||||
if(!isWordAlignment)
|
||||
standardFormat= Permutation::convertMosesToStandard(align);
|
||||
|
||||
if (debug) {
|
||||
cerr << "Nbest alignment: " << align << endl;
|
||||
cerr << "-->std alignment: " << standardFormat << endl;
|
||||
}
|
||||
|
||||
Permutation perm(standardFormat, m_sourceLengths[sid],translationLength);
|
||||
//perm.dump();
|
||||
|
||||
@ -201,9 +223,10 @@ void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats&
|
||||
}
|
||||
|
||||
//SCOREROUT eg: 0.04546
|
||||
distanceValue*=SCORE_MULTFACT; //SCOREROUT eg: 4546 to transform float into integer
|
||||
ostringstream tempStream;
|
||||
tempStream.precision(SCORE_PRECISION);
|
||||
tempStream << distanceValue;
|
||||
tempStream << distanceValue << " 1"; //use for final normalization over the amount of test sentences
|
||||
string str = tempStream.str();
|
||||
entry.set(str);
|
||||
|
||||
@ -214,8 +237,8 @@ void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats&
|
||||
statscore_t PermutationScorer::calculateScore(const vector<int>& comps) const
|
||||
{
|
||||
//cerr << "*******PermutationScorer::calculateScore" ;
|
||||
//cerr << " " << comps[0] << endl;
|
||||
return comps[0];
|
||||
//cerr << " " << comps[0]/comps[1] << endl;
|
||||
return (((statscore_t) comps[0]) / comps[1]) / SCORE_MULTFACT;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -32,10 +32,15 @@ public:
|
||||
void setReferenceFiles(const std::vector<std::string>& referenceFiles);
|
||||
void prepareStats(size_t sid, const std::string& text, ScoreStats& entry);
|
||||
static const int SCORE_PRECISION;
|
||||
static const int SCORE_MULTFACT;
|
||||
|
||||
size_t NumberOfScores() const {
|
||||
//cerr << "PermutationScorer number of scores: 1" << endl;
|
||||
return 1;
|
||||
//return 1;
|
||||
|
||||
//cerr << "PermutationScorer number of scores: 2" << endl;
|
||||
//the second score is just a counter, used to normalize over the number of test sentences
|
||||
return 2;
|
||||
};
|
||||
bool useAlignment() const {
|
||||
//cout << "PermutationScorer::useAlignment returning true" << endl;
|
||||
|
Loading…
Reference in New Issue
Block a user