mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 22:14:57 +03:00
Fixed several bugs in LRscore-MERT. Namely: fixed a float-to-int conversion problem; added a hypothesis counter to the scores file so that the average reordering score can be computed later; fixed the special case of a one-word hypothesis; and enabled reading of word-based alignments from the n-best list.
This commit is contained in:
parent
52cc46cebd
commit
ff276e9911
@ -155,7 +155,10 @@ void Data::loadNBest(const string &file)
|
|||||||
string temp;
|
string temp;
|
||||||
getNextPound(line, temp, "|||"); //fourth field sentence score
|
getNextPound(line, temp, "|||"); //fourth field sentence score
|
||||||
if (line.length() > 0) {
|
if (line.length() > 0) {
|
||||||
getNextPound(line, alignment, "|||"); //fourth field only there if alignment scorer
|
getNextPound(line, alignment, "|||"); //fifth field (if present) is either phrase or word alignment
|
||||||
|
if (line.length() > 0) {
|
||||||
|
getNextPound(line, alignment, "|||"); //sixth field (if present) is word alignment
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//TODO check alignment exists if scorers need it
|
//TODO check alignment exists if scorers need it
|
||||||
|
@ -86,6 +86,7 @@ void Permutation::set(const string & alignment,const int sourceLength)
|
|||||||
//cout << "SP:" << sourcePos << " TP:" << targetPos << endl;
|
//cout << "SP:" << sourcePos << " TP:" << targetPos << endl;
|
||||||
if (sourcePos > sourceLength) {
|
if (sourcePos > sourceLength) {
|
||||||
cerr << "Source sentence length:" << sourceLength << " is smaller than alignment source position:" << sourcePos << endl;
|
cerr << "Source sentence length:" << sourceLength << " is smaller than alignment source position:" << sourcePos << endl;
|
||||||
|
cerr << "******** Permutation::set :" << alignment << ": len : " << sourceLength <<endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
//If have multiple target pos aligned to one source,
|
//If have multiple target pos aligned to one source,
|
||||||
@ -185,11 +186,15 @@ float Permutation::distance(const Permutation &permCompare, const distanceMetric
|
|||||||
{
|
{
|
||||||
float score=0;
|
float score=0;
|
||||||
|
|
||||||
//cout << "*****Permutation::distance" <<endl;
|
//bool debug= (verboselevel()>3); // TODO: fix verboselevel()
|
||||||
//cout << "Ref:" << endl;
|
bool debug=false;
|
||||||
//dump();
|
if (debug) {
|
||||||
//cout << "Comp:" << endl;
|
cout << "*****Permutation::distance" <<endl;
|
||||||
//permCompare.dump();
|
cout << "Hypo:" << endl;
|
||||||
|
dump();
|
||||||
|
cout << "Ref: " << endl;
|
||||||
|
permCompare.dump();
|
||||||
|
}
|
||||||
|
|
||||||
if (type == HAMMING_DISTANCE) {
|
if (type == HAMMING_DISTANCE) {
|
||||||
score = calculateHamming(permCompare);
|
score = calculateHamming(permCompare);
|
||||||
@ -204,8 +209,10 @@ float Permutation::distance(const Permutation &permCompare, const distanceMetric
|
|||||||
score = score * exp(brevityPenalty);
|
score = score * exp(brevityPenalty);
|
||||||
}
|
}
|
||||||
|
|
||||||
//cout << "Distance type:" << type << endl;
|
if (debug) {
|
||||||
//cout << "Score: "<< score << endl;
|
cout << "Distance type:" << type << endl;
|
||||||
|
cout << "Score: "<< score << endl;
|
||||||
|
}
|
||||||
return score;
|
return score;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -244,6 +251,10 @@ float Permutation::calculateKendall(const Permutation & compare) const
|
|||||||
cerr << "Empty permutation" << endl;
|
cerr << "Empty permutation" << endl;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
if (getLength() == 1) {
|
||||||
|
cerr << "One-word sentence. Kendall score = 1" << endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
for (size_t i=0; i<getLength(); i++) {
|
for (size_t i=0; i<getLength(); i++) {
|
||||||
for (size_t j=0; j<getLength(); j++) {
|
for (size_t j=0; j<getLength(); j++) {
|
||||||
if ((m_array[i] < m_array[j]) && (compareArray[i] > compareArray[j])) {
|
if ((m_array[i] < m_array[j]) && (compareArray[i] > compareArray[j])) {
|
||||||
|
@ -8,6 +8,7 @@ namespace MosesTuning
|
|||||||
|
|
||||||
|
|
||||||
const int PermutationScorer::SCORE_PRECISION = 5;
|
const int PermutationScorer::SCORE_PRECISION = 5;
|
||||||
|
const int PermutationScorer::SCORE_MULTFACT = 100000; // 100000=10^SCORE_PRECISION
|
||||||
|
|
||||||
PermutationScorer::PermutationScorer(const string &distanceMetric, const string &config)
|
PermutationScorer::PermutationScorer(const string &distanceMetric, const string &config)
|
||||||
:StatisticsBasedScorer(distanceMetric,config)
|
:StatisticsBasedScorer(distanceMetric,config)
|
||||||
@ -145,12 +146,15 @@ int PermutationScorer::getNumberWords (const string& text) const
|
|||||||
|
|
||||||
void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
|
void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
|
||||||
{
|
{
|
||||||
//cout << "*******prepareStats" ;
|
//bool debug= (verboselevel()>3); // TODO: fix verboselevel()
|
||||||
//cout << text << endl;
|
bool debug=false;
|
||||||
//cout << sid << endl;
|
if (debug) {
|
||||||
//cout << "Reference0align:" << endl;
|
cout << "*******prepareStats" ;
|
||||||
//m_referencePerms[0][sid].dump();
|
cout << text << endl;
|
||||||
|
cout << sid << endl;
|
||||||
|
cout << "Reference0align:" << endl;
|
||||||
|
m_referencePerms[0][sid].dump();
|
||||||
|
}
|
||||||
|
|
||||||
string sentence = "";
|
string sentence = "";
|
||||||
string align = text;
|
string align = text;
|
||||||
@ -169,7 +173,25 @@ void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats&
|
|||||||
float distanceValue;
|
float distanceValue;
|
||||||
|
|
||||||
//need to create permutations for each nbest line
|
//need to create permutations for each nbest line
|
||||||
string standardFormat = Permutation::convertMosesToStandard(align);
|
//here we check if the alignments extracted from the nbest are phrase-based or word-based, in which case no conversion is needed
|
||||||
|
bool isWordAlignment=true;
|
||||||
|
string alignCopy = align;
|
||||||
|
string align1;
|
||||||
|
getNextPound(alignCopy,align1," ");
|
||||||
|
if (align1.length() > 0) {
|
||||||
|
size_t phraseDelimeter = align1.find("=");
|
||||||
|
if(phraseDelimeter!= string::npos)
|
||||||
|
isWordAlignment=false;
|
||||||
|
}
|
||||||
|
string standardFormat = align;
|
||||||
|
if(!isWordAlignment)
|
||||||
|
standardFormat= Permutation::convertMosesToStandard(align);
|
||||||
|
|
||||||
|
if (debug) {
|
||||||
|
cerr << "Nbest alignment: " << align << endl;
|
||||||
|
cerr << "-->std alignment: " << standardFormat << endl;
|
||||||
|
}
|
||||||
|
|
||||||
Permutation perm(standardFormat, m_sourceLengths[sid],translationLength);
|
Permutation perm(standardFormat, m_sourceLengths[sid],translationLength);
|
||||||
//perm.dump();
|
//perm.dump();
|
||||||
|
|
||||||
@ -201,9 +223,10 @@ void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats&
|
|||||||
}
|
}
|
||||||
|
|
||||||
//SCOREROUT eg: 0.04546
|
//SCOREROUT eg: 0.04546
|
||||||
|
distanceValue*=SCORE_MULTFACT; //SCOREROUT eg: 4546 to transform float into integer
|
||||||
ostringstream tempStream;
|
ostringstream tempStream;
|
||||||
tempStream.precision(SCORE_PRECISION);
|
tempStream.precision(SCORE_PRECISION);
|
||||||
tempStream << distanceValue;
|
tempStream << distanceValue << " 1"; //use for final normalization over the amount of test sentences
|
||||||
string str = tempStream.str();
|
string str = tempStream.str();
|
||||||
entry.set(str);
|
entry.set(str);
|
||||||
|
|
||||||
@ -214,8 +237,8 @@ void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats&
|
|||||||
statscore_t PermutationScorer::calculateScore(const vector<int>& comps) const
|
statscore_t PermutationScorer::calculateScore(const vector<int>& comps) const
|
||||||
{
|
{
|
||||||
//cerr << "*******PermutationScorer::calculateScore" ;
|
//cerr << "*******PermutationScorer::calculateScore" ;
|
||||||
//cerr << " " << comps[0] << endl;
|
//cerr << " " << comps[0]/comps[1] << endl;
|
||||||
return comps[0];
|
return (((statscore_t) comps[0]) / comps[1]) / SCORE_MULTFACT;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -32,10 +32,15 @@ public:
|
|||||||
void setReferenceFiles(const std::vector<std::string>& referenceFiles);
|
void setReferenceFiles(const std::vector<std::string>& referenceFiles);
|
||||||
void prepareStats(size_t sid, const std::string& text, ScoreStats& entry);
|
void prepareStats(size_t sid, const std::string& text, ScoreStats& entry);
|
||||||
static const int SCORE_PRECISION;
|
static const int SCORE_PRECISION;
|
||||||
|
static const int SCORE_MULTFACT;
|
||||||
|
|
||||||
size_t NumberOfScores() const {
|
size_t NumberOfScores() const {
|
||||||
//cerr << "PermutationScorer number of scores: 1" << endl;
|
//cerr << "PermutationScorer number of scores: 1" << endl;
|
||||||
return 1;
|
//return 1;
|
||||||
|
|
||||||
|
//cerr << "PermutationScorer number of scores: 2" << endl;
|
||||||
|
//the second it is just a counter for the normalization of the amount of test sentences
|
||||||
|
return 2;
|
||||||
};
|
};
|
||||||
bool useAlignment() const {
|
bool useAlignment() const {
|
||||||
//cout << "PermutationScorer::useAlignment returning true" << endl;
|
//cout << "PermutationScorer::useAlignment returning true" << endl;
|
||||||
|
Loading…
Reference in New Issue
Block a user