/* * Permutation.cpp * met - Minimum Error Training * * Created by Alexandra Birch 18/11/09. * */ #include #include #include #include "Permutation.h" #include "Util.h" using namespace std; namespace MosesTuning { Permutation::Permutation(const string &alignment, const int sourceLength, const int targetLength ) { if (sourceLength > 0) { set(alignment, sourceLength); } m_targetLength = targetLength; } size_t Permutation::getLength() const { return int(m_array.size()); } void Permutation::dump() const { int j=0; for (vector::const_iterator i = m_array.begin(); i !=m_array.end(); i++) { cout << "("; cout << j << ":" << *i ; cout << "), "; j++; } cout << endl; } //Sent alignment string //Eg: "0-1 0-0 1-2 3-0 4-5 6-7 " // Inidiviual word alignments which can be one-one, // or null aligned, or many-many. The format is sourcepos - targetpos //Its the output of the berkley aligner subtracting 1 from each number //sourceLength needed because last source words might not be aligned void Permutation::set(const string & alignment,const int sourceLength) { //cout << "******** Permutation::set :" << alignment << ": len : " << sourceLength < tokens; // Create vector to hold our words while (ss >> buf) tokens.push_back(buf); vector tempPerm(sourceLength, -1); //Set tempPerm to have one target position per source position for (size_t i=0; i sourceLength) { cerr << "Source sentence length:" << sourceLength << " is smaller than alignment source position:" << sourcePos << endl; cerr << "******** Permutation::set :" << alignment << ": len : " << sourceLength < targetPos) { tempPerm[sourcePos] = targetPos; } } //TODO //Set final permutation in m_array //Take care of: source - null // multiple_source - one target // unaligned target // Input: 1-9 2-1 4-3 4-4 5-6 6-6 7-6 8-8 // Convert source: 1 2 3 4 5 6 7 8 // target: 9 1 -1 3 6 6 6 8 -> 8 1 2 3 4 5 6 7 // 1st step: Add null aligned source to previous alignment // target: 9 1 -1 3 6 6 6 8 -> 9 1 1 3 6 6 6 8 int last=0; m_array.assign(sourceLength, -1); //get a searcheable index multimap invMap; multimap::iterator it; //cout << " SourceP -> TargetP " << endl; for (size_t i=0; i " << tempPerm[i] << endl; //Key is target pos, value is source pos invMap.insert(pair(tempPerm[i],int(i))); } // 2nd step: Get target into index of multimap and sort // Convert source: 1 2 3 4 5 6 7 8 // target: 9 1 0 3 6 6 6 8 -> 0 1 3 6 6 6 8 9 // source: 3 2 4 5 6 7 8 1 int i=0; //cout << " TargetP => SourceP : TargetIndex " << endl; for ( it=invMap.begin() ; it != invMap.end(); it++ ) { //cout << (*it).first << " => " << (*it).second << " : " << i << endl; //find source position m_array[(*it).second] = i; i++; } bool ok = checkValidPermutation(m_array); //dump(); if (!ok) { throw runtime_error(" Created invalid permutation"); } } //Static vector Permutation::invert(const vector & inVector) { vector outVector(inVector.size()); for (size_t i=0; i const & inVector) { vector test(inVector.size(),-1); for (size_t i=0; i< inVector.size(); i++) { //No multiple entries of same value allowed if (test[inVector[i]] > -1) { cerr << "Permutation error: multiple entries of same value\n" << endl; return false; } test[inVector[i]] ++; } for (size_t i=0; i3); // TODO: fix verboselevel() bool debug=false; if (debug) { cout << "*****Permutation::distance" < compareArray = compare.getArray(); if (getLength() != compare.getLength()) { cerr << "1stperm: " << getLength() << " 2ndperm: " << compare.getLength() << endl; throw runtime_error("Length of permutations not equal"); } if (getLength() == 0) { cerr << "Empty permutation" << endl; return 0; } for (size_t i=0; i compareArray = compare.getArray(); if (getLength() != compare.getLength()) { cerr << "1stperm: " << getLength() << " 2ndperm: " << compare.getLength() << endl; throw runtime_error("Length of permutations not equal"); } if (getLength() == 0) { cerr << "Empty permutation" << endl; return 0; } if (getLength() == 1) { cerr << "One-word sentence. Kendall score = 1" << endl; return 1; } for (size_t i=0; i compareArray[j])) { score++; } } } score = (score / ((getLength()*getLength() - getLength()) /2 ) ); //Adjusted Kendall's tau correlates better with human judgements score = sqrt (score); score = 1 - score; return score; } vector Permutation::getArray() const { vector ret = m_array; return ret; } //Static //This function is called with test which is // the 5th field in moses nbest output when called with -include-alignment-in-n-best //eg. 0=0 1-2=1-2 3=3 4=4 5=5 6=6 7-9=7-8 10=9 11-13=10-11 (source-target) string Permutation::convertMosesToStandard(string const & alignment) { if (alignment.length() == 0) { cerr << "Alignment input string empty" << endl; } string working = alignment; string out; stringstream oss; while (working.length() > 0) { string align; getNextPound(working,align," "); //If found an alignment if (align.length() > 0) { size_t posDelimeter = align.find("="); if(posDelimeter== string::npos) { cerr << "Delimiter not found = :"<< align << endl; exit(0); } int firstSourcePos,lastSourcePos,firstTargetPos,lastTargetPos; string sourcePoss = align.substr(0, posDelimeter); string targetPoss = align.substr(posDelimeter+1); posDelimeter = sourcePoss.find("-"); if(posDelimeter < string::npos) { firstSourcePos = atoi((sourcePoss.substr(0, posDelimeter)).c_str()); lastSourcePos = atoi((sourcePoss.substr(posDelimeter+1)).c_str()); } else { firstSourcePos = atoi(sourcePoss.c_str()); lastSourcePos = firstSourcePos; } posDelimeter = targetPoss.find("-"); if(posDelimeter < string::npos) { firstTargetPos = atoi((targetPoss.substr(0, posDelimeter)).c_str()); lastTargetPos = atoi((targetPoss.substr(posDelimeter+1)).c_str()); } else { firstTargetPos = atoi(targetPoss.c_str()); lastTargetPos = firstTargetPos; } for (int i = firstSourcePos; i <= lastSourcePos; i++) { for (int j = firstTargetPos; j <= lastTargetPos; j++) { oss << i << "-" << j << " "; } } } //else case where two spaces ? } out = oss.str(); //cout << "ConverttoStandard: " << out << endl; return out; } }