Print out span widths of non-terminals. Adds the extra argument --OutputNTLengths to consolidate.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4230 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
hieuhoang1972 2011-09-16 17:13:34 +00:00
parent 4d5b17f444
commit 4313e335b5
2 changed files with 20 additions and 9 deletions

View File

@ -40,6 +40,7 @@ bool lowCountFlag = false;
// Global option flags; every one of them starts out disabled.
// NOTE(review): goodTuringFlag / kneserNeyFlag presumably select a smoothing
// method -- their use is not visible in this part of the file, confirm there.
bool goodTuringFlag = false;
bool kneserNeyFlag = false;
// Turned on by the --LogProb command-line option; consulted by maybeLogProb().
bool logProbFlag = false;
// Turned on by the --OutputNTLengths command-line option; when set,
// processFiles() appends itemDirect[5] as an extra " ||| " field to each
// consolidated line.
bool outputNTLengths = false;

// Returns log(a) when logProbFlag is set, otherwise returns a unchanged.
inline float maybeLogProb( float a )
{
  if (logProbFlag) {
    return log(a);
  }
  return a;
}
char line[LINE_MAX_LENGTH];
@ -54,7 +55,7 @@ int main(int argc, char* argv[])
<< "consolidating direct and indirect rule tables\n";
if (argc < 4) {
cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect]\n";
cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect] [--OutputNTLengths] \n";
exit(1);
}
char* &fileNameDirect = argv[1];
@ -94,6 +95,8 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--LogProb") == 0) {
logProbFlag = true;
cerr << "using log-probabilities\n";
} else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
outputNTLengths = true;
} else {
cerr << "ERROR: unknown option " << argv[i] << endl;
exit(1);
@ -271,6 +274,11 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
// counts, for debugging
fileConsolidated << "||| " << countE << " " << countF; // << " " << countEF;
if (outputNTLengths)
{
fileConsolidated << " ||| " << itemDirect[5];
}
fileConsolidated << endl;
}
fileDirect.Close();

View File

@ -529,15 +529,18 @@ void outputPhrasePair( vector< PhraseAlignment* > &phrasePair, float totalCount,
// nt lengths
if (outputNTLengths)
{
map<size_t, map<size_t, float> > sourceProb, targetProb;
// 1st sourcePos, 2nd = length, 3rd = prob
calcNTLengthProb(phrasePair, sourceProb, targetProb);
phraseTableFile << " ||| ";
outputNTLengthProbs(phraseTableFile, sourceProb, "S");
outputNTLengthProbs(phraseTableFile, targetProb, "T");
if (!inverseFlag)
{
map<size_t, map<size_t, float> > sourceProb, targetProb;
// 1st sourcePos, 2nd = length, 3rd = prob
calcNTLengthProb(phrasePair, sourceProb, targetProb);
outputNTLengthProbs(phraseTableFile, sourceProb, "S");
outputNTLengthProbs(phraseTableFile, targetProb, "T");
}
}
phraseTableFile << endl;