From 4313e335b534102f18c3d6308b60caa1ffdcfc98 Mon Sep 17 00:00:00 2001 From: hieuhoang1972 Date: Fri, 16 Sep 2011 17:13:34 +0000 Subject: [PATCH] print out span widths of non-terms. Extra argument --OutputNTLengths git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4230 1f5c12ca-751b-0410-a591-d2e778427230 --- .../training/phrase-extract/consolidate.cpp | 10 +++++++++- scripts/training/phrase-extract/score.cpp | 19 +++++++++++-------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/scripts/training/phrase-extract/consolidate.cpp b/scripts/training/phrase-extract/consolidate.cpp index 8d31a1d27..cb4b96659 100644 --- a/scripts/training/phrase-extract/consolidate.cpp +++ b/scripts/training/phrase-extract/consolidate.cpp @@ -40,6 +40,7 @@ bool lowCountFlag = false; bool goodTuringFlag = false; bool kneserNeyFlag = false; bool logProbFlag = false; +bool outputNTLengths = false; inline float maybeLogProb( float a ) { return logProbFlag ? log(a) : a; } char line[LINE_MAX_LENGTH]; @@ -54,7 +55,7 @@ int main(int argc, char* argv[]) << "consolidating direct and indirect rule tables\n"; if (argc < 4) { - cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect]\n"; + cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect] [--OutputNTLengths] \n"; exit(1); } char* &fileNameDirect = argv[1]; @@ -94,6 +95,8 @@ int main(int argc, char* argv[]) } else if (strcmp(argv[i],"--LogProb") == 0) { logProbFlag = true; cerr << "using log-probabilities\n"; + } else if (strcmp(argv[i],"--OutputNTLengths") == 0) { + outputNTLengths = true; } else { cerr << "ERROR: unknown option " << argv[i] << endl; exit(1); @@ -271,6 +274,11 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC // counts, for debugging fileConsolidated << "||| " << countE << " " << countF; // << " " << 
countEF; + if (outputNTLengths) + { + fileConsolidated << " ||| " << itemDirect[5]; + } + fileConsolidated << endl; } fileDirect.Close(); diff --git a/scripts/training/phrase-extract/score.cpp b/scripts/training/phrase-extract/score.cpp index 00f6dfcf6..5cf71bf91 100644 --- a/scripts/training/phrase-extract/score.cpp +++ b/scripts/training/phrase-extract/score.cpp @@ -529,15 +529,18 @@ void outputPhrasePair( vector< PhraseAlignment* > &phrasePair, float totalCount, // nt lengths if (outputNTLengths) { - map<size_t, map<size_t, float> > sourceProb, targetProb; - // 1st sourcePos, 2nd = length, 3rd = prob - - calcNTLengthProb(phrasePair, sourceProb, targetProb); - phraseTableFile << " ||| "; - outputNTLengthProbs(phraseTableFile, sourceProb, "S"); - outputNTLengthProbs(phraseTableFile, targetProb, "T"); - + + if (!inverseFlag) + { + map<size_t, map<size_t, float> > sourceProb, targetProb; + // 1st sourcePos, 2nd = length, 3rd = prob + + calcNTLengthProb(phrasePair, sourceProb, targetProb); + + outputNTLengthProbs(phraseTableFile, sourceProb, "S"); + outputNTLengthProbs(phraseTableFile, targetProb, "T"); + } } phraseTableFile << endl;