Added option -include-alignment-in-n-best to include the word alignment for each sentence in the n-best list file. Also corrects the "ration" typo to "ratio" in the multi-bleu.perl output.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1246 1f5c12ca-751b-0410-a591-d2e778427230
2024-09-11 19:27:11 +03:00 · 2007-02-26 20:59:41 +00:00 · 2007-02-26 20:59:41 +00:00 · f1d2bd0eb5
commit f1d2bd0eb5
parent 3413bf7046
5 changed files with 32 additions and 5 deletions
--- a/moses-cmd/src/IOStream.cpp
+++ b/moses-cmd/src/IOStream.cpp
@ -207,6 +207,7 @@ void IOStream::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bo
 void IOStream::OutputNBestList(const LatticePathList &nBestList, long translationId)
 {
 	bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
+	bool includeAlignment = StaticData::Instance().NBestIncludesAlignment();
 	
 	LatticePathList::const_iterator iter;
 	for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
@ -331,7 +332,25 @@ void IOStream::OutputNBestList(const LatticePathList &nBestList, long translatio
    }
 		
 		// total						
-		*m_nBestStream << "||| " << path.GetTotalScore() << endl;
+    *m_nBestStream << "||| " << path.GetTotalScore();
+    if (includeAlignment) {
+		*m_nBestStream << " |||";
+		for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--)
+		{
+			const Hypothesis &edge = *edges[currEdge];
+			WordsRange sourceRange = edge.GetCurrSourceWordsRange();
+			WordsRange targetRange = edge.GetCurrTargetWordsRange();
+			*m_nBestStream << " " << sourceRange.GetStartPos();
+			if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
+			  *m_nBestStream << "-" << sourceRange.GetEndPos();
+			}
+			*m_nBestStream << "=" << targetRange.GetStartPos();
+			if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
+			  *m_nBestStream << "-" << targetRange.GetEndPos();
+			}
+		}
+    }
+    *m_nBestStream << endl;
 	}

 	*m_nBestStream<<std::flush;
--- a/moses/src/Parameter.cpp
+++ b/moses/src/Parameter.cpp
@ -42,7 +42,8 @@ Parameter::Parameter()
 	AddParam("input-factors", "list of factors in the input");
 	AddParam("input-file", "i", "location of the input file to be translated");
 	AddParam("inputtype", "text (0) or confusion network (1)");
-	AddParam("labeled-n-best-list", "labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
+	AddParam("labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
+	AddParam("include-alignment-in-n-best", "include word alignment in the n-best list. default is false");
 	AddParam("lmodel-file", "location and properties of the language models");
 	AddParam("lmstats", "L", "(1/0) compute LM backoff statistics for each translation hypothesis");
 	AddParam("mapping", "description of decoding steps");
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@ -122,6 +122,9 @@ bool StaticData::LoadData(Parameter *parameter)
 	// include feature names in the n-best list
 	SetBooleanParameter( &m_labeledNBestList, "labeled-n-best-list", true );

+	// include word alignment in the n-best list
+	SetBooleanParameter( &m_nBestIncludesAlignment, "include-alignment-in-n-best", false );
+
 	// printing source phrase spans
 	SetBooleanParameter( &m_reportSegmentation, "report-segmentation", false );

--- a/moses/src/StaticData.h
+++ b/moses/src/StaticData.h
@ -79,7 +79,7 @@ protected:
 		  , m_maxNoPartTransOpt;
 	
 	std::string									m_nBestFilePath;
-	bool                        m_fLMsLoaded, m_labeledNBestList;
+	bool                        m_fLMsLoaded, m_labeledNBestList,m_nBestIncludesAlignment;
 	/***
 	 * false = treat unknown words as unknowns, and translate them as themselves;
 	 * true = drop (ignore) them
@ -269,6 +269,10 @@ public:
 	{
 		return m_labeledNBestList;
 	}
+	bool NBestIncludesAlignment() const
+	{
+		return m_nBestIncludesAlignment;
+	}
 	const std::vector<std::string> &GetDescription() const
 	{
 		return m_parameter->GetParam("description");
--- a/scripts/generic/multi-bleu.perl
+++ b/scripts/generic/multi-bleu.perl
@ -118,7 +118,7 @@ for(my $n=1;$n<=4;$n++) {
 }

 if ($length_reference==0){
-  printf "BLEU = 0, 0/0/0/0 (BP=0, ration=0, hyp_len=0, ref_len=0)\n";
+  printf "BLEU = 0, 0/0/0/0 (BP=0, ratio=0, hyp_len=0, ref_len=0)\n";
  exit(1);
 }

@ -129,7 +129,7 @@ $bleu = $brevity_penalty * exp((my_log( $bleu[1] ) +
 				my_log( $bleu[2] ) +
 				my_log( $bleu[3] ) +
 				my_log( $bleu[4] ) ) / 4) ;
-printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ration=%.3f, hyp_len=%d, ref_len=%d)\n",
+printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ratio=%.3f, hyp_len=%d, ref_len=%d)\n",
    100*$bleu,
    100*$bleu[1],
    100*$bleu[2],