added option -include-alignment-in-n-best to include the word alignment for each sentence in the n-best list file

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1246 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
phkoehn 2007-02-26 20:59:41 +00:00
parent 3413bf7046
commit f1d2bd0eb5
5 changed files with 32 additions and 5 deletions

View File

@ -207,6 +207,7 @@ void IOStream::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bo
void IOStream::OutputNBestList(const LatticePathList &nBestList, long translationId)
{
bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
bool includeAlignment = StaticData::Instance().NBestIncludesAlignment();
LatticePathList::const_iterator iter;
for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
@ -331,7 +332,25 @@ void IOStream::OutputNBestList(const LatticePathList &nBestList, long translatio
}
// total
*m_nBestStream << "||| " << path.GetTotalScore() << endl;
// Write the total score of this path as the next "|||"-separated field.
// The line is terminated further below, so that the optional alignment
// field can be appended to the same line first.
*m_nBestStream << "||| " << path.GetTotalScore();
// Optional extra field: one "source=target" entry per edge of the path,
// each preceded by a single space.
if (includeAlignment) {
*m_nBestStream << " |||";
// Walk the edges from the second-to-last index down to 0.
// NOTE(review): the edge at index size()-1 is skipped; presumably it is the
// initial (empty) hypothesis and the vector is stored in reverse order —
// confirm against where `edges` is filled.
for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--)
{
const Hypothesis &edge = *edges[currEdge];
WordsRange sourceRange = edge.GetCurrSourceWordsRange();
WordsRange targetRange = edge.GetCurrTargetWordsRange();
// Source side: print "start" alone when start >= end, otherwise "start-end".
*m_nBestStream << " " << sourceRange.GetStartPos();
if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
*m_nBestStream << "-" << sourceRange.GetEndPos();
}
// Target side: same convention, separated from the source side by "=".
*m_nBestStream << "=" << targetRange.GetStartPos();
if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
*m_nBestStream << "-" << targetRange.GetEndPos();
}
}
}
// Terminate the n-best entry, whether or not the alignment field was written.
*m_nBestStream << endl;
}
*m_nBestStream<<std::flush;

View File

@ -42,7 +42,8 @@ Parameter::Parameter()
AddParam("input-factors", "list of factors in the input");
AddParam("input-file", "i", "location of the input file to be translated");
AddParam("inputtype", "text (0) or confusion network (1)");
AddParam("labeled-n-best-list", "labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
AddParam("labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
AddParam("include-alignment-in-n-best", "include word alignment in the n-best list. default is false");
AddParam("lmodel-file", "location and properties of the language models");
AddParam("lmstats", "L", "(1/0) compute LM backoff statistics for each translation hypothesis");
AddParam("mapping", "description of decoding steps");

View File

@ -122,6 +122,9 @@ bool StaticData::LoadData(Parameter *parameter)
// include feature names in the n-best list
SetBooleanParameter( &m_labeledNBestList, "labeled-n-best-list", true );
// include word alignment in the n-best list
SetBooleanParameter( &m_nBestIncludesAlignment, "include-alignment-in-n-best", false );
// printing source phrase spans
SetBooleanParameter( &m_reportSegmentation, "report-segmentation", false );

View File

@ -79,7 +79,7 @@ protected:
, m_maxNoPartTransOpt;
std::string m_nBestFilePath;
bool m_fLMsLoaded, m_labeledNBestList;
bool m_fLMsLoaded, m_labeledNBestList,m_nBestIncludesAlignment;
/***
* false = treat unknown words as unknowns, and translate them as themselves;
* true = drop (ignore) them
@ -269,6 +269,10 @@ public:
{
return m_labeledNBestList;
}
// Returns the value of m_nBestIncludesAlignment, i.e. whether the word
// alignment should be included in the n-best list output.
bool NBestIncludesAlignment() const
{
return m_nBestIncludesAlignment;
}
const std::vector<std::string> &GetDescription() const
{
return m_parameter->GetParam("description");

View File

@ -118,7 +118,7 @@ for(my $n=1;$n<=4;$n++) {
}
if ($length_reference==0){
printf "BLEU = 0, 0/0/0/0 (BP=0, ration=0, hyp_len=0, ref_len=0)\n";
printf "BLEU = 0, 0/0/0/0 (BP=0, ratio=0, hyp_len=0, ref_len=0)\n";
exit(1);
}
@ -129,7 +129,7 @@ $bleu = $brevity_penalty * exp((my_log( $bleu[1] ) +
my_log( $bleu[2] ) +
my_log( $bleu[3] ) +
my_log( $bleu[4] ) ) / 4) ;
printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ration=%.3f, hyp_len=%d, ref_len=%d)\n",
printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ratio=%.3f, hyp_len=%d, ref_len=%d)\n",
100*$bleu,
100*$bleu[1],
100*$bleu[2],