move OutputBestSurface() to Hypothesis class

This commit is contained in:
Hieu Hoang 2014-12-29 00:48:40 +05:30
parent f865e7e43c
commit cd6ec01999
5 changed files with 116 additions and 39 deletions

View File

@ -32,6 +32,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "StaticData.h"
#include "InputType.h"
#include "Manager.h"
#include "IOWrapper.h"
#include "moses/FF/FFState.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
@ -489,5 +490,112 @@ void Hypothesis::OutputInput(std::ostream& os) const
if (inp_phrases[i]) os << *inp_phrases[i];
}
void Hypothesis::OutputBestSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const
{
if (hypo != NULL) {
// recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
OutputBestSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
OutputSurface(out, *hypo, outputFactorOrder, reportSegmentation, reportAllFactors);
}
}
//////////////////////////////////////////////////////////////////////////
/***
* print surface factor only for the given phrase
*/
void Hypothesis::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
"Must specific at least 1 output factor");
const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
bool markUnknown = StaticData::Instance().GetMarkUnknown();
if (reportAllFactors == true) {
out << phrase;
} else {
FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
std::map<size_t, const Factor*> placeholders;
if (placeholderFactor != NOT_FOUND) {
// creates map of target position -> factor for placeholders
placeholders = GetPlaceholders(edge, placeholderFactor);
}
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
if (placeholders.size()) {
// do placeholders
std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
if (iter != placeholders.end()) {
factor = iter->second;
}
}
UTIL_THROW_IF2(factor == NULL,
"No factor 0 at position " << pos);
//preface surface form with UNK if marking unknowns
const Word &word = phrase.GetWord(pos);
if(markUnknown && word.IsOOV()) {
out << "UNK" << *factor;
} else {
out << *factor;
}
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor == NULL,
"No factor " << i << " at position " << pos);
out << "|" << *factor;
}
out << " ";
}
}
// trace ("report segmentation") option "-t" / "-tt"
if (reportSegmentation > 0 && phrase.GetSize() > 0) {
const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
const int sourceStart = sourceRange.GetStartPos();
const int sourceEnd = sourceRange.GetEndPos();
out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
if (reportSegmentation == 2) {
out << ",wa=";
const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
Hypothesis::OutputAlignment(out, ai, 0, 0);
out << ",total=";
out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
out << ",";
ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
IOWrapper::OutputAllFeatureScores(scoreBreakdown, out);
}
out << "| ";
}
}
std::map<size_t, const Factor*> Hypothesis::GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
{
const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
const Phrase &inputPhrase = inputPath.GetPhrase();
std::map<size_t, const Factor*> ret;
for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
if (factor) {
std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
UTIL_THROW_IF2(targetPos.size() != 1,
"Placeholder should be aligned to 1, and only 1, word");
ret[*targetPos.begin()] = factor;
}
}
return ret;
}
}

View File

@ -277,6 +277,13 @@ public:
void OutputInput(std::ostream& os) const;
static void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo);
// Writes the surface form of the whole path ending in hypo: follows the
// GetPrevHypo() chain back from hypo and calls OutputSurface() on each
// hypothesis, earliest first. Writes nothing when hypo is NULL.
void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder, char reportSegmentation, bool reportAllFactors) const;
// Writes the target phrase of a single hypothesis (edge) to out.
// With reportAllFactors the phrase is streamed as a whole; otherwise each word
// is written as the factors listed in outputFactorOrder joined by "|" and
// followed by a space. outputFactorOrder must not be empty.
// When reportSegmentation > 0 and the phrase is non-empty, a
// "|<sourceStart>-<sourceEnd>| " trace is appended; the value 2 adds word
// alignment and score details to that trace.
void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const;
// Creates a map from TARGET position to the placeholder factor that should be
// printed at that position instead of the word's own factor. Entries come from
// source positions of hypo's input phrase that carry placeholderFactor; each
// such source position must be aligned to exactly one target position.
std::map<size_t, const Moses::Factor*> GetPlaceholders(const Moses::Hypothesis &hypo, Moses::FactorType placeholderFactor) const;
};
std::ostream& operator<<(std::ostream& out, const Hypothesis& hypothesis);

View File

@ -520,15 +520,6 @@ void IOWrapper::OutputSurface(std::ostream &out, const Hypothesis &edge, const s
}
}
void IOWrapper::OutputBestSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors)
{
if (hypo != NULL) {
// recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
OutputBestSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
OutputSurface(out, *hypo, outputFactorOrder, reportSegmentation, reportAllFactors);
}
}
void IOWrapper::OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<const Hypothesis *> &edges)
@ -593,33 +584,6 @@ void IOWrapper::OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*tra
out << endl;
}
/**
 * Report the best hypothesis of a sentence on standard output.
 *
 * For a NULL `hypo`, logs "NO BEST TRANSLATION" at verbosity 1 and prints an
 * empty line.  Otherwise logs the hypothesis (verbosity 1) and its best path
 * (verbosity 3, via Backtrack()), then prints one line made of: the total
 * score if StaticData requests hypothesis scores, the input followed by
 * "||| " if path recovery is enabled, and the surface form of the path.
 * All printing to cout is skipped when m_surpressSingleBestOutput is set.
 * The translation id argument is not used.
 */
void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, char reportSegmentation, bool reportAllFactors)
{
  // no translation found: still emit a line so the output stays aligned with the input
  if (hypo == NULL) {
    VERBOSE(1, "NO BEST TRANSLATION" << endl);
    if (!m_surpressSingleBestOutput) {
      cout << endl;
    }
    return;
  }

  VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
  VERBOSE(3,"Best path: ");
  Backtrack(hypo);
  VERBOSE(3,"0" << std::endl);

  if (m_surpressSingleBestOutput) {
    return;
  }

  if (StaticData::Instance().GetOutputHypoScore()) {
    cout << hypo->GetTotalScore() << " ";
  }

  if (StaticData::Instance().IsPathRecoveryEnabled()) {
    hypo->OutputInput(cout);
    cout << "||| ";
  }

  OutputBestSurface(cout, hypo, *m_outputFactorOrder, reportSegmentation, reportAllFactors);
  cout << endl;
}
bool IOWrapper::ReadInput(InputTypeEnum inputType, InputType*& source)
{
delete source;

View File

@ -128,7 +128,6 @@ public:
Moses::InputType* GetInput(Moses::InputType *inputType);
bool ReadInput(Moses::InputTypeEnum inputType, Moses::InputType*& source);
// Prints the best hypothesis to standard output (optional total score, optional
// input with "||| ", then the surface form of the path), or an empty line when
// hypo is NULL. Printing is skipped when single-best output is suppressed.
// translationId is not used by the implementation.
void OutputBestHypo(const Moses::Hypothesis *hypo, long translationId, char reportSegmentation, bool reportAllFactors);
void OutputLatticeMBRNBestList(const std::vector<LatticeMBRSolution>& solutions,long translationId);
void Backtrack(const Moses::Hypothesis *hypo);
@ -172,7 +171,6 @@ public:
// CHART
// phrase-based
// Writes the surface form of the whole path ending in hypo: walks back through
// GetPrevHypo() and calls OutputSurface() on each hypothesis, earliest first.
// Writes nothing when hypo is NULL.
void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder, char reportSegmentation, bool reportAllFactors);
void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId);
void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, long /*translationId*/,
char reportSegmentation, bool reportAllFactors, std::ostream& out);

View File

@ -129,7 +129,7 @@ void TranslationTask::RunPb()
if (staticData.GetReportSegmentation() == 2) {
manager.GetOutputLanguageModelOrder(out, bestHypo);
}
m_ioWrapper.OutputBestSurface(
bestHypo->OutputBestSurface(
out,
bestHypo,
staticData.GetOutputFactorOrder(),