Add phrase segmentation information to n-best list

This commit is contained in:
Jonathan Clark 2012-04-18 17:09:02 -04:00
parent 8782b6df38
commit bb36565e78
3 changed files with 31 additions and 36 deletions

View File

@ -186,9 +186,11 @@ InputType*IOWrapper::GetInput(InputType* inputType)
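/***
* print the current target phrase of the given hypothesis (edge); when
* reportSegmentation is set, a non-empty phrase is followed by the edge's
* source word range as |start-end|
*/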
void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
bool reportSegmentation, bool reportAllFactors)
{
CHECK(outputFactorOrder.size() > 0);
const Phrase& phrase = edge.GetCurrTargetPhrase();
if (reportAllFactors == true) {
out << phrase;
} else {
@ -204,6 +206,22 @@ void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<Fa
out << " ";
}
}
// trace option "-t"
if (reportSegmentation == true && phrase.GetSize() > 0) {
out << "|" << edge.GetCurrSourceWordsRange().GetStartPos()
<< "-" << edge.GetCurrSourceWordsRange().GetEndPos() << "| ";
}
}
void OutputBestSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder,
bool reportSegmentation, bool reportAllFactors)
{
if (hypo != NULL) {
// recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
OutputBestSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
OutputSurface(out, *hypo, outputFactorOrder, reportSegmentation, reportAllFactors);
}
}
void OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset)
@ -264,35 +282,13 @@ void OutputAlignment(OutputCollector* collector, size_t lineNo , const TrellisPa
}
}
/***
 * print the target phrases along the back-pointer chain ending at hypo;
 * predecessors are printed before the hypothesis itself. With
 * reportSegmentation set, each non-empty target phrase is followed by its
 * source word range as |start-end|
 */
void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder
                   ,bool reportSegmentation, bool reportAllFactors)
{
  // reached the start of the chain: nothing to print
  if (hypo == NULL) {
    return;
  }

  // everything leading up to this hypothesis goes out first
  OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);

  // then the phrase produced by this hypothesis
  OutputSurface(out, hypo->GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors);

  // trace option "-t"
  if (reportSegmentation && hypo->GetCurrTargetPhrase().GetSize() > 0) {
    out << "|" << hypo->GetCurrSourceWordsRange().GetStartPos();
    out << "-" << hypo->GetCurrSourceWordsRange().GetEndPos();
    out << "| ";
  }
}
/***
 * print the edges of the given trellis path to out, then end the line.
 * The edge vector is walked from its last element down to its first.
 * The translation id parameter is unnamed and not used here.
 * NOTE(review): this span shows both the pre-change and post-change lines of
 * the diff (two signatures, inline segmentation code next to the new
 * OutputSurface call) -- presumably only one variant exists in the real file.
 */
void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,bool reportSegmentation, bool reportAllFactors, std::ostream &out)
void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, bool reportSegmentation, bool reportAllFactors, std::ostream &out)
{
const std::vector<const Hypothesis *> &edges = path.GetEdges();
// signed index so the countdown can stop once it drops below zero
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
const Hypothesis &edge = *edges[currEdge];
// phrase-only overload, followed by the inline segmentation marker
OutputSurface(out, edge.GetCurrTargetPhrase(), StaticData::Instance().GetOutputFactorOrder(), reportAllFactors);
if (reportSegmentation == true
&& edge.GetCurrTargetPhrase().GetSize() > 0) {
out << "|" << edge.GetCurrSourceWordsRange().GetStartPos()
<< "-" << edge.GetCurrSourceWordsRange().GetEndPos() << "| ";
}
// edge overload: receives reportSegmentation and handles the marker itself
OutputSurface(out, edge, StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors);
}
out << endl;
}
@ -347,7 +343,7 @@ void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, b
OutputInput(cout, hypo);
cout << "||| ";
}
OutputSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
OutputBestSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
cout << endl;
}
} else {
@ -358,10 +354,7 @@ void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, b
}
}
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder, const TranslationSystem* system, long translationId)
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder, const TranslationSystem* system, long translationId, bool reportSegmentation)
{
const StaticData &staticData = StaticData::Instance();
bool labeledOutput = staticData.IsLabeledNBestList();
@ -378,7 +371,7 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con
out << translationId << " ||| ";
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
const Hypothesis &edge = *edges[currEdge];
OutputSurface(out, edge.GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors);
OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
}
out << " |||";

View File

@ -119,9 +119,9 @@ public:
IOWrapper *GetIODevice(const Moses::StaticData &staticData);
bool ReadInput(IOWrapper &ioWrapper, Moses::InputTypeEnum inputType, Moses::InputType*& source);
void OutputSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder ,bool reportSegmentation, bool reportAllFactors);
void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder, bool reportSegmentation, bool reportAllFactors);
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>&,
const TranslationSystem* system, long translationId);
const TranslationSystem* system, long translationId, bool reportSegmentation);
void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId);
void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, long /*translationId*/,
bool reportSegmentation, bool reportAllFactors, std::ostream& out);

View File

@ -154,7 +154,7 @@ public:
OutputInput(out, bestHypo);
out << "||| ";
}
OutputSurface(
OutputBestSurface(
out,
bestHypo,
staticData.GetOutputFactorOrder(),
@ -239,7 +239,8 @@ public:
TrellisPathList nBestList;
ostringstream out;
manager.CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest());
OutputNBest(out,nBestList, staticData.GetOutputFactorOrder(), manager.GetTranslationSystem(), m_lineNumber);
OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), manager.GetTranslationSystem(), m_lineNumber,
staticData.GetReportSegmentation());
m_nbestCollector->Write(m_lineNumber, out.str());
}
@ -248,7 +249,8 @@ public:
TrellisPathList latticeSamples;
ostringstream out;
manager.CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples);
OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), manager.GetTranslationSystem(), m_lineNumber);
OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), manager.GetTranslationSystem(), m_lineNumber,
staticData.GetReportSegmentation());
m_latticeSamplesCollector->Write(m_lineNumber, out.str());
}