From bb36565e78582afb2a96bf4e383b8a058ae89173 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Wed, 18 Apr 2012 17:09:02 -0400 Subject: [PATCH] Add phrase segmentation information to n-best list --- moses-cmd/src/IOWrapper.cpp | 55 ++++++++++++++++--------------------- moses-cmd/src/IOWrapper.h | 4 +-- moses-cmd/src/Main.cpp | 8 ++++-- 3 files changed, 31 insertions(+), 36 deletions(-) diff --git a/moses-cmd/src/IOWrapper.cpp b/moses-cmd/src/IOWrapper.cpp index 5fcdbab3a..03133e092 100644 --- a/moses-cmd/src/IOWrapper.cpp +++ b/moses-cmd/src/IOWrapper.cpp @@ -186,9 +186,11 @@ InputType*IOWrapper::GetInput(InputType* inputType) /*** * print surface factor only for the given phrase */ -void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector &outputFactorOrder, bool reportAllFactors) +void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector &outputFactorOrder, + bool reportSegmentation, bool reportAllFactors) { CHECK(outputFactorOrder.size() > 0); + const Phrase& phrase = edge.GetCurrTargetPhrase(); if (reportAllFactors == true) { out << phrase; } else { @@ -204,6 +206,22 @@ void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector 0) { + out << "|" << edge.GetCurrSourceWordsRange().GetStartPos() + << "-" << edge.GetCurrSourceWordsRange().GetEndPos() << "| "; + } +} + +void OutputBestSurface(std::ostream &out, const Hypothesis *hypo, const std::vector &outputFactorOrder, + bool reportSegmentation, bool reportAllFactors) +{ + if (hypo != NULL) { + // recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence + OutputBestSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors); + OutputSurface(out, *hypo, outputFactorOrder, reportSegmentation, reportAllFactors); + } } void OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset) @@ -264,35 +282,13 @@ void OutputAlignment(OutputCollector* collector, size_t lineNo , const TrellisPa } } -void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector &outputFactorOrder - ,bool reportSegmentation, bool reportAllFactors) -{ - if ( hypo != NULL) { - OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors); - OutputSurface(out, hypo->GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors); - - // trace option "-t" - if (reportSegmentation == true - && hypo->GetCurrTargetPhrase().GetSize() > 0) { - out << "|" << hypo->GetCurrSourceWordsRange().GetStartPos() - << "-" << hypo->GetCurrSourceWordsRange().GetEndPos() << "| "; - } - } -} - -void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,bool reportSegmentation, bool reportAllFactors, std::ostream &out) +void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, bool reportSegmentation, bool reportAllFactors, std::ostream &out) { const std::vector &edges = path.GetEdges(); for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { const Hypothesis &edge = *edges[currEdge]; - - OutputSurface(out, edge.GetCurrTargetPhrase(), StaticData::Instance().GetOutputFactorOrder(), reportAllFactors); - if (reportSegmentation == true - && edge.GetCurrTargetPhrase().GetSize() > 0) { - out << "|" << edge.GetCurrSourceWordsRange().GetStartPos() - << "-" << edge.GetCurrSourceWordsRange().GetEndPos() << "| "; - } + OutputSurface(out, edge, StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors); } out << endl; } @@ -347,7 +343,7 @@ void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, b OutputInput(cout, hypo); cout << "||| "; } - OutputSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors); + OutputBestSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors); cout << endl; } } else { @@ -358,10 +354,7 @@ void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, b } } - - - -void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector& outputFactorOrder, const TranslationSystem* system, long translationId) +void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector& outputFactorOrder, const TranslationSystem* system, long translationId, bool reportSegmentation) { const StaticData &staticData = StaticData::Instance(); bool labeledOutput = staticData.IsLabeledNBestList(); @@ -378,7 +371,7 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con out << translationId << " ||| "; for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { const Hypothesis &edge = *edges[currEdge]; - OutputSurface(out, edge.GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors); + OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors); } out << " |||"; diff --git a/moses-cmd/src/IOWrapper.h b/moses-cmd/src/IOWrapper.h index 9ce85a75c..e7936f33c 100644 --- a/moses-cmd/src/IOWrapper.h +++ b/moses-cmd/src/IOWrapper.h @@ -119,9 +119,9 @@ public: IOWrapper *GetIODevice(const Moses::StaticData &staticData); bool ReadInput(IOWrapper &ioWrapper, Moses::InputTypeEnum inputType, Moses::InputType*& source); -void OutputSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector &outputFactorOrder ,bool reportSegmentation, bool reportAllFactors); +void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector &outputFactorOrder, bool reportSegmentation, bool reportAllFactors); void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector&, - const TranslationSystem* system, long translationId); + const TranslationSystem* system, long translationId, bool reportSegmentation); void OutputLatticeMBRNBest(std::ostream& out, const std::vector& solutions,long translationId); void OutputBestHypo(const std::vector& mbrBestHypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors, std::ostream& out); diff --git a/moses-cmd/src/Main.cpp b/moses-cmd/src/Main.cpp index f86d9f093..66fba54a3 100644 --- a/moses-cmd/src/Main.cpp +++ b/moses-cmd/src/Main.cpp @@ -154,7 +154,7 @@ public: OutputInput(out, bestHypo); out << "||| "; } - OutputSurface( + OutputBestSurface( out, bestHypo, staticData.GetOutputFactorOrder(), @@ -239,7 +239,8 @@ public: TrellisPathList nBestList; ostringstream out; manager.CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest()); - OutputNBest(out,nBestList, staticData.GetOutputFactorOrder(), manager.GetTranslationSystem(), m_lineNumber); + OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), manager.GetTranslationSystem(), m_lineNumber, + staticData.GetReportSegmentation()); m_nbestCollector->Write(m_lineNumber, out.str()); } @@ -248,7 +249,8 @@ public: TrellisPathList latticeSamples; ostringstream out; manager.CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples); - OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), manager.GetTranslationSystem(), m_lineNumber); + OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), manager.GetTranslationSystem(), m_lineNumber, + staticData.GetReportSegmentation()); m_latticeSamplesCollector->Write(m_lineNumber, out.str()); }