From 395285d879e63042e29e3ad3a7c71a9bf16aa586 Mon Sep 17 00:00:00 2001 From: Rico Sennrich Date: Fri, 4 Apr 2014 15:54:48 +0100 Subject: [PATCH] more logging for incremental chart search --- moses-chart-cmd/IOWrapper.cpp | 127 ++++++++++++++++++++++++++++++++++ moses-chart-cmd/IOWrapper.h | 8 +++ moses-chart-cmd/Main.cpp | 8 +++ moses/Incremental.cpp | 4 +- search/applied.hh | 3 +- search/edge_generator.cc | 1 + search/header.hh | 12 +++- search/nbest.cc | 2 +- 8 files changed, 160 insertions(+), 5 deletions(-) diff --git a/moses-chart-cmd/IOWrapper.cpp b/moses-chart-cmd/IOWrapper.cpp index d032d69fe..56c166422 100644 --- a/moses-chart-cmd/IOWrapper.cpp +++ b/moses-chart-cmd/IOWrapper.cpp @@ -300,6 +300,38 @@ void IOWrapper::ReconstructApplicationContext(const ChartHypothesis &hypo, } } + +// Given a hypothesis and sentence, reconstructs the 'application context' -- +// the source RHS symbols of the SCFG rule that was applied, plus their spans. +void IOWrapper::ReconstructApplicationContext(const search::Applied *applied, + const Sentence &sentence, + ApplicationContext &context) +{ + context.clear(); + const WordsRange &span = applied->GetRange(); + const search::Applied *child = applied->Children(); + size_t i = span.GetStartPos(); + size_t j = 0; + + while (i <= span.GetEndPos()) { + if (j == applied->GetArity() || i < child->GetRange().GetStartPos()) { + // Symbol is a terminal. + const Word &symbol = sentence.GetWord(i); + context.push_back(std::make_pair(symbol, WordsRange(i, i))); + ++i; + } else { + // Symbol is a non-terminal. + const Word &symbol = static_cast(child->GetNote().vp)->GetTargetLHS(); + const WordsRange &range = child->GetRange(); + context.push_back(std::make_pair(symbol, range)); + i = range.GetEndPos()+1; + ++child; + ++j; + } + } +} + + // Emulates the old operator<<(ostream &, const DottedRule &) function. The // output format is a bit odd (reverse order and double spacing between symbols) // but there are scripts and tools that expect the output of -T to look like @@ -330,6 +362,20 @@ void IOWrapper::OutputTranslationOption(std::ostream &out, ApplicationContext &a << " " << hypo->GetTotalScore() << hypo->GetScoreBreakdown(); } +void IOWrapper::OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Sentence &sentence, long translationId) +{ + ReconstructApplicationContext(applied, sentence, applicationContext); + const TargetPhrase &phrase = *static_cast(applied->GetNote().vp); + out << "Trans Opt " << translationId + << " " << applied->GetRange() + << ": "; + WriteApplicationContext(out, applicationContext); + out << ": " << phrase.GetTargetLHS() + << "->" << phrase + << " " << applied->GetScore(); // << hypo->GetScoreBreakdown() TODO: missing in incremental search hypothesis +} + + void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId) { if (hypo != NULL) { @@ -346,6 +392,21 @@ void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext & } } + +void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Sentence &sentence, long translationId) +{ + if (applied != NULL) { + OutputTranslationOption(out, applicationContext, applied, sentence, translationId); + out << std::endl; + } + + // recursive + const search::Applied *child = applied->Children(); + for (size_t i = 0; i < applied->GetArity(); i++) { + OutputTranslationOptions(out, applicationContext, child++, sentence, translationId); + } +} + void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId) { @@ -375,6 +436,33 @@ void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, Applica } } +void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Sentence &sentence, long translationId) +{ + + if (applied != NULL) { + OutputTranslationOption(out, applicationContext, applied, sentence, translationId); + + const std::string key = "Tree"; + std::string value; + bool hasProperty; + const TargetPhrase &currTarPhr = *static_cast(applied->GetNote().vp); + currTarPhr.GetProperty(key, value, hasProperty); + + out << " ||| "; + if (hasProperty) + out << " " << value; + else + out << " " << "noTreeInfo"; + out << std::endl; + } + + // recursive + const search::Applied *child = applied->Children(); + for (size_t i = 0; i < applied->GetArity(); i++) { + OutputTreeFragmentsTranslationOptions(out, applicationContext, child++, sentence, translationId); + } +} + void IOWrapper::OutputDetailedTranslationReport( const ChartHypothesis *hypo, const Sentence &sentence, @@ -392,6 +480,23 @@ void IOWrapper::OutputDetailedTranslationReport( m_detailOutputCollector->Write(translationId, out.str()); } +void IOWrapper::OutputDetailedTranslationReport( + const search::Applied *applied, + const Sentence &sentence, + long translationId) +{ + if (applied == NULL) { + return; + } + std::ostringstream out; + ApplicationContext applicationContext; + + OutputTranslationOptions(out, applicationContext, applied, sentence, translationId); + UTIL_THROW_IF2(m_detailOutputCollector == NULL, + "No ouput file for detailed reports specified"); + m_detailOutputCollector->Write(translationId, out.str()); +} + void IOWrapper::OutputDetailedTreeFragmentsTranslationReport( const ChartHypothesis *hypo, const Sentence &sentence, @@ -424,6 +529,28 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport( } +void IOWrapper::OutputDetailedTreeFragmentsTranslationReport( + const search::Applied *applied, + const Sentence &sentence, + long translationId) +{ + if (applied == NULL) { + return; + } + std::ostringstream out; + ApplicationContext applicationContext; + + OutputTreeFragmentsTranslationOptions(out, applicationContext, applied, sentence, translationId); + UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL, + "No output file for tree fragments specified"); + + //Tree of full sentence + //TODO: incremental search doesn't support stateful features + + m_detailTreeFragmentsOutputCollector->Write(translationId, out.str()); + +} + //DIMw void IOWrapper::OutputDetailedAllTranslationReport( const ChartTrellisPathList &nBestList, diff --git a/moses-chart-cmd/IOWrapper.h b/moses-chart-cmd/IOWrapper.h index a7832a9b2..9e09ef00f 100644 --- a/moses-chart-cmd/IOWrapper.h +++ b/moses-chart-cmd/IOWrapper.h @@ -93,11 +93,17 @@ protected: size_t OutputAlignment(Alignments &retAlign, const Moses::ChartHypothesis *hypo, size_t startTarget); void OutputAlignment(std::vector< std::set > &retAlignmentsS2T, const Moses::AlignmentInfo &ai); void OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId); + void OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Moses::Sentence &sentence, long translationId); void OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId); + void OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Moses::Sentence &sentence, long translationId); void OutputTreeFragmentsTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId); + void OutputTreeFragmentsTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Moses::Sentence &sentence, long translationId); void ReconstructApplicationContext(const Moses::ChartHypothesis &hypo, const Moses::Sentence &sentence, ApplicationContext &context); + void ReconstructApplicationContext(const search::Applied *applied, + const Moses::Sentence &sentence, + ApplicationContext &context); void WriteApplicationContext(std::ostream &out, const ApplicationContext &context); @@ -125,7 +131,9 @@ public: void OutputNBestList(const Moses::ChartTrellisPathList &nBestList, long translationId); void OutputNBestList(const std::vector &nbest, long translationId); void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId); + void OutputDetailedTranslationReport(const search::Applied *applied, const Moses::Sentence &sentence, long translationId); void OutputDetailedTreeFragmentsTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId); + void OutputDetailedTreeFragmentsTranslationReport(const search::Applied *applied, const Moses::Sentence &sentence, long translationId); void OutputDetailedAllTranslationReport(const Moses::ChartTrellisPathList &nBestList, const Moses::ChartManager &manager, const Moses::Sentence &sentence, long translationId); void Backtrack(const Moses::ChartHypothesis *hypo); diff --git a/moses-chart-cmd/Main.cpp b/moses-chart-cmd/Main.cpp index a419062fd..f2baff0fa 100644 --- a/moses-chart-cmd/Main.cpp +++ b/moses-chart-cmd/Main.cpp @@ -102,6 +102,14 @@ public: const std::vector &nbest = manager.ProcessSentence(); if (!nbest.empty()) { m_ioWrapper.OutputBestHypo(nbest[0], translationId); + if (staticData.IsDetailedTranslationReportingEnabled()) { + const Sentence &sentence = dynamic_cast(*m_source); + m_ioWrapper.OutputDetailedTranslationReport(&nbest[0], sentence, translationId); + } + if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) { + const Sentence &sentence = dynamic_cast(*m_source); + m_ioWrapper.OutputDetailedTreeFragmentsTranslationReport(&nbest[0], sentence, translationId); + } } else { m_ioWrapper.OutputBestNone(translationId); } diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp index e55cf5e11..4ee639b3e 100644 --- a/moses/Incremental.cpp +++ b/moses/Incremental.cpp @@ -152,12 +152,13 @@ template void Fill::Add(const TargetPhraseCollection &targe search::Note note; note.vp = &phrase; edge.SetNote(note); + edge.SetRange(range); edges_.AddEdge(edge); } } -template void Fill::AddPhraseOOV(TargetPhrase &phrase, std::list &, const WordsRange &) +template void Fill::AddPhraseOOV(TargetPhrase &phrase, std::list &, const WordsRange &range) { std::vector words; UTIL_THROW_IF2(phrase.GetSize() > 1, @@ -173,6 +174,7 @@ template void Fill::AddPhraseOOV(TargetPhrase &phrase, std: search::Note note; note.vp = &phrase; edge.SetNote(note); + edge.SetRange(range); edges_.AddEdge(edge); } diff --git a/search/applied.hh b/search/applied.hh index bd659e5c0..8130e243d 100644 --- a/search/applied.hh +++ b/search/applied.hh @@ -24,9 +24,10 @@ template class GenericApplied : public Header { *child_out = Below(part->End()); } - GenericApplied(void *location, Score score, Arity arity, Note note) : Header(location, arity) { + GenericApplied(void *location, Score score, Arity arity, Note note, Moses::WordsRange range) : Header(location, arity) { SetScore(score); SetNote(note); + SetRange(range); } explicit GenericApplied(History from) : Header(from) {} diff --git a/search/edge_generator.cc b/search/edge_generator.cc index dd9d61e41..1f933453d 100644 --- a/search/edge_generator.cc +++ b/search/edge_generator.cc @@ -80,6 +80,7 @@ template PartialEdge EdgeGenerator::Pop(Context &context) { alternate.SetScore(top.GetScore() + alternate_changed.Bound() - old_value.Bound()); alternate.SetNote(top.GetNote()); + alternate.SetRange(top.GetRange()); PartialVertex *alternate_nt = alternate.NT(); for (Arity i = 0; i < victim; ++i) alternate_nt[i] = top_nt[i]; diff --git a/search/header.hh b/search/header.hh index 69f0eed04..d70524097 100644 --- a/search/header.hh +++ b/search/header.hh @@ -1,9 +1,10 @@ #ifndef SEARCH_HEADER__ #define SEARCH_HEADER__ -// Header consisting of Score, Arity, and Note +// Header consisting of Score, Arity, Note and WordsRange #include "search/types.hh" +#include "moses/WordsRange.h" #include @@ -38,6 +39,13 @@ class Header { *reinterpret_cast(base_ + sizeof(Score) + sizeof(Arity)) = to; } + Moses::WordsRange GetRange() const { + return *reinterpret_cast(base_ + sizeof(Score) + sizeof(Arity) + sizeof(Note)); + } + void SetRange(Moses::WordsRange to) { + *reinterpret_cast(base_ + sizeof(Score) + sizeof(Arity) + sizeof(Note)) = to; + } + uint8_t *Base() { return base_; } const uint8_t *Base() const { return base_; } @@ -50,7 +58,7 @@ class Header { *reinterpret_cast(base_ + sizeof(Score)) = arity; } - static const std::size_t kHeaderSize = sizeof(Score) + sizeof(Arity) + sizeof(Note); + static const std::size_t kHeaderSize = sizeof(Score) + sizeof(Arity) + sizeof(Note) + sizeof(Moses::WordsRange); uint8_t *After() { return base_ + kHeaderSize; } const uint8_t *After() const { return base_ + kHeaderSize; } diff --git a/search/nbest.cc b/search/nbest.cc index acfc08049..8445826f1 100644 --- a/search/nbest.cc +++ b/search/nbest.cc @@ -70,7 +70,7 @@ void NBestList::MoveTop(util::Pool &pool) { Score change = child->in_->Visit(pool, child->index_); if (change != -INFINITY) { assert(change < 0.001); - QueueEntry new_entry(pool.Allocate(QueueEntry::Size(entry.GetArity())), basis + change, entry.GetArity(), entry.GetNote()); + QueueEntry new_entry(pool.Allocate(QueueEntry::Size(entry.GetArity())), basis + change, entry.GetArity(), entry.GetNote(), entry.GetRange()); std::copy(children_begin, child, new_entry.Children()); RevealedRef *update = new_entry.Children() + (child - children_begin); update->in_ = child->in_;