From 7dbcef925d25d4b4f1deb5e10530bbd3915a812a Mon Sep 17 00:00:00 2001 From: redpony Date: Fri, 28 Sep 2007 16:43:33 +0000 Subject: [PATCH] Implemented -r (-recover-input-path) to recover the actual path through the CN/word lattice that was used in the best translation. Also cleaned up handling of source Phrases in the context of a particular hypothesis, which may improve performance translating with lexicalized reordering models. Removed PhraseReference since the concept was broken for CN/lattice inputs. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1468 1f5c12ca-751b-0410-a591-d2e778427230 --- moses-cmd/src/IOStream.cpp | 22 +++++++++ moses/src/Hypothesis.h | 4 +- moses/src/Makefile.am | 1 - moses/src/Manager.cpp | 2 +- moses/src/Parameter.cpp | 1 + moses/src/PhraseReference.cpp | 28 ----------- moses/src/PhraseReference.h | 48 ------------------- moses/src/SentenceStats.cpp | 2 +- moses/src/SentenceStats.h | 5 +- moses/src/StaticData.cpp | 8 ++++ moses/src/StaticData.h | 5 ++ .../tests/lattice-distortion/moses.ini | 3 ++ .../lattice-distortion/truth/results.txt | 12 ++--- 13 files changed, 51 insertions(+), 90 deletions(-) delete mode 100644 moses/src/PhraseReference.cpp delete mode 100644 moses/src/PhraseReference.h diff --git a/moses-cmd/src/IOStream.cpp b/moses-cmd/src/IOStream.cpp index 0c3b404b7..158b4fe53 100755 --- a/moses-cmd/src/IOStream.cpp +++ b/moses-cmd/src/IOStream.cpp @@ -194,6 +194,24 @@ void IOStream::OutputBestHypo(const std::vector& mbrBestHypo, lo } } +void OutputInput(std::vector& map, const Hypothesis* hypo) +{ + if (hypo->GetPrevHypo()) + { + OutputInput(map, hypo->GetPrevHypo()); + map[hypo->GetCurrSourceWordsRange().GetStartPos()] = hypo->GetSourcePhrase(); + } +} + +void OutputInput(std::ostream& os, const Hypothesis* hypo) +{ + size_t len = StaticData::Instance().GetInput()->GetSize(); + std::vector inp_phrases(len, 0); + OutputInput(inp_phrases, hypo); + for (size_t i=0; i factorsToPrint) const; diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am index c5283c803..626cbe74f 100644 --- a/moses/src/Makefile.am +++ b/moses/src/Makefile.am @@ -41,7 +41,6 @@ libmoses_a_SOURCES = \ PhraseDictionaryNode.cpp \ PhraseDictionaryTree.cpp \ PhraseDictionaryTreeAdaptor.cpp \ - PhraseReference.cpp \ PrefixTreeMap.cpp \ ScoreComponentCollection.cpp \ ScoreIndexManager.cpp \ diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp index 07ea234a9..4f72a55b3 100755 --- a/moses/src/Manager.cpp +++ b/moses/src/Manager.cpp @@ -428,7 +428,7 @@ void Manager::CalcDecoderStatistics() const string buff; string buff2; TRACE_ERR( "Source and Target Units:" - << hypo->GetSourcePhrase()); + << *StaticData::Instance().GetInput()); buff2.insert(0,"] "); buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString()); buff2.insert(0,":"); diff --git a/moses/src/Parameter.cpp b/moses/src/Parameter.cpp index e393ac27f..fff68e6b6 100755 --- a/moses/src/Parameter.cpp +++ b/moses/src/Parameter.cpp @@ -78,6 +78,7 @@ Parameter::Parameter() AddParam("mbr-scale", "scaling factor to convert log linear score into a probability."); AddParam("decoder-type", "MAP/MBR decoder (default=MAP=0)"); AddParam("use-persistent-cache", "cache translation options across sentences (default=true)"); + AddParam("recover-input-path", "r", "(conf net/word lattice only) - recover input path corresponding to the best translation"); } Parameter::~Parameter() diff --git a/moses/src/PhraseReference.cpp b/moses/src/PhraseReference.cpp deleted file mode 100644 index 2d7de2c01..000000000 --- a/moses/src/PhraseReference.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// $Id$ - -/*********************************************************************** -Moses - factored phrase-based language decoder -Copyright (C) 2006 University of Edinburgh - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#include "PhraseReference.h" -using std::ostream; - -ostream& operator << (ostream& out, const PhraseReference& phrase) -{ - return out << phrase.GetSubphrase(); -} diff --git a/moses/src/PhraseReference.h b/moses/src/PhraseReference.h deleted file mode 100644 index 2fff52fa5..000000000 --- a/moses/src/PhraseReference.h +++ /dev/null @@ -1,48 +0,0 @@ -/*********************************************************************** -Moses - factored phrase-based language decoder -Copyright (C) 2006 University of Edinburgh - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#ifndef MOSES_PHRASE_REFERENCE_H -#define MOSES_PHRASE_REFERENCE_H - -#include -#include "InputType.h" -#include "WordsRange.h" - -/*** - * hold a reference to a subphrase, the parent Phrase of which may be separately memory-managed - */ -class PhraseReference -{ - public: - - PhraseReference() : fullPhrase(NULL), range(0, 0) {} - PhraseReference(const InputType& phrase, const WordsRange& r) : fullPhrase(&phrase), range(r) {} - - const InputType& GetFullPhrase() const {return *fullPhrase;} - Phrase GetSubphrase() const {return fullPhrase->GetSubString(range);} - - protected: - - const InputType* fullPhrase; - WordsRange range; -}; - -std::ostream& operator << (std::ostream& out, const PhraseReference& phrase); - -#endif //MOSES_PHRASE_REFERENCE_H diff --git a/moses/src/SentenceStats.cpp b/moses/src/SentenceStats.cpp index 400eedbaf..a90ee3676 100644 --- a/moses/src/SentenceStats.cpp +++ b/moses/src/SentenceStats.cpp @@ -41,6 +41,6 @@ void SentenceStats::AddDeletedWords(const Hypothesis& hypo) if(hypo.GetPrevHypo() != NULL && hypo.GetPrevHypo()->GetCurrSourceWordsRange().GetNumWordsCovered() > 0) AddDeletedWords(*hypo.GetPrevHypo()); if(hypo.GetCurrTargetWordsRange().GetNumWordsCovered() == 0) { - m_deletedWords.push_back(PhraseReference(hypo.GetSourcePhrase(), hypo.GetCurrSourceWordsRange())); + m_deletedWords.push_back(hypo.GetSourcePhrase()); } } diff --git a/moses/src/SentenceStats.h b/moses/src/SentenceStats.h index 709e051d4..714375c5e 100644 --- a/moses/src/SentenceStats.h +++ b/moses/src/SentenceStats.h @@ -29,7 +29,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "TypeDef.h" //FactorArray #include "InputType.h" #include "Util.h" //Join() -#include "PhraseReference.h" struct RecombinationInfo { @@ -74,7 +73,7 @@ class SentenceStats size_t GetTotalSourceWords() const {return m_totalSourceWords;} size_t GetNumWordsDeleted() const {return m_deletedWords.size();} size_t GetNumWordsInserted() const {return m_insertedWords.size();} - const std::vector& GetDeletedWords() const {return m_deletedWords;} + const std::vector& GetDeletedWords() const {return m_deletedWords;} const std::vector& GetInsertedWords() const {return m_insertedWords;} void AddRecombination(const Hypothesis& worseHypo, const Hypothesis& betterHypo) @@ -99,7 +98,7 @@ class SentenceStats //words size_t m_totalSourceWords; - std::vector m_deletedWords; //count deleted words/phrases in the final hypothesis + std::vector m_deletedWords; //count deleted words/phrases in the final hypothesis std::vector m_insertedWords; //count inserted words in the final hypothesis }; diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp index 923621988..2e7c5e7b6 100755 --- a/moses/src/StaticData.cpp +++ b/moses/src/StaticData.cpp @@ -101,6 +101,14 @@ bool StaticData::LoadData(Parameter *parameter) if (m_inputType == 2) { s_it = "word lattice"; } VERBOSE(2,"input type is: "<GetParam("recover-input-path").size()) { + m_recoverPath = Scan(m_parameter->GetParam("recover-input-path")[0]); + if (m_recoverPath && m_inputType == SentenceInput) { + TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n"); + m_recoverPath = false; + } + } + // factor delimiter if (m_parameter->GetParam("factor-delimiter").size() > 0) { m_factorDelimiter = m_parameter->GetParam("factor-delimiter")[0]; diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h index 3d1a6b87d..e840b7e18 100755 --- a/moses/src/StaticData.h +++ b/moses/src/StaticData.h @@ -90,6 +90,7 @@ protected: bool m_wordDeletionEnabled; bool m_sourceStartPosMattersForRecombination; + bool m_recoverPath; InputTypeEnum m_inputType; size_t m_numInputScores; @@ -230,6 +231,10 @@ public: { return m_maxHypoStackSize; } + size_t IsPathRecoveryEnabled() const + { + return m_recoverPath; + } int GetMaxDistortion() const { return m_maxDistortion; diff --git a/regression-testing/tests/lattice-distortion/moses.ini b/regression-testing/tests/lattice-distortion/moses.ini index 0aabf01ba..1a565d4e3 100644 --- a/regression-testing/tests/lattice-distortion/moses.ini +++ b/regression-testing/tests/lattice-distortion/moses.ini @@ -55,6 +55,9 @@ T 0 [verbose] 2 +[recover-input-path] +1 + [n-best-list] nbest 10 diff --git a/regression-testing/tests/lattice-distortion/truth/results.txt b/regression-testing/tests/lattice-distortion/truth/results.txt index 0d47a7393..e3da5a06e 100644 --- a/regression-testing/tests/lattice-distortion/truth/results.txt +++ b/regression-testing/tests/lattice-distortion/truth/results.txt @@ -1,9 +1,9 @@ -TRANSLATION_1=1 2 -TRANSLATION_2=1 2 3 -TRANSLATION_3=1 2 3 -TRANSLATION_4=1 2 3 -TRANSLATION_5=4 1234 5 -TRANSLATION_6=1 +TRANSLATION_1=A B ||| 1 2 +TRANSLATION_2=A B C ||| 1 2 3 +TRANSLATION_3=A B C ||| 1 2 3 +TRANSLATION_4=A B C ||| 1 2 3 +TRANSLATION_5=D E X ||| 4 1234 5 +TRANSLATION_6=A ||| 1 LMLOAD_TIME ~ 0.000 PTLOAD_TIME ~ 0.000 SCORE_1 = 1.090