From a1b2f66d91a3bb2122674f727e257ed256d9f729 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 8 May 2013 11:28:31 +0100 Subject: [PATCH] each new target phrase in factored decomposition holds scores from previous steps. --- moses/DecodeStepGeneration.cpp | 11 +++++-- moses/DecodeStepTranslation.cpp | 38 ++++++++++++----------- moses/DecodeStepTranslation.h | 5 +-- moses/MockHypothesis.cpp | 2 +- moses/PartialTranslOptColl.cpp | 14 ++++++++- moses/PartialTranslOptColl.h | 2 ++ moses/Sentence.cpp | 2 +- moses/TargetPhrase.cpp | 9 ++++-- moses/TargetPhrase.h | 5 +++ moses/TranslationOption.cpp | 38 +++++++---------------- moses/TranslationOption.h | 7 +---- moses/TranslationOptionCollection.cpp | 11 +++++-- moses/TranslationOptionCollectionText.cpp | 1 - 13 files changed, 78 insertions(+), 67 deletions(-) diff --git a/moses/DecodeStepGeneration.cpp b/moses/DecodeStepGeneration.cpp index 0f0c5c2dc..ba2115467 100644 --- a/moses/DecodeStepGeneration.cpp +++ b/moses/DecodeStepGeneration.cpp @@ -44,8 +44,15 @@ TranslationOption *DecodeStepGeneration::MergeGeneration(const TranslationOption return NULL; } - TranslationOption *newTransOpt = new TranslationOption(oldTO); - newTransOpt->MergeNewFeatures(mergePhrase, generationScore, m_newOutputFactors); + const TargetPhrase &inPhrase = oldTO.GetTargetPhrase(); + TargetPhrase outPhrase(inPhrase); + outPhrase.SetScore(generationScore); + + outPhrase.MergeFactors(mergePhrase, m_newOutputFactors); + + const WordsRange &sourceWordsRange = oldTO.GetSourceWordsRange(); + + TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase); return newTransOpt; } diff --git a/moses/DecodeStepTranslation.cpp b/moses/DecodeStepTranslation.cpp index ad00c51b1..5424a07e8 100644 --- a/moses/DecodeStepTranslation.cpp +++ b/moses/DecodeStepTranslation.cpp @@ -34,19 +34,6 @@ DecodeStepTranslation::DecodeStepTranslation(const PhraseDictionary* pdf, const { } - -TranslationOption *DecodeStepTranslation::MergeTranslation(const TranslationOption& oldTO, const TargetPhrase &targetPhrase) const -{ - if (IsFilteringStep()) { - if (!oldTO.IsCompatible(targetPhrase, m_conflictFactors)) return 0; - } - - TranslationOption *newTransOpt = new TranslationOption(oldTO); - newTransOpt->MergeNewFeatures(targetPhrase, targetPhrase.GetScoreBreakdown(), m_newOutputFactors); - return newTransOpt; -} - - void DecodeStepTranslation::Process(const TranslationSystem* system , const TranslationOption &inputPartialTranslOpt , const DecodeStep &decodeStep @@ -66,7 +53,8 @@ void DecodeStepTranslation::Process(const TranslationSystem* system const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange(); const PhraseDictionary* phraseDictionary = decodeStep.GetPhraseDictionaryFeature(); - const size_t currSize = inputPartialTranslOpt.GetTargetPhrase().GetSize(); + const TargetPhrase &inPhrase = inputPartialTranslOpt.GetTargetPhrase(); + const size_t currSize = inPhrase.GetSize(); const size_t tableLimit = phraseDictionary->GetTableLimit(); const TargetPhraseCollection *phraseColl= @@ -78,13 +66,27 @@ void DecodeStepTranslation::Process(const TranslationSystem* system for (iterTargetPhrase = phraseColl->begin(); iterTargetPhrase != iterEnd; ++iterTargetPhrase) { const TargetPhrase& targetPhrase = **iterTargetPhrase; + const ScoreComponentCollection &transScores = targetPhrase.GetScoreBreakdown(); // skip if the if (targetPhrase.GetSize() != currSize) continue; - TranslationOption *newTransOpt = MergeTranslation(inputPartialTranslOpt, targetPhrase); - if (newTransOpt != NULL) { - outputPartialTranslOptColl.Add(system, newTransOpt ); + TargetPhrase outPhrase(inPhrase); + + if (IsFilteringStep()) { + if (!inputPartialTranslOpt.IsCompatible(targetPhrase, m_conflictFactors)) + continue; } + + outPhrase.SetScore(transScores); + outPhrase.Evaluate(); // need to do this as all non-transcores would be screwed up + + outPhrase.MergeFactors(targetPhrase, m_newOutputFactors); + + TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase); + assert(newTransOpt != NULL); + + outputPartialTranslOptColl.Add(system, newTransOpt ); + } } else if (sourceWordsRange.GetNumWordsCovered() == 1) { // unknown handler @@ -117,7 +119,7 @@ void DecodeStepTranslation::ProcessInitialTranslation(const TranslationSystem* s for (iterTargetPhrase = phraseColl->begin() ; iterTargetPhrase != iterEnd ; ++iterTargetPhrase) { const TargetPhrase &targetPhrase = **iterTargetPhrase; - TranslationOption *transOpt = new TranslationOption(wordsRange, targetPhrase, source); + TranslationOption *transOpt = new TranslationOption(wordsRange, targetPhrase); outputPartialTranslOptColl.Add (system, transOpt); diff --git a/moses/DecodeStepTranslation.h b/moses/DecodeStepTranslation.h index 1c8032535..c5c88f6c6 100644 --- a/moses/DecodeStepTranslation.h +++ b/moses/DecodeStepTranslation.h @@ -56,10 +56,7 @@ public: , size_t startPos, size_t endPos, bool adhereTableLimit) const; private: - /*! create new TranslationOption from merging oldTO with mergePhrase - This function runs IsCompatible() to ensure the two can be merged - */ - TranslationOption *MergeTranslation(const TranslationOption& oldTO, const TargetPhrase &targetPhrase) const; + }; diff --git a/moses/MockHypothesis.cpp b/moses/MockHypothesis.cpp index b0e407fac..3a86965cd 100644 --- a/moses/MockHypothesis.cpp +++ b/moses/MockHypothesis.cpp @@ -65,7 +65,7 @@ MockHypothesisGuard::MockHypothesisGuard( m_targetPhrases.push_back(TargetPhrase()); m_targetPhrases.back().CreateFromString(Input, factors, *ti, "|"); m_toptions.push_back(new TranslationOption - (wordsRange,m_targetPhrases.back(),m_sentence)); + (wordsRange,m_targetPhrases.back())); m_hypothesis = Hypothesis::Create(*prevHypo,*m_toptions.back(),NULL); } diff --git a/moses/PartialTranslOptColl.cpp b/moses/PartialTranslOptColl.cpp index 91ad65415..aacee75ed 100644 --- a/moses/PartialTranslOptColl.cpp +++ b/moses/PartialTranslOptColl.cpp @@ -21,6 +21,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "PartialTranslOptColl.h" #include +#include + +using namespace std; namespace Moses { @@ -37,7 +40,6 @@ PartialTranslOptColl::PartialTranslOptColl() /** add a partial translation option to the collection (without pruning) */ void PartialTranslOptColl::AddNoPrune(const TranslationSystem* system, TranslationOption *partialTranslOpt) { - partialTranslOpt->CalcScore(system); if (partialTranslOpt->GetFutureScore() >= m_worstScore) { m_list.push_back(partialTranslOpt); if (partialTranslOpt->GetFutureScore() > m_bestScore) @@ -95,6 +97,16 @@ void PartialTranslOptColl::Prune() // TRACE_ERR( "pruned to size " << m_list.size() << ", total pruned: " << m_totalPruned << std::endl); } +// friend +ostream& operator<<(ostream& out, const PartialTranslOptColl& possibleTranslation) +{ + for (size_t i = 0; i < possibleTranslation.m_list.size(); ++i) { + const TranslationOption &transOpt = *possibleTranslation.m_list[i]; + out << transOpt << endl; + } + return out; +} + } diff --git a/moses/PartialTranslOptColl.h b/moses/PartialTranslOptColl.h index bbe4d05a1..8184bdfd5 100644 --- a/moses/PartialTranslOptColl.h +++ b/moses/PartialTranslOptColl.h @@ -43,6 +43,8 @@ namespace Moses */ class PartialTranslOptColl { + friend std::ostream& operator<<(std::ostream& out, const PartialTranslOptColl& possibleTranslation); + protected: std::vector m_list; float m_bestScore; /**< score of the best translation option */ diff --git a/moses/Sentence.cpp b/moses/Sentence.cpp index d048dff4d..7e7dc543f 100644 --- a/moses/Sentence.cpp +++ b/moses/Sentence.cpp @@ -145,7 +145,7 @@ int Sentence::Read(std::istream& in,const std::vector& factorOrder) const XmlOption *xmlOption = *iterXmlOpts; - TranslationOption *transOpt = new TranslationOption(xmlOption->range, xmlOption->targetPhrase, *this); + TranslationOption *transOpt = new TranslationOption(xmlOption->range, xmlOption->targetPhrase); m_xmlOptionsList.push_back(transOpt); for(size_t j=transOpt->GetSourceWordsRange().GetStartPos(); j<=transOpt->GetSourceWordsRange().GetEndPos(); j++) { diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp index 3c8a9c33a..d2fbfa5b6 100644 --- a/moses/TargetPhrase.cpp +++ b/moses/TargetPhrase.cpp @@ -80,18 +80,21 @@ void TargetPhrase::WriteToRulePB(hgmert::Rule* pb) const void TargetPhrase::Evaluate() { - ScoreComponentCollection estimatedFutureScore; + ScoreComponentCollection futureScoreBreakdown; const std::vector &ffs = FeatureFunction::GetFeatureFunctions(); for (size_t i = 0; i < ffs.size(); ++i) { const FeatureFunction &ff = *ffs[i]; if (!ff.IsDecodeFeature()) { - ff.Evaluate(*this, m_scoreBreakdown, estimatedFutureScore); + ff.Evaluate(*this, m_scoreBreakdown, futureScoreBreakdown); } } - m_fullScore = m_scoreBreakdown.GetWeightedScore() + estimatedFutureScore.GetWeightedScore(); + float weightedScore = m_scoreBreakdown.GetWeightedScore(); + float futureScore = futureScoreBreakdown.GetWeightedScore(); + + m_fullScore = weightedScore + futureScore; } void TargetPhrase::SetXMLScore(float score) diff --git a/moses/TargetPhrase.h b/moses/TargetPhrase.h index 41c407471..ec9b9e027 100644 --- a/moses/TargetPhrase.h +++ b/moses/TargetPhrase.h @@ -70,6 +70,11 @@ public: void SetXMLScore(float score); void SetInputScore(const Scores &scoreVector); + void SetScore(const ScoreComponentCollection &scores) + { + m_scoreBreakdown.PlusEquals(scores); + } + TargetPhrase *MergeNext(const TargetPhrase &targetPhrase) const; // used for translation step diff --git a/moses/TranslationOption.cpp b/moses/TranslationOption.cpp index 504111f45..7e0e6bcb8 100644 --- a/moses/TranslationOption.cpp +++ b/moses/TranslationOption.cpp @@ -36,32 +36,32 @@ namespace Moses //TODO this should be a factory function! TranslationOption::TranslationOption(const WordsRange &wordsRange - , const TargetPhrase &targetPhrase - , const InputType &inputType) + , const TargetPhrase &targetPhrase) : m_targetPhrase(targetPhrase) , m_sourceWordsRange(wordsRange) , m_scoreBreakdown(targetPhrase.GetScoreBreakdown()) -{} + , m_futureScore(targetPhrase.GetFutureScore()) +{ +} //TODO this should be a factory function! TranslationOption::TranslationOption(const WordsRange &wordsRange , const TargetPhrase &targetPhrase - , const InputType &inputType , const UnknownWordPenaltyProducer* up) : m_targetPhrase(targetPhrase) , m_sourceWordsRange (wordsRange) - , m_futureScore(0) - ,m_scoreBreakdown(targetPhrase.GetScoreBreakdown()) + , m_scoreBreakdown(targetPhrase.GetScoreBreakdown()) + , m_futureScore(targetPhrase.GetFutureScore()) { } TranslationOption::TranslationOption(const TranslationOption ©, const WordsRange &sourceWordsRange) - : m_targetPhrase(copy.m_targetPhrase) +: m_targetPhrase(copy.m_targetPhrase) //, m_sourcePhrase(new Phrase(*copy.m_sourcePhrase)) // TODO use when confusion network trans opt for confusion net properly implemented - , m_sourceWordsRange(sourceWordsRange) - , m_futureScore(copy.m_futureScore) - , m_scoreBreakdown(copy.m_scoreBreakdown) - , m_cachedScores(copy.m_cachedScores) +, m_sourceWordsRange(sourceWordsRange) +, m_scoreBreakdown(copy.m_scoreBreakdown) +, m_futureScore(copy.m_futureScore) +, m_cachedScores(copy.m_cachedScores) {} void TranslationOption::MergeNewFeatures(const Phrase& phrase, const ScoreComponentCollection& score, const std::vector& featuresToAdd) @@ -98,22 +98,6 @@ bool TranslationOption::Overlap(const Hypothesis &hypothesis) const return bitmap.Overlap(GetSourceWordsRange()); } -void TranslationOption::CalcScore(const TranslationSystem* system) -{ - // LM scores - float ngramScore = 0; - float retFullScore = 0; - float oovScore = 0; - - const LMList &lmList = StaticData::Instance().GetLMList(); - - lmList.CalcScore(GetTargetPhrase(), retFullScore, ngramScore, oovScore, &m_scoreBreakdown); - - // future score - m_futureScore = retFullScore - ngramScore + oovScore - + m_scoreBreakdown.GetWeightedScore(); -} - TO_STRING_BODY(TranslationOption); // friend diff --git a/moses/TranslationOption.h b/moses/TranslationOption.h index 54b2bb6fb..d5f4bb6bf 100644 --- a/moses/TranslationOption.h +++ b/moses/TranslationOption.h @@ -79,12 +79,10 @@ protected: public: /** constructor. Used by initial translation step */ TranslationOption(const WordsRange &wordsRange - , const TargetPhrase &targetPhrase - , const InputType &inputType); + , const TargetPhrase &targetPhrase); /** constructor. Used to create trans opt from unknown word */ TranslationOption(const WordsRange &wordsRange , const TargetPhrase &targetPhrase - , const InputType &inputType , const UnknownWordPenaltyProducer* uwpProducer); /** copy constructor, but change words range. used by caching */ @@ -154,9 +152,6 @@ public: return &(it->second); } - /** Calculate future score and n-gram score of this trans option, plus the score breakdowns */ - void CalcScore(const TranslationSystem* system); - void CacheScores(const FeatureFunction &scoreProducer, const Scores &score); TO_STRING(); diff --git a/moses/TranslationOptionCollection.cpp b/moses/TranslationOptionCollection.cpp index c6cfa45c9..33035f7f8 100644 --- a/moses/TranslationOptionCollection.cpp +++ b/moses/TranslationOptionCollection.cpp @@ -269,9 +269,8 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,s targetPhrase.Evaluate(); - transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos + length - 1), targetPhrase, m_source + transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos + length - 1), targetPhrase , StaticData::Instance().GetUnknownWordPenaltyProducer()); - transOpt->CalcScore(m_system); Add(transOpt); @@ -455,6 +454,8 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange( , bool adhereTableLimit , size_t graphInd) { + cerr << "range=" << startPos << " to " << endPos << endl; + if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) { Phrase *sourcePhrase = NULL; // can't initialise with substring, in case it's confusion network @@ -501,12 +502,16 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange( vector::const_iterator iterPartialTranslOpt; for (iterPartialTranslOpt = partTransOptList.begin() ; iterPartialTranslOpt != partTransOptList.end() ; ++iterPartialTranslOpt) { TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt; + cerr << inputPartialTranslOpt << endl; + decodeStep.Process(m_system, inputPartialTranslOpt , decodeStep , *newPtoc , this , adhereTableLimit); } + + // last but 1 partial trans not required anymore totalEarlyPruned += newPtoc->GetPrunedCount(); delete oldPtoc; @@ -520,7 +525,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange( vector::const_iterator iterColl; for (iterColl = partTransOptList.begin() ; iterColl != partTransOptList.end() ; ++iterColl) { TranslationOption *transOpt = *iterColl; - transOpt->CalcScore(m_system); + cerr << *transOpt << endl; Add(transOpt); } diff --git a/moses/TranslationOptionCollectionText.cpp b/moses/TranslationOptionCollectionText.cpp index e34a1b398..700261863 100644 --- a/moses/TranslationOptionCollectionText.cpp +++ b/moses/TranslationOptionCollectionText.cpp @@ -64,7 +64,6 @@ void TranslationOptionCollectionText::CreateXmlOptionsForRange(size_t startPosit //get vector of TranslationOptions from Sentence for(size_t i=0; iCalcScore(m_system); Add(xmlOptions[i]); }