From 9f28bf5619f697984d6fad9a487560724fd127db Mon Sep 17 00:00:00 2001 From: Michael Denkowski Date: Tue, 27 Oct 2015 14:29:56 -0400 Subject: [PATCH] Specify "zero" values for feature scores for PhraseDictionaryGroup User-specified default scores for when models in the group do not contain a phrase pair. --- .../PhraseDictionaryGroup.cpp | 22 +++++++++++++++---- .../TranslationModel/PhraseDictionaryGroup.h | 2 ++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/moses/TranslationModel/PhraseDictionaryGroup.cpp b/moses/TranslationModel/PhraseDictionaryGroup.cpp index b672d09e7..0f6b736f9 100644 --- a/moses/TranslationModel/PhraseDictionaryGroup.cpp +++ b/moses/TranslationModel/PhraseDictionaryGroup.cpp @@ -33,7 +33,8 @@ namespace Moses PhraseDictionaryGroup::PhraseDictionaryGroup(const string &line) : PhraseDictionary(line, true), m_numModels(0), - m_restrict(false) + m_restrict(false), + m_specifiedZeros(false) { ReadParameters(); } @@ -45,6 +46,9 @@ void PhraseDictionaryGroup::SetParameter(const string& key, const string& value) m_numModels = m_memberPDStrs.size(); } else if (key == "restrict") { m_restrict = Scan(value); + } else if (key == "zeros") { + m_specifiedZeros = true; + m_zeros = Scan(Tokenize(value, ",")); } else { PhraseDictionary::SetParameter(key, value); } @@ -67,10 +71,20 @@ void PhraseDictionaryGroup::Load() } } UTIL_THROW_IF2(!pdFound, - "Could not find component phrase table " << pdName); + "Could not find member phrase table " << pdName); } UTIL_THROW_IF2(componentWeights != m_numScoreComponents, - "Total number of component model scores is unequal to specified number of scores"); + "Total number of member model scores is unequal to specified number of scores"); + + // Determine "zero" scores for features + if (m_specifiedZeros) { + UTIL_THROW_IF2(m_zeros.size() != m_numScoreComponents, + "Number of specified zeros is unequal to number of member model scores"); + } else { + // Default is all 0 (as opposed to e.g. -99 or similar to approximate log(0) + // or a smoothed "not in model" score) + m_zeros = vector(m_numScoreComponents, 0); + } } void PhraseDictionaryGroup::GetTargetPhraseCollectionBatch( @@ -150,7 +164,7 @@ CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const phrase->GetScoreBreakdown().ZeroDenseFeatures(&pd); // Add phrase entry allPhrases.push_back(phrase); - allScores[targetPhrase] = vector(m_numScoreComponents, 0); + allScores[targetPhrase] = vector(m_zeros); } vector& scores = allScores.find(targetPhrase)->second; diff --git a/moses/TranslationModel/PhraseDictionaryGroup.h b/moses/TranslationModel/PhraseDictionaryGroup.h index f8deca41f..7674b4934 100644 --- a/moses/TranslationModel/PhraseDictionaryGroup.h +++ b/moses/TranslationModel/PhraseDictionaryGroup.h @@ -70,6 +70,8 @@ protected: std::vector m_memberPDs; size_t m_numModels; bool m_restrict; + bool m_specifiedZeros; + std::vector m_zeros; std::vector m_pdFeature; typedef std::vector PhraseCache;