diff --git a/moses/BitmapContainer.h b/moses/BitmapContainer.h index 5f301a1e8..680504b89 100644 --- a/moses/BitmapContainer.h +++ b/moses/BitmapContainer.h @@ -129,7 +129,7 @@ public: // Fallback: scoreA < scoreB == false, non-deterministic sort return false; } - return (phrA->Compare(*phrB) < 0); + return (phrA->Compare(*phrB) > 0); } } }; diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp index 35f598cfb..c6a248419 100644 --- a/moses/Hypothesis.cpp +++ b/moses/Hypothesis.cpp @@ -150,8 +150,8 @@ EvaluateWhenApplied(StatefulFeatureFunction const& sfff, int state_idx) // Manager& manager = this->GetManager(); //Get the manager and the ttask // ttasksptr const& ttask = manager.GetTtask(); FFState const* prev = m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL; - m_ffStates[state_idx] - = sfff.EvaluateWhenApplied(*this, prev, &m_currScoreBreakdown); + m_ffStates[state_idx] + = sfff.EvaluateWhenApplied(*this, prev, &m_currScoreBreakdown); } } diff --git a/moses/LM/IRST.cpp b/moses/LM/IRST.cpp index deca19abf..aed43043f 100644 --- a/moses/LM/IRST.cpp +++ b/moses/LM/IRST.cpp @@ -93,12 +93,12 @@ LanguageModelIRST:: TRACE_ERR( "reset mmap\n"); if (m_lmtb) m_lmtb->reset_mmap(); #endif - + delete m_lmtb; } -bool +bool LanguageModelIRST:: IsUseable(const FactorMask &mask) const { @@ -106,7 +106,7 @@ IsUseable(const FactorMask &mask) const return ret; } -void +void LanguageModelIRST:: Load() { @@ -135,7 +135,7 @@ Load() if (m_lmtb_dub > 0) m_lmtb->setlogOOVpenalty(m_lmtb_dub); } -void +void LanguageModelIRST:: CreateFactors(FactorCollection &factorCollection) { @@ -179,31 +179,31 @@ CreateFactors(FactorCollection &factorCollection) } } -int +int LanguageModelIRST:: GetLmID( const std::string &str ) const { return d->encode( str.c_str() ); // at the level of micro tags } -int +int LanguageModelIRST:: GetLmID( const Word &word ) const { return GetLmID( word.GetFactor(m_factorType) ); } -int +int LanguageModelIRST:: GetLmID( const Factor *factor ) const { size_t factorId = factor->GetId(); - + if ((factorId >= m_lmIdLookup.size()) || (m_lmIdLookup[factorId] == m_empty)) { if (d->incflag()==1) { std::string s = factor->GetString().as_string(); int code = d->encode(s.c_str()); - + ////////// ///poiche' non c'e' distinzione tra i factorIDs delle parole sorgenti ///e delle parole target in Moses, puo' accadere che una parola target @@ -231,7 +231,7 @@ GetLmID( const Factor *factor ) const /// ma si perde in efficienza nell'accesso perche' non e' piu' possibile quello random dei vettori /// a te la scelta!!!! //////////////// - + if (factorId >= m_lmIdLookup.size()) { //resize and fill with m_empty @@ -242,7 +242,7 @@ GetLmID( const Factor *factor ) const //insert new code m_lmIdLookup[factorId] = code; return code; - + } else { return m_unknownId; } @@ -251,7 +251,7 @@ GetLmID( const Factor *factor ) const } } -FFState const* +FFState const* LanguageModelIRST:: EmptyHypothesisState(const InputType &/*input*/) const { @@ -260,12 +260,12 @@ EmptyHypothesisState(const InputType &/*input*/) const return ret.release(); } -void +void LanguageModelIRST:: CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const { - bool isContextAdaptive - = m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT; + bool isContextAdaptive + = m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT; fullScore = 0; ngramScore = 0; @@ -308,7 +308,7 @@ CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oov ++idx; } #ifdef IRSTLM_CONTEXT_DEPENDENT - } + } #endif ngramScore = 0.0; int end_loop = (int) phrase.GetSize(); @@ -334,20 +334,20 @@ CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oov ngramScore += m_lmtb->clprob(codes,idx,NULL,NULL,&msp); } #ifdef IRSTLM_CONTEXT_DEPENDENT - } -#endif + } +#endif before_boundary = TransformLMScore(before_boundary); ngramScore = TransformLMScore(ngramScore); fullScore = ngramScore + before_boundary; } -FFState* +FFState* LanguageModelIRST:: -EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, +EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const { - bool isContextAdaptive - = m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT; + bool isContextAdaptive + = m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT; if (!hypo.GetCurrTargetLength()) { std::auto_ptr ret(new IRSTLMState(ps)); @@ -387,18 +387,17 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, position = (const int) begin+1; float score; #ifdef IRSTLM_CONTEXT_DEPENDENT - if (CW) - { - score = m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp); - while (position < adjust_end) { - for (idx=1; idxclprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp); - ++position; + if (CW) { + score = m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp); + while (position < adjust_end) { + for (idx=1; idxclprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp); + ++position; + } + } else { #endif score = m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp); position = (const int) begin+1; @@ -430,12 +429,12 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, --idx; } #ifdef IRSTLM_CONTEXT_DEPENDENT - if (CW) score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp); + if (CW) score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp); else #else - score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp); + score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp); #endif - } else { + } else { // need to set the LM state if (adjust_end < end) { //the LMstate of this target phrase refers to the last m_lmtb_size-1 words @@ -447,16 +446,16 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, msp = (char *) m_lmtb->cmaxsuffptr(codes,m_lmtb_size); } } - + score = TransformLMScore(score); out->PlusEquals(this, score); - + std::auto_ptr ret(new IRSTLMState(msp)); - + return ret.release(); } -LMResult +LMResult LanguageModelIRST:: GetValue(const vector &contextFactor, State* finalState) const { @@ -494,7 +493,7 @@ GetValue(const vector &contextFactor, State* finalState) const return result; } -bool +bool LMCacheCleanup(const int sentences_done, const size_t m_lmcache_cleanup_threshold) { if (sentences_done==-1) return true; @@ -512,7 +511,7 @@ void LanguageModelIRST::InitializeForInput(ttasksptr const& ttask) #endif } -void +void LanguageModelIRST:: CleanUpAfterSentenceProcessing(const InputType& source) { @@ -528,7 +527,7 @@ CleanUpAfterSentenceProcessing(const InputType& source) } } -void +void LanguageModelIRST:: SetParameter(const std::string& key, const std::string& value) { @@ -539,6 +538,6 @@ SetParameter(const std::string& key, const std::string& value) } m_lmtb_size = m_nGramOrder; } - + } diff --git a/moses/Phrase.h b/moses/Phrase.h index f9fee0148..72454595f 100644 --- a/moses/Phrase.h +++ b/moses/Phrase.h @@ -56,7 +56,7 @@ public: /// return shared pointer to ttask // only TargetPhrases have non-NULL ttaskptrs! virtual ttasksptr GetTtask() const { - return ttasksptr(); + return ttasksptr(); } /// check if this phrase belongs to a valid ttask diff --git a/moses/SearchCubePruning.cpp b/moses/SearchCubePruning.cpp index 920f264f9..1d532025e 100644 --- a/moses/SearchCubePruning.cpp +++ b/moses/SearchCubePruning.cpp @@ -44,7 +44,7 @@ public: // Fallback: compare pointers, non-deterministic sort return A < B; } - return (phrA->Compare(*phrB) < 0); + return (phrA->Compare(*phrB) > 0); } } }; diff --git a/moses/Sentence.h b/moses/Sentence.h index cb2a5cfce..22ae81ec7 100644 --- a/moses/Sentence.h +++ b/moses/Sentence.h @@ -61,7 +61,7 @@ protected: void ProcessPlaceholders(const std::vector< std::pair > &placeholders); // "Document Level Translation" instructions, see aux_interpret_dlt - std::vector > m_dlt_meta; + std::vector > m_dlt_meta; public: Sentence(); diff --git a/moses/Syntax/F2S/Manager-inl.h b/moses/Syntax/F2S/Manager-inl.h index db4d2858e..7d5368f94 100644 --- a/moses/Syntax/F2S/Manager-inl.h +++ b/moses/Syntax/F2S/Manager-inl.h @@ -287,8 +287,8 @@ void Manager::RecombineAndSort( // any 'duplicate' vertices are deleted. // TODO Set? typedef boost::unordered_map Map; + SVertexRecombinationHasher, + SVertexRecombinationEqualityPred> Map; Map map; for (std::vector::const_iterator p = buffer.begin(); p != buffer.end(); ++p) { diff --git a/moses/Syntax/S2T/Manager-inl.h b/moses/Syntax/S2T/Manager-inl.h index 57a52703b..522174219 100644 --- a/moses/Syntax/S2T/Manager-inl.h +++ b/moses/Syntax/S2T/Manager-inl.h @@ -351,8 +351,8 @@ void Manager::RecombineAndSort(const std::vector &buffer, // any 'duplicate' vertices are deleted. // TODO Set? typedef boost::unordered_map Map; + SVertexRecombinationHasher, + SVertexRecombinationEqualityPred> Map; Map map; for (std::vector::const_iterator p = buffer.begin(); p != buffer.end(); ++p) { diff --git a/moses/Syntax/SVertexRecombinationEqualityPred.h b/moses/Syntax/SVertexRecombinationEqualityPred.h index 3461e4fa2..36131d6ca 100644 --- a/moses/Syntax/SVertexRecombinationEqualityPred.h +++ b/moses/Syntax/SVertexRecombinationEqualityPred.h @@ -11,7 +11,7 @@ namespace Syntax class SVertexRecombinationEqualityPred { - public: +public: bool operator()(const SVertex *v1, const SVertex *v2) const { assert(v1->states.size() == v2->states.size()); for (std::size_t i = 0; i < v1->states.size(); ++i) { diff --git a/moses/Syntax/SVertexRecombinationHasher.h b/moses/Syntax/SVertexRecombinationHasher.h index e0cbc06ba..a64cbee44 100644 --- a/moses/Syntax/SVertexRecombinationHasher.h +++ b/moses/Syntax/SVertexRecombinationHasher.h @@ -11,7 +11,7 @@ namespace Syntax class SVertexRecombinationHasher { - public: +public: std::size_t operator()(const SVertex *v) const { std::size_t seed = 0; for (std::vector::const_iterator p = v->states.begin(); diff --git a/moses/Syntax/T2S/Manager-inl.h b/moses/Syntax/T2S/Manager-inl.h index e9d6cd82d..6f64233ec 100644 --- a/moses/Syntax/T2S/Manager-inl.h +++ b/moses/Syntax/T2S/Manager-inl.h @@ -247,8 +247,8 @@ void Manager::RecombineAndSort( // any 'duplicate' vertices are deleted. // TODO Set? typedef boost::unordered_map Map; + SVertexRecombinationHasher, + SVertexRecombinationEqualityPred> Map; Map map; for (std::vector::const_iterator p = buffer.begin(); p != buffer.end(); ++p) { diff --git a/moses/TranslationModel/PhraseDictionaryGroup.cpp b/moses/TranslationModel/PhraseDictionaryGroup.cpp index b672d09e7..0f6b736f9 100644 --- a/moses/TranslationModel/PhraseDictionaryGroup.cpp +++ b/moses/TranslationModel/PhraseDictionaryGroup.cpp @@ -33,7 +33,8 @@ namespace Moses PhraseDictionaryGroup::PhraseDictionaryGroup(const string &line) : PhraseDictionary(line, true), m_numModels(0), - m_restrict(false) + m_restrict(false), + m_specifiedZeros(false) { ReadParameters(); } @@ -45,6 +46,9 @@ void PhraseDictionaryGroup::SetParameter(const string& key, const string& value) m_numModels = m_memberPDStrs.size(); } else if (key == "restrict") { m_restrict = Scan(value); + } else if (key == "zeros") { + m_specifiedZeros = true; + m_zeros = Scan(Tokenize(value, ",")); } else { PhraseDictionary::SetParameter(key, value); } @@ -67,10 +71,20 @@ void PhraseDictionaryGroup::Load() } } UTIL_THROW_IF2(!pdFound, - "Could not find component phrase table " << pdName); + "Could not find member phrase table " << pdName); } UTIL_THROW_IF2(componentWeights != m_numScoreComponents, - "Total number of component model scores is unequal to specified number of scores"); + "Total number of member model scores is unequal to specified number of scores"); + + // Determine "zero" scores for features + if (m_specifiedZeros) { + UTIL_THROW_IF2(m_zeros.size() != m_numScoreComponents, + "Number of specified zeros is unequal to number of member model scores"); + } else { + // Default is all 0 (as opposed to e.g. -99 or similar to approximate log(0) + // or a smoothed "not in model" score) + m_zeros = vector(m_numScoreComponents, 0); + } } void PhraseDictionaryGroup::GetTargetPhraseCollectionBatch( @@ -150,7 +164,7 @@ CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const phrase->GetScoreBreakdown().ZeroDenseFeatures(&pd); // Add phrase entry allPhrases.push_back(phrase); - allScores[targetPhrase] = vector(m_numScoreComponents, 0); + allScores[targetPhrase] = vector(m_zeros); } vector& scores = allScores.find(targetPhrase)->second; diff --git a/moses/TranslationModel/PhraseDictionaryGroup.h b/moses/TranslationModel/PhraseDictionaryGroup.h index f8deca41f..7674b4934 100644 --- a/moses/TranslationModel/PhraseDictionaryGroup.h +++ b/moses/TranslationModel/PhraseDictionaryGroup.h @@ -70,6 +70,8 @@ protected: std::vector m_memberPDs; size_t m_numModels; bool m_restrict; + bool m_specifiedZeros; + std::vector m_zeros; std::vector m_pdFeature; typedef std::vector PhraseCache; diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp index f0a7fc7c4..e8c5307c9 100644 --- a/moses/TranslationTask.cpp +++ b/moses/TranslationTask.cpp @@ -166,21 +166,20 @@ options() const } /// parse document-level translation info stored on the input -void +void TranslationTask:: interpret_dlt() { if (m_source->GetType() != SentenceInput) return; Sentence const& snt = static_cast(*m_source); typedef std::map dltmap_t; - BOOST_FOREACH(dltmap_t const& M, snt.GetDltMeta()) - { - dltmap_t::const_iterator i = M.find("type"); - if (i == M.end() || i->second != "adaptive-lm") continue; - dltmap_t::const_iterator j = M.find("context-weights"); - if (j == M.end()) continue; - SetContextWeights(j->second); - } + BOOST_FOREACH(dltmap_t const& M, snt.GetDltMeta()) { + dltmap_t::const_iterator i = M.find("type"); + if (i == M.end() || i->second != "adaptive-lm") continue; + dltmap_t::const_iterator j = M.find("context-weights"); + if (j == M.end()) continue; + SetContextWeights(j->second); + } } @@ -191,14 +190,14 @@ void TranslationTask::Run() << " input and iowrapper."); const size_t translationId = m_source->GetTranslationId(); - + // report wall time spent on translation Timer translationTime; translationTime.start(); interpret_dlt(); // parse document-level translation info stored on the input - + // report thread number #if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS) VERBOSE(2, "Translating line " << translationId << " in thread id " << pthread_self() << endl);