diff --git a/moses/LM/Backward.cpp b/moses/LM/Backward.cpp
index 263c90fec..69a8104fc 100644
--- a/moses/LM/Backward.cpp
+++ b/moses/LM/Backward.cpp
@@ -24,7 +24,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 #include "lm/left.hh"
 #include "lm/model.hh"
 
-#include "moses/FFState.h"
+#include "moses/FF/FFState.h"
 #include "moses/Hypothesis.h"
 #include "moses/Phrase.h"
 
@@ -39,7 +39,7 @@ namespace Moses
 {
 
 /** Constructs a new backward language model. */
-template <class Model> BackwardLanguageModel<Model>::BackwardLanguageModel(const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(file,factorType,lazy)
+template <class Model> BackwardLanguageModel<Model>::BackwardLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line,file,factorType,lazy)
 {
   //
   // This space intentionally left blank
@@ -288,30 +288,30 @@ template <class Model> FFState *BackwardLanguageModel<Model>::Evaluate(const Phr
 
 }
 
-LanguageModel *ConstructBackwardLM(const std::string &file, FactorType factorType, bool lazy)
+LanguageModel *ConstructBackwardLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
 {
   try {
     lm::ngram::ModelType model_type;
     if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
       switch(model_type) {
       case lm::ngram::PROBING:
-        return new BackwardLanguageModel<lm::ngram::ProbingModel>(file, factorType, lazy);
+        return new BackwardLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
       case lm::ngram::REST_PROBING:
-        return new BackwardLanguageModel<lm::ngram::RestProbingModel>(file, factorType, lazy);
+        return new BackwardLanguageModel<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
       case lm::ngram::TRIE:
-        return new BackwardLanguageModel<lm::ngram::TrieModel>(file, factorType, lazy);
+        return new BackwardLanguageModel<lm::ngram::TrieModel>(line, file, factorType, lazy);
       case lm::ngram::QUANT_TRIE:
-        return new BackwardLanguageModel<lm::ngram::QuantTrieModel>(file, factorType, lazy);
+        return new BackwardLanguageModel<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
       case lm::ngram::ARRAY_TRIE:
-        return new BackwardLanguageModel<lm::ngram::ArrayTrieModel>(file, factorType, lazy);
+        return new BackwardLanguageModel<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
       case lm::ngram::QUANT_ARRAY_TRIE:
-        return new BackwardLanguageModel<lm::ngram::QuantArrayTrieModel>(file, factorType, lazy);
+        return new BackwardLanguageModel<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
       default:
         std::cerr << "Unrecognized kenlm model type " << model_type << std::endl;
         abort();
       }
     } else {
-      return new BackwardLanguageModel<lm::ngram::ProbingModel>(file, factorType, lazy);
+      return new BackwardLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
     }
   } catch (std::exception &e) {
     std::cerr << e.what() << std::endl;
diff --git a/moses/LM/Backward.h b/moses/LM/Backward.h
index c81c0633d..d881af9cd 100644
--- a/moses/LM/Backward.h
+++ b/moses/LM/Backward.h
@@ -33,7 +33,7 @@ namespace Moses
 {
 
 //! This will also load. Returns a templated backward LM.
-LanguageModel *ConstructBackwardLM(const std::string &file, FactorType factorType, bool lazy);
+LanguageModel *ConstructBackwardLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
 
 class FFState;
 
 // template<typename M> class BackwardLanguageModelTest;
@@ -45,7 +45,7 @@ class BackwardLanguageModelTest;
 template <class Model> class BackwardLanguageModel : public LanguageModelKen<Model>
 {
 public:
-  BackwardLanguageModel(const std::string &file, FactorType factorType, bool lazy);
+  BackwardLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
 
   virtual const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
diff --git a/moses/LM/BackwardLMState.h b/moses/LM/BackwardLMState.h
index e6d1f325a..09a768462 100644
--- a/moses/LM/BackwardLMState.h
+++ b/moses/LM/BackwardLMState.h
@@ -22,7 +22,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 #ifndef moses_BackwardLMState_h
 #define moses_BackwardLMState_h
 
-#include "moses/FFState.h"
+#include "moses/FF/FFState.h"
 #include "moses/LM/Backward.h"
 
 #include "lm/state.hh"
diff --git a/moses/LM/BackwardTest.cpp b/moses/LM/BackwardTest.cpp
index dc5de32bd..7fed72270 100644
--- a/moses/LM/BackwardTest.cpp
+++ b/moses/LM/BackwardTest.cpp
@@ -70,6 +70,7 @@ public:
     backwardLM(
       static_cast< BackwardLanguageModel<lm::ngram::ProbingModel> * >(
         ConstructBackwardLM(
+          "LM1=1.0",
          boost::unit_test::framework::master_test_suite().argv[1],
          0,
          false)
@@ -116,9 +117,11 @@ public:
     outputFactorOrder.push_back(0);
 
     phrase.CreateFromString(
+      Input,
       outputFactorOrder,
       "the",
-      StaticData::Instance().GetFactorDelimiter());
+      StaticData::Instance().GetFactorDelimiter(),
+      NULL);
 
     BOOST_CHECK( phrase.GetSize() == 1 );
 
@@ -141,9 +144,11 @@ public:
     outputFactorOrder.push_back(0);
 
     phrase.CreateFromString(
+      Input,
       outputFactorOrder,
       "the licenses",
-      StaticData::Instance().GetFactorDelimiter());
+      StaticData::Instance().GetFactorDelimiter(),
+      NULL);
 
     BOOST_CHECK( phrase.GetSize() == 2 );
 
@@ -166,9 +171,11 @@ public:
     outputFactorOrder.push_back(0);
 
     phrase.CreateFromString(
+      Input,
       outputFactorOrder,
       "the licenses for",
-      StaticData::Instance().GetFactorDelimiter());
+      StaticData::Instance().GetFactorDelimiter(),
+      NULL);
 
     BOOST_CHECK( phrase.GetSize() == 3 );
 
@@ -191,9 +198,11 @@ public:
     outputFactorOrder.push_back(0);
 
     phrase.CreateFromString(
+      Input,
       outputFactorOrder,
       "the licenses for most",
-      StaticData::Instance().GetFactorDelimiter());
+      StaticData::Instance().GetFactorDelimiter(),
+      NULL);
 
     BOOST_CHECK( phrase.GetSize() == 4 );
 
@@ -235,9 +244,11 @@ public:
     outputFactorOrder.push_back(0);
 
     phrase.CreateFromString(
+      Input,
       outputFactorOrder,
       "the",
-      StaticData::Instance().GetFactorDelimiter());
+      StaticData::Instance().GetFactorDelimiter(),
+      NULL);
 
     BOOST_CHECK( phrase.GetSize() == 1 );
 
@@ -261,9 +272,11 @@ public:
     outputFactorOrder.push_back(0);
 
     phrase.CreateFromString(
+      Input,
       outputFactorOrder,
       "licenses",
-      StaticData::Instance().GetFactorDelimiter());
+      StaticData::Instance().GetFactorDelimiter(),
+      NULL);
 
     BOOST_CHECK( phrase.GetSize() == 1 );
 
@@ -287,9 +300,11 @@ public:
     outputFactorOrder.push_back(0);
 
     phrase.CreateFromString(
+      Input,
       outputFactorOrder,
       "for",
-      StaticData::Instance().GetFactorDelimiter());
+      StaticData::Instance().GetFactorDelimiter(),
+      NULL);
 
     BOOST_CHECK( phrase.GetSize() == 1 );
 
@@ -313,9 +328,11 @@ public:
     outputFactorOrder.push_back(0);
 
     phrase.CreateFromString(
+      Input,
       outputFactorOrder,
       "most",
-      StaticData::Instance().GetFactorDelimiter());
+      StaticData::Instance().GetFactorDelimiter(),
+      NULL);
 
     BOOST_CHECK( phrase.GetSize() == 1 );
diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile
index 770648aed..777864f23 100644
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@@ -75,7 +75,11 @@ obj ORLM.o : ORLM.cpp ..//headers ../TranslationModel/DynSAInclude//dynsa : : :
 
 #Top-level LM library. If you've added a file that doesn't depend on external
 #libraries, put it here.
-alias LM : Base.cpp Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp ORLM.o
+alias LM : Backward.cpp BackwardLMState.cpp Base.cpp Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp ORLM.o
   ../../lm//kenlm ..//headers $(dependencies) ;
 
 alias macros : : : : <define>$(lmmacros) ;
+
+#Unit test for Backward LM
+import testing ;
+run BackwardTest.cpp ..//moses LM ../../lm//kenlm /top//boost_unit_test_framework : : backward.arpa ;
diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp
index cf5e0d061..eb597242a 100644
--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@@ -60,62 +60,62 @@ struct KenLMState : public FFState {
   }
 };
 
-/*
- * An implementation of single factor LM using Ken's code.
- */
-template <class Model> class LanguageModelKen : public LanguageModel
-{
-public:
-  LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
-
-  const FFState *EmptyHypothesisState(const InputType &/*input*/) const {
-    KenLMState *ret = new KenLMState();
-    ret->state = m_ngram->BeginSentenceState();
-    return ret;
-  }
-
-  void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
-
-  FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
-
-  FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
-
-  void IncrementalCallback(Incremental::Manager &manager) const {
-    manager.LMCallback(*m_ngram, m_lmIdLookup);
-  }
-
-  bool IsUseable(const FactorMask &mask) const;
-private:
-  LanguageModelKen(const LanguageModelKen<Model> &copy_from);
-
-  lm::WordIndex TranslateID(const Word &word) const {
-    std::size_t factor = word.GetFactor(m_factorType)->GetId();
-    return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
-  }
-
-  // Convert last words of hypothesis into vocab ids, returning an end pointer.
-  lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const {
-    lm::WordIndex *index = indices;
-    lm::WordIndex *end = indices + m_ngram->Order() - 1;
-    int position = hypo.GetCurrTargetWordsRange().GetEndPos();
-    for (; ; ++index, --position) {
-      if (index == end) return index;
-      if (position == -1) {
-        *index = m_ngram->GetVocabulary().BeginSentence();
-        return index + 1;
-      }
-      *index = TranslateID(hypo.GetWord(position));
-    }
-  }
-
-  boost::shared_ptr<Model> m_ngram;
-
-  std::vector<lm::WordIndex> m_lmIdLookup;
-
-  FactorType m_factorType;
-
-  const Factor *m_beginSentenceFactor;
-};
+///*
+// * An implementation of single factor LM using Ken's code.
+// */
+//template <class Model> class LanguageModelKen : public LanguageModel
+//{
+//public:
+//  LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
+//
+//  const FFState *EmptyHypothesisState(const InputType &/*input*/) const {
+//    KenLMState *ret = new KenLMState();
+//    ret->state = m_ngram->BeginSentenceState();
+//    return ret;
+//  }
+//
+//  void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
+//
+//  FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
+//
+//  FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
+//
+//  void IncrementalCallback(Incremental::Manager &manager) const {
+//    manager.LMCallback(*m_ngram, m_lmIdLookup);
+//  }
+//
+//  bool IsUseable(const FactorMask &mask) const;
+//private:
+//  LanguageModelKen(const LanguageModelKen<Model> &copy_from);
+//
+//  lm::WordIndex TranslateID(const Word &word) const {
+//    std::size_t factor = word.GetFactor(m_factorType)->GetId();
+//    return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
+//  }
+//
+//  // Convert last words of hypothesis into vocab ids, returning an end pointer.
+//  lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const {
+//    lm::WordIndex *index = indices;
+//    lm::WordIndex *end = indices + m_ngram->Order() - 1;
+//    int position = hypo.GetCurrTargetWordsRange().GetEndPos();
+//    for (; ; ++index, --position) {
+//      if (index == end) return index;
+//      if (position == -1) {
+//        *index = m_ngram->GetVocabulary().BeginSentence();
+//        return index + 1;
+//      }
+//      *index = TranslateID(hypo.GetWord(position));
+//    }
+//  }
+//
+//  boost::shared_ptr<Model> m_ngram;
+//
+//  std::vector<lm::WordIndex> m_lmIdLookup;
+//
+//  FactorType m_factorType;
+//
+//  const Factor *m_beginSentenceFactor;
+//};
 
 class MappingBuilder : public lm::EnumerateVocab
 {
@@ -137,6 +137,8 @@ private:
   std::vector<lm::WordIndex> &m_mapping;
 };
 
+} // namespace
+
 template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
   :LanguageModel("KENLM", line)
   ,m_factorType(factorType)
@@ -168,6 +170,13 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const LanguageM
 {
 }
 
+template <class Model> const FFState * LanguageModelKen<Model>::EmptyHypothesisState(const InputType &/*input*/) const
+{
+  KenLMState *ret = new KenLMState();
+  ret->state = m_ngram->BeginSentenceState();
+  return ret;
+}
+
 template <class Model> void LanguageModelKen<Model>::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
 {
   fullScore = 0;
@@ -342,6 +351,10 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(const Cha
   return newState;
 }
 
+template <class Model> void LanguageModelKen<Model>::IncrementalCallback(Incremental::Manager &manager) const {
+  manager.LMCallback(*m_ngram, m_lmIdLookup);
+}
+
 template <class Model>
 bool LanguageModelKen<Model>::IsUseable(const FactorMask &mask) const
 {
@@ -349,7 +362,6 @@ bool LanguageModelKen<Model>::IsUseable(const FactorMask &mask) const
   return ret;
 }
 
-} // namespace
 
 LanguageModel *ConstructKenLM(const std::string &line)
 {
diff --git a/moses/LM/Ken.h b/moses/LM/Ken.h
index 7df38af75..2c03a7851 100644
--- a/moses/LM/Ken.h
+++ b/moses/LM/Ken.h
@@ -23,19 +23,80 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 #define moses_LanguageModelKen_h
 
 #include <string>
+#include <boost/shared_ptr.hpp>
+#include "lm/word_index.hh"
+
+#include "moses/LM/Base.h"
+#include "moses/Hypothesis.h"
 #include "moses/TypeDef.h"
+#include "moses/Word.h"
 
 namespace Moses
 {
 
-class LanguageModel;
+//class LanguageModel;
+
 class FFState;
 
 LanguageModel *ConstructKenLM(const std::string &line);
 
 //! This will also load. Returns a templated KenLM class
 LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
 
+/*
+ * An implementation of single factor LM using Kenneth's code.
+ */
+template <class Model> class LanguageModelKen : public LanguageModel
+{
+public:
+  LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
+
+  virtual const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
+
+  virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
+
+  virtual FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
+
+  virtual FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
+
+  virtual void IncrementalCallback(Incremental::Manager &manager) const;
+
+  virtual bool IsUseable(const FactorMask &mask) const;
+
+protected:
+  boost::shared_ptr<Model> m_ngram;
+
+  const Factor *m_beginSentenceFactor;
+
+  FactorType m_factorType;
+
+  lm::WordIndex TranslateID(const Word &word) const {
+    std::size_t factor = word.GetFactor(m_factorType)->GetId();
+    return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
+  }
+
+private:
+  LanguageModelKen(const LanguageModelKen<Model> &copy_from);
+
+  // Convert last words of hypothesis into vocab ids, returning an end pointer.
+  lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const {
+    lm::WordIndex *index = indices;
+    lm::WordIndex *end = indices + m_ngram->Order() - 1;
+    int position = hypo.GetCurrTargetWordsRange().GetEndPos();
+    for (; ; ++index, --position) {
+      if (index == end) return index;
+      if (position == -1) {
+        *index = m_ngram->GetVocabulary().BeginSentence();
+        return index + 1;
+      }
+      *index = TranslateID(hypo.GetWord(position));
+    }
+  }
+
+  std::vector<lm::WordIndex> m_lmIdLookup;
+
+};
+
 } // namespace Moses
 
 #endif
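
The sketch below is illustrative only and not part of the patch. It shows how a caller would use the factory signature changed by this diff, which now takes the feature description line as its first argument. The wrapper function name LoadBackwardLM is hypothetical; the argument values ("LM1=1.0", factor 0, eager loading, backward.arpa) are taken from BackwardTest.cpp and the Jamfile above.

    // Hypothetical caller of the new ConstructBackwardLM signature.
    #include <string>

    #include "moses/LM/Backward.h"

    Moses::LanguageModel *LoadBackwardLM()
    {
      const std::string line = "LM1=1.0";        // feature description line (new first argument)
      const std::string file = "backward.arpa";  // ARPA model used by the unit test
      const Moses::FactorType factorType = 0;    // score the surface-form factor
      const bool lazy = false;                   // load the model eagerly
      // ConstructBackwardLM probes the file type and instantiates the matching
      // BackwardLanguageModel<...> template; plain ARPA input falls back to the
      // ProbingModel variant (see the switch in Backward.cpp above).
      return Moses::ConstructBackwardLM(line, file, factorType, lazy);
    }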