diff --git a/moses/LM/Backward.cpp b/moses/LM/Backward.cpp index 2fb7451b5..411f559ca 100644 --- a/moses/LM/Backward.cpp +++ b/moses/LM/Backward.cpp @@ -40,7 +40,8 @@ namespace Moses { /** Constructs a new backward language model. */ -template BackwardLanguageModel::BackwardLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen(line,file,factorType,lazy) +// TODO(lane): load_method instead of lazy bool +template BackwardLanguageModel::BackwardLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen(line,file,factorType, lazy ? util::LAZY : util::POPULATE_OR_READ) { // // This space intentionally left blank diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp index c81f3b859..a27940e72 100644 --- a/moses/LM/Ken.cpp +++ b/moses/LM/Ken.cpp @@ -69,63 +69,6 @@ struct KenLMState : public FFState { }; -///* -// * An implementation of single factor LM using Ken's code. -// */ -//template class LanguageModelKen : public LanguageModel -//{ -//public: -// LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy); -// -// const FFState *EmptyHypothesisState(const InputType &/*input*/) const { -// KenLMState *ret = new KenLMState(); -// ret->state = m_ngram->BeginSentenceState(); -// return ret; -// } -// -// void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const; -// -// FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const; -// -// FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const; -// -// void IncrementalCallback(Incremental::Manager &manager) const { -// manager.LMCallback(*m_ngram, m_lmIdLookup); -// } -// -// bool IsUseable(const FactorMask &mask) const; -//private: -// LanguageModelKen(const LanguageModelKen ©_from); -// -// lm::WordIndex TranslateID(const Word &word) const { -// std::size_t factor = word.GetFactor(m_factorType)->GetId(); -// return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]); -// } -// -// // Convert last words of hypothesis into vocab ids, returning an end pointer. -// lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const { -// lm::WordIndex *index = indices; -// lm::WordIndex *end = indices + m_ngram->Order() - 1; -// int position = hypo.GetCurrTargetWordsRange().GetEndPos(); -// for (; ; ++index, --position) { -// if (index == end) return index; -// if (position == -1) { -// *index = m_ngram->GetVocabulary().BeginSentence(); -// return index + 1; -// } -// *index = TranslateID(hypo.GetWord(position)); -// } -// } -// -// boost::shared_ptr m_ngram; -// -// std::vector m_lmIdLookup; -// -// FactorType m_factorType; -// -// const Factor *m_beginSentenceFactor; -//}; - class MappingBuilder : public lm::EnumerateVocab { public: @@ -148,7 +91,7 @@ private: } // namespace -template void LanguageModelKen::LoadModel(const std::string &file, bool lazy) +template void LanguageModelKen::LoadModel(const std::string &file, util::LoadMethod load_method) { lm::ngram::Config config; if(this->m_verbosity >= 1) { @@ -159,19 +102,19 @@ template void LanguageModelKen::LoadModel(const std::string FactorCollection &collection = FactorCollection::Instance(); MappingBuilder builder(collection, m_lmIdLookup); config.enumerate_vocab = &builder; - config.load_method = lazy ? util::LAZY : util::POPULATE_OR_READ; + config.load_method = load_method; m_ngram.reset(new Model(file.c_str(), config)); m_beginSentenceFactor = collection.AddFactor(BOS_); } -template LanguageModelKen::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy) +template LanguageModelKen::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method) :LanguageModel(line) ,m_factorType(factorType) { ReadParameters(); - LoadModel(file, lazy); + LoadModel(file, load_method); } template LanguageModelKen::LanguageModelKen(const LanguageModelKen ©_from) @@ -479,7 +422,7 @@ LanguageModel *ConstructKenLM(const std::string &lineOrig) { FactorType factorType = 0; string filePath; - bool lazy = false; + util::LoadMethod load_method = util::POPULATE_OR_READ; util::TokenIter argument(lineOrig, ' '); ++argument; // KENLM @@ -500,38 +443,53 @@ LanguageModel *ConstructKenLM(const std::string &lineOrig) } else if (name == "path") { filePath.assign(value.data(), value.size()); } else if (name == "lazyken") { - lazy = boost::lexical_cast(value); + // deprecated: use load instead. + load_method = boost::lexical_cast(value) ? util::LAZY : util::POPULATE_OR_READ; + } else if (name == "load") { + if (value == "lazy") { + load_method = util::LAZY; + } else if (value == "populate_or_lazy") { + load_method = util::POPULATE_OR_LAZY; + } else if (value == "populate_or_read" || value == "populate") { + load_method = util::POPULATE_OR_READ; + } else if (value == "read") { + load_method = util::READ; + } else if (value == "parallel_read") { + load_method = util::PARALLEL_READ; + } else { + UTIL_THROW2("Unknown KenLM load method " << value); + } } else { // pass to base class to interpret line << " " << name << "=" << value; } } - return ConstructKenLM(line.str(), filePath, factorType, lazy); + return ConstructKenLM(line.str(), filePath, factorType, load_method); } -LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy) +LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method) { lm::ngram::ModelType model_type; if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) { switch(model_type) { case lm::ngram::PROBING: - return new LanguageModelKen(line, file, factorType, lazy); + return new LanguageModelKen(line, file, factorType, load_method); case lm::ngram::REST_PROBING: - return new LanguageModelKen(line, file, factorType, lazy); + return new LanguageModelKen(line, file, factorType, load_method); case lm::ngram::TRIE: - return new LanguageModelKen(line, file, factorType, lazy); + return new LanguageModelKen(line, file, factorType, load_method); case lm::ngram::QUANT_TRIE: - return new LanguageModelKen(line, file, factorType, lazy); + return new LanguageModelKen(line, file, factorType, load_method); case lm::ngram::ARRAY_TRIE: - return new LanguageModelKen(line, file, factorType, lazy); + return new LanguageModelKen(line, file, factorType, load_method); case lm::ngram::QUANT_ARRAY_TRIE: - return new LanguageModelKen(line, file, factorType, lazy); + return new LanguageModelKen(line, file, factorType, load_method); default: UTIL_THROW2("Unrecognized kenlm model type " << model_type); } } else { - return new LanguageModelKen(line, file, factorType, lazy); + return new LanguageModelKen(line, file, factorType, load_method); } } diff --git a/moses/LM/Ken.h b/moses/LM/Ken.h index 3a94e4c0b..4934228c2 100644 --- a/moses/LM/Ken.h +++ b/moses/LM/Ken.h @@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include #include "lm/word_index.hh" +#include "util/mmap.hh" #include "moses/LM/Base.h" #include "moses/Hypothesis.h" @@ -41,7 +42,7 @@ class FFState; LanguageModel *ConstructKenLM(const std::string &line); //! This will also load. Returns a templated KenLM class -LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy); +LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method); /* * An implementation of single factor LM using Kenneth's code. @@ -49,7 +50,7 @@ LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, template class LanguageModelKen : public LanguageModel { public: - LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy); + LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method); virtual const FFState *EmptyHypothesisState(const InputType &/*input*/) const; @@ -73,7 +74,7 @@ protected: FactorType m_factorType; - void LoadModel(const std::string &file, bool lazy); + void LoadModel(const std::string &file, util::LoadMethod load_method); lm::WordIndex TranslateID(const Word &word) const { std::size_t factor = word.GetFactor(m_factorType)->GetId(); diff --git a/moses/LM/Reloading.h b/moses/LM/Reloading.h index 3993fe9d7..d5ae83d17 100644 --- a/moses/LM/Reloading.h +++ b/moses/LM/Reloading.h @@ -64,8 +64,8 @@ private: template class ReloadingLanguageModel : public LanguageModelKen { public: - - ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen(line, file, factorType, lazy), m_file(file), m_lazy(lazy) { + // TODO(Lane) copy less code, update to load_method + ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen(line, file, factorType, lazy ? util::LAZY : util::POPULATE_OR_READ), m_file(file), m_lazy(lazy) { std::cerr << "ReloadingLM constructor: " << m_file << std::endl; // std::cerr << std::string(line).replace(0,11,"KENLM") << std::endl; @@ -74,7 +74,8 @@ public: virtual void InitializeForInput(ttasksptr const& ttask) { std::cerr << "ReloadingLM InitializeForInput" << std::endl; - LanguageModelKen::LoadModel(m_file, m_lazy); + // TODO(lane): load_method + LanguageModelKen::LoadModel(m_file, m_lazy ? util::LAZY : util::POPULATE_OR_READ); /* lm::ngram::Config config; if(this->m_verbosity >= 1) {