From 578e65298f365b7844665d5f2a0f9e298c832ae7 Mon Sep 17 00:00:00 2001
From: Lane Schwartz
Date: Mon, 2 Jan 2017 12:57:52 -0600
Subject: [PATCH] Add InMemoryPerSentenceOnDemandLM

---
 moses/FF/Factory.cpp                       |   2 +
 moses/LM/Implementation.cpp                |   2 +-
 moses/LM/InMemoryPerSentenceOnDemandLM.cpp |  91 ++++++++++++++
 moses/LM/InMemoryPerSentenceOnDemandLM.h   | 135 +++++++++++++++++++++
 moses/LM/Jamfile                           |   2 +-
 moses/LM/Ken.cpp                           |  10 ++
 moses/LM/Ken.h                             |   6 +
 7 files changed, 246 insertions(+), 2 deletions(-)
 create mode 100644 moses/LM/InMemoryPerSentenceOnDemandLM.cpp
 create mode 100644 moses/LM/InMemoryPerSentenceOnDemandLM.h

diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index a048410d0..9ae145504 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -68,6 +68,7 @@
 #include "moses/FF/SkeletonStatelessFF.h"
 #include "moses/FF/SkeletonStatefulFF.h"
 #include "moses/LM/SkeletonLM.h"
+#include "moses/LM/InMemoryPerSentenceOnDemandLM.h"
 #include "moses/FF/SkeletonTranslationOptionListFeature.h"
 #include "moses/LM/BilingualLM.h"
 #include "moses/TranslationModel/SkeletonPT.h"
@@ -299,6 +300,7 @@ FeatureRegistry::FeatureRegistry()
   MOSES_FNAME(SkeletonStatelessFF);
   MOSES_FNAME(SkeletonStatefulFF);
   MOSES_FNAME(SkeletonLM);
+  MOSES_FNAME(InMemoryPerSentenceOnDemandLM);
   MOSES_FNAME(SkeletonTranslationOptionListFeature);
   MOSES_FNAME(SkeletonPT);
 
diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp
index eb67100ca..c0a69994d 100644
--- a/moses/LM/Implementation.cpp
+++ b/moses/LM/Implementation.cpp
@@ -61,7 +61,7 @@ void LanguageModelImplementation::ShiftOrPush(std::vector<const Word*> &contextF
 {
   if (contextFactor.size() < GetNGramOrder()) {
     contextFactor.push_back(&word);
-  } else {
+  } else if (GetNGramOrder() > 0) {
     // shift
     for (size_t currNGramOrder = 0 ; currNGramOrder < GetNGramOrder() - 1 ; currNGramOrder++) {
       contextFactor[currNGramOrder] = contextFactor[currNGramOrder + 1];
diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp
new file mode 100644
index 000000000..12ef78f4e
--- /dev/null
+++ b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp
@@ -0,0 +1,91 @@
+#include <string>
+#include "InMemoryPerSentenceOnDemandLM.h"
+#include "moses/FactorCollection.h"
+#include "moses/Util.h"
+#include "moses/StaticData.h"
+#include "moses/TranslationTask.h"
+#include "moses/ContextScope.h"
+#include "moses/LM/Ken.h"
+#include "lm/model.hh"
+#include "util/mmap.hh"
+
+#include <cstdio>
+#include <sstream>
+#include <fstream>
+
+using namespace std;
+
+namespace Moses
+{
+InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line), initialized(false)
+{
+  ReadParameters();
+}
+
+InMemoryPerSentenceOnDemandLM::~InMemoryPerSentenceOnDemandLM()
+{
+}
+
+void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) {
+
+  // The context scope object for this translation task
+  //    contains a map of translation task-specific data
+  boost::shared_ptr<ContextScope> contextScope = ttask->GetScope();
+
+  // The key to the map is this object
+  void const* key = static_cast<void const*>(this);
+
+  // The value stored in the map is a string containing the language model data
+  boost::shared_ptr<string> value = contextScope->get<string>(key);
+
+  // Create a stream to read the language model data
+  stringstream strme(*(value.get()));
+
+  char * nullpointer = (char *) 0;
+  const char * filename = std::tmpnam(nullpointer);
+  ofstream tmp;
+  tmp.open(filename);
+
+  // Write the language model data to a temporary file, one line at a time
+  string line;
+  while (getline(strme, line)) {
+
+    tmp << line << "\n";
+
+  }
+
+  tmp.close();
+
+  LanguageModelKen<lm::ngram::ProbingModel> & lm = GetPerThreadLM();
+  lm.LoadModel(filename, util::POPULATE_OR_READ);
+
+  initialized = true;
+
+  VERBOSE(1, filename);
+  if (initialized) {
+    VERBOSE(1, "\tLM initialized\n");
+  }
+
+  //  std::remove(filename);
+
+}
+
+LanguageModelKen<lm::ngram::ProbingModel>& InMemoryPerSentenceOnDemandLM::GetPerThreadLM() const {
+
+  LanguageModelKen<lm::ngram::ProbingModel> *lm;
+  lm = m_perThreadLM.get();
+  if (lm == NULL) {
+    lm = new LanguageModelKen<lm::ngram::ProbingModel>();
+    m_perThreadLM.reset(lm);
+  }
+  assert(lm);
+  return *lm;
+
+}
+
+
+
+}
+
+
+
diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.h b/moses/LM/InMemoryPerSentenceOnDemandLM.h
new file mode 100644
index 000000000..f0c1effa7
--- /dev/null
+++ b/moses/LM/InMemoryPerSentenceOnDemandLM.h
@@ -0,0 +1,135 @@
+// $Id$
+#pragma once
+
+#include <string>
+#include "SingleFactor.h"
+#include <boost/thread/tss.hpp>
+#include "lm/model.hh"
+#include "moses/LM/Ken.h"
+#include "moses/FF/FFState.h"
+
+namespace Moses
+{
+
+struct InMemoryPerSentenceOnDemandLMState : public FFState {
+  lm::ngram::State state;
+  virtual size_t hash() const {
+    size_t ret = hash_value(state);
+    return ret;
+  }
+  virtual bool operator==(const FFState& o) const {
+    const InMemoryPerSentenceOnDemandLMState &other = static_cast<const InMemoryPerSentenceOnDemandLMState &>(o);
+    bool ret = state == other.state;
+    return ret;
+  }
+
+};
+
+class InMemoryPerSentenceOnDemandLM : public LanguageModel
+{
+public:
+  InMemoryPerSentenceOnDemandLM(const std::string &line);
+  ~InMemoryPerSentenceOnDemandLM();
+
+  void InitializeForInput(ttasksptr const& ttask);
+
+  virtual void SetParameter(const std::string& key, const std::string& value) {
+    GetPerThreadLM().SetParameter(key, value);
+  }
+
+  virtual const FFState* EmptyHypothesisState(const InputType &input) const {
+    if (initialized) {
+      return GetPerThreadLM().EmptyHypothesisState(input);
+    } else {
+      return new InMemoryPerSentenceOnDemandLMState();
+    }
+  }
+
+  virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const {
+    if (initialized) {
+      return GetPerThreadLM().EvaluateWhenApplied(hypo, ps, out);
+    } else {
+      UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
+    }
+  }
+
+  virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const {
+    if (initialized) {
+      return GetPerThreadLM().EvaluateWhenApplied(cur_hypo, featureID, accumulator);
+    } else {
+      UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
+    }
+  }
+
+  virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const {
+    if (initialized) {
+      return GetPerThreadLM().EvaluateWhenApplied(hyperedge, featureID, accumulator);
+    } else {
+      UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
+    }
+  }
+
+
+  virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const {
+    if (initialized) {
+      GetPerThreadLM().CalcScore(phrase, fullScore, ngramScore, oovCount);
+    }
+  }
+
+  virtual void CalcScoreFromCache(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const {
+    if (initialized) {
+      GetPerThreadLM().CalcScoreFromCache(phrase, fullScore, ngramScore, oovCount);
+    }
+  }
+
+  virtual void IssueRequestsFor(Hypothesis& hypo, const FFState* input_state) {
+    GetPerThreadLM().IssueRequestsFor(hypo, input_state);
+  }
+
+  virtual void sync() {
+    GetPerThreadLM().sync();
+  }
+
+  virtual void SetFFStateIdx(int state_idx) {
+    if (initialized) {
+      GetPerThreadLM().SetFFStateIdx(state_idx);
+    }
+  }
+
+  virtual void IncrementalCallback(Incremental::Manager &manager) const {
+    if (initialized) {
+      GetPerThreadLM().IncrementalCallback(manager);
+    }
+  }
+
+  virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const {
+    if (initialized) {
+      GetPerThreadLM().ReportHistoryOrder(out, phrase);
+    }
+  }
+
+  virtual void EvaluateInIsolation(const Phrase &source
+                                   , const TargetPhrase &targetPhrase
+                                   , ScoreComponentCollection &scoreBreakdown
+                                   , ScoreComponentCollection &estimatedScores) const {
+    if (initialized) {
+      GetPerThreadLM().EvaluateInIsolation(source, targetPhrase, scoreBreakdown, estimatedScores);
+    }
+  }
+
+  bool IsUseable(const FactorMask &mask) const {
+    return GetPerThreadLM().IsUseable(mask);
+  }
+
+
+protected:
+  LanguageModelKen<lm::ngram::ProbingModel> & GetPerThreadLM() const;
+
+  mutable boost::thread_specific_ptr<LanguageModelKen<lm::ngram::ProbingModel> > m_perThreadLM;
+
+  bool initialized;
+
+};
+
+
+}
diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile
index 75b66603c..4eafbd632 100644
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@@ -138,7 +138,7 @@ if $(with-dalm) {
 
 #Top-level LM library. If you've added a file that doesn't depend on external
 #libraries, put it here.
-alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
+alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp InMemoryPerSentenceOnDemandLM.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
   ../../lm//kenlm ..//headers $(dependencies) ;
 
 alias macros : : : : <define>$(lmmacros) ;
diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp
index c7ac663cc..e42e60274 100644
--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@@ -105,6 +105,7 @@ template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string
   config.load_method = load_method;
   m_ngram.reset(new Model(file.c_str(), config));
 
+  VERBOSE(2, "LanguageModelKen " << m_description << " reset to " << file << "\n");
 }
 
 template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method)
@@ -116,6 +117,15 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::stri
   LoadModel(file, load_method);
 }
 
+template <class Model> LanguageModelKen<Model>::LanguageModelKen()
+  :LanguageModel("KENLM")
+  ,m_beginSentenceFactor(FactorCollection::Instance().AddFactor(BOS_))
+  ,m_factorType(0)
+{
+  ReadParameters();
+}
+
+
 template <class Model> LanguageModelKen<Model>::LanguageModelKen(const LanguageModelKen<Model> &copy_from)
   :LanguageModel(copy_from.GetArgLine()),
   m_ngram(copy_from.m_ngram),
diff --git a/moses/LM/Ken.h b/moses/LM/Ken.h
index 4934228c2..33590d659 100644
--- a/moses/LM/Ken.h
+++ b/moses/LM/Ken.h
@@ -33,11 +33,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 #include "moses/TypeDef.h"
 #include "moses/Word.h"
 
+
+
 namespace Moses
 {
 
 //class LanguageModel;
 class FFState;
+class InMemoryPerSentenceOnDemandLM;
 
 LanguageModel *ConstructKenLM(const std::string &line);
 
@@ -67,6 +70,8 @@ public:
 
   virtual bool IsUseable(const FactorMask &mask) const;
 
+  friend class InMemoryPerSentenceOnDemandLM;
+
 protected:
   boost::shared_ptr<Model> m_ngram;
 
@@ -84,6 +89,7 @@ protected:
   std::vector<lm::WordIndex> m_lmIdLookup;
 
 private:
+  LanguageModelKen();
   LanguageModelKen(const LanguageModelKen<Model> &copy_from);
 
   // Convert last words of hypothesis into vocab ids, returning an end pointer.
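
Usage sketch (illustrative, not part of the patch): InitializeForInput() expects the per-sentence language model to arrive as a single string stored in the translation task's ContextScope, keyed by the address of the feature object itself; the feature writes that string to a temporary file and loads it into a thread-local LanguageModelKen. The fragment below shows how caller code might hand ARPA-format LM data to the feature before a sentence is decoded. It is a minimal sketch under assumptions: it presumes ContextScope exposes a set() counterpart to the get<string>() call used above, and the helper name AttachSentenceLM and its parameters are hypothetical.

    #include <string>
    #include <boost/make_shared.hpp>
    #include "moses/ContextScope.h"
    #include "moses/TranslationTask.h"
    #include "moses/FF/FeatureFunction.h"

    // Hypothetical caller-side helper: store per-sentence ARPA data where
    // InMemoryPerSentenceOnDemandLM::InitializeForInput() will look for it.
    void AttachSentenceLM(Moses::ttasksptr const& ttask,
                          Moses::FeatureFunction const* lmFeature, // the registered InMemoryPerSentenceOnDemandLM
                          std::string const& arpaData)             // ARPA-format n-gram data for this sentence
    {
      boost::shared_ptr<Moses::ContextScope> scope = ttask->GetScope();
      // The feature object's address is the map key, mirroring InitializeForInput()
      void const* key = static_cast<void const*>(lmFeature);
      // Assumes a ContextScope::set(key, value) template; adjust to the actual API
      scope->set(key, boost::make_shared<std::string>(arpaData));
    }

Because the loaded model lives in a boost::thread_specific_ptr, each decoding thread gets its own LanguageModelKen instance, so concurrent sentences carrying different per-sentence models do not interfere with one another.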