mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 11:28:48 +03:00
Add InMemoryPerSentenceOnDemandLM
This commit is contained in:
parent
999d6b6371
commit
578e65298f
@ -68,6 +68,7 @@
|
||||
#include "moses/FF/SkeletonStatelessFF.h"
|
||||
#include "moses/FF/SkeletonStatefulFF.h"
|
||||
#include "moses/LM/SkeletonLM.h"
|
||||
#include "moses/LM/InMemoryPerSentenceOnDemandLM.h"
|
||||
#include "moses/FF/SkeletonTranslationOptionListFeature.h"
|
||||
#include "moses/LM/BilingualLM.h"
|
||||
#include "moses/TranslationModel/SkeletonPT.h"
|
||||
@ -299,6 +300,7 @@ FeatureRegistry::FeatureRegistry()
|
||||
MOSES_FNAME(SkeletonStatelessFF);
|
||||
MOSES_FNAME(SkeletonStatefulFF);
|
||||
MOSES_FNAME(SkeletonLM);
|
||||
MOSES_FNAME(InMemoryPerSentenceOnDemandLM);
|
||||
MOSES_FNAME(SkeletonTranslationOptionListFeature);
|
||||
MOSES_FNAME(SkeletonPT);
|
||||
|
||||
|
@ -61,7 +61,7 @@ void LanguageModelImplementation::ShiftOrPush(std::vector<const Word*> &contextF
|
||||
{
|
||||
if (contextFactor.size() < GetNGramOrder()) {
|
||||
contextFactor.push_back(&word);
|
||||
} else {
|
||||
} else if (GetNGramOrder() > 0) {
|
||||
// shift
|
||||
for (size_t currNGramOrder = 0 ; currNGramOrder < GetNGramOrder() - 1 ; currNGramOrder++) {
|
||||
contextFactor[currNGramOrder] = contextFactor[currNGramOrder + 1];
|
||||
|
91
moses/LM/InMemoryPerSentenceOnDemandLM.cpp
Normal file
91
moses/LM/InMemoryPerSentenceOnDemandLM.cpp
Normal file
@ -0,0 +1,91 @@
|
||||
#include <boost/foreach.hpp>
|
||||
#include "InMemoryPerSentenceOnDemandLM.h"
|
||||
#include "moses/FactorCollection.h"
|
||||
#include "moses/Util.h"
|
||||
#include "moses/StaticData.h"
|
||||
#include "moses/TranslationTask.h"
|
||||
#include "moses/ContextScope.h"
|
||||
#include "moses/LM/Ken.h"
|
||||
#include "lm/model.hh"
|
||||
#include "util/mmap.hh"
|
||||
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
// Construct the on-demand LM from its feature-function configuration line.
// The actual per-sentence model is not loaded here; that happens later,
// per translation task, in InitializeForInput().
InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line)
  : LanguageModel(line)
  , initialized(false)
{
  ReadParameters();
}
|
||||
|
||||
// Nothing to release explicitly: the thread-local KenLM instances are
// owned by m_perThreadLM and cleaned up by boost::thread_specific_ptr.
InMemoryPerSentenceOnDemandLM::~InMemoryPerSentenceOnDemandLM() {}
|
||||
|
||||
// Build the per-sentence language model for this translation task.
//
// The task's ContextScope carries a string of LM training data keyed by
// this object's address. That string is written to a temporary file,
// which is then loaded into this thread's KenLM instance.
void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) {

  // The context scope object for this translation task
  // contains a map of translation-task-specific data
  boost::shared_ptr<Moses::ContextScope> contextScope = ttask->GetScope();

  // The key to the map is this object
  void const* key = static_cast<void const*>(this);

  // The value stored in the map is a string containing the LM data
  boost::shared_ptr<string> value = contextScope->get<string>(key);

  // If no per-sentence LM data was supplied for this sentence, leave the
  // LM uninitialized instead of dereferencing a null shared_ptr.
  if (!value) {
    VERBOSE(1, "\tNo per-sentence LM data found; LM left uninitialized\n");
    return;
  }

  // Create a stream to read the LM data
  stringstream strme(*value);

  // Write the LM data to a temporary file so KenLM can load it.
  // NOTE(review): std::tmpnam is racy/insecure (TOCTOU between name
  // generation and open) — consider mkstemp. TODO confirm portability needs.
  const char * filename = std::tmpnam(NULL);
  ofstream tmp;
  tmp.open(filename);

  // Copy the LM data into the temporary file, one line at a time
  string line;
  while (getline(strme, line)) {
    tmp << line << "\n";
  }
  tmp.close();

  // Load the file we just wrote into this thread's KenLM instance.
  // (Bug fix: this previously loaded a hard-coded developer path
  // "/home/lanes/..." instead of the per-sentence temp file.)
  LanguageModelKen<lm::ngram::ProbingModel> & lm = GetPerThreadLM();
  lm.LoadModel(filename, util::POPULATE_OR_READ);

  initialized = true;

  VERBOSE(1, filename);
  VERBOSE(1, "\tLM initialized\n");

  // The model has been read/mapped; on POSIX the unlinked file persists
  // until unmapped, so removing it now is safe and avoids leaking one
  // temp file per sentence.
  std::remove(filename);
}
|
||||
|
||||
// Return this thread's KenLM instance, lazily constructing it on the
// first call from each thread.
LanguageModelKen<lm::ngram::ProbingModel>& InMemoryPerSentenceOnDemandLM::GetPerThreadLM() const {
  LanguageModelKen<lm::ngram::ProbingModel> *threadLM = m_perThreadLM.get();
  if (threadLM == NULL) {
    // First use on this thread: allocate a fresh model and hand
    // ownership to the thread-specific pointer.
    threadLM = new LanguageModelKen<lm::ngram::ProbingModel>();
    m_perThreadLM.reset(threadLM);
  }
  assert(threadLM);
  return *threadLM;
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
135
moses/LM/InMemoryPerSentenceOnDemandLM.h
Normal file
135
moses/LM/InMemoryPerSentenceOnDemandLM.h
Normal file
@ -0,0 +1,135 @@
|
||||
// $Id$
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "SingleFactor.h"
|
||||
#include <boost/thread/tss.hpp>
|
||||
#include "lm/model.hh"
|
||||
#include "moses/LM/Ken.h"
|
||||
#include "moses/FF/FFState.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
struct InMemoryPerSentenceOnDemandLMState : public FFState {
|
||||
lm::ngram::State state;
|
||||
virtual size_t hash() const {
|
||||
size_t ret = hash_value(state);
|
||||
return ret;
|
||||
}
|
||||
virtual bool operator==(const FFState& o) const {
|
||||
const InMemoryPerSentenceOnDemandLMState &other = static_cast<const InMemoryPerSentenceOnDemandLMState &>(o);
|
||||
bool ret = state == other.state;
|
||||
return ret;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
class InMemoryPerSentenceOnDemandLM : public LanguageModel
|
||||
{
|
||||
public:
|
||||
InMemoryPerSentenceOnDemandLM(const std::string &line);
|
||||
~InMemoryPerSentenceOnDemandLM();
|
||||
|
||||
void InitializeForInput(ttasksptr const& ttask);
|
||||
|
||||
virtual void SetParameter(const std::string& key, const std::string& value) {
|
||||
GetPerThreadLM().SetParameter(key, value);
|
||||
}
|
||||
|
||||
virtual const FFState* EmptyHypothesisState(const InputType &input) const {
|
||||
if (initialized) {
|
||||
return GetPerThreadLM().EmptyHypothesisState(input);
|
||||
} else {
|
||||
return new InMemoryPerSentenceOnDemandLMState();
|
||||
}
|
||||
}
|
||||
|
||||
virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const {
|
||||
if (initialized) {
|
||||
return GetPerThreadLM().EvaluateWhenApplied(hypo, ps, out);
|
||||
} else {
|
||||
UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
|
||||
}
|
||||
}
|
||||
|
||||
virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const {
|
||||
if (initialized) {
|
||||
return GetPerThreadLM().EvaluateWhenApplied(cur_hypo, featureID, accumulator);
|
||||
} else {
|
||||
UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
|
||||
}
|
||||
}
|
||||
|
||||
virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const {
|
||||
if (initialized) {
|
||||
return GetPerThreadLM().EvaluateWhenApplied(hyperedge, featureID, accumulator);
|
||||
} else {
|
||||
UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const {
|
||||
if (initialized) {
|
||||
GetPerThreadLM().CalcScore(phrase, fullScore, ngramScore, oovCount);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void CalcScoreFromCache(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const {
|
||||
if (initialized) {
|
||||
GetPerThreadLM().CalcScoreFromCache(phrase, fullScore, ngramScore, oovCount);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void IssueRequestsFor(Hypothesis& hypo, const FFState* input_state) {
|
||||
GetPerThreadLM().IssueRequestsFor(hypo, input_state);
|
||||
}
|
||||
|
||||
virtual void sync() {
|
||||
GetPerThreadLM().sync();
|
||||
}
|
||||
|
||||
virtual void SetFFStateIdx(int state_idx) {
|
||||
if (initialized) {
|
||||
GetPerThreadLM().SetFFStateIdx(state_idx);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void IncrementalCallback(Incremental::Manager &manager) const {
|
||||
if (initialized) {
|
||||
GetPerThreadLM().IncrementalCallback(manager);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const {
|
||||
if (initialized) {
|
||||
GetPerThreadLM().ReportHistoryOrder(out, phrase);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedScores) const {
|
||||
if (initialized) {
|
||||
GetPerThreadLM().EvaluateInIsolation(source, targetPhrase, scoreBreakdown, estimatedScores);
|
||||
}
|
||||
}
|
||||
|
||||
bool IsUseable(const FactorMask &mask) const {
|
||||
return GetPerThreadLM().IsUseable(mask);
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
LanguageModelKen<lm::ngram::ProbingModel> & GetPerThreadLM() const;
|
||||
|
||||
mutable boost::thread_specific_ptr<LanguageModelKen<lm::ngram::ProbingModel> > m_perThreadLM;
|
||||
|
||||
bool initialized;
|
||||
|
||||
};
|
||||
|
||||
|
||||
}
|
@ -138,7 +138,7 @@ if $(with-dalm) {
|
||||
|
||||
#Top-level LM library. If you've added a file that doesn't depend on external
|
||||
#libraries, put it here.
|
||||
alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
|
||||
alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp InMemoryPerSentenceOnDemandLM.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
|
||||
../../lm//kenlm ..//headers $(dependencies) ;
|
||||
|
||||
alias macros : : : : <define>$(lmmacros) ;
|
||||
|
@ -105,6 +105,7 @@ template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string
|
||||
config.load_method = load_method;
|
||||
|
||||
m_ngram.reset(new Model(file.c_str(), config));
|
||||
VERBOSE(2, "LanguageModelKen " << m_description << " reset to " << file << "\n");
|
||||
}
|
||||
|
||||
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method)
|
||||
@ -116,6 +117,15 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::stri
|
||||
LoadModel(file, load_method);
|
||||
}
|
||||
|
||||
// Default constructor: builds an empty KenLM wrapper with factor type 0
// and no model loaded yet. Used by InMemoryPerSentenceOnDemandLM, which
// calls LoadModel later, once per-sentence data is available.
template <class Model> LanguageModelKen<Model>::LanguageModelKen()
  : LanguageModel("KENLM")
  , m_beginSentenceFactor(FactorCollection::Instance().AddFactor(BOS_))
  , m_factorType(0)
{
  ReadParameters();
}
|
||||
|
||||
|
||||
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const LanguageModelKen<Model> ©_from)
|
||||
:LanguageModel(copy_from.GetArgLine()),
|
||||
m_ngram(copy_from.m_ngram),
|
||||
|
@ -33,11 +33,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "moses/TypeDef.h"
|
||||
#include "moses/Word.h"
|
||||
|
||||
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
//class LanguageModel;
|
||||
class FFState;
|
||||
class InMemoryPerSentenceOnDemandLM;
|
||||
|
||||
LanguageModel *ConstructKenLM(const std::string &line);
|
||||
|
||||
@ -67,6 +70,8 @@ public:
|
||||
|
||||
virtual bool IsUseable(const FactorMask &mask) const;
|
||||
|
||||
friend class InMemoryPerSentenceOnDemandLM;
|
||||
|
||||
protected:
|
||||
boost::shared_ptr<Model> m_ngram;
|
||||
|
||||
@ -84,6 +89,7 @@ protected:
|
||||
std::vector<lm::WordIndex> m_lmIdLookup;
|
||||
|
||||
private:
|
||||
LanguageModelKen();
|
||||
LanguageModelKen(const LanguageModelKen<Model> ©_from);
|
||||
|
||||
// Convert last words of hypothesis into vocab ids, returning an end pointer.
|
||||
|
Loading…
Reference in New Issue
Block a user