thread-safety, precomputation and caching for NeuralLMWrapper

doesn't work with the default nplm, only with the fork at https://github.com/rsennrich/nplm
Rico Sennrich 2014-07-17 16:50:08 +01:00
parent 0d8d77e3da
commit eb5336ad9c
4 changed files with 25 additions and 41 deletions
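
A rough, fork-dependent sketch of the setup that Load() performs in the diff below. All calls (the (path, bool) constructor, set_log_base, set_cache, get_order, and the copy constructor) are taken from the diff itself; that the boolean second argument triggers the precomputation named in the title is an assumption, and some of these calls are presumably exactly what the default nplm lacks. It needs the fork's headers and a trained model file to build and run.

#include "neuralLM.h"   // from the nplm fork's src/ directory
#include <iostream>

int main(int argc, char **argv) {
  if (argc < 2) {
    std::cerr << "usage: " << argv[0] << " model.nnlm" << std::endl;
    return 1;
  }
  // Load the model once; the second argument presumably enables the
  // precomputation mentioned in the commit title.
  nplm::neuralLM shared(argv[1], true);
  shared.set_log_base(10);     // same base as Load() sets for Moses
  shared.set_cache(1000000);   // same hard-coded cache size as in Load()
  std::cout << "LM order: " << shared.get_order() << std::endl;

  // Each decoder thread would then make its own copy; per the header comment
  // in the diff, the big data (vocab, weights, cache) stays shared.
  nplm::neuralLM local(shared);
  std::cout << "copy order: " << local.get_order() << std::endl;
  return 0;
}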


@@ -17,7 +17,7 @@ wrappers = ;
local with-nplm = [ option.get "with-nplm" ] ;
if $(with-nplm) {
lib neuralLM : : <search>$(with-nplm)/src ;
obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <cxxflags>-fopenmp ;
obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen <cxxflags>-fopenmp ;
alias nplm : nplm.o neuralLM ..//boost_thread : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>WITH_NPLM <library>..//boost_thread ;
wrappers += nplm ;
}


@@ -84,7 +84,7 @@ if $(with-ldhtlm) {
local with-nplm = [ option.get "with-nplm" ] ;
if $(with-nplm) {
lib neuralLM : : <search>$(with-nplm)/lib <search>$(with-nplm)/lib64 ;
obj NeuralLMWrapper.o : NeuralLMWrapper.cpp neuralLM ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen-3.1.4 ;
obj NeuralLMWrapper.o : NeuralLMWrapper.cpp neuralLM ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen ;
alias nplm : NeuralLMWrapper.o neuralLM : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>LM_NEURAL ;
dependencies += nplm ;
lmmacros += LM_NEURAL ;


@@ -1,6 +1,7 @@
#include "moses/StaticData.h"
#include "moses/FactorCollection.h"
#include <boost/functional/hash.hpp>
#include "NeuralLMWrapper.h"
#include "neuralLM.h"
#include <model.h>
@@ -12,21 +13,19 @@ namespace Moses
NeuralLMWrapper::NeuralLMWrapper(const std::string &line)
:LanguageModelSingleFactor(line)
{
// This space intentionally left blank
ReadParameters();
}
NeuralLMWrapper::~NeuralLMWrapper()
{
delete m_neuralLM;
delete m_neuralLM_shared;
}
void NeuralLMWrapper::Load()
{
TRACE_ERR("Loading NeuralLM " << m_filePath << endl);
// Set parameters required by ancestor classes
FactorCollection &factorCollection = FactorCollection::Instance();
m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
@@ -34,59 +33,42 @@ void NeuralLMWrapper::Load()
m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
m_sentenceEndWord[m_factorType] = m_sentenceEnd;
m_neuralLM = new nplm::neuralLM();
m_neuralLM->read(m_filePath);
m_neuralLM->set_log_base(10);
m_neuralLM_shared = new nplm::neuralLM(m_filePath, true);
m_neuralLM_shared->set_log_base(10);
//TODO: config option?
m_neuralLM_shared->set_cache(1000000);
UTIL_THROW_IF2(m_nGramOrder != m_neuralLM_shared->get_order(),
"Wrong order of neuralLM: LM has " << m_neuralLM_shared->get_order() << ", but Moses expects " << m_nGramOrder);
//TODO: Implement this
}
LMResult NeuralLMWrapper::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
{
unsigned int hashCode = 0;
if (!m_neuralLM.get()) {
m_neuralLM.reset(new nplm::neuralLM(*m_neuralLM_shared));
}
size_t hashCode = 0;
vector<int> words(contextFactor.size());
// TRACE_ERR("NeuralLM words:");
for (size_t i=0, n=contextFactor.size(); i<n; i+=1) {
for (size_t i=0, n=contextFactor.size(); i<n; i++) {
const Word* word = contextFactor[i];
const Factor* factor = word->GetFactor(m_factorType);
const std::string string= factor->GetString().as_string();
int neuralLM_wordID = m_neuralLM->lookup_word(string);
words[i] = neuralLM_wordID;
hashCode += neuralLM_wordID;
// TRACE_ERR(" " << string << "(" << neuralLM_wordID << ")" );
boost::hash_combine(hashCode, neuralLM_wordID);
}
double value = m_neuralLM->lookup_ngram(words);
// TRACE_ERR("\t=\t" << value);
// TRACE_ERR(endl);
// Create a new struct to hold the result
LMResult ret;
ret.score = value;
ret.unknown = false;
// State* finalState is a void pointer
//
// Construct a hash value from the vector of words (contextFactor)
//
// The hash value must be the same size as sizeof(void*)
//
// TODO Set finalState to the above hash value
// use last word as state info
// const Factor *factor;
// size_t hash_value(const Factor &f);
// if (contextFactor.size()) {
// factor = contextFactor.back()->GetFactor(m_factorType);
// } else {
// factor = NULL;
// }
//
// (*finalState) = (State*) factor;
(*finalState) = (State*) hashCode;
return ret;
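
A self-contained illustration of the new state computation: the context's word IDs are folded into a single std::size_t with boost::hash_combine, which, unlike the old additive hash, is order-sensitive, and that value is what GetValue writes through the opaque State* pointer. lookup_word and lookup_ngram here are hypothetical stand-ins, not the nplm API.

#include <boost/functional/hash.hpp>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Dummy stand-ins for nplm::neuralLM::lookup_word / lookup_ngram.
int lookup_word(const std::string &w) { return static_cast<int>(w.size()); }
double lookup_ngram(const std::vector<int> &ids) { return -0.1 * ids.size(); }

int main() {
  const char *context[] = {"this", "is", "a", "test"};
  std::vector<int> words(4);
  std::size_t hashCode = 0;
  for (std::size_t i = 0; i < words.size(); ++i) {
    words[i] = lookup_word(context[i]);
    boost::hash_combine(hashCode, words[i]);  // order-sensitive, unlike +=
  }
  double score = lookup_ngram(words);
  std::cout << "score=" << score << " state=" << hashCode << std::endl;
  return 0;
}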


@@ -2,6 +2,8 @@
#include "SingleFactor.h"
#include <boost/thread/tss.hpp>
namespace nplm {
class neuralLM;
}
@@ -9,16 +11,16 @@ namespace nplm {
namespace Moses
{
/** Implementation of single factor LM using NPLM's code.
*/
class NeuralLMWrapper : public LanguageModelSingleFactor
{
protected:
nplm::neuralLM *m_neuralLM;
// big data (vocab, weights, cache) shared among threads
nplm::neuralLM *m_neuralLM_shared;
// thread-specific nplm for thread-safety
mutable boost::thread_specific_ptr<nplm::neuralLM> m_neuralLM;
public:
NeuralLMWrapper(const std::string &line);
// NeuralLM(const std::string &line);
~NeuralLMWrapper();
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const;
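
A minimal toy sketch of the sharing pattern these two members implement (ToyModel is a hypothetical stand-in for nplm::neuralLM): the heavyweight object is loaded once, and each thread lazily clones it through boost::thread_specific_ptr on its first query, so lookups never touch shared mutable state. In the real fork the copy shares the big data instead of duplicating it, per the comment above.

#include <boost/thread.hpp>
#include <boost/thread/tss.hpp>
#include <iostream>
#include <vector>

// Toy stand-in for nplm::neuralLM: big read-only table, cheap lookups.
struct ToyModel {
  std::vector<float> weights;
  explicit ToyModel(std::size_t n) : weights(n, 0.5f) {}
  float score(int word) const { return weights[word % weights.size()]; }
};

class ToyWrapper {
  ToyModel *m_shared;                                    // loaded once
  mutable boost::thread_specific_ptr<ToyModel> m_local;  // one copy per thread
public:
  ToyWrapper() : m_shared(new ToyModel(1000)) {}
  ~ToyWrapper() { delete m_shared; }
  float Score(int word) const {
    if (!m_local.get())                        // first call on this thread?
      m_local.reset(new ToyModel(*m_shared));  // lazy per-thread copy
    return m_local->score(word);
  }
};

int main() {
  ToyWrapper lm;
  boost::thread_group threads;
  for (int t = 0; t < 4; ++t)
    threads.create_thread([&lm, t] { std::cout << lm.Score(t) << "\n"; });
  threads.join_all();
  return 0;
}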