thread-safety, precomputation and caching for NeuralLMWrapper

doesn't work with the default nplm, only with the fork at https://github.com/rsennrich/nplm
Rico Sennrich 2014-07-17 16:50:08 +01:00
parent 0d8d77e3da
commit eb5336ad9c
4 changed files with 25 additions and 41 deletions
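
A rough, fork-dependent sketch of the setup that Load() performs in the diff below. All calls (the (path, bool) constructor, set_log_base, set_cache, get_order, and the copy constructor) are taken from the diff itself; that the boolean second argument triggers the precomputation named in the title is an assumption, and some of these calls are presumably exactly what the default nplm lacks. It needs the fork's headers and a trained model file to build and run.

#include "neuralLM.h"   // from the nplm fork's src/ directory
#include <iostream>

int main(int argc, char **argv) {
  if (argc < 2) {
    std::cerr << "usage: " << argv[0] << " model.nnlm" << std::endl;
    return 1;
  }
  // Load the model once; the second argument presumably enables the
  // precomputation mentioned in the commit title.
  nplm::neuralLM shared(argv[1], true);
  shared.set_log_base(10);     // same base as Load() sets for Moses
  shared.set_cache(1000000);   // same hard-coded cache size as in Load()
  std::cout << "LM order: " << shared.get_order() << std::endl;

  // Each decoder thread would then make its own copy; per the header comment
  // in the diff, the big data (vocab, weights, cache) stays shared.
  nplm::neuralLM local(shared);
  std::cout << "copy order: " << local.get_order() << std::endl;
  return 0;
}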


@@ -17,7 +17,7 @@ wrappers = ;
local with-nplm = [ option.get "with-nplm" ] ;
if $(with-nplm) {
lib neuralLM : : <search>$(with-nplm)/src ;
obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <cxxflags>-fopenmp ;
obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen <cxxflags>-fopenmp ;
alias nplm : nplm.o neuralLM ..//boost_thread : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>WITH_NPLM <library>..//boost_thread ;
wrappers += nplm ;
}


@@ -84,7 +84,7 @@ if $(with-ldhtlm) {
local with-nplm = [ option.get "with-nplm" ] ;
if $(with-nplm) {
lib neuralLM : : <search>$(with-nplm)/lib <search>$(with-nplm)/lib64 ;
obj NeuralLMWrapper.o : NeuralLMWrapper.cpp neuralLM ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen-3.1.4 ;
obj NeuralLMWrapper.o : NeuralLMWrapper.cpp neuralLM ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen ;
alias nplm : NeuralLMWrapper.o neuralLM : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>LM_NEURAL ;
dependencies += nplm ;
lmmacros += LM_NEURAL ;


@@ -1,6 +1,7 @@
#include "moses/StaticData.h"
#include "moses/FactorCollection.h"
#include <boost/functional/hash.hpp>
#include "NeuralLMWrapper.h"
#include "neuralLM.h"
#include <model.h>
@@ -12,21 +13,19 @@ namespace Moses
NeuralLMWrapper::NeuralLMWrapper(const std::string &line)
:LanguageModelSingleFactor(line)
{
// This space intentionally left blank
ReadParameters();
}
NeuralLMWrapper::~NeuralLMWrapper()
{
delete m_neuralLM;
delete m_neuralLM_shared;
}
void NeuralLMWrapper::Load()
{
TRACE_ERR("Loading NeuralLM " << m_filePath << endl);
// Set parameters required by ancestor classes
FactorCollection &factorCollection = FactorCollection::Instance();
m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
@@ -34,59 +33,42 @@ void NeuralLMWrapper::Load()
m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
m_sentenceEndWord[m_factorType] = m_sentenceEnd;
m_neuralLM = new nplm::neuralLM();
m_neuralLM->read(m_filePath);
m_neuralLM->set_log_base(10);
m_neuralLM_shared = new nplm::neuralLM(m_filePath, true);
m_neuralLM_shared->set_log_base(10);
//TODO: config option?
m_neuralLM_shared->set_cache(1000000);
UTIL_THROW_IF2(m_nGramOrder != m_neuralLM_shared->get_order(),
"Wrong order of neuralLM: LM has " << m_neuralLM_shared->get_order() << ", but Moses expects " << m_nGramOrder);
//TODO: Implement this
}
LMResult NeuralLMWrapper::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
{
unsigned int hashCode = 0;
if (!m_neuralLM.get()) {
m_neuralLM.reset(new nplm::neuralLM(*m_neuralLM_shared));
}
size_t hashCode = 0;
vector<int> words(contextFactor.size());
// TRACE_ERR("NeuralLM words:");
for (size_t i=0, n=contextFactor.size(); i<n; i+=1) {
for (size_t i=0, n=contextFactor.size(); i<n; i++) {
const Word* word = contextFactor[i];
const Factor* factor = word->GetFactor(m_factorType);
const std::string string= factor->GetString().as_string();
int neuralLM_wordID = m_neuralLM->lookup_word(string);
words[i] = neuralLM_wordID;
hashCode += neuralLM_wordID;
// TRACE_ERR(" " << string << "(" << neuralLM_wordID << ")" );
boost::hash_combine(hashCode, neuralLM_wordID);
}
double value = m_neuralLM->lookup_ngram(words);
// TRACE_ERR("\t=\t" << value);
// TRACE_ERR(endl);
// Create a new struct to hold the result
LMResult ret;
ret.score = value;
ret.unknown = false;
// State* finalState is a void pointer
//
// Construct a hash value from the vector of words (contextFactor)
//
// The hash value must be the same size as sizeof(void*)
//
// TODO Set finalState to the above hash value
// use last word as state info
// const Factor *factor;
// size_t hash_value(const Factor &f);
// if (contextFactor.size()) {
// factor = contextFactor.back()->GetFactor(m_factorType);
// } else {
// factor = NULL;
// }
//
// (*finalState) = (State*) factor;
(*finalState) = (State*) hashCode;
return ret;
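
A self-contained illustration of the new state computation: the context's word IDs are folded into a single std::size_t with boost::hash_combine, which, unlike the old additive hash, is order-sensitive, and that value is what GetValue writes through the opaque State* pointer. lookup_word and lookup_ngram here are hypothetical stand-ins, not the nplm API.

#include <boost/functional/hash.hpp>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Dummy stand-ins for nplm::neuralLM::lookup_word / lookup_ngram.
int lookup_word(const std::string &w) { return static_cast<int>(w.size()); }
double lookup_ngram(const std::vector<int> &ids) { return -0.1 * ids.size(); }

int main() {
  const char *context[] = {"this", "is", "a", "test"};
  std::vector<int> words(4);
  std::size_t hashCode = 0;
  for (std::size_t i = 0; i < words.size(); ++i) {
    words[i] = lookup_word(context[i]);
    boost::hash_combine(hashCode, words[i]);  // order-sensitive, unlike +=
  }
  double score = lookup_ngram(words);
  std::cout << "score=" << score << " state=" << hashCode << std::endl;
  return 0;
}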


@@ -2,6 +2,8 @@
#include "SingleFactor.h"
#include <boost/thread/tss.hpp>
namespace nplm {
class neuralLM;
}
@@ -9,16 +11,16 @@ namespace nplm {
namespace Moses
{
/** Implementation of single factor LM using NPLM's code.
*/
class NeuralLMWrapper : public LanguageModelSingleFactor
{
protected:
nplm::neuralLM *m_neuralLM;
// big data (vocab, weights, cache) shared among threads
nplm::neuralLM *m_neuralLM_shared;
// thread-specific nplm for thread-safety
mutable boost::thread_specific_ptr<nplm::neuralLM> m_neuralLM;
public:
NeuralLMWrapper(const std::string &line);
// NeuralLM(const std::string &line);
~NeuralLMWrapper();
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const;
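
A minimal toy sketch of the sharing pattern these two members implement (ToyModel is a hypothetical stand-in for nplm::neuralLM): the heavyweight object is loaded once, and each thread lazily clones it through boost::thread_specific_ptr on its first query, so lookups never touch shared mutable state. In the real fork the copy shares the big data instead of duplicating it, per the comment above.

#include <boost/thread.hpp>
#include <boost/thread/tss.hpp>
#include <iostream>
#include <vector>

// Toy stand-in for nplm::neuralLM: big read-only table, cheap lookups.
struct ToyModel {
  std::vector<float> weights;
  explicit ToyModel(std::size_t n) : weights(n, 0.5f) {}
  float score(int word) const { return weights[word % weights.size()]; }
};

class ToyWrapper {
  ToyModel *m_shared;                                    // loaded once
  mutable boost::thread_specific_ptr<ToyModel> m_local;  // one copy per thread
public:
  ToyWrapper() : m_shared(new ToyModel(1000)) {}
  ~ToyWrapper() { delete m_shared; }
  float Score(int word) const {
    if (!m_local.get())                        // first call on this thread?
      m_local.reset(new ToyModel(*m_shared));  // lazy per-thread copy
    return m_local->score(word);
  }
};

int main() {
  ToyWrapper lm;
  boost::thread_group threads;
  for (int t = 0; t < 4; ++t)
    threads.create_thread([&lm, t] { std::cout << lm.Score(t) << "\n"; });
  threads.join_all();
  return 0;
}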