add NeuralLM

This commit is contained in:
Hieu Hoang 2013-10-28 22:11:37 +00:00
parent 31aa9f2a63
commit 0af1df9f8b
7 changed files with 167 additions and 15 deletions

View File

@ -48,6 +48,7 @@
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.755343734" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.816413868" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.330225535" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../nplm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib&quot;"/>
@ -65,6 +66,7 @@
<listOptionValue builtIn="false" value="/opt/local/lib"/>
</option>
<option id="gnu.cpp.link.option.libs.1177721357" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="neuralLM"/>
<listOptionValue builtIn="false" value="dstruct"/>
<listOptionValue builtIn="false" value="flm"/>
<listOptionValue builtIn="false" value="oolm"/>

View File

@ -47,6 +47,7 @@
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1546774818" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.523170942" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../nplm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../randlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
@ -61,6 +62,7 @@
<listOptionValue builtIn="false" value="/opt/local/lib"/>
</option>
<option id="gnu.cpp.link.option.libs.998577284" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="neuralLM"/>
<listOptionValue builtIn="false" value="dstruct"/>
<listOptionValue builtIn="false" value="flm"/>
<listOptionValue builtIn="false" value="oolm"/>

View File

@ -34,6 +34,8 @@
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/include&quot;"/>
<listOptionValue builtIn="false" value="/opt/local/include/"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../irstlm/include"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../nplm/src&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../eigen&quot;"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../srilm/include"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../randlm/include/RandLM&quot;"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
@ -48,6 +50,7 @@
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
<listOptionValue builtIn="false" value="LM_IRST"/>
<listOptionValue builtIn="false" value="LM_RAND"/>
<listOptionValue builtIn="false" value="HAVE_NPLM"/>
<listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
<listOptionValue builtIn="false" value="_LARGE_FILES"/>
</option>
@ -71,9 +74,6 @@
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1742823107" name="ChartTranslationOption.cpp" rcbsApplicability="disable" resourcePath="ChartTranslationOption.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1616881050">
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1616881050" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327"/>
</fileInfo>
<sourceEntries>
<entry excluding="FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>

View File

@ -1156,16 +1156,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/InputFeature.h</locationURI>
</link>
<link>
<name>FF/InternalStructStatelessFF.cpp</name>
<type>1</type>
<location>/Users/mnadejde/Documents/workspace/moses_120913/mosesdecoder/moses/FF/InternalStructStatelessFF.cpp</location>
</link>
<link>
<name>FF/InternalStructStatelessFF.h</name>
<type>1</type>
<location>/Users/mnadejde/Documents/workspace/moses_120913/mosesdecoder/moses/FF/InternalStructStatelessFF.h</location>
</link>
<link>
<name>FF/LexicalReordering</name>
<type>2</type>
@ -1436,6 +1426,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/MultiFactor.h</locationURI>
</link>
<link>
<name>LM/NeuralLM.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/NeuralLM.cpp</locationURI>
</link>
<link>
<name>LM/NeuralLM.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/NeuralLM.h</locationURI>
</link>
<link>
<name>LM/ORLM.cpp</name>
<type>1</type>

View File

@ -31,7 +31,6 @@
#include "moses/FF/OSM-Feature/OpSequenceModel.h"
#include "moses/FF/ControlRecombination.h"
#include "moses/FF/ExternalFeature.h"
#include "moses/FF/InternalStructStatelessFF.h"
#include "moses/FF/ConstrainedDecoding.h"
#include "moses/FF/SkeletonStatelessFF.h"
@ -60,6 +59,10 @@
#include "moses/SyntacticLanguageModel.h"
#endif
#ifdef HAVE_NPLM
#include "moses/LM/NeuralLM.h"
#endif
#include "util/exception.hh"
#include <vector>
@ -155,7 +158,6 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(SkeletonStatelessFF);
MOSES_FNAME(SkeletonStatefulFF);
MOSES_FNAME(InternalStructStatelessFF);
MOSES_FNAME(SkeletonLM);
MOSES_FNAME(SkeletonPT);
@ -174,6 +176,10 @@ FeatureRegistry::FeatureRegistry()
#ifdef LM_RAND
MOSES_FNAME2("RANDLM", LanguageModelRandLM);
#endif
#ifdef HAVE_NPLM
MOSES_FNAME(NeuralLM);
#endif
Add("KENLM", new KenFactory());
}

106
moses/LM/NeuralLM.cpp Normal file
View File

@ -0,0 +1,106 @@
#include "moses/StaticData.h"
#include "moses/FactorCollection.h"
#include "NeuralLM.h"
#include "neuralLM.h"
#include "model.h"
using namespace std;
namespace Moses
{
NeuralLM::NeuralLM(const std::string &line)
:LanguageModelSingleFactor("NeuralLM", line)
{
// This space intentionally left blank
}
NeuralLM::~NeuralLM()
{
delete m_neuralLM;
}
bool NeuralLM::Load(const std::string &filePath, FactorType factorType, size_t nGramOrder)
{
TRACE_ERR("Loading NeuralLM " << filePath << endl);
// Store parameters
m_nGramOrder = nGramOrder;
m_filePath = filePath;
m_factorType = factorType;
if (factorType == NOT_FOUND) {
m_factorType = 0;
}
// Set parameters required by ancestor classes
FactorCollection &factorCollection = FactorCollection::Instance();
m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
m_sentenceStartWord[m_factorType] = m_sentenceStart;
m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
m_sentenceEndWord[m_factorType] = m_sentenceEnd;
m_neuralLM = new nplm::neuralLM(24234);
m_neuralLM->read(m_filePath);
m_neuralLM->set_log_base(10);
return true;
//TODO: Implement this
}
LMResult NeuralLM::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
{
unsigned int hashCode = 0;
vector<int> words(contextFactor.size());
// TRACE_ERR("NeuralLM words:");
for (size_t i=0, n=contextFactor.size(); i<n; i+=1) {
const Word* word = contextFactor[i];
const Factor* factor = word->GetFactor(m_factorType);
const std::string string= factor->GetString().as_string();
int neuralLM_wordID = m_neuralLM->lookup_word(string);
words[i] = neuralLM_wordID;
hashCode += neuralLM_wordID;
// TRACE_ERR(" " << string << "(" << neuralLM_wordID << ")" );
}
double value = m_neuralLM->lookup_ngram(words);
// TRACE_ERR("\t=\t" << value);
// TRACE_ERR(endl);
// Create a new struct to hold the result
LMResult ret;
ret.score = value;
ret.unknown = false;
// State* finalState is a void pointer
//
// Construct a hash value from the vector of words (contextFactor)
//
// The hash value must be the same size as sizeof(void*)
//
// TODO Set finalState to the above hash value
// use last word as state info
// const Factor *factor;
// size_t hash_value(const Factor &f);
// if (contextFactor.size()) {
// factor = contextFactor.back()->GetFactor(m_factorType);
// } else {
// factor = NULL;
// }
//
// (*finalState) = (State*) factor;
(*finalState) = (State*) hashCode;
return ret;
}
}

36
moses/LM/NeuralLM.h Normal file
View File

@ -0,0 +1,36 @@
#pragma once
#include "SingleFactor.h"
namespace nplm {
class neuralLM;
}
namespace Moses
{
/** Implementation of single factor LM using IRST's code.
*/
class NeuralLM : public LanguageModelSingleFactor
{
protected:
nplm::neuralLM *m_neuralLM;
public:
NeuralLM(const std::string &line);
// NeuralLM(const std::string &line);
~NeuralLM();
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const;
virtual bool Load(const std::string &filePath, FactorType factorType, size_t nGramOrder);
};
} // namespace