mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 22:14:57 +03:00
add NeuralLM
This commit is contained in:
parent
31aa9f2a63
commit
0af1df9f8b
@ -48,6 +48,7 @@
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.755343734" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.816413868" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
|
||||
<option id="gnu.cpp.link.option.paths.330225535" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../nplm/lib""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../boost/lib64""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../cmph/lib""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../boost/lib""/>
|
||||
@ -65,6 +66,7 @@
|
||||
<listOptionValue builtIn="false" value="/opt/local/lib"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.libs.1177721357" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="neuralLM"/>
|
||||
<listOptionValue builtIn="false" value="dstruct"/>
|
||||
<listOptionValue builtIn="false" value="flm"/>
|
||||
<listOptionValue builtIn="false" value="oolm"/>
|
||||
|
@ -47,6 +47,7 @@
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1546774818" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
|
||||
<option id="gnu.cpp.link.option.paths.523170942" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../irstlm/lib""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../nplm/lib""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../randlm/lib""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../cmph/lib""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../boost/lib64""/>
|
||||
@ -61,6 +62,7 @@
|
||||
<listOptionValue builtIn="false" value="/opt/local/lib"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.libs.998577284" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="neuralLM"/>
|
||||
<listOptionValue builtIn="false" value="dstruct"/>
|
||||
<listOptionValue builtIn="false" value="flm"/>
|
||||
<listOptionValue builtIn="false" value="oolm"/>
|
||||
|
@ -34,6 +34,8 @@
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../cmph/include""/>
|
||||
<listOptionValue builtIn="false" value="/opt/local/include/"/>
|
||||
<listOptionValue builtIn="false" value="${workspace_loc}/../../irstlm/include"/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../nplm/src""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../eigen""/>
|
||||
<listOptionValue builtIn="false" value="${workspace_loc}/../../srilm/include"/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../randlm/include/RandLM""/>
|
||||
<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
|
||||
@ -48,6 +50,7 @@
|
||||
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
|
||||
<listOptionValue builtIn="false" value="LM_IRST"/>
|
||||
<listOptionValue builtIn="false" value="LM_RAND"/>
|
||||
<listOptionValue builtIn="false" value="HAVE_NPLM"/>
|
||||
<listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
|
||||
<listOptionValue builtIn="false" value="_LARGE_FILES"/>
|
||||
</option>
|
||||
@ -71,9 +74,6 @@
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1742823107" name="ChartTranslationOption.cpp" rcbsApplicability="disable" resourcePath="ChartTranslationOption.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1616881050">
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1616881050" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327"/>
|
||||
</fileInfo>
|
||||
<sourceEntries>
|
||||
<entry excluding="FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
</sourceEntries>
|
||||
|
@ -1156,16 +1156,6 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/InputFeature.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/InternalStructStatelessFF.cpp</name>
|
||||
<type>1</type>
|
||||
<location>/Users/mnadejde/Documents/workspace/moses_120913/mosesdecoder/moses/FF/InternalStructStatelessFF.cpp</location>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/InternalStructStatelessFF.h</name>
|
||||
<type>1</type>
|
||||
<location>/Users/mnadejde/Documents/workspace/moses_120913/mosesdecoder/moses/FF/InternalStructStatelessFF.h</location>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/LexicalReordering</name>
|
||||
<type>2</type>
|
||||
@ -1436,6 +1426,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/MultiFactor.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LM/NeuralLM.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/NeuralLM.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LM/NeuralLM.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/NeuralLM.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LM/ORLM.cpp</name>
|
||||
<type>1</type>
|
||||
|
@ -31,7 +31,6 @@
|
||||
#include "moses/FF/OSM-Feature/OpSequenceModel.h"
|
||||
#include "moses/FF/ControlRecombination.h"
|
||||
#include "moses/FF/ExternalFeature.h"
|
||||
#include "moses/FF/InternalStructStatelessFF.h"
|
||||
#include "moses/FF/ConstrainedDecoding.h"
|
||||
|
||||
#include "moses/FF/SkeletonStatelessFF.h"
|
||||
@ -60,6 +59,10 @@
|
||||
#include "moses/SyntacticLanguageModel.h"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_NPLM
|
||||
#include "moses/LM/NeuralLM.h"
|
||||
#endif
|
||||
|
||||
#include "util/exception.hh"
|
||||
|
||||
#include <vector>
|
||||
@ -155,7 +158,6 @@ FeatureRegistry::FeatureRegistry()
|
||||
|
||||
MOSES_FNAME(SkeletonStatelessFF);
|
||||
MOSES_FNAME(SkeletonStatefulFF);
|
||||
MOSES_FNAME(InternalStructStatelessFF);
|
||||
MOSES_FNAME(SkeletonLM);
|
||||
MOSES_FNAME(SkeletonPT);
|
||||
|
||||
@ -174,6 +176,10 @@ FeatureRegistry::FeatureRegistry()
|
||||
#ifdef LM_RAND
|
||||
MOSES_FNAME2("RANDLM", LanguageModelRandLM);
|
||||
#endif
|
||||
#ifdef HAVE_NPLM
|
||||
MOSES_FNAME(NeuralLM);
|
||||
#endif
|
||||
|
||||
Add("KENLM", new KenFactory());
|
||||
}
|
||||
|
||||
|
106
moses/LM/NeuralLM.cpp
Normal file
106
moses/LM/NeuralLM.cpp
Normal file
@ -0,0 +1,106 @@
|
||||
|
||||
#include "moses/StaticData.h"
|
||||
#include "moses/FactorCollection.h"
|
||||
#include "NeuralLM.h"
|
||||
#include "neuralLM.h"
|
||||
#include "model.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
/**
 * Construct the feature from its configuration line.
 *
 * The line is forwarded unchanged to LanguageModelSingleFactor together
 * with the feature name "NeuralLM". No model is loaded here; that happens
 * in Load().
 *
 * @param line configuration line handed to the base class
 */
NeuralLM::NeuralLM(const std::string &line)
  :LanguageModelSingleFactor("NeuralLM", line)
  ,m_neuralLM(NULL)  // keeps `delete m_neuralLM` in the destructor safe if Load() never runs
{
  // This space intentionally left blank
}
|
||||
|
||||
|
||||
/** Destroy the feature and release the NPLM model object created in Load(). */
NeuralLM::~NeuralLM()
{
  delete m_neuralLM;
}
|
||||
|
||||
|
||||
bool NeuralLM::Load(const std::string &filePath, FactorType factorType, size_t nGramOrder)
|
||||
{
|
||||
|
||||
TRACE_ERR("Loading NeuralLM " << filePath << endl);
|
||||
|
||||
// Store parameters
|
||||
m_nGramOrder = nGramOrder;
|
||||
m_filePath = filePath;
|
||||
m_factorType = factorType;
|
||||
if (factorType == NOT_FOUND) {
|
||||
m_factorType = 0;
|
||||
}
|
||||
|
||||
// Set parameters required by ancestor classes
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
|
||||
m_sentenceStartWord[m_factorType] = m_sentenceStart;
|
||||
m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
|
||||
m_sentenceEndWord[m_factorType] = m_sentenceEnd;
|
||||
|
||||
m_neuralLM = new nplm::neuralLM(24234);
|
||||
m_neuralLM->read(m_filePath);
|
||||
m_neuralLM->set_log_base(10);
|
||||
|
||||
return true;
|
||||
//TODO: Implement this
|
||||
}
|
||||
|
||||
|
||||
/**
 * Score the n-gram in contextFactor with the loaded NPLM model.
 *
 * For each word, the string of its factor m_factorType is mapped to an NPLM
 * word id with lookup_word(); the resulting id sequence is scored with
 * lookup_ngram().
 *
 * @param contextFactor words of the n-gram to score
 * @param finalState    optional out-parameter (the declaration defaults it
 *                      to 0). When non-null it receives the sum of the NPLM
 *                      word ids, stored in the pointer-sized State slot.
 * @return LMResult whose score is the lookup_ngram() value; unknown is
 *         always set to false.
 */
LMResult NeuralLM::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
{
  unsigned int hashCode = 0;
  vector<int> words(contextFactor.size());

  for (size_t i = 0, n = contextFactor.size(); i < n; ++i) {
    const Word* word = contextFactor[i];
    const Factor* factor = word->GetFactor(m_factorType);
    const std::string wordString = factor->GetString().as_string();
    const int neuralLM_wordID = m_neuralLM->lookup_word(wordString);
    words[i] = neuralLM_wordID;
    hashCode += neuralLM_wordID;
  }

  const double value = m_neuralLM->lookup_ngram(words);

  // Create a new struct to hold the result
  LMResult ret;
  ret.score = value;
  ret.unknown = false;

  // State is an opaque pointer-sized value, so the hash is stored in it
  // directly rather than pointing at anything.
  // NOTE(review): the hash is a plain sum of word ids, so n-grams with the
  // same words in a different order get the same state -- confirm this is
  // acceptable for recombination.
  if (finalState) {
    // Callers may omit finalState (it defaults to 0); only write when given.
    (*finalState) = (State*) hashCode;
  }

  return ret;
}
|
||||
|
||||
}
|
||||
|
||||
|
36
moses/LM/NeuralLM.h
Normal file
36
moses/LM/NeuralLM.h
Normal file
@ -0,0 +1,36 @@
|
||||
#pragma once
|
||||
|
||||
#include "SingleFactor.h"
|
||||
|
||||
namespace nplm {
|
||||
class neuralLM;
|
||||
}
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
/** Implementation of single factor LM using IRST's code.
|
||||
*/
|
||||
class NeuralLM : public LanguageModelSingleFactor
|
||||
{
|
||||
protected:
|
||||
nplm::neuralLM *m_neuralLM;
|
||||
|
||||
public:
|
||||
NeuralLM(const std::string &line);
|
||||
// NeuralLM(const std::string &line);
|
||||
~NeuralLM();
|
||||
|
||||
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const;
|
||||
|
||||
virtual bool Load(const std::string &filePath, FactorType factorType, size_t nGramOrder);
|
||||
|
||||
};
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user