mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-08 04:27:53 +03:00
LM state
This commit is contained in:
parent
d163c37674
commit
21c31578d2
@ -12,6 +12,7 @@ import path ;
|
|||||||
Hypothesis.cpp
|
Hypothesis.cpp
|
||||||
InputPath.cpp
|
InputPath.cpp
|
||||||
InputPaths.cpp
|
InputPaths.cpp
|
||||||
|
LanguageModel.cpp
|
||||||
Main.cpp
|
Main.cpp
|
||||||
Manager.cpp
|
Manager.cpp
|
||||||
MemPool.cpp
|
MemPool.cpp
|
||||||
|
@ -6,7 +6,11 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "LanguageModel.h"
|
#include "LanguageModel.h"
|
||||||
|
#include "System.h"
|
||||||
#include "moses/Util.h"
|
#include "moses/Util.h"
|
||||||
|
#include "moses/InputFileStream.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
LanguageModel::LanguageModel(size_t startInd, const std::string &line)
|
LanguageModel::LanguageModel(size_t startInd, const std::string &line)
|
||||||
:StatefulFeatureFunction(startInd, line)
|
:StatefulFeatureFunction(startInd, line)
|
||||||
@ -18,6 +22,50 @@ LanguageModel::~LanguageModel() {
|
|||||||
// TODO Auto-generated destructor stub
|
// TODO Auto-generated destructor stub
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void LanguageModel::Load(System &system)
|
||||||
|
{
|
||||||
|
Moses::FactorCollection &fc = system.GetVocab();
|
||||||
|
|
||||||
|
Moses::InputFileStream infile(m_path);
|
||||||
|
size_t lineNum = 0;
|
||||||
|
string line;
|
||||||
|
while (getline(infile, line)) {
|
||||||
|
if (++lineNum % 10000 == 0) {
|
||||||
|
cerr << lineNum << " ";
|
||||||
|
}
|
||||||
|
|
||||||
|
vector<string> substrings;
|
||||||
|
Moses::Tokenize(substrings, line, "\t");
|
||||||
|
|
||||||
|
if (substrings.size() < 2)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
assert(substrings.size() == 2 || substrings.size() == 3);
|
||||||
|
|
||||||
|
SCORE prob = Moses::Scan<SCORE>(substrings[0]);
|
||||||
|
if (substrings[1] == "<unk>") {
|
||||||
|
m_oov = prob;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
SCORE backoff = 0.f;
|
||||||
|
if (substrings.size() == 3)
|
||||||
|
backoff = Moses::Scan<SCORE>(substrings[2]);
|
||||||
|
|
||||||
|
// ngram
|
||||||
|
vector<string> key;
|
||||||
|
Moses::Tokenize(key, substrings[1], " ");
|
||||||
|
|
||||||
|
vector<const Moses::Factor*> factorKey;
|
||||||
|
for (size_t i = 0; i < key.size(); ++i) {
|
||||||
|
factorKey.push_back(fc.AddFactor(key[i], false));
|
||||||
|
}
|
||||||
|
|
||||||
|
m_root.insert(factorKey, LMScores(prob, backoff));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
void LanguageModel::SetParameter(const std::string& key, const std::string& value)
|
void LanguageModel::SetParameter(const std::string& key, const std::string& value)
|
||||||
{
|
{
|
||||||
if (key == "path") {
|
if (key == "path") {
|
||||||
@ -27,7 +75,7 @@ void LanguageModel::SetParameter(const std::string& key, const std::string& valu
|
|||||||
m_factorType = Moses::Scan<Moses::FactorType>(value);
|
m_factorType = Moses::Scan<Moses::FactorType>(value);
|
||||||
}
|
}
|
||||||
else if (key == "order") {
|
else if (key == "order") {
|
||||||
|
m_order = Moses::Scan<size_t>(value);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
StatefulFeatureFunction::SetParameter(key, value);
|
StatefulFeatureFunction::SetParameter(key, value);
|
||||||
@ -36,7 +84,7 @@ void LanguageModel::SetParameter(const std::string& key, const std::string& valu
|
|||||||
|
|
||||||
const Moses::FFState* LanguageModel::EmptyHypothesisState(const Manager &mgr, const Phrase &input) const
|
const Moses::FFState* LanguageModel::EmptyHypothesisState(const Manager &mgr, const Phrase &input) const
|
||||||
{
|
{
|
||||||
|
return new Moses::PointerState(&m_root);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -54,4 +102,5 @@ Moses::FFState* LanguageModel::EvaluateWhenApplied(const Manager &mgr,
|
|||||||
Scores &score) const
|
Scores &score) const
|
||||||
{
|
{
|
||||||
|
|
||||||
|
return new Moses::PointerState(&m_root);
|
||||||
}
|
}
|
||||||
|
@ -9,14 +9,46 @@
|
|||||||
#define LANGUAGEMODEL_H_
|
#define LANGUAGEMODEL_H_
|
||||||
|
|
||||||
#include "StatefulFeatureFunction.h"
|
#include "StatefulFeatureFunction.h"
|
||||||
|
#include "TypeDef.h"
|
||||||
|
#include "moses/Factor.h"
|
||||||
#include "moses/TypeDef.h"
|
#include "moses/TypeDef.h"
|
||||||
|
#include "moses/LM/PointerState.h"
|
||||||
|
#include "MorphoTrie/MorphTrie.h"
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
struct LMScores
|
||||||
|
{
|
||||||
|
LMScores()
|
||||||
|
{}
|
||||||
|
|
||||||
|
LMScores(const LMScores &copy)
|
||||||
|
:prob(copy.prob)
|
||||||
|
,backoff(copy.backoff)
|
||||||
|
{}
|
||||||
|
|
||||||
|
LMScores(float inProb, float inBackoff)
|
||||||
|
:prob(inProb)
|
||||||
|
,backoff(inBackoff)
|
||||||
|
{}
|
||||||
|
|
||||||
|
float prob, backoff;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline std::ostream& operator<<(std::ostream &out, const LMScores &obj)
|
||||||
|
{
|
||||||
|
out << "(" << obj.prob << "," << obj.backoff << ")" << std::flush;
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
class LanguageModel : public StatefulFeatureFunction
|
class LanguageModel : public StatefulFeatureFunction
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
LanguageModel(size_t startInd, const std::string &line);
|
LanguageModel(size_t startInd, const std::string &line);
|
||||||
virtual ~LanguageModel();
|
virtual ~LanguageModel();
|
||||||
|
|
||||||
|
virtual void Load(System &system);
|
||||||
|
|
||||||
virtual void SetParameter(const std::string& key, const std::string& value);
|
virtual void SetParameter(const std::string& key, const std::string& value);
|
||||||
|
|
||||||
virtual const Moses::FFState* EmptyHypothesisState(const Manager &mgr, const Phrase &input) const;
|
virtual const Moses::FFState* EmptyHypothesisState(const Manager &mgr, const Phrase &input) const;
|
||||||
@ -35,6 +67,11 @@ public:
|
|||||||
protected:
|
protected:
|
||||||
std::string m_path;
|
std::string m_path;
|
||||||
Moses::FactorType m_factorType;
|
Moses::FactorType m_factorType;
|
||||||
|
size_t m_order;
|
||||||
|
|
||||||
|
MorphTrie<const Moses::Factor*, LMScores> m_root;
|
||||||
|
SCORE m_oov;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* LANGUAGEMODEL_H_ */
|
#endif /* LANGUAGEMODEL_H_ */
|
||||||
|
@ -17,19 +17,19 @@
|
|||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
Node::Node()
|
PhraseTableMemory::Node::Node()
|
||||||
{}
|
{}
|
||||||
|
|
||||||
Node::~Node()
|
PhraseTableMemory::Node::~Node()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void Node::AddRule(Phrase &source, TargetPhrase *target)
|
void PhraseTableMemory::Node::AddRule(Phrase &source, TargetPhrase *target)
|
||||||
{
|
{
|
||||||
AddRule(source, target, 0);
|
AddRule(source, target, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
Node &Node::AddRule(Phrase &source, TargetPhrase *target, size_t pos)
|
PhraseTableMemory::Node &PhraseTableMemory::Node::AddRule(Phrase &source, TargetPhrase *target, size_t pos)
|
||||||
{
|
{
|
||||||
if (pos == source.GetSize()) {
|
if (pos == source.GetSize()) {
|
||||||
TargetPhrases *tp = m_targetPhrases.get();
|
TargetPhrases *tp = m_targetPhrases.get();
|
||||||
@ -48,7 +48,7 @@ Node &Node::AddRule(Phrase &source, TargetPhrase *target, size_t pos)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhrases::shared_const_ptr Node::Find(const PhraseBase &source, size_t pos) const
|
TargetPhrases::shared_const_ptr PhraseTableMemory::Node::Find(const PhraseBase &source, size_t pos) const
|
||||||
{
|
{
|
||||||
assert(source.GetSize());
|
assert(source.GetSize());
|
||||||
if (pos == source.GetSize()) {
|
if (pos == source.GetSize()) {
|
||||||
|
@ -9,26 +9,27 @@
|
|||||||
#define PHRASETABLEMEMORY_H_
|
#define PHRASETABLEMEMORY_H_
|
||||||
#include "PhraseTable.h"
|
#include "PhraseTable.h"
|
||||||
|
|
||||||
class Node
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
Node();
|
|
||||||
~Node();
|
|
||||||
void AddRule(Phrase &source, TargetPhrase *target);
|
|
||||||
TargetPhrases::shared_const_ptr Find(const PhraseBase &source, size_t pos = 0) const;
|
|
||||||
|
|
||||||
protected:
|
|
||||||
typedef boost::unordered_map<Word, Node, Moses::UnorderedComparer<Word>, Moses::UnorderedComparer<Word> > Children;
|
|
||||||
Children m_children;
|
|
||||||
TargetPhrases::shared_ptr m_targetPhrases;
|
|
||||||
|
|
||||||
Node &AddRule(Phrase &source, TargetPhrase *target, size_t pos);
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
class PhraseTableMemory : public PhraseTable
|
class PhraseTableMemory : public PhraseTable
|
||||||
{
|
{
|
||||||
|
//////////////////////////////////////
|
||||||
|
class Node
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Node();
|
||||||
|
~Node();
|
||||||
|
void AddRule(Phrase &source, TargetPhrase *target);
|
||||||
|
TargetPhrases::shared_const_ptr Find(const PhraseBase &source, size_t pos = 0) const;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
typedef boost::unordered_map<Word, Node, Moses::UnorderedComparer<Word>, Moses::UnorderedComparer<Word> > Children;
|
||||||
|
Children m_children;
|
||||||
|
TargetPhrases::shared_ptr m_targetPhrases;
|
||||||
|
|
||||||
|
Node &AddRule(Phrase &source, TargetPhrase *target, size_t pos);
|
||||||
|
|
||||||
|
};
|
||||||
|
//////////////////////////////////////
|
||||||
public:
|
public:
|
||||||
PhraseTableMemory(size_t startInd, const std::string &line);
|
PhraseTableMemory(size_t startInd, const std::string &line);
|
||||||
virtual ~PhraseTableMemory();
|
virtual ~PhraseTableMemory();
|
||||||
|
@ -10,7 +10,6 @@
|
|||||||
StatefulFeatureFunction::StatefulFeatureFunction(size_t startInd, const std::string &line)
|
StatefulFeatureFunction::StatefulFeatureFunction(size_t startInd, const std::string &line)
|
||||||
:FeatureFunction(startInd, line)
|
:FeatureFunction(startInd, line)
|
||||||
{
|
{
|
||||||
ReadParameters();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
StatefulFeatureFunction::~StatefulFeatureFunction() {
|
StatefulFeatureFunction::~StatefulFeatureFunction() {
|
||||||
|
Loading…
Reference in New Issue
Block a user