mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-06 19:49:41 +03:00
LM state
This commit is contained in:
parent
d163c37674
commit
21c31578d2
@ -12,6 +12,7 @@ import path ;
|
||||
Hypothesis.cpp
|
||||
InputPath.cpp
|
||||
InputPaths.cpp
|
||||
LanguageModel.cpp
|
||||
Main.cpp
|
||||
Manager.cpp
|
||||
MemPool.cpp
|
||||
|
@ -6,7 +6,11 @@
|
||||
*/
|
||||
|
||||
#include "LanguageModel.h"
|
||||
#include "System.h"
|
||||
#include "moses/Util.h"
|
||||
#include "moses/InputFileStream.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
LanguageModel::LanguageModel(size_t startInd, const std::string &line)
|
||||
:StatefulFeatureFunction(startInd, line)
|
||||
@ -18,6 +22,50 @@ LanguageModel::~LanguageModel() {
|
||||
// TODO Auto-generated destructor stub
|
||||
}
|
||||
|
||||
/**
 * Read the language model file at m_path into the in-memory trie m_root.
 *
 * Each line is split on tabs. Lines with fewer than two columns are ignored;
 * the remaining lines are read as: score, n-gram, and an optional third
 * column holding the backoff (0 when absent).
 * NOTE(review): this looks like the n-gram section of an ARPA-style file -
 * confirm against the files actually fed to this feature.
 *
 * @param system  supplies the vocabulary (factor collection) used to turn
 *                the words of every n-gram into factors.
 */
void LanguageModel::Load(System &system)
{
  Moses::FactorCollection &vocab = system.GetVocab();
  Moses::InputFileStream lmFile(m_path);

  std::string row;
  for (size_t rowCount = 1; getline(lmFile, row); ++rowCount) {
    // progress indicator, one tick every 10000 lines
    if (rowCount % 10000 == 0) {
      std::cerr << rowCount << " ";
    }

    std::vector<std::string> columns;
    Moses::Tokenize(columns, row, "\t");
    const size_t numColumns = columns.size();

    // lines without at least a score and an n-gram carry nothing to store
    if (numColumns < 2) {
      continue;
    }
    assert(numColumns == 2 || numColumns == 3);

    const SCORE prob = Moses::Scan<SCORE>(columns[0]);
    std::string &ngram = columns[1];

    // the unknown-word entry is kept aside in m_oov, not inserted in the trie
    if (ngram == "<unk>") {
      m_oov = prob;
      continue;
    }

    const SCORE backoff = (numColumns == 3) ? Moses::Scan<SCORE>(columns[2]) : 0.f;

    // split the n-gram into words and map every word to its factor
    std::vector<std::string> words;
    Moses::Tokenize(words, ngram, " ");

    std::vector<const Moses::Factor*> factors;
    for (std::vector<std::string>::iterator word = words.begin(); word != words.end(); ++word) {
      factors.push_back(vocab.AddFactor(*word, false));
    }

    m_root.insert(factors, LMScores(prob, backoff));
  }
}
|
||||
|
||||
void LanguageModel::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "path") {
|
||||
@ -27,7 +75,7 @@ void LanguageModel::SetParameter(const std::string& key, const std::string& valu
|
||||
m_factorType = Moses::Scan<Moses::FactorType>(value);
|
||||
}
|
||||
else if (key == "order") {
|
||||
|
||||
m_order = Moses::Scan<size_t>(value);
|
||||
}
|
||||
else {
|
||||
StatefulFeatureFunction::SetParameter(key, value);
|
||||
@ -36,7 +84,7 @@ void LanguageModel::SetParameter(const std::string& key, const std::string& valu
|
||||
|
||||
/**
 * Build the language model state for the empty hypothesis.
 *
 * Neither mgr nor input is consulted: the state is a freshly allocated
 * Moses::PointerState wrapping the address of m_root.
 * NOTE(review): the state is allocated with new and not released here;
 * presumably the caller takes ownership - confirm.
 */
const Moses::FFState* LanguageModel::EmptyHypothesisState(const Manager &mgr, const Phrase &input) const
{
  const Moses::FFState *initialState = new Moses::PointerState(&m_root);
  return initialState;
}
|
||||
|
||||
void
|
||||
@ -54,4 +102,5 @@ Moses::FFState* LanguageModel::EvaluateWhenApplied(const Manager &mgr,
|
||||
Scores &score) const
|
||||
{
|
||||
|
||||
return new Moses::PointerState(&m_root);
|
||||
}
|
||||
|
@ -9,14 +9,46 @@
|
||||
#define LANGUAGEMODEL_H_
|
||||
|
||||
#include "StatefulFeatureFunction.h"
|
||||
#include "TypeDef.h"
|
||||
#include "moses/Factor.h"
|
||||
#include "moses/TypeDef.h"
|
||||
#include "moses/LM/PointerState.h"
|
||||
#include "MorphoTrie/MorphTrie.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**
 * Pair of scores stored for one n-gram in the language model trie:
 * the n-gram score itself (prob) and its backoff weight (backoff).
 */
struct LMScores
{
  /// Default-constructed scores are zeroed so that a node which never had
  /// scores assigned holds a defined value instead of indeterminate floats.
  LMScores()
    :prob(0.f)
    ,backoff(0.f)
  {}

  /// Member-wise copy.
  LMScores(const LMScores &copy)
    :prob(copy.prob)
    ,backoff(copy.backoff)
  {}

  /// @param inProb     score of the n-gram
  /// @param inBackoff  backoff weight of the n-gram
  LMScores(float inProb, float inBackoff)
    :prob(inProb)
    ,backoff(inBackoff)
  {}

  float prob, backoff;
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream &out, const LMScores &obj)
|
||||
{
|
||||
out << "(" << obj.prob << "," << obj.backoff << ")" << std::flush;
|
||||
return out;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
class LanguageModel : public StatefulFeatureFunction
|
||||
{
|
||||
public:
|
||||
LanguageModel(size_t startInd, const std::string &line);
|
||||
virtual ~LanguageModel();
|
||||
|
||||
virtual void Load(System &system);
|
||||
|
||||
virtual void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
virtual const Moses::FFState* EmptyHypothesisState(const Manager &mgr, const Phrase &input) const;
|
||||
@ -35,6 +67,11 @@ public:
|
||||
protected:
|
||||
std::string m_path;
|
||||
Moses::FactorType m_factorType;
|
||||
size_t m_order;
|
||||
|
||||
MorphTrie<const Moses::Factor*, LMScores> m_root;
|
||||
SCORE m_oov;
|
||||
|
||||
};
|
||||
|
||||
#endif /* LANGUAGEMODEL_H_ */
|
||||
|
@ -17,19 +17,19 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
Node::Node()
|
||||
PhraseTableMemory::Node::Node()
|
||||
{}
|
||||
|
||||
Node::~Node()
|
||||
PhraseTableMemory::Node::~Node()
|
||||
{
|
||||
}
|
||||
|
||||
void Node::AddRule(Phrase &source, TargetPhrase *target)
|
||||
void PhraseTableMemory::Node::AddRule(Phrase &source, TargetPhrase *target)
|
||||
{
|
||||
AddRule(source, target, 0);
|
||||
}
|
||||
|
||||
Node &Node::AddRule(Phrase &source, TargetPhrase *target, size_t pos)
|
||||
PhraseTableMemory::Node &PhraseTableMemory::Node::AddRule(Phrase &source, TargetPhrase *target, size_t pos)
|
||||
{
|
||||
if (pos == source.GetSize()) {
|
||||
TargetPhrases *tp = m_targetPhrases.get();
|
||||
@ -48,7 +48,7 @@ Node &Node::AddRule(Phrase &source, TargetPhrase *target, size_t pos)
|
||||
}
|
||||
}
|
||||
|
||||
TargetPhrases::shared_const_ptr Node::Find(const PhraseBase &source, size_t pos) const
|
||||
TargetPhrases::shared_const_ptr PhraseTableMemory::Node::Find(const PhraseBase &source, size_t pos) const
|
||||
{
|
||||
assert(source.GetSize());
|
||||
if (pos == source.GetSize()) {
|
||||
|
@ -9,26 +9,27 @@
|
||||
#define PHRASETABLEMEMORY_H_
|
||||
#include "PhraseTable.h"
|
||||
|
||||
class Node
|
||||
{
|
||||
public:
|
||||
Node();
|
||||
~Node();
|
||||
void AddRule(Phrase &source, TargetPhrase *target);
|
||||
TargetPhrases::shared_const_ptr Find(const PhraseBase &source, size_t pos = 0) const;
|
||||
|
||||
protected:
|
||||
typedef boost::unordered_map<Word, Node, Moses::UnorderedComparer<Word>, Moses::UnorderedComparer<Word> > Children;
|
||||
Children m_children;
|
||||
TargetPhrases::shared_ptr m_targetPhrases;
|
||||
|
||||
Node &AddRule(Phrase &source, TargetPhrase *target, size_t pos);
|
||||
|
||||
};
|
||||
|
||||
|
||||
class PhraseTableMemory : public PhraseTable
|
||||
{
|
||||
//////////////////////////////////////
|
||||
// Node of the in-memory phrase table: child nodes are keyed by Word, and a
// node can also hold a collection of target phrases.
class Node
|
||||
{
|
||||
public:
|
||||
Node();
|
||||
~Node();
|
||||
// Entry point for adding a rule; forwards to the positional overload
// declared below, starting at position 0.
void AddRule(Phrase &source, TargetPhrase *target);
|
||||
// Looks up source starting at word position pos (0 by default).
// NOTE(review): presumably yields an empty pointer when nothing matches -
// the body is not fully visible here, confirm.
TargetPhrases::shared_const_ptr Find(const PhraseBase &source, size_t pos = 0) const;
|
||||
|
||||
protected:
|
||||
// Children are stored in a hash map; Moses::UnorderedComparer<Word> is
// passed as both the hasher and the equality predicate.
typedef boost::unordered_map<Word, Node, Moses::UnorderedComparer<Word>, Moses::UnorderedComparer<Word> > Children;
|
||||
Children m_children;
|
||||
// Target phrases attached to this node, held through a shared pointer.
TargetPhrases::shared_ptr m_targetPhrases;
|
||||
|
||||
// Positional overload used by the public AddRule; pos is an index into
// source.
Node &AddRule(Phrase &source, TargetPhrase *target, size_t pos);
|
||||
|
||||
};
|
||||
//////////////////////////////////////
|
||||
public:
|
||||
PhraseTableMemory(size_t startInd, const std::string &line);
|
||||
virtual ~PhraseTableMemory();
|
||||
|
@ -10,7 +10,6 @@
|
||||
StatefulFeatureFunction::StatefulFeatureFunction(size_t startInd, const std::string &line)
|
||||
:FeatureFunction(startInd, line)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
StatefulFeatureFunction::~StatefulFeatureFunction() {
|
||||
|
Loading…
Reference in New Issue
Block a user