This commit is contained in:
Hieu Hoang 2015-10-30 11:45:38 +00:00
parent d163c37674
commit 21c31578d2
6 changed files with 112 additions and 25 deletions

View File

@ -12,6 +12,7 @@ import path ;
Hypothesis.cpp Hypothesis.cpp
InputPath.cpp InputPath.cpp
InputPaths.cpp InputPaths.cpp
LanguageModel.cpp
Main.cpp Main.cpp
Manager.cpp Manager.cpp
MemPool.cpp MemPool.cpp

View File

@ -6,7 +6,11 @@
*/ */
#include "LanguageModel.h" #include "LanguageModel.h"
#include "System.h"
#include "moses/Util.h" #include "moses/Util.h"
#include "moses/InputFileStream.h"
using namespace std;
LanguageModel::LanguageModel(size_t startInd, const std::string &line) LanguageModel::LanguageModel(size_t startInd, const std::string &line)
:StatefulFeatureFunction(startInd, line) :StatefulFeatureFunction(startInd, line)
@ -18,6 +22,50 @@ LanguageModel::~LanguageModel() {
// TODO Auto-generated destructor stub // TODO Auto-generated destructor stub
} }
/**
 * Reads the language model file at m_path and stores its n-grams in m_root.
 *
 * Each line is split on tabs into: score, n-gram, and an optional backoff
 * weight. Lines with fewer than two fields are skipped. The "<unk>" entry is
 * not inserted into the trie; its score is kept in m_oov instead. Every other
 * n-gram is split on spaces, each word is registered with the system
 * vocabulary, and the resulting factor sequence is inserted into m_root with
 * its scores (backoff is 0 when the third field is absent).
 *
 * @param system  provides the vocabulary used to turn words into factors.
 */
void LanguageModel::Load(System &system)
{
  Moses::FactorCollection &vocab = system.GetVocab();
  Moses::InputFileStream lmFile(m_path);

  string row;
  for (size_t rowCount = 1; getline(lmFile, row); ++rowCount) {
    // progress indicator on stderr, once every 10000 lines
    if (rowCount % 10000 == 0) {
      cerr << rowCount << " ";
    }

    vector<string> fields;
    Moses::Tokenize(fields, row, "\t");
    const size_t numFields = fields.size();
    if (numFields < 2) {
      continue;
    }
    assert(numFields == 2 || numFields == 3);

    const SCORE prob = Moses::Scan<SCORE>(fields[0]);
    if (fields[1] == "<unk>") {
      // out-of-vocabulary score is stored separately, not in the trie
      m_oov = prob;
      continue;
    }

    SCORE backoff = 0.f;
    if (numFields == 3) {
      backoff = Moses::Scan<SCORE>(fields[2]);
    }

    // ngram
    vector<string> words;
    Moses::Tokenize(words, fields[1], " ");

    vector<const Moses::Factor*> factorKey;
    for (size_t w = 0; w < words.size(); ++w) {
      factorKey.push_back(vocab.AddFactor(words[w], false));
    }

    m_root.insert(factorKey, LMScores(prob, backoff));
  }
}
void LanguageModel::SetParameter(const std::string& key, const std::string& value) void LanguageModel::SetParameter(const std::string& key, const std::string& value)
{ {
if (key == "path") { if (key == "path") {
@ -27,7 +75,7 @@ void LanguageModel::SetParameter(const std::string& key, const std::string& valu
m_factorType = Moses::Scan<Moses::FactorType>(value); m_factorType = Moses::Scan<Moses::FactorType>(value);
} }
else if (key == "order") { else if (key == "order") {
m_order = Moses::Scan<size_t>(value);
} }
else { else {
StatefulFeatureFunction::SetParameter(key, value); StatefulFeatureFunction::SetParameter(key, value);
@ -36,7 +84,7 @@ void LanguageModel::SetParameter(const std::string& key, const std::string& valu
const Moses::FFState* LanguageModel::EmptyHypothesisState(const Manager &mgr, const Phrase &input) const const Moses::FFState* LanguageModel::EmptyHypothesisState(const Manager &mgr, const Phrase &input) const
{ {
return new Moses::PointerState(&m_root);
} }
void void
@ -54,4 +102,5 @@ Moses::FFState* LanguageModel::EvaluateWhenApplied(const Manager &mgr,
Scores &score) const Scores &score) const
{ {
return new Moses::PointerState(&m_root);
} }

View File

@ -9,14 +9,46 @@
#define LANGUAGEMODEL_H_ #define LANGUAGEMODEL_H_
#include "StatefulFeatureFunction.h" #include "StatefulFeatureFunction.h"
#include "TypeDef.h"
#include "moses/Factor.h"
#include "moses/TypeDef.h" #include "moses/TypeDef.h"
#include "moses/LM/PointerState.h"
#include "MorphoTrie/MorphTrie.h"
////////////////////////////////////////////////////////////////////////////////////////
/**
 * Pair of scores attached to one n-gram: the score read from the first field
 * of a language model line and the (optional) backoff weight from the third.
 */
struct LMScores
{
  // Zero-initialise both scores. Previously a default-constructed LMScores
  // held indeterminate floats, which is undefined behaviour to read.
  // NOTE(review): presumably the trie default-constructs values for interior
  // nodes - confirm against MorphTrie.
  LMScores()
    :prob(0.f)
    ,backoff(0.f)
  {}

  LMScores(const LMScores &copy)
    :prob(copy.prob)
    ,backoff(copy.backoff)
  {}

  // inProb: n-gram score; inBackoff: backoff weight.
  LMScores(float inProb, float inBackoff)
    :prob(inProb)
    ,backoff(inBackoff)
  {}

  float prob, backoff;
};
// Debug output: writes obj as "(prob,backoff)" to out, flushes the stream,
// and returns the stream so calls can be chained.
inline std::ostream& operator<<(std::ostream &out, const LMScores &obj)
{
  return out << "(" << obj.prob << "," << obj.backoff << ")" << std::flush;
}
////////////////////////////////////////////////////////////////////////////////////////
class LanguageModel : public StatefulFeatureFunction class LanguageModel : public StatefulFeatureFunction
{ {
public: public:
LanguageModel(size_t startInd, const std::string &line); LanguageModel(size_t startInd, const std::string &line);
virtual ~LanguageModel(); virtual ~LanguageModel();
virtual void Load(System &system);
virtual void SetParameter(const std::string& key, const std::string& value); virtual void SetParameter(const std::string& key, const std::string& value);
virtual const Moses::FFState* EmptyHypothesisState(const Manager &mgr, const Phrase &input) const; virtual const Moses::FFState* EmptyHypothesisState(const Manager &mgr, const Phrase &input) const;
@ -35,6 +67,11 @@ public:
protected: protected:
std::string m_path; std::string m_path;
Moses::FactorType m_factorType; Moses::FactorType m_factorType;
size_t m_order;
MorphTrie<const Moses::Factor*, LMScores> m_root;
SCORE m_oov;
}; };
#endif /* LANGUAGEMODEL_H_ */ #endif /* LANGUAGEMODEL_H_ */

View File

@ -17,19 +17,19 @@
using namespace std; using namespace std;
Node::Node() PhraseTableMemory::Node::Node()
{} {}
Node::~Node() PhraseTableMemory::Node::~Node()
{ {
} }
void Node::AddRule(Phrase &source, TargetPhrase *target) void PhraseTableMemory::Node::AddRule(Phrase &source, TargetPhrase *target)
{ {
AddRule(source, target, 0); AddRule(source, target, 0);
} }
Node &Node::AddRule(Phrase &source, TargetPhrase *target, size_t pos) PhraseTableMemory::Node &PhraseTableMemory::Node::AddRule(Phrase &source, TargetPhrase *target, size_t pos)
{ {
if (pos == source.GetSize()) { if (pos == source.GetSize()) {
TargetPhrases *tp = m_targetPhrases.get(); TargetPhrases *tp = m_targetPhrases.get();
@ -48,7 +48,7 @@ Node &Node::AddRule(Phrase &source, TargetPhrase *target, size_t pos)
} }
} }
TargetPhrases::shared_const_ptr Node::Find(const PhraseBase &source, size_t pos) const TargetPhrases::shared_const_ptr PhraseTableMemory::Node::Find(const PhraseBase &source, size_t pos) const
{ {
assert(source.GetSize()); assert(source.GetSize());
if (pos == source.GetSize()) { if (pos == source.GetSize()) {

View File

@ -9,26 +9,27 @@
#define PHRASETABLEMEMORY_H_ #define PHRASETABLEMEMORY_H_
#include "PhraseTable.h" #include "PhraseTable.h"
// One node of the in-memory phrase table. Each node owns a map of child
// nodes keyed by Word and, optionally, a set of target phrases.
// NOTE(review): looks like a trie over source-phrase words - confirm against
// the AddRule/Find definitions in the .cpp.
class Node
{
public:
Node();
~Node();
// Adds target under source; forwards to the positional overload with pos 0.
void AddRule(Phrase &source, TargetPhrase *target);
// Looks up the target phrases for source, starting at word index pos.
// The implementation asserts that source is non-empty.
TargetPhrases::shared_const_ptr Find(const PhraseBase &source, size_t pos = 0) const;
protected:
// Child nodes, hashed and compared through Moses::UnorderedComparer<Word>.
typedef boost::unordered_map<Word, Node, Moses::UnorderedComparer<Word>, Moses::UnorderedComparer<Word> > Children;
Children m_children;
// Target phrases held at this node (shared ownership).
TargetPhrases::shared_ptr m_targetPhrases;
// Positional worker behind the public AddRule; pos is the current index
// into source. Returns a reference to a Node.
Node &AddRule(Phrase &source, TargetPhrase *target, size_t pos);
};
class PhraseTableMemory : public PhraseTable class PhraseTableMemory : public PhraseTable
{ {
//////////////////////////////////////
// One node of the in-memory phrase table, nested inside PhraseTableMemory.
// Each node owns a map of child nodes keyed by Word and, optionally, a set
// of target phrases.
// NOTE(review): looks like a trie over source-phrase words - confirm against
// the AddRule/Find definitions in the .cpp.
class Node
{
public:
Node();
~Node();
// Adds target under source; forwards to the positional overload with pos 0.
void AddRule(Phrase &source, TargetPhrase *target);
// Looks up the target phrases for source, starting at word index pos.
// The implementation asserts that source is non-empty.
TargetPhrases::shared_const_ptr Find(const PhraseBase &source, size_t pos = 0) const;
protected:
// Child nodes, hashed and compared through Moses::UnorderedComparer<Word>.
typedef boost::unordered_map<Word, Node, Moses::UnorderedComparer<Word>, Moses::UnorderedComparer<Word> > Children;
Children m_children;
// Target phrases held at this node (shared ownership).
TargetPhrases::shared_ptr m_targetPhrases;
// Positional worker behind the public AddRule; pos is the current index
// into source. Returns a reference to a Node.
Node &AddRule(Phrase &source, TargetPhrase *target, size_t pos);
};
//////////////////////////////////////
public: public:
PhraseTableMemory(size_t startInd, const std::string &line); PhraseTableMemory(size_t startInd, const std::string &line);
virtual ~PhraseTableMemory(); virtual ~PhraseTableMemory();

View File

@ -10,7 +10,6 @@
StatefulFeatureFunction::StatefulFeatureFunction(size_t startInd, const std::string &line) StatefulFeatureFunction::StatefulFeatureFunction(size_t startInd, const std::string &line)
:FeatureFunction(startInd, line) :FeatureFunction(startInd, line)
{ {
ReadParameters();
} }
StatefulFeatureFunction::~StatefulFeatureFunction() { StatefulFeatureFunction::~StatefulFeatureFunction() {