This commit is contained in:
Hieu Hoang 2015-10-30 11:45:38 +00:00
parent d163c37674
commit 21c31578d2
6 changed files with 112 additions and 25 deletions

View File

@ -12,6 +12,7 @@ import path ;
Hypothesis.cpp
InputPath.cpp
InputPaths.cpp
LanguageModel.cpp
Main.cpp
Manager.cpp
MemPool.cpp

View File

@ -6,7 +6,11 @@
*/
#include "LanguageModel.h"
#include "System.h"
#include "moses/Util.h"
#include "moses/InputFileStream.h"
using namespace std;
LanguageModel::LanguageModel(size_t startInd, const std::string &line)
:StatefulFeatureFunction(startInd, line)
@ -18,6 +22,50 @@ LanguageModel::~LanguageModel() {
// TODO Auto-generated destructor stub
}
/**
 * Read the model file at m_path and populate the n-gram trie m_root.
 *
 * Each line is split on tabs:
 *   field 0 - a score, parsed as SCORE
 *   field 1 - the n-gram, a space-separated sequence of tokens
 *   field 2 - (optional) a backoff score, parsed as SCORE; 0 when absent
 * Lines with fewer than two fields are ignored. The entry whose n-gram is
 * exactly "<unk>" is not inserted into the trie; its score is kept in m_oov.
 *
 * @param system supplies the vocabulary (GetVocab()) through which every
 *               token is turned into a Moses::Factor pointer.
 */
void LanguageModel::Load(System &system)
{
  Moses::FactorCollection &vocab = system.GetVocab();
  Moses::InputFileStream lmFile(m_path);

  std::string row;
  // rowCount is the 1-based number of the line currently held in row.
  for (size_t rowCount = 1; std::getline(lmFile, row); ++rowCount) {
    // Progress indicator on stderr every 10000 lines.
    if (rowCount % 10000 == 0) {
      std::cerr << rowCount << " ";
    }

    std::vector<std::string> fields;
    Moses::Tokenize(fields, row, "\t");
    const size_t numFields = fields.size();
    if (numFields < 2) {
      continue;
    }
    assert(numFields == 2 || numFields == 3);

    const SCORE ngramProb = Moses::Scan<SCORE>(fields[0]);
    const std::string &ngram = fields[1];

    // The "<unk>" entry only provides the out-of-vocabulary score.
    if (ngram == "<unk>") {
      m_oov = ngramProb;
      continue;
    }

    const SCORE ngramBackoff = (numFields == 3) ? Moses::Scan<SCORE>(fields[2]) : 0.f;

    // Split the n-gram into tokens and map each one to its factor.
    std::vector<std::string> words;
    Moses::Tokenize(words, ngram, " ");

    std::vector<const Moses::Factor*> factors;
    for (std::vector<std::string>::const_iterator word = words.begin(); word != words.end(); ++word) {
      factors.push_back(vocab.AddFactor(*word, false));
    }

    m_root.insert(factors, LMScores(ngramProb, ngramBackoff));
  }
}
void LanguageModel::SetParameter(const std::string& key, const std::string& value)
{
if (key == "path") {
@ -27,7 +75,7 @@ void LanguageModel::SetParameter(const std::string& key, const std::string& valu
m_factorType = Moses::Scan<Moses::FactorType>(value);
}
else if (key == "order") {
m_order = Moses::Scan<size_t>(value);
}
else {
StatefulFeatureFunction::SetParameter(key, value);
@ -36,7 +84,7 @@ void LanguageModel::SetParameter(const std::string& key, const std::string& valu
/**
 * Create the feature state used for an empty hypothesis.
 *
 * The state is a freshly allocated Moses::PointerState holding the address of
 * the trie root (m_root). Neither mgr nor input is consulted.
 * NOTE(review): the object is created with `new`; presumably the caller takes
 * ownership - confirm against the call site.
 */
const Moses::FFState* LanguageModel::EmptyHypothesisState(const Manager &mgr, const Phrase &input) const
{
  const Moses::FFState *rootState = new Moses::PointerState(&m_root);
  return rootState;
}
void
@ -54,4 +102,5 @@ Moses::FFState* LanguageModel::EvaluateWhenApplied(const Manager &mgr,
Scores &score) const
{
return new Moses::PointerState(&m_root);
}

View File

@ -9,14 +9,46 @@
#define LANGUAGEMODEL_H_
#include "StatefulFeatureFunction.h"
#include "TypeDef.h"
#include "moses/Factor.h"
#include "moses/TypeDef.h"
#include "moses/LM/PointerState.h"
#include "MorphoTrie/MorphTrie.h"
////////////////////////////////////////////////////////////////////////////////////////
/**
 * The two scores stored for one n-gram entry: `prob` and `backoff`.
 *
 * A default-constructed value has both scores set to 0. Copy construction and
 * assignment use the compiler-generated member-wise versions.
 */
struct LMScores
{
  // Zero both members so a default-constructed value never carries
  // indeterminate floats that would be undefined behaviour to read or print.
  LMScores()
    :prob(0.f)
    ,backoff(0.f)
  {}

  // @param inProb    value stored in prob
  // @param inBackoff value stored in backoff
  LMScores(float inProb, float inBackoff)
    :prob(inProb)
    ,backoff(inBackoff)
  {}

  float prob;
  float backoff;
};
/**
 * Write an LMScores value to a stream as "(prob,backoff)" and flush the stream.
 *
 * @param out stream to write to
 * @param obj scores to print
 * @return out, to allow chaining
 */
inline std::ostream& operator<<(std::ostream &out, const LMScores &obj)
{
  out << "(" << obj.prob;
  out << "," << obj.backoff;
  out << ")";
  // The stream is flushed after every value, as callers may rely on it.
  out << std::flush;
  return out;
}
////////////////////////////////////////////////////////////////////////////////////////
class LanguageModel : public StatefulFeatureFunction
{
public:
LanguageModel(size_t startInd, const std::string &line);
virtual ~LanguageModel();
virtual void Load(System &system);
virtual void SetParameter(const std::string& key, const std::string& value);
virtual const Moses::FFState* EmptyHypothesisState(const Manager &mgr, const Phrase &input) const;
@ -35,6 +67,11 @@ public:
protected:
std::string m_path;
Moses::FactorType m_factorType;
size_t m_order;
MorphTrie<const Moses::Factor*, LMScores> m_root;
SCORE m_oov;
};
#endif /* LANGUAGEMODEL_H_ */

View File

@ -17,19 +17,19 @@
using namespace std;
Node::Node()
PhraseTableMemory::Node::Node()
{}
Node::~Node()
PhraseTableMemory::Node::~Node()
{
}
void Node::AddRule(Phrase &source, TargetPhrase *target)
void PhraseTableMemory::Node::AddRule(Phrase &source, TargetPhrase *target)
{
AddRule(source, target, 0);
}
Node &Node::AddRule(Phrase &source, TargetPhrase *target, size_t pos)
PhraseTableMemory::Node &PhraseTableMemory::Node::AddRule(Phrase &source, TargetPhrase *target, size_t pos)
{
if (pos == source.GetSize()) {
TargetPhrases *tp = m_targetPhrases.get();
@ -48,7 +48,7 @@ Node &Node::AddRule(Phrase &source, TargetPhrase *target, size_t pos)
}
}
TargetPhrases::shared_const_ptr Node::Find(const PhraseBase &source, size_t pos) const
TargetPhrases::shared_const_ptr PhraseTableMemory::Node::Find(const PhraseBase &source, size_t pos) const
{
assert(source.GetSize());
if (pos == source.GetSize()) {

View File

@ -9,6 +9,10 @@
#define PHRASETABLEMEMORY_H_
#include "PhraseTable.h"
class PhraseTableMemory : public PhraseTable
{
//////////////////////////////////////
class Node
{
public:
@ -25,10 +29,7 @@ protected:
Node &AddRule(Phrase &source, TargetPhrase *target, size_t pos);
};
class PhraseTableMemory : public PhraseTable
{
//////////////////////////////////////
public:
PhraseTableMemory(size_t startInd, const std::string &line);
virtual ~PhraseTableMemory();

View File

@ -10,7 +10,6 @@
// Constructs the stateful feature: forwards startInd and line to the
// FeatureFunction base, then calls ReadParameters().
// NOTE(review): ReadParameters() runs while only this base-class part of the
// object exists. If it dispatches to virtual functions such as SetParameter,
// overrides in derived classes (e.g. LanguageModel::SetParameter) are not
// reached from here - confirm this call is still intended in the constructor.
StatefulFeatureFunction::StatefulFeatureFunction(size_t startInd, const std::string &line)
:FeatureFunction(startInd, line)
{
ReadParameters();
}
StatefulFeatureFunction::~StatefulFeatureFunction() {