2013-11-05 18:37:56 +04:00
|
|
|
|
|
|
|
#include "DALM.h"
|
|
|
|
#include "moses/FactorCollection.h"
|
2013-11-11 22:27:15 +04:00
|
|
|
#include "logger.h"
|
|
|
|
#include "vocabulary.h"
|
|
|
|
#include "lm.h"
|
2013-11-05 18:37:56 +04:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
2013-11-11 18:39:53 +04:00
|
|
|
|
|
|
|
|
2013-11-05 18:37:56 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
2013-11-11 18:39:53 +04:00
|
|
|
/////////////////////////
|
|
|
|
/**
 * Shift an n-gram buffer one slot towards the end and store a new word id
 * at the front.
 *
 * After the call ngram[0] == wid and, for 1 <= i < n, ngram[i] holds the
 * value that was previously in ngram[i-1]. The former last element is dropped.
 *
 * @param ngram buffer holding at least n word ids.
 * @param n     number of slots in ngram; the call is a no-op when n is 0.
 * @param wid   word id to place at index 0.
 */
void push(DALM::VocabId *ngram, size_t n, DALM::VocabId wid){
  // An empty buffer has no slot 0 to write to.
  if(n == 0) return;

  // Walk from the back so every element is copied before it is overwritten.
  // The index must stay >= 1: i is unsigned, so evaluating ngram[i-1] at
  // i == 0 would read far outside the buffer.
  for(size_t i = n-1; i > 0; i--){
    ngram[i] = ngram[i-1];
  }
  ngram[0] = wid;
}
|
|
|
|
|
|
|
|
/**
 * Read the MODEL and WORDS entries from a KEY=VALUE ini file.
 *
 * Each line is split at its first '='. The text before it is the key and
 * everything after it is the value; no whitespace trimming is performed.
 * Lines without '=' and lines with other keys are ignored. If a key occurs
 * more than once, the last occurrence wins.
 *
 * @param inifile path of the ini file to read.
 * @param model   receives the value of the MODEL key; left untouched if the
 *                key is absent or the file cannot be read.
 * @param words   receives the value of the WORDS key; left untouched if the
 *                key is absent or the file cannot be read.
 */
void read_ini(const char *inifile, std::string &model, std::string &words){
  std::ifstream ifs(inifile);
  std::string line;

  while(std::getline(ifs, line)){
    // Keep the full-width size_type: storing the result in a narrower
    // unsigned type truncates npos and makes "not found" look like an index.
    const std::string::size_type pos = line.find('=');
    if(pos == std::string::npos){
      // Not a KEY=VALUE line (blank line, stray text, ...): skip it instead
      // of treating the whole line as both key and value.
      continue;
    }

    const std::string key = line.substr(0, pos);
    const std::string value = line.substr(pos+1);
    if(key=="MODEL"){
      model = value;
    }else if(key=="WORDS"){
      words = value;
    }
  }
}
|
|
|
|
|
|
|
|
/////////////////////////
|
|
|
|
|
|
|
|
|
2013-11-05 18:37:56 +04:00
|
|
|
/**
 * Construct the feature from its configuration line.
 *
 * Forwards the line to LanguageModelSingleFactor, reads the parameters,
 * falls back to factor type 0 when none was configured, and registers the
 * sentence-start / sentence-end factors. The DALM objects themselves are
 * not created here; that happens in Load().
 *
 * @param line configuration line passed on to the base class.
 */
LanguageModelDALM::LanguageModelDALM(const std::string &line)
  :LanguageModelSingleFactor(line)
{
  // These are only allocated in Load() but are deleted unconditionally in
  // the destructor. Start them at NULL so destroying an object that was
  // never loaded does not delete indeterminate pointers.
  m_logger = NULL;
  m_vocab = NULL;
  m_lm = NULL;

  ReadParameters();

  if (m_factorType == NOT_FOUND) {
    m_factorType = 0;
  }

  FactorCollection &factorCollection = FactorCollection::Instance();

  // needed by parent language model classes. Why didn't they set these themselves?
  m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
  m_sentenceStartWord[m_factorType] = m_sentenceStart;

  m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
  m_sentenceEndWord[m_factorType] = m_sentenceEnd;
}
|
|
|
|
|
|
|
|
/**
 * Release the DALM objects owned by this feature.
 *
 * Load() builds the logger first, then the vocabulary (given the logger),
 * then the LM (given both). They are destroyed in the reverse order so that
 * no object outlives something it was constructed with.
 * NOTE(review): whether the DALM destructors actually touch those references
 * is not visible from this file; reverse order is safe either way.
 */
LanguageModelDALM::~LanguageModelDALM()
{
  delete m_lm;
  delete m_vocab;
  delete m_logger;
}
|
|
|
|
|
2013-11-11 18:39:53 +04:00
|
|
|
void LanguageModelDALM::Load()
|
|
|
|
{
|
|
|
|
/////////////////////
|
|
|
|
// READING INIFILE //
|
|
|
|
/////////////////////
|
|
|
|
string model; // Path to the double-array file.
|
|
|
|
string words; // Path to the vocabulary file.
|
|
|
|
read_ini(m_filePath.c_str(), model, words);
|
|
|
|
|
|
|
|
////////////////
|
|
|
|
// LOADING LM //
|
|
|
|
////////////////
|
|
|
|
|
|
|
|
// Preparing a logger object.
|
2013-11-11 21:19:44 +04:00
|
|
|
m_logger = new DALM::Logger(stderr);
|
|
|
|
m_logger->setLevel(DALM::LOGGER_INFO);
|
|
|
|
|
|
|
|
// Load the vocabulary file.
|
|
|
|
m_vocab = new DALM::Vocabulary(words, *m_logger);
|
|
|
|
|
|
|
|
// Load the language model.
|
|
|
|
m_lm = new DALM::LM(model, *m_vocab, *m_logger);
|
2013-11-11 18:39:53 +04:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2013-11-05 18:37:56 +04:00
|
|
|
/**
 * Score an n-gram context.
 *
 * The returned score is simply the number of words in contextFactor and
 * unknown is always false; this function does not query m_lm.
 *
 * @param contextFactor words of the n-gram.
 * @param finalState    if non-NULL, receives the factor (of type
 *                      m_factorType) of the last context word, or NULL when
 *                      contextFactor is empty. May be NULL, in which case no
 *                      state is written.
 * @return the LMResult described above.
 */
LMResult LanguageModelDALM::GetValue(const std::vector<const Word*> &contextFactor, State* finalState) const
{
  LMResult ret;
  ret.score = contextFactor.size();
  ret.unknown = false;

  // use last word as state info
  const Factor *factor = NULL;
  if (!contextFactor.empty()) {
    factor = contextFactor.back()->GetFactor(m_factorType);
  }

  // Callers that do not need the state may pass NULL; do not dereference it.
  if (finalState != NULL) {
    (*finalState) = (State*) factor;
  }

  return ret;
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|