mosesdecoder/moses/SyntacticLanguageModel.cpp

178 lines
4.4 KiB
C++
Raw Normal View History

// This file should be compiled only when the HAVE_SYNLM flag is enabled.
//
// The following ifdef prevents XCode and other non-bjam build systems
// from attempting to compile this file when HAVE_SYNLM is disabled.
//
#ifdef HAVE_SYNLM
//
#include "StaticData.h"
#include "SyntacticLanguageModel.h"
#include "HHMMLangModel-gf.h"
#include "TextObsModel.h"
#include "SyntacticLanguageModelFiles.h"
#include "SyntacticLanguageModelState.h"
namespace Moses
{
2013-05-29 21:16:15 +04:00
// Constructs the feature from its configuration line.
//
// The constructor is only partially ported: `line` is not parsed yet and the
// former loading code (kept below for reference) is disabled. Every member is
// therefore given a fixed fallback so that none of them holds an indeterminate
// value. In particular m_files must be NULL, because the destructor
// unconditionally deletes it.
//
// NOTE(review): with m_files == NULL no model is loaded, so the feature is
// not usable for scoring until the parsing of `line` is implemented.
SyntacticLanguageModel::SyntacticLanguageModel(const std::string &line)
  : m_NumScoreComponents(1)  // Evaluate() assigns a single score -- TODO confirm
  , m_files(NULL)            // nothing loaded yet; keeps `delete m_files` well-defined
  , m_factorType(0)          // same fallback the legacy loader used
  , m_beamWidth(500)         // same fallback the legacy loader used
{
  // Former initializer list, to be restored once `line` is parsed:
  /*
    : m_NumScoreComponents(weights.size())
    , m_files(new SyntacticLanguageModelFiles<YModel,XModel>(filePath))
    , m_factorType(factorType)
    , m_beamWidth(beamWidth) {
  */

  /* taken from StaticData::LoadSyntacticLanguageModel()
  cerr << "Loading syntactic language models..." << std::endl;

  const vector<float> weights = Scan<float>(m_parameter->GetParam("weight-slm"));
  const vector<string> files = m_parameter->GetParam("slmodel-file");

  const FactorType factorType = (m_parameter->GetParam("slmodel-factor").size() > 0) ?
    TransformScore(Scan<int>(m_parameter->GetParam("slmodel-factor")[0]))
    : 0;

  const size_t beamWidth = (m_parameter->GetParam("slmodel-beam").size() > 0) ?
    TransformScore(Scan<int>(m_parameter->GetParam("slmodel-beam")[0]))
    : 500;

  if (files.size() < 1) {
    cerr << "No syntactic language model files specified!" << std::endl;
    return false;
  }

  // check if feature is used
  if (weights.size() >= 1) {

    //cout.setf(ios::scientific,ios::floatfield);
    //cerr.setf(ios::scientific,ios::floatfield);

    // create the feature
    m_syntacticLanguageModel = new SyntacticLanguageModel(files,weights,factorType,beamWidth);

    /////////////////////////////////////////
    // BEGIN LANE's UNSTABLE EXPERIMENT :)
    //

    //double ppl = m_syntacticLanguageModel->perplexity();
    //cerr << "Probability is " << ppl << endl;

    //
    // END LANE's UNSTABLE EXPERIMENT
    /////////////////////////////////////////

    if (m_syntacticLanguageModel==NULL) {
      return false;
    }
  }

  return true;

  */
}
2013-05-29 21:16:15 +04:00
// Emits a trace message through VERBOSE (level 3) and then releases the
// object that m_files points to.
SyntacticLanguageModel::~SyntacticLanguageModel()
{
  VERBOSE(3, "Destructing SyntacticLanguageModel" << std::endl);

  // m_files is deleted here, so this object acts as its owner.
  delete m_files;
}
2013-05-29 21:16:15 +04:00
// Reports the number of score components of this feature, i.e. the value
// currently stored in m_NumScoreComponents.
size_t SyntacticLanguageModel::GetNumScoreComponents() const
{
  const size_t componentCount = m_NumScoreComponents;
  return componentCount;
}
2013-05-29 21:16:15 +04:00
// Returns the fixed label "SyntacticLM" that describes this score producer.
std::string SyntacticLanguageModel::GetScoreProducerDescription() const
{
  std::string description("SyntacticLM");
  return description;
}
// Creates the state a hypothesis starts from, before any target word has
// been added.
//
// The state is built from the model files (m_files) and the beam width
// (m_beamWidth); `input` is not consulted. The object is allocated with
// `new` and not retained here, so the caller is presumably responsible for
// deleting it.
const FFState* SyntacticLanguageModel::EmptyHypothesisState(const InputType &input) const
{
  return new SyntacticLanguageModelState<YModel,XModel,S,R>(m_files,m_beamWidth);
}
2013-05-29 21:16:15 +04:00
/*
// Disabled debugging helper, kept for reference.
double SyntacticLanguageModel::perplexity() {

  SyntacticLanguageModelState<YModel,XModel,S,R> *prev =
    new SyntacticLanguageModelState<YModel,XModel,S,R>(m_files,m_beamWidth);

  std::cerr << "Initial prob:" << "\t" << prev->getProb() <<std::endl;

  std::vector<std::string> words(3);
  words[0] = "no";
  words[1] = ",";
  words[2] = "zxvth";

  for (std::vector<std::string>::iterator i=words.begin();
       i != words.end();
       i++) {

    prev = new SyntacticLanguageModelState<YModel,XModel,S,R>(prev, *i);
    std::cerr << *i << "\t" << prev->getProb() <<std::endl;
  }

  if (true) exit(-1);
  return prev->getProb();
}
*/
// Scores the current target phrase of cur_hypo word by word and returns the
// state reached after its last word.
//
// Starting from prev_state, one new SyntacticLanguageModelState is created
// per target word, using the string of the factor selected by m_factorType.
// States created inside this call are deleted as soon as their successor
// exists; prev_state itself is never deleted here.
//
// After every word the score of the newest state is assigned to accumulator
// for this feature, so the value left behind is the one of the last word.
//
// The returned state is allocated with `new` and not retained here, so the
// caller is presumably responsible for deleting it.
//
// NOTE(review): for an empty target phrase the loop never runs, nothing is
// assigned to accumulator and NULL is returned -- confirm callers cope.
// NOTE(review): the factor pointer is dereferenced without a NULL check;
// this assumes every target word carries factor m_factorType.
FFState* SyntacticLanguageModel::Evaluate(const Hypothesis& cur_hypo,
    const FFState* prev_state,
    ScoreComponentCollection* accumulator) const
{
  typedef SyntacticLanguageModelState<YModel,XModel,S,R> State;

  VERBOSE(3,"Evaluating SyntacticLanguageModel for a hypothesis" << std::endl);

  const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
  const size_t numWords = targetPhrase.GetSize();

  State* nextState = NULL;

  for (size_t i = 0; i < numWords; ++i) {
    const Word& word = targetPhrase.GetWord(i);
    const Factor* factor = word.GetFactor(m_factorType);
    const std::string& wordString = factor->GetString();

    if (i == 0) {
      // The first word extends the state handed in by the decoder.
      nextState = new State(static_cast<const State*>(prev_state), wordString);
    } else {
      // Later words extend the state built in the previous iteration, which
      // is no longer needed once its successor has been constructed.
      State* previousState = nextState;
      nextState = new State(previousState, wordString);
      delete previousState;
    }

    const double score = nextState->getScore();
    VERBOSE(3,"SynLM evaluated a score of " << score << std::endl);
    accumulator->Assign( this, score );
  }

  return nextState;
}
}
2013-08-29 20:56:25 +04:00
#endif