2013-09-27 12:35:24 +04:00
|
|
|
// This file should be compiled only when the HAVE_SYNLM flag is enabled.
//
// The following ifdef prevents Xcode and other non-bjam build systems
// from attempting to compile this file when HAVE_SYNLM is disabled.
//
|
|
|
|
#ifdef HAVE_SYNLM
|
|
|
|
|
2011-05-13 23:28:23 +04:00
|
|
|
//
|
|
|
|
|
|
|
|
#include "StaticData.h"
|
|
|
|
#include "SyntacticLanguageModel.h"
|
|
|
|
#include "HHMMLangModel-gf.h"
|
|
|
|
#include "TextObsModel.h"
|
|
|
|
#include "SyntacticLanguageModelFiles.h"
|
|
|
|
#include "SyntacticLanguageModelState.h"
|
|
|
|
|
|
|
|
|
|
|
|
namespace Moses
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
// Constructs the feature from its configuration line.
//
// NOTE(review): `line` is not parsed here yet. The loading logic taken from
// StaticData::LoadSyntacticLanguageModel() is preserved below only as a
// comment and still has to be ported.
//
// m_files is set to NULL so that the destructor's `delete m_files` is a
// well-defined no-op rather than a delete of an indeterminate pointer.
// The other members named in the legacy initialiser list
// (m_NumScoreComponents, m_factorType, m_beamWidth) still have no active
// initialiser in this file -- TODO confirm where they are meant to be set.
SyntacticLanguageModel::SyntacticLanguageModel(const std::string &line)
// Legacy member initialisation, kept for reference:
  /*
  : m_NumScoreComponents(weights.size())
  , m_files(new SyntacticLanguageModelFiles<YModel,XModel>(filePath))
  , m_factorType(factorType)
  , m_beamWidth(beamWidth) {
  */
  : m_files(NULL)
{
  /* taken from StaticData::LoadSyntacticLanguageModel()
    cerr << "Loading syntactic language models..." << std::endl;

    const vector<float> weights = Scan<float>(m_parameter->GetParam("weight-slm"));
    const vector<string> files = m_parameter->GetParam("slmodel-file");

    const FactorType factorType = (m_parameter->GetParam("slmodel-factor").size() > 0) ?
      TransformScore(Scan<int>(m_parameter->GetParam("slmodel-factor")[0]))
      : 0;

    const size_t beamWidth = (m_parameter->GetParam("slmodel-beam").size() > 0) ?
      TransformScore(Scan<int>(m_parameter->GetParam("slmodel-beam")[0]))
      : 500;

    if (files.size() < 1) {
      cerr << "No syntactic language model files specified!" << std::endl;
      return false;
    }

    // check if feature is used
    if (weights.size() >= 1) {

      //cout.setf(ios::scientific,ios::floatfield);
      //cerr.setf(ios::scientific,ios::floatfield);

      // create the feature
      m_syntacticLanguageModel = new SyntacticLanguageModel(files,weights,factorType,beamWidth);

      /////////////////////////////////////////
      // BEGIN LANE's UNSTABLE EXPERIMENT :)
      //

      //double ppl = m_syntacticLanguageModel->perplexity();
      //cerr << "Probability is " << ppl << endl;

      //
      // END LANE's UNSTABLE EXPERIMENT
      /////////////////////////////////////////

      if (m_syntacticLanguageModel==NULL) {
        return false;
      }

    }

    return true;

  */
}
|
2011-05-13 23:28:23 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
// Logs the teardown at verbosity level 3, then releases the model files
// object held in m_files.
SyntacticLanguageModel::~SyntacticLanguageModel()
{
  VERBOSE(3,"Destructing SyntacticLanguageModel" << std::endl);

  // m_files is deleted here, so this object is responsible for freeing it.
  delete m_files;
}
|
2011-05-13 23:28:23 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
size_t SyntacticLanguageModel::GetNumScoreComponents() const
|
|
|
|
{
|
|
|
|
return m_NumScoreComponents;
|
|
|
|
}
|
2011-05-13 23:28:23 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
std::string SyntacticLanguageModel::GetScoreProducerDescription() const
|
|
|
|
{
|
|
|
|
return "SyntacticLM";
|
|
|
|
}
|
|
|
|
|
|
|
|
// Allocates the state for a hypothesis that has no target words yet.
//
// The state is built from the model files (m_files) and the beam width
// (m_beamWidth); `input` is not consulted. The returned object is allocated
// with `new` and handed to the caller.
const FFState* SyntacticLanguageModel::EmptyHypothesisState(const InputType &input) const
{
  typedef SyntacticLanguageModelState<YModel,XModel,S,R> ParserState;

  ParserState* const initial = new ParserState(m_files, m_beamWidth);
  return initial;
}
|
2011-05-13 23:28:23 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
/*
  Disabled experimental helper (not compiled):

  double SyntacticLanguageModel::perplexity() {

    SyntacticLanguageModelState<YModel,XModel,S,R> *prev =
      new SyntacticLanguageModelState<YModel,XModel,S,R>(m_files,m_beamWidth);

    std::cerr << "Initial prob:" << "\t" << prev->getProb() <<std::endl;

    std::vector<std::string> words(3);
    words[0] = "no";
    words[1] = ",";
    words[2] = "zxvth";

    for (std::vector<std::string>::iterator i=words.begin();
         i != words.end();
         i++) {

      prev = new SyntacticLanguageModelState<YModel,XModel,S,R>(prev, *i);
      std::cerr << *i << "\t" << prev->getProb() <<std::endl;

    }

    if (true) exit(-1);

    return prev->getProb();

  }
*/
|
|
|
|
// Extends the syntactic LM state over the current target phrase of cur_hypo.
//
// Starting from prev_state, one new state is built per target word using the
// string of the word's factor at m_factorType. After every word the score
// reported by the newest state is passed to accumulator->Assign().
//
// Ownership: prev_state is never deleted here. Each intermediate state
// created inside the loop is deleted as soon as its successor has been
// constructed from it. The final state is returned to the caller.
//
// Returns the state after the last word, or NULL when the target phrase
// contains no words (the loop body never runs in that case).
FFState* SyntacticLanguageModel::Evaluate(const Hypothesis& cur_hypo,
    const FFState* prev_state,
    ScoreComponentCollection* accumulator) const
{
  typedef SyntacticLanguageModelState<YModel,XModel,S,R> ParserState;

  VERBOSE(3,"Evaluating SyntacticLanguageModel for a hypothesis" << std::endl);

  // State the next word is conditioned on; initially the caller's state.
  const ParserState* context = static_cast<const ParserState*>(prev_state);
  // Most recent state allocated by this call (NULL until the first word).
  ParserState* latest = NULL;

  const TargetPhrase& phrase = cur_hypo.GetCurrTargetPhrase();
  const size_t wordCount = phrase.GetSize();

  for (size_t pos = 0; pos < wordCount; ++pos) {

    const Word& token = phrase.GetWord(pos);
    const Factor* tokenFactor = token.GetFactor(m_factorType);
    const std::string& surface = tokenFactor->GetString();

    // Build the successor first, then drop the state it was derived from.
    // On the first word `latest` is NULL, so the delete does nothing and the
    // caller-owned prev_state is left untouched.
    ParserState* extended = new ParserState(context, surface);
    delete latest;
    latest = extended;
    context = latest;

    double score = latest->getScore();
    VERBOSE(3,"SynLM evaluated a score of " << score << std::endl);
    accumulator->Assign( this, score );
  }

  return latest;
}
|
|
|
|
|
2011-05-13 23:28:23 +04:00
|
|
|
}
|
2013-08-29 20:56:25 +04:00
|
|
|
|
|
|
|
#endif
|