This commit is contained in:
Hieu Hoang 2014-07-15 17:47:36 +01:00
parent 3b0ab6e6de
commit ce014e0b35
7 changed files with 3 additions and 281 deletions

View File

@ -83,7 +83,6 @@
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1211280539" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.790052015" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.957797720" name="LBLLM.h" rcbsApplicability="disable" resourcePath="LM/LBLLM.h" toolsToInvoke=""/>
<sourceEntries>
<entry excluding="LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>

View File

@ -1526,16 +1526,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/Ken.h</locationURI>
</link>
<link>
<name>LM/LBLLM.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/LBLLM.cpp</locationURI>
</link>
<link>
<name>LM/LBLLM.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/LBLLM.h</locationURI>
</link>
<link>
<name>LM/LDHT.cpp</name>
<type>1</type>

View File

@ -94,7 +94,6 @@
#endif
#ifdef LM_LBL
#include "moses/LM/LBLLM.h"
#include "moses/LM/oxlm/LBLLM2.h"
#endif
@ -247,14 +246,9 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME2("DALM", LanguageModelDALM);
#endif
#ifdef LM_LBL
MOSES_FNAME2("LBLLM-LM.old", LBLLM<oxlm::LM>);
MOSES_FNAME2("LBLLM-FactoredLM.old", LBLLM<oxlm::FactoredLM>);
MOSES_FNAME2("LBLLM-FactoredMaxentLM.old", LBLLM<oxlm::FactoredMaxentLM>);
MOSES_FNAME2("LBLLM-LM", LBLLM2<oxlm::LM>);
MOSES_FNAME2("LBLLM-FactoredLM", LBLLM2<oxlm::FactoredLM>);
MOSES_FNAME2("LBLLM-FactoredMaxentLM", LBLLM2<oxlm::FactoredMaxentLM>);
#endif
Add("KENLM", new KenFactory());

View File

@ -94,10 +94,9 @@ if $(with-nplm) {
local with-lbllm = [ option.get "with-lbllm" ] ;
if $(with-lbllm) {
lib lbl : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
obj LBLLM.o : LBLLM.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
obj LBLLM2.o : oxlm/LBLLM2.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
obj Mapper.o : oxlm/Mapper.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
alias lbllm : LBLLM.o LBLLM2.o Mapper.o lbl : : : <cxxflags>-std=c++0x <define>LM_LBL ;
alias lbllm : LBLLM2.o Mapper.o lbl : : : <cxxflags>-std=c++0x <define>LM_LBL ;
dependencies += lbllm ;
lmmacros += LM_LBL ;
}

View File

@ -1,28 +0,0 @@
#include <vector>
#include <boost/archive/text_iarchive.hpp>
#include "LBLLM.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/Hypothesis.h"
using namespace std;
namespace Moses
{
int LBLLMState::Compare(const FFState& other) const
{
const LBLLMState &otherState = static_cast<const LBLLMState&>(other);
if (m_left != otherState.m_left) {
return (m_left < otherState.m_left) ? -1 : +1;
}
else if (m_right != otherState.m_right) {
return (m_right < otherState.m_right) ? -1 : +1;
}
return 0;
}
////////////////////////////////////////////////////////////////
}

View File

@ -1,232 +0,0 @@
#pragma once
#include <string>
#include <boost/shared_ptr.hpp>
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/FFState.h"
#include "moses/Util.h"
#include "moses/ChartHypothesis.h"
// lbl stuff
#include "corpus/corpus.h"
#include "lbl/lbl_features.h"
#include "lbl/model.h"
#include "lbl/process_identifier.h"
#include "lbl/query_cache.h"
#include "lbl/cdec_lbl_mapper.h"
#include "lbl/cdec_rule_converter.h"
#include "lbl/cdec_state_converter.h"
#include "oxlm/Mapper.h"
namespace Moses
{
/**
 * Feature-function state for the LBLLM feature: two sequences of integer
 * ids, referred to as the left and right parts of the state.
 */
class LBLLMState : public FFState
{
public:
  /// Creates a state whose left and right id sequences are both empty.
  LBLLMState() {}

  /// Creates a state holding copies of the given left and right id sequences.
  LBLLMState(const std::vector<int> &left, const std::vector<int> &right)
    : m_left(left), m_right(right)
  {}

  /// Three-way comparison against another state; defined out of line.
  int Compare(const FFState& other) const;

private:
  std::vector<int> m_left, m_right;
};
// FF class
/**
 * Stateful feature function that scores hypotheses with an oxlm model.
 *
 * @tparam Model model type; this class uses its load(), getConfig(),
 *               getDict() and predict() members.
 *
 * The base class is constructed with the value 2 and the chart scoring method
 * emits two scores (LM score, OOV score) -- presumably 2 is the number of
 * score components; confirm against StatefulFeatureFunction.
 *
 * Only chart (ChartHypothesis) scoring is implemented; the phrase-based
 * overload adds no scores.
 */
template<class Model>
class LBLLM : public StatefulFeatureFunction
{
public:
  /**
   * @param line feature configuration line; forwarded to the base class and
   *             then parsed via ReadParameters() (see SetParameter()).
   */
  LBLLM(const std::string &line)
    :StatefulFeatureFunction(2, line)
    ,m_order(5)
    ,fid(-1)
    ,fidOOV(-1)
    // Word-id members hold a placeholder until Load() fills them in.
    ,kSTART(-1)
    ,kSTOP(-1)
    ,kUNKNOWN(-1)
    ,kSTAR(-1)
  {
    ReadParameters();
  }

  /**
   * Loads the model from m_path and caches its config and dictionary, builds
   * the word-id mapper, and looks up the ids of the special tokens.
   */
  void Load()
  {
    model.load(m_path);
    config = model.getConfig();

    // For each state, we store at most context_width word ids to the left and
    // to the right and a kSTAR separator. The last bit represents the actual
    // size of the state.
    //int max_state_size = (2 * context_width + 1) * sizeof(int) + 1;
    //FeatureFunction::SetStateSize(max_state_size);

    dict = model.getDict();
    mapper = boost::make_shared<OXLMMapper>(dict);
    //stateConverter = boost::make_shared<CdecStateConverter>(max_state_size - 1);
    //ruleConverter = boost::make_shared<CdecRuleConverter>(mapper, stateConverter);

    kSTART = dict.Convert("<s>");
    kSTOP = dict.Convert("</s>");
    kUNKNOWN = dict.Convert("<unk>");
    kSTAR = dict.Convert("<{STAR}>");
  }

  /// Always reports the feature as usable, whatever the factor mask.
  bool IsUseable(const FactorMask &mask) const {
    return true;
  }

  /// Returns a newly allocated state with empty left and right id sequences.
  virtual const FFState* EmptyHypothesisState(const InputType &input) const {
    return new LBLLMState();
  }

  /// Intentionally empty: no scores are produced at this stage.
  void EvaluateInIsolation(const Phrase &source
                           , const TargetPhrase &targetPhrase
                           , ScoreComponentCollection &scoreBreakdown
                           , ScoreComponentCollection &estimatedFutureScore) const
  {
  }

  /// Intentionally empty: no scores are produced at this stage.
  void EvaluateWithSourceContext(const InputType &input
                                 , const InputPath &inputPath
                                 , const TargetPhrase &targetPhrase
                                 , const StackVec *stackVec
                                 , ScoreComponentCollection &scoreBreakdown
                                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
  {
  }

  /**
   * Phrase-based scoring is not implemented: no scores are added to
   * accumulator.
   *
   * @return a newly allocated empty state. The previous version had an empty
   *         body, so control flowed off the end of a non-void function, which
   *         is undefined behaviour; returning an empty state mirrors
   *         EmptyHypothesisState().
   */
  FFState* EvaluateWhenApplied(
    const Hypothesis& cur_hypo,
    const FFState* prev_state,
    ScoreComponentCollection* accumulator) const
  {
    return new LBLLMState();
  }

  /**
   * Chart scoring: rescans the whole output phrase of hypo and assigns its
   * LM score and OOV score to accumulator.
   *
   * @param hypo        chart hypothesis whose output phrase is scored.
   * @param featureID   unused here.
   * @param accumulator receives the two scores via Assign().
   * @return a newly allocated empty state (no context is carried over).
   */
  FFState* EvaluateWhenApplied(
    const ChartHypothesis &hypo,
    int featureID,
    ScoreComponentCollection* accumulator) const
  {
    /*
    std::vector<int> leftIds, rightIds;
    Phrase leftPhrase, rightPhrase;
    hypo.GetOutputPhrase(1, m_order, leftPhrase);
    hypo.GetOutputPhrase(2, m_order, rightPhrase);
    leftIds = mapper->convert(leftPhrase);
    rightIds = mapper->convert(rightPhrase);
    LBLFeatures leftScores = scoreFullContexts(leftIds);
    LBLFeatures rightScores = scoreFullContexts(rightIds);
    std::vector<float> scores(2);
    scores[0] = leftScores.LMScore + rightScores.LMScore;
    scores[1] = leftScores.OOVScore + rightScores.OOVScore;
    accumulator->PlusEquals(this, scores);
    LBLLMState *state = new LBLLMState(leftIds, rightIds);
    return state;
    */

    // baseline non-optimized scoring
    Phrase phrase;
    hypo.GetOutputPhrase(phrase);
    std::cerr << "phrase=" << phrase << std::endl;

    std::vector<int> ids;
    ids = mapper->convert(phrase);

    LBLFeatures leftScores = scoreFullContexts(ids);

    std::vector<float> scores(2);
    scores[0] = leftScores.LMScore;
    scores[1] = leftScores.OOVScore;
    // Assign (not PlusEquals): the full phrase was rescored from scratch.
    accumulator->Assign(this, scores);

    LBLLMState *state = new LBLLMState();
    return state;
  }

  /**
   * Handles the "path" (model file) and "order" keys; every other key is
   * delegated to the base class.
   */
  void SetParameter(const std::string& key, const std::string& value)
  {
    if (key == "path") {
      m_path = value;
    }
    else if (key == "order") {
      m_order = Scan<int>(value);
    }
    else {
      StatefulFeatureFunction::SetParameter(key, value);
    }
  }

protected:
  std::string m_path;   // model file, from the "path" parameter
  int m_order;          // from the "order" parameter; defaults to 5
  int fid;              // not used within this class
  int fidOOV;           // not used within this class

  oxlm::Dict dict;
  boost::shared_ptr<oxlm::ModelData> config;
  Model model;
  boost::shared_ptr<OXLMMapper> mapper;
  /*
  boost::shared_ptr<oxlm::CdecRuleConverter> ruleConverter;
  boost::shared_ptr<oxlm::CdecStateConverter> stateConverter;
  */

  // Dictionary ids of the special tokens; filled in by Load().
  int kSTART;
  int kSTOP;
  int kUNKNOWN;
  int kSTAR;

  ////////////////////////////////////

  /**
   * Sums scoreContext() over every position that is not a kSTAR separator
   * and lies more than (ngram_order - 1) positions after the most recent
   * kSTAR (or after the virtual position -1 when none has been seen).
   */
  LBLFeatures scoreFullContexts(const std::vector<int>& symbols) const {
    LBLFeatures ret;
    int last_star = -1;
    int context_width = config->ngram_order - 1;
    for (size_t i = 0; i < symbols.size(); ++i) {
      if (symbols[i] == kSTAR) {
        last_star = i;
      } else if (i - last_star > context_width) {
        ret += scoreContext(symbols, i);
      }
    }
    return ret;
  }

  /**
   * Scores the word at position given up to (ngram_order - 1) preceding
   * symbols, collected nearest-first.
   *
   * A short context is padded to full width: with kSTART when the furthest
   * collected symbol is kSTART, otherwise with kUNKNOWN.
   *
   * @return LBLFeatures built from the model prediction and a flag telling
   *         whether the scored word is kUNKNOWN.
   */
  LBLFeatures scoreContext(const std::vector<int>& symbols, int position) const {
    int word = symbols[position];
    int context_width = config->ngram_order - 1;
    std::vector<int> context;
    for (int i = 1; i <= context_width && position - i >= 0; ++i) {
      assert(symbols[position - i] != kSTAR);
      context.push_back(symbols[position - i]);
    }

    if (!context.empty() && context.back() == kSTART) {
      context.resize(context_width, kSTART);
    } else {
      context.resize(context_width, kUNKNOWN);
    }

    double score;
    score = model.predict(word, context);
    return LBLFeatures(score, word == kUNKNOWN);
  }
};
}

View File

@ -63,7 +63,6 @@ public:
kSTART = dict.Convert("<s>");
kSTOP = dict.Convert("</s>");
kUNKNOWN = dict.Convert("<unk>");
kSTAR = dict.Convert("<{STAR}>");
}
@ -85,8 +84,10 @@ public:
double score;
score = model.predict(word, context);
/*
std::string str = DebugContextFactor(contextFactor);
std::cerr << "contextFactor=" << str << " " << score << std::endl;
*/
LMResult ret;
ret.score = score;
@ -112,7 +113,6 @@ protected:
int kSTART;
int kSTOP;
int kUNKNOWN;
int kSTAR;
boost::shared_ptr<OXLMMapper> mapper;