mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 11:28:48 +03:00
cleanup
This commit is contained in:
parent
3b0ab6e6de
commit
ce014e0b35
@ -83,7 +83,6 @@
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1211280539" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.790052015" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.957797720" name="LBLLM.h" rcbsApplicability="disable" resourcePath="LM/LBLLM.h" toolsToInvoke=""/>
|
||||
<sourceEntries>
|
||||
<entry excluding="LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
</sourceEntries>
|
||||
|
@ -1526,16 +1526,6 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/Ken.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LM/LBLLM.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/LBLLM.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LM/LBLLM.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/LBLLM.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LM/LDHT.cpp</name>
|
||||
<type>1</type>
|
||||
|
@ -94,7 +94,6 @@
|
||||
#endif
|
||||
|
||||
#ifdef LM_LBL
|
||||
#include "moses/LM/LBLLM.h"
|
||||
#include "moses/LM/oxlm/LBLLM2.h"
|
||||
#endif
|
||||
|
||||
@ -247,14 +246,9 @@ FeatureRegistry::FeatureRegistry()
|
||||
MOSES_FNAME2("DALM", LanguageModelDALM);
|
||||
#endif
|
||||
#ifdef LM_LBL
|
||||
MOSES_FNAME2("LBLLM-LM.old", LBLLM<oxlm::LM>);
|
||||
MOSES_FNAME2("LBLLM-FactoredLM.old", LBLLM<oxlm::FactoredLM>);
|
||||
MOSES_FNAME2("LBLLM-FactoredMaxentLM.old", LBLLM<oxlm::FactoredMaxentLM>);
|
||||
|
||||
MOSES_FNAME2("LBLLM-LM", LBLLM2<oxlm::LM>);
|
||||
MOSES_FNAME2("LBLLM-FactoredLM", LBLLM2<oxlm::FactoredLM>);
|
||||
MOSES_FNAME2("LBLLM-FactoredMaxentLM", LBLLM2<oxlm::FactoredMaxentLM>);
|
||||
|
||||
#endif
|
||||
|
||||
Add("KENLM", new KenFactory());
|
||||
|
@ -94,10 +94,9 @@ if $(with-nplm) {
|
||||
local with-lbllm = [ option.get "with-lbllm" ] ;
|
||||
if $(with-lbllm) {
|
||||
lib lbl : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
|
||||
obj LBLLM.o : LBLLM.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
|
||||
obj LBLLM2.o : oxlm/LBLLM2.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
|
||||
obj Mapper.o : oxlm/Mapper.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
|
||||
alias lbllm : LBLLM.o LBLLM2.o Mapper.o lbl : : : <cxxflags>-std=c++0x <define>LM_LBL ;
|
||||
alias lbllm : LBLLM2.o Mapper.o lbl : : : <cxxflags>-std=c++0x <define>LM_LBL ;
|
||||
dependencies += lbllm ;
|
||||
lmmacros += LM_LBL ;
|
||||
}
|
||||
|
@ -1,28 +0,0 @@
|
||||
#include <vector>
|
||||
#include <boost/archive/text_iarchive.hpp>
|
||||
#include "LBLLM.h"
|
||||
#include "moses/ScoreComponentCollection.h"
|
||||
#include "moses/Hypothesis.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
int LBLLMState::Compare(const FFState& other) const
|
||||
{
|
||||
const LBLLMState &otherState = static_cast<const LBLLMState&>(other);
|
||||
|
||||
if (m_left != otherState.m_left) {
|
||||
return (m_left < otherState.m_left) ? -1 : +1;
|
||||
}
|
||||
else if (m_right != otherState.m_right) {
|
||||
return (m_right < otherState.m_right) ? -1 : +1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
}
|
||||
|
232
moses/LM/LBLLM.h
232
moses/LM/LBLLM.h
@ -1,232 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include "moses/FF/StatefulFeatureFunction.h"
|
||||
#include "moses/FF/FFState.h"
|
||||
#include "moses/Util.h"
|
||||
#include "moses/ChartHypothesis.h"
|
||||
|
||||
// lbl stuff
|
||||
#include "corpus/corpus.h"
|
||||
#include "lbl/lbl_features.h"
|
||||
#include "lbl/model.h"
|
||||
#include "lbl/process_identifier.h"
|
||||
#include "lbl/query_cache.h"
|
||||
|
||||
#include "lbl/cdec_lbl_mapper.h"
|
||||
#include "lbl/cdec_rule_converter.h"
|
||||
#include "lbl/cdec_state_converter.h"
|
||||
|
||||
#include "oxlm/Mapper.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class LBLLMState : public FFState
|
||||
{
|
||||
std::vector<int> m_left, m_right;
|
||||
public:
|
||||
LBLLMState()
|
||||
{}
|
||||
|
||||
LBLLMState(const std::vector<int> &left, const std::vector<int> &right)
|
||||
:m_left(left)
|
||||
,m_right(right)
|
||||
{}
|
||||
|
||||
int Compare(const FFState& other) const;
|
||||
};
|
||||
|
||||
|
||||
// FF class
|
||||
template<class Model>
|
||||
class LBLLM : public StatefulFeatureFunction
|
||||
{
|
||||
public:
|
||||
LBLLM(const std::string &line)
|
||||
:StatefulFeatureFunction(2, line)
|
||||
,m_order(5)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
void Load()
|
||||
{
|
||||
model.load(m_path);
|
||||
|
||||
config = model.getConfig();
|
||||
int context_width = config->ngram_order - 1;
|
||||
// For each state, we store at most context_width word ids to the left and
|
||||
// to the right and a kSTAR separator. The last bit represents the actual
|
||||
// size of the state.
|
||||
//int max_state_size = (2 * context_width + 1) * sizeof(int) + 1;
|
||||
//FeatureFunction::SetStateSize(max_state_size);
|
||||
|
||||
dict = model.getDict();
|
||||
mapper = boost::make_shared<OXLMMapper>(dict);
|
||||
//stateConverter = boost::make_shared<CdecStateConverter>(max_state_size - 1);
|
||||
//ruleConverter = boost::make_shared<CdecRuleConverter>(mapper, stateConverter);
|
||||
|
||||
kSTART = dict.Convert("<s>");
|
||||
kSTOP = dict.Convert("</s>");
|
||||
kUNKNOWN = dict.Convert("<unk>");
|
||||
kSTAR = dict.Convert("<{STAR}>");
|
||||
}
|
||||
|
||||
bool IsUseable(const FactorMask &mask) const {
|
||||
return true;
|
||||
}
|
||||
virtual const FFState* EmptyHypothesisState(const InputType &input) const {
|
||||
return new LBLLMState();
|
||||
}
|
||||
|
||||
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
FFState* EvaluateWhenApplied(
|
||||
const Hypothesis& cur_hypo,
|
||||
const FFState* prev_state,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
FFState* EvaluateWhenApplied(
|
||||
const ChartHypothesis &hypo,
|
||||
int featureID,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
/*
|
||||
std::vector<int> leftIds, rightIds;
|
||||
Phrase leftPhrase, rightPhrase;
|
||||
hypo.GetOutputPhrase(1, m_order, leftPhrase);
|
||||
hypo.GetOutputPhrase(2, m_order, rightPhrase);
|
||||
|
||||
leftIds = mapper->convert(leftPhrase);
|
||||
rightIds = mapper->convert(rightPhrase);
|
||||
|
||||
LBLFeatures leftScores = scoreFullContexts(leftIds);
|
||||
LBLFeatures rightScores = scoreFullContexts(rightIds);
|
||||
|
||||
std::vector<float> scores(2);
|
||||
scores[0] = leftScores.LMScore + rightScores.LMScore;
|
||||
scores[1] = leftScores.OOVScore + rightScores.OOVScore;
|
||||
|
||||
accumulator->PlusEquals(this, scores);
|
||||
|
||||
LBLLMState *state = new LBLLMState(leftIds, rightIds);
|
||||
return state;
|
||||
*/
|
||||
|
||||
// baseline non-optimized scoring
|
||||
Phrase phrase;
|
||||
hypo.GetOutputPhrase(phrase);
|
||||
std::cerr << "phrase=" << phrase << std::endl;
|
||||
|
||||
std::vector<int> ids;
|
||||
ids = mapper->convert(phrase);
|
||||
|
||||
LBLFeatures leftScores = scoreFullContexts(ids);
|
||||
std::vector<float> scores(2);
|
||||
scores[0] = leftScores.LMScore;
|
||||
scores[1] = leftScores.OOVScore;
|
||||
|
||||
accumulator->Assign(this, scores);
|
||||
|
||||
LBLLMState *state = new LBLLMState();
|
||||
return state;
|
||||
|
||||
}
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "path") {
|
||||
m_path = value;
|
||||
}
|
||||
else if (key == "order") {
|
||||
m_order = Scan<int>(value);
|
||||
}
|
||||
else {
|
||||
StatefulFeatureFunction::SetParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
std::string m_path;
|
||||
int m_order;
|
||||
|
||||
int fid;
|
||||
int fidOOV;
|
||||
oxlm::Dict dict;
|
||||
boost::shared_ptr<oxlm::ModelData> config;
|
||||
Model model;
|
||||
|
||||
boost::shared_ptr<OXLMMapper> mapper;
|
||||
/*
|
||||
boost::shared_ptr<oxlm::CdecRuleConverter> ruleConverter;
|
||||
boost::shared_ptr<oxlm::CdecStateConverter> stateConverter;
|
||||
*/
|
||||
|
||||
int kSTART;
|
||||
int kSTOP;
|
||||
int kUNKNOWN;
|
||||
int kSTAR;
|
||||
|
||||
////////////////////////////////////
|
||||
LBLFeatures scoreFullContexts(const vector<int>& symbols) const {
|
||||
LBLFeatures ret;
|
||||
int last_star = -1;
|
||||
int context_width = config->ngram_order - 1;
|
||||
for (size_t i = 0; i < symbols.size(); ++i) {
|
||||
if (symbols[i] == kSTAR) {
|
||||
last_star = i;
|
||||
} else if (i - last_star > context_width) {
|
||||
ret += scoreContext(symbols, i);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
LBLFeatures scoreContext(const vector<int>& symbols, int position) const {
|
||||
int word = symbols[position];
|
||||
int context_width = config->ngram_order - 1;
|
||||
vector<int> context;
|
||||
for (int i = 1; i <= context_width && position - i >= 0; ++i) {
|
||||
assert(symbols[position - i] != kSTAR);
|
||||
context.push_back(symbols[position - i]);
|
||||
}
|
||||
|
||||
if (!context.empty() && context.back() == kSTART) {
|
||||
context.resize(context_width, kSTART);
|
||||
} else {
|
||||
context.resize(context_width, kUNKNOWN);
|
||||
}
|
||||
|
||||
double score;
|
||||
score = model.predict(word, context);
|
||||
return LBLFeatures(score, word == kUNKNOWN);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
@ -63,7 +63,6 @@ public:
|
||||
kSTART = dict.Convert("<s>");
|
||||
kSTOP = dict.Convert("</s>");
|
||||
kUNKNOWN = dict.Convert("<unk>");
|
||||
kSTAR = dict.Convert("<{STAR}>");
|
||||
}
|
||||
|
||||
|
||||
@ -85,8 +84,10 @@ public:
|
||||
double score;
|
||||
score = model.predict(word, context);
|
||||
|
||||
/*
|
||||
std::string str = DebugContextFactor(contextFactor);
|
||||
std::cerr << "contextFactor=" << str << " " << score << std::endl;
|
||||
*/
|
||||
|
||||
LMResult ret;
|
||||
ret.score = score;
|
||||
@ -112,7 +113,6 @@ protected:
|
||||
int kSTART;
|
||||
int kSTOP;
|
||||
int kUNKNOWN;
|
||||
int kSTAR;
|
||||
|
||||
boost::shared_ptr<OXLMMapper> mapper;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user