mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 19:37:58 +03:00
refactor LexicalReordering
This commit is contained in:
parent
97a26104aa
commit
3bbccf078c
@ -69,12 +69,15 @@
|
||||
<listOptionValue builtIn="false" value="irstlm"/>
|
||||
<listOptionValue builtIn="false" value="OnDiskPt"/>
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="boost_system-mt"/>
|
||||
<listOptionValue builtIn="false" value="boost_thread-mt"/>
|
||||
<listOptionValue builtIn="false" value="lm"/>
|
||||
<listOptionValue builtIn="false" value="util"/>
|
||||
<listOptionValue builtIn="false" value="rt"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.userobjs.1542590830" name="Other objects" superClass="gnu.cpp.link.option.userobjs" valueType="userObjs">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/lib64/libboost_system-mt.a""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/lib64/libboost_thread-mt.a""/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.983725033" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
|
@ -1,7 +1,5 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?>
|
||||
|
||||
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.656913512">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.656913512" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
@ -68,8 +66,11 @@
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1593212764" name="FeatureVectorTest.cpp" rcbsApplicability="disable" resourcePath="FeatureVectorTest.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1240665323">
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1240665323" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327"/>
|
||||
</fileInfo>
|
||||
<sourceEntries>
|
||||
<entry excluding="LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/SRI.h|LM/SRI.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
<entry excluding="FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/SRI.h|LM/SRI.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
</sourceEntries>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
|
@ -5,63 +5,50 @@
|
||||
#include "LexicalReorderingState.h"
|
||||
#include "StaticData.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
LexicalReordering::LexicalReordering(std::vector<FactorType>& f_factors,
|
||||
std::vector<FactorType>& e_factors,
|
||||
const LexicalReorderingConfiguration& configuration,
|
||||
const std::string &filePath,
|
||||
const std::vector<float>& weights)
|
||||
: StatefulFeatureFunction("LexicalReordering",
|
||||
configuration.GetNumScoreComponents()),
|
||||
m_configuration(new LexicalReorderingConfiguration(configuration))
|
||||
LexicalReordering::LexicalReordering(const std::string &line)
|
||||
: StatefulFeatureFunction("LexicalReordering", 6)
|
||||
{
|
||||
m_configuration->SetScoreProducer(this);
|
||||
std::cerr << "Creating lexical reordering...\n";
|
||||
std::cerr << "weights: ";
|
||||
for(size_t w = 0; w < weights.size(); ++w) {
|
||||
std::cerr << weights[w] << " ";
|
||||
}
|
||||
std::cerr << "\n";
|
||||
std::cerr << "Initializing LexicalReordering.." << std::endl;
|
||||
|
||||
vector<string> tokens = Tokenize(line);
|
||||
|
||||
m_configuration = new LexicalReorderingConfiguration(tokens[1]);
|
||||
m_configuration->SetScoreProducer(this);
|
||||
m_modelTypeString = m_configuration->GetModelString();
|
||||
|
||||
vector<FactorType> f_factors = Tokenize<FactorType>(tokens[2]);
|
||||
vector<FactorType> e_factors = Tokenize<FactorType>(tokens[3]);
|
||||
|
||||
switch(m_configuration->GetCondition()) {
|
||||
case LexicalReorderingConfiguration::FE:
|
||||
case LexicalReorderingConfiguration::E:
|
||||
m_factorsE = e_factors;
|
||||
if(m_factorsE.empty()) {
|
||||
UserMessage::Add("TL factor mask for lexical reordering is unexpectedly empty");
|
||||
case LexicalReorderingConfiguration::FE:
|
||||
case LexicalReorderingConfiguration::E:
|
||||
m_factorsE = e_factors;
|
||||
if(m_factorsE.empty()) {
|
||||
UserMessage::Add("TL factor mask for lexical reordering is unexpectedly empty");
|
||||
exit(1);
|
||||
}
|
||||
if(m_configuration->GetCondition() == LexicalReorderingConfiguration::E)
|
||||
break; // else fall through
|
||||
case LexicalReorderingConfiguration::F:
|
||||
m_factorsF = f_factors;
|
||||
if(m_factorsF.empty()) {
|
||||
UserMessage::Add("SL factor mask for lexical reordering is unexpectedly empty");
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
UserMessage::Add("Unknown conditioning option!");
|
||||
exit(1);
|
||||
}
|
||||
if(m_configuration->GetCondition() == LexicalReorderingConfiguration::E)
|
||||
break; // else fall through
|
||||
case LexicalReorderingConfiguration::F:
|
||||
m_factorsF = f_factors;
|
||||
if(m_factorsF.empty()) {
|
||||
UserMessage::Add("SL factor mask for lexical reordering is unexpectedly empty");
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
UserMessage::Add("Unknown conditioning option!");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
size_t numberOfScoreComponents = m_configuration->GetNumScoreComponents();
|
||||
if (weights.size() > numberOfScoreComponents) {
|
||||
m_configuration->SetAdditionalScoreComponents(weights.size() - numberOfScoreComponents);
|
||||
} else if(weights.size() < numberOfScoreComponents) {
|
||||
std::ostringstream os;
|
||||
os << "Lexical reordering model (type " << m_modelTypeString << "): expected " << numberOfScoreComponents << " weights, got " << weights.size() << std::endl;
|
||||
UserMessage::Add(os.str());
|
||||
exit(1);
|
||||
}
|
||||
|
||||
const_cast<StaticData&>(StaticData::Instance()).SetWeights(this, weights);
|
||||
const string &filePath = tokens[4];
|
||||
|
||||
m_table = LexicalReorderingTable::LoadAvailable(filePath, m_factorsF, m_factorsE, std::vector<FactorType>());
|
||||
|
||||
}
|
||||
|
||||
LexicalReordering::~LexicalReordering()
|
||||
|
@ -26,25 +26,21 @@ class InputType;
|
||||
*/
|
||||
class LexicalReordering : public StatefulFeatureFunction {
|
||||
public:
|
||||
LexicalReordering(std::vector<FactorType>& f_factors,
|
||||
std::vector<FactorType>& e_factors,
|
||||
const LexicalReorderingConfiguration& configuration,
|
||||
const std::string &filePath,
|
||||
const std::vector<float>& weights);
|
||||
virtual ~LexicalReordering();
|
||||
|
||||
virtual FFState* Evaluate(const Hypothesis& cur_hypo,
|
||||
const FFState* prev_state,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
|
||||
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
|
||||
LexicalReordering(const std::string &line);
|
||||
virtual ~LexicalReordering();
|
||||
|
||||
virtual FFState* Evaluate(const Hypothesis& cur_hypo,
|
||||
const FFState* prev_state,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
|
||||
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
|
||||
|
||||
void InitializeForInput(const InputType& i){
|
||||
m_table->InitializeForInput(i);
|
||||
}
|
||||
|
||||
Scores GetProb(const Phrase& f, const Phrase& e) const;
|
||||
|
||||
void InitializeForInput(const InputType& i){
|
||||
m_table->InitializeForInput(i);
|
||||
}
|
||||
|
||||
Scores GetProb(const Phrase& f, const Phrase& e) const;
|
||||
|
||||
virtual FFState* EvaluateChart(const ChartHypothesis&,
|
||||
int /* featureID */,
|
||||
ScoreComponentCollection*) const {
|
||||
|
@ -434,12 +434,18 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName, const
|
||||
|
||||
}
|
||||
|
||||
void Parameter::AddFeature(const std::string &line)
|
||||
{
|
||||
PARAM_VEC &features = m_setting["feature"];
|
||||
features.push_back(line);
|
||||
}
|
||||
|
||||
void Parameter::ConvertWeightArgsDistortion()
|
||||
{
|
||||
const string oldWeightName = "weight-d";
|
||||
|
||||
// distortion / lex distortion
|
||||
PARAM_VEC &oldWeights = m_setting[oldWeightName];
|
||||
const PARAM_VEC &oldWeights = GetParam(oldWeightName);
|
||||
|
||||
if (oldWeights.size() > 0)
|
||||
{
|
||||
@ -449,10 +455,10 @@ void Parameter::ConvertWeightArgsDistortion()
|
||||
// everything but the last is lex reordering model
|
||||
|
||||
size_t currOldInd = 1;
|
||||
PARAM_VEC &lextable = m_setting["distortion-file"];
|
||||
const PARAM_VEC &lextable = GetParam("distortion-file");
|
||||
|
||||
for (size_t indTable = 0; indTable < lextable.size(); ++indTable) {
|
||||
string &line = lextable[indTable];
|
||||
const string &line = lextable[indTable];
|
||||
vector<string> toks = Tokenize(line);
|
||||
|
||||
size_t numFF = Scan<size_t>(toks[2]);
|
||||
@ -468,6 +474,17 @@ void Parameter::ConvertWeightArgsDistortion()
|
||||
}
|
||||
SetWeight("LexicalReordering", indTable, weights);
|
||||
|
||||
stringstream strme;
|
||||
strme << "LexicalReordering "
|
||||
<< toks[1] << " ";
|
||||
|
||||
vector<FactorType> factors = Tokenize<FactorType>(toks[0], "-");
|
||||
CHECK(factors.size() == 2);
|
||||
strme << factors[0] << " " << factors[1] << " ";
|
||||
|
||||
strme << toks[3];
|
||||
|
||||
AddFeature(strme.str());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -73,6 +73,7 @@ protected:
|
||||
void ConvertWeightArgsGeneration(const std::string &oldWeightName, const std::string &newWeightName);
|
||||
void CreateWeightsMap();
|
||||
void WeightOverwrite();
|
||||
void AddFeature(const std::string &line);
|
||||
|
||||
|
||||
public:
|
||||
|
@ -605,6 +605,13 @@ SetWeight(m_unknownWordPenaltyProducer, weightUnknownWord);
|
||||
const vector<float> &weights = m_parameter->GetWeights(feature, featureIndex);
|
||||
//SetWeights(model, weights);
|
||||
}
|
||||
else if (feature == "LexicalReordering") {
|
||||
LexicalReordering *model = new LexicalReordering(line);
|
||||
m_reorderModels.push_back(model);
|
||||
|
||||
const vector<float> &weights = m_parameter->GetWeights(feature, featureIndex);
|
||||
SetWeights(model, weights);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -614,7 +621,6 @@ SetWeight(m_unknownWordPenaltyProducer, weightUnknownWord);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!LoadLexicalReorderingModel()) return false;
|
||||
if (!LoadLanguageModels()) return false;
|
||||
if (!LoadGenerationTables()) return false;
|
||||
if (!LoadPhraseTables()) return false;
|
||||
@ -766,66 +772,6 @@ StaticData::~StaticData()
|
||||
}
|
||||
#endif
|
||||
|
||||
bool StaticData::LoadLexicalReorderingModel()
|
||||
{
|
||||
VERBOSE(1, "Loading lexical distortion models...");
|
||||
const vector<string> fileStr = m_parameter->GetParam("distortion-file");
|
||||
|
||||
VERBOSE(1, "have " << fileStr.size() << " models" << std::endl);
|
||||
|
||||
//load all models
|
||||
for(size_t i = 0; i < fileStr.size(); ++i) {
|
||||
vector<string> spec = Tokenize<string>(fileStr[i], " ");
|
||||
const vector<float> &weights= m_parameter->GetWeights("LexicalReordering", i);
|
||||
|
||||
if(spec.size() != 4) {
|
||||
UserMessage::Add("Invalid Lexical Reordering Model Specification: " + fileStr[i]);
|
||||
return false;
|
||||
}
|
||||
|
||||
// spec[0] = factor map
|
||||
// spec[1] = name
|
||||
// spec[2] = num weights
|
||||
// spec[3] = fileName
|
||||
|
||||
// decode factor map
|
||||
|
||||
vector<FactorType> input, output;
|
||||
vector<string> inputfactors = Tokenize(spec[0],"-");
|
||||
if(inputfactors.size() == 2) {
|
||||
input = Tokenize<FactorType>(inputfactors[0],",");
|
||||
output = Tokenize<FactorType>(inputfactors[1],",");
|
||||
} else if(inputfactors.size() == 1) {
|
||||
//if there is only one side assume it is on e side... why?
|
||||
output = Tokenize<FactorType>(inputfactors[0],",");
|
||||
} else {
|
||||
//format error
|
||||
return false;
|
||||
}
|
||||
|
||||
string modelType = spec[1];
|
||||
|
||||
// decode num weights and fetch weights from array
|
||||
std::vector<float> mweights;
|
||||
size_t numWeights = atoi(spec[2].c_str());
|
||||
if(numWeights > weights.size()) {
|
||||
UserMessage::Add("Lexicalized distortion model: Not enough weights, add to [weight-d]");
|
||||
return false;
|
||||
}
|
||||
|
||||
for(size_t k = 0; k < numWeights; ++k) {
|
||||
mweights.push_back(weights[k]);
|
||||
}
|
||||
|
||||
string filePath = spec[3];
|
||||
|
||||
LexicalReordering *reorderModel = new LexicalReordering(input, output, LexicalReorderingConfiguration(modelType), filePath, mweights);
|
||||
|
||||
m_reorderModels.push_back(reorderModel);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StaticData::LoadLanguageModels()
|
||||
{
|
||||
if (m_parameter->GetParam("lmodel-file").size() > 0) {
|
||||
|
@ -247,7 +247,6 @@ protected:
|
||||
bool LoadGenerationTables();
|
||||
//! load decoding steps
|
||||
bool LoadDecodeGraphs();
|
||||
bool LoadLexicalReorderingModel();
|
||||
//References used for scoring feature (eg BleuScoreFeature) for online training
|
||||
bool LoadReferences();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user