refactor LexicalReordering

This commit is contained in:
Hieu Hoang 2013-01-15 18:32:13 +00:00
parent 97a26104aa
commit 3bbccf078c
8 changed files with 83 additions and 133 deletions

View File

@ -69,12 +69,15 @@
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_thread-mt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="rt"/>
</option>
<option id="gnu.cpp.link.option.userobjs.1542590830" name="Other objects" superClass="gnu.cpp.link.option.userobjs" valueType="userObjs">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/lib64/libboost_system-mt.a&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/lib64/libboost_thread-mt.a&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.983725033" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>

View File

@ -1,7 +1,5 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.656913512">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.656913512" moduleId="org.eclipse.cdt.core.settings" name="Debug">
@ -68,8 +66,11 @@
</tool>
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1593212764" name="FeatureVectorTest.cpp" rcbsApplicability="disable" resourcePath="FeatureVectorTest.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1240665323">
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1240665323" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327"/>
</fileInfo>
<sourceEntries>
<entry excluding="LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/SRI.h|LM/SRI.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
<entry excluding="FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/SRI.h|LM/SRI.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>

View File

@ -5,63 +5,50 @@
#include "LexicalReorderingState.h"
#include "StaticData.h"
using namespace std;
namespace Moses
{
// NOTE(review): this span looks like a rendered diff hunk in which the
// five-argument constructor and the single-string constructor are interleaved
// without +/- markers (two signatures, repeated switch arms). It is not
// compilable as shown; the comments below only describe what each visible
// line does.
//
// Five-argument form: receives the factor lists, a configuration to copy, the
// table path and the weights as separate arguments. The number of score
// components handed to the base class comes from the configuration.
LexicalReordering::LexicalReordering(std::vector<FactorType>& f_factors,
std::vector<FactorType>& e_factors,
const LexicalReorderingConfiguration& configuration,
const std::string &filePath,
const std::vector<float>& weights)
: StatefulFeatureFunction("LexicalReordering",
configuration.GetNumScoreComponents()),
m_configuration(new LexicalReorderingConfiguration(configuration))
// Single-string form: receives one feature line and passes a fixed count of 6
// score components to the base class.
LexicalReordering::LexicalReordering(const std::string &line)
: StatefulFeatureFunction("LexicalReordering", 6)
{
m_configuration->SetScoreProducer(this);
std::cerr << "Creating lexical reordering...\n";
std::cerr << "weights: ";
for(size_t w = 0; w < weights.size(); ++w) {
std::cerr << weights[w] << " ";
}
std::cerr << "\n";
std::cerr << "Initializing LexicalReordering.." << std::endl;
// Split the feature line into fields. tokens[1] builds the configuration,
// tokens[2] and tokens[3] are parsed as the f and e factor lists, and
// tokens[4] is read further down as the table path. No check of tokens.size()
// is visible in this span.
vector<string> tokens = Tokenize(line);
m_configuration = new LexicalReorderingConfiguration(tokens[1]);
m_configuration->SetScoreProducer(this);
m_modelTypeString = m_configuration->GetModelString();
vector<FactorType> f_factors = Tokenize<FactorType>(tokens[2]);
vector<FactorType> e_factors = Tokenize<FactorType>(tokens[3]);
// Keep only the factor lists required by the configured condition:
// E keeps the e side, F keeps the f side, FE keeps both (the E arm falls
// through into the F arm unless the condition is exactly E). An empty
// required list is reported through UserMessage and terminates via exit(1).
switch(m_configuration->GetCondition()) {
case LexicalReorderingConfiguration::FE:
case LexicalReorderingConfiguration::E:
m_factorsE = e_factors;
if(m_factorsE.empty()) {
UserMessage::Add("TL factor mask for lexical reordering is unexpectedly empty");
case LexicalReorderingConfiguration::FE:
case LexicalReorderingConfiguration::E:
m_factorsE = e_factors;
if(m_factorsE.empty()) {
UserMessage::Add("TL factor mask for lexical reordering is unexpectedly empty");
exit(1);
}
if(m_configuration->GetCondition() == LexicalReorderingConfiguration::E)
break; // else fall through
case LexicalReorderingConfiguration::F:
m_factorsF = f_factors;
if(m_factorsF.empty()) {
UserMessage::Add("SL factor mask for lexical reordering is unexpectedly empty");
exit(1);
}
break;
default:
UserMessage::Add("Unknown conditioning option!");
exit(1);
}
if(m_configuration->GetCondition() == LexicalReorderingConfiguration::E)
break; // else fall through
case LexicalReorderingConfiguration::F:
m_factorsF = f_factors;
if(m_factorsF.empty()) {
UserMessage::Add("SL factor mask for lexical reordering is unexpectedly empty");
exit(1);
}
break;
default:
UserMessage::Add("Unknown conditioning option!");
exit(1);
}
// Reconcile the supplied weight count with the configuration: surplus weights
// are registered as additional score components, while too few weights are
// reported and terminate via exit(1).
size_t numberOfScoreComponents = m_configuration->GetNumScoreComponents();
if (weights.size() > numberOfScoreComponents) {
m_configuration->SetAdditionalScoreComponents(weights.size() - numberOfScoreComponents);
} else if(weights.size() < numberOfScoreComponents) {
std::ostringstream os;
os << "Lexical reordering model (type " << m_modelTypeString << "): expected " << numberOfScoreComponents << " weights, got " << weights.size() << std::endl;
UserMessage::Add(os.str());
exit(1);
}
// Stores the weights on the StaticData singleton; the const_cast is needed
// because Instance() is used here as a const reference.
const_cast<StaticData&>(StaticData::Instance()).SetWeights(this, weights);
// Load the reordering table for the selected factor lists; the last argument
// is an empty factor vector.
const string &filePath = tokens[4];
m_table = LexicalReorderingTable::LoadAvailable(filePath, m_factorsF, m_factorsE, std::vector<FactorType>());
}
LexicalReordering::~LexicalReordering()

View File

@ -26,25 +26,21 @@ class InputType;
*/
class LexicalReordering : public StatefulFeatureFunction {
public:
LexicalReordering(std::vector<FactorType>& f_factors,
std::vector<FactorType>& e_factors,
const LexicalReorderingConfiguration& configuration,
const std::string &filePath,
const std::vector<float>& weights);
virtual ~LexicalReordering();
virtual FFState* Evaluate(const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
LexicalReordering(const std::string &line);
virtual ~LexicalReordering();
virtual FFState* Evaluate(const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
void InitializeForInput(const InputType& i){
m_table->InitializeForInput(i);
}
Scores GetProb(const Phrase& f, const Phrase& e) const;
void InitializeForInput(const InputType& i){
m_table->InitializeForInput(i);
}
Scores GetProb(const Phrase& f, const Phrase& e) const;
virtual FFState* EvaluateChart(const ChartHypothesis&,
int /* featureID */,
ScoreComponentCollection*) const {

View File

@ -434,12 +434,18 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName, const
}
/**
 * Append one feature specification line to the list stored under the
 * "feature" key of m_setting.
 *
 * @param line the feature line to record; it is copied into the list.
 */
void Parameter::AddFeature(const std::string &line)
{
  m_setting["feature"].push_back(line);
}
void Parameter::ConvertWeightArgsDistortion()
{
const string oldWeightName = "weight-d";
// distortion / lex distortion
PARAM_VEC &oldWeights = m_setting[oldWeightName];
const PARAM_VEC &oldWeights = GetParam(oldWeightName);
if (oldWeights.size() > 0)
{
@ -449,10 +455,10 @@ void Parameter::ConvertWeightArgsDistortion()
// everything but the last is lex reordering model
size_t currOldInd = 1;
PARAM_VEC &lextable = m_setting["distortion-file"];
const PARAM_VEC &lextable = GetParam("distortion-file");
for (size_t indTable = 0; indTable < lextable.size(); ++indTable) {
string &line = lextable[indTable];
const string &line = lextable[indTable];
vector<string> toks = Tokenize(line);
size_t numFF = Scan<size_t>(toks[2]);
@ -468,6 +474,17 @@ void Parameter::ConvertWeightArgsDistortion()
}
SetWeight("LexicalReordering", indTable, weights);
stringstream strme;
strme << "LexicalReordering "
<< toks[1] << " ";
vector<FactorType> factors = Tokenize<FactorType>(toks[0], "-");
CHECK(factors.size() == 2);
strme << factors[0] << " " << factors[1] << " ";
strme << toks[3];
AddFeature(strme.str());
}
}

View File

@ -73,6 +73,7 @@ protected:
void ConvertWeightArgsGeneration(const std::string &oldWeightName, const std::string &newWeightName);
void CreateWeightsMap();
void WeightOverwrite();
void AddFeature(const std::string &line);
public:

View File

@ -605,6 +605,13 @@ SetWeight(m_unknownWordPenaltyProducer, weightUnknownWord);
const vector<float> &weights = m_parameter->GetWeights(feature, featureIndex);
//SetWeights(model, weights);
}
else if (feature == "LexicalReordering") {
LexicalReordering *model = new LexicalReordering(line);
m_reorderModels.push_back(model);
const vector<float> &weights = m_parameter->GetWeights(feature, featureIndex);
SetWeights(model, weights);
}
}
@ -614,7 +621,6 @@ SetWeight(m_unknownWordPenaltyProducer, weightUnknownWord);
}
#endif
if (!LoadLexicalReorderingModel()) return false;
if (!LoadLanguageModels()) return false;
if (!LoadGenerationTables()) return false;
if (!LoadPhraseTables()) return false;
@ -766,66 +772,6 @@ StaticData::~StaticData()
}
#endif
bool StaticData::LoadLexicalReorderingModel()
{
VERBOSE(1, "Loading lexical distortion models...");
const vector<string> fileStr = m_parameter->GetParam("distortion-file");
VERBOSE(1, "have " << fileStr.size() << " models" << std::endl);
//load all models
for(size_t i = 0; i < fileStr.size(); ++i) {
vector<string> spec = Tokenize<string>(fileStr[i], " ");
const vector<float> &weights= m_parameter->GetWeights("LexicalReordering", i);
if(spec.size() != 4) {
UserMessage::Add("Invalid Lexical Reordering Model Specification: " + fileStr[i]);
return false;
}
// spec[0] = factor map
// spec[1] = name
// spec[2] = num weights
// spec[3] = fileName
// decode factor map
vector<FactorType> input, output;
vector<string> inputfactors = Tokenize(spec[0],"-");
if(inputfactors.size() == 2) {
input = Tokenize<FactorType>(inputfactors[0],",");
output = Tokenize<FactorType>(inputfactors[1],",");
} else if(inputfactors.size() == 1) {
//if there is only one side assume it is on e side... why?
output = Tokenize<FactorType>(inputfactors[0],",");
} else {
//format error
return false;
}
string modelType = spec[1];
// decode num weights and fetch weights from array
std::vector<float> mweights;
size_t numWeights = atoi(spec[2].c_str());
if(numWeights > weights.size()) {
UserMessage::Add("Lexicalized distortion model: Not enough weights, add to [weight-d]");
return false;
}
for(size_t k = 0; k < numWeights; ++k) {
mweights.push_back(weights[k]);
}
string filePath = spec[3];
LexicalReordering *reorderModel = new LexicalReordering(input, output, LexicalReorderingConfiguration(modelType), filePath, mweights);
m_reorderModels.push_back(reorderModel);
}
return true;
}
bool StaticData::LoadLanguageModels()
{
if (m_parameter->GetParam("lmodel-file").size() > 0) {

View File

@ -247,7 +247,6 @@ protected:
bool LoadGenerationTables();
//! load decoding steps
bool LoadDecodeGraphs();
bool LoadLexicalReorderingModel();
//References used for scoring feature (eg BleuScoreFeature) for online training
bool LoadReferences();