Add [feature] arg. Used for GlobalLexicalModel. Not tested

Hieu Hoang 2012-12-31 16:41:33 +00:00
parent 19b36b12ab
commit 0784921314
5 changed files with 43 additions and 44 deletions

View File

@@ -9,21 +9,39 @@ using namespace std;
namespace Moses
{
GlobalLexicalModel::GlobalLexicalModel(const string &filePath,
const vector< FactorType >& inFactors,
const vector< FactorType >& outFactors)
: StatelessFeatureFunction("GlobalLexicalModel",1)
GlobalLexicalModel::GlobalLexicalModel(const std::string &line)
: StatelessFeatureFunction("GlobalLexicalModel",1)
{
std::cerr << "Creating global lexical model...\n";
string filePath;
vector<FactorType> inputFactors, outputFactors;
vector<string> toks = Tokenize(line);
for (size_t i = 0; i < toks.size(); ++i) {
vector<string> args = Tokenize(toks[i], "=");
if (args[0] == "file") {
CHECK(args.size() == 2);
filePath = args[1];
}
else if (args[0] == "inputFactors") {
inputFactors = Tokenize<FactorType>(args[1],",");
}
else if (args[0] == "outputFactors") {
outputFactors = Tokenize<FactorType>(args[1],",");
}
}
// load model
LoadData( filePath, inFactors, outFactors );
LoadData( filePath, inputFactors, outputFactors );
// define bias word
FactorCollection &factorCollection = FactorCollection::Instance();
m_bias = new Word();
const Factor* factor = factorCollection.AddFactor( Input, inFactors[0], "**BIAS**" );
m_bias->SetFactor( inFactors[0], factor );
const Factor* factor = factorCollection.AddFactor( Input, inputFactors[0], "**BIAS**" );
m_bias->SetFactor( inputFactors[0], factor );
}
GlobalLexicalModel::~GlobalLexicalModel()
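Note: the new constructor is handed the whole [feature] line, leading feature name included, and only reacts to the key=value tokens it recognises (file, inputFactors, outputFactors; factor lists are comma-separated). A line of the following shape would be accepted by the parsing above, with a purely illustrative file path:

    GlobalLexicalModel file=/path/to/global-lexical.gz inputFactors=0 outputFactors=0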

View File

@@ -63,9 +63,7 @@ private:
float GetFromCacheOrScorePhrase( const TargetPhrase& targetPhrase ) const;
public:
GlobalLexicalModel(const std::string &filePath,
const std::vector< FactorType >& inFactors,
const std::vector< FactorType >& outFactors);
GlobalLexicalModel(const std::string &line);
virtual ~GlobalLexicalModel();
void InitializeForInput( Sentence const& in );

View File

@@ -190,6 +190,9 @@ Parameter::Parameter()
AddParam("weight", "weights for ALL models, 1 per line 'WeightName value'. Weight names can be repeated");
AddParam("weight-overwrite", "special parameter for mert. All on 1 line. Overrides weights specified in 'weights' argument");
AddParam("input-scores", "2 numbers on 2 lines - [1] of scores on each edge of a confusion network or lattice input (default=1). [2] Number of 'real' word scores (0 or 1. default=0)");
AddParam("feature", "");
}
Parameter::~Parameter()
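Note: with the "feature" parameter registered, feature lines can be given in the moses.ini configuration under [feature], while their weights still come through the generic "weight" parameter described above ('WeightName value', one per line). A minimal sketch, assuming the weight name is simply the feature name that GetWeights() is later called with:

    [feature]
    GlobalLexicalModel file=/path/to/global-lexical.gz inputFactors=0 outputFactors=0

    [weight]
    GlobalLexicalModel 0.3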

View File

@@ -529,6 +529,20 @@ SetWeight(m_unknownWordPenaltyProducer, weightUnknownWord);
cerr << "XML tags opening and closing brackets for XML input are: " << m_xmlBrackets.first << " and " << m_xmlBrackets.second << endl;
}
// all features
const vector<string> &features = m_parameter->GetParam("feature");
for (size_t i = 0; i < features.size(); ++i) {
const string &line = features[i];
vector<string> toks = Tokenize(line);
if (toks[0] == "GlobalLexicalModel") {
GlobalLexicalModel *model = new GlobalLexicalModel(line);
const vector<float> &weights = m_parameter->GetWeights(toks[0], 0);
SetWeights(model, weights);
}
}
#ifdef HAVE_SYNLM
if (m_parameter->GetParam("slmodel-file").size() > 0) {
if (!LoadSyntacticLanguageModel()) return false;
@@ -539,7 +553,6 @@ SetWeight(m_unknownWordPenaltyProducer, weightUnknownWord);
if (!LoadLanguageModels()) return false;
if (!LoadGenerationTables()) return false;
if (!LoadPhraseTables()) return false;
if (!LoadGlobalLexicalModel()) return false;
if (!LoadGlobalLexicalModelUnlimited()) return false;
if (!LoadDecodeGraphs()) return false;
if (!LoadReferences()) return false;
@@ -765,37 +778,6 @@ bool StaticData::LoadLexicalReorderingModel()
return true;
}
bool StaticData::LoadGlobalLexicalModel()
{
const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-lex"));
const vector<string> &file = m_parameter->GetParam("global-lexical-file");
if (weight.size() != file.size()) {
std::cerr << "number of weights and models for the global lexical model does not match ("
<< weight.size() << " != " << file.size() << ")" << std::endl;
return false;
}
for (size_t i = 0; i < weight.size(); i++ ) {
vector<string> spec = Tokenize<string>(file[i], " ");
if ( spec.size() != 2 ) {
std::cerr << "wrong global lexical model specification: " << file[i] << endl;
return false;
}
vector< string > factors = Tokenize(spec[0],"-");
if ( factors.size() != 2 ) {
std::cerr << "wrong factor definition for global lexical model: " << spec[0] << endl;
return false;
}
vector<FactorType> inputFactors = Tokenize<FactorType>(factors[0],",");
vector<FactorType> outputFactors = Tokenize<FactorType>(factors[1],",");
GlobalLexicalModel *globalLexicalModel = new GlobalLexicalModel( spec[1], inputFactors, outputFactors );
SetWeight(globalLexicalModel, weight[i]);
}
return true;
}
bool StaticData::LoadGlobalLexicalModelUnlimited()
{
const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-glm"));

View File

@@ -52,7 +52,6 @@ namespace Moses
class InputType;
class LexicalReordering;
class GlobalLexicalModel;
class GlobalLexicalModelUnlimited;
class PhraseDictionaryFeature;
class SparsePhraseDictionaryFeature;
@@ -250,7 +249,6 @@ protected:
//! load decoding steps
bool LoadDecodeGraphs();
bool LoadLexicalReorderingModel();
bool LoadGlobalLexicalModel();
bool LoadGlobalLexicalModelUnlimited();
//References used for scoring feature (eg BleuScoreFeature) for online training
bool LoadReferences();