mirror of https://github.com/moses-smt/mosesdecoder.git (synced 2024-11-13 00:59:02 +03:00)
add [feature] arg. Use for GlobalLexicalModel. Not tested
This commit is contained in:
parent 19b36b12ab
commit 0784921314
@@ -9,21 +9,39 @@ using namespace std;
 
 namespace Moses
 {
-GlobalLexicalModel::GlobalLexicalModel(const string &filePath,
-                                       const vector< FactorType >& inFactors,
-                                       const vector< FactorType >& outFactors)
-  : StatelessFeatureFunction("GlobalLexicalModel",1)
+GlobalLexicalModel::GlobalLexicalModel(const std::string &line)
+  : StatelessFeatureFunction("GlobalLexicalModel",1)
 {
   std::cerr << "Creating global lexical model...\n";
 
+  string filePath;
+  vector<FactorType> inputFactors, outputFactors;
+
+  vector<string> toks = Tokenize(line);
+  for (size_t i = 0; i < toks.size(); ++i) {
+    vector<string> args = Tokenize(toks[i], "=");
+
+    if (args[0] == "file") {
+      CHECK(args.size() == 2);
+      filePath = args[1];
+    }
+    else if (args[0] == "inputFactors") {
+      inputFactors = Tokenize<FactorType>(args[1],",");
+    }
+    else if (args[0] == "outputFactors") {
+      outputFactors = Tokenize<FactorType>(args[1],",");
+    }
+  }
+
   // load model
-  LoadData( filePath, inFactors, outFactors );
+  LoadData( filePath, inputFactors, outputFactors );
 
   // define bias word
   FactorCollection &factorCollection = FactorCollection::Instance();
   m_bias = new Word();
-  const Factor* factor = factorCollection.AddFactor( Input, inFactors[0], "**BIAS**" );
-  m_bias->SetFactor( inFactors[0], factor );
+  const Factor* factor = factorCollection.AddFactor( Input, inputFactors[0], "**BIAS**" );
+  m_bias->SetFactor( inputFactors[0], factor );
+
 }
 
 GlobalLexicalModel::~GlobalLexicalModel()
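For illustration, a minimal usage sketch of the new constructor (not part of the commit; the file name and factor indices are invented). The line is tokenized on whitespace, each token is split on '=', and only the keys file, inputFactors and outputFactors are acted on, so a leading feature-name token such as "GlobalLexicalModel" is simply skipped.

  #include <string>
  #include "GlobalLexicalModel.h"  // header from the Moses source tree; include path assumed

  void ExampleConstructGlobalLexicalModel()
  {
    // Hypothetical spec line of the kind a [feature] entry would provide.
    const std::string line =
        "GlobalLexicalModel file=glm.gz inputFactors=0 outputFactors=0";
    // Parses the key=value pairs, calls LoadData() on the file and builds the bias word.
    Moses::GlobalLexicalModel *model = new Moses::GlobalLexicalModel(line);
    delete model;
  }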
@@ -63,9 +63,7 @@ private:
   float GetFromCacheOrScorePhrase( const TargetPhrase& targetPhrase ) const;
 
 public:
-  GlobalLexicalModel(const std::string &filePath,
-                     const std::vector< FactorType >& inFactors,
-                     const std::vector< FactorType >& outFactors);
+  GlobalLexicalModel(const std::string &line);
   virtual ~GlobalLexicalModel();
 
   void InitializeForInput( Sentence const& in );
@@ -190,6 +190,9 @@ Parameter::Parameter()
   AddParam("weight", "weights for ALL models, 1 per line 'WeightName value'. Weight names can be repeated");
   AddParam("weight-overwrite", "special parameter for mert. All on 1 line. Overrides weights specified in 'weights' argument");
   AddParam("input-scores", "2 numbers on 2 lines - [1] of scores on each edge of a confusion network or lattice input (default=1). [2] Number of 'real' word scores (0 or 1. default=0)");
+
+  AddParam("feature", "");
+
 }
 
 Parameter::~Parameter()
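AddParam("feature", "") registers 'feature' as a legal parameter name, so a [feature] section in moses.ini (or the corresponding command-line switch) is accepted and its lines are stored verbatim; the empty string is merely the help text. The StaticData change below reads those lines back with GetParam("feature"). A minimal retrieval sketch, assuming a Moses::Parameter object that has already loaded a configuration:

  #include <iostream>
  #include <string>
  #include <vector>
  #include "Parameter.h"  // Moses Parameter class; include path assumed

  void DumpFeatureLines(Moses::Parameter &parameter)
  {
    // One string per line of the [feature] section, returned untouched.
    const std::vector<std::string> &lines = parameter.GetParam("feature");
    for (size_t i = 0; i < lines.size(); ++i) {
      std::cerr << "feature spec: " << lines[i] << std::endl;
    }
  }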
@@ -529,6 +529,20 @@ SetWeight(m_unknownWordPenaltyProducer, weightUnknownWord);
     cerr << "XML tags opening and closing brackets for XML input are: " << m_xmlBrackets.first << " and " << m_xmlBrackets.second << endl;
   }
 
+  // all features
+  const vector<string> &features = m_parameter->GetParam("feature");
+  for (size_t i = 0; i < features.size(); ++i) {
+    const string &line = features[i];
+    vector<string> toks = Tokenize(line);
+
+    if (toks[0] == "GlobalLexicalModel") {
+      GlobalLexicalModel *model = new GlobalLexicalModel(line);
+      const vector<float> &weights = m_parameter->GetWeights(toks[0], 0);
+      SetWeights(model, weights);
+    }
+
+  }
+
 #ifdef HAVE_SYNLM
   if (m_parameter->GetParam("slmodel-file").size() > 0) {
     if (!LoadSyntacticLanguageModel()) return false;
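The branch above only recognizes GlobalLexicalModel as toks[0]; other names in the [feature] section fall through silently at this point. The weight itself comes from the separate 'weight' parameter, described above as one 'WeightName value' line per model. A small sketch of the lookup side; the concrete moses.ini contents and the exact name matching inside GetWeights are assumptions here:

  #include <iostream>
  #include <vector>
  #include "Parameter.h"  // Moses Parameter class; include path assumed

  // Hypothetical configuration driving the loop above:
  //   [feature]
  //   GlobalLexicalModel file=glm.gz inputFactors=0 outputFactors=0
  //   [weight]
  //   GlobalLexicalModel 0.3
  void PrintGlobalLexicalModelWeights(Moses::Parameter &parameter)
  {
    // Index 0 selects the weights recorded for the first feature of this name,
    // mirroring GetWeights(toks[0], 0) above.
    const std::vector<float> &weights = parameter.GetWeights("GlobalLexicalModel", 0);
    for (size_t i = 0; i < weights.size(); ++i) {
      std::cerr << "GlobalLexicalModel weight " << i << ": " << weights[i] << std::endl;
    }
  }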
@@ -539,7 +553,6 @@ SetWeight(m_unknownWordPenaltyProducer, weightUnknownWord);
   if (!LoadLanguageModels()) return false;
   if (!LoadGenerationTables()) return false;
   if (!LoadPhraseTables()) return false;
-  if (!LoadGlobalLexicalModel()) return false;
   if (!LoadGlobalLexicalModelUnlimited()) return false;
   if (!LoadDecodeGraphs()) return false;
   if (!LoadReferences()) return false;
@@ -765,37 +778,6 @@ bool StaticData::LoadLexicalReorderingModel()
   return true;
 }
 
-bool StaticData::LoadGlobalLexicalModel()
-{
-  const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-lex"));
-  const vector<string> &file = m_parameter->GetParam("global-lexical-file");
-
-  if (weight.size() != file.size()) {
-    std::cerr << "number of weights and models for the global lexical model does not match ("
-              << weight.size() << " != " << file.size() << ")" << std::endl;
-    return false;
-  }
-
-  for (size_t i = 0; i < weight.size(); i++ ) {
-    vector<string> spec = Tokenize<string>(file[i], " ");
-    if ( spec.size() != 2 ) {
-      std::cerr << "wrong global lexical model specification: " << file[i] << endl;
-      return false;
-    }
-    vector< string > factors = Tokenize(spec[0],"-");
-    if ( factors.size() != 2 ) {
-      std::cerr << "wrong factor definition for global lexical model: " << spec[0] << endl;
-      return false;
-    }
-    vector<FactorType> inputFactors = Tokenize<FactorType>(factors[0],",");
-    vector<FactorType> outputFactors = Tokenize<FactorType>(factors[1],",");
-
-    GlobalLexicalModel *globalLexicalModel = new GlobalLexicalModel( spec[1], inputFactors, outputFactors );
-    SetWeight(globalLexicalModel, weight[i]);
-  }
-  return true;
-}
-
 bool StaticData::LoadGlobalLexicalModelUnlimited()
 {
   const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-glm"));
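The function removed here handled the legacy configuration style: one 'weight-lex' weight per 'global-lexical-file' entry, each entry giving the factor mapping and the model path separated by a space. A sketch of how such an entry was split, using the same Tokenize helpers as the removed code; the entry value itself is invented:

  #include <string>
  #include <vector>
  #include "TypeDef.h"  // Moses FactorType; include path assumed
  #include "Util.h"     // Moses Tokenize() helpers; include path assumed

  void SplitLegacyGlobalLexicalSpec()
  {
    // Hypothetical legacy entry: "inputFactors-outputFactors path".
    const std::string entry = "0-0 glm.gz";
    std::vector<std::string> spec = Moses::Tokenize<std::string>(entry, " ");
    std::vector<std::string> factors = Moses::Tokenize(spec[0], "-");  // {"0", "0"}
    std::vector<Moses::FactorType> in  = Moses::Tokenize<Moses::FactorType>(factors[0], ",");
    std::vector<Moses::FactorType> out = Moses::Tokenize<Moses::FactorType>(factors[1], ",");
    // spec[1] ("glm.gz"), in and out were then handed to the old three-argument constructor.
    (void)in;
    (void)out;
  }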
@@ -52,7 +52,6 @@ namespace Moses
 
 class InputType;
 class LexicalReordering;
-class GlobalLexicalModel;
 class GlobalLexicalModelUnlimited;
 class PhraseDictionaryFeature;
 class SparsePhraseDictionaryFeature;
@@ -250,7 +249,6 @@ protected:
   //! load decoding steps
   bool LoadDecodeGraphs();
   bool LoadLexicalReorderingModel();
-  bool LoadGlobalLexicalModel();
   bool LoadGlobalLexicalModelUnlimited();
   //References used for scoring feature (eg BleuScoreFeature) for online training
   bool LoadReferences();