mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-08 20:46:59 +03:00
LanguageModel, KenLM: avoid StaticData usage
* drop the global lmodel-oov-feature option and configure it on the LM FF config line instead, via the oov-feature=1 (bool) option * drop LanguageModel::GetWeight() * KenLM: use m_verbosity of the FF instead of the IFVERBOSE macro, which uses StaticData * train-model.perl: move language model OOV feature onto LM feature spec line
This commit is contained in:
parent
3b63930943
commit
e36fb96557
@ -229,11 +229,12 @@ Manager::
|
||||
PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best &out)
|
||||
{
|
||||
const LanguageModel &abstract = LanguageModel::GetFirstLM();
|
||||
const float oov_weight = abstract.OOVFeatureEnabled() ? abstract.GetOOVWeight() : 0.0;
|
||||
const StaticData &data = StaticData::Instance();
|
||||
const float lm_weight = data.GetWeights(&abstract)[0];
|
||||
const float oov_weight = abstract.OOVFeatureEnabled() ? data.GetWeights(&abstract)[1] : 0.0;
|
||||
size_t cpl = data.options().cube.pop_limit;
|
||||
size_t nbs = data.options().nbest.nbest_size;
|
||||
search::Config config(abstract.GetWeight() * log_10, cpl, search::NBestConfig(nbs));
|
||||
search::Config config(lm_weight * log_10, cpl, search::NBestConfig(nbs));
|
||||
search::Context<Model> context(config, model);
|
||||
|
||||
size_t size = m_source.GetSize();
|
||||
|
@ -26,7 +26,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "moses/ChartManager.h"
|
||||
#include "moses/FactorCollection.h"
|
||||
#include "moses/Phrase.h"
|
||||
#include "moses/StaticData.h"
|
||||
#include "util/exception.hh"
|
||||
|
||||
using namespace std;
|
||||
@ -35,30 +34,19 @@ namespace Moses
|
||||
{
|
||||
|
||||
LanguageModel::LanguageModel(const std::string &line) :
|
||||
StatefulFeatureFunction(StaticData::Instance().GetLMEnableOOVFeature() ? 2 : 1, line )
|
||||
StatefulFeatureFunction(line, /* registerNow = */ false),
|
||||
m_enableOOVFeature(false)
|
||||
{
|
||||
m_enableOOVFeature = StaticData::Instance().GetLMEnableOOVFeature();
|
||||
// load m_enableOOVFeature via SetParameter() first
|
||||
ReadParameters();
|
||||
this->m_numScoreComponents = this->m_numTuneableComponents = m_enableOOVFeature ? 2 : 1;
|
||||
// register with the correct m_numScoreComponents
|
||||
Register();
|
||||
}
|
||||
|
||||
|
||||
LanguageModel::~LanguageModel() {}
|
||||
|
||||
float LanguageModel::GetWeight() const
|
||||
{
|
||||
//return StaticData::Instance().GetAllWeights().GetScoresForProducer(this)[0];
|
||||
return StaticData::Instance().GetWeights(this)[0];
|
||||
}
|
||||
|
||||
float LanguageModel::GetOOVWeight() const
|
||||
{
|
||||
if (m_enableOOVFeature) {
|
||||
//return StaticData::Instance().GetAllWeights().GetScoresForProducer(this)[1];
|
||||
return StaticData::Instance().GetWeights(this)[1];
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void LanguageModel::IncrementalCallback(Incremental::Manager &manager) const
|
||||
{
|
||||
UTIL_THROW(util::Exception, "Incremental search is only supported by KenLM.");
|
||||
@ -82,7 +70,7 @@ void LanguageModel::EvaluateInIsolation(const Phrase &source
|
||||
|
||||
float estimateScore = fullScore - nGramScore;
|
||||
|
||||
if (StaticData::Instance().GetLMEnableOOVFeature()) {
|
||||
if (m_enableOOVFeature) {
|
||||
vector<float> scores(2), estimateScores(2);
|
||||
scores[0] = nGramScore;
|
||||
scores[1] = oovCount;
|
||||
@ -120,4 +108,13 @@ const LanguageModel &LanguageModel::GetFirstLM()
|
||||
throw std::logic_error("Incremental search only supports one language model.");
|
||||
}
|
||||
|
||||
void LanguageModel::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if(key == "oov-feature") {
|
||||
m_enableOOVFeature = Scan<bool>(value);
|
||||
} else {
|
||||
StatefulFeatureFunction::SetParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Moses
|
||||
|
@ -46,8 +46,6 @@ class LanguageModel : public StatefulFeatureFunction
|
||||
protected:
|
||||
LanguageModel(const std::string &line);
|
||||
|
||||
// This can't be in the constructor for virtual function dispatch reasons
|
||||
|
||||
bool m_enableOOVFeature;
|
||||
|
||||
public:
|
||||
@ -59,9 +57,7 @@ public:
|
||||
return m_enableOOVFeature;
|
||||
}
|
||||
|
||||
float GetWeight() const;
|
||||
float GetOOVWeight() const;
|
||||
|
||||
virtual void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
virtual const FFState* EmptyHypothesisState(const InputType &input) const = 0;
|
||||
|
||||
|
@ -154,7 +154,7 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::stri
|
||||
ReadParameters();
|
||||
|
||||
lm::ngram::Config config;
|
||||
IFVERBOSE(1) {
|
||||
if(this->m_verbosity >= 1) {
|
||||
config.messages = &std::cerr;
|
||||
}
|
||||
else {
|
||||
|
@ -98,6 +98,8 @@ private:
|
||||
|
||||
std::vector<lm::WordIndex> m_lmIdLookup;
|
||||
|
||||
protected:
|
||||
//bool m_oovFeatureEnabled; /// originally from LanguageModel, copied here to separate the interfaces. Called m_enableOOVFeature there
|
||||
};
|
||||
|
||||
} // namespace Moses
|
||||
|
@ -981,6 +981,9 @@ ConvertWeightArgsLM()
|
||||
featureLine += "lazyken=0 ";
|
||||
}
|
||||
|
||||
if(oovWeights.size() > lmIndex)
|
||||
featureLine += "oov-feature=1 ";
|
||||
|
||||
featureLine += "path=" + modelToks[3]; // file
|
||||
|
||||
AddFeature(featureLine);
|
||||
|
@ -63,7 +63,6 @@ StaticData StaticData::s_instance;
|
||||
StaticData::StaticData()
|
||||
: m_sourceStartPosMattersForRecombination(false)
|
||||
, m_requireSortingAfterSourceContext(false)
|
||||
, m_lmEnableOOVFeature(false)
|
||||
, m_isAlwaysCreateDirectTranslationOption(false)
|
||||
, m_currentWeightSetting("default")
|
||||
, m_treeStructure(NULL)
|
||||
@ -284,8 +283,6 @@ ini_oov_options()
|
||||
// m_parameter->SetParameter<string>(m_unknownWordPrefix, "unknown-word-prefix", "UNK" );
|
||||
// m_parameter->SetParameter<string>(m_unknownWordSuffix, "unknown-word-suffix", "" );
|
||||
|
||||
m_parameter->SetParameter(m_lmEnableOOVFeature, "lmodel-oov-feature", false);
|
||||
|
||||
//source word deletion
|
||||
m_parameter->SetParameter(m_wordDeletionEnabled, "phrase-drop-allowed", false );
|
||||
|
||||
|
@ -116,7 +116,6 @@ protected:
|
||||
std::pair<std::string,std::string> m_xmlBrackets; //! strings to use as XML tags' opening and closing brackets. Default are "<" and ">"
|
||||
|
||||
size_t m_lmcache_cleanup_threshold; //! number of translations after which LM claenup is performed (0=never, N=after N translations; default is 1)
|
||||
bool m_lmEnableOOVFeature;
|
||||
|
||||
bool m_isAlwaysCreateDirectTranslationOption;
|
||||
//! constructor. only the 1 static variable can be created
|
||||
@ -366,10 +365,6 @@ public:
|
||||
return m_lmcache_cleanup_threshold;
|
||||
}
|
||||
|
||||
bool GetLMEnableOOVFeature() const {
|
||||
return m_lmEnableOOVFeature;
|
||||
}
|
||||
|
||||
const std::string& GetOutputUnknownsFile() const {
|
||||
return m_outputUnknownsFile;
|
||||
}
|
||||
|
@ -2295,19 +2295,22 @@ sub create_ini {
|
||||
}
|
||||
|
||||
my $lm_oov_prob = 0.1;
|
||||
|
||||
my $lm_extra_options = "";
|
||||
|
||||
if ($_POST_DECODING_TRANSLIT || $_TRANSLITERATION_PHRASE_TABLE){
|
||||
$lm_oov_prob = -100.0;
|
||||
$_LMODEL_OOV_FEATURE = "yes";
|
||||
}
|
||||
|
||||
if ($_LMODEL_OOV_FEATURE) {
|
||||
# enable language model OOV feature
|
||||
$lm_extra_options = " oov-feature=1";
|
||||
}
|
||||
|
||||
$feature_spec .= "$type name=LM$i factor=$f path=$fn order=$o\n";
|
||||
$feature_spec .= "$type name=LM$i factor=$f path=$fn order=$o$lm_extra_options\n";
|
||||
$weight_spec .= "LM$i= 0.5".($_LMODEL_OOV_FEATURE?" $lm_oov_prob":"")."\n";
|
||||
$i++;
|
||||
}
|
||||
if ($_LMODEL_OOV_FEATURE) {
|
||||
print INI "\n# language model OOV feature enabled\n[lmodel-oov-feature]\n1\n\n";
|
||||
}
|
||||
|
||||
# hierarchical model settings
|
||||
print INI "\n";
|
||||
|
Loading…
Reference in New Issue
Block a user