LanguageModel, KenLM: avoid StaticData usage

* drop the global lmodel-oov-feature option and move it onto the LM FF config line:
	use the oov-feature=1 (bool) option there instead
* drop LanguageModel::GetWeight()
* KenLM: use m_verbosity of FF instead of IFVERBOSE macro which uses StaticData

* train-model.perl: move language model OOV feature onto LM feature spec line
This commit is contained in:
David Madl 2015-11-10 15:07:06 +00:00
parent 3b63930943
commit e36fb96557
9 changed files with 35 additions and 41 deletions

View File

@ -229,11 +229,12 @@ Manager::
PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best &out)
{
const LanguageModel &abstract = LanguageModel::GetFirstLM();
const float oov_weight = abstract.OOVFeatureEnabled() ? abstract.GetOOVWeight() : 0.0;
const StaticData &data = StaticData::Instance();
const float lm_weight = data.GetWeights(&abstract)[0];
const float oov_weight = abstract.OOVFeatureEnabled() ? data.GetWeights(&abstract)[1] : 0.0;
size_t cpl = data.options().cube.pop_limit;
size_t nbs = data.options().nbest.nbest_size;
search::Config config(abstract.GetWeight() * log_10, cpl, search::NBestConfig(nbs));
search::Config config(lm_weight * log_10, cpl, search::NBestConfig(nbs));
search::Context<Model> context(config, model);
size_t size = m_source.GetSize();

View File

@ -26,7 +26,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/ChartManager.h"
#include "moses/FactorCollection.h"
#include "moses/Phrase.h"
#include "moses/StaticData.h"
#include "util/exception.hh"
using namespace std;
@ -35,30 +34,19 @@ namespace Moses
{
// Constructs the language model feature function from its configuration line.
// NOTE(review): this span contains two base-class initializer lines and two
// assignments to m_enableOOVFeature (one from the StaticData global, one via
// ReadParameters()); presumably the removed and added lines of one diff hunk
// shown together -- confirm which set is live before editing the code.
LanguageModel::LanguageModel(const std::string &line) :
StatefulFeatureFunction(StaticData::Instance().GetLMEnableOOVFeature() ? 2 : 1, line )
StatefulFeatureFunction(line, /* registerNow = */ false),
m_enableOOVFeature(false)
{
m_enableOOVFeature = StaticData::Instance().GetLMEnableOOVFeature();
// load m_enableOOVFeature via SetParameter() first
ReadParameters();
// two score components when the OOV feature is enabled, otherwise one
this->m_numScoreComponents = this->m_numTuneableComponents = m_enableOOVFeature ? 2 : 1;
// register with the correct m_numScoreComponents
Register();
}
// Destructor with an empty body; nothing is released here.
LanguageModel::~LanguageModel() {}
float LanguageModel::GetWeight() const
{
//return StaticData::Instance().GetAllWeights().GetScoresForProducer(this)[0];
return StaticData::Instance().GetWeights(this)[0];
}
float LanguageModel::GetOOVWeight() const
{
if (m_enableOOVFeature) {
//return StaticData::Instance().GetAllWeights().GetScoresForProducer(this)[1];
return StaticData::Instance().GetWeights(this)[1];
} else {
return 0;
}
}
void LanguageModel::IncrementalCallback(Incremental::Manager &manager) const
{
UTIL_THROW(util::Exception, "Incremental search is only supported by KenLM.");
@ -82,7 +70,7 @@ void LanguageModel::EvaluateInIsolation(const Phrase &source
float estimateScore = fullScore - nGramScore;
if (StaticData::Instance().GetLMEnableOOVFeature()) {
if (m_enableOOVFeature) {
vector<float> scores(2), estimateScores(2);
scores[0] = nGramScore;
scores[1] = oovCount;
@ -120,4 +108,13 @@ const LanguageModel &LanguageModel::GetFirstLM()
throw std::logic_error("Incremental search only supports one language model.");
}
// Handles one key/value pair from the feature configuration line.
// The "oov-feature" key is parsed as a bool into m_enableOOVFeature; every
// other key is forwarded unchanged to StatefulFeatureFunction::SetParameter().
void LanguageModel::SetParameter(const std::string& key, const std::string& value)
{
  const bool isOovSwitch = (key == "oov-feature");
  if (!isOovSwitch) {
    StatefulFeatureFunction::SetParameter(key, value);
    return;
  }
  m_enableOOVFeature = Scan<bool>(value);
}
} // namespace Moses

View File

@ -46,8 +46,6 @@ class LanguageModel : public StatefulFeatureFunction
protected:
LanguageModel(const std::string &line);
// This can't be in the constructor for virtual function dispatch reasons
bool m_enableOOVFeature;
public:
@ -59,9 +57,7 @@ public:
return m_enableOOVFeature;
}
float GetWeight() const;
float GetOOVWeight() const;
virtual void SetParameter(const std::string& key, const std::string& value);
virtual const FFState* EmptyHypothesisState(const InputType &input) const = 0;

View File

@ -154,7 +154,7 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::stri
ReadParameters();
lm::ngram::Config config;
IFVERBOSE(1) {
if(this->m_verbosity >= 1) {
config.messages = &std::cerr;
}
else {

View File

@ -98,6 +98,8 @@ private:
std::vector<lm::WordIndex> m_lmIdLookup;
protected:
//bool m_oovFeatureEnabled; /// originally from LanguageModel, copied here to separate the interfaces. Called m_enableOOVFeature there
};
} // namespace Moses

View File

@ -981,6 +981,9 @@ ConvertWeightArgsLM()
featureLine += "lazyken=0 ";
}
if(oovWeights.size() > lmIndex)
featureLine += "oov-feature=1 ";
featureLine += "path=" + modelToks[3]; // file
AddFeature(featureLine);

View File

@ -63,7 +63,6 @@ StaticData StaticData::s_instance;
StaticData::StaticData()
: m_sourceStartPosMattersForRecombination(false)
, m_requireSortingAfterSourceContext(false)
, m_lmEnableOOVFeature(false)
, m_isAlwaysCreateDirectTranslationOption(false)
, m_currentWeightSetting("default")
, m_treeStructure(NULL)
@ -284,8 +283,6 @@ ini_oov_options()
// m_parameter->SetParameter<string>(m_unknownWordPrefix, "unknown-word-prefix", "UNK" );
// m_parameter->SetParameter<string>(m_unknownWordSuffix, "unknown-word-suffix", "" );
m_parameter->SetParameter(m_lmEnableOOVFeature, "lmodel-oov-feature", false);
//source word deletion
m_parameter->SetParameter(m_wordDeletionEnabled, "phrase-drop-allowed", false );

View File

@ -116,7 +116,6 @@ protected:
std::pair<std::string,std::string> m_xmlBrackets; //! strings to use as XML tags' opening and closing brackets. Default are "<" and ">"
size_t m_lmcache_cleanup_threshold; //! number of translations after which LM cleanup is performed (0=never, N=after N translations; default is 1)
bool m_lmEnableOOVFeature;
bool m_isAlwaysCreateDirectTranslationOption;
//! constructor. only the 1 static variable can be created
@ -366,10 +365,6 @@ public:
return m_lmcache_cleanup_threshold;
}
//! returns m_lmEnableOOVFeature, the flag populated from the "lmodel-oov-feature" parameter
bool GetLMEnableOOVFeature() const {
return m_lmEnableOOVFeature;
}
//! returns a const reference to m_outputUnknownsFile
//! NOTE(review): presumably the path of a file that unknown words are written to -- confirm
const std::string& GetOutputUnknownsFile() const {
return m_outputUnknownsFile;
}

View File

@ -2295,19 +2295,22 @@ sub create_ini {
}
my $lm_oov_prob = 0.1;
my $lm_extra_options = "";
if ($_POST_DECODING_TRANSLIT || $_TRANSLITERATION_PHRASE_TABLE){
$lm_oov_prob = -100.0;
$_LMODEL_OOV_FEATURE = "yes";
}
if ($_LMODEL_OOV_FEATURE) {
# enable language model OOV feature
$lm_extra_options = " oov-feature=1";
}
$feature_spec .= "$type name=LM$i factor=$f path=$fn order=$o\n";
$feature_spec .= "$type name=LM$i factor=$f path=$fn order=$o$lm_extra_options\n";
$weight_spec .= "LM$i= 0.5".($_LMODEL_OOV_FEATURE?" $lm_oov_prob":"")."\n";
$i++;
}
if ($_LMODEL_OOV_FEATURE) {
print INI "\n# language model OOV feature enabled\n[lmodel-oov-feature]\n1\n\n";
}
# hierarchical model settings
print INI "\n";