One step closer to eliminating the requirement to provide num-features=... in the config file.

Some FFs (Mmsapt, LexicalReordering, many single-value FFs) now provide this number themselves during "registration";
when no weights are specified in the config file, a default weight vector of uniform 1.0 is generated automatically. This eliminates the
need for the user to figure out the exact number of features for each FF, which can get complicated,
e.g. in the case of Mmsapt/PhraseDictionaryBitextSampling.
This commit is contained in:
Ulrich Germann 2015-04-29 20:16:52 +01:00
parent c76f1c338d
commit e4f5c69109
31 changed files with 137 additions and 69 deletions

View File

@ -8,7 +8,7 @@ using namespace std;
namespace Moses
{
CountNonTerms::CountNonTerms(const std::string &line)
:StatelessFeatureFunction(line)
:StatelessFeatureFunction(line,true)
,m_all(true)
,m_sourceSyntax(false)
,m_targetSyntax(false)

View File

@ -30,8 +30,9 @@ using namespace std;
namespace Moses
{
DecodeFeature::DecodeFeature(const std::string &line)
: StatelessFeatureFunction(line)
DecodeFeature::DecodeFeature(const std::string &line, bool registerNow)
: StatelessFeatureFunction(line, registerNow)
, m_container(NULL)
{
VERBOSE(2,"DecodeFeature:" << std::endl);

View File

@ -40,7 +40,7 @@ class DecodeFeature : public StatelessFeatureFunction
{
public:
DecodeFeature(const std::string &line);
DecodeFeature(const std::string &line, bool registerNow);
DecodeFeature(size_t numScoreComponents
, const std::string &line);

View File

@ -1,3 +1,4 @@
#include "util/exception.hh"
#include "moses/FF/Factory.h"
#include "moses/StaticData.h"
@ -146,26 +147,50 @@ protected:
FeatureFactory() {}
};
template <class F> void FeatureFactory::DefaultSetup(F *feature)
template <class F>
void
FeatureFactory
::DefaultSetup(F *feature)
{
StaticData &static_data = StaticData::InstanceNonConst();
const string &featureName = feature->GetScoreProducerDescription();
std::vector<float> weights = static_data.GetParameter()->GetWeights(featureName);
if (feature->IsTuneable() || weights.size()) {
// if it's tuneable, ini file MUST have weights
// even it it's not tuneable, people can still set the weights in the ini file
if (feature->GetNumScoreComponents())
{
if (weights.size() == 0)
{
weights = feature->DefaultWeights();
if (weights.size() == 0)
{
TRACE_ERR("WARNING: No weights specified in config file for FF "
<< featureName << ". This FF does not supply default values.\n"
<< "WARNING: Auto-initializing all weights for this FF to 1.0");
weights.assign(feature->GetNumScoreComponents(),1.0);
}
else
{
TRACE_ERR("WARNING: No weights specified in config file for FF "
<< featureName << ". Using default values supplied by FF.");
}
}
UTIL_THROW_IF2(weights.size() != feature->GetNumScoreComponents(),
"FATAL ERROR: Mismatch in number of features and number "
<< "of weights for Feature Function " << featureName
<< " (features: " << feature->GetNumScoreComponents()
<< " vs. weights: " << weights.size() << ")");
static_data.SetWeights(feature, weights);
}
else if (feature->IsTuneable())
static_data.SetWeights(feature, weights);
} else if (feature->GetNumScoreComponents() > 0) {
std::vector<float> defaultWeights = feature->DefaultWeights();
static_data.SetWeights(feature, defaultWeights);
}
}
namespace
{
template <class F> class DefaultFeatureFactory : public FeatureFactory
template <class F>
class DefaultFeatureFactory : public FeatureFactory
{
public:
void Create(const std::string &line) {

View File

@ -59,7 +59,7 @@ void FeatureFunction::SetupAll(TranslationTask const& ttask)
}
FeatureFunction::
FeatureFunction(const std::string& line)
FeatureFunction(const std::string& line, bool registerNow)
: m_tuneable(true)
, m_requireSortingAfterSourceContext(false)
, m_verbosity(std::numeric_limits<std::size_t>::max())
@ -67,7 +67,8 @@ FeatureFunction(const std::string& line)
, m_index(0)
{
m_numTuneableComponents = m_numScoreComponents;
Initialize(line);
ParseLine(line);
if (registerNow) Register();
}
FeatureFunction::
@ -80,15 +81,14 @@ FeatureFunction(size_t numScoreComponents,
, m_index(0)
{
m_numTuneableComponents = m_numScoreComponents;
Initialize(line);
ParseLine(line);
Register();
}
void
FeatureFunction::
Initialize(const std::string &line)
Register()
{
ParseLine(line);
ScoreComponentCollection::RegisterScoreProducer(this);
s_staticColl.push_back(this);
}
@ -166,7 +166,8 @@ void FeatureFunction::ReadParameters()
std::vector<float> FeatureFunction::DefaultWeights() const
{
UTIL_THROW2(GetScoreProducerDescription() << ": No default weights");
return std::vector<float>(this->m_numScoreComponents,1.0);
// UTIL_THROW2(GetScoreProducerDescription() << ": No default weights");
}
void FeatureFunction::SetTuneableComponents(const std::string& value)

View File

@ -49,7 +49,9 @@ protected:
//In case there's multiple producers with the same description
static std::multiset<std::string> description_counts;
void Initialize(const std::string &line);
void Register();
private:
// void Initialize(const std::string &line);
void ParseLine(const std::string &line);
public:
@ -63,7 +65,7 @@ public:
static void CallChangeSource(InputType * const&input);
// see my note in FeatureFunction.cpp --- UG
FeatureFunction(const std::string &line);
FeatureFunction(const std::string &line, bool initializeNow);
FeatureFunction(size_t numScoreComponents, const std::string &line);
virtual bool IsStateless() const = 0;
virtual ~FeatureFunction();

View File

@ -13,7 +13,7 @@ namespace Moses
InputFeature *InputFeature::s_instance = NULL;
InputFeature::InputFeature(const std::string &line)
: StatelessFeatureFunction(line)
: StatelessFeatureFunction(line,true)
, m_numRealWordCount(0)
{
m_numInputScores = this->m_numScoreComponents;

View File

@ -17,7 +17,7 @@ namespace Moses
{
LexicalReordering::
LexicalReordering(const std::string &line)
: StatefulFeatureFunction(line)
: StatefulFeatureFunction(line,false)
{
VERBOSE(1, "Initializing Lexical Reordering Feature.." << std::endl);
@ -65,13 +65,17 @@ LexicalReordering(const std::string &line)
}
// sanity check: number of default scores
size_t numScores = m_configuration->GetNumScoreComponents();
size_t numScores
= m_numScoreComponents
= m_numTuneableComponents
= m_configuration->GetNumScoreComponents();
UTIL_THROW_IF2(m_haveDefaultScores && m_defaultScores.size() != numScores,
"wrong number of default scores (" << m_defaultScores.size()
<< ") for lexicalized reordering model (expected "
<< m_configuration->GetNumScoreComponents() << ")");
m_configuration->ConfigureSparse(sparseArgs, this);
this->Register();
}
LexicalReordering::

View File

@ -11,7 +11,7 @@ using namespace std;
namespace Moses
{
NieceTerminal::NieceTerminal(const std::string &line)
:StatelessFeatureFunction(line)
:StatelessFeatureFunction(line,true)
,m_hardConstraint(false)
{
ReadParameters();

View File

@ -5,13 +5,15 @@ namespace Moses
std::vector<const StatefulFeatureFunction*> StatefulFeatureFunction::m_statefulFFs;
StatefulFeatureFunction::StatefulFeatureFunction(const std::string &line)
: FeatureFunction(line)
StatefulFeatureFunction
::StatefulFeatureFunction(const std::string &line, bool registerNow)
: FeatureFunction(line, registerNow)
{
m_statefulFFs.push_back(this);
}
StatefulFeatureFunction::StatefulFeatureFunction(size_t numScoreComponents, const std::string &line)
StatefulFeatureFunction
::StatefulFeatureFunction(size_t numScoreComponents, const std::string &line)
: FeatureFunction(numScoreComponents, line)
{
m_statefulFFs.push_back(this);

View File

@ -22,7 +22,7 @@ public:
return m_statefulFFs;
}
StatefulFeatureFunction(const std::string &line);
StatefulFeatureFunction(const std::string &line, bool registerNow);
StatefulFeatureFunction(size_t numScoreComponents, const std::string &line);
/**

View File

@ -5,14 +5,16 @@ namespace Moses
std::vector<const StatelessFeatureFunction*> StatelessFeatureFunction::m_statelessFFs;
StatelessFeatureFunction::StatelessFeatureFunction(const std::string &line)
:FeatureFunction(line)
StatelessFeatureFunction
::StatelessFeatureFunction(const std::string &line, bool registerNow)
: FeatureFunction(line, registerNow)
{
m_statelessFFs.push_back(this);
}
StatelessFeatureFunction::StatelessFeatureFunction(size_t numScoreComponents, const std::string &line)
:FeatureFunction(numScoreComponents, line)
StatelessFeatureFunction
::StatelessFeatureFunction(size_t numScoreComponents, const std::string &line)
: FeatureFunction(numScoreComponents, line)
{
m_statelessFFs.push_back(this);
}

View File

@ -20,7 +20,7 @@ public:
return m_statelessFFs;
}
StatelessFeatureFunction(const std::string &line);
StatelessFeatureFunction(const std::string &line, bool registerNow);
StatelessFeatureFunction(size_t numScoreComponents, const std::string &line);
/**

View File

@ -36,7 +36,7 @@ namespace Moses
std::vector<GenerationDictionary*> GenerationDictionary::s_staticColl;
GenerationDictionary::GenerationDictionary(const std::string &line)
: DecodeFeature(line)
: DecodeFeature(line, true)
{
s_staticColl.push_back(this);

View File

@ -17,7 +17,7 @@ namespace Syntax
std::vector<RuleTableFF*> RuleTableFF::s_instances;
RuleTableFF::RuleTableFF(const std::string &line)
: PhraseDictionary(line)
: PhraseDictionary(line, true)
{
ReadParameters();
// caching for memory pt is pointless

View File

@ -44,7 +44,7 @@ namespace Moses
{
PhraseDictionaryCompact::PhraseDictionaryCompact(const std::string &line)
:PhraseDictionary(line)
:PhraseDictionary(line, true)
,m_inMemory(true)
,m_useAlignmentInfo(true)
,m_hash(10, 16)

View File

@ -44,10 +44,10 @@ CacheColl::~CacheColl()
}
}
PhraseDictionary::PhraseDictionary(const std::string &line)
:DecodeFeature(line)
,m_tableLimit(20) // default
,m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE)
PhraseDictionary::PhraseDictionary(const std::string &line, bool registerNow)
: DecodeFeature(line, registerNow)
, m_tableLimit(20) // default
, m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE)
{
m_id = s_staticColl.size();
s_staticColl.push_back(this);

View File

@ -80,7 +80,7 @@ public:
return s_staticColl;
}
PhraseDictionary(const std::string &line);
PhraseDictionary(const std::string &line, bool registerNow);
virtual ~PhraseDictionary() {
}

View File

@ -10,7 +10,7 @@ namespace Moses
{
PhraseDictionaryDynSuffixArray::
PhraseDictionaryDynSuffixArray(const std::string &line)
: PhraseDictionary(line)
: PhraseDictionary(line, true)
,m_biSA(new BilingualDynSuffixArray())
{
ReadParameters();

View File

@ -36,7 +36,7 @@ PhraseDictionaryDynamicCacheBased *PhraseDictionaryDynamicCacheBased::s_instance
//! constructor
PhraseDictionaryDynamicCacheBased::PhraseDictionaryDynamicCacheBased(const std::string &line)
: PhraseDictionary(line)
: PhraseDictionary(line, true)
{
std::cerr << "Initializing PhraseDictionaryDynamicCacheBased feature..." << std::endl;

View File

@ -26,7 +26,7 @@ namespace Moses
{
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
:PhraseDictionary(line)
:PhraseDictionary(line, true)
{
ReadParameters();
@ -54,7 +54,7 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
}
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(int type, const std::string &line)
:PhraseDictionary(line)
:PhraseDictionary(line, true)
{
if (type == 1) {
// PhraseDictionaryMultiModelCounts

View File

@ -12,7 +12,7 @@ using namespace std;
namespace Moses
{
PhraseDictionaryTransliteration::PhraseDictionaryTransliteration(const std::string &line)
: PhraseDictionary(line)
: PhraseDictionary(line, true)
{
ReadParameters();
UTIL_THROW_IF2(m_mosesDir.empty() ||

View File

@ -26,7 +26,7 @@ namespace Moses
PhraseDictionaryTreeAdaptor::
PhraseDictionaryTreeAdaptor(const std::string &line)
: PhraseDictionary(line)
: PhraseDictionary(line, true)
{
ReadParameters();
}

View File

@ -80,7 +80,7 @@ namespace Moses
{
PhraseDictionaryFuzzyMatch::PhraseDictionaryFuzzyMatch(const std::string &line)
:PhraseDictionary(line)
:PhraseDictionary(line, true)
,m_config(3)
,m_FuzzyMatchWrapper(NULL)
{

View File

@ -35,7 +35,7 @@ using namespace std;
namespace Moses
{
PhraseDictionaryOnDisk::PhraseDictionaryOnDisk(const std::string &line)
: MyBase(line)
: MyBase(line, true)
, m_maxSpanDefault(NOT_FOUND)
, m_maxSpanLabelled(NOT_FOUND)
{

View File

@ -41,7 +41,7 @@ class RuleTableTrie : public PhraseDictionary
{
public:
RuleTableTrie(const std::string &line)
: PhraseDictionary(line) {
: PhraseDictionary(line, true) {
}
virtual ~RuleTableTrie();

View File

@ -7,7 +7,7 @@ using namespace std;
namespace Moses
{
SkeletonPT::SkeletonPT(const std::string &line)
: PhraseDictionary(line)
: PhraseDictionary(line, true)
{
ReadParameters();
}

View File

@ -71,7 +71,7 @@ namespace Moses
Mmsapt::
Mmsapt(string const& line)
: PhraseDictionary(line)
: PhraseDictionary(line, false)
, m_bias_log(NULL)
, m_bias_loglevel(0)
, m_lr_func(NULL)
@ -80,7 +80,9 @@ namespace Moses
// , m_tpc_ctr(0)
, ofactor(1,0)
{
this->init(line);
init(line);
setup_local_feature_functions();
Register();
}
void
@ -382,14 +384,10 @@ namespace Moses
}
void
Mmsapt::
Load(bool with_checks)
Mmsapt
::setup_local_feature_functions()
{
boost::unique_lock<boost::shared_mutex> lock(m_lock);
// can load only once
// UTIL_THROW_IF2(shards.size(),"Mmsapt is already loaded at " << HERE);
// load feature sets
BOOST_FOREACH(string const& fsname, m_feature_set_names)
{
@ -398,7 +396,8 @@ namespace Moses
{
// lexical scores
string lexfile = m_bname + L1 + "-" + L2 + ".lex";
sptr<PScoreLex1<Token> > ff(new PScoreLex1<Token>(param["lex_alpha"],lexfile));
sptr<PScoreLex1<Token> >
ff(new PScoreLex1<Token>(param["lex_alpha"],lexfile));
register_ff(ff,m_active_ff_common);
// these are always computed on pooled data
@ -428,7 +427,19 @@ namespace Moses
}
}
// cerr << "Features: " << Join("|",m_feature_names) << endl;
this->m_numScoreComponents = this->m_feature_names.size();
this->m_numTuneableComponents = this->m_numScoreComponents;
}
void
Mmsapt::
Load(bool with_checks)
{
// load feature functions (i.e., load underlying data bases, if any)
BOOST_FOREACH(sptr<pscorer>& ff, m_active_ff_fix) ff->load();
BOOST_FOREACH(sptr<pscorer>& ff, m_active_ff_dyn) ff->load();
BOOST_FOREACH(sptr<pscorer>& ff, m_active_ff_common) ff->load();
#if 0
if (with_checks)
{
UTIL_THROW_IF2(this->m_feature_names.size() != this->m_numScoreComponents,
@ -437,13 +448,14 @@ namespace Moses
<< ") does not match number specified in Moses config file ("
<< this->m_numScoreComponents << ")!\n";);
}
#endif
// Load corpora. For the time being, we can have one memory-mapped static
// corpus and one in-memory dynamic corpus
// sptr<mmbitext> btfix(new mmbitext());
boost::unique_lock<boost::shared_mutex> lock(m_lock);
btfix.m_num_workers = this->m_workers;
btfix.open(m_bname, L1, L2);
btfix.setDefaultSampleSize(m_default_sample_size);
// shards.push_back(btfix);
btdyn.reset(new imbitext(btfix.V1, btfix.V2, m_default_sample_size, m_workers));
if (m_bias_file.size())
@ -860,4 +872,9 @@ namespace Moses
// return btfix.SetupDocumentBias(bias);
// }
// Default weights for this feature function: one weight of 1.0 for
// every score component it currently reports.
vector<float>
Mmsapt
::DefaultWeights() const
{
  vector<float> uniform(this->GetNumScoreComponents(), 1.);
  return uniform;
}
}

View File

@ -134,6 +134,7 @@ namespace Moses
bool poolCounts;
std::vector<FactorType> ofactor;
void setup_local_feature_functions();
private:
@ -239,6 +240,8 @@ namespace Moses
sptr<DocumentBias>
setupDocumentBias(std::map<std::string,float> const& bias) const;
vector<float> DefaultWeights() const;
};
} // end namespace

View File

@ -66,6 +66,10 @@ namespace Moses {
// does this feature function allow pooling of counts if
// there are no occurrences in the respective corpus?
// load(): hook for loading whatever underlying resources a scorer
// needs. This default implementation is empty; derived scorers
// may override it.
virtual
void
load() { }
};
// base class for 'families' of phrase scorers that have a single

View File

@ -13,19 +13,26 @@ namespace Moses {
class
PScoreLex1 : public PhraseScorer<Token>
{
float m_alpha;
float m_alpha;
string m_lexfile;
public:
LexicalPhraseScorer2<Token> scorer;
PScoreLex1(string const& alpaspec, string const& lexfile)
PScoreLex1(string const& alphaspec, string const& lexfile)
{
this->m_index = -1;
this->m_num_feats = 2;
this->m_feature_names.reserve(2);
this->m_feature_names.push_back("lexfwd");
this->m_feature_names.push_back("lexbwd");
m_alpha = atof(alpaspec.c_str());
scorer.open(lexfile);
m_alpha = atof(alphaspec.c_str());
m_lexfile = lexfile;
}
void
load()
{
scorer.open(m_lexfile);
}
void