Calculate m_featuresToApply in Load() for PhraseDictionary. Since every phrase dictionary now has to implement Load(), make PhraseDictionary::Load() abstract (pure virtual).

This commit is contained in:
Hieu Hoang 2013-06-14 18:34:47 +01:00
parent 0d6565b9a5
commit f7371cf53d
16 changed files with 84 additions and 49 deletions

View File

@ -1,4 +1,4 @@
#include "UnknownWordPenaltyProducer.h"
namespace Moses namespace Moses
{ {

View File

@ -44,15 +44,6 @@ PhraseDictionary::PhraseDictionary(const std::string &description, const std::st
++ind; ++ind;
} }
} }
// find out which feature function can be applied in this decode step
const std::vector<FeatureFunction*> &allFeatures = FeatureFunction::GetFeatureFunctions();
for (size_t i = 0; i < allFeatures.size(); ++i) {
FeatureFunction *feature = allFeatures[i];
if (feature->IsUseable(m_outputFactors)) {
m_featuresToApply.push_back(feature);
}
}
} }
@ -76,5 +67,17 @@ bool PhraseDictionary::SetParameter(const std::string& key, const std::string& v
return true; return true;
} }
void PhraseDictionary::SetFeaturesToApply()
{
// find out which feature function can be applied in this decode step
const std::vector<FeatureFunction*> &allFeatures = FeatureFunction::GetFeatureFunctions();
for (size_t i = 0; i < allFeatures.size(); ++i) {
FeatureFunction *feature = allFeatures[i];
if (feature->IsUseable(m_outputFactors)) {
m_featuresToApply.push_back(feature);
}
}
}
} }

View File

@ -59,6 +59,8 @@ public:
virtual ~PhraseDictionary() { virtual ~PhraseDictionary() {
} }
//! Pure virtual: every concrete phrase dictionary must provide its own Load().
//! Implementations are expected to call SetFeaturesToApply() first.
virtual void Load() = 0;
//! table limit number. //! table limit number.
size_t GetTableLimit() const { size_t GetTableLimit() const {
return m_tableLimit; return m_tableLimit;
@ -99,7 +101,12 @@ protected:
size_t m_tableLimit; size_t m_tableLimit;
std::string m_filePath; std::string m_filePath;
// Features applied to evaluate target phrases when loading,
// NOT when creating translation options. Those are in DecodeStep.
std::vector<FeatureFunction*> m_featuresToApply; std::vector<FeatureFunction*> m_featuresToApply;
// MUST be called at the start of Load()
void SetFeaturesToApply();
}; };
} }

View File

@ -33,6 +33,8 @@ PhraseDictionaryDynSuffixArray::~PhraseDictionaryDynSuffixArray()
void PhraseDictionaryDynSuffixArray::Load() void PhraseDictionaryDynSuffixArray::Load()
{ {
SetFeaturesToApply();
const StaticData &staticData = StaticData::Instance(); const StaticData &staticData = StaticData::Instance();
vector<float> weight = staticData.GetWeights(this); vector<float> weight = staticData.GetWeights(this);

View File

@ -93,6 +93,8 @@ PhraseDictionaryMultiModel::~PhraseDictionaryMultiModel()
void PhraseDictionaryMultiModel::Load() void PhraseDictionaryMultiModel::Load()
{ {
SetFeaturesToApply();
// since the top X target phrases of the final model are not the same as the top X phrases of each component model, // since the top X target phrases of the final model are not the same as the top X phrases of each component model,
// one could choose a higher value than tableLimit (or 0) here for maximal precision, at a cost of speed. // one could choose a higher value than tableLimit (or 0) here for maximal precision, at a cost of speed.
@ -336,25 +338,27 @@ void PhraseDictionaryMultiModel::CleanUpComponentModels(const InputType &source
} }
} }
const std::vector<float>* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeightsVector() const { const std::vector<float>* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeightsVector() const
{
#ifdef WITH_THREADS #ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_lock_weights); boost::shared_lock<boost::shared_mutex> read_lock(m_lock_weights);
if (m_multimodelweights_tmp.find(boost::this_thread::get_id()) != m_multimodelweights_tmp.end()) { if (m_multimodelweights_tmp.find(boost::this_thread::get_id()) != m_multimodelweights_tmp.end()) {
return &m_multimodelweights_tmp.find(boost::this_thread::get_id())->second; return &m_multimodelweights_tmp.find(boost::this_thread::get_id())->second;
} else { } else {
return NULL; return NULL;
} }
#else #else
return &m_multimodelweights_tmp; return &m_multimodelweights_tmp;
#endif #endif
} }
void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector<float> weights) { void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector<float> weights)
{
#ifdef WITH_THREADS #ifdef WITH_THREADS
boost::unique_lock<boost::shared_mutex> lock(m_lock_weights); boost::unique_lock<boost::shared_mutex> lock(m_lock_weights);
m_multimodelweights_tmp[boost::this_thread::get_id()] = weights; m_multimodelweights_tmp[boost::this_thread::get_id()] = weights;
#else #else
m_multimodelweights_tmp = weights; m_multimodelweights_tmp = weights;
#endif #endif
} }

View File

@ -104,15 +104,16 @@ protected:
PhraseCache& GetPhraseCache() { PhraseCache& GetPhraseCache() {
#ifdef WITH_THREADS #ifdef WITH_THREADS
{ // first try read-only lock {
boost::shared_lock<boost::shared_mutex> read_lock(m_lock_cache); // first try read-only lock
SentenceCache::iterator i = m_sentenceCache.find(boost::this_thread::get_id()); boost::shared_lock<boost::shared_mutex> read_lock(m_lock_cache);
if (i != m_sentenceCache.end()) return i->second; SentenceCache::iterator i = m_sentenceCache.find(boost::this_thread::get_id());
} if (i != m_sentenceCache.end()) return i->second;
boost::unique_lock<boost::shared_mutex> lock(m_lock_cache); }
return m_sentenceCache[boost::this_thread::get_id()]; boost::unique_lock<boost::shared_mutex> lock(m_lock_cache);
return m_sentenceCache[boost::this_thread::get_id()];
#else #else
return m_sentenceCache; return m_sentenceCache;
#endif #endif
} }

View File

@ -122,6 +122,7 @@ PhraseDictionaryMultiModelCounts::~PhraseDictionaryMultiModelCounts()
void PhraseDictionaryMultiModelCounts::Load() void PhraseDictionaryMultiModelCounts::Load()
{ {
SetFeaturesToApply();
for(size_t i = 0; i < m_numModels; ++i) { for(size_t i = 0; i < m_numModels; ++i) {
// phrase table // phrase table

View File

@ -35,6 +35,10 @@ PhraseDictionaryTreeAdaptor(const std::string &line)
PhraseDictionaryTreeAdaptor::~PhraseDictionaryTreeAdaptor() PhraseDictionaryTreeAdaptor::~PhraseDictionaryTreeAdaptor()
{ {
} }
void PhraseDictionaryTreeAdaptor::Load()
{
SetFeaturesToApply();
}
void PhraseDictionaryTreeAdaptor::InitializeForInput(InputType const& source) void PhraseDictionaryTreeAdaptor::InitializeForInput(InputType const& source)
{ {

View File

@ -37,6 +37,7 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary
public: public:
PhraseDictionaryTreeAdaptor(const std::string &line); PhraseDictionaryTreeAdaptor(const std::string &line);
virtual ~PhraseDictionaryTreeAdaptor(); virtual ~PhraseDictionaryTreeAdaptor();
void Load();
// enable/disable caching // enable/disable caching
// you enable caching if you request the target candidates for a source phrase multiple times // you enable caching if you request the target candidates for a source phrase multiple times

View File

@ -30,6 +30,11 @@ PhraseDictionaryALSuffixArray::PhraseDictionaryALSuffixArray(const std::string &
CHECK(m_args.size() == 0); CHECK(m_args.size() == 0);
} }
void PhraseDictionaryALSuffixArray::Load()
{
SetFeaturesToApply();
}
void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source) void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
{ {
// populate with rules for this sentence // populate with rules for this sentence

View File

@ -23,6 +23,7 @@ class PhraseDictionaryALSuffixArray : public PhraseDictionaryMemory
{ {
public: public:
PhraseDictionaryALSuffixArray(const std::string &line); PhraseDictionaryALSuffixArray(const std::string &line);
void Load();
void InitializeForInput(InputType const& source); void InitializeForInput(InputType const& source);
void CleanUpAfterSentenceProcessing(const InputType& source); void CleanUpAfterSentenceProcessing(const InputType& source);

View File

@ -66,6 +66,8 @@ PhraseDictionaryFuzzyMatch::~PhraseDictionaryFuzzyMatch()
void PhraseDictionaryFuzzyMatch::Load() void PhraseDictionaryFuzzyMatch::Load()
{ {
SetFeaturesToApply();
assert(m_config.size() == 3); assert(m_config.size() == 3);
m_FuzzyMatchWrapper = new tmmt::FuzzyMatchWrapper(m_config[0], m_config[1], m_config[2]); m_FuzzyMatchWrapper = new tmmt::FuzzyMatchWrapper(m_config[0], m_config[1], m_config[2]);
} }

View File

@ -34,7 +34,10 @@ PhraseDictionaryOnDisk::~PhraseDictionaryOnDisk()
{ {
} }
// PhraseDictionary impl void PhraseDictionaryOnDisk::Load()
{
SetFeaturesToApply();
}
//! find list of translations that can translates src. Only for phrase input //! find list of translations that can translates src. Only for phrase input
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(const Phrase& /* src */) const const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(const Phrase& /* src */) const

View File

@ -53,8 +53,8 @@ public:
: MyBase("PhraseDictionaryOnDisk", line) { : MyBase("PhraseDictionaryOnDisk", line) {
CHECK(m_args.size() == 0); CHECK(m_args.size() == 0);
} }
~PhraseDictionaryOnDisk();
virtual ~PhraseDictionaryOnDisk(); void Load();
PhraseTableImplementation GetPhraseTableImplementation() const { PhraseTableImplementation GetPhraseTableImplementation() const {
return OnDisk; return OnDisk;

View File

@ -36,6 +36,7 @@ RuleTableTrie::~RuleTableTrie()
void RuleTableTrie::Load() void RuleTableTrie::Load()
{ {
SetFeaturesToApply();
std::auto_ptr<Moses::RuleTableLoader> loader = std::auto_ptr<Moses::RuleTableLoader> loader =
Moses::RuleTableLoaderFactory::Create(m_filePath); Moses::RuleTableLoaderFactory::Create(m_filePath);

View File

@ -40,29 +40,29 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet
// cut up confusion network into substrings // cut up confusion network into substrings
// start with 1-word phrases // start with 1-word phrases
std::vector<SourcePath> &subphrases = GetPhrases(startPos, startPos); std::vector<SourcePath> &subphrases = GetPhrases(startPos, startPos);
assert(subphrases.size() == 0); assert(subphrases.size() == 0);
const ConfusionNet::Column &col = input.GetColumn(startPos); const ConfusionNet::Column &col = input.GetColumn(startPos);
ConfusionNet::Column::const_iterator iter; ConfusionNet::Column::const_iterator iter;
for (iter = col.begin(); iter != col.end(); ++iter) { for (iter = col.begin(); iter != col.end(); ++iter) {
subphrases.push_back(SourcePath()); subphrases.push_back(SourcePath());
SourcePath &sourcePath = subphrases.back(); SourcePath &sourcePath = subphrases.back();
const std::pair<Word,std::vector<float> > &inputNode = *iter; const std::pair<Word,std::vector<float> > &inputNode = *iter;
//cerr << "word=" << inputNode.first << " scores=" << inputNode.second.size() << endl; //cerr << "word=" << inputNode.first << " scores=" << inputNode.second.size() << endl;
sourcePath.first.AddWord(inputNode.first); sourcePath.first.AddWord(inputNode.first);
sourcePath.second.PlusEquals(inputFeature, inputNode.second); sourcePath.second.PlusEquals(inputFeature, inputNode.second);
} }
} }
for (size_t startPos = 0; startPos < input.GetSize(); ++startPos) { for (size_t startPos = 0; startPos < input.GetSize(); ++startPos) {
for (size_t endPos = startPos; endPos < input.GetSize(); ++endPos) { for (size_t endPos = startPos; endPos < input.GetSize(); ++endPos) {
} }
} }
} }