Calculate m_featuresToApply in Load() for PhraseDictionary. Since every phrase dictionary now has to implement it, make Load() for PhraseDictionary abstract.

This commit is contained in:
Hieu Hoang 2013-06-14 18:34:47 +01:00
parent 0d6565b9a5
commit f7371cf53d
16 changed files with 84 additions and 49 deletions

View File

@ -1,4 +1,4 @@
#include "UnknownWordPenaltyProducer.h"
namespace Moses
{

View File

@ -44,15 +44,6 @@ PhraseDictionary::PhraseDictionary(const std::string &description, const std::st
++ind;
}
}
// find out which feature function can be applied in this decode step
const std::vector<FeatureFunction*> &allFeatures = FeatureFunction::GetFeatureFunctions();
for (size_t i = 0; i < allFeatures.size(); ++i) {
FeatureFunction *feature = allFeatures[i];
if (feature->IsUseable(m_outputFactors)) {
m_featuresToApply.push_back(feature);
}
}
}
@ -76,5 +67,17 @@ bool PhraseDictionary::SetParameter(const std::string& key, const std::string& v
return true;
}
void PhraseDictionary::SetFeaturesToApply()
{
// find out which feature function can be applied in this decode step
const std::vector<FeatureFunction*> &allFeatures = FeatureFunction::GetFeatureFunctions();
for (size_t i = 0; i < allFeatures.size(); ++i) {
FeatureFunction *feature = allFeatures[i];
if (feature->IsUseable(m_outputFactors)) {
m_featuresToApply.push_back(feature);
}
}
}
}

View File

@ -59,6 +59,8 @@ public:
virtual ~PhraseDictionary() {
}
virtual void Load() = 0;
//! table limit number.
size_t GetTableLimit() const {
return m_tableLimit;
@ -99,7 +101,12 @@ protected:
size_t m_tableLimit;
std::string m_filePath;
// Features to apply to evaluate target phrases when loading,
// NOT when creating translation options. Those are in DecodeStep.
std::vector<FeatureFunction*> m_featuresToApply;
// MUST be called at the start of Load()
void SetFeaturesToApply();
};
}

View File

@ -33,6 +33,8 @@ PhraseDictionaryDynSuffixArray::~PhraseDictionaryDynSuffixArray()
void PhraseDictionaryDynSuffixArray::Load()
{
SetFeaturesToApply();
const StaticData &staticData = StaticData::Instance();
vector<float> weight = staticData.GetWeights(this);

View File

@ -93,6 +93,8 @@ PhraseDictionaryMultiModel::~PhraseDictionaryMultiModel()
void PhraseDictionaryMultiModel::Load()
{
SetFeaturesToApply();
// since the top X target phrases of the final model are not the same as the top X phrases of each component model,
// one could choose a higher value than tableLimit (or 0) here for maximal precision, at a cost of speed.
@ -336,25 +338,27 @@ void PhraseDictionaryMultiModel::CleanUpComponentModels(const InputType &source
}
}
// Returns a pointer to the temporary multi-model weights for the caller.
// With WITH_THREADS: takes a shared (read) lock on m_lock_weights, looks up
// m_multimodelweights_tmp by the calling thread's id and returns a pointer to
// that entry, or NULL when the calling thread has no entry.
// Without WITH_THREADS: returns the address of the single
// m_multimodelweights_tmp member.
// NOTE(review): every statement below appears twice; this looks like the
// pre- and post-reformat lines of a diff hunk shown together. Confirm which
// copy is the current one.
const std::vector<float>* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeightsVector() const {
const std::vector<float>* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeightsVector() const
{
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_lock_weights);
if (m_multimodelweights_tmp.find(boost::this_thread::get_id()) != m_multimodelweights_tmp.end()) {
return &m_multimodelweights_tmp.find(boost::this_thread::get_id())->second;
} else {
return NULL;
}
boost::shared_lock<boost::shared_mutex> read_lock(m_lock_weights);
if (m_multimodelweights_tmp.find(boost::this_thread::get_id()) != m_multimodelweights_tmp.end()) {
return &m_multimodelweights_tmp.find(boost::this_thread::get_id())->second;
} else {
return NULL;
}
#else
return &m_multimodelweights_tmp;
return &m_multimodelweights_tmp;
#endif
}
// Stores `weights` as the temporary multi-model weights for the caller.
// `weights` is received by value and copy-assigned into the member.
// With WITH_THREADS: takes a unique (exclusive) lock on m_lock_weights and
// assigns to the entry of m_multimodelweights_tmp keyed by the calling
// thread's id.
// Without WITH_THREADS: assigns to the single m_multimodelweights_tmp member.
// NOTE(review): every statement below appears twice; this looks like the
// pre- and post-reformat lines of a diff hunk shown together. Confirm which
// copy is the current one.
void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector<float> weights) {
void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector<float> weights)
{
#ifdef WITH_THREADS
boost::unique_lock<boost::shared_mutex> lock(m_lock_weights);
m_multimodelweights_tmp[boost::this_thread::get_id()] = weights;
boost::unique_lock<boost::shared_mutex> lock(m_lock_weights);
m_multimodelweights_tmp[boost::this_thread::get_id()] = weights;
#else
m_multimodelweights_tmp = weights;
m_multimodelweights_tmp = weights;
#endif
}

View File

@ -104,15 +104,16 @@ protected:
// Returns the phrase cache to use for the caller.
// With WITH_THREADS: first searches m_sentenceCache for the calling thread's
// id under a shared (read) lock and returns the match if one exists. The read
// lock lives in its own brace scope, so it is released before the unique
// (exclusive) lock on m_lock_cache is taken for the fallback
// m_sentenceCache[thread id] access (presumably creating the entry when it is
// missing; the container type is not visible here, confirm).
// Without WITH_THREADS: returns the single m_sentenceCache directly.
// NOTE(review): every statement below appears twice; this looks like the
// pre- and post-reformat lines of a diff hunk shown together. Confirm which
// copy is the current one.
PhraseCache& GetPhraseCache() {
#ifdef WITH_THREADS
{ // first try read-only lock
boost::shared_lock<boost::shared_mutex> read_lock(m_lock_cache);
SentenceCache::iterator i = m_sentenceCache.find(boost::this_thread::get_id());
if (i != m_sentenceCache.end()) return i->second;
}
boost::unique_lock<boost::shared_mutex> lock(m_lock_cache);
return m_sentenceCache[boost::this_thread::get_id()];
{
// first try read-only lock
boost::shared_lock<boost::shared_mutex> read_lock(m_lock_cache);
SentenceCache::iterator i = m_sentenceCache.find(boost::this_thread::get_id());
if (i != m_sentenceCache.end()) return i->second;
}
boost::unique_lock<boost::shared_mutex> lock(m_lock_cache);
return m_sentenceCache[boost::this_thread::get_id()];
#else
return m_sentenceCache;
return m_sentenceCache;
#endif
}

View File

@ -122,6 +122,7 @@ PhraseDictionaryMultiModelCounts::~PhraseDictionaryMultiModelCounts()
void PhraseDictionaryMultiModelCounts::Load()
{
SetFeaturesToApply();
for(size_t i = 0; i < m_numModels; ++i) {
// phrase table

View File

@ -35,6 +35,10 @@ PhraseDictionaryTreeAdaptor(const std::string &line)
// Destructor. The body is empty: no explicit cleanup is performed here.
PhraseDictionaryTreeAdaptor::~PhraseDictionaryTreeAdaptor()
{
}
// Load step for this dictionary. The only work done here is
// SetFeaturesToApply(), which records the feature functions that apply to
// this dictionary; no phrase data is read in this function.
void PhraseDictionaryTreeAdaptor::Load()
{
SetFeaturesToApply();
}
void PhraseDictionaryTreeAdaptor::InitializeForInput(InputType const& source)
{

View File

@ -37,6 +37,7 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary
public:
PhraseDictionaryTreeAdaptor(const std::string &line);
virtual ~PhraseDictionaryTreeAdaptor();
void Load();
// enable/disable caching
// you enable caching if you request the target candidates for a source phrase multiple times

View File

@ -30,6 +30,11 @@ PhraseDictionaryALSuffixArray::PhraseDictionaryALSuffixArray(const std::string &
CHECK(m_args.size() == 0);
}
// Load step for this dictionary. The only work done here is
// SetFeaturesToApply(), which records the feature functions that apply to
// this dictionary; no rules are read in this function.
// NOTE(review): rules appear to be populated per sentence in
// InitializeForInput() instead; confirm.
void PhraseDictionaryALSuffixArray::Load()
{
SetFeaturesToApply();
}
void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
{
// populate with rules for this sentence

View File

@ -23,6 +23,7 @@ class PhraseDictionaryALSuffixArray : public PhraseDictionaryMemory
{
public:
PhraseDictionaryALSuffixArray(const std::string &line);
void Load();
void InitializeForInput(InputType const& source);
void CleanUpAfterSentenceProcessing(const InputType& source);

View File

@ -66,6 +66,8 @@ PhraseDictionaryFuzzyMatch::~PhraseDictionaryFuzzyMatch()
// Load step for this dictionary: records the applicable feature functions,
// then constructs the fuzzy-match wrapper from the three entries of m_config.
void PhraseDictionaryFuzzyMatch::Load()
{
SetFeaturesToApply();
// Exactly three config entries are expected. assert() is compiled out when
// NDEBUG is defined, so this guard is inactive in such builds.
assert(m_config.size() == 3);
// Raw heap allocation stored in m_FuzzyMatchWrapper.
// NOTE(review): where this object is released is not visible here; confirm.
m_FuzzyMatchWrapper = new tmmt::FuzzyMatchWrapper(m_config[0], m_config[1], m_config[2]);
}

View File

@ -34,7 +34,10 @@ PhraseDictionaryOnDisk::~PhraseDictionaryOnDisk()
{
}
// PhraseDictionary impl
// Load step for this dictionary. The only work done here is
// SetFeaturesToApply(), which records the feature functions that apply to
// this dictionary; no on-disk data is opened in this function.
void PhraseDictionaryOnDisk::Load()
{
SetFeaturesToApply();
}
//! find list of translations that can translates src. Only for phrase input
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(const Phrase& /* src */) const

View File

@ -53,8 +53,8 @@ public:
: MyBase("PhraseDictionaryOnDisk", line) {
CHECK(m_args.size() == 0);
}
virtual ~PhraseDictionaryOnDisk();
~PhraseDictionaryOnDisk();
void Load();
PhraseTableImplementation GetPhraseTableImplementation() const {
return OnDisk;

View File

@ -36,6 +36,7 @@ RuleTableTrie::~RuleTableTrie()
void RuleTableTrie::Load()
{
SetFeaturesToApply();
std::auto_ptr<Moses::RuleTableLoader> loader =
Moses::RuleTableLoaderFactory::Create(m_filePath);

View File

@ -40,29 +40,29 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet
// cut up confusion network into substrings
// start with 1-word phrases
std::vector<SourcePath> &subphrases = GetPhrases(startPos, startPos);
assert(subphrases.size() == 0);
std::vector<SourcePath> &subphrases = GetPhrases(startPos, startPos);
assert(subphrases.size() == 0);
const ConfusionNet::Column &col = input.GetColumn(startPos);
ConfusionNet::Column::const_iterator iter;
for (iter = col.begin(); iter != col.end(); ++iter) {
subphrases.push_back(SourcePath());
SourcePath &sourcePath = subphrases.back();
const ConfusionNet::Column &col = input.GetColumn(startPos);
ConfusionNet::Column::const_iterator iter;
for (iter = col.begin(); iter != col.end(); ++iter) {
subphrases.push_back(SourcePath());
SourcePath &sourcePath = subphrases.back();
const std::pair<Word,std::vector<float> > &inputNode = *iter;
const std::pair<Word,std::vector<float> > &inputNode = *iter;
//cerr << "word=" << inputNode.first << " scores=" << inputNode.second.size() << endl;
sourcePath.first.AddWord(inputNode.first);
sourcePath.second.PlusEquals(inputFeature, inputNode.second);
//cerr << "word=" << inputNode.first << " scores=" << inputNode.second.size() << endl;
sourcePath.first.AddWord(inputNode.first);
sourcePath.second.PlusEquals(inputFeature, inputNode.second);
}
}
}
for (size_t startPos = 0; startPos < input.GetSize(); ++startPos) {
for (size_t endPos = startPos; endPos < input.GetSize(); ++endPos) {
for (size_t startPos = 0; startPos < input.GetSize(); ++startPos) {
for (size_t endPos = startPos; endPos < input.GetSize(); ++endPos) {
}
}
}
}
}