Calculate m_featuresToApply in Load() rather than in the PhraseDictionary constructor. Since every phrase dictionary now has to implement Load(), make PhraseDictionary::Load() pure virtual.

2024-12-25 04:43:03 +03:00 · 2013-06-14 18:34:47 +01:00 · 2013-06-14 18:34:47 +01:00 · f7371cf53d
commit f7371cf53d
parent 0d6565b9a5
16 changed files with 84 additions and 49 deletions
--- a/moses/FF/UnknownWordPenaltyProducer.cpp
+++ b/moses/FF/UnknownWordPenaltyProducer.cpp
@ -1,4 +1,4 @@
-
+#include "UnknownWordPenaltyProducer.h"

 namespace Moses
 {
--- a/moses/TranslationModel/PhraseDictionary.cpp
+++ b/moses/TranslationModel/PhraseDictionary.cpp
@ -44,15 +44,6 @@ PhraseDictionary::PhraseDictionary(const std::string &description, const std::st
      ++ind;
    }
  }
-
-  // find out which feature function can be applied in this decode step
-  const std::vector<FeatureFunction*> &allFeatures = FeatureFunction::GetFeatureFunctions();
-  for (size_t i = 0; i < allFeatures.size(); ++i) {
-    FeatureFunction *feature = allFeatures[i];
-    if (feature->IsUseable(m_outputFactors)) {
-      m_featuresToApply.push_back(feature);
-    }
-  }
 }


@ -76,5 +67,17 @@ bool PhraseDictionary::SetParameter(const std::string& key, const std::string& v
  return true;
 }

+void PhraseDictionary::SetFeaturesToApply()
+{
+  // find out which feature function can be applied in this decode step
+  const std::vector<FeatureFunction*> &allFeatures = FeatureFunction::GetFeatureFunctions();
+  for (size_t i = 0; i < allFeatures.size(); ++i) {
+    FeatureFunction *feature = allFeatures[i];
+    if (feature->IsUseable(m_outputFactors)) {
+      m_featuresToApply.push_back(feature);
+    }
+  }
+}
+
 }

--- a/moses/TranslationModel/PhraseDictionary.h
+++ b/moses/TranslationModel/PhraseDictionary.h
@ -59,6 +59,8 @@ public:
  virtual ~PhraseDictionary() {
  }

+  virtual void Load() = 0;
+
  //! table limit number.
  size_t GetTableLimit() const {
    return m_tableLimit;
@ -99,7 +101,12 @@ protected:
  size_t m_tableLimit;
  std::string m_filePath;

+  // features applied to evaluate target phrases when loading,
+  // NOT when creating translation options. Those are in DecodeStep
  std::vector<FeatureFunction*> m_featuresToApply;
+
+  // MUST be called at the start of Load()
+  void SetFeaturesToApply();
 };

 }
--- a/moses/TranslationModel/PhraseDictionaryDynSuffixArray.cpp
+++ b/moses/TranslationModel/PhraseDictionaryDynSuffixArray.cpp
@ -33,6 +33,8 @@ PhraseDictionaryDynSuffixArray::~PhraseDictionaryDynSuffixArray()

 void PhraseDictionaryDynSuffixArray::Load()
 {
+  SetFeaturesToApply();
+
  const StaticData &staticData = StaticData::Instance();
  vector<float> weight = staticData.GetWeights(this);

--- a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
@ -93,6 +93,8 @@ PhraseDictionaryMultiModel::~PhraseDictionaryMultiModel()

 void PhraseDictionaryMultiModel::Load()
 {
+  SetFeaturesToApply();
+
  // since the top X target phrases of the final model are not the same as the top X phrases of each component model,
  // one could choose a higher value than tableLimit (or 0) here for maximal precision, at a cost of speed.

@ -336,25 +338,27 @@ void  PhraseDictionaryMultiModel::CleanUpComponentModels(const InputType &source
  }
 }

-const std::vector<float>* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeightsVector() const {
+const std::vector<float>* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeightsVector() const
+{
 #ifdef WITH_THREADS
-    boost::shared_lock<boost::shared_mutex> read_lock(m_lock_weights);
-    if (m_multimodelweights_tmp.find(boost::this_thread::get_id()) != m_multimodelweights_tmp.end()) {
-      return &m_multimodelweights_tmp.find(boost::this_thread::get_id())->second;
-    } else {
-      return NULL;
-    }
+  boost::shared_lock<boost::shared_mutex> read_lock(m_lock_weights);
+  if (m_multimodelweights_tmp.find(boost::this_thread::get_id()) != m_multimodelweights_tmp.end()) {
+    return &m_multimodelweights_tmp.find(boost::this_thread::get_id())->second;
+  } else {
+    return NULL;
+  }
 #else
-    return &m_multimodelweights_tmp;
+  return &m_multimodelweights_tmp;
 #endif
 }

-void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector<float> weights) {
+void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector<float> weights)
+{
 #ifdef WITH_THREADS
-    boost::unique_lock<boost::shared_mutex> lock(m_lock_weights);
-    m_multimodelweights_tmp[boost::this_thread::get_id()] = weights;
+  boost::unique_lock<boost::shared_mutex> lock(m_lock_weights);
+  m_multimodelweights_tmp[boost::this_thread::get_id()] = weights;
 #else
-    m_multimodelweights_tmp = weights;
+  m_multimodelweights_tmp = weights;
 #endif
 }

--- a/moses/TranslationModel/PhraseDictionaryMultiModel.h
+++ b/moses/TranslationModel/PhraseDictionaryMultiModel.h
@ -104,15 +104,16 @@ protected:

  PhraseCache& GetPhraseCache() {
 #ifdef WITH_THREADS
-  { // first try read-only lock
-  boost::shared_lock<boost::shared_mutex> read_lock(m_lock_cache);
-  SentenceCache::iterator i = m_sentenceCache.find(boost::this_thread::get_id());
-  if (i != m_sentenceCache.end()) return i->second;
-  }
-  boost::unique_lock<boost::shared_mutex> lock(m_lock_cache);
-  return m_sentenceCache[boost::this_thread::get_id()];
+    {
+      // first try read-only lock
+      boost::shared_lock<boost::shared_mutex> read_lock(m_lock_cache);
+      SentenceCache::iterator i = m_sentenceCache.find(boost::this_thread::get_id());
+      if (i != m_sentenceCache.end()) return i->second;
+    }
+    boost::unique_lock<boost::shared_mutex> lock(m_lock_cache);
+    return m_sentenceCache[boost::this_thread::get_id()];
 #else
-  return m_sentenceCache;
+    return m_sentenceCache;
 #endif
  }

--- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
@ -122,6 +122,7 @@ PhraseDictionaryMultiModelCounts::~PhraseDictionaryMultiModelCounts()

 void PhraseDictionaryMultiModelCounts::Load()
 {
+  SetFeaturesToApply();
  for(size_t i = 0; i < m_numModels; ++i) {

    // phrase table
--- a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp
+++ b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp
@ -35,6 +35,10 @@ PhraseDictionaryTreeAdaptor(const std::string &line)
 PhraseDictionaryTreeAdaptor::~PhraseDictionaryTreeAdaptor()
 {
 }
+void PhraseDictionaryTreeAdaptor::Load()
+{
+  SetFeaturesToApply();
+}

 void PhraseDictionaryTreeAdaptor::InitializeForInput(InputType const& source)
 {
--- a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h
+++ b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h
@ -37,6 +37,7 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary
 public:
  PhraseDictionaryTreeAdaptor(const std::string &line);
  virtual ~PhraseDictionaryTreeAdaptor();
+  void Load();

  // enable/disable caching
  // you enable caching if you request the target candidates for a source phrase multiple times
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp
@ -30,6 +30,11 @@ PhraseDictionaryALSuffixArray::PhraseDictionaryALSuffixArray(const std::string &
  CHECK(m_args.size() == 0);
 }

+void PhraseDictionaryALSuffixArray::Load()
+{
+  SetFeaturesToApply();
+}
+
 void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
 {
  // populate with rules for this sentence
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h
@ -23,6 +23,7 @@ class PhraseDictionaryALSuffixArray : public PhraseDictionaryMemory
 {
 public:
  PhraseDictionaryALSuffixArray(const std::string &line);
+  void Load();
  void InitializeForInput(InputType const& source);
  void CleanUpAfterSentenceProcessing(const InputType& source);

--- a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
@ -66,6 +66,8 @@ PhraseDictionaryFuzzyMatch::~PhraseDictionaryFuzzyMatch()

 void PhraseDictionaryFuzzyMatch::Load()
 {
+  SetFeaturesToApply();
+
  assert(m_config.size() == 3);
  m_FuzzyMatchWrapper = new tmmt::FuzzyMatchWrapper(m_config[0], m_config[1], m_config[2]);
 }
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp
@ -34,7 +34,10 @@ PhraseDictionaryOnDisk::~PhraseDictionaryOnDisk()
 {
 }

-// PhraseDictionary impl
+void PhraseDictionaryOnDisk::Load()
+{
+  SetFeaturesToApply();
+}

 //! find list of translations that can translates src. Only for phrase input
 const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(const Phrase& /* src */) const
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h
@ -53,8 +53,8 @@ public:
    : MyBase("PhraseDictionaryOnDisk", line) {
    CHECK(m_args.size() == 0);
  }
-
-  virtual ~PhraseDictionaryOnDisk();
+  ~PhraseDictionaryOnDisk();
+  void Load();

  PhraseTableImplementation GetPhraseTableImplementation() const {
    return OnDisk;
--- a/moses/TranslationModel/RuleTable/Trie.cpp
+++ b/moses/TranslationModel/RuleTable/Trie.cpp
@ -36,6 +36,7 @@ RuleTableTrie::~RuleTableTrie()

 void RuleTableTrie::Load()
 {
+  SetFeaturesToApply();

  std::auto_ptr<Moses::RuleTableLoader> loader =
    Moses::RuleTableLoaderFactory::Create(m_filePath);
--- a/moses/TranslationOptionCollectionConfusionNet.cpp
+++ b/moses/TranslationOptionCollectionConfusionNet.cpp
@ -40,29 +40,29 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet

    // cut up confusion network into substrings
    // start with 1-word phrases
-	  std::vector<SourcePath> &subphrases = GetPhrases(startPos, startPos);
-	  assert(subphrases.size() == 0);
+    std::vector<SourcePath> &subphrases = GetPhrases(startPos, startPos);
+    assert(subphrases.size() == 0);

-	  const ConfusionNet::Column &col = input.GetColumn(startPos);
-	  ConfusionNet::Column::const_iterator iter;
-	  for (iter = col.begin(); iter != col.end(); ++iter) {
-		  subphrases.push_back(SourcePath());
-		  SourcePath &sourcePath = subphrases.back();
+    const ConfusionNet::Column &col = input.GetColumn(startPos);
+    ConfusionNet::Column::const_iterator iter;
+    for (iter = col.begin(); iter != col.end(); ++iter) {
+      subphrases.push_back(SourcePath());
+      SourcePath &sourcePath = subphrases.back();

-		  const std::pair<Word,std::vector<float> > &inputNode = *iter;
+      const std::pair<Word,std::vector<float> > &inputNode = *iter;

-		  //cerr << "word=" << inputNode.first << " scores=" << inputNode.second.size() << endl;
-		  sourcePath.first.AddWord(inputNode.first);
-		  sourcePath.second.PlusEquals(inputFeature, inputNode.second);
+      //cerr << "word=" << inputNode.first << " scores=" << inputNode.second.size() << endl;
+      sourcePath.first.AddWord(inputNode.first);
+      sourcePath.second.PlusEquals(inputFeature, inputNode.second);

-	  }
+    }
  }

-	for (size_t startPos = 0; startPos < input.GetSize(); ++startPos) {
-		for (size_t endPos = startPos; endPos < input.GetSize(); ++endPos) {
+  for (size_t startPos = 0; startPos < input.GetSize(); ++startPos) {
+    for (size_t endPos = startPos; endPos < input.GetSize(); ++endPos) {

-		}
-	}
+    }
+  }


 }