port PhraseDictionaryMultiModel to new format

2024-12-25 12:52:29 +03:00 · 2013-05-10 12:30:01 +01:00 · 2013-05-10 12:30:01 +01:00 · ed7ab8146f
commit ed7ab8146f
parent 1f5fc77c94
4 changed files with 71 additions and 31 deletions
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@ -1441,6 +1441,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryDynSuffixArray.h</locationURI>
 		</link>
+		<link>
+			<name>TranslationModel/PhraseDictionaryMultiModel.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMultiModel.cpp</locationURI>
+		</link>
+		<link>
+			<name>TranslationModel/PhraseDictionaryMultiModel.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMultiModel.h</locationURI>
+		</link>
 		<link>
 			<name>TranslationModel/PhraseDictionaryTree.cpp</name>
 			<type>1</type>
--- a/moses/StaticData.h
+++ b/moses/StaticData.h
@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include <string>

 #ifdef WITH_THREADS
+#include <boost/thread.hpp>
 #include <boost/thread/mutex.hpp>
 #endif

@ -700,6 +701,40 @@ public:
  void CollectFeatureFunctions();
  bool CheckWeights() const;

+
+  // Stores a copy of `weights` as the temporary multimodel weights.
+  // With WITH_THREADS the copy is filed under the calling thread's id;
+  // otherwise it replaces the single temporary vector. The method can be
+  // const because the storage member is declared mutable.
+  // The argument is taken by const reference so that it is copied only once
+  // (into the storage), not once for the parameter and again on assignment.
+  // NOTE(review): no lock is taken around the map update here -- confirm
+  // that concurrent callers are serialized elsewhere.
+  void SetTemporaryMultiModelWeightsVector(const std::vector<float> &weights) const {
+#ifdef WITH_THREADS
+    m_multimodelweights_tmp[boost::this_thread::get_id()] = weights;
+#else
+    m_multimodelweights_tmp = weights;
+#endif
+  }
+
+  // multimodel weights; the temporary weights are stored per thread id when WITH_THREADS is defined
+  std::vector<float> m_multimodelweights;
+#ifdef WITH_THREADS
+  mutable std::map<boost::thread::id, std::vector<float> > m_multimodelweights_tmp;
+#else
+  mutable std::vector<float> m_multimodelweights_tmp;
+#endif
+
+  // Returns the address of the member vector m_multimodelweights (never NULL).
+  const std::vector<float>* GetMultiModelWeightsVector() const {
+    return &m_multimodelweights;
+  }
+
+  // Temporary weights for the calling thread; NULL if that thread stored none.
+  const std::vector<float>* GetTemporaryMultiModelWeightsVector() const {
+#ifdef WITH_THREADS
+    // One lookup; the iterator is reused instead of calling find() twice.
+    std::map<boost::thread::id, std::vector<float> >::const_iterator entry =
+      m_multimodelweights_tmp.find(boost::this_thread::get_id());
+    return (entry != m_multimodelweights_tmp.end()) ? &entry->second : NULL;
+#else
+    // Without threads there is a single vector, so the result is never NULL.
+    return &m_multimodelweights_tmp;
+#endif
+  }
+
 };

 }
--- a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
@ -25,10 +25,9 @@ using namespace std;
 namespace Moses

 {
-PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(size_t numScoreComponent,
-    PhraseDictionaryFeature* feature): PhraseDictionary(numScoreComponent, feature)
+PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
+:PhraseDictionary("PhraseDictionaryMultiModel", line)
 {
-    m_feature_load = feature;
 }

 PhraseDictionaryMultiModel::~PhraseDictionaryMultiModel()
@ -45,6 +44,7 @@ bool PhraseDictionaryMultiModel::Load(const std::vector<FactorType> &input
                                  , const LMList &languageModels
                                  , float weightWP)
 {
+  /*
  m_languageModels = &languageModels;
  m_weight = weight;
  m_weightWP = weightWP;
@ -63,7 +63,7 @@ bool PhraseDictionaryMultiModel::Load(const std::vector<FactorType> &input

  //how many actual scores there are in the phrase tables
  //so far, equal to number of log-linear scores, but it is allowed to be smaller (for other combination types)
-  size_t numPtScores = m_numScoreComponent;
+  size_t numPtScores = m_numScoreComponents;

  if (m_mode != "interpolate") {
    ostringstream msg;
@ -88,18 +88,18 @@ bool PhraseDictionaryMultiModel::Load(const std::vector<FactorType> &input

            if (!FileExists(file) && FileExists(file + ".gz")) file += ".gz";

-            PhraseDictionaryMemory* pdm = new PhraseDictionaryMemory(m_numScoreComponent, m_feature_load);
+            PhraseDictionaryMemory* pdm = new PhraseDictionaryMemory(m_numScoreComponents, m_feature_load);
            pdm->SetNumScoreComponentMultiModel(numPtScores); //instead of complaining about inequal number of scores, silently fill up the score vector with zeroes
            pdm->Load( input, output, file, m_weight, m_componentTableLimit, languageModels, m_weightWP);
            m_pd.push_back(pdm);
      } else if (implementation == Binary) {
-            PhraseDictionaryTreeAdaptor* pdta = new PhraseDictionaryTreeAdaptor(m_numScoreComponent, numInputScores , m_feature_load);
+            PhraseDictionaryTreeAdaptor* pdta = new PhraseDictionaryTreeAdaptor(m_numScoreComponents, numInputScores , m_feature_load);
            pdta->Load(input, output, file, m_weight, m_componentTableLimit, languageModels, m_weightWP);
            m_pd.push_back(pdta);
      } else if (implementation == Compact) {
 #ifndef WIN32
-            PhraseDictionaryCompact* pdc = new PhraseDictionaryCompact(m_numScoreComponent, implementation, m_feature_load);
-            pdc->SetNumScoreComponentMultiModel(m_numScoreComponent); //for compact models, we need to pass number of log-linear components to correctly resize the score vector
+            PhraseDictionaryCompact* pdc = new PhraseDictionaryCompact(m_numScoreComponents, implementation, m_feature_load);
+            pdc->SetNumScoreComponentMultiModel(m_numScoreComponents); //for compact models, we need to pass number of log-linear components to correctly resize the score vector
            pdc->Load( input, output, file, m_weight, m_componentTableLimit, languageModels, m_weightWP);
            m_pd.push_back(pdc);
 #else
@ -110,6 +110,7 @@ bool PhraseDictionaryMultiModel::Load(const std::vector<FactorType> &input
        UTIL_THROW(util::Exception,"PhraseDictionaryMultiModel does not support phrase table type " << implementation);
      }
  }
+*/

  return true;
 }
@ -122,7 +123,7 @@ const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollect

  if (m_mode == "interpolate") {
    //interpolation of phrase penalty is skipped, and fixed-value (2.718) is used instead. results will be screwed up if phrase penalty is not last feature
-    size_t numWeights = m_numScoreComponent-1;
+    size_t numWeights = m_numScoreComponents-1;
    multimodelweights = getWeights(numWeights, true);
  }

@ -161,7 +162,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,

      for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != iterLast;  ++iterTargetPhrase) {
        TargetPhrase * targetPhrase = *iterTargetPhrase;
-        std::vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(m_feature);
+        std::vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(this);

        std::string targetString = targetPhrase->GetStringRep(m_output);
        if (allStats->find(targetString) == allStats->end()) {
@ -169,21 +170,21 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
          multiModelStatistics * statistics = new multiModelStatistics;
          statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info

-          Scores scoreVector(m_numScoreComponent);
-          statistics->p.resize(m_numScoreComponent);
-          for(size_t j = 0; j < m_numScoreComponent; ++j){
+          Scores scoreVector(m_numScoreComponents);
+          statistics->p.resize(m_numScoreComponents);
+          for(size_t j = 0; j < m_numScoreComponents; ++j){
              statistics->p[j].resize(m_numModels);
              scoreVector[j] = -raw_scores[j];
          }

-          statistics->targetPhrase->SetScore(m_feature, scoreVector, ScoreComponentCollection(), m_weight, m_weightWP, *m_languageModels); // set scores to 0
+          statistics->targetPhrase->SetScore(this, scoreVector); // set scores to 0

          (*allStats)[targetString] = statistics;

        }
        multiModelStatistics * statistics = (*allStats)[targetString];

-        for(size_t j = 0; j < m_numScoreComponent; ++j){
+        for(size_t j = 0; j < m_numScoreComponents; ++j){
            statistics->p[j][i] = UntransformScore(raw_scores[j]);
        }

@ -201,16 +202,16 @@ TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollection

        multiModelStatistics * statistics = iter->second;

-        Scores scoreVector(m_numScoreComponent);
+        Scores scoreVector(m_numScoreComponents);

-        for(size_t i = 0; i < m_numScoreComponent-1; ++i){
+        for(size_t i = 0; i < m_numScoreComponents-1; ++i){
            scoreVector[i] = TransformScore(std::inner_product(statistics->p[i].begin(), statistics->p[i].end(), multimodelweights[i].begin(), 0.0));
        }

        //assuming that last value is phrase penalty
-        scoreVector[m_numScoreComponent-1] = 1.0;
+        scoreVector[m_numScoreComponents-1] = 1.0;

-        statistics->targetPhrase->SetScore(m_feature, scoreVector, ScoreComponentCollection(), m_weight, m_weightWP, *m_languageModels);
+        statistics->targetPhrase->SetScore(this, scoreVector);
        ret->Add(new TargetPhrase(*statistics->targetPhrase));
    }
    return ret;
@ -303,7 +304,7 @@ void PhraseDictionaryMultiModel::CacheForCleanup(TargetPhraseCollection* tpc) {
 }


-void PhraseDictionaryMultiModel::CleanUp(const InputType &source) {
+void PhraseDictionaryMultiModel::CleanUpAfterSentenceProcessing(const InputType &source) {
 #ifdef WITH_THREADS
  boost::mutex::scoped_lock lock(m_sentenceMutex);
  PhraseCache &ref = m_sentenceCache[boost::this_thread::get_id()];
@ -327,7 +328,7 @@ void PhraseDictionaryMultiModel::CleanUp(const InputType &source) {

 void  PhraseDictionaryMultiModel::CleanUpComponentModels(const InputType &source) {
  for(size_t i = 0; i < m_numModels; ++i){
-    m_pd[i]->CleanUp(source);
+    m_pd[i]->CleanUpAfterSentenceProcessing(source);
  }
 }

@ -380,10 +381,10 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
    Sentence sentence;
    CleanUp(sentence); // free memory used by compact phrase tables

-    size_t numWeights = m_numScoreComponent;
+    size_t numWeights = m_numScoreComponents;
    if (m_mode == "interpolate") {
        //interpolation of phrase penalty is skipped, and fixed-value (2.718) is used instead. results will be screwed up if phrase penalty is not last feature
-        numWeights = m_numScoreComponent-1;
+        numWeights = m_numScoreComponents-1;
    }

    vector<float> ret (m_numModels*numWeights);
--- a/moses/TranslationModel/PhraseDictionaryMultiModel.h
+++ b/moses/TranslationModel/PhraseDictionaryMultiModel.h
@ -21,11 +21,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #define moses_PhraseDictionaryMultiModel_h

 #include "moses/TranslationModel/PhraseDictionary.h"
-#include "moses/TranslationModel/PhraseDictionaryMemory.h"
-#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
-#ifndef WIN32
-#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
-#endif


 #include <boost/unordered_map.hpp>
@ -62,7 +57,7 @@ friend class CrossEntropy;
 #endif

 public:
-  PhraseDictionaryMultiModel(size_t m_numScoreComponent, PhraseDictionaryFeature* feature);
+  PhraseDictionaryMultiModel(const std::string &line);
  ~PhraseDictionaryMultiModel();
  bool Load(const std::vector<FactorType> &input
            , const std::vector<FactorType> &output
@ -77,7 +72,7 @@ public:
  std::vector<std::vector<float> > getWeights(size_t numWeights, bool normalize) const;
  std::vector<float> normalizeWeights(std::vector<float> &weights) const;
  void CacheForCleanup(TargetPhraseCollection* tpc);
-  void CleanUp(const InputType &source);
+  void CleanUpAfterSentenceProcessing(const InputType &source);
  virtual void CleanUpComponentModels(const InputType &source);
 #ifdef WITH_DLIB
  virtual std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
@ -100,7 +95,6 @@ protected:
  std::vector<FactorType> m_output;
  size_t m_numModels;
  size_t m_componentTableLimit;
-  PhraseDictionaryFeature* m_feature_load;

  typedef std::vector<TargetPhraseCollection*> PhraseCache;
 #ifdef WITH_THREADS