Query member phrase tables in batch mode for forward compatibility

Use GetTargetPhraseCollectionBatch instead of GetTargetPhraseCollectionLEGACY
2024-09-11 19:27:11 +03:00 · 2016-03-18 14:34:28 -04:00 · 2016-03-18 14:34:28 -04:00 · 2462c81f7a
commit 2462c81f7a
parent e7627e04ed
2 changed files with 221 additions and 239 deletions
--- a/moses/TranslationModel/PhraseDictionaryGroup.cpp
+++ b/moses/TranslationModel/PhraseDictionaryGroup.cpp
@@ -20,7 +20,6 @@
 #include "moses/TranslationModel/PhraseDictionaryGroup.h"

 #include <boost/foreach.hpp>
-#include <boost/unordered_map.hpp>

 #include "util/exception.hh"

@@ -30,18 +29,18 @@ using namespace boost;
 namespace Moses
 {

-PhraseDictionaryGroup::PhraseDictionaryGroup(const string &line)
-  : PhraseDictionary(line, true),
-    m_numModels(0),
-    m_totalModelScores(0),
-    m_phraseCounts(false),
-    m_wordCounts(false),
-    m_modelBitmapCounts(false),
-    m_restrict(false),
-    m_haveDefaultScores(false),
-    m_defaultAverageOthers(false),
-    m_scoresPerModel(0),
-    m_haveMmsaptLrFunc(false)
+PhraseDictionaryGroup::PhraseDictionaryGroup(const string &line) :
+      PhraseDictionary(line, true),
+      m_numModels(0),
+      m_totalModelScores(0),
+      m_phraseCounts(false),
+      m_wordCounts(false),
+      m_modelBitmapCounts(false),
+      m_restrict(false),
+      m_haveDefaultScores(false),
+      m_defaultAverageOthers(false),
+      m_scoresPerModel(0),
+      m_haveMmsaptLrFunc(false)
 {
  ReadParameters();
 }
@@ -61,12 +60,12 @@ void PhraseDictionaryGroup::SetParameter(const string& key, const string& value)
    m_wordCounts = Scan<bool>(value);
  } else if (key == "model-bitmap-counts") {
    m_modelBitmapCounts = Scan<bool>(value);
-  } else if (key =="default-scores") {
+  } else if (key == "default-scores") {
    m_haveDefaultScores = true;
    m_defaultScores = Scan<float>(Tokenize(value, ","));
-  } else if (key =="default-average-others") {
+  } else if (key == "default-average-others") {
    m_defaultAverageOthers = Scan<bool>(value);
-  } else if (key =="mmsapt-lr-func") {
+  } else if (key == "mmsapt-lr-func") {
    m_haveMmsaptLrFunc = true;
  } else {
    PhraseDictionary::SetParameter(key, value);
@@ -93,12 +92,12 @@ void PhraseDictionaryGroup::Load(AllOptions::ptr const& opts)
          m_scoresPerModel = nScores;
        } else if (m_defaultAverageOthers) {
          UTIL_THROW_IF2(nScores != m_scoresPerModel,
-                         m_description << ": member models must have the same number of scores when using default-average-others");
+              m_description << ": member models must have the same number of scores when using default-average-others");
        }
      }
    }
    UTIL_THROW_IF2(!pdFound,
-                   m_description << ": could not find member phrase table " << pdName);
+        m_description << ": could not find member phrase table " << pdName);
  }
  m_totalModelScores = numScoreComponents;

@@ -113,7 +112,7 @@ void PhraseDictionaryGroup::Load(AllOptions::ptr const& opts)
    numScoreComponents += (pow(2, m_numModels) - 1);
  }
  UTIL_THROW_IF2(numScoreComponents != m_numScoreComponents,
-                 m_description << ": feature count mismatch: specify \"num-features=" << numScoreComponents << "\" and supply " << numScoreComponents << " weights");
+      m_description << ": feature count mismatch: specify \"num-features=" << numScoreComponents << "\" and supply " << numScoreComponents << " weights");

 #ifdef PT_UG
  // Locate mmsapt lexical reordering functions if specified
@@ -129,7 +128,7 @@ void PhraseDictionaryGroup::Load(AllOptions::ptr const& opts)
  // Determine "zero" scores for features
  if (m_haveDefaultScores) {
    UTIL_THROW_IF2(m_defaultScores.size() != m_numScoreComponents,
-                   m_description << ": number of specified default scores is unequal to number of member model scores");
+        m_description << ": number of specified default scores is unequal to number of member model scores");
  } else {
    // Default is all 0 (as opposed to e.g. -99 or similar to approximate log(0)
    // or a smoothed "not in model" score)
@@ -137,253 +136,233 @@ void PhraseDictionaryGroup::Load(AllOptions::ptr const& opts)
  }
 }

-void PhraseDictionaryGroup::InitializeForInput(const ttasksptr& ttask)
-{
-  // Member models are registered as FFs and should already be initialized
-}
-
 void PhraseDictionaryGroup::GetTargetPhraseCollectionBatch(
-  const ttasksptr& ttask, const InputPathList& inputPathQueue) const
+    const ttasksptr& ttask,
+    const InputPathList& inputPathQueue) const
 {
-  // Some implementations (mmsapt) do work in PrefixExists
-  BOOST_FOREACH(const InputPath* inputPath, inputPathQueue) {
-    const Phrase& phrase = inputPath->GetPhrase();
-    BOOST_FOREACH(const PhraseDictionary* pd, m_memberPDs) {
-      pd->PrefixExists(ttask, phrase);
-    }
+  // For each member phrase table, add translation options to input paths
+  // (Run each phrase table lookup normally)
+  BOOST_FOREACH(const PhraseDictionary* pd, m_memberPDs) {
+    pd->GetTargetPhraseCollectionBatch(ttask, inputPathQueue);
  }
-  // Look up each input in each model
+
+  // Below: "collapse" translation options from all member tables into a single
+  // option for each <source, target> phrase pair for this table.  Remove
+  // original options from other tables.
+
+  // For each input path (source phrase)
  BOOST_FOREACH(InputPath* inputPath, inputPathQueue) {
-    const Phrase &phrase = inputPath->GetPhrase();
-    TargetPhraseCollection::shared_ptr  targetPhrases =
-      this->GetTargetPhraseCollectionLEGACY(ttask, phrase);
-    inputPath->SetTargetPhrases(*this, targetPhrases, NULL);
-  }
-}
+    const Phrase& source = inputPath->GetPhrase();

-TargetPhraseCollection::shared_ptr  PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY(
-  const Phrase& src) const
-{
-  UTIL_THROW2("Don't call me without the translation task.");
-}
+    // Aggregation of target phrases and corresponding statistics (scores, tables seen by)
+    vector<TargetPhrase*> phraseList;
+    PhraseMap phraseMap;

-TargetPhraseCollection::shared_ptr
-PhraseDictionaryGroup::
-GetTargetPhraseCollectionLEGACY(const ttasksptr& ttask, const Phrase& src) const
-{
-  TargetPhraseCollection::shared_ptr ret
-  = CreateTargetPhraseCollection(ttask, src);
-  ret->NthElement(m_tableLimit); // sort the phrases for pruning later
-  const_cast<PhraseDictionaryGroup*>(this)->CacheForCleanup(ret);
-  return ret;
-}
+    // For each member phrase table
+    size_t offset = 0;
+    for (size_t i = 0; i < m_numModels; ++i) {

-TargetPhraseCollection::shared_ptr
-PhraseDictionaryGroup::
-CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const
-{
-  // Aggregation of phrases and corresponding statistics (scores, models seen by)
-  vector<TargetPhrase*> phraseList;
-  typedef unordered_map<const TargetPhrase*, PDGroupPhrase, UnorderedComparer<Phrase>, UnorderedComparer<Phrase> > PhraseMap;
-  PhraseMap phraseMap;
+      // "Pop" target phrases for this source from current table
+      const PhraseDictionary& pd = *m_memberPDs[i];
+      TargetPhraseCollection::shared_ptr targets = inputPath->GetTargetPhrases(
+          pd);
+      inputPath->SetTargetPhrases(pd, TargetPhraseCollection::shared_ptr(),
+      NULL);

-  // For each model
-  size_t offset = 0;
-  for (size_t i = 0; i < m_numModels; ++i) {
+      // For each target phrase for this <source, table>
+      if (targets != NULL) {
+        BOOST_FOREACH(const TargetPhrase* targetPhrase, *targets) {

-    // Collect phrases from this table
-    const PhraseDictionary& pd = *m_memberPDs[i];
-    TargetPhraseCollection::shared_ptr
-    ret_raw = pd.GetTargetPhraseCollectionLEGACY(ttask, src);
+          vector<float> scores =
+              targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);

-    if (ret_raw != NULL) {
-      // Process each phrase from table
-      BOOST_FOREACH(const TargetPhrase* targetPhrase, *ret_raw) {
-        vector<float> raw_scores =
-          targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);
+          // Phrase not in collection -> add if unrestricted or first model
+          PhraseMap::iterator iter = phraseMap.find(targetPhrase);
+          if (iter == phraseMap.end()) {
+            if (m_restrict && i > 0) {
+              continue;
+            }
+            // Copy phrase to avoid disrupting base model
+            TargetPhrase* phrase = new TargetPhrase(*targetPhrase);
+            // Correct future cost estimates and total score
+            phrase->GetScoreBreakdown().InvertDenseFeatures(&pd);
+            vector<FeatureFunction*> pd_feature;
+            pd_feature.push_back(m_memberPDs[i]);
+            const vector<FeatureFunction*> pd_feature_const(pd_feature);
+            phrase->EvaluateInIsolation(source, pd_feature_const);
+            // Zero out scores from original phrase table
+            phrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
+            // Add phrase entry
+            phraseList.push_back(phrase);
+            phraseMap[targetPhrase] = PDGroupPhrase(phrase, m_defaultScores,
+                m_numModels);
+          } else {
+            // For existing phrases: merge extra scores (such as lr-func scores for mmsapt)
+            TargetPhrase* phrase = iter->second.m_targetPhrase;
+            BOOST_FOREACH(const TargetPhrase::ScoreCache_t::value_type pair, targetPhrase->GetExtraScores()) {
+              phrase->SetExtraScores(pair.first, pair.second);
+            }
+          }
+          // Don't repeat lookup if phrase already found
+          PDGroupPhrase& pdgPhrase =
+              (iter == phraseMap.end()) ?
+                  phraseMap.find(targetPhrase)->second : iter->second;

-        // Phrase not in collection -> add if unrestricted or first model
-        PhraseMap::iterator iter = phraseMap.find(targetPhrase);
-        if (iter == phraseMap.end()) {
-          if (m_restrict && i > 0) {
-            continue;
+          // Copy scores from this model
+          for (size_t j = 0; j < pd.GetNumScoreComponents(); ++j) {
+            pdgPhrase.m_scores[offset + j] = scores[j];
          }

-          // Copy phrase to avoid disrupting base model
-          TargetPhrase* phrase = new TargetPhrase(*targetPhrase);
-          // Correct future cost estimates and total score
-          phrase->GetScoreBreakdown().InvertDenseFeatures(&pd);
-          vector<FeatureFunction*> pd_feature;
-          pd_feature.push_back(m_memberPDs[i]);
-          const vector<FeatureFunction*> pd_feature_const(pd_feature);
-          phrase->EvaluateInIsolation(src, pd_feature_const);
-          // Zero out scores from original phrase table
-          phrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
-          // Add phrase entry
-          phraseList.push_back(phrase);
-          phraseMap[targetPhrase] = PDGroupPhrase(phrase, m_defaultScores, m_numModels);
-        } else {
-          // For existing phrases: merge extra scores (such as lr-func scores for mmsapt)
-          TargetPhrase* phrase = iter->second.m_targetPhrase;
-          BOOST_FOREACH(const TargetPhrase::ScoreCache_t::value_type pair, targetPhrase->GetExtraScores()) {
-            phrase->SetExtraScores(pair.first, pair.second);
-          }
-        }
-        // Don't repeat lookup if phrase already found
-        PDGroupPhrase& pdgPhrase = (iter == phraseMap.end()) ? phraseMap.find(targetPhrase)->second : iter->second;
-
-        // Copy scores from this model
-        for (size_t j = 0; j < pd.GetNumScoreComponents(); ++j) {
-          pdgPhrase.m_scores[offset + j] = raw_scores[j];
-        }
-
-        // Phrase seen by this model
-        pdgPhrase.m_seenBy[i] = true;
-      }
-    }
-    offset += pd.GetNumScoreComponents();
-  }
-
-  // Compute additional scores as phrases are added to return collection
-  TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
-  const vector<FeatureFunction*> pd_feature_const(m_pdFeature);
-  BOOST_FOREACH(TargetPhrase* phrase, phraseList) {
-    PDGroupPhrase& pdgPhrase = phraseMap.find(phrase)->second;
-
-    // Score order (example with 2 models)
-    // member1_scores member2_scores [m1_pc m2_pc] [m1_wc m2_wc]
-
-    // Extra scores added after member model scores
-    size_t offset = m_totalModelScores;
-    // Phrase count (per member model)
-    if (m_phraseCounts) {
-      for (size_t i = 0; i < m_numModels; ++i) {
-        if (pdgPhrase.m_seenBy[i]) {
-          pdgPhrase.m_scores[offset + i] = 1;
+          // Phrase seen by this model
+          pdgPhrase.m_seenBy[i] = true;
        }
      }
-      offset += m_numModels;
-    }
-    // Word count (per member model)
-    if (m_wordCounts) {
-      size_t wc = pdgPhrase.m_targetPhrase->GetSize();
-      for (size_t i = 0; i < m_numModels; ++i) {
-        if (pdgPhrase.m_seenBy[i]) {
-          pdgPhrase.m_scores[offset + i] = wc;
-        }
-      }
-      offset += m_numModels;
+      offset += pd.GetNumScoreComponents();
    }

-    // Model bitmap features (one feature per possible bitmap)
-    // e.g. seen by models 1 and 3 but not 2 -> "101" fires
-    if (m_modelBitmapCounts) {
-      // Throws exception if someone tries to combine more than 64 models
-      pdgPhrase.m_scores[offset + (pdgPhrase.m_seenBy.to_ulong() - 1)] = 1;
-      offset += m_seenByAll.to_ulong();
-    }
+    // Compute additional scores as phrases are added to return collection
+    TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
+    const vector<FeatureFunction*> pd_feature_const(m_pdFeature);
+    BOOST_FOREACH(TargetPhrase* phrase, phraseList) {

-    // Average other-model scores to fill in defaults when models have not seen
-    // this phrase
-    if (m_defaultAverageOthers) {
-      // Average seen scores
-      if (pdgPhrase.m_seenBy != m_seenByAll) {
-        vector<float> avgScores(m_scoresPerModel, 0);
-        size_t seenBy = 0;
-        offset = 0;
-        // sum
+      PDGroupPhrase& pdgPhrase = phraseMap.find(phrase)->second;
+
+      // Score order (example with 2 models)
+      // member1_scores member2_scores [m1_pc m2_pc] [m1_wc m2_wc]
+
+      // Extra scores added after member model scores
+      size_t offset = m_totalModelScores;
+      // Phrase count (per member model)
+      if (m_phraseCounts) {
        for (size_t i = 0; i < m_numModels; ++i) {
          if (pdgPhrase.m_seenBy[i]) {
-            for (size_t j = 0; j < m_scoresPerModel; ++j) {
-              avgScores[j] += pdgPhrase.m_scores[offset + j];
-            }
-            seenBy += 1;
+            pdgPhrase.m_scores[offset + i] = 1;
          }
-          offset += m_scoresPerModel;
        }
-        // divide
-        for (size_t j = 0; j < m_scoresPerModel; ++j) {
-          avgScores[j] /= seenBy;
-        }
-        // copy
-        offset = 0;
+        offset += m_numModels;
+      }
+      // Word count (per member model)
+      if (m_wordCounts) {
+        size_t wc = pdgPhrase.m_targetPhrase->GetSize();
        for (size_t i = 0; i < m_numModels; ++i) {
-          if (!pdgPhrase.m_seenBy[i]) {
-            for (size_t j = 0; j < m_scoresPerModel; ++j) {
-              pdgPhrase.m_scores[offset + j] = avgScores[j];
-            }
+          if (pdgPhrase.m_seenBy[i]) {
+            pdgPhrase.m_scores[offset + i] = wc;
          }
-          offset += m_scoresPerModel;
        }
-#ifdef PT_UG
-        // Also average LexicalReordering scores if specified
-        // We don't necessarily have a lr-func for each model
-        if (m_haveMmsaptLrFunc) {
-          SPTR<Scores> avgLRScores;
+        offset += m_numModels;
+      }
+
+      // Model bitmap features (one feature per possible bitmap)
+      // e.g. seen by models 1 and 3 but not 2 -> "101" fires
+      if (m_modelBitmapCounts) {
+        // Throws exception if someone tries to combine more than 64 models
+        pdgPhrase.m_scores[offset + (pdgPhrase.m_seenBy.to_ulong() - 1)] = 1;
+        offset += m_seenByAll.to_ulong();
+      }
+
+      // Average other-model scores to fill in defaults when models have not seen
+      // this phrase
+      if (m_defaultAverageOthers) {
+        // Average seen scores
+        if (pdgPhrase.m_seenBy != m_seenByAll) {
+          vector<float> avgScores(m_scoresPerModel, 0);
          size_t seenBy = 0;
-          // For each model
+          offset = 0;
+          // sum
          for (size_t i = 0; i < m_numModels; ++i) {
-            const LexicalReordering* lrFunc = *m_mmsaptLrFuncs[i];
-            // Add if phrase seen and model has lr-func
-            if (pdgPhrase.m_seenBy[i] && lrFunc != NULL) {
-              const Scores* scores = pdgPhrase.m_targetPhrase->GetExtraScores(lrFunc);
-              if (!avgLRScores) {
-                avgLRScores.reset(new Scores(*scores));
-              } else {
-                for (size_t j = 0; j < scores->size(); ++j) {
-                  (*avgLRScores)[j] += (*scores)[j];
-                }
+            if (pdgPhrase.m_seenBy[i]) {
+              for (size_t j = 0; j < m_scoresPerModel; ++j) {
+                avgScores[j] += pdgPhrase.m_scores[offset + j];
              }
              seenBy += 1;
            }
+            offset += m_scoresPerModel;
          }
-          // Make sure we have at least one lr-func
-          if (avgLRScores) {
-            // divide
-            for (size_t j = 0; j < avgLRScores->size(); ++j) {
-              (*avgLRScores)[j] /= seenBy;
+          // divide
+          for (size_t j = 0; j < m_scoresPerModel; ++j) {
+            avgScores[j] /= seenBy;
+          }
+          // copy
+          offset = 0;
+          for (size_t i = 0; i < m_numModels; ++i) {
+            if (!pdgPhrase.m_seenBy[i]) {
+              for (size_t j = 0; j < m_scoresPerModel; ++j) {
+                pdgPhrase.m_scores[offset + j] = avgScores[j];
+              }
            }
-            // set
+            offset += m_scoresPerModel;
+          }
+#ifdef PT_UG
+          // Also average LexicalReordering scores if specified
+          // We don't necessarily have a lr-func for each model
+          if (m_haveMmsaptLrFunc) {
+            SPTR<Scores> avgLRScores;
+            size_t seenBy = 0;
+            // For each model
            for (size_t i = 0; i < m_numModels; ++i) {
              const LexicalReordering* lrFunc = *m_mmsaptLrFuncs[i];
-              if (!pdgPhrase.m_seenBy[i] && lrFunc != NULL) {
-                pdgPhrase.m_targetPhrase->SetExtraScores(lrFunc, avgLRScores);
+              // Add if phrase seen and model has lr-func
+              if (pdgPhrase.m_seenBy[i] && lrFunc != NULL) {
+                const Scores* scores = pdgPhrase.m_targetPhrase->GetExtraScores(lrFunc);
+                if (!avgLRScores) {
+                  avgLRScores.reset(new Scores(*scores));
+                } else {
+                  for (size_t j = 0; j < scores->size(); ++j) {
+                    (*avgLRScores)[j] += (*scores)[j];
+                  }
+                }
+                seenBy += 1;
+              }
+            }
+            // Make sure we have at least one lr-func
+            if (avgLRScores) {
+              // divide
+              for (size_t j = 0; j < avgLRScores->size(); ++j) {
+                (*avgLRScores)[j] /= seenBy;
+              }
+              // set
+              for (size_t i = 0; i < m_numModels; ++i) {
+                const LexicalReordering* lrFunc = *m_mmsaptLrFuncs[i];
+                if (!pdgPhrase.m_seenBy[i] && lrFunc != NULL) {
+                  pdgPhrase.m_targetPhrase->SetExtraScores(lrFunc, avgLRScores);
+                }
              }
            }
          }
-        }
 #endif
+        }
      }
+
+      // Assign scores
+      phrase->GetScoreBreakdown().Assign(this, pdgPhrase.m_scores);
+      // Correct future cost estimates and total score
+      phrase->EvaluateInIsolation(source, pd_feature_const);
+      ret->Add(phrase);
    }
-
-    // Assign scores
-    phrase->GetScoreBreakdown().Assign(this, pdgPhrase.m_scores);
-    // Correct future cost estimates and total score
-    phrase->EvaluateInIsolation(src, pd_feature_const);
-    ret->Add(phrase);
+    // Add target phrases to path for this input phrase
+    const_cast<PhraseDictionaryGroup*>(this)->CacheForCleanup(ret);
+    inputPath->SetTargetPhrases(*this, ret, NULL);
  }
-
-  return ret;
 }

-ChartRuleLookupManager*
-PhraseDictionaryGroup::
-CreateRuleLookupManager(const ChartParser &,
-                        const ChartCellCollectionBase&, size_t)
+ChartRuleLookupManager* PhraseDictionaryGroup::CreateRuleLookupManager(
+    const ChartParser &,
+    const ChartCellCollectionBase&,
+    size_t)
 {
  UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
 }

-//copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
-void PhraseDictionaryGroup::CacheForCleanup(TargetPhraseCollection::shared_ptr  tpc)
+// copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
+void PhraseDictionaryGroup::CacheForCleanup(
+    TargetPhraseCollection::shared_ptr tpc)
 {
  PhraseCache &ref = GetPhraseCache();
  ref.push_back(tpc);
 }

-void
-PhraseDictionaryGroup::
-CleanUpAfterSentenceProcessing(const InputType &source)
+void PhraseDictionaryGroup::CleanUpAfterSentenceProcessing(
+    const InputType &source)
 {
  GetPhraseCache().clear();
  CleanUpComponentModels(source);
--- a/moses/TranslationModel/PhraseDictionaryGroup.h
+++ b/moses/TranslationModel/PhraseDictionaryGroup.h
@@ -39,17 +39,24 @@
 namespace Moses
 {

-struct PDGroupPhrase {
+struct PDGroupPhrase
+{
  TargetPhrase* m_targetPhrase;
  std::vector<float> m_scores;
  boost::dynamic_bitset<> m_seenBy;

-  PDGroupPhrase() : m_targetPhrase(NULL) { }
+  PDGroupPhrase() :
+      m_targetPhrase(NULL)
+  {
+  }

-  PDGroupPhrase(TargetPhrase* targetPhrase, const std::vector<float>& scores, const size_t nModels)
-    : m_targetPhrase(targetPhrase),
-      m_scores(scores),
-      m_seenBy(nModels) { }
+  PDGroupPhrase(
+      TargetPhrase* targetPhrase,
+      const std::vector<float>& scores,
+      const size_t nModels) :
+      m_targetPhrase(targetPhrase), m_scores(scores), m_seenBy(nModels)
+  {
+  }
 };

 /** Combines multiple phrase tables into a single interface.  Each member phrase
@@ -64,24 +71,17 @@ class PhraseDictionaryGroup: public PhraseDictionary
 public:
  PhraseDictionaryGroup(const std::string& line);
  void Load(AllOptions::ptr const& opts);
-  TargetPhraseCollection::shared_ptr
-  CreateTargetPhraseCollection(const ttasksptr& ttask,
-                               const Phrase& src) const;
-  std::vector<std::vector<float> > getWeights(size_t numWeights,
-      bool normalize) const;
-  void CacheForCleanup(TargetPhraseCollection::shared_ptr  tpc);
+  void CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
  void CleanUpAfterSentenceProcessing(const InputType& source);
  void CleanUpComponentModels(const InputType& source);
  // functions below override the base class
-  void GetTargetPhraseCollectionBatch(const ttasksptr& ttask,
-                                      const InputPathList &inputPathQueue) const;
-  TargetPhraseCollection::shared_ptr  GetTargetPhraseCollectionLEGACY(
-    const Phrase& src) const;
-  TargetPhraseCollection::shared_ptr  GetTargetPhraseCollectionLEGACY(
-    const ttasksptr& ttask, const Phrase& src) const;
-  void InitializeForInput(ttasksptr const& ttask);
-  ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&,
-      const ChartCellCollectionBase&, std::size_t);
+  void GetTargetPhraseCollectionBatch(
+      const ttasksptr& ttask,
+      const InputPathList &inputPathQueue) const;
+  ChartRuleLookupManager* CreateRuleLookupManager(
+      const ChartParser&,
+      const ChartCellCollectionBase&,
+      std::size_t);
  void SetParameter(const std::string& key, const std::string& value);

 protected:
@@ -109,8 +109,10 @@ protected:
  bool m_haveMmsaptLrFunc;
  // pointers to pointers since member mmsapts may not load these until later
  std::vector<LexicalReordering**> m_mmsaptLrFuncs;
+  typedef boost::unordered_map<const TargetPhrase*, PDGroupPhrase,
+      UnorderedComparer<Phrase>, UnorderedComparer<Phrase> > PhraseMap;

-  typedef std::vector<TargetPhraseCollection::shared_ptr > PhraseCache;
+  typedef std::vector<TargetPhraseCollection::shared_ptr> PhraseCache;
 #ifdef WITH_THREADS
  boost::shared_mutex m_lock_cache;
  typedef std::map<boost::thread::id, PhraseCache> SentenceCache;
@@ -119,13 +121,14 @@ protected:
 #endif
  SentenceCache m_sentenceCache;

-  PhraseCache& GetPhraseCache() {
+  PhraseCache& GetPhraseCache()
+  {
 #ifdef WITH_THREADS
    {
      // first try read-only lock
      boost::shared_lock<boost::shared_mutex> read_lock(m_lock_cache);
      SentenceCache::iterator i = m_sentenceCache.find(
-        boost::this_thread::get_id());
+          boost::this_thread::get_id());
      if (i != m_sentenceCache.end())
        return i->second;
    }