Merge branch 'master' of github.com:moses-smt/mosesdecoder into vw_tgtcontext

2024-09-11 19:27:11 +03:00 · 2016-03-22 15:42:43 +01:00 · 2016-03-22 15:42:43 +01:00 · c7222841d7
commit c7222841d7
parent a7e33755f9 b760ad8a7e
4 changed files with 273 additions and 248 deletions
--- a/moses/FeatureVector.cpp
+++ b/moses/FeatureVector.cpp
@ -175,7 +175,7 @@ void FVector::resize(size_t newsize)

 void FVector::clear()
 {
-  m_coreFeatures.resize(0);
+  m_coreFeatures.resize(m_coreFeatures.size(), 0);
  m_features.clear();
 }

--- a/moses/TranslationModel/PhraseDictionaryGroup.cpp
+++ b/moses/TranslationModel/PhraseDictionaryGroup.cpp
@ -20,6 +20,7 @@
 #include "moses/TranslationModel/PhraseDictionaryGroup.h"

 #include <boost/foreach.hpp>
+#include <boost/unordered_map.hpp>

 #include "util/exception.hh"

@ -29,18 +30,18 @@ using namespace boost;
 namespace Moses
 {

-PhraseDictionaryGroup::PhraseDictionaryGroup(const string &line) :
-  PhraseDictionary(line, true),
-  m_numModels(0),
-  m_totalModelScores(0),
-  m_phraseCounts(false),
-  m_wordCounts(false),
-  m_modelBitmapCounts(false),
-  m_restrict(false),
-  m_haveDefaultScores(false),
-  m_defaultAverageOthers(false),
-  m_scoresPerModel(0),
-  m_haveMmsaptLrFunc(false)
+PhraseDictionaryGroup::PhraseDictionaryGroup(const string &line)
+  : PhraseDictionary(line, true),
+    m_numModels(0),
+    m_totalModelScores(0),
+    m_phraseCounts(false),
+    m_wordCounts(false),
+    m_modelBitmapCounts(false),
+    m_restrict(false),
+    m_haveDefaultScores(false),
+    m_defaultAverageOthers(false),
+    m_scoresPerModel(0),
+    m_haveMmsaptLrFunc(false)
 {
  ReadParameters();
 }
@ -60,12 +61,12 @@ void PhraseDictionaryGroup::SetParameter(const string& key, const string& value)
    m_wordCounts = Scan<bool>(value);
  } else if (key == "model-bitmap-counts") {
    m_modelBitmapCounts = Scan<bool>(value);
-  } else if (key == "default-scores") {
+  } else if (key =="default-scores") {
    m_haveDefaultScores = true;
    m_defaultScores = Scan<float>(Tokenize(value, ","));
-  } else if (key == "default-average-others") {
+  } else if (key =="default-average-others") {
    m_defaultAverageOthers = Scan<bool>(value);
-  } else if (key == "mmsapt-lr-func") {
+  } else if (key =="mmsapt-lr-func") {
    m_haveMmsaptLrFunc = true;
  } else {
    PhraseDictionary::SetParameter(key, value);
@ -136,233 +137,253 @@ void PhraseDictionaryGroup::Load(AllOptions::ptr const& opts)
  }
 }

-void PhraseDictionaryGroup::GetTargetPhraseCollectionBatch(
-  const ttasksptr& ttask,
-  const InputPathList& inputPathQueue) const
+void PhraseDictionaryGroup::InitializeForInput(const ttasksptr& ttask)
 {
-  // For each member phrase table, add translation options to input paths
-  // (Run each phrase table lookup normally)
-  BOOST_FOREACH(const PhraseDictionary* pd, m_memberPDs) {
-    pd->GetTargetPhraseCollectionBatch(ttask, inputPathQueue);
+  // Member models are registered as FFs and should already be initialized
+}
+
+void PhraseDictionaryGroup::GetTargetPhraseCollectionBatch(
+  const ttasksptr& ttask, const InputPathList& inputPathQueue) const
+{
+  // Some implementations (mmsapt) do work in PrefixExists
+  BOOST_FOREACH(const InputPath* inputPath, inputPathQueue) {
+    const Phrase& phrase = inputPath->GetPhrase();
+    BOOST_FOREACH(const PhraseDictionary* pd, m_memberPDs) {
+      pd->PrefixExists(ttask, phrase);
+    }
  }
-
-  // Below: "collapse" translation options from all member tables into a single
-  // option for each <source, target> phrase pair for this table.  Remove
-  // original options from other tables.
-
-  // For each input path (source phrase)
+  // Look up each input in each model
  BOOST_FOREACH(InputPath* inputPath, inputPathQueue) {
-    const Phrase& source = inputPath->GetPhrase();
-
-    // Aggregation of target phrases and corresponding statistics (scores, tables seen by)
-    vector<TargetPhrase*> phraseList;
-    PhraseMap phraseMap;
-
-    // For each member phrase table
-    size_t offset = 0;
-    for (size_t i = 0; i < m_numModels; ++i) {
-
-      // "Pop" target phrases for this source from current table
-      const PhraseDictionary& pd = *m_memberPDs[i];
-      TargetPhraseCollection::shared_ptr targets = inputPath->GetTargetPhrases(
-            pd);
-      inputPath->SetTargetPhrases(pd, TargetPhraseCollection::shared_ptr(),
-                                  NULL);
-
-      // For each target phrase for this <source, table>
-      if (targets != NULL) {
-        BOOST_FOREACH(const TargetPhrase* targetPhrase, *targets) {
-
-          vector<float> scores =
-            targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);
-
-          // Phrase not in collection -> add if unrestricted or first model
-          PhraseMap::iterator iter = phraseMap.find(targetPhrase);
-          if (iter == phraseMap.end()) {
-            if (m_restrict && i > 0) {
-              continue;
-            }
-            // Copy phrase to avoid disrupting base model
-            TargetPhrase* phrase = new TargetPhrase(*targetPhrase);
-            // Correct future cost estimates and total score
-            phrase->GetScoreBreakdown().InvertDenseFeatures(&pd);
-            vector<FeatureFunction*> pd_feature;
-            pd_feature.push_back(m_memberPDs[i]);
-            const vector<FeatureFunction*> pd_feature_const(pd_feature);
-            phrase->EvaluateInIsolation(source, pd_feature_const);
-            // Zero out scores from original phrase table
-            phrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
-            // Add phrase entry
-            phraseList.push_back(phrase);
-            phraseMap[targetPhrase] = PDGroupPhrase(phrase, m_defaultScores,
-                                                    m_numModels);
-          } else {
-            // For existing phrases: merge extra scores (such as lr-func scores for mmsapt)
-            TargetPhrase* phrase = iter->second.m_targetPhrase;
-            BOOST_FOREACH(const TargetPhrase::ScoreCache_t::value_type pair, targetPhrase->GetExtraScores()) {
-              phrase->SetExtraScores(pair.first, pair.second);
-            }
-          }
-          // Don't repeat lookup if phrase already found
-          PDGroupPhrase& pdgPhrase =
-            (iter == phraseMap.end()) ?
-            phraseMap.find(targetPhrase)->second : iter->second;
-
-          // Copy scores from this model
-          for (size_t j = 0; j < pd.GetNumScoreComponents(); ++j) {
-            pdgPhrase.m_scores[offset + j] = scores[j];
-          }
-
-          // Phrase seen by this model
-          pdgPhrase.m_seenBy[i] = true;
-        }
-      }
-      offset += pd.GetNumScoreComponents();
-    }
-
-    // Compute additional scores as phrases are added to return collection
-    TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
-    const vector<FeatureFunction*> pd_feature_const(m_pdFeature);
-    BOOST_FOREACH(TargetPhrase* phrase, phraseList) {
-
-      PDGroupPhrase& pdgPhrase = phraseMap.find(phrase)->second;
-
-      // Score order (example with 2 models)
-      // member1_scores member2_scores [m1_pc m2_pc] [m1_wc m2_wc]
-
-      // Extra scores added after member model scores
-      size_t offset = m_totalModelScores;
-      // Phrase count (per member model)
-      if (m_phraseCounts) {
-        for (size_t i = 0; i < m_numModels; ++i) {
-          if (pdgPhrase.m_seenBy[i]) {
-            pdgPhrase.m_scores[offset + i] = 1;
-          }
-        }
-        offset += m_numModels;
-      }
-      // Word count (per member model)
-      if (m_wordCounts) {
-        size_t wc = pdgPhrase.m_targetPhrase->GetSize();
-        for (size_t i = 0; i < m_numModels; ++i) {
-          if (pdgPhrase.m_seenBy[i]) {
-            pdgPhrase.m_scores[offset + i] = wc;
-          }
-        }
-        offset += m_numModels;
-      }
-
-      // Model bitmap features (one feature per possible bitmap)
-      // e.g. seen by models 1 and 3 but not 2 -> "101" fires
-      if (m_modelBitmapCounts) {
-        // Throws exception if someone tries to combine more than 64 models
-        pdgPhrase.m_scores[offset + (pdgPhrase.m_seenBy.to_ulong() - 1)] = 1;
-        offset += m_seenByAll.to_ulong();
-      }
-
-      // Average other-model scores to fill in defaults when models have not seen
-      // this phrase
-      if (m_defaultAverageOthers) {
-        // Average seen scores
-        if (pdgPhrase.m_seenBy != m_seenByAll) {
-          vector<float> avgScores(m_scoresPerModel, 0);
-          size_t seenBy = 0;
-          offset = 0;
-          // sum
-          for (size_t i = 0; i < m_numModels; ++i) {
-            if (pdgPhrase.m_seenBy[i]) {
-              for (size_t j = 0; j < m_scoresPerModel; ++j) {
-                avgScores[j] += pdgPhrase.m_scores[offset + j];
-              }
-              seenBy += 1;
-            }
-            offset += m_scoresPerModel;
-          }
-          // divide
-          for (size_t j = 0; j < m_scoresPerModel; ++j) {
-            avgScores[j] /= seenBy;
-          }
-          // copy
-          offset = 0;
-          for (size_t i = 0; i < m_numModels; ++i) {
-            if (!pdgPhrase.m_seenBy[i]) {
-              for (size_t j = 0; j < m_scoresPerModel; ++j) {
-                pdgPhrase.m_scores[offset + j] = avgScores[j];
-              }
-            }
-            offset += m_scoresPerModel;
-          }
-#ifdef PT_UG
-          // Also average LexicalReordering scores if specified
-          // We don't necessarily have a lr-func for each model
-          if (m_haveMmsaptLrFunc) {
-            SPTR<Scores> avgLRScores;
-            size_t seenBy = 0;
-            // For each model
-            for (size_t i = 0; i < m_numModels; ++i) {
-              const LexicalReordering* lrFunc = *m_mmsaptLrFuncs[i];
-              // Add if phrase seen and model has lr-func
-              if (pdgPhrase.m_seenBy[i] && lrFunc != NULL) {
-                const Scores* scores = pdgPhrase.m_targetPhrase->GetExtraScores(lrFunc);
-                if (!avgLRScores) {
-                  avgLRScores.reset(new Scores(*scores));
-                } else {
-                  for (size_t j = 0; j < scores->size(); ++j) {
-                    (*avgLRScores)[j] += (*scores)[j];
-                  }
-                }
-                seenBy += 1;
-              }
-            }
-            // Make sure we have at least one lr-func
-            if (avgLRScores) {
-              // divide
-              for (size_t j = 0; j < avgLRScores->size(); ++j) {
-                (*avgLRScores)[j] /= seenBy;
-              }
-              // set
-              for (size_t i = 0; i < m_numModels; ++i) {
-                const LexicalReordering* lrFunc = *m_mmsaptLrFuncs[i];
-                if (!pdgPhrase.m_seenBy[i] && lrFunc != NULL) {
-                  pdgPhrase.m_targetPhrase->SetExtraScores(lrFunc, avgLRScores);
-                }
-              }
-            }
-          }
-#endif
-        }
-      }
-
-      // Assign scores
-      phrase->GetScoreBreakdown().Assign(this, pdgPhrase.m_scores);
-      // Correct future cost estimates and total score
-      phrase->EvaluateInIsolation(source, pd_feature_const);
-      ret->Add(phrase);
-    }
-    // Add target phrases to path for this input phrase
-    const_cast<PhraseDictionaryGroup*>(this)->CacheForCleanup(ret);
-    inputPath->SetTargetPhrases(*this, ret, NULL);
+    const Phrase &phrase = inputPath->GetPhrase();
+    TargetPhraseCollection::shared_ptr  targetPhrases =
+      this->GetTargetPhraseCollectionLEGACY(ttask, phrase);
+    inputPath->SetTargetPhrases(*this, targetPhrases, NULL);
  }
 }

-ChartRuleLookupManager* PhraseDictionaryGroup::CreateRuleLookupManager(
-  const ChartParser &,
-  const ChartCellCollectionBase&,
-  size_t)
+TargetPhraseCollection::shared_ptr  PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY(
+  const Phrase& src) const
+{
+  UTIL_THROW2("Don't call me without the translation task.");
+}
+
+TargetPhraseCollection::shared_ptr
+PhraseDictionaryGroup::
+GetTargetPhraseCollectionLEGACY(const ttasksptr& ttask, const Phrase& src) const
+{
+  TargetPhraseCollection::shared_ptr ret
+  = CreateTargetPhraseCollection(ttask, src);
+  ret->NthElement(m_tableLimit); // sort the phrases for pruning later
+  const_cast<PhraseDictionaryGroup*>(this)->CacheForCleanup(ret);
+  return ret;
+}
+
+TargetPhraseCollection::shared_ptr
+PhraseDictionaryGroup::
+CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const
+{
+  // Aggregation of phrases and corresponding statistics (scores, models seen by)
+  vector<TargetPhrase*> phraseList;
+  typedef unordered_map<const TargetPhrase*, PDGroupPhrase, UnorderedComparer<Phrase>, UnorderedComparer<Phrase> > PhraseMap;
+  PhraseMap phraseMap;
+
+  // For each model
+  size_t offset = 0;
+  for (size_t i = 0; i < m_numModels; ++i) {
+
+    // Collect phrases from this table
+    const PhraseDictionary& pd = *m_memberPDs[i];
+    TargetPhraseCollection::shared_ptr
+    ret_raw = pd.GetTargetPhraseCollectionLEGACY(ttask, src);
+
+    if (ret_raw != NULL) {
+      // Process each phrase from table
+      BOOST_FOREACH(const TargetPhrase* targetPhrase, *ret_raw) {
+        vector<float> raw_scores =
+          targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);
+
+        // Phrase not in collection -> add if unrestricted or first model
+        PhraseMap::iterator iter = phraseMap.find(targetPhrase);
+        if (iter == phraseMap.end()) {
+          if (m_restrict && i > 0) {
+            continue;
+          }
+
+          // Copy phrase to avoid disrupting base model
+          TargetPhrase* phrase = new TargetPhrase(*targetPhrase);
+          // Correct future cost estimates and total score
+          phrase->GetScoreBreakdown().InvertDenseFeatures(&pd);
+          vector<FeatureFunction*> pd_feature;
+          pd_feature.push_back(m_memberPDs[i]);
+          const vector<FeatureFunction*> pd_feature_const(pd_feature);
+          phrase->EvaluateInIsolation(src, pd_feature_const);
+          // Zero out scores from original phrase table
+          phrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
+          // Add phrase entry
+          phraseList.push_back(phrase);
+          phraseMap[targetPhrase] = PDGroupPhrase(phrase, m_defaultScores, m_numModels);
+        } else {
+          // For existing phrases: merge extra scores (such as lr-func scores for mmsapt)
+          TargetPhrase* phrase = iter->second.m_targetPhrase;
+          BOOST_FOREACH(const TargetPhrase::ScoreCache_t::value_type pair, targetPhrase->GetExtraScores()) {
+            phrase->SetExtraScores(pair.first, pair.second);
+          }
+        }
+        // Don't repeat lookup if phrase already found
+        PDGroupPhrase& pdgPhrase = (iter == phraseMap.end()) ? phraseMap.find(targetPhrase)->second : iter->second;
+
+        // Copy scores from this model
+        for (size_t j = 0; j < pd.GetNumScoreComponents(); ++j) {
+          pdgPhrase.m_scores[offset + j] = raw_scores[j];
+        }
+
+        // Phrase seen by this model
+        pdgPhrase.m_seenBy[i] = true;
+      }
+    }
+    offset += pd.GetNumScoreComponents();
+  }
+
+  // Compute additional scores as phrases are added to return collection
+  TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
+  const vector<FeatureFunction*> pd_feature_const(m_pdFeature);
+  BOOST_FOREACH(TargetPhrase* phrase, phraseList) {
+    PDGroupPhrase& pdgPhrase = phraseMap.find(phrase)->second;
+
+    // Score order (example with 2 models)
+    // member1_scores member2_scores [m1_pc m2_pc] [m1_wc m2_wc]
+
+    // Extra scores added after member model scores
+    size_t offset = m_totalModelScores;
+    // Phrase count (per member model)
+    if (m_phraseCounts) {
+      for (size_t i = 0; i < m_numModels; ++i) {
+        if (pdgPhrase.m_seenBy[i]) {
+          pdgPhrase.m_scores[offset + i] = 1;
+        }
+      }
+      offset += m_numModels;
+    }
+    // Word count (per member model)
+    if (m_wordCounts) {
+      size_t wc = pdgPhrase.m_targetPhrase->GetSize();
+      for (size_t i = 0; i < m_numModels; ++i) {
+        if (pdgPhrase.m_seenBy[i]) {
+          pdgPhrase.m_scores[offset + i] = wc;
+        }
+      }
+      offset += m_numModels;
+    }
+
+    // Model bitmap features (one feature per possible bitmap)
+    // e.g. seen by models 1 and 3 but not 2 -> "101" fires
+    if (m_modelBitmapCounts) {
+      // Throws exception if someone tries to combine more than 64 models
+      pdgPhrase.m_scores[offset + (pdgPhrase.m_seenBy.to_ulong() - 1)] = 1;
+      offset += m_seenByAll.to_ulong();
+    }
+
+    // Average other-model scores to fill in defaults when models have not seen
+    // this phrase
+    if (m_defaultAverageOthers) {
+      // Average seen scores
+      if (pdgPhrase.m_seenBy != m_seenByAll) {
+        vector<float> avgScores(m_scoresPerModel, 0);
+        size_t seenBy = 0;
+        offset = 0;
+        // sum
+        for (size_t i = 0; i < m_numModels; ++i) {
+          if (pdgPhrase.m_seenBy[i]) {
+            for (size_t j = 0; j < m_scoresPerModel; ++j) {
+              avgScores[j] += pdgPhrase.m_scores[offset + j];
+            }
+            seenBy += 1;
+          }
+          offset += m_scoresPerModel;
+        }
+        // divide
+        for (size_t j = 0; j < m_scoresPerModel; ++j) {
+          avgScores[j] /= seenBy;
+        }
+        // copy
+        offset = 0;
+        for (size_t i = 0; i < m_numModels; ++i) {
+          if (!pdgPhrase.m_seenBy[i]) {
+            for (size_t j = 0; j < m_scoresPerModel; ++j) {
+              pdgPhrase.m_scores[offset + j] = avgScores[j];
+            }
+          }
+          offset += m_scoresPerModel;
+        }
+#ifdef PT_UG
+        // Also average LexicalReordering scores if specified
+        // We don't necessarily have a lr-func for each model
+        if (m_haveMmsaptLrFunc) {
+          SPTR<Scores> avgLRScores;
+          size_t seenBy = 0;
+          // For each model
+          for (size_t i = 0; i < m_numModels; ++i) {
+            const LexicalReordering* lrFunc = *m_mmsaptLrFuncs[i];
+            // Add if phrase seen and model has lr-func
+            if (pdgPhrase.m_seenBy[i] && lrFunc != NULL) {
+              const Scores* scores = pdgPhrase.m_targetPhrase->GetExtraScores(lrFunc);
+              if (!avgLRScores) {
+                avgLRScores.reset(new Scores(*scores));
+              } else {
+                for (size_t j = 0; j < scores->size(); ++j) {
+                  (*avgLRScores)[j] += (*scores)[j];
+                }
+              }
+              seenBy += 1;
+            }
+          }
+          // Make sure we have at least one lr-func
+          if (avgLRScores) {
+            // divide
+            for (size_t j = 0; j < avgLRScores->size(); ++j) {
+              (*avgLRScores)[j] /= seenBy;
+            }
+            // set
+            for (size_t i = 0; i < m_numModels; ++i) {
+              const LexicalReordering* lrFunc = *m_mmsaptLrFuncs[i];
+              if (!pdgPhrase.m_seenBy[i] && lrFunc != NULL) {
+                pdgPhrase.m_targetPhrase->SetExtraScores(lrFunc, avgLRScores);
+              }
+            }
+          }
+        }
+#endif
+      }
+    }
+
+    // Assign scores
+    phrase->GetScoreBreakdown().Assign(this, pdgPhrase.m_scores);
+    // Correct future cost estimates and total score
+    phrase->EvaluateInIsolation(src, pd_feature_const);
+    ret->Add(phrase);
+  }
+
+  return ret;
+}
+
+ChartRuleLookupManager*
+PhraseDictionaryGroup::
+CreateRuleLookupManager(const ChartParser &,
+                        const ChartCellCollectionBase&, size_t)
 {
  UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
 }

-// copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
-void PhraseDictionaryGroup::CacheForCleanup(
-  TargetPhraseCollection::shared_ptr tpc)
+//copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
+void PhraseDictionaryGroup::CacheForCleanup(TargetPhraseCollection::shared_ptr  tpc)
 {
  PhraseCache &ref = GetPhraseCache();
  ref.push_back(tpc);
 }

-void PhraseDictionaryGroup::CleanUpAfterSentenceProcessing(
-  const InputType &source)
+void
+PhraseDictionaryGroup::
+CleanUpAfterSentenceProcessing(const InputType &source)
 {
  GetPhraseCache().clear();
  CleanUpComponentModels(source);
--- a/moses/TranslationModel/PhraseDictionaryGroup.h
+++ b/moses/TranslationModel/PhraseDictionaryGroup.h
@ -44,16 +44,12 @@ struct PDGroupPhrase {
  std::vector<float> m_scores;
  boost::dynamic_bitset<> m_seenBy;

-  PDGroupPhrase() :
-    m_targetPhrase(NULL) {
-  }
+  PDGroupPhrase() : m_targetPhrase(NULL) { }

-  PDGroupPhrase(
-    TargetPhrase* targetPhrase,
-    const std::vector<float>& scores,
-    const size_t nModels) :
-    m_targetPhrase(targetPhrase), m_scores(scores), m_seenBy(nModels) {
-  }
+  PDGroupPhrase(TargetPhrase* targetPhrase, const std::vector<float>& scores, const size_t nModels)
+    : m_targetPhrase(targetPhrase),
+      m_scores(scores),
+      m_seenBy(nModels) { }
 };

 /** Combines multiple phrase tables into a single interface.  Each member phrase
@ -68,17 +64,24 @@ class PhraseDictionaryGroup: public PhraseDictionary
 public:
  PhraseDictionaryGroup(const std::string& line);
  void Load(AllOptions::ptr const& opts);
-  void CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
+  TargetPhraseCollection::shared_ptr
+  CreateTargetPhraseCollection(const ttasksptr& ttask,
+                               const Phrase& src) const;
+  std::vector<std::vector<float> > getWeights(size_t numWeights,
+      bool normalize) const;
+  void CacheForCleanup(TargetPhraseCollection::shared_ptr  tpc);
  void CleanUpAfterSentenceProcessing(const InputType& source);
  void CleanUpComponentModels(const InputType& source);
  // functions below override the base class
-  void GetTargetPhraseCollectionBatch(
-    const ttasksptr& ttask,
-    const InputPathList &inputPathQueue) const;
-  ChartRuleLookupManager* CreateRuleLookupManager(
-    const ChartParser&,
-    const ChartCellCollectionBase&,
-    std::size_t);
+  void GetTargetPhraseCollectionBatch(const ttasksptr& ttask,
+                                      const InputPathList &inputPathQueue) const;
+  TargetPhraseCollection::shared_ptr  GetTargetPhraseCollectionLEGACY(
+    const Phrase& src) const;
+  TargetPhraseCollection::shared_ptr  GetTargetPhraseCollectionLEGACY(
+    const ttasksptr& ttask, const Phrase& src) const;
+  void InitializeForInput(ttasksptr const& ttask);
+  ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&,
+      const ChartCellCollectionBase&, std::size_t);
  void SetParameter(const std::string& key, const std::string& value);

 protected:
@ -106,10 +109,8 @@ protected:
  bool m_haveMmsaptLrFunc;
  // pointers to pointers since member mmsapts may not load these until later
  std::vector<LexicalReordering**> m_mmsaptLrFuncs;
-  typedef boost::unordered_map<const TargetPhrase*, PDGroupPhrase,
-          UnorderedComparer<Phrase>, UnorderedComparer<Phrase> > PhraseMap;

-  typedef std::vector<TargetPhraseCollection::shared_ptr> PhraseCache;
+  typedef std::vector<TargetPhraseCollection::shared_ptr > PhraseCache;
 #ifdef WITH_THREADS
  boost::shared_mutex m_lock_cache;
  typedef std::map<boost::thread::id, PhraseCache> SentenceCache;
--- a/scripts/tokenizer/tokenizer.perl
+++ b/scripts/tokenizer/tokenizer.perl
@ -348,6 +348,9 @@ sub tokenize
    $text =~ s/^ //g;
    $text =~ s/ $//g;

+    # .' at end of sentence is missed
+    $text =~ s/\.\' ?$/ . ' /;
+
    # restore protected
    for (my $i = 0; $i < scalar(@protected); ++$i) {
      my $subst = sprintf("THISISPROTECTED%.3d", $i);