online combination of multiple phrase tables

- creates a virtual phrase table at decoding time based on a vector of component models and a combination algorithm
  - linear interpolation or instance weighting
  - two possible component model types supported so far: 0 (in-memory) or 12 (compact)
  - weights can be set in the config, and overridden at the sentence level through the mosesserver API
  - online optimization (perplexity minimization) using dlib, exposed through an xmlrpc-c call
Rico Sennrich 2013-04-22 13:21:59 +02:00
parent 477f913585
commit 908c006e32
17 changed files with 1886 additions and 12 deletions

View File

@ -0,0 +1,78 @@
# -*- coding: utf-8 -*-
#
# Sample python client. In addition to the basic functionality, it shows how translation model
# weights can be passed to the multimodel phrase table type, and how translation model weights
# can be optimized on a tuning set of phrase pairs.
# translate_concurrent() shows how to use multiple moses server threads.
#

import sys
import gzip
from multiprocessing import Pool

if sys.version_info < (3, 0):
    import xmlrpclib
else:
    import xmlrpc.client as xmlrpclib


def translate(input_object, server, weights=None):
    for line in input_object:
        params = {}
        params['text'] = line
        if weights:
            params['weight-t-multimodel'] = weights
        print(server.translate(params))


def optimize(phrase_pairs, server):
    params = {}
    params['phrase_pairs'] = phrase_pairs
    weights = server.optimize(params)
    sys.stderr.write(str(weights) + '\n')
    return weights


def read_phrase_pairs(input_object):
    pairs = []
    for line in input_object:
        line = line.split(' ||| ')
        pairs.append((line[0], line[1]))
    return pairs


# same functionality as translate(), but using multiple concurrent connections to the server
def translate_concurrent(input_object, url, weights=None, num_processes=8):
    pool = Pool(processes=num_processes)
    text_args = [(line, weights, url) for line in input_object]
    for translated_line in pool.imap(translate_single_line, text_args):
        print(translated_line)


def translate_single_line(args):
    line, weights, url = args
    server = xmlrpclib.ServerProxy(url)
    params = {}
    params['text'] = line
    if weights:
        params['weight-t-multimodel'] = weights
    return server.translate(params)['text']


if __name__ == '__main__':
    url = "http://localhost:8111/RPC2"
    server = xmlrpclib.ServerProxy(url)

    phrase_pairs = read_phrase_pairs(gzip.open('/path/to/moses-regression-tests/models/multimodel/extract.sorted.gz'))
    weights = optimize(phrase_pairs, server)
    translate(sys.stdin, server, weights)
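
The flat weight list returned by optimize() contains one weight per feature and model, laid out feature-major (ret[(iFeature*numModels)+i] in the C++ code further below), and the same flat list can be passed back via weight-t-multimodel. A small, hedged sketch of how a client might inspect it; the model count, feature count and paths are illustrative assumptions:

num_models, num_features = 2, 4  # illustrative: 2 component tables, 4 weighted translation features
server = xmlrpclib.ServerProxy("http://localhost:8111/RPC2")
weights = optimize(read_phrase_pairs(gzip.open('/path/to/tuning/extract.sorted.gz')), server)
# feature-major layout: [f0_m0, f0_m1, f1_m0, f1_m1, ...]
for f in range(num_features):
    sys.stderr.write("feature %d: %s\n" % (f, weights[f * num_models:(f + 1) * num_models]))
# passing only num_models weights instead applies the same weights to every feature
translate(sys.stdin, server, weights)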

View File

@ -11,11 +11,16 @@
#include "moses/Phrase.h"
#include "moses/StaticData.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#include "moses/TranslationSystem.h"
#include "moses/TreeInput.h"
#include "moses/LMList.h"
#include "moses/LM/ORLM.h"
#ifdef WITH_THREADS
#include <boost/thread.hpp>
#endif
#include <xmlrpc-c/base.hpp>
#include <xmlrpc-c/registry.hpp>
#include <xmlrpc-c/server_abyss.hpp>
@ -133,6 +138,61 @@ public:
}
};
class Optimizer : public xmlrpc_c::method
{
public:
Optimizer() {
// signature and help strings are documentation -- the client
// can query this information with a system.methodSignature and
// system.methodHelp RPC.
this->_signature = "S:S";
this->_help = "Optimizes multi-model translation model";
}
void
execute(xmlrpc_c::paramList const& paramList,
xmlrpc_c::value * const retvalP) {
#ifdef WITH_DLIB
const params_t params = paramList.getStruct(0);
const TranslationSystem& system = getTranslationSystem(params);
const PhraseDictionaryFeature* pdf = system.GetPhraseDictionaries()[0];
PhraseDictionaryMultiModel* pdmm = (PhraseDictionaryMultiModel*) pdf->GetDictionary();
params_t::const_iterator si = params.find("phrase_pairs");
if (si == params.end()) {
throw xmlrpc_c::fault(
"Missing list of phrase pairs",
xmlrpc_c::fault::CODE_PARSE);
}
vector<pair<string, string> > phrase_pairs;
xmlrpc_c::value_array phrase_pairs_array = xmlrpc_c::value_array(si->second);
vector<xmlrpc_c::value> phrasePairValueVector(phrase_pairs_array.vectorValueValue());
for (size_t i=0;i < phrasePairValueVector.size();i++) {
vector<xmlrpc_c::value> phrasePair(xmlrpc_c::value_array(phrasePairValueVector[i]).vectorValueValue());
string L1 = xmlrpc_c::value_string(phrasePair[0]);
string L2 = xmlrpc_c::value_string(phrasePair[1]);
phrase_pairs.push_back(make_pair(L1,L2));
}
vector<float> weight_vector;
weight_vector = pdmm->MinimizePerplexity(phrase_pairs);
vector<xmlrpc_c::value> weight_vector_ret;
for (size_t i=0;i < weight_vector.size();i++) {
weight_vector_ret.push_back(xmlrpc_c::value_double(weight_vector[i]));
}
*retvalP = xmlrpc_c::value_array(weight_vector_ret);
#else
string errmsg = "Error: Perplexity minimization requires dlib (compilation option --with-dlib)";
cerr << errmsg << endl;
*retvalP = xmlrpc_c::value_string(errmsg);
#endif
}
};
class Translator : public xmlrpc_c::method
{
public:
@ -173,12 +233,29 @@ public:
si = params.find("nbest-distinct");
bool nbest_distinct = (si != params.end());
vector<float> multiModelWeights;
si = params.find("weight-t-multimodel");
if (si != params.end()) {
xmlrpc_c::value_array multiModelArray = xmlrpc_c::value_array(si->second);
vector<xmlrpc_c::value> multiModelValueVector(multiModelArray.vectorValueValue());
for (size_t i=0;i < multiModelValueVector.size();i++) {
multiModelWeights.push_back(xmlrpc_c::value_double(multiModelValueVector[i]));
}
}
const StaticData &staticData = StaticData::Instance();
if (addGraphInfo) {
(const_cast<StaticData&>(staticData)).SetOutputSearchGraph(true);
}
if (multiModelWeights.size() > 0) {
staticData.SetTemporaryMultiModelWeightsVector(multiModelWeights);
if (staticData.GetUseTransOptCache()) {
cerr << "Warning: -use-persistent-cache is set to true; sentence-specific weights may be ignored. Disable cache for true results.\n";
}
}
const TranslationSystem& system = getTranslationSystem(params);
stringstream out, graphInfo, transCollOpts;
map<string, xmlrpc_c::value> retData;
@ -425,13 +502,18 @@ int main(int argc, char** argv)
exit(1);
}
//512 MB data limit (512KB is not enough for optimization)
xmlrpc_limit_set(XMLRPC_XML_SIZE_LIMIT_ID, 512*1024*1024);
xmlrpc_c::registry myRegistry;
xmlrpc_c::methodPtr const translator(new Translator);
xmlrpc_c::methodPtr const updater(new Updater);
xmlrpc_c::methodPtr const optimizer(new Optimizer);
myRegistry.addMethod("translate", translator);
myRegistry.addMethod("updater", updater);
myRegistry.addMethod("optimize", optimizer);
xmlrpc_c::serverAbyss myAbyssServer(
myRegistry,
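
For reference, a hedged sketch of the raw XML-RPC structures these methods expect, matching the parsing code above. The URL and all values are placeholders, and optimize is only available when the server is compiled with dlib (the --with-dlib option mentioned in the error message):

import xmlrpclib  # xmlrpc.client on Python 3

proxy = xmlrpclib.ServerProxy("http://localhost:8111/RPC2")

# translate: weight-t-multimodel is an array of doubles
translation = proxy.translate({"text": "ein haus", "weight-t-multimodel": [0.8, 0.2]})

# optimize: phrase_pairs is an array of [source, target] string pairs;
# the result is an array of doubles (one weight per feature and model)
weights = proxy.optimize({"phrase_pairs": [["ein haus", "a house"], ["haus", "house"]]})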

View File

@ -3,7 +3,14 @@ path-constant FACTOR-LOG : bin/factor.log ;
update-if-changed $(FACTOR-LOG) $(max-factors) ;
max-factors = <define>MAX_NUM_FACTORS=$(max-factors) <dependency>$(FACTOR-LOG) ;
alias headers : ../util//kenutil : : : $(max-factors) ;
with-dlib = [ option.get "with-dlib" ] ;
if $(with-dlib) {
dlib = <define>WITH_DLIB <include>$(with-dlib) ;
} else {
dlib = ;
}
alias headers : ../util//kenutil : : : $(max-factors) $(dlib) ;
alias ThreadPool : ThreadPool.cpp ;

View File

@ -100,6 +100,7 @@ Parameter::Parameter()
AddParam("weight-w", "w", "weight for word penalty");
AddParam("weight-u", "u", "weight for unknown word penalty");
AddParam("weight-e", "e", "weight for word deletion");
AddParam("weight-t-multimodel", "tmo", "weights for multi-model mode");
AddParam("weight-file", "wf", "feature weights file. Do *not* put weights for 'core' features in here - they go in moses.ini");
AddParam("output-factors", "list if factors in the output");
AddParam("cache-path", "?");

View File

@ -355,6 +355,8 @@ bool StaticData::LoadData(Parameter *parameter)
m_unknownWordPenaltyProducer = new UnknownWordPenaltyProducer();
SetWeight(m_unknownWordPenaltyProducer, weightUnknownWord);
m_multimodelweights = Scan<float>( m_parameter->GetParam("weight-t-multimodel") );
// reordering constraints
m_maxDistortion = (m_parameter->GetParam("distortion-limit").size() > 0) ?
Scan<int>(m_parameter->GetParam("distortion-limit")[0])

View File

@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <string>
#ifdef WITH_THREADS
#include <boost/thread.hpp>
#include <boost/thread/mutex.hpp>
#endif
@ -246,7 +247,13 @@ protected:
int m_threadCount;
long m_startTranslationId;
std::vector<float> m_multimodelweights;
#ifdef WITH_THREADS
mutable std::map<boost::thread::id, std::vector<float> > m_multimodelweights_tmp;
#else
mutable std::vector<float> m_multimodelweights_tmp;
#endif
StaticData();
@ -728,6 +735,34 @@ public:
void SetExecPath(const std::string &path);
const std::string &GetBinDirectory() const;
const std::vector<float>* GetMultiModelWeightsVector() const {
return &m_multimodelweights;
}
void SetTemporaryMultiModelWeightsVector(std::vector<float> weights) const {
#ifdef WITH_THREADS
m_multimodelweights_tmp[boost::this_thread::get_id()] = weights;
#else
m_multimodelweights_tmp = weights;
#endif
}
const std::vector<float>* GetTemporaryMultiModelWeightsVector() const {
#ifdef WITH_THREADS
if (m_multimodelweights_tmp.find(boost::this_thread::get_id()) != m_multimodelweights_tmp.end()) {
return &m_multimodelweights_tmp.find(boost::this_thread::get_id())->second;
}
else {
return NULL;
}
#else
return &m_multimodelweights_tmp;
#endif
}
void SetNeedAlignmentInfo(bool needAlignmentInfo) {
m_needAlignmentInfo = needAlignmentInfo;
}
bool NeedAlignmentInfo() const {
return m_needAlignmentInfo; }
const std::string &GetAlignmentOutputFile() const {
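
The temporary weight vector is keyed by thread id so that concurrent mosesserver requests carrying different sentence-level weights do not interfere. A minimal Python analogue of that lookup pattern (an illustrative sketch, not Moses code):

import threading

config_weights = [0.5, 0.5]   # analogue of m_multimodelweights (set via the config)
tmp_weights = {}              # analogue of m_multimodelweights_tmp, keyed by thread id

def set_temporary_weights(weights):
    tmp_weights[threading.get_ident()] = weights

def get_weights():
    # fall back to the config-level weights if no per-sentence weights were set on this thread
    return tmp_weights.get(threading.get_ident(), config_weights)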

View File

@ -433,9 +433,13 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
size_t idx = m_multipleScoreTrees ? scores.size() : 0;
float score = m_scoreTrees[idx]->Read(encodedBitStream);
scores.push_back(score);
if(scores.size() == m_numScoreComponent)
{
//PhraseDictionaryMultiModel may use input phrase dictionaries with a different number of features than it is assigned in the log-linear model;
//extra slots are filled with zeroes to prevent spurious errors downstream
if (m_phraseDictionary.GetNumScoreComponentMultiModel() > 0 && m_phraseDictionary.GetNumScoreComponentMultiModel() > m_numScoreComponent) {
scores.resize(m_phraseDictionary.GetNumScoreComponentMultiModel());
}
targetPhrase->SetScore(m_feature, scores, ScoreComponentCollection() /*sparse*/,*m_weight, m_weightWP, *m_languageModels);
if(m_containsAlignmentInfo)

View File

@ -26,6 +26,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#ifndef WIN32
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
@ -76,7 +78,7 @@ PhraseDictionaryFeature::PhraseDictionaryFeature
m_sparsePhraseDictionaryFeature(spdf)
{
if (implementation == Memory || implementation == SCFG || implementation == SuffixArray ||
implementation==Compact || implementation==FuzzyMatch ) {
implementation==Compact || implementation==FuzzyMatch || implementation == MultiModel || implementation == MultiModelCounts) {
m_useThreadSafePhraseDictionary = true;
} else {
m_useThreadSafePhraseDictionary = false;
@ -241,7 +243,43 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSyst
#else
CHECK(false);
#endif
}
} else if (m_implementation == MultiModel ) {
// memory phrase table
VERBOSE(2,"multi-model mode" << std::endl);
if (staticData.GetInputType() != SentenceInput) {
UserMessage::Add("Must use binary phrase table for this input type");
CHECK(false);
}
PhraseDictionaryMultiModel* pd = new PhraseDictionaryMultiModel(GetNumScoreComponents(),this);
bool ret = pd->Load(GetInput(), GetOutput()
, m_config
, weightT
, m_tableLimit
, system->GetLanguageModels()
, system->GetWeightWordPenalty());
CHECK(ret);
return pd;
} else if (m_implementation == MultiModelCounts) {
// memory phrase table
VERBOSE(2,"multi-model mode (count tables)" << std::endl);
if (staticData.GetInputType() != SentenceInput) {
UserMessage::Add("Must use binary phrase table for this input type");
CHECK(false);
}
(const_cast<StaticData&>(staticData)).SetNeedAlignmentInfo(true); //needed for lexical weight computation
PhraseDictionaryMultiModelCounts* pd = new PhraseDictionaryMultiModelCounts(GetNumScoreComponents(),this);
bool ret = pd->Load(GetInput(), GetOutput()
, m_config
, weightT
, m_tableLimit
, system->GetLanguageModels()
, system->GetWeightWordPenalty());
CHECK(ret);
return pd;
}
else {
std::cerr << "Unknown phrase table type " << m_implementation << endl;
CHECK(false);
@ -261,6 +299,16 @@ void PhraseDictionaryFeature::InitDictionary(const TranslationSystem* system)
//Other types will be lazy loaded
}
void PhraseDictionary::SetNumScoreComponentMultiModel(size_t num)
{
m_numScoreComponentMultiModel = num;
}
size_t PhraseDictionary::GetNumScoreComponentMultiModel() const
{
return m_numScoreComponentMultiModel;
}
//Called when we start translating a new sentence
void PhraseDictionaryFeature::InitDictionary(const TranslationSystem* system, const InputType& source)
{
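
Based on the Load() code in PhraseDictionaryMultiModel.cpp further below, the field that normally holds the phrase-table path instead carries the combination mode, followed by the component tables in Implementation:Path form (0 = in-memory, 12 = compact, as noted in the commit message). A hedged moses.ini sketch; the leading implementation id (99), the factor columns and the score count are placeholders only:

[ttable-file]
# implementation in-factors out-factors num-scores mode component-tables (Implementation:Path)
99 0 0 5 interpolate 0:/path/to/model1/phrase-table 12:/path/to/model2/phrase-table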

View File

@ -60,7 +60,7 @@ class PhraseDictionary: public Dictionary
{
public:
PhraseDictionary(size_t numScoreComponent, const PhraseDictionaryFeature* feature):
Dictionary(numScoreComponent), m_tableLimit(0), m_feature(feature) {}
Dictionary(numScoreComponent), m_tableLimit(0), m_feature(feature), m_numScoreComponentMultiModel(0) {}
//! table limit number.
size_t GetTableLimit() const {
return m_tableLimit;
@ -83,9 +83,14 @@ public:
const InputType &,
const ChartCellCollectionBase &) = 0;
//PhraseDictionaryMultiModel may use input phrase dictionaries with a different number of features than it is assigned in the log-linear model
void SetNumScoreComponentMultiModel(size_t num);
size_t GetNumScoreComponentMultiModel() const;
protected:
size_t m_tableLimit;
const PhraseDictionaryFeature* m_feature;
size_t m_numScoreComponentMultiModel;
};
@ -174,7 +179,7 @@ private:
PhraseTableImplementation m_implementation;
const std::vector<std::string> m_config;
SparsePhraseDictionaryFeature* m_sparsePhraseDictionaryFeature;
std::vector<std::string> m_allPaths;
};

View File

@ -122,10 +122,17 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
}
}
if (scv.size() != m_numScoreComponent) {
stringstream strme;
strme << "Size of scoreVector != number (" <<scv.size() << "!=" <<m_numScoreComponent<<") of score components on line " << line_num;
UserMessage::Add(strme.str());
abort();
//PhraseDictionaryMultiModel may use input phrase dictionaries with a different number of features than it is assigned in the log-linear model;
//extra slots are filled with zeroes to prevent spurious errors downstream
if (m_numScoreComponentMultiModel > 0 && scv.size() == m_numScoreComponentMultiModel && m_numScoreComponentMultiModel < m_numScoreComponent) {
scv.resize(m_numScoreComponent);
}
else {
stringstream strme;
strme << "Size of scoreVector != number (" <<scv.size() << "!=" <<m_numScoreComponent<<") of score components on line " << line_num;
UserMessage::Add(strme.str());
abort();
}
}
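
The same padding logic, restated as a short Python sketch (illustrative names; num_pt_scores corresponds to m_numScoreComponentMultiModel):

def pad_scores(scores, num_pt_scores, num_score_component):
    # a component table may carry fewer scores than the log-linear model expects;
    # pad with zeroes instead of aborting
    if 0 < num_pt_scores == len(scores) < num_score_component:
        return scores + [0.0] * (num_score_component - len(scores))
    if len(scores) != num_score_component:
        raise ValueError("size of score vector != number of score components: %d != %d"
                         % (len(scores), num_score_component))
    return scores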

View File

@ -0,0 +1,476 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
using namespace std;
namespace Moses
{
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(size_t numScoreComponent,
PhraseDictionaryFeature* feature): PhraseDictionary(numScoreComponent, feature)
{
m_feature_load = feature;
}
PhraseDictionaryMultiModel::~PhraseDictionaryMultiModel()
{
RemoveAllInColl(m_pd);
}
bool PhraseDictionaryMultiModel::Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, const std::vector<std::string> &config
, const vector<float> &weight
, size_t tableLimit
, const LMList &languageModels
, float weightWP)
{
m_languageModels = &languageModels;
m_weight = weight;
m_weightWP = weightWP;
m_input = input;
m_output = output;
m_tableLimit = tableLimit;
m_mode = config[4];
std::vector<std::string> files(config.begin()+5,config.end());
m_numModels = files.size();
// since the top X target phrases of the final model are not the same as the top X phrases of each component model,
// one could choose a higher value than tableLimit (or 0) here for maximal precision, at the cost of speed.
m_componentTableLimit = tableLimit;
//how many actual scores there are in the phrase tables
//so far, equal to number of log-linear scores, but it is allowed to be smaller (for other combination types)
size_t numPtScores = m_numScoreComponent;
if (m_mode != "interpolate") {
ostringstream msg;
msg << "combination mode unknown: " << m_mode;
throw runtime_error(msg.str());
}
for(size_t i = 0; i < m_numModels; ++i){
std::string impl, file, main_table;
std::string delim = ":";
size_t delim_pos = files[i].find(delim);
if (delim_pos >= files[i].size()) {
UserMessage::Add("Phrase table must be specified in this format: Implementation:Path");
CHECK(false);
}
impl = files[i].substr(0,delim_pos);
file = files[i].substr(delim_pos+1,files[i].size());
PhraseTableImplementation implementation = (PhraseTableImplementation) Scan<int>(impl);
if (implementation == Memory) {
if (!FileExists(file) && FileExists(file + ".gz")) file += ".gz";
PhraseDictionaryMemory* pdm = new PhraseDictionaryMemory(m_numScoreComponent, m_feature_load);
pdm->SetNumScoreComponentMultiModel(numPtScores); //instead of complaining about an unequal number of scores, silently fill the score vector with zeroes
pdm->Load( input, output, file, m_weight, m_componentTableLimit, languageModels, m_weightWP);
m_pd.push_back(pdm);
}
else if (implementation == Compact) {
#ifndef WIN32
PhraseDictionaryCompact* pdc = new PhraseDictionaryCompact(m_numScoreComponent, implementation, m_feature_load);
pdc->SetNumScoreComponentMultiModel(m_numScoreComponent); //for compact models, we need to pass number of log-linear components to correctly resize the score vector
pdc->Load( input, output, file, m_weight, m_componentTableLimit, languageModels, m_weightWP);
m_pd.push_back(pdc);
#else
CHECK(false);
#endif
}
else {
UserMessage::Add("phrase table type unknown to multi-model mode");
CHECK(false);
}
}
return true;
}
const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollection(const Phrase& src) const
{
std::vector<std::vector<float> > multimodelweights;
if (m_mode == "interpolate") {
//interpolation of the phrase penalty is skipped, and a fixed value (2.718) is used instead. results will be incorrect if the phrase penalty is not the last feature
size_t numWeights = m_numScoreComponent-1;
multimodelweights = getWeights(numWeights, true);
}
std::map<std::string,multiModelStatistics*>* allStats = new(std::map<std::string,multiModelStatistics*>);
CollectSufficientStatistics(src, allStats);
TargetPhraseCollection *ret;
if (m_mode == "interpolate") {
ret = CreateTargetPhraseCollectionLinearInterpolation(allStats, multimodelweights);
}
ret->NthElement(m_tableLimit); // sort the phrases for pruning later
const_cast<PhraseDictionaryMultiModel*>(this)->CacheForCleanup(ret);
RemoveAllInMap(*allStats);
delete allStats;
return ret;
}
void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const
{
for(size_t i = 0; i < m_numModels; ++i){
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) m_pd[i]->GetTargetPhraseCollection( src);
if (ret_raw != NULL) {
TargetPhraseCollection::iterator iterTargetPhrase, iterLast;
if (m_componentTableLimit != 0 && ret_raw->GetSize() > m_componentTableLimit) {
iterLast = ret_raw->begin() + m_componentTableLimit;
}
else {
iterLast = ret_raw->end();
}
for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != iterLast; ++iterTargetPhrase) {
TargetPhrase * targetPhrase = *iterTargetPhrase;
std::vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(m_feature);
std::string targetString = targetPhrase->GetStringRep(m_output);
if (allStats->find(targetString) == allStats->end()) {
multiModelStatistics * statistics = new multiModelStatistics;
statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info
Scores scoreVector(m_numScoreComponent);
statistics->p.resize(m_numScoreComponent);
for(size_t j = 0; j < m_numScoreComponent; ++j){
statistics->p[j].resize(m_numModels);
scoreVector[j] = -raw_scores[j];
}
statistics->targetPhrase->SetScore(m_feature, scoreVector, ScoreComponentCollection(), m_weight, m_weightWP, *m_languageModels); // set scores to 0
(*allStats)[targetString] = statistics;
}
multiModelStatistics * statistics = (*allStats)[targetString];
for(size_t j = 0; j < m_numScoreComponent; ++j){
statistics->p[j][i] = UntransformScore(raw_scores[j]);
}
(*allStats)[targetString] = statistics;
}
}
}
}
TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollectionLinearInterpolation(std::map<std::string,multiModelStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const
{
TargetPhraseCollection *ret = new TargetPhraseCollection();
for ( std::map< std::string, multiModelStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
multiModelStatistics * statistics = iter->second;
Scores scoreVector(m_numScoreComponent);
for(size_t i = 0; i < m_numScoreComponent-1; ++i){
scoreVector[i] = TransformScore(std::inner_product(statistics->p[i].begin(), statistics->p[i].end(), multimodelweights[i].begin(), 0.0));
}
//assuming that last value is phrase penalty
scoreVector[m_numScoreComponent-1] = 1.0;
statistics->targetPhrase->SetScore(m_feature, scoreVector, ScoreComponentCollection(), m_weight, m_weightWP, *m_languageModels);
ret->Add(new TargetPhrase(*statistics->targetPhrase));
}
return ret;
}
//TODO: is it worth caching the results as long as weights don't change?
std::vector<std::vector<float> > PhraseDictionaryMultiModel::getWeights(size_t numWeights, bool normalize) const
{
const std::vector<float>* weights_ptr;
std::vector<float> raw_weights;
const StaticData &staticData = StaticData::Instance();
weights_ptr = staticData.GetTemporaryMultiModelWeightsVector();
//checking weights passed to mosesserver; only valid for this sentence; *don't* raise exception if client weights are malformed
if (weights_ptr == NULL || weights_ptr->size() == 0) {
weights_ptr = staticData.GetMultiModelWeightsVector(); //fall back to weights defined in config
}
else if(weights_ptr->size() != m_numModels && weights_ptr->size() != m_numModels * numWeights) {
//TODO: can we pass error message to client if weights are malformed?
std::stringstream strme;
strme << "Must have either one multimodel weight per model (" << m_numModels << "), or one per weighted feature and model (" << numWeights << "*" << m_numModels << "). You have " << weights_ptr->size() << ". Reverting to weights in config";
UserMessage::Add(strme.str());
weights_ptr = staticData.GetMultiModelWeightsVector(); //fall back to weights defined in config
}
//checking weights defined in config; raise exception if config weights are malformed
if (weights_ptr == NULL || weights_ptr->size() == 0) {
for (size_t i=0;i < m_numModels;i++) {
raw_weights.push_back(1.0/m_numModels); //uniform weights created online
}
}
else if(weights_ptr->size() != m_numModels && weights_ptr->size() != m_numModels * numWeights) {
std::stringstream strme;
strme << "Must have either one multimodel weight per model (" << m_numModels << "), or one per weighted feature and model (" << numWeights << "*" << m_numModels << "). You have " << weights_ptr->size() << ".";
UserMessage::Add(strme.str());
CHECK(false);
}
else {
raw_weights = *weights_ptr;
}
std::vector<std::vector<float> > multimodelweights (numWeights);
for (size_t i=0;i < numWeights;i++) {
std::vector<float> weights_onefeature (m_numModels);
if(raw_weights.size() == m_numModels) {
weights_onefeature = raw_weights;
}
else {
copy ( raw_weights.begin()+i*m_numModels, raw_weights.begin()+(i+1)*m_numModels, weights_onefeature.begin() );
}
if(normalize) {
multimodelweights[i] = normalizeWeights(weights_onefeature);
}
else {
multimodelweights[i] = weights_onefeature;
}
}
return multimodelweights;
}
std::vector<float> PhraseDictionaryMultiModel::normalizeWeights(std::vector<float> &weights) const
{
std::vector<float> ret (m_numModels);
float total = std::accumulate(weights.begin(),weights.end(),0.0);
for (size_t i=0;i < weights.size();i++) {
ret[i] = weights[i]/total;
}
return ret;
}
ChartRuleLookupManager *PhraseDictionaryMultiModel::CreateRuleLookupManager(const InputType&, const ChartCellCollectionBase&)
{
CHECK(false);
return 0;
}
//copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
void PhraseDictionaryMultiModel::CacheForCleanup(TargetPhraseCollection* tpc) {
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_sentenceMutex);
PhraseCache &ref = m_sentenceCache[boost::this_thread::get_id()];
#else
PhraseCache &ref = m_sentenceCache;
#endif
ref.push_back(tpc);
}
void PhraseDictionaryMultiModel::CleanUp(const InputType &source) {
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_sentenceMutex);
PhraseCache &ref = m_sentenceCache[boost::this_thread::get_id()];
#else
PhraseCache &ref = m_sentenceCache;
#endif
for(PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
delete *it;
}
PhraseCache temp;
temp.swap(ref);
CleanUpComponentModels(source);
const StaticData &staticData = StaticData::Instance();
std::vector<float> empty_vector;
(const_cast<StaticData&>(staticData)).SetTemporaryMultiModelWeightsVector(empty_vector);
}
void PhraseDictionaryMultiModel::CleanUpComponentModels(const InputType &source) {
for(size_t i = 0; i < m_numModels; ++i){
m_pd[i]->CleanUp(source);
}
}
#ifdef WITH_DLIB
vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector) {
const StaticData &staticData = StaticData::Instance();
const string& factorDelimiter = staticData.GetFactorDelimiter();
map<pair<string, string>, size_t> phrase_pair_map;
for ( vector<pair<string, string> >::const_iterator iter = phrase_pair_vector.begin(); iter != phrase_pair_vector.end(); ++iter ) {
phrase_pair_map[*iter] += 1;
}
vector<multiModelStatisticsOptimization*> optimizerStats;
for ( map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) {
pair<string, string> phrase_pair = iter->first;
string source_string = phrase_pair.first;
string target_string = phrase_pair.second;
vector<float> fs(m_numModels);
map<string,multiModelStatistics*>* allStats = new(map<string,multiModelStatistics*>);
Phrase sourcePhrase(0);
sourcePhrase.CreateFromString(m_input, source_string, factorDelimiter);
CollectSufficientStatistics(sourcePhrase, allStats); //optimization potential: only call this once per source phrase
//phrase pair not found; leave cache empty
if (allStats->find(target_string) == allStats->end()) {
RemoveAllInMap(*allStats);
delete allStats;
continue;
}
multiModelStatisticsOptimization* targetStatistics = new multiModelStatisticsOptimization();
targetStatistics->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
targetStatistics->p = (*allStats)[target_string]->p;
targetStatistics->f = iter->second;
optimizerStats.push_back(targetStatistics);
RemoveAllInMap(*allStats);
delete allStats;
}
Sentence sentence;
CleanUp(sentence); // free memory used by compact phrase tables
size_t numWeights = m_numScoreComponent;
if (m_mode == "interpolate") {
//interpolation of the phrase penalty is skipped, and a fixed value (2.718) is used instead. results will be incorrect if the phrase penalty is not the last feature
numWeights = m_numScoreComponent-1;
}
vector<float> ret (m_numModels*numWeights);
for (size_t iFeature=0; iFeature < numWeights; iFeature++) {
CrossEntropy * ObjectiveFunction = new CrossEntropy(optimizerStats, this, iFeature);
vector<float> weight_vector = Optimize(ObjectiveFunction, m_numModels);
if (m_mode == "interpolate") {
weight_vector = normalizeWeights(weight_vector);
}
cerr << "Weight vector for feature " << iFeature << ": ";
for (size_t i=0; i < m_numModels; i++) {
ret[(iFeature*m_numModels)+i] = weight_vector[i];
cerr << weight_vector[i] << " ";
}
cerr << endl;
delete ObjectiveFunction;
}
RemoveAllInColl(optimizerStats);
return ret;
}
vector<float> PhraseDictionaryMultiModel::Optimize(OptimizationObjective *ObjectiveFunction, size_t numModels) {
dlib::matrix<double,0,1> starting_point;
starting_point.set_size(numModels);
starting_point = 1.0;
try {
dlib::find_min_bobyqa(*ObjectiveFunction,
starting_point,
2*numModels+1, // number of interpolation points
dlib::uniform_matrix<double>(numModels,1, 1e-09), // lower bound constraint
dlib::uniform_matrix<double>(numModels,1, 1e100), // upper bound constraint
1.0, // initial trust region radius
1e-5, // stopping trust region radius
10000 // max number of objective function evaluations
);
}
catch (dlib::bobyqa_failure& e)
{
cerr << e.what() << endl;
}
vector<float> weight_vector (numModels);
for (int i=0; i < starting_point.nr(); i++) {
weight_vector[i] = starting_point(i);
}
cerr << "Cross-entropy: " << (*ObjectiveFunction)(starting_point) << endl;
return weight_vector;
}
double CrossEntropy::operator() ( const dlib::matrix<double,0,1>& arg) const
{
double total = 0.0;
double n = 0.0;
std::vector<float> weight_vector (m_model->m_numModels);
for (int i=0; i < arg.nr(); i++) {
weight_vector[i] = arg(i);
}
if (m_model->m_mode == "interpolate") {
weight_vector = m_model->normalizeWeights(weight_vector);
}
for ( std::vector<multiModelStatisticsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
multiModelStatisticsOptimization* statistics = *iter;
size_t f = statistics->f;
double score;
score = std::inner_product(statistics->p[m_iFeature].begin(), statistics->p[m_iFeature].end(), weight_vector.begin(), 0.0);
total -= (FloorScore(TransformScore(score))/TransformScore(2))*f;
n += f;
}
return total/n;
}
#endif
} //namespace
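
Two computations from this file restated in Python, under the assumption (consistent with how TransformScore/UntransformScore are used here) that scores are stored as natural-log probabilities: linear interpolation of per-model probabilities into a log score, and the per-feature cross-entropy (in bits) that MinimizePerplexity minimizes. An illustrative sketch, not a translation of the C++:

import math

def interpolate_feature(p_models, weights):
    # p_models[i]: probability of the phrase pair under component model i (0.0 if unseen)
    total = sum(weights)
    weights = [w / total for w in weights]   # "interpolate" mode normalizes the weights
    p = sum(pi * wi for pi, wi in zip(p_models, weights))
    return math.log(max(p, 1e-30))           # floor to avoid log(0), analogous to FloorScore

def cross_entropy(tuning_stats, weights):
    # tuning_stats: list of (per-model probabilities for one feature, frequency in the tuning set)
    total, n = 0.0, 0.0
    for p_models, freq in tuning_stats:
        total -= (interpolate_feature(p_models, weights) / math.log(2)) * freq  # nats -> bits
        n += freq
    return total / n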

View File

@ -0,0 +1,148 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_PhraseDictionaryMultiModel_h
#define moses_PhraseDictionaryMultiModel_h
#include "moses/TranslationModel/PhraseDictionary.h"
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
#ifndef WIN32
#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
#endif
#include <boost/unordered_map.hpp>
#include "moses/StaticData.h"
#include "moses/TargetPhrase.h"
#include "moses/Util.h"
#include "moses/UserMessage.h"
#ifdef WITH_DLIB
#include <dlib/optimization.h>
#endif
namespace Moses
{
struct multiModelStatistics {
TargetPhrase *targetPhrase;
std::vector<std::vector<float> > p;
~multiModelStatistics() {delete targetPhrase;};
};
struct multiModelStatisticsOptimization: multiModelStatistics {
size_t f;
};
class OptimizationObjective;
/** Implementation of a virtual phrase table constructed from multiple component phrase tables.
*/
class PhraseDictionaryMultiModel: public PhraseDictionary
{
#ifdef WITH_DLIB
friend class CrossEntropy;
#endif
public:
PhraseDictionaryMultiModel(size_t m_numScoreComponent, PhraseDictionaryFeature* feature);
~PhraseDictionaryMultiModel();
bool Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, const std::vector<std::string> &files
, const std::vector<float> &weight
, size_t tableLimit
, const LMList &languageModels
, float weightWP);
virtual void CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const;
virtual TargetPhraseCollection* CreateTargetPhraseCollectionLinearInterpolation(std::map<std::string,multiModelStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
std::vector<std::vector<float> > getWeights(size_t numWeights, bool normalize) const;
std::vector<float> normalizeWeights(std::vector<float> &weights) const;
void CacheForCleanup(TargetPhraseCollection* tpc);
void CleanUp(const InputType &source);
virtual void CleanUpComponentModels(const InputType &source);
#ifdef WITH_DLIB
virtual std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
std::vector<float> Optimize(OptimizationObjective * ObjectiveFunction, size_t numModels);
#endif
// functions below required by base class
virtual const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase& src) const;
virtual void InitializeForInput(InputType const&) {
/* Don't do anything source specific here as this object is shared between threads.*/
}
ChartRuleLookupManager *CreateRuleLookupManager(const InputType&, const ChartCellCollectionBase&);
protected:
std::string m_mode;
std::vector<PhraseDictionary*> m_pd;
std::vector<float> m_weight;
const LMList *m_languageModels;
float m_weightWP;
std::vector<FactorType> m_input;
std::vector<FactorType> m_output;
size_t m_numModels;
size_t m_componentTableLimit;
PhraseDictionaryFeature* m_feature_load;
typedef std::vector<TargetPhraseCollection*> PhraseCache;
#ifdef WITH_THREADS
boost::mutex m_sentenceMutex;
typedef std::map<boost::thread::id, PhraseCache> SentenceCache;
#else
typedef PhraseCache SentenceCache;
#endif
SentenceCache m_sentenceCache;
};
#ifdef WITH_DLIB
class OptimizationObjective
{
public:
virtual double operator() ( const dlib::matrix<double,0,1>& arg) const = 0;
};
class CrossEntropy: public OptimizationObjective
{
public:
CrossEntropy (
std::vector<multiModelStatisticsOptimization*> &optimizerStats,
PhraseDictionaryMultiModel * model,
size_t iFeature
)
{
m_optimizerStats = optimizerStats;
m_model = model;
m_iFeature = iFeature;
}
double operator() ( const dlib::matrix<double,0,1>& arg) const;
protected:
std::vector<multiModelStatisticsOptimization*> m_optimizerStats;
PhraseDictionaryMultiModel * m_model;
size_t m_iFeature;
};
#endif
} // end namespace
#endif
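
Optimize() runs one bound-constrained minimization per weighted feature with dlib's BOBYQA and then normalizes the result in "interpolate" mode. A rough Python analogue of that outer loop, using scipy's L-BFGS-B as a stand-in for BOBYQA (an illustrative sketch under that substitution, not the actual implementation):

import numpy as np
from scipy.optimize import minimize

def minimize_perplexity(objective_per_feature, num_models):
    # objective_per_feature[f]: a function mapping a weight vector to the cross-entropy of feature f
    flat_weights = []
    for objective in objective_per_feature:
        result = minimize(lambda w: objective(list(w)), np.ones(num_models), method="L-BFGS-B",
                          bounds=[(1e-9, None)] * num_models)  # keep the weights positive
        w = result.x / result.x.sum()       # normalize, as in "interpolate" mode
        flat_weights.extend(w.tolist())     # feature-major layout, as returned to the client
    return flat_weights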

View File

@ -0,0 +1,666 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#define LINE_MAX_LENGTH 100000
#include "phrase-extract/SafeGetline.h" // for SAFE_GETLINE()
using namespace std;
// from phrase-extract/tables-core.cpp
vector<string> tokenize( const char* input )
{
vector< string > token;
bool betweenWords = true;
int start=0;
int i=0;
for(; input[i] != '\0'; i++) {
bool isSpace = (input[i] == ' ' || input[i] == '\t');
if (!isSpace && betweenWords) {
start = i;
betweenWords = false;
} else if (isSpace && !betweenWords) {
token.push_back( string( input+start, i-start ) );
betweenWords = true;
}
}
if (!betweenWords)
token.push_back( string( input+start, i-start ) );
return token;
}
namespace Moses
{
PhraseDictionaryMultiModelCounts::PhraseDictionaryMultiModelCounts(size_t numScoreComponent,
PhraseDictionaryFeature* feature): PhraseDictionaryMultiModel(numScoreComponent, feature)
{
m_feature_load = feature;
m_mode = "instance_weighting"; //TODO: set this in config; use m_mode to switch between interpolation and instance weighting
m_combineFunction = InstanceWeighting;
//m_mode = "interpolate";
//m_combineFunction = LinearInterpolationFromCounts;
}
PhraseDictionaryMultiModelCounts::~PhraseDictionaryMultiModelCounts()
{
RemoveAllInColl(m_lexTable_e2f);
RemoveAllInColl(m_lexTable_f2e);
RemoveAllInColl(m_pd);
RemoveAllInColl(m_inverse_pd);
}
bool PhraseDictionaryMultiModelCounts::Load(const vector<FactorType> &input
, const vector<FactorType> &output
, const vector<string> &config
, const vector<float> &weight
, size_t tableLimit
, const LMList &languageModels
, float weightWP)
{
m_languageModels = &languageModels;
m_weight = weight;
m_weightWP = weightWP;
m_input = input;
m_output = output;
m_tableLimit = tableLimit;
m_mode = config[4];
std::vector<std::string> files(config.begin()+5,config.end());
m_numModels = files.size();
if (m_mode == "instance_weighting")
m_combineFunction = InstanceWeighting;
else if (m_mode == "interpolate")
m_combineFunction = LinearInterpolationFromCounts;
else {
ostringstream msg;
msg << "combination mode unknown: " << m_mode;
throw runtime_error(msg.str());
}
for(size_t i = 0; i < m_numModels; ++i){
string impl, file, main_table, target_table, lex_e2f, lex_f2e;
string delim = ":";
size_t delim_pos = files[i].find(delim);
if (delim_pos >= files[i].size()) {
UserMessage::Add("Phrase table must be specified in this format: Implementation:Path");
CHECK(false);
}
impl = files[i].substr(0,delim_pos);
file = files[i].substr(delim_pos+1,files[i].size());
main_table = file + "/count-table";
target_table = file + "/count-table-target";
lex_e2f = file + "/lex.counts.e2f";
lex_f2e = file + "/lex.counts.f2e";
size_t componentTableLimit = 0; // using 0, because we can't trust implemented pruning algorithms with count tables.
PhraseTableImplementation implementation = (PhraseTableImplementation) Scan<int>(impl);
if (implementation == Memory) {
//how many actual scores there are in the phrase tables
size_t numScoresCounts = 3;
size_t numScoresTargetCounts = 1;
if (!FileExists(main_table) && FileExists(main_table + ".gz")) main_table += ".gz";
if (!FileExists(target_table) && FileExists(target_table + ".gz")) target_table += ".gz";
PhraseDictionaryMemory* pdm = new PhraseDictionaryMemory(m_numScoreComponent, m_feature_load);
pdm->SetNumScoreComponentMultiModel(numScoresCounts); //instead of complaining about an unequal number of scores, silently fill the score vector with zeroes
pdm->Load( input, output, main_table, m_weight, componentTableLimit, languageModels, m_weightWP);
m_pd.push_back(pdm);
PhraseDictionaryMemory* pdm_inverse = new PhraseDictionaryMemory(m_numScoreComponent, m_feature_load);
pdm_inverse->SetNumScoreComponentMultiModel(numScoresTargetCounts);
pdm_inverse->Load( input, output, target_table, m_weight, componentTableLimit, languageModels, m_weightWP);
m_inverse_pd.push_back(pdm_inverse);
}
else if (implementation == Compact) {
#ifndef WIN32
PhraseDictionaryCompact* pdc = new PhraseDictionaryCompact(m_numScoreComponent, implementation, m_feature_load);
pdc->SetNumScoreComponentMultiModel(m_numScoreComponent); //for compact models, we need to pass number of log-linear components to correctly resize the score vector
pdc->Load( input, output, main_table, m_weight, componentTableLimit, languageModels, m_weightWP);
m_pd.push_back(pdc);
PhraseDictionaryCompact* pdc_inverse = new PhraseDictionaryCompact(m_numScoreComponent, implementation, m_feature_load);
pdc_inverse->SetNumScoreComponentMultiModel(m_numScoreComponent);
pdc_inverse->Load( input, output, target_table, m_weight, componentTableLimit, languageModels, m_weightWP);
m_inverse_pd.push_back(pdc_inverse);
#else
CHECK(false);
#endif
}
else {
UserMessage::Add("phrase table type unknown to multi-model mode");
CHECK(false);
}
lexicalTable* e2f = new lexicalTable;
LoadLexicalTable(lex_e2f, e2f);
lexicalTable* f2e = new lexicalTable;
LoadLexicalTable(lex_f2e, f2e);
m_lexTable_e2f.push_back(e2f);
m_lexTable_f2e.push_back(f2e);
}
return true;
}
const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseCollection(const Phrase& src) const
{
vector<vector<float> > multimodelweights;
bool normalize;
normalize = (m_mode == "interpolate") ? true : false;
multimodelweights = getWeights(4,normalize);
//source phrase frequency is shared among all phrase pairs
vector<float> fs(m_numModels);
map<string,multiModelCountsStatistics*>* allStats = new(map<string,multiModelCountsStatistics*>);
CollectSufficientStatistics(src, fs, allStats);
TargetPhraseCollection *ret = CreateTargetPhraseCollectionCounts(src, fs, allStats, multimodelweights);
ret->NthElement(m_tableLimit); // sort the phrases for pruning later
const_cast<PhraseDictionaryMultiModelCounts*>(this)->CacheForCleanup(ret);
return ret;
}
void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase& src, vector<float> &fs, map<string,multiModelCountsStatistics*>* allStats) const
//fill fs and allStats with statistics from models
{
for(size_t i = 0; i < m_numModels; ++i){
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) m_pd[i]->GetTargetPhraseCollection( src);
if (ret_raw != NULL) {
TargetPhraseCollection::iterator iterTargetPhrase;
for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != ret_raw->end(); ++iterTargetPhrase) {
TargetPhrase * targetPhrase = *iterTargetPhrase;
vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(m_feature);
string targetString = targetPhrase->GetStringRep(m_output);
if (allStats->find(targetString) == allStats->end()) {
multiModelCountsStatistics * statistics = new multiModelCountsStatistics;
statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info
statistics->fst.resize(m_numModels);
statistics->ft.resize(m_numModels);
Scores scoreVector(5);
scoreVector[0] = -raw_scores[0];
scoreVector[1] = -raw_scores[1];
scoreVector[2] = -raw_scores[2];
statistics->targetPhrase->SetScore(m_feature, scoreVector, ScoreComponentCollection(), m_weight, m_weightWP, *m_languageModels); // set scores to 0
(*allStats)[targetString] = statistics;
}
multiModelCountsStatistics * statistics = (*allStats)[targetString];
statistics->fst[i] = UntransformScore(raw_scores[0]);
statistics->ft[i] = UntransformScore(raw_scores[1]);
fs[i] = UntransformScore(raw_scores[2]);
(*allStats)[targetString] = statistics;
}
}
}
// get target phrase frequency for models which have not seen the phrase pair
for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
multiModelCountsStatistics * statistics = iter->second;
for (size_t i = 0; i < m_numModels; ++i) {
if (!statistics->ft[i]) {
statistics->ft[i] = GetTargetCount(static_cast<const Phrase&>(*statistics->targetPhrase), i);
}
}
}
}
TargetPhraseCollection* PhraseDictionaryMultiModelCounts::CreateTargetPhraseCollectionCounts(const Phrase &src, vector<float> &fs, map<string,multiModelCountsStatistics*>* allStats, vector<vector<float> > &multimodelweights) const
{
TargetPhraseCollection *ret = new TargetPhraseCollection();
for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
multiModelCountsStatistics * statistics = iter->second;
if (statistics->targetPhrase->GetAlignTerm().GetSize() == 0) {
UserMessage::Add(" alignment information empty\ncount-tables need to include alignment information for computation of lexical weights.\nUse --phrase-word-alignment during training; for on-disk tables, also set -alignment-info when creating on-disk tables.");
CHECK(false);
}
try {
pair<vector< set<size_t> >, vector< set<size_t> > > alignment = GetAlignmentsForLexWeights(src, static_cast<const Phrase&>(*statistics->targetPhrase), statistics->targetPhrase->GetAlignTerm());
vector< set<size_t> > alignedToT = alignment.first;
vector< set<size_t> > alignedToS = alignment.second;
double lexst = ComputeWeightedLexicalTranslation(static_cast<const Phrase&>(*statistics->targetPhrase), src, alignedToS, m_lexTable_e2f, multimodelweights[1], m_output, m_input );
double lexts = ComputeWeightedLexicalTranslation(src, static_cast<const Phrase&>(*statistics->targetPhrase), alignedToT, m_lexTable_f2e, multimodelweights[3], m_input, m_output );
Scores scoreVector(5);
scoreVector[0] = FloorScore(TransformScore(m_combineFunction(statistics->fst, statistics->ft, multimodelweights[0])));
scoreVector[1] = FloorScore(TransformScore(lexst));
scoreVector[2] = FloorScore(TransformScore(m_combineFunction(statistics->fst, fs, multimodelweights[2])));
scoreVector[3] = FloorScore(TransformScore(lexts));
scoreVector[4] = FloorScore(TransformScore(2.718));
statistics->targetPhrase->SetScore(m_feature, scoreVector, ScoreComponentCollection(), m_weight, m_weightWP, *m_languageModels);
}
catch (AlignmentException& e) {
continue;
}
ret->Add(new TargetPhrase(*statistics->targetPhrase));
}
RemoveAllInMap(*allStats);
delete allStats;
return ret;
}
float PhraseDictionaryMultiModelCounts::GetTargetCount(const Phrase &target, size_t modelIndex) const {
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) m_inverse_pd[modelIndex]->GetTargetPhraseCollection(target);
// in inverse mode, we want the first score of the first phrase pair (note: if we were to work with truly symmetric models, it would be the third score)
if (ret_raw != NULL) {
TargetPhrase * targetPhrase = *(ret_raw->begin());
return UntransformScore(targetPhrase->GetScoreBreakdown().GetScoresForProducer(m_feature)[0]);
}
// target phrase unknown
else return 0;
}
pair<PhraseDictionaryMultiModelCounts::AlignVector,PhraseDictionaryMultiModelCounts::AlignVector> PhraseDictionaryMultiModelCounts::GetAlignmentsForLexWeights(const Phrase &phraseS, const Phrase &phraseT, const AlignmentInfo &alignment) const {
size_t tsize = phraseT.GetSize();
size_t ssize = phraseS.GetSize();
AlignVector alignedToT (tsize);
AlignVector alignedToS (ssize);
AlignmentInfo::const_iterator iter;
for (iter = alignment.begin(); iter != alignment.end(); ++iter) {
const pair<size_t,size_t> &alignPair = *iter;
size_t s = alignPair.first;
size_t t = alignPair.second;
if (s >= ssize || t >= tsize) {
cerr << "Error: inconsistent alignment for phrase pair: " << phraseS << " - " << phraseT << endl;
cerr << "phrase pair will be discarded" << endl;
throw AlignmentException();
}
alignedToT[t].insert( s );
alignedToS[s].insert( t );
}
return make_pair(alignedToT,alignedToS);
}
double PhraseDictionaryMultiModelCounts::ComputeWeightedLexicalTranslation( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector<lexicalTable*> &tables, vector<float> &multimodelweights, const vector<FactorType> &input_factors, const vector<FactorType> &output_factors ) const {
// lexical translation probability
double lexScore = 1.0;
string null = "NULL";
// all target words have to be explained
for(size_t ti=0; ti<alignment.size(); ti++) {
const set< size_t > & srcIndices = alignment[ ti ];
Word t_word = phraseT.GetWord(ti);
string ti_str = t_word.GetString(output_factors, false);
if (srcIndices.empty()) {
// explain unaligned word by NULL
lexScore *= GetLexicalProbability( null, ti_str, tables, multimodelweights );
} else {
// go through all the aligned words to compute average
double thisWordScore = 0;
for (set< size_t >::const_iterator si(srcIndices.begin()); si != srcIndices.end(); ++si) {
string s_str = phraseS.GetWord(*si).GetString(input_factors, false);
thisWordScore += GetLexicalProbability( s_str, ti_str, tables, multimodelweights );
}
lexScore *= thisWordScore / srcIndices.size();
}
}
return lexScore;
}
lexicalCache PhraseDictionaryMultiModelCounts::CacheLexicalStatistics( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector<lexicalTable*> &tables, const vector<FactorType> &input_factors, const vector<FactorType> &output_factors ) {
//do all the necessary lexical table lookups and get counts, but don't apply weights yet
string null = "NULL";
lexicalCache ret;
// all target words have to be explained
for(size_t ti=0; ti<alignment.size(); ti++) {
const set< size_t > & srcIndices = alignment[ ti ];
Word t_word = phraseT.GetWord(ti);
string ti_str = t_word.GetString(output_factors, false);
vector<lexicalPair> ti_vector;
if (srcIndices.empty()) {
// explain unaligned word by NULL
vector<float> joint_count (m_numModels);
vector<float> marginals (m_numModels);
FillLexicalCountsJoint(null, ti_str, joint_count, tables);
FillLexicalCountsMarginal(null, marginals, tables);
ti_vector.push_back(make_pair(joint_count, marginals));
} else {
for (set< size_t >::const_iterator si(srcIndices.begin()); si != srcIndices.end(); ++si) {
string s_str = phraseS.GetWord(*si).GetString(input_factors, false);
vector<float> joint_count (m_numModels);
vector<float> marginals (m_numModels);
FillLexicalCountsJoint(s_str, ti_str, joint_count, tables);
FillLexicalCountsMarginal(s_str, marginals, tables);
ti_vector.push_back(make_pair(joint_count, marginals));
}
}
ret.push_back(ti_vector);
}
return ret;
}
double PhraseDictionaryMultiModelCounts::ComputeWeightedLexicalTranslationFromCache( lexicalCache &cache, vector<float> &weights ) const {
// lexical translation probability
double lexScore = 1.0;
for (lexicalCache::const_iterator iter = cache.begin(); iter != cache.end(); ++iter) {
vector<lexicalPair> t_vector = *iter;
double thisWordScore = 0;
for ( vector<lexicalPair>::const_iterator iter2 = t_vector.begin(); iter2 != t_vector.end(); ++iter2) {
vector<float> joint_count = iter2->first;
vector<float> marginal = iter2->second;
thisWordScore += m_combineFunction(joint_count, marginal, weights);
}
lexScore *= thisWordScore / t_vector.size();
}
return lexScore;
}
// get lexical probability for single word alignment pair
double PhraseDictionaryMultiModelCounts::GetLexicalProbability( string &wordS, string &wordT, const vector<lexicalTable*> &tables, vector<float> &multimodelweights ) const {
vector<float> joint_count (m_numModels);
vector<float> marginals (m_numModels);
FillLexicalCountsJoint(wordS, wordT, joint_count, tables);
FillLexicalCountsMarginal(wordS, marginals, tables);
double lexProb = m_combineFunction(joint_count, marginals, multimodelweights);
return lexProb;
}
void PhraseDictionaryMultiModelCounts::FillLexicalCountsJoint(string &wordS, string &wordT, vector<float> &count, const vector<lexicalTable*> &tables) const {
for (size_t i=0;i < m_numModels;i++) {
lexicalMapJoint::iterator joint_s = tables[i]->joint.find( wordS );
if (joint_s == tables[i]->joint.end()) count[i] = 0.0;
else {
lexicalMap::iterator joint_t = joint_s->second.find( wordT );
if (joint_t == joint_s->second.end()) count[i] = 0.0;
else count[i] = joint_t->second;
}
}
}
void PhraseDictionaryMultiModelCounts::FillLexicalCountsMarginal(string &wordS, vector<float> &count, const vector<lexicalTable*> &tables) const {
for (size_t i=0;i < m_numModels;i++) {
lexicalMap::iterator marginal_s = tables[i]->marginal.find( wordS );
if (marginal_s == tables[i]->marginal.end()) count[i] = 0.0;
else count[i] = marginal_s->second;
}
}
void PhraseDictionaryMultiModelCounts::LoadLexicalTable( string &fileName, lexicalTable* ltable) {
cerr << "Loading lexical translation table from " << fileName;
ifstream inFile;
inFile.open(fileName.c_str());
if (inFile.fail()) {
cerr << " - ERROR: could not open file\n";
exit(1);
}
istream *inFileP = &inFile;
char line[LINE_MAX_LENGTH];
int i=0;
while(true) {
i++;
if (i%100000 == 0) cerr << "." << flush;
SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
if (inFileP->eof()) break;
vector<string> token = tokenize( line );
if (token.size() != 4) {
cerr << "line " << i << " in " << fileName
<< " has wrong number of tokens, skipping:\n"
<< token.size() << " " << token[0] << " " << line << endl;
continue;
}
double joint = atof( token[2].c_str() );
double marginal = atof( token[3].c_str() );
string wordT = token[0];
string wordS = token[1];
ltable->joint[ wordS ][ wordT ] = joint;
ltable->marginal[ wordS ] = marginal;
}
cerr << endl;
}
void PhraseDictionaryMultiModelCounts::CleanUpComponentModels(const InputType &source) {
for(size_t i = 0; i < m_numModels; ++i){
m_pd[i]->CleanUp(source);
m_inverse_pd[i]->CleanUp(source);
}
}
#ifdef WITH_DLIB
vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector) {
const StaticData &staticData = StaticData::Instance();
const string& factorDelimiter = staticData.GetFactorDelimiter();
map<pair<string, string>, size_t> phrase_pair_map;
for ( vector<pair<string, string> >::const_iterator iter = phrase_pair_vector.begin(); iter != phrase_pair_vector.end(); ++iter ) {
phrase_pair_map[*iter] += 1;
}
vector<multiModelCountsStatisticsOptimization*> optimizerStats;
for ( map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) {
pair<string, string> phrase_pair = iter->first;
string source_string = phrase_pair.first;
string target_string = phrase_pair.second;
vector<float> fs(m_numModels);
map<string,multiModelCountsStatistics*>* allStats = new(map<string,multiModelCountsStatistics*>);
Phrase sourcePhrase(0);
sourcePhrase.CreateFromString(m_input, source_string, factorDelimiter);
CollectSufficientStatistics(sourcePhrase, fs, allStats); //optimization potential: only call this once per source phrase
//phrase pair not found; leave cache empty
if (allStats->find(target_string) == allStats->end()) {
RemoveAllInMap(*allStats);
delete allStats;
continue;
}
multiModelCountsStatisticsOptimization * targetStatistics = new multiModelCountsStatisticsOptimization();
targetStatistics->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
targetStatistics->fs = fs;
targetStatistics->fst = (*allStats)[target_string]->fst;
targetStatistics->ft = (*allStats)[target_string]->ft;
targetStatistics->f = iter->second;
try {
pair<vector< set<size_t> >, vector< set<size_t> > > alignment = GetAlignmentsForLexWeights(sourcePhrase, static_cast<const Phrase&>(*targetStatistics->targetPhrase), targetStatistics->targetPhrase->GetAlignTerm());
targetStatistics->lexCachee2f = CacheLexicalStatistics(static_cast<const Phrase&>(*targetStatistics->targetPhrase), sourcePhrase, alignment.second, m_lexTable_e2f, m_output, m_input );
targetStatistics->lexCachef2e = CacheLexicalStatistics(sourcePhrase, static_cast<const Phrase&>(*targetStatistics->targetPhrase), alignment.first, m_lexTable_f2e, m_input, m_output );
optimizerStats.push_back(targetStatistics);
}
catch (AlignmentException& e) {}
RemoveAllInMap(*allStats);
delete allStats;
}
Sentence sentence;
CleanUp(sentence); // free memory used by compact phrase tables
vector<float> ret (m_numModels*4);
for (size_t iFeature=0; iFeature < 4; iFeature++) {
CrossEntropyCounts * ObjectiveFunction = new CrossEntropyCounts(optimizerStats, this, iFeature);
vector<float> weight_vector = Optimize(ObjectiveFunction, m_numModels);
if (m_mode == "interpolate") {
weight_vector = normalizeWeights(weight_vector);
}
else if (m_mode == "instance_weighting") {
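// for instance weighting, the weights are only defined up to a scaling factor,
// so normalize them by fixing the first model's weight to 1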
float first_value = weight_vector[0];
for (size_t i=0; i < m_numModels; i++) {
weight_vector[i] = weight_vector[i]/first_value;
}
}
cerr << "Weight vector for feature " << iFeature << ": ";
for (size_t i=0; i < m_numModels; i++) {
ret[(iFeature*m_numModels)+i] = weight_vector[i];
cerr << weight_vector[i] << " ";
}
cerr << endl;
delete ObjectiveFunction;
}
RemoveAllInColl(optimizerStats);
return ret;
}
double CrossEntropyCounts::operator() ( const dlib::matrix<double,0,1>& arg) const
{
double total = 0.0;
double n = 0.0;
std::vector<float> weight_vector (m_model->m_numModels);
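// dlib passes the current search point as a column vector; copy it into one weight per component model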
for (int i=0; i < arg.nr(); i++) {
weight_vector[i] = arg(i);
}
if (m_model->m_mode == "interpolate") {
weight_vector = m_model->normalizeWeights(weight_vector);
}
for ( std::vector<multiModelCountsStatisticsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
multiModelCountsStatisticsOptimization* statistics = *iter;
size_t f = statistics->f;
double score;
if (m_iFeature == 0) {
score = m_model->m_combineFunction(statistics->fst, statistics->ft, weight_vector);
}
else if (m_iFeature == 1) {
score = m_model->ComputeWeightedLexicalTranslationFromCache(statistics->lexCachee2f, weight_vector);
}
else if (m_iFeature == 2) {
score = m_model->m_combineFunction(statistics->fst, statistics->fs, weight_vector);
}
else if (m_iFeature == 3) {
score = m_model->ComputeWeightedLexicalTranslationFromCache(statistics->lexCachef2e, weight_vector);
}
else {
score = 0;
UserMessage::Add("Trying to optimize feature that I don't know. Aborting");
CHECK(false);
}
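// TransformScore() returns a natural logarithm; dividing by TransformScore(2) = ln(2)
// converts it to log2, so 'total' accumulates the negative log2 probability of each
// phrase pair, weighted by its tuning-set frequency f (total/n is the cross-entropy in bits)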
total -= (FloorScore(TransformScore(score))/TransformScore(2))*f;
n += f;
}
return total/n;
}
#endif
// calculate weighted probability based on instance weighting of joint counts and marginal counts
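// weighted conditional probability: (sum_i w_i * joint_i) / (sum_i w_i * marginal_i)
// e.g. with two models, joint counts {3, 1}, marginals {10, 4} and weights {1.0, 0.5}:
// (1.0*3 + 0.5*1) / (1.0*10 + 0.5*4) = 3.5 / 12 ≈ 0.29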
double InstanceWeighting(vector<float> &joint_counts, vector<float> &marginals, vector<float> &multimodelweights) {
double joint_counts_weighted = inner_product(joint_counts.begin(), joint_counts.end(), multimodelweights.begin(), 0.0);
double marginals_weighted = inner_product(marginals.begin(), marginals.end(), multimodelweights.begin(), 0.0);
if (marginals_weighted == 0) {
return 0;
}
else {
return joint_counts_weighted/marginals_weighted;
}
}
// calculate linear interpolation of relative frequency estimates based on joint count and marginal counts
//unused for now; enable in config?
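// interpolated estimate: sum_i w_i * (joint_i / marginal_i), skipping models with a zero marginal
// e.g. with the counts and weights from the example above: 1.0*(3/10) + 0.5*(1/4) = 0.425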
double LinearInterpolationFromCounts(vector<float> &joint_counts, vector<float> &marginals, vector<float> &multimodelweights) {
vector<float> p(marginals.size());
for (size_t i=0;i < marginals.size();i++) {
if (marginals[i] != 0) {
p[i] = joint_counts[i]/marginals[i];
}
}
double p_weighted = inner_product(p.begin(), p.end(), multimodelweights.begin(), 0.0);
return p_weighted;
}
} //namespace

View File

@ -0,0 +1,149 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_PhraseDictionaryMultiModelCounts_h
#define moses_PhraseDictionaryMultiModelCounts_h
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
#ifndef WIN32
#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
#endif
#include <boost/unordered_map.hpp>
#include "moses/StaticData.h"
#include "moses/TargetPhrase.h"
#include "moses/Util.h"
#include "moses/UserMessage.h"
#include <exception>
extern std::vector<std::string> tokenize( const char*);
namespace Moses
{
typedef boost::unordered_map<std::string, double > lexicalMap;
typedef boost::unordered_map<std::string, lexicalMap > lexicalMapJoint;
typedef std::pair<std::vector<float>, std::vector<float> > lexicalPair;
typedef std::vector<std::vector<lexicalPair> > lexicalCache;
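// lexicalCache: outer vector over the words of one phrase, inner vector over the words aligned to them,
// each entry a (joint counts, marginal counts) pair with one value per component model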
struct multiModelCountsStatistics : multiModelStatistics {
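// fst: joint counts c(s,t) per component model; ft: target-side marginal counts c(t)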
std::vector<float> fst, ft;
};
struct multiModelCountsStatisticsOptimization: multiModelCountsStatistics {
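// fs: source-side marginal counts c(s) per component model; lexCachee2f/lexCachef2e: cached
// lexical counts for both translation directions; f: frequency of the phrase pair in the tuning set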
std::vector<float> fs;
lexicalCache lexCachee2f, lexCachef2e;
size_t f;
};
struct lexicalTable {
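// joint[wordS][wordT] = c(s,t), marginal[wordS] = c(s), as filled by LoadLexicalTable()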
lexicalMapJoint joint;
lexicalMap marginal;
};
double InstanceWeighting(std::vector<float> &joint_counts, std::vector<float> &marginals, std::vector<float> &multimodelweights);
double LinearInterpolationFromCounts(std::vector<float> &joint_counts, std::vector<float> &marginals, std::vector<float> &multimodelweights);
//thrown if alignment information does not match phrase pair (out-of-bound alignment points)
class AlignmentException : public std::runtime_error {
public:
AlignmentException() : std::runtime_error("AlignmentException") { }
};
/** Implementation of a phrase table with raw counts.
*/
class PhraseDictionaryMultiModelCounts: public PhraseDictionaryMultiModel
{
#ifdef WITH_DLIB
friend class CrossEntropyCounts;
#endif
typedef std::vector< std::set<size_t> > AlignVector;
public:
PhraseDictionaryMultiModelCounts(size_t m_numScoreComponent, PhraseDictionaryFeature* feature);
~PhraseDictionaryMultiModelCounts();
bool Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, const std::vector<std::string> &files
, const std::vector<float> &weight
, size_t tableLimit
, const LMList &languageModels
, float weightWP);
TargetPhraseCollection* CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
void CollectSufficientStatistics(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStatistics*>* allStats) const;
float GetTargetCount(const Phrase& target, size_t modelIndex) const;
double GetLexicalProbability( std::string &inner, std::string &outer, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights ) const;
double ComputeWeightedLexicalTranslation( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights, const std::vector<FactorType> &input_factors, const std::vector<FactorType> &output_factors ) const;
double ComputeWeightedLexicalTranslationFromCache( std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > &cache, std::vector<float> &weights ) const;
std::pair<PhraseDictionaryMultiModelCounts::AlignVector,PhraseDictionaryMultiModelCounts::AlignVector> GetAlignmentsForLexWeights(const Phrase &phraseS, const Phrase &phraseT, const AlignmentInfo &alignment) const;
std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > CacheLexicalStatistics( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, const std::vector<FactorType> &input_factors, const std::vector<FactorType> &output_factors );
void FillLexicalCountsJoint(std::string &wordS, std::string &wordT, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
void FillLexicalCountsMarginal(std::string &wordS, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
void LoadLexicalTable( std::string &fileName, lexicalTable* ltable);
const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase& src) const;
void CleanUpComponentModels(const InputType &source);
#ifdef WITH_DLIB
std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
#endif
// functions below required by base class
virtual void InitializeForInput(InputType const&) {
/* Don't do anything source specific here as this object is shared between threads.*/
}
private:
std::vector<PhraseDictionary*> m_inverse_pd;
std::vector<lexicalTable*> m_lexTable_e2f, m_lexTable_f2e;
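// pointer to the count combination function (InstanceWeighting; LinearInterpolationFromCounts is currently unused, cf. the .cpp)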
double (*m_combineFunction) (std::vector<float> &joint_counts, std::vector<float> &marginals, std::vector<float> &multimodelweights);
};
#ifdef WITH_DLIB
class CrossEntropyCounts: public OptimizationObjective
{
public:
CrossEntropyCounts (
std::vector<multiModelCountsStatisticsOptimization*> &optimizerStats,
PhraseDictionaryMultiModelCounts * model,
size_t iFeature
)
{
m_optimizerStats = optimizerStats;
m_model = model;
m_iFeature = iFeature;
}
double operator() ( const dlib::matrix<double,0,1>& arg) const;
private:
std::vector<multiModelCountsStatisticsOptimization*> m_optimizerStats;
PhraseDictionaryMultiModelCounts * m_model;
size_t m_iFeature;
};
#endif
} // end namespace
#endif

View File

@ -140,6 +140,8 @@ enum PhraseTableImplementation {
,FuzzyMatch = 11
,Compact = 12
,Interpolated = 13
,MultiModelCounts = 98
,MultiModel = 99
};
enum InputTypeEnum {

View File

@ -310,7 +310,7 @@ inline float CalcTranslationScore(const std::vector<float> &probVector,
return out.str(); \
} \
//! delete and remove every element of a collection object such as map, set, list etc
//! delete and remove every element of a collection object such as set, list etc
template<class COLL>
void RemoveAllInColl(COLL &coll)
{
@ -320,6 +320,17 @@ void RemoveAllInColl(COLL &coll)
coll.clear();
}
//! delete and remove every element of map
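//! (only the mapped values, iter->second, are deleted; the keys are removed by clear())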
template<class COLL>
void RemoveAllInMap(COLL &coll)
{
for (typename COLL::const_iterator iter = coll.begin() ; iter != coll.end() ; ++iter) {
delete (iter->second);
}
coll.clear();
}
//! x-platform reference to temp folder
std::string GetTempFolder();
//! MD5 hash of a file

View File

@ -0,0 +1,153 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Rico Sennrich <sennrich [AT] cl.uzh.ch>
# This script creates tables that store phrase pair frequencies rather than probabilities.
# These count tables allow a delayed, online computation of the original phrase translation features.
# The benefit is that component models can be combined quickly, with (nearly) the same result as training a single model on the concatenation of all data;
# results may still differ slightly because of differences in word alignment and rounding errors.
# Each component model can also be given a weight, which is applied to all of its frequencies during the combination.
# Note: the input phrase table must contain alignment information (--phrase-word-alignment in train-model.perl)
# and must be unsmoothed;
# in addition, the online model type requires the lexical count files lex.counts.e2f and lex.counts.f2e in the same folder (--write-lexical-counts in train-model.perl).
from __future__ import unicode_literals
import sys
import os
import gzip
from tempfile import NamedTemporaryFile
from subprocess import Popen, PIPE
if len(sys.argv) < 3 or len(sys.argv) > 4:
sys.stderr.write('Usage: ' + sys.argv[0] + ' in_file out_path [prune_count]\nThis script will create the files out_path/count-table.gz and out_path/count-table-target.gz\n')
exit()
def handle_file(filename,action,fileobj=None,mode='r'):
"""support reading either from stdin, plain file or gzipped file"""
if action == 'open':
if mode == 'r':
mode = 'rb'
if mode == 'rb' and not filename == '-' and not os.path.exists(filename):
if os.path.exists(filename+'.gz'):
filename = filename+'.gz'
else:
sys.stderr.write('Error: unable to open file. ' + filename + ' - aborting.\n')
exit()
if filename.endswith('.gz'):
fileobj = gzip.open(filename,mode)
elif filename == '-':
fileobj = sys.stdin
else:
fileobj = open(filename,mode)
return fileobj
elif action == 'close' and filename != '-':
fileobj.close()
def sort_and_uniq(infile, outfile):
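# byte-sort the unsorted target count file (LC_ALL=C), remove duplicate lines and gzip-compress the result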
cmd = ['sort', infile]
fobj = handle_file(outfile, 'open', mode='w')
sys.stderr.write('Executing: LC_ALL=C ' + ' '.join(cmd) + ' | uniq | gzip -c > ' + outfile + '\n')
p_sort = Popen(cmd, env={'LC_ALL':'C'}, stdout=PIPE)
p_uniq = Popen(['uniq'], stdin = p_sort.stdout, stdout=PIPE)
p_compress = Popen(['gzip', '-c'], stdin = p_uniq.stdout, stdout=fobj)
p_compress.wait()
fobj.close()
def create_count_lines(fobj, countobj, countobj_target, prune=0):
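# each input line is a Moses phrase table entry whose counts field is read as "c(t) c(s) [c(s,t)]";
# the score column is replaced by raw counts "c(s,t) c(t) c(s)", and a separate target count table is written;
# if c(s,t) is missing, it is reconstructed as round(scores[0]*c(t))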
i = 0
original_pos = 0
source = ""
store_lines = set()
for line in fobj:
if not i % 100000:
sys.stderr.write('.')
i += 1
line = line.split(b' ||| ')
current_source = line[0]
scores = line[2].split()
comments = line[4].split()
fs = comments[1]
ft = comments[0]
try:
fst = comments[2]
except IndexError:
fst = str(int(round(float(scores[0])*float(ft)))).encode()
line[2] = b' '.join([fst,ft,fs])
if prune:
if current_source != source:
write_batch(store_lines, countobj, prune)
source = current_source
store_lines = set()
original_pos = 0
store_lines.add((float(fst), original_pos, b' ||| '.join(line)))
original_pos += 1
else:
countobj.write(b' ||| '.join(line))
# target count file
tline = b' ||| '.join([line[1], b'X', ft]) + b' ||| |||\n' # if you use string formatting to make this look nicer, you may break Python 3 compatibility.
countobj_target.write(tline)
if prune:
write_batch(store_lines, countobj, prune)
countobj.close()
countobj_target.close()
def write_batch(store_lines, outfile, prune):
best = sorted(store_lines, reverse=True)[:prune]
for score, original_pos, store_line in sorted(best, key=lambda x: x[1]): # write the pruned entries in their original order
outfile.write(store_line)
if __name__ == '__main__':
if len(sys.argv) == 4:
prune = int(sys.argv[3])
else:
prune = 0
fileobj = handle_file(sys.argv[1],'open')
out_path = sys.argv[2]
count_table_file = gzip.open(os.path.join(out_path,'count-table.gz'), 'w')
count_table_target_file = os.path.join(out_path,'count-table-target.gz')
count_table_target_file_temp = NamedTemporaryFile(delete=False)
try:
sys.stderr.write('Creating temporary file for unsorted target counts file: ' + count_table_target_file_temp.name + '\n')
create_count_lines(fileobj, count_table_file, count_table_target_file_temp, prune)
count_table_target_file_temp.close()
sys.stderr.write('Finished writing, now re-sorting and compressing target count file\n')
sort_and_uniq(count_table_target_file_temp.name, count_table_target_file)
os.remove(count_table_target_file_temp.name)
sys.stderr.write('Done\n')
except BaseException:
os.remove(count_table_target_file_temp.name)
raise