Reorganization of phrase scorers in Mmsapt.

2024-09-17 14:17:13 +03:00 · 2014-06-14 13:03:31 +01:00 · 2014-06-14 13:03:31 +01:00 · 1a3d7d3266
commit 1a3d7d3266
parent c3db1a3a67
4 changed files with 225 additions and 210 deletions
--- a/moses/TranslationModel/UG/mm/custom-pt.cc
+++ b/moses/TranslationModel/UG/mm/custom-pt.cc
@ -23,6 +23,7 @@
 #include "ug_typedefs.h"
 #include "tpt_pickler.h"
 #include "ug_bitext.h"
+#include "../mmsapt_phrase_scorers.h"
 #include "ug_lexical_phrase_scorer2.h"

 using namespace std;
@ -44,7 +45,7 @@ float lbsmooth = .005;

 PScorePfwd<Token> calc_pfwd;
 PScorePbwd<Token> calc_pbwd;
-PScoreLex<Token>  calc_lex;
+PScoreLex<Token>  calc_lex(1.0);
 PScoreWP<Token>   apply_wp;
 vector<float> fweights;

@ -129,7 +130,7 @@ int main(int argc, char* argv[])
  bt.setDefaultSampleSize(max_samples);

  size_t i;
-  i = calc_pfwd.init(0,.05);
+  i = calc_pfwd.init(0,.05,'g');
  i = calc_pbwd.init(i,.05);
  i = calc_lex.init(i,base+L1+"-"+L2+".lex");
  i = apply_wp.init(i);
--- a/moses/TranslationModel/UG/mmsapt.cpp
+++ b/moses/TranslationModel/UG/mmsapt.cpp
@ -47,15 +47,22 @@ namespace Moses
  }
 #endif

+  vector<string> const&
+  Mmsapt::
+  GetFeatureNames() const
+  {
+    return m_feature_names;
+  }
+
  Mmsapt::
  Mmsapt(string const& line)
-    // : PhraseDictionary("Mmsapt",line), ofactor(1,0)
    : PhraseDictionary(line)
+    , m_lex_alpha(1.0)
    , withLogCountFeatures(false)
-    , withPfwd(true), withPbwd(true) 
+    , withCoherence(true)
+    , m_pfwd_features("g"), withPbwd(true), poolCounts(true)
    , ofactor(1,0)
    , m_tpc_ctr(0)
-      // default values chosen for bwd probability
  {
    this->init(line);
  }
@ -101,52 +108,56 @@ namespace Moses
    assert(L1.size());
    assert(L2.size());

-    m = param.find("pfwd_denom");
+    m = param.find("pfwd-denom");
    m_pfwd_denom = m != param.end() ? m->second[0] : 's';
-
+    
    m = param.find("smooth");
    m_lbop_parameter = m != param.end() ? atof(m->second.c_str()) : .05;

    m = param.find("max-samples");
    m_default_sample_size = m != param.end() ? atoi(m->second.c_str()) : 1000;

-    m = param.find("logcnt-features");
-    if (m != param.end())
+    if ((m = param.find("logcnt-features")) != param.end())
      withLogCountFeatures = m->second != "0";

-    m = param.find("pfwd");
-    if (m != param.end())
-      withPfwd = m->second != "0";
-
-    m = param.find("pbwd");
-    if (m != param.end())
+    if ((m = param.find("coh")) != param.end())
+      withCoherence = m->second != "0";
+    
+    if ((m = param.find("pfwd")) != param.end())
+      m_pfwd_features = (m->second == "0" ? "" : m->second);
+    
+    if (m_pfwd_features == "1") 
+      m_pfwd_features[0] = m_pfwd_denom;
+    
+    if ((m = param.find("pbwd")) != param.end())
      withPbwd = m->second != "0";
      
+    if ((m = param.find("lexalpha")) != param.end())
+      m_lex_alpha = atof(m->second.c_str());
+
    m = param.find("workers");
    m_workers = m != param.end() ? atoi(m->second.c_str()) : 8;
    m_workers = min(m_workers,24UL);

-    m = param.find("limit");
-    if (m != param.end()) m_tableLimit = atoi(m->second.c_str());
+    if ((m = param.find("limit")) != param.end()) 
+      m_tableLimit = atoi(m->second.c_str());

    m = param.find("cache-size");
    m_history.reserve(m != param.end()?max(1000,atoi(m->second.c_str())):10000);
    // in plain language: cache size is at least 1000, and 10,000 by default
+    // this cache keeps the most frequently used target phrase collections
+    // available even when they are not actively in use
    
    this->m_numScoreComponents = atoi(param["num-features"].c_str());

-    // num_features = 0;
    m = param.find("ifactor");
    input_factor = m != param.end() ? atoi(m->second.c_str()) : 0;
+    
    poolCounts = true;
-    m = param.find("extra");
-    if (m != param.end()) 
-      {
-	extra_data = m->second;
-	// cerr << "have extra data" << endl;
-      }
-    // keeps track of the most frequently used target phrase collections
-    // (to keep them cached even when not actively in use)
+    
+    if ((m = param.find("extra")) != param.end()) 
+      extra_data = m->second;
+
  }

  void
@ -175,6 +186,49 @@ namespace Moses
    // cerr << "Loaded " << btdyn->T1->size() << " sentence pairs" << endl;
  }

+  size_t
+  Mmsapt::
+  add_corpus_specific_features
+  (vector<sptr<pscorer > >& ffvec, size_t num_feats)
+  {
+    float const lbop = m_lbop_parameter; // just for code readability below
+    // for the time being, we assume that all phrase probability features 
+    // use the same confidence parameter for lower-bound-estimation
+    for (size_t i = 0; i < m_pfwd_features.size(); ++i) 
+      {	
+	UTIL_THROW_IF2(m_pfwd_features[i] != 'g' &&
+		       m_pfwd_features[i] != 'r' &&
+		       m_pfwd_features[i] != 's',
+		       "Can't handle pfwd feature type '" 
+		       << m_pfwd_features[i] << "'.");
+	sptr<PScorePfwd<Token> > ff(new PScorePfwd<Token>());
+	size_t k = num_feats;
+	num_feats = ff->init(num_feats,lbop,m_pfwd_features[i]);
+	for (;k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
+	ffvec.push_back(ff);
+      }
+    
+    if (withPbwd) 
+      {
+	sptr<PScorePbwd<Token> > ff(new PScorePbwd<Token>());
+	size_t k = num_feats;
+	num_feats = ff->init(num_feats,lbop);
+	for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
+	ffvec.push_back(ff);
+      }
+    
+    if (withLogCountFeatures) 
+      {
+	sptr<PScoreLogCounts<Token> > ff(new PScoreLogCounts<Token>());
+	size_t k = num_feats;
+	num_feats = ff->init(num_feats);
+	for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
+	ffvec.push_back(ff);
+      }
+
+    return num_feats;
+  }
+
  void
  Mmsapt::
  Load()
@ -184,44 +238,52 @@ namespace Moses
    btfix.setDefaultSampleSize(m_default_sample_size);
    
    size_t num_feats = 0;
-    // TO DO: should we use different lbop parameters 
-    //        for the relative-frequency based features?
    
-    if (withLogCountFeatures) num_feats = add_logcounts_fix.init(num_feats);
-
-    float const lbop = m_lbop_parameter; // just for code readability below
-    if (withPfwd) num_feats = calc_pfwd_fix.init(num_feats,lbop,m_pfwd_denom);
-    if (withPbwd) num_feats = calc_pbwd_fix.init(num_feats,lbop);
+    // lexical scores are currently always active 
+    sptr<PScoreLex<Token> > ff(new PScoreLex<Token>(m_lex_alpha));
+    size_t k = num_feats;
+    num_feats = ff->init(num_feats, bname + L1 + "-" + L2 + ".lex");
+    for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
+    m_active_ff_common.push_back(ff);
    
-    // currently always active by default; may (should) change later
-    num_feats  = calc_lex.init(num_feats, bname + L1 + "-" + L2 + ".lex");
-
-    // if (this->m_numScoreComponents%2) // a bit of a hack, for backwards compatibility
-    // num_feats  = apply_pp.init(num_feats);
-
-    if (num_feats < this->m_numScoreComponents)
+    if (withCoherence)
      {
-	poolCounts = false;
-	if (withLogCountFeatures) num_feats = add_logcounts_dyn.init(num_feats);
-	if (withPfwd) num_feats = calc_pfwd_dyn.init(num_feats,lbop,m_pfwd_denom);
-	if (withPbwd) num_feats = calc_pbwd_dyn.init(num_feats,lbop);
+	sptr<PScoreCoherence<Token> > ff(new PScoreCoherence<Token>());
+	size_t k = num_feats;
+	num_feats = ff->init(num_feats);
+	for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
+	m_active_ff_common.push_back(ff);
      }
-    
-    if (num_feats != this->m_numScoreComponents)
-      {
-	ostringstream buf;
-	buf << "At " << __FILE__ << ":" << __LINE__
-	    << ": number of feature values provided by Phrase table"
-	    << " does not match number specified in Moses config file!";
-	throw buf.str().c_str();
-      }
-    // cerr << "MMSAPT provides " << num_feats << " features at " 
-    // << __FILE__ << ":" << __LINE__ << endl;

+    num_feats = add_corpus_specific_features(m_active_ff_fix,num_feats);
+    // cerr << num_feats << "/" << this->m_numScoreComponents 
+    // << " at " << __FILE__ << ":" << __LINE__ << endl;
+    poolCounts = poolCounts && num_feats == this->m_numScoreComponents;
+    if (!poolCounts)
+      num_feats = add_corpus_specific_features(m_active_ff_dyn, num_feats);
+    
+#if 0
+    cerr << "MMSAPT provides " << num_feats << " features at " 
+	 << __FILE__ << ":" << __LINE__ << endl;
+    BOOST_FOREACH(string const& fname, m_feature_names)
+      cerr << fname << endl;
+#endif
+    UTIL_THROW_IF2(num_feats != this->m_numScoreComponents,
+		   "At " << __FILE__ << ":" << __LINE__
+		   << ": number of feature values provided by Phrase table (" 
+		   << num_feats << ") does not match number specified in "
+		   << "Moses config file (" << this->m_numScoreComponents 
+		   << ")!\n";);
+    
+    
    btdyn.reset(new imBitext<Token>(btfix.V1, btfix.V2,m_default_sample_size));
    btdyn->num_workers = this->m_workers;
-    if (extra_data.size()) load_extra_data(extra_data);
-
+    if (extra_data.size()) 
+      {
+	load_extra_data(extra_data);
+      }
+    
+#if 0
    // currently not used
    LexicalPhraseScorer2<Token>::table_t & COOC = calc_lex.scorer.COOC;
    typedef LexicalPhraseScorer2<Token>::table_t::Cell cell_t;
@ -230,7 +292,8 @@ namespace Moses
      for (cell_t const* c = COOC[r].start; c < COOC[r].stop; ++c)
 	wlex21[c->id].push_back(r);
    COOCraw.open(bname + L1 + "-" + L2 + ".coc");
-
+#endif
+    
  }

  void
@ -283,20 +346,28 @@ namespace Moses
  {
    PhrasePair pp;   
    pp.init(pid1, stats, this->m_numScoreComponents);
-    // if (this->m_numScoreComponents%2)
-    // apply_pp(bt,pp);
    pstats::trg_map_t::const_iterator t;
    for (t = stats.trg.begin(); t != stats.trg.end(); ++t)
      {
   	pp.update(t->first,t->second);
-	calc_lex(bt,pp);
-	if (withPfwd) calc_pfwd_fix(bt,pp);
-	if (withPbwd) calc_pbwd_fix(bt,pp);
-	if (withLogCountFeatures) add_logcounts_fix(bt,pp);
+	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+	  (*ff)(bt,pp);
+	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+	  (*ff)(bt,pp);
 	tpcoll->Add(createTargetPhrase(src,bt,pp));
      }
  }

+  void
+  Mmsapt::
+  ScorePPfix(bitext::PhrasePair& pp) const
+  {
+    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+      (*ff)(btfix,pp);
+    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+      (*ff)(btfix,pp);
+  }
+
  // process phrase stats from a single parallel corpus
  bool
  Mmsapt::
@ -318,8 +389,6 @@ namespace Moses
      pp.init(pid1b, *statsb, this->m_numScoreComponents);
    else return false; // throw "no stats for pooling available!";

-    // if (this->m_numScoreComponents%2)
-    // apply_pp(bta,pp);
    pstats::trg_map_t::const_iterator b;
    pstats::trg_map_t::iterator a;
    if (statsb)
@ -344,10 +413,10 @@ namespace Moses
 			    b->second);
 	      }
 	    else pp.update(b->first,b->second);
-	    calc_lex(btb,pp);
-	    if (withPfwd) calc_pfwd_fix(btb,pp);
-	    if (withPbwd) calc_pbwd_fix(btb,pp);
-	    if (withLogCountFeatures) add_logcounts_fix(btb,pp);
+	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+	      (*ff)(btb,pp);
+	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+	      (*ff)(btb,pp);
 	    tpcoll->Add(createTargetPhrase(src,btb,pp));
 	  }
      }
@ -377,20 +446,19 @@ namespace Moses
 #endif

 	UTIL_THROW_IF2(pp.raw2 == 0, 
-		       "OOPS" 
-		       << bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: " 
+		       "OOPS" << bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: " 
 		       << bta.T2->pid2str(bta.V2.get(),pp.p2) << ": "
 		       << pp.raw1 << " " << pp.sample1 << " " 
 		       << pp.good1 << " " << pp.joint << " " 
 		       << pp.raw2);
-	calc_lex(bta,pp);
-	if (withPfwd) calc_pfwd_fix(bta,pp);
-	if (withPbwd) calc_pbwd_fix(bta,pp);
-	if (withLogCountFeatures) add_logcounts_fix(bta,pp);
+	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+	  (*ff)(bta,pp);
+	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+	  (*ff)(bta,pp);
 	tpcoll->Add(createTargetPhrase(src,bta,pp));
      }
    return true;
-}
+  }
  
  
  // process phrase stats from a single parallel corpus
@ -398,75 +466,81 @@ namespace Moses
  Mmsapt::
  combine_pstats
  (Phrase   const& src,
-   uint64_t const  pid1a, 
-   pstats   * statsa, 
-   Bitext<Token> const & bta,
-   uint64_t const  pid1b, 
-   pstats   const* statsb, 
-   Bitext<Token> const & btb,
-   TargetPhraseCollection* tpcoll
-   ) const
+   uint64_t const  pid1a, pstats      * statsa, Bitext<Token> const & bta,
+   uint64_t const  pid1b, pstats const* statsb, Bitext<Token> const & btb,
+   TargetPhraseCollection* tpcoll) const
  {
    PhrasePair ppfix,ppdyn,pool; 
+    // ppfix: counts from btfix
+    // ppdyn: counts from btdyn
+    // pool: pooled counts from both
    Word w;
    if (statsa) ppfix.init(pid1a,*statsa,this->m_numScoreComponents);
    if (statsb) ppdyn.init(pid1b,*statsb,this->m_numScoreComponents);
    pstats::trg_map_t::const_iterator b;
    pstats::trg_map_t::iterator a;
+
    if (statsb)
      {
 	pool.init(pid1b,*statsb,0);
-	// if (this->m_numScoreComponents%2)
-	// apply_pp(btb,ppdyn);
 	for (b = statsb->trg.begin(); b != statsb->trg.end(); ++b)
 	  {
 	    ppdyn.update(b->first,b->second);
-	    if (withPfwd) calc_pfwd_dyn(btb,ppdyn);
-	    if (withPbwd) calc_pbwd_dyn(btb,ppdyn);
-	    if (withLogCountFeatures) add_logcounts_dyn(btb,ppdyn);
-	    calc_lex(btb,ppdyn);
+	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
+	      (*ff)(btb,ppdyn);
 	    
 	    uint32_t sid,off,len;    
 	    parse_pid(b->first, sid, off, len);
 	    Token const* x = bta.T2->sntStart(sid) + off;
 	    TSA<Token>::tree_iterator m(bta.I2.get(),x,x+len);
+	    
 	    if (m.size() && statsa && 
-		((a = statsa->trg.find(m.getPid())) 
-		 != statsa->trg.end()))
+		((a = statsa->trg.find(m.getPid())) != statsa->trg.end()))
 	      {
+		// phrase pair found also in btfix
 		ppfix.update(a->first,a->second);
-		if (withPfwd) calc_pfwd_fix(bta,ppfix,&ppdyn.fvals);
-		if (withPbwd) calc_pbwd_fix(bta,ppfix,&ppdyn.fvals);
-		if (withLogCountFeatures) add_logcounts_fix(bta,ppfix,&ppdyn.fvals);
+		BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+		  (*ff)(bta,ppfix,&ppdyn.fvals);
+		BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+		  (*ff)(bta,ppfix,&ppdyn.fvals);
 		a->second.invalidate();
 	      }
 	    else 
 	      {
-		if (m.size())
-		  pool.update(b->first,m.approxOccurrenceCount(),
-			      b->second);
-		else
+		// phrase pair was not found in btfix
+
+		// ... but the target phrase was
+		if (m.size()) 
+		  pool.update(b->first,m.approxOccurrenceCount(), b->second);
+
+		// ... and not even the target phrase
+		else 
 		  pool.update(b->first,b->second);
-		if (withPfwd) calc_pfwd_fix(btb,pool,&ppdyn.fvals);
-		if (withPbwd) calc_pbwd_fix(btb,pool,&ppdyn.fvals);
-		if (withLogCountFeatures) add_logcounts_fix(btb,pool,&ppdyn.fvals);
+		
+		BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+		  (*ff)(btb,pool,&ppdyn.fvals);
+		BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+		  (*ff)(btb,pool,&ppdyn.fvals);
+		
 	      }
+
 	    tpcoll->Add(createTargetPhrase(src,btb,ppdyn));
 	  }
      }
+
+    // now deal with all phrase pairs that are ONLY in btfix
+    // (the ones that are in both were dealt with above)
    if (statsa)
      {
 	pool.init(pid1a,*statsa,0);
-	// if (this->m_numScoreComponents%2)
-	// apply_pp(bta,ppfix);
 	for (a = statsa->trg.begin(); a != statsa->trg.end(); ++a)
 	  {
 	    if (!a->second.valid()) continue; // done above
 	    ppfix.update(a->first,a->second);
-	    if (withPfwd) calc_pfwd_fix(bta,ppfix);
-	    if (withPbwd) calc_pbwd_fix(bta,ppfix);
-	    if (withLogCountFeatures) add_logcounts_fix(bta,ppfix);
-	    calc_lex(bta,ppfix);
+	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+	      (*ff)(bta,ppfix);
+	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+	      (*ff)(bta,ppfix);
 	    
 	    if (btb.I2)
 	      {
@ -480,9 +554,8 @@ namespace Moses
 		  pool.update(a->first,a->second);
 	      }
 	    else pool.update(a->first,a->second);
-	    if (withPfwd) calc_pfwd_dyn(bta,pool,&ppfix.fvals);
-	    if (withPbwd) calc_pbwd_dyn(bta,pool,&ppfix.fvals);
-	    if (withLogCountFeatures) add_logcounts_dyn(bta,pool,&ppfix.fvals);
+	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
+	      (*ff)(btb,pool,&ppfix.fvals);
 	  }
 	if (ppfix.p2)
 	  tpcoll->Add(createTargetPhrase(src,bta,ppfix));
@ -490,92 +563,6 @@ namespace Moses
    return (statsa || statsb);
  }
  
-  // // phrase statistics combination treating the two knowledge 
-  // // sources separately with backoff to pooling when only one 
-  // // of the two knowledge sources contains the phrase pair in 
-  // // question
-  // void
-  // Mmsapt::
-  // process_pstats(uint64_t const  mypid1,
-  // 		 uint64_t const  otpid1,
-  // 		 pstats   const& mystats,       // my phrase stats
-  // 		 pstats   const* otstats,       // other phrase stats
-  // 		 Bitext<Token> const & mybt,    // my bitext
-  // 		 Bitext<Token> const * otbt,    // other bitext
-  // 		 PhraseScorer<Token> const& mypfwd, 
-  // 		 PhraseScorer<Token> const& mypbwd, 
-  // 		 PhraseScorer<Token> const* otpfwd, 
-  // 		 PhraseScorer<Token> const* otpbwd, 
-  // 		 TargetPhraseCollection* tpcoll)
-  // {
-  //   boost::unordered_map<uint64_t,jstats>::const_iterator t;
-  //   vector<FactorType> ofact(1,0);
-  //   PhrasePair mypp,otpp,combo; 
-  //   mypp.init(mypid1, mystats, this->m_numScoreComponents);
-  //   if (otstats) 
-  //     {
-  // 	otpp.init(otpid1, *otstats, 0);
-  // 	combo.init(otpid1, mystats, *otstats, 0);
-  //     }
-  //   else combo = mypp;
-    
-  //   for (t = mystats.trg.begin(); t != mystats.trg.end(); ++t)
-  //     {
-  // 	if (!t->second.valid()) continue; 
-  // 	// we dealt with this phrase pair already; 
-  // 	// see j->second.invalidate() below;
-  // 	uint32_t sid,off,len; parse_pid(t->first,sid,off,len);
-   
-  // 	mypp.update(t->first,t->second);
-  // 	apply_pp(mybt,mypp);
-  // 	calc_lex (mybt,mypp);
-  // 	mypfwd(mybt,mypp);
-  // 	mypbwd(mybt,mypp);
-	
-  // 	if (otbt) // it's a dynamic phrase table
-  // 	  {
-  // 	    assert(otpfwd);
-  // 	    assert(otpbwd);
-  // 	    boost::unordered_map<uint64_t,jstats>::iterator j;
-	    
-  // 	    // look up the current target phrase in the other bitext
-  // 	    Token const* x = mybt.T2->sntStart(sid) + off;
-  // 	    TSA<TOKEN>::tree_iterator m(otbt->I2.get(),x,x+len);
-  // 	    if (otstats     // source phrase exists in other bitext
-  // 		&& m.size() // target phrase exists in other bitext
-  // 		&& ((j = otstats->trg.find(m.getPid())) 
-  // 		    != otstats->trg.end())) // phrase pair found in other bitext
-  // 	      {
-  // 		otpp.update(j->first,j->second);
-  // 		j->second.invalidate(); // mark the phrase pair as seen
-  // 		otpfwd(*otbt,otpp,&mypp.fvals);
-  // 		otpbwd(*otbt,otpp,&mypp.fvals);
-  // 	      }
-  // 	    else 
-  // 	      {
-  // 		if (m.size()) // target phrase seen in other bitext, but not the phrase pair
-  // 		  combo.update(t->first,m.approxOccurrenceCount(),t->second);
-  // 		else
-  // 		  combo.update(t->first,t->second);
-  // 		(*otpfwd)(mybt,combo,&mypp.fvals);
-  // 		(*otpbwd)(mybt,combo,&mypp.fvals);
-  // 	      }
-  // 	  }
-	
-  // 	// now add the phrase pair to the TargetPhraseCollection:
-  // 	TargetPhrase* tp = new TargetPhrase();
-  // 	for (size_t k = off; k < stop; ++k)
-  // 	  {
-  // 	    StringPiece wrd = (*mybt.V2)[x[k].id()];
-  // 	    Word w; w.CreateFromString(Output,ofact,wrd,false);
-  // 	    tp->AddWord(w);
-  // 	  }
-  // 	tp->GetScoreBreakdown().Assign(this,mypp.fvals);
-  // 	tp->Evaluate(src);
-  // 	tpcoll->Add(tp);
-  //     }
-  // }
-  
  Mmsapt::
  TargetPhraseCollectionWrapper::
  TargetPhraseCollectionWrapper(size_t r, uint64_t k)
--- a/moses/TranslationModel/UG/mmsapt.h
+++ b/moses/TranslationModel/UG/mmsapt.h
@ -29,6 +29,7 @@
 #include <map>

 #include "moses/TranslationModel/PhraseDictionary.h"
+#include "mmsapt_phrase_scorers.h"

 // TO DO:
 // - make lexical phrase scorer take addition to the "dynamic overlay" into account
@ -51,6 +52,7 @@ namespace Moses
    typedef mmBitext<Token> mmbitext;
    typedef imBitext<Token> imbitext;
    typedef TSA<Token>           tsa;
+    typedef PhraseScorer<Token> pscorer;
  private:
    mmbitext btfix; 
    sptr<imbitext> btdyn;
@ -58,30 +60,48 @@ namespace Moses
    string L1;
    string L2;
    float  m_lbop_parameter;
+    float  m_lex_alpha; 
+    // alpha parameter for lexical smoothing (joint+alpha)/(marg + alpha)
+    // must be > 0 if dynamic 
    size_t m_default_sample_size;
    size_t m_workers;  // number of worker threads for sampling the bitexts
+
+    // deprecated!
    char m_pfwd_denom; // denominator for computation of fwd phrase score:
    // 'r' - divide by raw count
    // 's' - divide by sample count
    // 'g' - devide by number of "good" (i.e. coherent) samples 
    // size_t num_features;
+
    size_t input_factor;
    size_t output_factor; // we can actually return entire Tokens!
+
+    bool withLogCountFeatures; // add logs of counts as features?
+    bool withCoherence; 
+    string m_pfwd_features; // which pfwd functions to use
+    vector<string> m_feature_names; // names of features activated
+    vector<sptr<pscorer > > m_active_ff_fix; // activated feature functions (fix)
+    vector<sptr<pscorer > > m_active_ff_dyn; // activated feature functions (dyn)
+    vector<sptr<pscorer > > m_active_ff_common; // activated feature functions (common)
+
+    size_t
+    add_corpus_specific_features
+    (vector<sptr<pscorer > >& ffvec, size_t num_feats);
+    
    // built-in feature functions
-    PScorePfwd<Token> calc_pfwd_fix, calc_pfwd_dyn;
-    PScorePbwd<Token> calc_pbwd_fix, calc_pbwd_dyn;
-    PScoreLex<Token>  calc_lex; // this one I'd like to see as an external ff eventually
+    // PScorePfwd<Token> calc_pfwd_fix, calc_pfwd_dyn;
+    // PScorePbwd<Token> calc_pbwd_fix, calc_pbwd_dyn;
+    // PScoreLex<Token>  calc_lex; // this one I'd like to see as an external ff eventually
    // PScorePP<Token>   apply_pp; // apply phrase penalty 
-    PScoreLogCounts<Token>   add_logcounts_fix;
-    PScoreLogCounts<Token>   add_logcounts_dyn;
+    // PScoreLogCounts<Token>   add_logcounts_fix;
+    // PScoreLogCounts<Token>   add_logcounts_dyn;
    void init(string const& line);
    mutable boost::mutex lock;
+    bool withPbwd;
    bool poolCounts;
-    bool withLogCountFeatures; // add logs of counts as features?
-    bool withPfwd,withPbwd;
    vector<FactorType> ofactor;

-    
+
  public:
    // typedef boost::unordered_map<uint64_t, sptr<TargetPhraseCollection> > tpcoll_cache_t;
    class TargetPhraseCollectionWrapper 
@ -207,6 +227,12 @@ namespace Moses
    bool
    PrefixExists(Phrase const& phrase) const;

+    vector<string> const&
+    GetFeatureNames() const;
+    
+    void
+    ScorePPfix(bitext::PhrasePair& pp) const;
+
  private:
  };
 } // end namespace
--- a/moses/TranslationModel/UG/mmsapt_align.cc
+++ b/moses/TranslationModel/UG/mmsapt_align.cc
@ -127,6 +127,7 @@ namespace Moses
  Alignment::
  show(ostream& out, PhraseAlnHyp const& ah)
  {
+#if 0
    LexicalPhraseScorer2<Token>::table_t const& 
      COOCjnt = PT.calc_lex.scorer.COOC;

@ -164,6 +165,7 @@ namespace Moses
    // 	 <<     " jbwd: " << obwdj[po_jbwd]<<"/"<<obwdm[po_jbwd]
    // 	 <<     " other: " << obwdj[po_other]<<"/"<<obwdm[po_other]
    // 	 << "]" << endl;
+#endif
  }
  
  void
@ -283,9 +285,7 @@ namespace Moses
 	    psiter R = tpid2span.find(y->first);
 	    if (R == tpid2span.end()) continue;
 	    pp.update(y->first, y->second);
-	    PT.calc_lex(PT.btfix,pp);
-	    PT.calc_pfwd_fix(PT.btfix,pp);
-	    PT.calc_pbwd_fix(PT.btfix,pp);
+	    PT.ScorePPfix(pp);
 	    pp.eval(PT.feature_weights);
 	    PP.push_back(pp);
 	    BOOST_FOREACH(span const& sspan, L->second)
@ -329,6 +329,7 @@ namespace Moses
    BOOST_FOREACH(int i, o) A.show(cout,A.PAH[i]);
    sptr<vector<int> > aln;
    return aln;
-  }
+}
 }

+