Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes.

2024-09-11 11:25:40 +03:00 · 2014-07-09 02:41:28 +01:00 · 2014-07-09 02:41:28 +01:00 · 4d41211c2c
commit 4d41211c2c
parent 28d64e2339
42 changed files with 2370 additions and 1308 deletions
--- a/.gitignore
+++ b/.gitignore
@ -79,3 +79,4 @@ nbproject/
 mingw/MosesGUI/MosesGUI.e4p
 mingw/MosesGUI/_eric4project/

+contrib/m4m/merge-sorted
--- a/4
+++ b/4
@ -152,13 +152,15 @@ build-projects lm util phrase-extract search moses moses/LM mert moses-cmd moses
 if [ option.get "with-mm" : : "yes" ]
 {
 alias mm :  
+  moses/TranslationModel/UG//spe-check-coverage2
  moses/TranslationModel/UG//ptable-lookup 
+  moses/TranslationModel/UG//sim-pe 
+  moses/TranslationModel/UG//spe-check-coverage 
  moses/TranslationModel/UG/mm//mtt-build 
  moses/TranslationModel/UG/mm//mtt-dump 
  moses/TranslationModel/UG/mm//symal2mam 
  moses/TranslationModel/UG/mm//mam2symal 
  moses/TranslationModel/UG/mm//mam_verify 
-  moses/TranslationModel/UG/mm//custom-pt 
  moses/TranslationModel/UG/mm//mmlex-build 
  moses/TranslationModel/UG/mm//mmlex-lookup 
  moses/TranslationModel/UG/mm//mtt-count-words 
--- a/OnDiskPt/queryOnDiskPt.cpp
+++ b/OnDiskPt/queryOnDiskPt.cpp
@ -22,7 +22,7 @@ int main(int argc, char **argv)
 {
  int tableLimit = 20;
  std::string ttable = "";
-  bool useAlignments = false;
+  // bool useAlignments = false;

  for(int i = 1; i < argc; i++) {
    if(!strcmp(argv[i], "-tlimit")) {
--- a/contrib/server/mosesserver.cpp
+++ b/contrib/server/mosesserver.cpp
@ -4,6 +4,7 @@
 #include <algorithm>


+#include "moses/Util.h"
 #include "moses/ChartManager.h"
 #include "moses/Hypothesis.h"
 #include "moses/Manager.h"
@ -59,7 +60,7 @@ public:
    if(add2ORLM_) {
      //updateORLM();
    }
-    cerr << "Done inserting\n";
+    XVERBOSE(1,"Done inserting\n");
    //PhraseDictionary* pdsa = (PhraseDictionary*) pdf->GetDictionary(*dummy);
    map<string, xmlrpc_c::value> retData;
    //*retvalP = xmlrpc_c::value_struct(retData);
@ -120,17 +121,17 @@ public:
    if(si == params.end())
      throw xmlrpc_c::fault("Missing source sentence", xmlrpc_c::fault::CODE_PARSE);
    source_ = xmlrpc_c::value_string(si->second);
-    cerr << "source = " << source_ << endl;
+    XVERBOSE(1,"source = " << source_ << endl);
    si = params.find("target");
    if(si == params.end())
      throw xmlrpc_c::fault("Missing target sentence", xmlrpc_c::fault::CODE_PARSE);
    target_ = xmlrpc_c::value_string(si->second);
-    cerr << "target = " << target_ << endl;
+    XVERBOSE(1,"target = " << target_ << endl);
    si = params.find("alignment");
    if(si == params.end())
      throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE);
    alignment_ = xmlrpc_c::value_string(si->second);
-    cerr << "alignment = " << alignment_ << endl;
+    XVERBOSE(1,"alignment = " << alignment_ << endl);
    si = params.find("bounded");
    bounded_ = (si != params.end());
    si = params.find("updateORLM");
@ -224,7 +225,7 @@ public:
    }
    const string source((xmlrpc_c::value_string(si->second)));

-    cerr << "Input: " << source << endl;
+    XVERBOSE(1,"Input: " << source << endl);
    si = params.find("align");
    bool addAlignInfo = (si != params.end());
    si = params.find("word-align");
@ -287,13 +288,13 @@ public:
        }
    } else {
        Sentence sentence;
-        const vector<FactorType> &inputFactorOrder =
-          staticData.GetInputFactorOrder();
+        const vector<FactorType> &
+	  inputFactorOrder = staticData.GetInputFactorOrder();
        stringstream in(source + "\n");
        sentence.Read(in,inputFactorOrder);
 	size_t lineNumber = 0; // TODO: Include sentence request number here?
        Manager manager(lineNumber, sentence, staticData.GetSearchAlgorithm());
-        manager.ProcessSentence();
+	manager.ProcessSentence();
        const Hypothesis* hypo = manager.GetBestHypothesis();

        vector<xmlrpc_c::value> alignInfo;
@ -331,7 +332,7 @@ public:
    pair<string, xmlrpc_c::value>
    text("text", xmlrpc_c::value_string(out.str()));
    retData.insert(text);
-    cerr << "Output: " << out.str() << endl;
+    XVERBOSE(1,"Output: " << out.str() << endl);
    *retvalP = xmlrpc_c::value_struct(retData);
  }

@ -574,7 +575,7 @@ int main(int argc, char** argv)
 {

  //Extract port and log, send other args to moses
-  char** mosesargv = new char*[argc+2];
+  char** mosesargv = new char*[argc+2]; // why "+2" [UG]
  int mosesargc = 0;
  int port = 8080;
  const char* logfile = "/dev/null";
@ -634,11 +635,11 @@ int main(int argc, char** argv)
  myRegistry.addMethod("updater", updater);
  myRegistry.addMethod("optimize", optimizer);

-   xmlrpc_c::serverAbyss myAbyssServer(
-					myRegistry,
-					port,              // TCP port on which to listen
-					logfile
-					);
+  xmlrpc_c::serverAbyss myAbyssServer(
+				      myRegistry,
+				      port,              // TCP port on which to listen
+				      logfile
+				      );
  /* doesn't work with xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04
  xmlrpc_c::serverAbyss myAbyssServer(
    xmlrpc_c::serverAbyss::constrOpt()
@ -648,12 +649,10 @@ int main(int argc, char** argv)
    .allowOrigin("*")
  );
  */
-
-  cerr << "Listening on port " << port << endl;
+  
+  XVERBOSE(1,"Listening on port " << port << endl);
  if (isSerial) {
-    while(1) {
-      myAbyssServer.runOnce();
-    }
+    while(1) myAbyssServer.runOnce();
  } else {
    myAbyssServer.run();
  }
--- a/moses-cmd/Jamfile
+++ b/moses-cmd/Jamfile
@ -3,4 +3,11 @@ alias deps : IOWrapper.cpp mbr.cpp LatticeMBR.cpp TranslationAnalysis.cpp ..//z
 exe moses : Main.cpp deps ;
 exe lmbrgrid : LatticeMBRGrid.cpp deps ;

-alias programs : moses lmbrgrid ;
+exe simulate-pe : 
+simulate-pe.cc 
+$(TOP)/moses/TranslationModel/UG/generic//generic 
+$(TOP)//boost_program_options 
+deps 
+;
+
+alias programs : moses lmbrgrid simulate-pe ;
--- a/moses/BitmapContainer.cpp
+++ b/moses/BitmapContainer.cpp
@ -161,13 +161,17 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
  }

  if (m_translations.size() > 1) {
-	UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(),
-			"Non-monotonic future score");
+    UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(),
+		   "Non-monotonic future score: " 
+		   << m_translations.Get(0)->GetFutureScore() << " vs. " 
+		   << m_translations.Get(1)->GetFutureScore());
  }

  if (m_hypotheses.size() > 1) {
    UTIL_THROW_IF2(m_hypotheses[0]->GetTotalScore() < m_hypotheses[1]->GetTotalScore(),
-			  "Non-monotonic total score");
+		   "Non-monotonic total score" 
+		   << m_hypotheses[0]->GetTotalScore() << " vs. "
+		   << m_hypotheses[1]->GetTotalScore());
  }

  HypothesisScoreOrdererWithDistortion orderer (&transOptRange);
@ -442,7 +446,9 @@ BitmapContainer::ProcessBestHypothesis()
  if (!Empty()) {
    HypothesisQueueItem *check = Dequeue(true);
    UTIL_THROW_IF2(item->GetHypothesis()->GetTotalScore() < check->GetHypothesis()->GetTotalScore(),
-    		"Non-monotonic total score");
+		   "Non-monotonic total score: "
+		   << item->GetHypothesis()->GetTotalScore() << " vs. "
+		   << check->GetHypothesis()->GetTotalScore());
  }

  // Logging for the criminally insane
--- a/moses/Manager.cpp
+++ b/moses/Manager.cpp
@ -105,7 +105,9 @@ void Manager::ProcessSentence()
  // some reporting on how long this took
  IFVERBOSE(1) {
    GetSentenceStats().StopTimeCollectOpts();
-    TRACE_ERR("Line "<< m_lineNumber << ": Collecting options took " << GetSentenceStats().GetTimeCollectOpts() << " seconds" << endl);
+    TRACE_ERR("Line "<< m_lineNumber << ": Collecting options took " 
+	      << GetSentenceStats().GetTimeCollectOpts() << " seconds at " 
+	      << __FILE__ << ":" << __LINE__ << endl);
  }

  // search for best translation with the specified algorithm
--- a/moses/TranslationModel/UG/Jamfile
+++ b/moses/TranslationModel/UG/Jamfile
@ -20,6 +20,39 @@ $(TOP)/moses/TranslationModel/UG//mmsapt
 $(TOP)/util//kenutil 
 ; 

+exe sim-pe : 
+sim-pe.cc 
+$(TOP)/moses//moses
+$(TOP)/moses/TranslationModel/UG/generic//generic 
+$(TOP)//boost_iostreams 
+$(TOP)//boost_program_options 
+$(TOP)/moses/TranslationModel/UG/mm//mm 
+$(TOP)/moses/TranslationModel/UG//mmsapt 
+$(TOP)/util//kenutil 
+; 
+
+exe spe-check-coverage : 
+spe-check-coverage.cc 
+$(TOP)/moses//moses
+$(TOP)/moses/TranslationModel/UG/generic//generic 
+$(TOP)//boost_iostreams 
+$(TOP)//boost_program_options 
+$(TOP)/moses/TranslationModel/UG/mm//mm 
+$(TOP)/moses/TranslationModel/UG//mmsapt 
+$(TOP)/util//kenutil 
+; 
+
+exe spe-check-coverage2 : 
+spe-check-coverage2.cc 
+$(TOP)/moses//moses
+$(TOP)/moses/TranslationModel/UG/generic//generic 
+$(TOP)//boost_iostreams 
+$(TOP)//boost_program_options 
+$(TOP)/moses/TranslationModel/UG/mm//mm 
+$(TOP)/moses/TranslationModel/UG//mmsapt 
+$(TOP)/util//kenutil 
+; 
+
 install $(PREFIX)/bin : try-align ; 

-fakelib mmsapt : [ glob *.cpp mmsapt*.cc ] ;
+fakelib mmsapt : [ glob *.cpp mmsapt*.cc sapt*.cc ] ;
--- a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc
+++ b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc
@ -0,0 +1,50 @@
+//-*- c++ -*-
+#include "ug_splice_arglist.h"
+#include "moses/Util.h"
+#include "util/exception.hh"
+#include <boost/foreach.hpp>
+
+namespace Moses {
+  
+  // Returns a copy of the C string /s/ allocated with new[].
+  static char*
+  clone_cstring(char const* s)
+  {
+    char* ret = new char[strlen(s)+1];
+    strcpy(ret, s);
+    return ret;
+  }
+
+  // Splits the argument list /argv_in/ (with /argc_in/ entries) into two
+  // newly allocated argument vectors:
+  //  - /argv_other/ receives every argument that equals the name of an
+  //    entry in /filter/, followed by the number of values that entry
+  //    asks for (the second member of the pair);
+  //  - /argv_moses/ receives a copy of argv_in[0] and all other arguments.
+  // /argc_moses/ and /argc_other/ are set to the respective counts. Both
+  // vectors have room for /argc_in/ pointers; the vectors and the strings
+  // in them are allocated with new[] and are not released here.
+  // Throws (via UTIL_THROW_IF2) if a filtered option is not followed by
+  // enough values, or if one of its values starts with '-'.
+  void 
+  filter_arguments(int const argc_in, char const* const* const argv_in,
+		   int & argc_moses, char*** argv_moses,  
+		   int & argc_other, char*** argv_other,
+		   vector<pair<string,int> > const& filter)
+  {
+    *argv_moses = new char*[argc_in];
+    *argv_other = new char*[argc_in]; 
+    (*argv_moses)[0] = clone_cstring(argv_in[0]);
+    argc_moses = 1;
+    argc_other = 0;
+    typedef pair<string,int> option;
+    int i = 1;
+    while (i < argc_in)
+      {
+        // Every argument is matched against the complete filter list, so
+        // that consecutive (or repeated) filtered options are all caught.
+        option const* hit = NULL;
+        BOOST_FOREACH(option const& o, filter)
+          {
+            if (o.first == argv_in[i]) { hit = &o; break; }
+          }
+
+        if (!hit)
+          { // not one of ours: hand it on to Moses
+            (*argv_moses)[argc_moses++] = clone_cstring(argv_in[i++]);
+            continue;
+          }
+
+        // the option name itself ...
+        (*argv_other)[argc_other++] = clone_cstring(argv_in[i++]);
+        // ... followed by the values that belong to it
+        for (int k = 0; k < hit->second; ++k, ++i)
+          {
+            UTIL_THROW_IF2(i >= argc_in || argv_in[i][0] == '-', 
+                           "[" << HERE << "] Missing argument for "
+                           << "parameter " << hit->first << "!");
+            (*argv_other)[argc_other++] = clone_cstring(argv_in[i]);
+          }
+      }
+  }
+  
+} // namespace Moses
+
+
--- a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h
+++ b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h
@ -0,0 +1,18 @@
+//-*- c++ -*-
+#pragma once
+#include <vector>
+#include <string>
+namespace Moses {
+  using namespace std;
+
+  // Function to splice the argument list (e.g., before handing it over to
+  // the Moses LoadParam() function). /filter/ is a vector of argument names
+  // and the number of arguments that follow each of them.
+  void 
+  filter_arguments(int const argc_in, char const* const* const argv_in,
+		   int & argc_moses, char*** argv_moses,  
+		   int & argc_other, char*** argv_other,
+		   vector<pair<string,int> > const& filter);
+
+
+} // namespace Moses
--- a/moses/TranslationModel/UG/mm/Jamfile
+++ b/moses/TranslationModel/UG/mm/Jamfile
@ -72,15 +72,15 @@ $(TOP)/moses/TranslationModel/UG/mm//mm
 $(TOP)/util//kenutil 
 ; 

-exe custom-pt : 
-custom-pt.cc 
-$(TOP)/moses//moses
-$(TOP)//boost_iostreams 
-$(TOP)//boost_program_options 
-$(TOP)/moses/TranslationModel/UG/mm//mm 
-$(TOP)/moses/TranslationModel/UG/generic//generic 
-$(TOP)/util//kenutil 
-; 
+# exe custom-pt : 
+# custom-pt.cc 
+# $(TOP)/moses//moses
+# $(TOP)//boost_iostreams 
+# $(TOP)//boost_program_options 
+# $(TOP)/moses/TranslationModel/UG/mm//mm 
+# $(TOP)/moses/TranslationModel/UG/generic//generic 
+# $(TOP)/util//kenutil 
+# ; 


 exe calc-coverage : 
@ -98,7 +98,6 @@ mtt-dump
 mtt-count-words 
 symal2mam 
 mam2symal 
-custom-pt 
 mmlex-build 
 mmlex-lookup
 mam_verify 
--- a/moses/TranslationModel/UG/mm/custom-pt.cc
+++ b/moses/TranslationModel/UG/mm/custom-pt.cc
@ -1,6 +1,6 @@
 // build a phrase table for the given input
 // #include "ug_lexical_phrase_scorer2.h"
-
+#if 0
 #include <stdint.h>
 #include <string>
 #include <vector>
@ -25,7 +25,7 @@
 #include "ug_bitext.h"
 #include "../mmsapt_phrase_scorers.h"
 #include "ug_lexical_phrase_scorer2.h"
-
+#include "../sapt_phrase_scorers.h"
 using namespace std;
 using namespace ugdiss;
 using namespace Moses;
@ -109,6 +109,7 @@ nbest_phrasepairs(uint64_t const  pid1,
 int main(int argc, char* argv[])
 {
  // assert(argc == 4);
+#if 0
 #if 0
  string base = argv[1];
  string L1   = argv[2];
@ -182,7 +183,7 @@ int main(int argc, char* argv[])
      	    }
      	}
    }
-  
+#endif  
    exit(0);
 }
-
+#endif
--- a/moses/TranslationModel/UG/mm/ug_bitext.cc
+++ b/moses/TranslationModel/UG/mm/ug_bitext.cc
@ -158,99 +158,25 @@ namespace Moses
    jstats::
    invalidate()
    {
-      my_rcnt = 0;
+      if (my_wcnt > 0) 
+	my_wcnt *= -1;
+    }
+
+    void 
+    jstats::
+    validate()
+    {
+      if (my_wcnt < 0) 
+	my_wcnt *= -1;
    }

    bool
    jstats::
    valid()
    {
-      return my_rcnt != 0;
+      return my_wcnt >= 0;
    }

-    bool
-    PhrasePair::
-    operator<=(PhrasePair const& other) const
-    {
-      return this->score <= other.score;
-    }
-
-    bool
-    PhrasePair::
-    operator>=(PhrasePair const& other) const
-    {
-      return this->score >= other.score;
-    }
-
-    bool
-    PhrasePair::
-    operator<(PhrasePair const& other) const
-    {
-      return this->score < other.score;
-    }
-    
-    bool
-    PhrasePair::
-    operator>(PhrasePair const& other) const
-    {
-      return this->score > other.score;
-    }
-    
-    PhrasePair::
-    PhrasePair() {}
-
-    PhrasePair::
-    PhrasePair(PhrasePair const& o) 
-      : p1(o.p1), 
-	p2(o.p2),
-	raw1(o.raw1), 
-	raw2(o.raw2), 
-	sample1(o.sample1),
-	sample2(o.sample2),
-	good1(o.good1),
-	good2(o.good2),
-	joint(o.joint),
-	fvals(o.fvals),
-	aln(o.aln),
-	score(o.score)
-    {
-      for (size_t i = 0; i <= po_other; ++i)
-	{
-	  dfwd[i] = o.dfwd[i];
-	  dbwd[i] = o.dbwd[i];
-	}
-    }
-    
-    void
-    PhrasePair::
-    init(uint64_t const pid1, pstats const& ps, size_t const numfeats)
-    {
-      p1      = pid1;
-      p2      = 0;
-      raw1    = ps.raw_cnt;
-      sample1 = ps.sample_cnt;
-      sample2 = 0;
-      good1   = ps.good;
-      good2   = 0;
-      raw2    = 0;
-      fvals.resize(numfeats);
-    }
-
-    void
-    PhrasePair::
-    init(uint64_t const pid1, 
-	 pstats const& ps1, 
-	 pstats const& ps2, 
-	 size_t const numfeats)
-    {
-      p1      = pid1;
-      raw1    = ps1.raw_cnt    + ps2.raw_cnt;
-      sample1 = ps1.sample_cnt + ps2.sample_cnt;
-      sample2 = 0;
-      good1   = ps1.good       + ps2.good;
-      good2   = 0;
-      fvals.resize(numfeats);
-    }
    
    float 
    lbop(size_t const tries, size_t const succ, float const confidence)
@ -261,85 +187,6 @@ namespace Moses
 		 find_lower_bound_on_p(tries, succ, confidence)));
    }
    
-    PhrasePair const&
-    PhrasePair::
-    update(uint64_t const pid2, jstats const& js)   
-    {
-      p2    = pid2;
-      raw2  = js.cnt2();
-      joint = js.rcnt();
-      assert(js.aln().size());
-      if (js.aln().size()) 
-	aln = js.aln()[0].second;
-      float total_fwd = 0, total_bwd = 0;
-      for (int i = po_first; i <= po_other; i++)
-	{
-	  PhraseOrientation po = static_cast<PhraseOrientation>(i);
-	  total_fwd += js.dcnt_fwd(po)+1;
-	  total_bwd += js.dcnt_bwd(po)+1;
-	}
-      for (int i = po_first; i <= po_other; i++)
-	{
-	  PhraseOrientation po = static_cast<PhraseOrientation>(i);
-	  dfwd[i] = float(js.dcnt_fwd(po)+1)/total_fwd;
-	  dbwd[i] = float(js.dcnt_bwd(po)+1)/total_bwd;
-	}
-      return *this;
-    }
-
-    PhrasePair const&
-    PhrasePair::
-    update(uint64_t const pid2, jstats const& js1, jstats const& js2)   
-    {
-      p2    = pid2;
-      raw2  = js1.cnt2() + js2.cnt2();
-      joint = js1.rcnt() + js2.rcnt();
-      assert(js1.aln().size() || js2.aln().size());
-      if (js1.aln().size()) 
-	aln = js1.aln()[0].second;
-      else if (js2.aln().size()) 
-	aln = js2.aln()[0].second;
-      for (int i = po_first; i < po_other; i++)
-	{
-	  PhraseOrientation po = static_cast<PhraseOrientation>(i);
-	  dfwd[i] = float(js1.dcnt_fwd(po) + js2.dcnt_fwd(po) + 1)/(sample1+po_other);
-	  dbwd[i] = float(js1.dcnt_bwd(po) + js2.dcnt_bwd(po) + 1)/(sample1+po_other);
-	}
-      return *this;
-    }
-
-    PhrasePair const&
-    PhrasePair::
-    update(uint64_t const pid2, 
-	   size_t   const raw2extra,
-	   jstats   const& js)   
-    {
-      p2    = pid2;
-      raw2  = js.cnt2() + raw2extra;
-      joint = js.rcnt();
-      assert(js.aln().size());
-      if (js.aln().size()) 
-	aln = js.aln()[0].second;
-      for (int i = po_first; i <= po_other; i++)
-	{
-	  PhraseOrientation po = static_cast<PhraseOrientation>(i);
-	  dfwd[i] = float(js.dcnt_fwd(po)+1)/(sample1+po_other);
-	  dbwd[i] = float(js.dcnt_bwd(po)+1)/(sample1+po_other);
-	}
-      return *this;
-    }
-
-    float
-    PhrasePair::
-    eval(vector<float> const& w)
-    {
-      assert(w.size() == this->fvals.size());
-      this->score = 0;
-      for (size_t i = 0; i < w.size(); ++i)
-	this->score += w[i] * this->fvals[i];
-      return this->score;
-    }
-  
    template<>
    sptr<imBitext<L2R_Token<SimpleWordId> > > 
    imBitext<L2R_Token<SimpleWordId> >::
@ -371,7 +218,8 @@ namespace Moses
 	  uint32_t row,col; char c;
 	  while (ibuf >> row >> c >> col)
 	    {
-	      assert(c == '-');
+	      UTIL_THROW_IF2(c != '-', "[" << HERE << "] "
+			     << "Error in alignment information:\n" << a);
 	      binwrite(obuf,row);
 	      binwrite(obuf,col);
 	    }
@ -639,7 +487,6 @@ namespace Moses
      cout  << string(90,'-') << endl;
    }

-
    PhraseOrientation 
    find_po_fwd(vector<vector<ushort> >& a1,
 		vector<vector<ushort> >& a2,
@ -654,13 +501,13 @@ namespace Moses
      
      ushort ns1,ne1,ne2;
      if (!expand_phrase_pair(a1,a2,n2,b1,e1,ns1,ne1,ne2))
-	{
-	  return po_other;
-	}
+	return po_other;
+
      if (ns1 >= e1)
 	{
 	  for (ushort j = e1; j < ns1; ++j)
-	    if (a1[j].size()) return po_jfwd;
+	    if (a1[j].size()) 
+	      return po_jfwd;
 	  return po_mono;
 	}
      else
--- a/moses/TranslationModel/UG/mm/ug_bitext.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext.h
@ -56,6 +56,7 @@ namespace Moses {
  class Mmsapt;
  namespace bitext
  {
+    template<typename TKN> class Bitext;
    using namespace ugdiss;

    template<typename TKN> class Bitext;
@ -120,6 +121,7 @@ namespace Moses {
      void add(float w, vector<uchar> const& a, uint32_t const cnt2,
 	       uint32_t fwd_orient, uint32_t bwd_orient);
      void invalidate();
+      void validate();
      bool valid();
      uint32_t dcnt_fwd(PhraseOrientation const idx) const;
      uint32_t dcnt_bwd(PhraseOrientation const idx) const;
@ -157,43 +159,6 @@ namespace Moses {
 	  uint32_t fwd_o, uint32_t bwd_o);
    };
    
-    class 
-    PhrasePair
-    {
-    public:
-      uint64_t p1, p2;
-      uint32_t raw1,raw2,sample1,sample2,good1,good2,joint;
-      vector<float> fvals;
-      float dfwd[po_other+1];
-      float dbwd[po_other+1];
-      vector<uchar> aln;
-      // float    avlex12,avlex21; // average lexical probs (Moses std)
-      // float    znlex1,znlex2;   // zens-ney lexical smoothing
-      // float    colex1,colex2;   // based on raw lexical occurrences
-      float score;
-      PhrasePair();
-      PhrasePair(PhrasePair const& o);
-      bool operator<(PhrasePair const& other) const;
-      bool operator>(PhrasePair const& other) const;
-      bool operator<=(PhrasePair const& other) const;
-      bool operator>=(PhrasePair const& other) const;
-
-      void init(uint64_t const pid1, pstats const& ps,  size_t const numfeats);
-      void init(uint64_t const pid1, pstats const& ps1, pstats const& ps2, 
-		size_t const numfeats);
-
-      PhrasePair const& 
-      update(uint64_t const pid2, jstats const& js);
-
-      PhrasePair const& 
-      update(uint64_t const pid2, jstats   const& js1, jstats   const& js2);
-
-      PhrasePair const& 
-      update(uint64_t const pid2, size_t const raw2extra, jstats const& js);
-
-      float eval(vector<float> const& w);
-    };
-

    template<typename TKN>
    class Bitext 
--- a/moses/TranslationModel/UG/mm/ug_im_ttrack.h
+++ b/moses/TranslationModel/UG/mm/ug_im_ttrack.h
@ -16,6 +16,9 @@
 #include "tpt_tokenindex.h"
 #include "ug_ttrack_base.h"
 #include "tpt_tokenindex.h"
+#include "util/exception.hh"
+#include "moses/Util.h"
+
 // #include "ug_vocab.h"

 // define the corpus buffer size (in sentences) and the
@ -49,6 +52,8 @@ namespace ugdiss
    typename boost::shared_ptr<imTtrack<Token> > 
    append<Token>(typename boost::shared_ptr<imTtrack<Token> > const & crp, vector<Token> const & snt);

+    void m_check_token_count(); // debugging function
+
  public:

    imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d);
@ -69,6 +74,22 @@ namespace ugdiss

  };

+  // Debugging aid: recounts the tokens of all sentences stored in myData
+  // and throws (via UTIL_THROW_IF2) if the sum differs from numToks.
+  template<typename Token>
+  void
+  imTtrack<Token>::
+  m_check_token_count()
+  {
+    typedef typename vector<vector<Token> >::const_iterator sent_iter;
+    size_t counted = 0;
+    for (sent_iter s = myData->begin(); s != myData->end(); ++s)
+      counted += s->size();
+    UTIL_THROW_IF2(counted != this->numToks, "[" << HERE << "]" 
+                   << " Wrong token count after appending sentence!"
+                   << " Counted " << counted << " but expected " 
+                   << this->numToks << " in a total of " << myData->size() 
+                   << " sentences.");
+  }
+
  template<typename Token>
  Token const* 
  imTtrack<Token>::
@ -111,9 +132,9 @@ namespace ugdiss
  template<typename Token>
  imTtrack<Token>::
  imTtrack(istream& in, TokenIndex const& V, ostream* log = NULL)
+    : numToks(0)
  {
    myData.reset(new vector<vector<Token> >());
-    numToks = 0;
    string line,w;
    size_t linectr=0;
    boost::unordered_map<string,id_type> H;
@ -135,6 +156,7 @@ namespace ugdiss
  template<typename Token>
  imTtrack<Token>::
  imTtrack(size_t reserve)
+    : numToks(0)
  {
    myData.reset(new vector<vector<Token> >());
    if (reserve) myData->reserve(reserve);
@ -143,9 +165,9 @@ namespace ugdiss
  template<typename Token>
  imTtrack<Token>::
  imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d)
+    : numToks(0)
  {
    myData  = d;
-    numToks = 0;
    BOOST_FOREACH(vector<Token> const& v, *d)
      numToks += v.size();
  }
@ -171,6 +193,9 @@ namespace ugdiss
  shared_ptr<imTtrack<TOKEN> > 
  append(shared_ptr<imTtrack<TOKEN> > const& crp, vector<TOKEN> const & snt)
  {
+#if 1
+    if (crp) crp->m_check_token_count();
+#endif
    shared_ptr<imTtrack<TOKEN> > ret;
    if (crp == NULL)
      {
@ -185,6 +210,11 @@ namespace ugdiss
      }
    else ret = crp;
    ret->myData->push_back(snt);
+    ret->numToks += snt.size();
+
+#if 1
+    ret->m_check_token_count();
+#endif
    return ret;
  }

--- a/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h
+++ b/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h
@ -27,7 +27,6 @@ namespace ugdiss
    typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> table_t;
    table_t COOC;
    void open(string const& fname);
-
    template<typename someint>
    void 
    score(TKN const* snt1, size_t const s1, size_t const e1,
@ -104,7 +103,19 @@ namespace ugdiss
    if (COOC.m1(s) == 0 || COOC.m2(t) == 0) return 1.0;
    UTIL_THROW_IF2(alpha < 0,"At " << __FILE__ << ":" << __LINE__
 		   << ": alpha parameter must be >= 0");
-    return float(COOC[s][t]+alpha)/(COOC.m1(s)+alpha);
+    float ret = COOC[s][t]+alpha;
+    ret =  (ret?ret:1.)/(COOC.m1(s)+alpha);
+    UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__ 
+		   << ": result not > 0 and <= 1. alpha = " << alpha << "; "
+		   << COOC[s][t] << "/" << COOC.m1(s));
+
+#if 0
+    cerr << "[" << s << "," << t << "] " 
+	 << COOC.m1(s) << "/" 
+	 << COOC[s][t] << "/" 
+	 << COOC.m2(t) << endl;
+#endif
+    return ret;
  }
  
  template<typename TKN>
@ -115,7 +126,11 @@ namespace ugdiss
    if (COOC.m1(s) == 0 || COOC.m2(t) == 0) return 1.0;
    UTIL_THROW_IF2(alpha < 0,"At " << __FILE__ << ":" << __LINE__
 		   << ": alpha parameter must be >= 0");
-    return float(COOC[s][t]+alpha)/(COOC.m2(t)+alpha);
+    float ret = float(COOC[s][t]+alpha);
+    ret = (ret?ret:1.)/(COOC.m2(t)+alpha);
+    UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__ 
+		   << ": result not > 0 and <= 1.");
+    return ret;
  }
  
  template<typename TKN>
--- a/moses/TranslationModel/UG/mm/ug_phrasepair.cc
+++ b/moses/TranslationModel/UG/mm/ug_phrasepair.cc
@ -0,0 +1,97 @@
+#include "ug_phrasepair.h"
+namespace Moses {
+  namespace bitext
+  {
+
+#if 0
+    void 
+    PhrasePair::
+    init()
+    {
+      p1 = p2 = raw1 = raw2 = sample1 = sample2 = good1 = good2 = joint = 0;
+    }
+
+    void
+    PhrasePair::
+    init(uint64_t const pid1, 
+	 pstats const& ps1, 
+	 pstats const& ps2, 
+	 size_t const numfeats)
+    {
+      p1      = pid1;
+      raw1    = ps1.raw_cnt    + ps2.raw_cnt;
+      sample1 = ps1.sample_cnt + ps2.sample_cnt;
+      sample2 = 0;
+      good1   = ps1.good       + ps2.good;
+      good2   = 0;
+      joint   = 0;
+      fvals.resize(numfeats);
+    }
+
+    PhrasePair const&
+    PhrasePair::
+    update(uint64_t const pid2, jstats const& js1, jstats const& js2)   
+    {
+      p2    = pid2;
+      raw2  = js1.cnt2() + js2.cnt2();
+      joint = js1.rcnt() + js2.rcnt();
+      assert(js1.aln().size() || js2.aln().size());
+      if (js1.aln().size()) 
+	aln = js1.aln()[0].second;
+      else if (js2.aln().size()) 
+	aln = js2.aln()[0].second;
+      for (int i = po_first; i < po_other; i++)
+	{
+	  PhraseOrientation po = static_cast<PhraseOrientation>(i);
+	  dfwd[i] = float(js1.dcnt_fwd(po) + js2.dcnt_fwd(po) + 1)/(sample1+po_other);
+	  dbwd[i] = float(js1.dcnt_bwd(po) + js2.dcnt_bwd(po) + 1)/(sample1+po_other);
+	}
+      return *this;
+    }
+
+    PhrasePair const&
+    PhrasePair::
+    update(uint64_t const pid2, size_t r2)
+    {
+      p2    = pid2;
+      raw2  = r2;
+      joint = 0;
+      return *this;
+    } 
+
+
+    PhrasePair const&
+    PhrasePair::
+    update(uint64_t const pid2, 
+	   size_t   const raw2extra,
+	   jstats   const& js)   
+    {
+      p2    = pid2;
+      raw2  = js.cnt2() + raw2extra;
+      joint = js.rcnt();
+      assert(js.aln().size());
+      if (js.aln().size()) 
+	aln = js.aln()[0].second;
+      for (int i = po_first; i <= po_other; i++)
+	{
+	  PhraseOrientation po = static_cast<PhraseOrientation>(i);
+	  dfwd[i] = float(js.dcnt_fwd(po)+1)/(sample1+po_other);
+	  dbwd[i] = float(js.dcnt_bwd(po)+1)/(sample1+po_other);
+	}
+      return *this;
+    }
+
+    float
+    PhrasePair::
+    eval(vector<float> const& w)
+    {
+      assert(w.size() == this->fvals.size());
+      this->score = 0;
+      for (size_t i = 0; i < w.size(); ++i)
+	this->score += w[i] * this->fvals[i];
+      return this->score;
+    }
+#endif
+  } // namespace bitext
+} // namespace Moses
+
--- a/moses/TranslationModel/UG/mm/ug_phrasepair.h
+++ b/moses/TranslationModel/UG/mm/ug_phrasepair.h
@ -0,0 +1,243 @@
+//-*- c++ -*-
+#pragma once
+#include "ug_bitext.h"
+
+using namespace ugdiss;
+using namespace std;
+
+namespace Moses {
+  namespace bitext
+  {
+
+    // Renders the /len/ tokens starting at /x/ as a single string: the
+    // vocabulary entries V[id] of the tokens, separated by single blanks.
+    // Successive tokens are reached via Token::next(). Returns "" for
+    // len == 0; throws (via UTIL_THROW_IF2) if the token chain ends before
+    // /len/ tokens have been written.
+    template<typename Token>
+    string 
+    toString(TokenIndex const& V, Token const* x, size_t const len)
+    {
+      if (len == 0) return "";
+      UTIL_THROW_IF2(x == NULL, HERE << ": Unexpected end of phrase!");
+      ostringstream out;
+      size_t written = 0;
+      for (Token const* t = x; t != NULL && written < len; t = t->next())
+        {
+          if (written++) out << " "; // separator before all but the first
+          out << V[t->id()];
+        }
+      UTIL_THROW_IF2(written != len, HERE << ": Unexpected end of phrase!");
+      return out.str();
+    }
+
+    // A pair of phrases together with the counts and scores attached to
+    // it. Each phrase is referenced by a pointer to its first token plus
+    // a length; init() sets the first phrase (start1/len1) and update()
+    // the second one (start2/len2).
+    template<typename Token>
+    class 
+    PhrasePair
+    {
+    public:
+      Token const* start1; // first token of phrase 1 (set by init())
+      Token const* start2; // first token of phrase 2 (set by update())
+      uint32_t len1;       // length of phrase 1 in tokens
+      uint32_t len2;       // length of phrase 2 in tokens
+      // uint64_t p1, p2;
+
+      // raw1, sample1, good1 are taken from pstats (raw_cnt, sample_cnt,
+      // good) in init(); raw2 and joint come from jstats (cnt2(), rcnt())
+      // in update().
+      uint32_t raw1,raw2,sample1,sample2,good1,good2,joint;
+      vector<float> fvals;
+      // Orientation statistics, indexed by PhraseOrientation. update()
+      // stores add-one smoothed relative frequencies here, not raw counts.
+      float dfwd[po_other+1];
+      float dbwd[po_other+1];
+      vector<uchar> aln;
+      float score;
+
+      // Zero-initializes all members, so that a default-constructed object
+      // can be copied and compared without reading indeterminate values.
+      PhrasePair()
+        : start1(NULL), start2(NULL)
+        , len1(0), len2(0)
+        , raw1(0), raw2(0)
+        , sample1(0), sample2(0)
+        , good1(0), good2(0)
+        , joint(0)
+        , score(0)
+      {
+        for (size_t i = 0; i <= po_other; ++i)
+          dfwd[i] = dbwd[i] = 0;
+      }
+
+      PhrasePair(PhrasePair const& o);
+
+      // adds the counts of /other/ to the counts of this phrase pair
+      PhrasePair const& operator+=(PhrasePair const& other);
+
+      // the comparison operators compare the /score/ members only
+      bool operator<(PhrasePair const& other) const;
+      bool operator>(PhrasePair const& other) const;
+      bool operator<=(PhrasePair const& other) const; 
+      bool operator>=(PhrasePair const& other) const;
+
+      // resets lengths and counts to 0 and both phrase pointers to NULL
+      void init();
+
+      // sets phrase 1 and its counts (from /ps/, or 0 if /ps/ is NULL),
+      // clears the remaining counts and resizes fvals to /numfeats/
+      void init(Token const* x,   uint32_t const len,
+		pstats const* ps = NULL, size_t const numfeats=0);
+      
+      // void init(uint64_t const pid1, pstats const& ps,  size_t const numfeats);
+      // void init(uint64_t const pid1, pstats const& ps1, pstats const& ps2, 
+      // size_t const numfeats);
+
+      // PhrasePair const&
+      // update(uint64_t const pid2, size_t r2 = 0);
+
+      // sets phrase 2 and fills raw2, joint, aln, dfwd and dbwd from /js/
+      PhrasePair const& 
+      update(Token const* x, uint32_t const len, jstats const& js);
+      
+      // PhrasePair const& 
+      // update(uint64_t const pid2, jstats   const& js1, jstats   const& js2);
+
+      // PhrasePair const& 
+      // update(uint64_t const pid2, size_t const raw2extra, jstats const& js);
+
+      // float 
+      // eval(vector<float> const& w);
+
+      // Ordering of phrase pairs by the token id sequence of phrase 2.
+      class SortByTargetIdSeq
+      {
+      public:
+	int cmp(PhrasePair const& a, PhrasePair const& b) const;
+	bool operator()(PhrasePair const& a, PhrasePair const& b) const;
+      };
+    };
+
+    // Sets phrase 1 to the /len/ tokens starting at /x/. The counts
+    // raw1, sample1 and good1 are copied from /ps/ (or set to 0 if /ps/
+    // is NULL), all other counts are set to 0, and fvals is resized to
+    // /numfeats/ entries.
+    template<typename Token>
+    void
+    PhrasePair<Token>::
+    init(Token const* x, uint32_t const len, 
+	 pstats const* ps, size_t const numfeats)
+    {
+      start1 = x; 
+      len1   = len;
+
+      // counts that belong to phrase 1
+      raw1    = ps ? ps->raw_cnt    : 0;
+      sample1 = ps ? ps->sample_cnt : 0;
+      good1   = ps ? ps->good       : 0;
+
+      // counts that are filled in later
+      raw2    = 0;
+      sample2 = 0;
+      good2   = 0;
+      joint   = 0;
+
+      fvals.resize(numfeats);
+    }
+
+    // Sets phrase 2 to the /len/ tokens starting at /x/ and takes over the
+    // statistics from /js/: raw2 = js.cnt2(), joint = js.rcnt(), aln = the
+    // second member of the first entry of js.aln() (if there is one).
+    // dfwd/dbwd receive, for each orientation, (count+1) divided by the 
+    // sum of (count+1) over all orientations. Returns *this.
+    template<typename Token>
+    PhrasePair<Token> const&
+    PhrasePair<Token>::
+    update(Token const* x, uint32_t const len, jstats const& js)   
+    {
+      start2 = x; 
+      len2   = len;
+      raw2   = js.cnt2();
+      joint  = js.rcnt();
+
+      assert(js.aln().size());
+      if (js.aln().size()) 
+        aln = js.aln()[0].second;
+
+      // first pass: normalization constants (add-one smoothing)
+      float fwd_sum = 0; 
+      float bwd_sum = 0;
+      for (int o = po_first; o <= po_other; ++o)
+        {
+          PhraseOrientation const po = static_cast<PhraseOrientation>(o);
+          fwd_sum += js.dcnt_fwd(po)+1;
+          bwd_sum += js.dcnt_bwd(po)+1;
+        }
+
+      // second pass: smoothed relative frequencies
+      for (int o = po_first; o <= po_other; ++o)
+        {
+          PhraseOrientation const po = static_cast<PhraseOrientation>(o);
+          dfwd[o] = float(js.dcnt_fwd(po)+1)/fwd_sum;
+          dbwd[o] = float(js.dcnt_bwd(po)+1)/bwd_sum;
+        }
+
+      return *this;
+    }
+
+    // The relational operators order phrase pairs by their /score/ 
+    // member; no other member takes part in the comparison.
+
+    template<typename Token>
+    bool 
+    PhrasePair<Token>::
+    operator<(PhrasePair const& other) const 
+    { 
+      return other.score > score; 
+    }
+    
+    template<typename Token>
+    bool 
+    PhrasePair<Token>::
+    operator>(PhrasePair const& other) const
+    { 
+      return other.score < score; 
+    }
+
+    template<typename Token>
+    bool 
+    PhrasePair<Token>::
+    operator<=(PhrasePair const& other) const 
+    { 
+      return other.score >= score; 
+    }
+    
+    template<typename Token>
+    bool 
+    PhrasePair<Token>::
+    operator>=(PhrasePair const& other) const
+    { 
+      return other.score <= score; 
+    }
+
+    // Adds the seven count members of /o/ (raw, sample and good counts 
+    // for both sides plus the joint count) to those of this phrase pair.
+    // Phrase pointers, lengths, fvals, dfwd/dbwd, aln and score are left
+    // as they are. Returns *this.
+    template<typename Token>
+    PhrasePair<Token> const&
+    PhrasePair<Token>::
+    operator+=(PhrasePair const& o) 
+    { 
+      // side 1
+      raw1    += o.raw1;
+      sample1 += o.sample1;
+      good1   += o.good1;
+      // side 2
+      raw2    += o.raw2;
+      sample2 += o.sample2;
+      good2   += o.good2;
+      // both sides together
+      joint   += o.joint;
+      return *this;
+    }
+
+    // Copy constructor: member-wise copy of /o/, including both 
+    // orientation arrays.
+    template<typename Token>
+    PhrasePair<Token>::
+    PhrasePair(PhrasePair<Token> const& o) 
+      : start1(o.start1), start2(o.start2)
+      , len1(o.len1), len2(o.len2)
+      , raw1(o.raw1), raw2(o.raw2) 
+      , sample1(o.sample1), sample2(o.sample2)
+      , good1(o.good1), good2(o.good2)
+      , joint(o.joint)
+      , fvals(o.fvals)
+      , aln(o.aln)
+      , score(o.score)
+    {
+      // plain arrays cannot go into the initializer list
+      size_t k = 0;
+      while (k <= po_other)
+        {
+          dfwd[k] = o.dfwd[k];
+          dbwd[k] = o.dbwd[k];
+          ++k;
+        }
+    }
+    
+    // Three-way comparison of the token id sequences of phrase 2 of /a/ 
+    // and /b/. Returns -1 if a sorts before b, 1 if it sorts after b and
+    // 0 if both sequences are identical. The first differing token id 
+    // decides; if one sequence is a prefix of the other, the shorter one
+    // sorts first.
+    template<typename Token>
+    int
+    PhrasePair<Token>::
+    SortByTargetIdSeq::
+    cmp(PhrasePair const& a, PhrasePair const& b) const
+    {
+      size_t const shared = a.len2 < b.len2 ? a.len2 : b.len2;
+      Token const* ta = a.start2;
+      Token const* tb = b.start2;
+      for (size_t k = 0; k < shared; ++k)
+        {
+          if (ta->id() != tb->id())
+            return ta->id() < tb->id() ? -1 : 1;
+          ta = ta->next();
+          tb = tb->next();
+        }
+      // one sequence is a prefix of the other (or they are equal)
+      if (a.len2 == b.len2) return 0;
+      return a.len2 < b.len2 ? -1 : 1;
+    }
+    
+    // "Less than" predicate built on cmp(): true iff /a/ sorts strictly
+    // before /b/.
+    template<typename Token>
+    bool
+    PhrasePair<Token>::
+    SortByTargetIdSeq::
+    operator()(PhrasePair const& a, PhrasePair const& b) const
+    {
+      int const order = this->cmp(a,b);
+      return order < 0;
+    }
+
+    // Resets both phrase pointers to NULL and both lengths and all seven
+    // counts to 0. fvals, dfwd/dbwd, aln and score are not touched.
+    template<typename Token>
+    void 
+    PhrasePair<Token>::
+    init()
+    {
+      start1  = NULL;  start2  = NULL;
+      len1    = 0;     len2    = 0;
+      raw1    = 0;     raw2    = 0;
+      sample1 = 0;     sample2 = 0;
+      good1   = 0;     good2   = 0;
+      joint   = 0;
+    }
+
+
+  } // namespace bitext
+} // namespace Moses
--- a/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
+++ b/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
@ -7,6 +7,8 @@
 #include "ug_typedefs.h"
 #include "tpt_tokenindex.h"
 #include <iostream>
+#include "util/exception.hh"
+#include "moses/Util.h"
 //#include <cassert>

 // #include "ug_bv_iter.h"
@ -60,8 +62,13 @@ namespace ugdiss

    // TSA_tree_iterator(TSA_tree_iterator const& other);
    TSA_tree_iterator(TSA<Token> const* s);
+    TSA_tree_iterator(TSA<Token> const* s, TSA_tree_iterator<Token> const& other);
    TSA_tree_iterator(TSA<Token> const* r, id_type const* s, size_t const len);
    // TSA_tree_iterator(TSA<Token> const* s, Token const& t);
+    TSA_tree_iterator(TSA<Token> const* s, // index to search; if NULL, nothing is matched
+		      Token const* kstart, // first token of the key; later tokens are reached via next()
+		      size_t const len, // number of key tokens to match
+		      bool full_match_only=true); // if true, a partial match clears lower/upper
    TSA_tree_iterator(TSA<Token> const* s, 
 		      Token const* kstart, 
 		      Token const* kend, 
@ -150,9 +157,12 @@ namespace ugdiss
    double approxOccurrenceCount(int p=-1) const
    {
      assert(root);
+      if (p < 0) p += lower.size(); // negative p is an offset from lower.size() (p=-1: last entry)
      double ret = arrayByteSpanSize(p)/root->aveIndexEntrySize();
-      assert(ret < root->corpus->numTokens());
      if (ret < 25) ret = rawCnt(p);
+      UTIL_THROW_IF2(ret > root->corpus->numTokens(), "[" << HERE << "] "
+		     << "Word count mismatch."); // the count must not exceed the number of corpus tokens
+      assert(ret <= root->corpus->numTokens()); // same bound as the check above; only active when asserts are enabled
      return ret;
    }

@ -318,6 +328,18 @@ namespace ugdiss
    : root(s) 
  {};

+  // Builds an iterator over index /s/ that matches the token sequence
+  // currently matched by /other/. Tokens are read starting at
+  // other.getToken(0) and followed via next(); at most other.size()
+  // tokens are used. Extension stops at the first token that cannot be
+  // matched in /s/, so the result may be shorter than /other/.
+  // If /s/ is NULL, nothing is matched.
+  template<typename Token>
+  TSA_tree_iterator<Token>::
+  TSA_tree_iterator(TSA<Token> const* s, TSA_tree_iterator<Token> const& other)
+    : root(s) 
+  {
+    // Guard against a NULL index, as the (s, kstart, len) constructor does.
+    if (!root) return;
+    Token const* x = other.getToken(0);
+    // The check on x keeps a NULL token pointer from being dereferenced.
+    for (size_t i = 0; i < other.size() && x && this->extend(x->id()); ++i)
+      x = x->next(); 
+  }
+
+
+
  template<typename Token>
  TSA_tree_iterator<Token>::
  TSA_tree_iterator
@ -382,6 +404,25 @@ namespace ugdiss

 #endif

+  // Builds an iterator over index /s/ by matching up to /len/ tokens,
+  // starting at /kstart/ and following next(). Matching stops early if
+  // the token pointer becomes NULL or extend() fails. If fewer than /len/
+  // tokens were matched and /full_match_only/ is set, lower and upper
+  // are cleared. A NULL /s/ leaves the iterator without any match.
+  template<typename Token>
+  TSA_tree_iterator<Token>::
+  TSA_tree_iterator(TSA<Token> const* s, Token const* kstart, 
+		    size_t const len, bool full_match_only)
+    : root(s) 
+  {
+    if (!root) return;
+    size_t matched = 0;
+    while (matched < len && kstart && extend(*kstart))
+      {
+        kstart = kstart->next();
+        ++matched;
+      }
+    // keep whatever was matched unless a complete match was required
+    bool const complete = (matched == len);
+    if (complete || !full_match_only) return;
+    lower.clear();
+    upper.clear();
+  }
+
+  // DEPRECATED: DO NOT USE. Use the one that takes the length 
+  // instead of kend.
  template<typename Token>
  TSA_tree_iterator<Token>::
  TSA_tree_iterator(TSA<Token> const* s, Token const* kstart, 
@ -561,8 +602,7 @@ namespace ugdiss
  TSA_tree_iterator<Token>::
  rawCnt(int p) const
  {
-    if (p < 0)
-      p = lower.size()+p;
+    if (p < 0) p += lower.size();
    assert(p>=0);
    if (lower.size() == 0) return root->getCorpusSize();
    return root->rawCnt(lower[p],upper[p]);
--- a/moses/TranslationModel/UG/mmsapt.cpp
+++ b/moses/TranslationModel/UG/mmsapt.cpp
--- a/moses/TranslationModel/UG/mmsapt.h
+++ b/moses/TranslationModel/UG/mmsapt.h
@ -19,6 +19,7 @@
 #include "moses/TranslationModel/UG/mm/ug_typedefs.h"
 #include "moses/TranslationModel/UG/mm/tpt_pickler.h"
 #include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "moses/TranslationModel/UG/mm/ug_phrasepair.h"
 #include "moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h"

 #include "moses/InputFileStream.h"
@ -29,7 +30,8 @@
 #include <map>

 #include "moses/TranslationModel/PhraseDictionary.h"
-#include "mmsapt_phrase_scorers.h"
+#include "mmsapt_phrase_scorers.h" // deprecated
+#include "sapt_phrase_scorers.h"

 // TO DO:
 // - make lexical phrase scorer take addition to the "dynamic overlay" into account
@ -47,47 +49,68 @@ namespace Moses
 #endif
  {
    friend class Alignment;
+    map<string,string> param;
  public:    
    typedef L2R_Token<SimpleWordId> Token;
    typedef mmBitext<Token> mmbitext;
    typedef imBitext<Token> imbitext;
+    typedef Bitext<Token>     bitext;
    typedef TSA<Token>           tsa;
    typedef PhraseScorer<Token> pscorer;
+
  private:
+    // vector<sptr<bitext> > shards;
    mmbitext btfix; 
-    sptr<imbitext> btdyn;
+    sptr<imbitext> btdyn; 
    string bname,extra_data;
    string L1;
    string L2;
-    float  m_lbop_parameter;
-    float  m_lex_alpha; 
+    float  m_lbop_conf; // confidence level for lbop smoothing
+    float  m_lex_alpha; // alpha parameter (j+a)/(m+a) for lexical smoothing
    // alpha parameter for lexical smoothing (joint+alpha)/(marg + alpha)
    // must be > 0 if dynamic 
    size_t m_default_sample_size;
    size_t m_workers;  // number of worker threads for sampling the bitexts

-    // deprecated!
-    char m_pfwd_denom; // denominator for computation of fwd phrase score:
-    // 'r' - divide by raw count
-    // 's' - divide by sample count
-    // 'g' - devide by number of "good" (i.e. coherent) samples 
-    // size_t num_features;
+    // // deprecated!
+    // char m_pfwd_denom; // denominator for computation of fwd phrase score:
+    // // 'r' - divide by raw count
+    // // 's' - divide by sample count
+    // // 'g' - devide by number of "good" (i.e. coherent) samples 
+    // // size_t num_features;

    size_t input_factor;
    size_t output_factor; // we can actually return entire Tokens!

-    bool withLogCountFeatures; // add logs of counts as features?
-    bool withCoherence; 
-    string m_pfwd_features; // which pfwd functions to use
-    string m_pbwd_features; // which pbwd functions to use
+    // bool withLogCountFeatures; // add logs of counts as features?
+    // bool withCoherence; 
+    // string m_pfwd_features; // which pfwd functions to use
+    // string m_pbwd_features; // which pbwd functions to use
+
+    // for display for human inspection (ttable dumps):
    vector<string> m_feature_names; // names of features activated
+    vector<bool> m_is_logval;  // keeps track of which features are log valued 
+    vector<bool> m_is_integer; // keeps track of which features are integer valued 
+
    vector<sptr<pscorer > > m_active_ff_fix; // activated feature functions (fix)
    vector<sptr<pscorer > > m_active_ff_dyn; // activated feature functions (dyn)
    vector<sptr<pscorer > > m_active_ff_common; // activated feature functions (dyn)

-    size_t
-    add_corpus_specific_features
-    (vector<sptr<pscorer > >& ffvec, size_t num_feats);
+    void
+    register_ff(sptr<pscorer> const& ff, vector<sptr<pscorer> > & registry);
+
+    template<typename fftype>
+    void 
+    check_ff(string const ffname,vector<sptr<pscorer> >* registry = NULL);
+    // add feature function if specified 
+    
+    template<typename fftype>
+    void 
+    check_ff(string const ffname, float const xtra, vector<sptr<pscorer> >* registry = NULL);
+    // add feature function if specified
+
+    void
+    add_corpus_specific_features(vector<sptr<pscorer > >& ffvec);
    
    // built-in feature functions
    // PScorePfwd<Token> calc_pfwd_fix, calc_pfwd_dyn;
@ -140,12 +163,24 @@ namespace Moses
    mm2dtable_t COOCraw;

    TargetPhrase* 
-    createTargetPhrase
+    mkTPhrase(Phrase const& src, 
+	      Moses::bitext::PhrasePair<Token>* fix, 
+	      Moses::bitext::PhrasePair<Token>* dyn, 
+	      sptr<Bitext<Token> > const& dynbt) const;
+
+    // template<typename Token>
+    // void 
+    // expand(typename Bitext<Token>::iter const& m, Bitext<Token> const& bt, 
+    // 	   pstats const& pstats, vector<PhrasePair<Token> >& dest);
+    
+#if 0
+    TargetPhrase* 
+    mkTPhrase
    (Phrase        const& src, 
     Bitext<Token> const& bt, 
-     bitext::PhrasePair    const& pp
+     Moses::bitext::PhrasePair const& pp
     ) const;
-
+#endif
    void
    process_pstats
    (Phrase   const& src,
@ -180,7 +215,7 @@ namespace Moses
     ) const;

    void
-    load_extra_data(string bname);
+    load_extra_data(string bname, bool locking);

    mutable size_t m_tpc_ctr;
  public:
@ -231,8 +266,14 @@ namespace Moses
    vector<string> const&
    GetFeatureNames() const;
    
-    void
-    ScorePPfix(bitext::PhrasePair& pp) const;
+    // void
+    // ScorePPfix(bitext::PhrasePair& pp) const;
+
+    bool
+    isLogVal(int i) const; // presumably: is feature i log-valued (cf. m_is_logval)? definition not visible here -- TODO confirm
+    
+    bool
+    isInteger(int i) const; // presumably: is feature i integer-valued (cf. m_is_integer)? definition not visible here -- TODO confirm

  private:
  };
--- a/moses/TranslationModel/UG/mmsapt_align.cc
+++ b/moses/TranslationModel/UG/mmsapt_align.cc
@ -1,335 +1,336 @@
 #include "mmsapt.h"
+// NOTE: the code in this file is currently broken; everything below is commented out.

-namespace Moses
-{
-  using namespace bitext;
-  using namespace std;
-  using namespace boost;
+// namespace Moses
+// {
+//   using namespace bitext;
+//   using namespace std;
+//   using namespace boost;
  
-  struct PPgreater
-  {
-    bool operator()(PhrasePair const& a, PhrasePair const& b)
-    {
-      return a.score > b.score;
-    }
-  };
+//   struct PPgreater
+//   {
+//     bool operator()(PhrasePair const& a, PhrasePair const& b)
+//     {
+//       return a.score > b.score;
+//     }
+//   };

-  void
-  Mmsapt::
-  setWeights(vector<float> const & w)
-  {
-    assert(w.size() == this->m_numScoreComponents);
-    this->feature_weights = w;
-  }
+//   void
+//   Mmsapt::
+//   setWeights(vector<float> const & w)
+//   {
+//     assert(w.size() == this->m_numScoreComponents);
+//     this->feature_weights = w;
+//   }

-  struct PhraseAlnHyp
-  {
-    PhrasePair pp;
-    ushort   s1,e1,s2,e2; // start and end positions
-    int             prev; // preceding alignment hypothesis
-    float          score; 
-    bitvector       scov; // source coverage
-    PhraseAlnHyp(PhrasePair const& ppx, int slen,
-		 pair<uint32_t,uint32_t> const& sspan,
-		 pair<uint32_t,uint32_t> const& tspan)
-      : pp(ppx), prev(-1), score(ppx.score), scov(slen)
-    {
-      s1 = sspan.first; e1 = sspan.second;
-      s2 = tspan.first; e2 = tspan.second;
-      for (size_t i = s1; i < e1; ++i) 
-	scov.set(i);
-    }
+//   struct PhraseAlnHyp
+//   {
+//     PhrasePair pp;
+//     ushort   s1,e1,s2,e2; // start and end positions
+//     int             prev; // preceding alignment hypothesis
+//     float          score; 
+//     bitvector       scov; // source coverage
+//     PhraseAlnHyp(PhrasePair const& ppx, int slen,
+// 		 pair<uint32_t,uint32_t> const& sspan,
+// 		 pair<uint32_t,uint32_t> const& tspan)
+//       : pp(ppx), prev(-1), score(ppx.score), scov(slen)
+//     {
+//       s1 = sspan.first; e1 = sspan.second;
+//       s2 = tspan.first; e2 = tspan.second;
+//       for (size_t i = s1; i < e1; ++i) 
+// 	scov.set(i);
+//     }

-    bool operator<(PhraseAlnHyp const& other) const
-    {
-      return this->score < other.score;
-    }
+//     bool operator<(PhraseAlnHyp const& other) const
+//     {
+//       return this->score < other.score;
+//     }

-    bool operator>(PhraseAlnHyp const& other) const
-    {
-      return this->score > other.score;
-    }
+//     bool operator>(PhraseAlnHyp const& other) const
+//     {
+//       return this->score > other.score;
+//     }

-    PhraseOrientation
-    po_bwd(PhraseAlnHyp const* prev) const
-    {
-      if (s2 == 0) return po_first;
-      assert(prev);
-      assert(prev->e2 <= s2);
-      if (prev->e2 < s2)  return po_other;
-      if (prev->e1 == s1) return po_mono;
-      if (prev->e1 < s1)  return po_jfwd;
-      if (prev->s1 == e1) return po_swap;
-      if (prev->s1 > e1)  return po_jbwd;
-      return po_other;
-    }
+//     PhraseOrientation
+//     po_bwd(PhraseAlnHyp const* prev) const
+//     {
+//       if (s2 == 0) return po_first;
+//       assert(prev);
+//       assert(prev->e2 <= s2);
+//       if (prev->e2 < s2)  return po_other;
+//       if (prev->e1 == s1) return po_mono;
+//       if (prev->e1 < s1)  return po_jfwd;
+//       if (prev->s1 == e1) return po_swap;
+//       if (prev->s1 > e1)  return po_jbwd;
+//       return po_other;
+//     }

-    PhraseOrientation
-    po_fwd(PhraseAlnHyp const* next) const
-    {
-      if (!next) return po_last;
-      assert(next->s2 >= e2);
-      if (next->s2 < e2)  return po_other;
-      if (next->e1 == s1) return po_swap;
-      if (next->e1 < s1)  return po_jbwd;
-      if (next->s1 == e1) return po_mono;
-      if (next->s1 > e1)  return po_jfwd;
-      return po_other;
-    }
+//     PhraseOrientation
+//     po_fwd(PhraseAlnHyp const* next) const
+//     {
+//       if (!next) return po_last;
+//       assert(next->s2 >= e2);
+//       if (next->s2 < e2)  return po_other;
+//       if (next->e1 == s1) return po_swap;
+//       if (next->e1 < s1)  return po_jbwd;
+//       if (next->s1 == e1) return po_mono;
+//       if (next->s1 > e1)  return po_jfwd;
+//       return po_other;
+//     }

-    float 
-    dprob_fwd(PhraseAlnHyp const& next)
-    {
-      return pp.dfwd[po_fwd(&next)];
-    }
+//     float 
+//     dprob_fwd(PhraseAlnHyp const& next)
+//     {
+//       return pp.dfwd[po_fwd(&next)];
+//     }

-    float 
-    dprob_bwd(PhraseAlnHyp const& prev)
-    {
-      return pp.dbwd[po_bwd(&prev)];
-    }
+//     float 
+//     dprob_bwd(PhraseAlnHyp const& prev)
+//     {
+//       return pp.dbwd[po_bwd(&prev)];
+//     }

-  };
+//   };

-  class Alignment
-  {
-    typedef L2R_Token<SimpleWordId> Token;
-    typedef TSA<Token>           tsa;
-    typedef pair<uint32_t, uint32_t>  span;
-    typedef vector<vector<uint64_t> > pidmap_t; // span -> phrase ID
-    typedef boost::unordered_map<uint64_t,vector<span> > pid2span_t;
-    typedef pstats::trg_map_t jStatsTable;
+//   class Alignment
+//   {
+//     typedef L2R_Token<SimpleWordId> Token;
+//     typedef TSA<Token>           tsa;
+//     typedef pair<uint32_t, uint32_t>  span;
+//     typedef vector<vector<uint64_t> > pidmap_t; // span -> phrase ID
+//     typedef boost::unordered_map<uint64_t,vector<span> > pid2span_t;
+//     typedef pstats::trg_map_t jStatsTable;

-    Mmsapt const& PT;
-    vector<id_type> s,t; 
-    pidmap_t   sspan2pid, tspan2pid; // span -> phrase ID
-    pid2span_t spid2span,tpid2span;
-    vector<vector<sptr<pstats> > > spstats;
+//     Mmsapt const& PT;
+//     vector<id_type> s,t; 
+//     pidmap_t   sspan2pid, tspan2pid; // span -> phrase ID
+//     pid2span_t spid2span,tpid2span;
+//     vector<vector<sptr<pstats> > > spstats;

-    vector<PhrasePair> PP; 
-    // position-independent phrase pair info
-  public:
-    vector<PhraseAlnHyp> PAH;  
-    vector<vector<int> > tpos2ahyp;
-    // maps from target start positions to PhraseAlnHyps starting at
-    // that position
+//     vector<PhrasePair> PP; 
+//     // position-independent phrase pair info
+//   public:
+//     vector<PhraseAlnHyp> PAH;  
+//     vector<vector<int> > tpos2ahyp;
+//     // maps from target start positions to PhraseAlnHyps starting at
+//     // that position

-    sptr<pstats> getPstats(span const& sspan);
-    void fill_tspan_maps();
-    void fill_sspan_maps();
-  public:
-    Alignment(Mmsapt const& pt, string const& src, string const& trg);
-    void show(ostream& out); 
-    void show(ostream& out, PhraseAlnHyp const& ah); 
-  };
+//     sptr<pstats> getPstats(span const& sspan);
+//     void fill_tspan_maps();
+//     void fill_sspan_maps();
+//   public:
+//     Alignment(Mmsapt const& pt, string const& src, string const& trg);
+//     void show(ostream& out); 
+//     void show(ostream& out, PhraseAlnHyp const& ah); 
+//   };

-  void
-  Alignment::
-  show(ostream& out, PhraseAlnHyp const& ah)
-  {
-#if 0
-    LexicalPhraseScorer2<Token>::table_t const& 
-      COOCjnt = PT.calc_lex.scorer.COOC;
+//   void
+//   Alignment::
+//   show(ostream& out, PhraseAlnHyp const& ah)
+//   {
+// #if 0
+//     LexicalPhraseScorer2<Token>::table_t const& 
+//       COOCjnt = PT.calc_lex.scorer.COOC;

-    out << setw(10) << exp(ah.score) << " "
-	<< PT.btfix.T2->pid2str(PT.btfix.V2.get(), ah.pp.p2) 
-	<< " <=> "
-	<< PT.btfix.T1->pid2str(PT.btfix.V1.get(), ah.pp.p1);
-    vector<uchar> const& a = ah.pp.aln;
-    // BOOST_FOREACH(int x,a) cout << "[" << x << "] ";
-    for (size_t u = 0; u+1 < a.size(); u += 2)
-      out << " " << int(a[u+1]) << "-" << int(a[u]);
+//     out << setw(10) << exp(ah.score) << " "
+// 	<< PT.btfix.T2->pid2str(PT.btfix.V2.get(), ah.pp.p2) 
+// 	<< " <=> "
+// 	<< PT.btfix.T1->pid2str(PT.btfix.V1.get(), ah.pp.p1);
+//     vector<uchar> const& a = ah.pp.aln;
+//     // BOOST_FOREACH(int x,a) cout << "[" << x << "] ";
+//     for (size_t u = 0; u+1 < a.size(); u += 2)
+//       out << " " << int(a[u+1]) << "-" << int(a[u]);

-    if (ah.e2-ah.s2 == 1 and ah.e1-ah.s1 == 1)
-      out << " " << COOCjnt[s[ah.s1]][t[ah.s2]]
-	  << "/" << PT.COOCraw[s[ah.s1]][t[ah.s2]]
-	  << "=" << float(COOCjnt[s[ah.s1]][t[ah.s2]])/PT.COOCraw[s[ah.s1]][t[ah.s2]];
-    out << endl;
-    // float const* ofwdj = ah.pp.dfwd;
-    // float const* obwdj = ah.pp.dbwd;
-    // uint32_t const* ofwdm = spstats[ah.s1][ah.e1-ah.s1-1]->ofwd;
-    // uint32_t const* obwdm = spstats[ah.s1][ah.e1-ah.s1-1]->obwd;
-    // out << "   [first: " << ofwdj[po_first]<<"/"<<ofwdm[po_first]
-    // 	 <<     " last: " << ofwdj[po_last]<<"/"<<ofwdm[po_last]
-    // 	 <<     " mono: " << ofwdj[po_mono]<<"/"<<ofwdm[po_mono]
-    // 	 <<     " jfwd: " << ofwdj[po_jfwd]<<"/"<<ofwdm[po_jfwd]
-    // 	 <<     " swap: " << ofwdj[po_swap]<<"/"<<ofwdm[po_swap]
-    // 	 <<     " jbwd: " << ofwdj[po_jbwd]<<"/"<<ofwdm[po_jbwd]
-    // 	 <<     " other: " << ofwdj[po_other]<<"/"<<ofwdm[po_other]
-    // 	 << "]" << endl
-    // 	 << "   [first: " << obwdj[po_first]<<"/"<<obwdm[po_first]
-    // 	 <<     " last: " << obwdj[po_last]<<"/"<<obwdm[po_last]
-    // 	 <<     " mono: " << obwdj[po_mono]<<"/"<<obwdm[po_mono]
-    // 	 <<     " jfwd: " << obwdj[po_jfwd]<<"/"<<obwdm[po_jfwd]
-    // 	 <<     " swap: " << obwdj[po_swap]<<"/"<<obwdm[po_swap]
-    // 	 <<     " jbwd: " << obwdj[po_jbwd]<<"/"<<obwdm[po_jbwd]
-    // 	 <<     " other: " << obwdj[po_other]<<"/"<<obwdm[po_other]
-    // 	 << "]" << endl;
-#endif
-  }
+//     if (ah.e2-ah.s2 == 1 and ah.e1-ah.s1 == 1)
+//       out << " " << COOCjnt[s[ah.s1]][t[ah.s2]]
+// 	  << "/" << PT.COOCraw[s[ah.s1]][t[ah.s2]]
+// 	  << "=" << float(COOCjnt[s[ah.s1]][t[ah.s2]])/PT.COOCraw[s[ah.s1]][t[ah.s2]];
+//     out << endl;
+//     // float const* ofwdj = ah.pp.dfwd;
+//     // float const* obwdj = ah.pp.dbwd;
+//     // uint32_t const* ofwdm = spstats[ah.s1][ah.e1-ah.s1-1]->ofwd;
+//     // uint32_t const* obwdm = spstats[ah.s1][ah.e1-ah.s1-1]->obwd;
+//     // out << "   [first: " << ofwdj[po_first]<<"/"<<ofwdm[po_first]
+//     // 	 <<     " last: " << ofwdj[po_last]<<"/"<<ofwdm[po_last]
+//     // 	 <<     " mono: " << ofwdj[po_mono]<<"/"<<ofwdm[po_mono]
+//     // 	 <<     " jfwd: " << ofwdj[po_jfwd]<<"/"<<ofwdm[po_jfwd]
+//     // 	 <<     " swap: " << ofwdj[po_swap]<<"/"<<ofwdm[po_swap]
+//     // 	 <<     " jbwd: " << ofwdj[po_jbwd]<<"/"<<ofwdm[po_jbwd]
+//     // 	 <<     " other: " << ofwdj[po_other]<<"/"<<ofwdm[po_other]
+//     // 	 << "]" << endl
+//     // 	 << "   [first: " << obwdj[po_first]<<"/"<<obwdm[po_first]
+//     // 	 <<     " last: " << obwdj[po_last]<<"/"<<obwdm[po_last]
+//     // 	 <<     " mono: " << obwdj[po_mono]<<"/"<<obwdm[po_mono]
+//     // 	 <<     " jfwd: " << obwdj[po_jfwd]<<"/"<<obwdm[po_jfwd]
+//     // 	 <<     " swap: " << obwdj[po_swap]<<"/"<<obwdm[po_swap]
+//     // 	 <<     " jbwd: " << obwdj[po_jbwd]<<"/"<<obwdm[po_jbwd]
+//     // 	 <<     " other: " << obwdj[po_other]<<"/"<<obwdm[po_other]
+//     // 	 << "]" << endl;
+// #endif
+//   }
  
-  void
-  Alignment::
-  show(ostream& out)
-  {
-    // show what we have so far ...
-    for (size_t s2 = 0; s2 < t.size(); ++s2)
-      {
-	VectorIndexSorter<PhraseAlnHyp> foo(PAH);
-	sort(tpos2ahyp[s2].begin(), tpos2ahyp[s2].end(), foo);
-	for (size_t h = 0; h < tpos2ahyp[s2].size(); ++h)
-	  show(out,PAH[tpos2ahyp[s2][h]]);
-      }
-  }
+//   void
+//   Alignment::
+//   show(ostream& out)
+//   {
+//     // show what we have so far ...
+//     for (size_t s2 = 0; s2 < t.size(); ++s2)
+//       {
+// 	VectorIndexSorter<PhraseAlnHyp> foo(PAH);
+// 	sort(tpos2ahyp[s2].begin(), tpos2ahyp[s2].end(), foo);
+// 	for (size_t h = 0; h < tpos2ahyp[s2].size(); ++h)
+// 	  show(out,PAH[tpos2ahyp[s2][h]]);
+//       }
+//   }

-  sptr<pstats>
-  Alignment::
-  getPstats(span const& sspan)
-  {
-    size_t k = sspan.second - sspan.first - 1;
-    if (k < spstats[sspan.first].size())
-      return spstats[sspan.first][k];
-    else return sptr<pstats>();
-  }
+//   sptr<pstats>
+//   Alignment::
+//   getPstats(span const& sspan)
+//   {
+//     size_t k = sspan.second - sspan.first - 1;
+//     if (k < spstats[sspan.first].size())
+//       return spstats[sspan.first][k];
+//     else return sptr<pstats>();
+//   }
  
-  void
-  Alignment::
-  fill_tspan_maps()
-  {
-    tspan2pid.assign(t.size(),vector<uint64_t>(t.size(),0));
-    for (size_t i = 0; i < t.size(); ++i)
-      {
-	tsa::tree_iterator m(PT.btfix.I2.get());
-	for (size_t k = i; k < t.size() && m.extend(t[k]); ++k)
-	  {
-	    uint64_t pid = m.getPid();
-	    tpid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
-	    tspan2pid[i][k] = pid;
-	  }
-      } 
-  }
+//   void
+//   Alignment::
+//   fill_tspan_maps()
+//   {
+//     tspan2pid.assign(t.size(),vector<uint64_t>(t.size(),0));
+//     for (size_t i = 0; i < t.size(); ++i)
+//       {
+// 	tsa::tree_iterator m(PT.btfix.I2.get());
+// 	for (size_t k = i; k < t.size() && m.extend(t[k]); ++k)
+// 	  {
+// 	    uint64_t pid = m.getPid();
+// 	    tpid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
+// 	    tspan2pid[i][k] = pid;
+// 	  }
+//       } 
+//   }

-  void
-  Alignment::
-  fill_sspan_maps()
-  {
-    sspan2pid.assign(s.size(),vector<uint64_t>(s.size(),0));
-    spstats.resize(s.size());
-    for (size_t i = 0; i < s.size(); ++i)
-      {
-	tsa::tree_iterator m(PT.btfix.I1.get());
-	for (size_t k = i; k < s.size() && m.extend(s[k]); ++k)
-	  {
-	    uint64_t pid = m.getPid();
-	    sspan2pid[i][k] = pid;
-	    pid2span_t::iterator p = spid2span.find(pid);
-	    if (p != spid2span.end())
-	      {
-		int x = p->second[0].first;
-		int y = p->second[0].second-1;
-		spstats[i].push_back(spstats[x][y-x]);
-	      }
-	    else 
-	      {
-		spstats[i].push_back(PT.btfix.lookup(m));
-		cout << PT.btfix.T1->pid2str(PT.btfix.V1.get(),pid) << " "
-		     << spstats[i].back()->good << "/" << spstats[i].back()->sample_cnt 
-		     << endl;
-	      }
-	    spid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
-	  }
-      }
-  }
+//   void
+//   Alignment::
+//   fill_sspan_maps()
+//   {
+//     sspan2pid.assign(s.size(),vector<uint64_t>(s.size(),0));
+//     spstats.resize(s.size());
+//     for (size_t i = 0; i < s.size(); ++i)
+//       {
+// 	tsa::tree_iterator m(PT.btfix.I1.get());
+// 	for (size_t k = i; k < s.size() && m.extend(s[k]); ++k)
+// 	  {
+// 	    uint64_t pid = m.getPid();
+// 	    sspan2pid[i][k] = pid;
+// 	    pid2span_t::iterator p = spid2span.find(pid);
+// 	    if (p != spid2span.end())
+// 	      {
+// 		int x = p->second[0].first;
+// 		int y = p->second[0].second-1;
+// 		spstats[i].push_back(spstats[x][y-x]);
+// 	      }
+// 	    else 
+// 	      {
+// 		spstats[i].push_back(PT.btfix.lookup(m));
+// 		cout << PT.btfix.T1->pid2str(PT.btfix.V1.get(),pid) << " "
+// 		     << spstats[i].back()->good << "/" << spstats[i].back()->sample_cnt 
+// 		     << endl;
+// 	      }
+// 	    spid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
+// 	  }
+//       }
+//   }

-  Alignment::
-  Alignment(Mmsapt const& pt, string const& src, string const& trg)
-    : PT(pt)
-  {
-    PT.btfix.V1->fillIdSeq(src,s);
-    PT.btfix.V2->fillIdSeq(trg,t);
+//   Alignment::
+//   Alignment(Mmsapt const& pt, string const& src, string const& trg)
+//     : PT(pt)
+//   {
+//     PT.btfix.V1->fillIdSeq(src,s);
+//     PT.btfix.V2->fillIdSeq(trg,t);

-    // LexicalPhraseScorer2<Token>::table_t const& COOC = PT.calc_lex.scorer.COOC;
-    // BOOST_FOREACH(id_type i, t)
-    //   {
-    // 	cout << (*PT.btfix.V2)[i];
-    // 	if (i < PT.wlex21.size())
-    // 	  {
-    // 	    BOOST_FOREACH(id_type k, PT.wlex21[i])
-    // 	      {
-    // 		size_t  j = COOC[k][i];
-    // 		size_t m1 = COOC.m1(k);
-    // 		size_t m2 = COOC.m2(i);
-    // 		if (j*1000 > m1 && j*1000 > m2)
-    // 		  cout << " " << (*PT.btfix.V1)[k];
-    // 	      }	 
-    // 	  }
-    // 	cout << endl;
-    //   }
+//     // LexicalPhraseScorer2<Token>::table_t const& COOC = PT.calc_lex.scorer.COOC;
+//     // BOOST_FOREACH(id_type i, t)
+//     //   {
+//     // 	cout << (*PT.btfix.V2)[i];
+//     // 	if (i < PT.wlex21.size())
+//     // 	  {
+//     // 	    BOOST_FOREACH(id_type k, PT.wlex21[i])
+//     // 	      {
+//     // 		size_t  j = COOC[k][i];
+//     // 		size_t m1 = COOC.m1(k);
+//     // 		size_t m2 = COOC.m2(i);
+//     // 		if (j*1000 > m1 && j*1000 > m2)
+//     // 		  cout << " " << (*PT.btfix.V1)[k];
+//     // 	      }	 
+//     // 	  }
+//     // 	cout << endl;
+//     //   }
    
-    fill_tspan_maps();
-    fill_sspan_maps();
-    tpos2ahyp.resize(t.size()); 
-    // now fill the association score table
-    PAH.reserve(1000000);
-    typedef pid2span_t::iterator psiter;
-    for (psiter L = spid2span.begin(); L != spid2span.end(); ++L)
-      {
-	if (!L->second.size()) continue; // should never happen anyway
-	int i = L->second[0].first;
-	int k = L->second[0].second - i -1;
-	sptr<pstats> ps = spstats[i][k];
-	PhrasePair pp; pp.init(L->first,*ps, PT.m_numScoreComponents);
-	jStatsTable & J = ps->trg;
-	for (jStatsTable::iterator y = J.begin(); y != J.end(); ++y)
-	  {
-	    psiter R = tpid2span.find(y->first);
-	    if (R == tpid2span.end()) continue;
-	    pp.update(y->first, y->second);
-	    PT.ScorePPfix(pp);
-	    pp.eval(PT.feature_weights);
-	    PP.push_back(pp);
-	    BOOST_FOREACH(span const& sspan, L->second)
-	      {
-		BOOST_FOREACH(span const& tspan, R->second)
-		  {
-		    tpos2ahyp[tspan.first].push_back(PAH.size());
-		    PAH.push_back(PhraseAlnHyp(PP.back(),s.size(),sspan,tspan));
-		  }
-	      }
-	  }
-      }
-  }
+//     fill_tspan_maps();
+//     fill_sspan_maps();
+//     tpos2ahyp.resize(t.size()); 
+//     // now fill the association score table
+//     PAH.reserve(1000000);
+//     typedef pid2span_t::iterator psiter;
+//     for (psiter L = spid2span.begin(); L != spid2span.end(); ++L)
+//       {
+// 	if (!L->second.size()) continue; // should never happen anyway
+// 	int i = L->second[0].first;
+// 	int k = L->second[0].second - i -1;
+// 	sptr<pstats> ps = spstats[i][k];
+// 	PhrasePair pp; pp.init(L->first,*ps, PT.m_numScoreComponents);
+// 	jStatsTable & J = ps->trg;
+// 	for (jStatsTable::iterator y = J.begin(); y != J.end(); ++y)
+// 	  {
+// 	    psiter R = tpid2span.find(y->first);
+// 	    if (R == tpid2span.end()) continue;
+// 	    pp.update(y->first, y->second);
+// 	    PT.ScorePPfix(pp);
+// 	    pp.eval(PT.feature_weights);
+// 	    PP.push_back(pp);
+// 	    BOOST_FOREACH(span const& sspan, L->second)
+// 	      {
+// 		BOOST_FOREACH(span const& tspan, R->second)
+// 		  {
+// 		    tpos2ahyp[tspan.first].push_back(PAH.size());
+// 		    PAH.push_back(PhraseAlnHyp(PP.back(),s.size(),sspan,tspan));
+// 		  }
+// 	      }
+// 	  }
+//       }
+//   }

    

-  int
-  extend(vector<PhraseAlnHyp> & PAH, int edge, int next)
-  {
-    if ((PAH[edge].scov & PAH[next].scov).count()) 
-      return -1;
-    int ret = PAH.size();
-    PAH.push_back(PAH[next]);
-    PhraseAlnHyp & h = PAH.back();
-    h.prev  = edge;
-    h.scov |= PAH[edge].scov;
-    h.score += log(PAH[edge].dprob_fwd(PAH[next]));
-    h.score += log(PAH[next].dprob_bwd(PAH[edge]));
-    return ret;
-  }
+//   int
+//   extend(vector<PhraseAlnHyp> & PAH, int edge, int next)
+//   {
+//     if ((PAH[edge].scov & PAH[next].scov).count()) 
+//       return -1;
+//     int ret = PAH.size();
+//     PAH.push_back(PAH[next]);
+//     PhraseAlnHyp & h = PAH.back();
+//     h.prev  = edge;
+//     h.scov |= PAH[edge].scov;
+//     h.score += log(PAH[edge].dprob_fwd(PAH[next]));
+//     h.score += log(PAH[next].dprob_bwd(PAH[edge]));
+//     return ret;
+//   }

-  sptr<vector<int> >
-  Mmsapt::
-  align(string const& src, string const& trg) const
-  {
-    // For the time being, we consult only the fixed bitext.
-    // We might also consider the dynamic bitext. => TO DO.
-    Alignment A(*this,src,trg);
-    VectorIndexSorter<PhraseAlnHyp> foo(A.PAH);
-    vector<size_t> o; foo.GetOrder(o);
-    BOOST_FOREACH(int i, o) A.show(cout,A.PAH[i]);
-    sptr<vector<int> > aln;
-    return aln;
-}
-}
+//   sptr<vector<int> >
+//   Mmsapt::
+//   align(string const& src, string const& trg) const
+//   {
+//     // For the time being, we consult only the fixed bitext.
+//     // We might also consider the dynamic bitext. => TO DO.
+//     Alignment A(*this,src,trg);
+//     VectorIndexSorter<PhraseAlnHyp> foo(A.PAH);
+//     vector<size_t> o; foo.GetOrder(o);
+//     BOOST_FOREACH(int i, o) A.show(cout,A.PAH[i]);
+//     sptr<vector<int> > aln;
+//     return aln;
+// }
+// }


--- a/moses/TranslationModel/UG/mmsapt_phrase_scorers.h
+++ b/moses/TranslationModel/UG/mmsapt_phrase_scorers.h
@ -1,268 +1,17 @@
 // -*- c++ -*-
+// written by Ulrich Germann 
 #pragma once
 #include "moses/TranslationModel/UG/mm/ug_bitext.h"
 #include "util/exception.hh"
+#include "boost/format.hpp"
+#include "sapt_pscore_base.h"
+
+// DEPRECATED CODE: Word and phrase penalties are now 
+// added by the decoder.

 namespace Moses {
  namespace bitext
  {
-
-    template<typename Token>
-    class
-    PhraseScorer
-    {
-    protected:
-      int m_index;
-      int m_num_feats;
-      vector<string> m_feature_names;
-    public:
- 
-      virtual 
-      void 
-      operator()(Bitext<Token> const& pt, PhrasePair& pp, vector<float> * dest=NULL) 
-	const = 0;
-    
-      int 
-      fcnt() const 
-      { return m_num_feats; }
-    
-      vector<string> const &
-      fnames() const
-      { return m_feature_names; }
-
-      string const &
-      fname(int i) const
-      { 
-	UTIL_THROW_IF2((i < m_index || i >= m_index + m_num_feats),
-		       "Feature name index out of range at " 
-		       << __FILE__ << ":" << __LINE__);
-	return m_feature_names.at(i - m_index); 
-      }
-    
-      int 
-      getIndex() const 
-      { return m_index; }
-    };
-  
-    ////////////////////////////////////////////////////////////////////////////////
-  
-    template<typename Token>
-    class
-    PScorePfwd : public PhraseScorer<Token>
-    {
-      float conf;
-      char denom;
-    public:
-      PScorePfwd() 
-      {
-	this->m_num_feats = 1;
-      }
-
-      int 
-      init(int const i, float const c, char d) 
-      { 
-	conf  = c; 
-	denom = d;
-	this->m_index = i;
-	ostringstream buf;
-	buf << format("pfwd-%c%.3f") % denom % c;
-	this->m_feature_names.push_back(buf.str());
-	return i + this->m_num_feats;
-      }
-
-      void 
-      operator()(Bitext<Token> const& bt, PhrasePair & pp, 
-		 vector<float> * dest = NULL) const
-      {
-	if (!dest) dest = &pp.fvals;
-	if (pp.joint > pp.good1) 
-	  {
-	    cerr<<bt.toString(pp.p1,0)<<" ::: "<<bt.toString(pp.p2,1)<<endl;
-	    cerr<<pp.joint<<"/"<<pp.good1<<"/"<<pp.raw2<<endl;
-	  }
-	switch (denom)
-	  {
-	  case 'g': 
-	    (*dest)[this->m_index] = log(lbop(pp.good1, pp.joint, conf)); 
-	    break;
-	  case 's': 
-	    (*dest)[this->m_index] = log(lbop(pp.sample1, pp.joint, conf)); 
-	    break;
-	  case 'r':
-	    (*dest)[this->m_index] = log(lbop(pp.raw1, pp.joint, conf)); 
-	  }
-      }
-    };
-  
-    ////////////////////////////////////////////////////////////////////////////////
-
-    template<typename Token>
-    class
-    PScorePbwd : public PhraseScorer<Token>
-    {
-      float conf;
-      char denom;
-    public:
-      PScorePbwd() 
-      {
-	this->m_num_feats = 1;
-      }
-
-      int 
-      init(int const i, float const c, char d) 
-      { 
-	conf = c; 
-	denom = d;
-	this->m_index = i;
-	ostringstream buf;
-	buf << format("pbwd-%c%.3f") % denom % c;
-	this->m_feature_names.push_back(buf.str());
-	return i + this->m_num_feats;
-      }
-
-      void 
-      operator()(Bitext<Token> const& bt, PhrasePair& pp, 
-		 vector<float> * dest = NULL) const
-      {
-	if (!dest) dest = &pp.fvals;
-	// we use the denominator specification to scale the raw counts on the 
-	// target side; the clean way would be to counter-sample
-	uint32_t r2 = pp.raw2;
-	if      (denom == 'g') r2 = round(r2 * float(pp.good1)   / pp.raw1);
-	else if (denom == 's') r2 = round(r2 * float(pp.sample1) / pp.raw1);
-	(*dest)[this->m_index] = log(lbop(max(r2, pp.joint),pp.joint,conf));
-      }
-    };
-  
-    ////////////////////////////////////////////////////////////////////////////////
-
-    template<typename Token>
-    class
-    PScoreCoherence : public PhraseScorer<Token>
-    {
-    public:
-      PScoreCoherence() 
-      {
-	this->m_num_feats = 1;
-      }
-    
-      int 
-      init(int const i) 
-      { 
-	this->m_index = i;
-	this->m_feature_names.push_back(string("coherence"));
-	return i + this->m_num_feats;
-      }
-
-      void 
-      operator()(Bitext<Token> const& bt, PhrasePair& pp, 
-		 vector<float> * dest = NULL) const
-      {
-	if (!dest) dest = &pp.fvals;
-	(*dest)[this->m_index] = log(pp.good1) - log(pp.sample1);
-      }
-    };
-  
-    ////////////////////////////////////////////////////////////////////////////////
-
-    template<typename Token>
-    class
-    PScoreLogCounts : public PhraseScorer<Token>
-    {
-      float conf;
-    public:
-      PScoreLogCounts() 
-      {
-	this->m_num_feats = 5;
-      }
-    
-      int 
-      init(int const i) 
-      { 
-	this->m_index = i;
-	this->m_feature_names.push_back("log-r1");
-	this->m_feature_names.push_back("log-s1");
-	this->m_feature_names.push_back("log-g1");
-	this->m_feature_names.push_back("log-j");
-	this->m_feature_names.push_back("log-r2");
-	return i + this->m_num_feats;
-      }
-    
-      void 
-      operator()(Bitext<Token> const& bt, PhrasePair& pp, 
-		 vector<float> * dest = NULL) const
-      {
-	if (!dest) dest = &pp.fvals;
-	size_t i = this->m_index;
-	assert(pp.raw1);
-	assert(pp.sample1);
-	assert(pp.good1);
-	assert(pp.joint);
-	assert(pp.raw2);
-	(*dest)[i]   = -log(pp.raw1);
-	(*dest)[++i] = -log(pp.sample1);
-	(*dest)[++i] = -log(pp.good1);
-	(*dest)[++i] = +log(pp.joint);
-	(*dest)[++i] = -log(pp.raw2);
-      }
-    };
-  
-    template<typename Token>
-    class
-    PScoreLex : public PhraseScorer<Token>
-    {
-      float const m_alpha;
-    public:
-      LexicalPhraseScorer2<Token> scorer;
-    
-      PScoreLex(float const a) 
-	: m_alpha(a) 
-      { this->m_num_feats = 2; }
-    
-      int 
-      init(int const i, string const& fname) 
-      { 
-	scorer.open(fname); 
-	this->m_index = i;
-	this->m_feature_names.push_back("lexfwd");
-	this->m_feature_names.push_back("lexbwd");
-	return i + this->m_num_feats;
-      }
-    
-      void 
-      operator()(Bitext<Token> const& bt, PhrasePair& pp, vector<float> * dest = NULL) const
-      {
-	if (!dest) dest = &pp.fvals;
-	uint32_t sid1=0,sid2=0,off1=0,off2=0,len1=0,len2=0;
-	parse_pid(pp.p1, sid1, off1, len1);
-	parse_pid(pp.p2, sid2, off2, len2);
-	
-#if 0
-	cout << len1 << " " << len2 << endl;
-	Token const* t1 = bt.T1->sntStart(sid1);
-	for (size_t i = off1; i < off1 + len1; ++i)
-	  cout << (*bt.V1)[t1[i].id()] << " "; 
-	cout << __FILE__ << ":" << __LINE__ << endl;
-	
-	Token const* t2 = bt.T2->sntStart(sid2);
-	for (size_t i = off2; i < off2 + len2; ++i)
-	  cout << (*bt.V2)[t2[i].id()] << " "; 
-	cout << __FILE__ << ":" << __LINE__ << endl;
-	
-	BOOST_FOREACH (int a, pp.aln)
-	  cout << a << " " ;
-	cout << __FILE__ << ":" << __LINE__ << "\n" << endl;
-	
-#endif
-	scorer.score(bt.T1->sntStart(sid1)+off1,0,len1,
-		     bt.T2->sntStart(sid2)+off2,0,len2,
-		     pp.aln, m_alpha,
-		     (*dest)[this->m_index],
-		     (*dest)[this->m_index+1]);
-      }
-      
-    };
-  
    /// Word penalty
    template<typename Token>
    class
@ -280,7 +29,8 @@ namespace Moses {
      }
    
      void 
-      operator()(Bitext<Token> const& bt, PhrasePair& pp, vector<float> * dest = NULL) const
+      operator()(Bitext<Token> const& bt, PhrasePair<Token>& pp, 
+		 vector<float> * dest = NULL) const
      {
 	if (!dest) dest = &pp.fvals;
 	uint32_t sid2=0,off2=0,len2=0;
@ -307,7 +57,8 @@ namespace Moses {
      }
    
      void 
-      operator()(Bitext<Token> const& bt, PhrasePair& pp, vector<float> * dest = NULL) const
+      operator()(Bitext<Token> const& bt, PhrasePair<Token>& pp, 
+		 vector<float> * dest = NULL) const
      {
 	if (!dest) dest = &pp.fvals;
 	(*dest)[this->m_index] = 1;
--- a/moses/TranslationModel/UG/ptable-lookup.cc
+++ b/moses/TranslationModel/UG/ptable-lookup.cc
@ -106,15 +106,11 @@ int main(int argc, char* argv[])
      	  cout << "   ";
      	  for (size_t k = idx.first; k < idx.second; ++k)
      	    {
-      	      if (mmsapt && fname[k-idx.first].substr(0,3) == "log")
-      		{
-      		  if(scores[k] < 0)
-      		    cout << " " << format("%10d") % round(exp(-scores[k]));
-      		  else
-      		    cout << " " << format("%10d") % round(exp(scores[k]));
-      		}
-      	      else
-      		cout << " " << format("%10.8f") % exp(scores[k]);
+	      size_t j = k-idx.first;
+	      float f = (mmsapt ? mmsapt->isLogVal(j) ? exp(scores[k]) : scores[k]
+			 : scores[k] < 0 ? exp(scores[k]) : scores[k]);
+	      string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f";
+	      cout << " " << format(fmt) % f;
      	    }
      	  cout << endl;
      	}
--- a/moses/TranslationModel/UG/sapt_phrase_key.h
+++ b/moses/TranslationModel/UG/sapt_phrase_key.h
@ -0,0 +1,13 @@
+//-*- c++ -*-
+#pragma once
+#include <stdint.h>
+
+using namespace std;
+namespace sapt
+{
+  using namespace Moses;
+  using namespace std;
+
+    
+
+}
--- a/moses/TranslationModel/UG/sapt_phrase_scorers.h
+++ b/moses/TranslationModel/UG/sapt_phrase_scorers.h
@ -0,0 +1,12 @@
+// -*- c++ -*-
+// Phrase scoring functions for suffix array-based phrase tables
+// written by Ulrich Germann 
+#pragma once
+#include "sapt_pscore_unaligned.h"   // count # of unaligned words
+#include "sapt_pscore_provenance.h"  // reward for joint phrase occ. per corpus
+#include "sapt_pscore_rareness.h"    // penalty for rare occurrences (global?)
+#include "sapt_pscore_logcnt.h"      // logs of observed counts
+#include "sapt_pscore_lex1.h"        // plain vanilla Moses lexical scores
+#include "sapt_pscore_pfwd.h"        // fwd phrase prob
+#include "sapt_pscore_pbwd.h"        // bwd phrase prob
+#include "sapt_pscore_coherence.h"   // coherence feature: good/sample-size
--- a/moses/TranslationModel/UG/sapt_pscore_base.h
+++ b/moses/TranslationModel/UG/sapt_pscore_base.h
@ -0,0 +1,103 @@
+// -*- c++ -*-
+// Base classes for suffix array-based phrase scorers
+// written by Ulrich Germann 
+#pragma once
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "moses/TranslationModel/UG/mm/ug_phrasepair.h"
+#include "util/exception.hh"
+#include "boost/format.hpp"
+
+namespace Moses {
+  namespace bitext
+  {
+
+    // Abstract base class that defines the common API for phrase scorers.
+    // Each scorer produces m_num_feats feature values that are stored in
+    // consecutive slots of a feature vector, starting at slot m_index.
+    template<typename Token>
+    class
+    PhraseScorer
+    {
+    protected:
+      int m_index;     // first slot in the feature vector; -1 = not assigned
+      int m_num_feats; // number of feature values this scorer produces
+      string m_tag;    // name prefix used by scorer families (see below)
+      vector<string> m_feature_names; // one name per feature value
+
+      // Give the bookkeeping members defined values, so that a derived 
+      // class that does not set them never exposes indeterminate values
+      // through getIndex() or fcnt().
+      PhraseScorer() : m_index(-1), m_num_feats(0) {}
+
+    public:
+
+      // The class has virtual member functions and is meant to be derived
+      // from; a virtual destructor keeps deletion through a base class
+      // pointer well-defined.
+      virtual
+      ~PhraseScorer() {}
+ 
+      // Compute the feature value(s) for phrase pair /pp/ with respect to
+      // bitext /pt/. The scorers defined alongside this class write into
+      // pp.fvals when /dest/ is NULL, and into *dest otherwise.
+      virtual 
+      void 
+      operator()(Bitext<Token> const& pt, 
+		 PhrasePair<Token>& pp, 
+		 vector<float> * dest=NULL) 
+	const = 0;
+
+      // Assign the position of this scorer's first feature value.
+      void
+      setIndex(int const i) { m_index = i; }
+    
+      int
+      getIndex() const { return m_index; }
+
+      // Number of feature values produced by this scorer.
+      int 
+      fcnt() const { return m_num_feats; }
+    
+      vector<string> const &
+      fnames() const { return m_feature_names; }
+
+      // Name of the i-th feature of this scorer (0-based, local to the 
+      // scorer). Negative values count from the end. Throws if /i/ is 
+      // out of range.
+      string const &
+      fname(int i) const
+      { 
+	if (i < 0) i += m_num_feats;
+	UTIL_THROW_IF2(i < 0 || i >= m_num_feats,
+		       "Feature name index out of range at " << HERE);
+	return m_feature_names.at(i); 
+      }
+
+      // is this feature log valued? 
+      virtual
+      bool
+      isLogVal(int i) const { return true; }
+    
+      // is this feature integer valued (e.g., count features)? 
+      virtual
+      bool
+      isIntegerValued(int i) const { return false; }
+
+      // does this feature function allow pooling of counts if 
+      // there are no occurrences in the respective corpus?
+      virtual
+      bool
+      allowPooling() const { return true; }
+      
+    };
+
+    // Base class for 'families' of phrase scorers that have a single 
+    // real-valued parameter: one feature is created for each parameter 
+    // value listed in the specification string.
+    template<typename Token>
+    class
+    SingleRealValuedParameterPhraseScorerFamily 
+      : public PhraseScorer<Token>
+    {
+    protected:
+      // parameter values, one per feature, in specification order
+      vector<float> m_x;
+
+      // Parse /specs/ into parameter values and feature names.
+      // The string is read as a sequence of numbers, each followed by a 
+      // single separator character (any non-whitespace character is 
+      // consumed as separator). Every value x yields a feature named 
+      // "<m_tag>-<x with two decimals>". 
+      // Throws if m_tag has not been set, if /specs/ is empty, or if 
+      // feature names have already been registered.
+      virtual 
+      void 
+      init(string const specs) 
+      { 
+	using namespace boost;
+	UTIL_THROW_IF2(this->m_tag.size() == 0, 
+		       "m_tag must be initialized in constructor");
+	UTIL_THROW_IF2(specs.size() == 0,"empty specification string!");
+	UTIL_THROW_IF2(this->m_feature_names.size(),
+		       "PhraseScorer can only be initialized once!");
+	this->m_index = -1;
+	float x; char c;
+	// the loop ends at the first position where no number can be read
+	for (istringstream buf(specs); buf>>x; buf>>c)
+	  {
+	    this->m_x.push_back(x);
+	    string fname = (format("%s-%.2f") % this->m_tag % x).str();
+	    this->m_feature_names.push_back(fname);
+	  }
+	this->m_num_feats = this->m_x.size();
+      }
+    };
+  } // namespace bitext
+} // namespace moses
--- a/moses/TranslationModel/UG/sapt_pscore_coherence.h
+++ b/moses/TranslationModel/UG/sapt_pscore_coherence.h
@ -0,0 +1,33 @@
+// -*- c++ -*-
+// Coherence feature for suffix array-based phrase tables: 
+// log(good1) - log(sample1)
+// written by Ulrich Germann 
+#pragma once
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "sapt_pscore_base.h" // defines PhraseScorer, the base class used below
+#include "util/exception.hh"
+#include "boost/format.hpp"
+
+namespace Moses {
+  namespace bitext
+  {
+    // Single-feature scorer; the feature is named "coherence".
+    template<typename Token>
+    class
+    PScoreCoherence : public PhraseScorer<Token>
+    {
+    public:
+      // The argument is not used; the constructor only registers the 
+      // feature name and leaves the feature position unassigned (-1).
+      PScoreCoherence(string const dummy) 
+      { 
+	this->m_index = -1;
+	this->m_num_feats = 1;
+	this->m_feature_names.push_back(string("coherence"));
+      }
+      
+      // Store log(pp.good1) - log(pp.sample1) at this scorer's position 
+      // in /dest/ (or in pp.fvals if /dest/ is NULL).
+      void 
+      operator()(Bitext<Token> const& bt, 
+		 PhrasePair<Token>& pp, 
+		 vector<float> * dest = NULL) const
+      {
+	if (!dest) dest = &pp.fvals;
+	(*dest)[this->m_index] = log(pp.good1) - log(pp.sample1);
+      }
+    };
+  }
+}
--- a/moses/TranslationModel/UG/sapt_pscore_lex1.h
+++ b/moses/TranslationModel/UG/sapt_pscore_lex1.h
@ -0,0 +1,70 @@
+// -*- c++ -*-
+// Phrase scorer that provides forward and backward lexical scores 
+// (features "lexfwd" and "lexbwd") via LexicalPhraseScorer2
+// written by Ulrich Germann 
+#pragma once
+
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+#include <cstdlib>
+
+namespace Moses {
+  namespace bitext
+  {
+    template<typename Token>
+    class
+    PScoreLex1 : public PhraseScorer<Token>
+    {
+      float m_alpha; // passed through unchanged to scorer.score()
+    public:
+      LexicalPhraseScorer2<Token> scorer;
+    
+      // /alpaspec/ is converted to a number with atof() and stored as 
+      // m_alpha; /lexfile/ is opened by the underlying lexical scorer.
+      PScoreLex1(string const& alpaspec, string const& lexfile) 
+      { 
+	this->m_index = -1;
+	this->m_num_feats = 2; 
+	this->m_feature_names.reserve(2);
+	this->m_feature_names.push_back("lexfwd");
+	this->m_feature_names.push_back("lexbwd");
+	m_alpha = atof(alpaspec.c_str());
+	scorer.open(lexfile); 
+      }
+    
+      // Score phrase pair /pp/ and store the two values in consecutive 
+      // slots of /dest/ (or of pp.fvals if /dest/ is NULL), starting at 
+      // this scorer's position.
+      void 
+      operator()(Bitext<Token> const& bt, 
+		 PhrasePair<Token>& pp, 
+		 vector<float> * dest = NULL) const
+      {
+	if (!dest) dest = &pp.fvals;
+	// The phrase pair carries start pointers and lengths itself, so 
+	// there is no need to decode phrase ids here.
+	scorer.score(pp.start1,0, pp.len1, 
+		     pp.start2,0, pp.len2, pp.aln, m_alpha, 
+		     (*dest)[this->m_index], 
+		     (*dest)[this->m_index+1]);
+      }
+    };
+  } //namespace bitext
+} // namespace Moses
+
--- a/moses/TranslationModel/UG/sapt_pscore_logcnt.h
+++ b/moses/TranslationModel/UG/sapt_pscore_logcnt.h
@ -0,0 +1,65 @@
+// -*- c++ -*-
+// Phrase scorer that provides the logs of the observed counts of a 
+// phrase pair: raw (r1), sample (s1) and good (g1) source side counts, 
+// joint counts (j) and raw target side counts (r2)
+// written by Ulrich Germann 
+#pragma once
+
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+#include <cassert>
+#include <cmath>
+
+using namespace std;
+namespace Moses {
+  namespace bitext  {
+    
+    template<typename Token>
+    class
+    PScoreLogCnt : public PhraseScorer<Token>
+    {
+      string m_specs; // which counts to report; see constructor
+    public:
+      // /specs/ selects the counts to report: a count is included if its
+      // code ("r1", "s1", "g1", "j", "r2") occurs anywhere in /specs/. 
+      // Features are always created in the fixed order r1, s1, g1, j, r2,
+      // regardless of the order of the codes in /specs/.
+      PScoreLogCnt(string const specs) 
+      { 
+	this->m_index = -1;
+	this->m_specs = specs;
+	if (specs.find("r1") != string::npos) // raw source phrase counts
+	  this->m_feature_names.push_back("log-r1");
+	if (specs.find("s1") != string::npos)
+	  this->m_feature_names.push_back("log-s1"); // L1 sample size
+	if (specs.find("g1") != string::npos) // coherent phrases
+	  this->m_feature_names.push_back("log-g1");
+	if (specs.find("j") != string::npos) // joint counts
+	  this->m_feature_names.push_back("log-j");
+	if (specs.find("r2") != string::npos) // raw target phrase counts
+	  this->m_feature_names.push_back("log-r2");
+	this->m_num_feats = this->m_feature_names.size();
+      }
+
+      bool
+      isIntegerValued(int i) const { return true; } 
+
+      // Store the logs of the selected counts in consecutive slots of 
+      // /dest/ (or of pp.fvals if /dest/ is NULL), starting at this 
+      // scorer's position, in the same order in which the constructor 
+      // registered the feature names.
+      void 
+      operator()(Bitext<Token> const& bt, 
+		 PhrasePair<Token>& pp, 
+		 vector<float> * dest = NULL) const
+      {
+	if (!dest) dest = &pp.fvals;
+	assert(pp.raw1);
+	assert(pp.sample1);
+	assert(pp.good1);
+	assert(pp.joint);
+	assert(pp.raw2);
+	size_t i = this->m_index;
+	if (m_specs.find("r1") != string::npos) 
+	  (*dest)[i++] = log(pp.raw1);
+	if (m_specs.find("s1") != string::npos) 
+	  (*dest)[i++] = log(pp.sample1);
+	if (m_specs.find("g1") != string::npos) 
+	  (*dest)[i++] = log(pp.good1);
+	if (m_specs.find("j") != string::npos) 
+	  (*dest)[i++] = log(pp.joint);
+	// Post-increment like all the others: a pre-increment here skips 
+	// the slot that belongs to log-r2 and writes into the slot after 
+	// this scorer's range.
+	if (m_specs.find("r2") != string::npos) 
+	  (*dest)[i++] = log(pp.raw2);
+      }
+    };
+  } // namespace bitext
+} // namespace Moses
--- a/moses/TranslationModel/UG/sapt_pscore_pbwd.h
+++ b/moses/TranslationModel/UG/sapt_pscore_pbwd.h
@ -0,0 +1,58 @@
+//-*- c++ -*-
+// Backward phrase probability feature(s) for suffix array-based 
+// phrase tables
+// written by Ulrich Germann 
+#pragma once
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "sapt_pscore_base.h" // defines PhraseScorer, the base class used below
+#include "util/exception.hh"
+#include "boost/format.hpp"
+#include "boost/foreach.hpp"
+
+namespace Moses {
+  namespace bitext
+  {
+    template<typename Token>
+    class
+    PScorePbwd : public PhraseScorer<Token>
+    {
+      float   conf;  // passed to lbop() as its third argument
+      string denom;  // denominator codes ('g','s','r'), '+'-separated
+      
+    public:
+      // /d/ lists the denominator variants to compute, e.g. "g", "g+s".
+      // One feature named "pbwd-<code><c>" is created per code. 
+      // Throws if /d/ contains anything but 'g', 's', 'r' and '+'.
+      PScorePbwd(float const c, string d) 
+      { 
+	this->m_index = -1;
+	conf  = c; 
+	denom = d;
+	// number of characters in /d/ that must map to a feature
+	size_t checksum = d.size();
+	BOOST_FOREACH(char const& x, denom)
+	  {
+	    if (x == '+') { --checksum; continue; }
+	    if (x != 'g' && x != 's' && x != 'r') continue;
+	    string s = (boost::format("pbwd-%c%.3f") % x % c).str();
+	    this->m_feature_names.push_back(s);
+	  }
+	this->m_num_feats = this->m_feature_names.size();
+	UTIL_THROW_IF2(this->m_feature_names.size() != checksum,
+		       "Unknown parameter in specification '"
+		       << d << "' for Pbwd phrase scorer at " << HERE);
+      }
+
+      // Store one value per denominator code in consecutive slots of 
+      // /dest/ (or of pp.fvals if /dest/ is NULL), starting at this 
+      // scorer's position.
+      void 
+      operator()(Bitext<Token> const& bt, 
+		 PhrasePair<Token>& pp, 
+		 vector<float> * dest = NULL) const
+      {
+	if (!dest) dest = &pp.fvals;
+	// we use the denominator specification to scale the raw counts on the 
+	// target side; the clean way would be to counter-sample
+	size_t i = this->m_index;
+	BOOST_FOREACH(char const& x, denom)
+	  {
+	    // '+' only separates codes. The constructor registers no 
+	    // feature for it, so emitting a value here would write past 
+	    // the slots that belong to this scorer.
+	    if (x == '+') continue;
+	    uint32_t m2 = pp.raw2;
+	    if      (x == 'g') m2 = round(m2 * float(pp.good1)   / pp.raw1);
+	    else if (x == 's') m2 = round(m2 * float(pp.sample1) / pp.raw1);
+	    (*dest)[i++] = log(lbop(max(m2, pp.joint),pp.joint,conf));
+	  }
+      }
+    };
+  } // namespace bitext
+} // namespace Moses
--- a/moses/TranslationModel/UG/sapt_pscore_pfwd.h
+++ b/moses/TranslationModel/UG/sapt_pscore_pfwd.h
@ -0,0 +1,70 @@
+// -*- c++ -*-
+// Forward phrase probability feature(s) for suffix array-based 
+// phrase tables
+// written by Ulrich Germann 
+#pragma once
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "sapt_pscore_base.h" // defines PhraseScorer, the base class used below
+#include "util/exception.hh"
+#include "boost/format.hpp"
+#include "boost/foreach.hpp"
+
+namespace Moses {
+  namespace bitext
+  {
+    template<typename Token>
+    class
+    PScorePfwd : public PhraseScorer<Token>
+    {
+      float   conf;  // passed to lbop() as its third argument
+      string denom;  // denominator codes ('g','s','r'), '+'-separated
+
+    public:
+
+      // /d/ lists the denominator variants to compute, e.g. "g", "g+s".
+      // One feature named "pfwd-<code><c>" is created per code. 
+      // Throws if /d/ contains anything but 'g', 's', 'r' and '+'.
+      PScorePfwd(float const c, string d) 
+      { 
+	this->m_index = -1;
+	conf  = c; 
+	denom = d;
+	// number of characters in /d/ that must map to a feature
+	size_t checksum = d.size();
+	BOOST_FOREACH(char const& x, denom)
+	  {
+	    if (x == '+') { --checksum; continue; }
+	    if (x != 'g' && x != 's' && x != 'r') continue;
+	    string s = (boost::format("pfwd-%c%.3f") % x % c).str();
+	    this->m_feature_names.push_back(s);
+	  }
+	this->m_num_feats = this->m_feature_names.size();
+	UTIL_THROW_IF2(this->m_feature_names.size() != checksum, 
+		       "Unknown parameter in specification '" 
+		       << d << "' for Pfwd phrase scorer at " << HERE);
+      }
+      
+      // Store one value per denominator code in consecutive slots of 
+      // /dest/ (or of pp.fvals if /dest/ is NULL), starting at this 
+      // scorer's position. 
+      // NOTE: this modifies /pp/: a joint count that exceeds good1 is 
+      // clamped to good1 before scoring.
+      void 
+      operator()(Bitext<Token> const& bt, PhrasePair<Token> & pp, 
+		 vector<float> * dest = NULL) const
+      {
+	if (!dest) dest = &pp.fvals;
+	if (pp.joint > pp.good1) 
+	  {
+	    pp.joint = pp.good1;
+	    // cerr<<bt.toString(pp.p1,0)<<" ::: "<<bt.toString(pp.p2,1)<<endl;
+	    // cerr<<pp.joint<<"/"<<pp.good1<<"/"<<pp.raw2<<endl;
+	  }
+	size_t i = this->m_index;
+	BOOST_FOREACH(char const& c, this->denom)
+	  {
+	    // separators ('+') match no case and produce no value
+	    switch (c)
+	      {
+	      case 'g': 
+		(*dest)[i++] = log(lbop(pp.good1, pp.joint, conf)); 
+		break;
+	      case 's': 
+		(*dest)[i++] = log(lbop(pp.sample1, pp.joint, conf)); 
+		break;
+	      case 'r':
+		(*dest)[i++] = log(lbop(pp.raw1, pp.joint, conf)); 
+		break;
+	      }
+	  }
+      }
+    };
+  }
+}
+  
--- a/moses/TranslationModel/UG/sapt_pscore_provenance.h
+++ b/moses/TranslationModel/UG/sapt_pscore_provenance.h
@ -0,0 +1,47 @@
+// -*- c++ -*-
+// Phrase scorer that rewards the number of phrase pair occurrences in a bitext
+// with the asymptotic function j/(j+x) where x > 0 is a function
+// parameter that determines the steepness of the rewards curve
+// written by Ulrich Germann 
+#pragma once
+
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+#include <boost/foreach.hpp> // BOOST_FOREACH is used below
+
+using namespace std;
+namespace Moses {
+  namespace bitext {
+    
+    // asymptotic provenance feature n/(n+x)
+    template<typename Token>
+    class
+    PScoreProvenance : public SingleRealValuedParameterPhraseScorerFamily<Token>
+    {
+    public:
+
+      // /spec/ lists the parameter values x; one feature named 
+      // "prov-<x>" is created for each of them.
+      PScoreProvenance(string const& spec) 
+      {
+	this->m_tag = "prov";
+	this->init(spec);
+      }
+    
+      // values are plain ratios, not logs
+      bool
+      isLogVal(int i) const { return false; } 
+
+      // Store joint/(x + joint) for every parameter value x in 
+      // consecutive slots of /dest/ (or of pp.fvals if /dest/ is NULL), 
+      // starting at this scorer's position. Access is range-checked.
+      void 
+      operator()(Bitext<Token> const& bt, 
+		 PhrasePair<Token>& pp, 
+		 vector<float> * dest = NULL) const
+      {
+	if (!dest) dest = &pp.fvals;
+	size_t i = this->m_index;
+	BOOST_FOREACH(float const x, this->m_x)
+	  (*dest).at(i++) = pp.joint/(x + pp.joint);
+      }
+
+      bool
+      allowPooling() const 
+      { return false; }
+
+    };
+  } // namespace bitext
+} // namespace Moses
--- a/moses/TranslationModel/UG/sapt_pscore_rareness.h
+++ b/moses/TranslationModel/UG/sapt_pscore_rareness.h
@ -0,0 +1,41 @@
+// -*- c++ -*-
+// Phrase scorer that penalizes rare phrase pairs: the value x/(j+x)
+// decreases as the joint count j grows; x > 0 is a function
+// parameter that determines the steepness of the curve
+// written by Ulrich Germann 
+#pragma once
+
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+#include <boost/foreach.hpp> // BOOST_FOREACH is used below
+
+using namespace std;
+namespace Moses {
+  namespace bitext  {
+    
+    // rareness penalty: x/(n+x)
+    template<typename Token>
+    class
+    PScoreRareness : public SingleRealValuedParameterPhraseScorerFamily<Token>
+    {
+    public:
+      // /spec/ lists the parameter values x; one feature named 
+      // "rare-<x>" is created for each of them.
+      PScoreRareness(string const spec) 
+      {
+	this->m_tag = "rare";
+	this->init(spec);
+      }
+
+      // values are plain ratios, not logs
+      bool
+      isLogVal(int i) const { return false; } 
+
+      // Store x/(x + joint) for every parameter value x in consecutive 
+      // slots of /dest/ (or of pp.fvals if /dest/ is NULL), starting at 
+      // this scorer's position. Access is range-checked.
+      void 
+      operator()(Bitext<Token> const& bt, 
+		 PhrasePair<Token>& pp, 
+		 vector<float> * dest = NULL) const
+      {
+	if (!dest) dest = &pp.fvals;
+	size_t i = this->m_index;
+	BOOST_FOREACH(float const x, this->m_x)
+	  (*dest).at(i++) = x/(x + pp.joint);
+      }
+    };
+  } // namespace bitext
+} // namespace Moses
--- a/moses/TranslationModel/UG/sapt_pscore_unaligned.h
+++ b/moses/TranslationModel/UG/sapt_pscore_unaligned.h
@ -0,0 +1,67 @@
+// -*- c++ -*-
+// Phrase scorer that counts the number of unaligned words in the phrase
+// written by Ulrich Germann 
+#pragma once
+
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+#include <cstdlib>
+
+namespace Moses {
+  namespace bitext
+  {
+    template<typename Token>
+    class
+    PScoreUnaligned : public PhraseScorer<Token>
+    {
+      typedef boost::dynamic_bitset<uint64_t> bitvector;
+    public:
+      // /spec/ must be "1" or "2": 
+      // 1 => a single feature "unal" (both sides added up), 
+      // 2 => two features "unal-s" and "unal-t" (one per side). 
+      // Throws for any other value.
+      PScoreUnaligned(string const spec) 
+      {
+	this->m_index = -1;
+	int f = this->m_num_feats = atoi(spec.c_str());
+	UTIL_THROW_IF2(f != 1 && f != 2,"unal parameter must be 1 or 2 at "<<HERE);
+	this->m_feature_names.resize(f);
+	if (f == 1)
+	  this->m_feature_names[0] = "unal";
+	else
+	  {
+	    this->m_feature_names[0] = "unal-s";
+	    this->m_feature_names[1] = "unal-t";
+	  }
+      }
+    
+      // values are plain counts, not logs
+      bool
+      isLogVal(int i) const { return false; } 
+      
+      bool
+      isIntegerValued(int i) const { return true; } 
+
+      // Count the positions on each side of /pp/ that do not occur in 
+      // the alignment and store the count(s) at this scorer's position 
+      // in /dest/ (or in pp.fvals if /dest/ is NULL).
+      void 
+      operator()(Bitext<Token> const& bt, 
+		 PhrasePair<Token>& pp, 
+		 vector<float> * dest = NULL) const
+      {
+	if (!dest) dest = &pp.fvals;
+	// pp.aln is read as a flat sequence of pairs: first the position 
+	// on side 1, then the position on side 2. The second access is 
+	// range-checked, so a sequence of odd length throws.
+	bitvector check1(pp.len1),check2(pp.len2);
+	for (size_t i = 0; i < pp.aln.size(); )
+	  { 
+	    check1.set(pp.aln[i++]); 
+	    check2.set(pp.aln.at(i++)); 
+	  }
+
+	if (this->m_num_feats == 1)
+	  {
+	    (*dest)[this->m_index]  = pp.len1 - check1.count();
+	    (*dest)[this->m_index] += pp.len2 - check2.count();
+	  }
+	else
+	  {
+	    (*dest)[this->m_index]   = pp.len1 - check1.count();
+	    (*dest)[this->m_index+1] = pp.len2 - check2.count();
+	  }
+      }
+    };
+  } // namespace bitext
+} // namespace Moses
--- a/moses/TranslationModel/UG/sim-pe.cc
+++ b/moses/TranslationModel/UG/sim-pe.cc
@ -0,0 +1,83 @@
+#include "mmsapt.h"
+#include "moses/Manager.h"
+#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
+#include <boost/foreach.hpp>
+#include <boost/format.hpp>
+#include <boost/tokenizer.hpp>
+#include <boost/shared_ptr.hpp>
+#include <algorithm>
+#include <iostream>
+
+using namespace Moses;
+using namespace bitext;
+using namespace std;
+using namespace boost;
+
+vector<FactorType> fo(1,FactorType(0));
+
+// Print the target side of the hypothesis chain that ends in /x/: 
+// the chain is followed back via GetPrevHypo() and then printed in 
+// reverse, i.e. oldest hypothesis first. Factor 0 of every target word 
+// is written, each followed by a single space.
+ostream& 
+operator<<(ostream& out, Hypothesis const* x)
+{
+  // collect the chain, newest hypothesis first
+  vector<const Hypothesis*> chain;
+  for (const Hypothesis* cur = x; cur; cur = cur->GetPrevHypo())
+    chain.push_back(cur);
+
+  // walk it back to front
+  typedef vector<const Hypothesis*>::const_reverse_iterator riter;
+  for (riter it = chain.rbegin(); it != chain.rend(); ++it)
+    {
+      Phrase const& phrase = (*it)->GetCurrTargetPhrase();
+      for (size_t k = 0; k < phrase.GetSize(); ++k)
+	out << *phrase.GetFactor(k, 0) << " ";
+    }
+  return out;
+}
+
+vector<FactorType> ifo;
+size_t lineNumber;
+
+// Translate one line of input with the globally configured decoder and 
+// return the best hypothesis as a string (as printed by operator<< 
+// for Hypothesis const*).
+string 
+translate(string const& source)
+{
+  // parse the input line, using the global input factor order
+  istringstream in(source+"\n"); 
+  Sentence snt; 
+  snt.Read(in,ifo);
+
+  // run the search
+  StaticData const& sdata = StaticData::Instance();
+  Manager mgr(lineNumber, snt, sdata.GetSearchAlgorithm());
+  mgr.ProcessSentence();
+  
+  // render the best hypothesis
+  const Hypothesis* best = mgr.GetBestHypothesis();
+  ostringstream out;
+  out << best;
+  return out.str();
+}
+
+// Simulated post-editing loop. Reads records of three lines (source, 
+// target, alignment) from stdin. For each record it prints the source 
+// ([S]), its translation ([H]) and the target ([T]), adds the record to 
+// the first phrase table, and prints the translation of the same source 
+// again ([X]).
+int main(int argc, char* argv[])
+{
+  Parameter params;
+  if (!params.LoadParam(argc,argv) || !StaticData::LoadDataStatic(&params, argv[0]))
+    exit(1);
+  
+  StaticData const& global = StaticData::Instance();
+  global.SetVerboseLevel(0);
+  ifo = global.GetInputFactorOrder();
+
+  // The first phrase table must be an Mmsapt, since records are added to
+  // it below. Check this once, instead of reinterpreting whatever object
+  // happens to be first in the collection.
+  if (PhraseDictionary::GetColl().empty())
+    {
+      cerr << "No phrase table configured at " << HERE << endl;
+      exit(1);
+    }
+  Mmsapt* pdsa = dynamic_cast<Mmsapt*>(PhraseDictionary::GetColl()[0]);
+  if (!pdsa)
+    {
+      cerr << "First phrase table is not an Mmsapt at " << HERE << endl;
+      exit(1);
+    }
+
+  lineNumber = 0; // TODO: Include sentence request number here?
+  string source, target, alignment;
+  while (getline(cin,source))
+    {
+      // A record that lacks its target or alignment line must not be 
+      // added: after a failed read, the strings still hold the data of 
+      // the previous record.
+      if (!getline(cin,target) || !getline(cin,alignment))
+	{
+	  cerr << "Incomplete input record (source without target "
+	       << "or alignment) at " << HERE << endl;
+	  exit(1);
+	}
+      cout << "[S] " << source << endl;
+      cout << "[H] " << translate(source) << endl;
+      cout << "[T] " << target << endl;
+      pdsa->add(source,target,alignment);
+      cout << "[X] " << translate(source) << endl;
+      cout << endl;
+    }
+  exit(0);
+}
+  
+  
+
--- a/moses/TranslationModel/UG/try-align.cc
+++ b/moses/TranslationModel/UG/try-align.cc
@ -2,32 +2,33 @@
 using namespace std;
 using namespace Moses;

+// currently broken

 Mmsapt* PT;
 int main(int argc, char* argv[])
 {
-  string base = argv[1];
-  string L1   = argv[2];
-  string L2   = argv[3];
-  ostringstream buf;
-  buf << "Mmsapt name=PT0 output-factor=0 num-features=5 base="
-      << base << " L1=" << L1 << " L2=" << L2;
-  string configline = buf.str();
-  PT = new Mmsapt(configline);
-  PT->Load();
-  float w[] = { 0.0582634, 0.0518865, 0.0229819, 0.00640856,  0.647506 };
-  vector<float> weights(w,w+5);
-  PT->setWeights(weights);
-  // these values are taken from a moses.ini file;
-  // is there a convenient way of accessing them from within mmsapt ???
-  string eline,fline;
-  // TokenIndex V; V.open("crp/trn/mm/de.tdx");
-  while (getline(cin,eline) && getline(cin,fline))
-    {
-      cout << eline << endl;
-      cout << fline << endl;
-      PT->align(eline,fline);
-    }
-  delete PT;
+  // string base = argv[1];
+  // string L1   = argv[2];
+  // string L2   = argv[3];
+  // ostringstream buf;
+  // buf << "Mmsapt name=PT0 output-factor=0 num-features=5 base="
+  //     << base << " L1=" << L1 << " L2=" << L2;
+  // string configline = buf.str();
+  // PT = new Mmsapt(configline);
+  // PT->Load();
+  // float w[] = { 0.0582634, 0.0518865, 0.0229819, 0.00640856,  0.647506 };
+  // vector<float> weights(w,w+5);
+  // PT->setWeights(weights);
+  // // these values are taken from a moses.ini file;
+  // // is there a convenient way of accessing them from within mmsapt ???
+  // string eline,fline;
+  // // TokenIndex V; V.open("crp/trn/mm/de.tdx");
+  // while (getline(cin,eline) && getline(cin,fline))
+  //   {
+  //     cout << eline << endl;
+  //     cout << fline << endl;
+  //     PT->align(eline,fline);
+  //   }
+  // delete PT;
 }

--- a/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp
+++ b/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp
@ -345,10 +345,10 @@ string FuzzyMatchWrapper::ExtractTM(WordIndex &wordIndex, long translationId, co
    // find the best matches according to letter sed
    string best_path = "";
    int best_match = -1;
-    int best_letter_cost;
+    unsigned int best_letter_cost;
    if (lsed_flag) {
      best_letter_cost = compute_length( input[sentenceInd] ) * min_match / 100 + 1;
-      for(int si=0; si<best_tm.size(); si++) {
+      for(size_t si=0; si<best_tm.size(); si++) {
        int s = best_tm[si];
        string path;
        unsigned int letter_cost = sed( input[sentenceInd], source[s], path, true );
--- a/moses/TypeDef.h
+++ b/moses/TypeDef.h
@ -59,7 +59,11 @@ const size_t DEFAULT_MAX_HYPOSTACK_SIZE = 200;
 const size_t DEFAULT_MAX_TRANS_OPT_CACHE_SIZE = 10000;
 const size_t DEFAULT_MAX_TRANS_OPT_SIZE	= 5000;
 const size_t DEFAULT_MAX_PART_TRANS_OPT_SIZE = 10000;
-const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
+#ifdef PT_UG
+  const size_t DEFAULT_MAX_PHRASE_LENGTH = -1;
+#else
+ const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
+#endif
 const size_t DEFAULT_MAX_CHART_SPAN			= 10;
 const size_t ARRAY_SIZE_INCR					= 10; //amount by which a phrase gets resized when necessary
 const float LOWEST_SCORE							= -100.0f;
--- a/moses/Util.h
+++ b/moses/Util.h
@ -56,8 +56,12 @@ namespace Moses

 /** verbose macros
 * */
+
 #define VERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR(str); } }
 #define IFVERBOSE(level) if (StaticData::Instance().GetVerboseLevel() >= level)
+#define XVERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR("[" << __FILE__ << ":" << __LINE__ << "] ");TRACE_ERR(str); } }
+#define HERE __FILE__ << ":" << __LINE__
+

 #if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && (__GNUC_PATCHLEVEL__ == 1 || __GNUC_PATCHLEVEL__ == 2)
 // gcc nth_element() bug
--- a/scripts/server/moses.py
+++ b/scripts/server/moses.py
@ -152,7 +152,7 @@ def find_free_port(p):

 class MosesServer(ProcessWrapper):

-  def __init__(self,args=["-fd", "\n"]):
+  def __init__(self,args=[]):
    self.process = None
    mserver_cmd  = moses_root+"/bin/mosesserver"
    self.cmd = [mserver_cmd] + args 
@ -175,7 +175,10 @@ class MosesServer(ProcessWrapper):
    self.cmd.extend(["--server-port", "%d"%self.port])
    if debug:
      print >>sys.stderr,self.cmd
-      self.process = Popen(self.cmd,stderr = sys.stderr)
+      # self.stderr = open("mserver.%d.stderr"%self.port,'w')
+      # self.stdout = open("mserver.%d.stdout"%self.port,'w')
+      # self.process = Popen(self.cmd,stderr = self.stderr,stdout = self.stdout)
+      self.process = Popen(self.cmd)
    else:
      devnull = open(os.devnull,"w")
      self.process = Popen(self.cmd, stderr=devnull, stdout=devnull)
@ -216,10 +219,13 @@ class MosesServer(ProcessWrapper):

        elif type(input) is list:
          return [self.translate(x) for x in input]
+
        elif type(input) is dict:
          return self.proxy.translate(input)
+
        else:
          raise Exception("Can't handle input of this type!")
+
      except:
        attempts += 1
        print >>sys.stderr, "WAITING", attempts
--- a/scripts/server/sim-pe.py
+++ b/scripts/server/sim-pe.py
@ -127,13 +127,40 @@ def translate(proxy, args, line):
        param['nbest-distinct'] = True
        pass
    attempts = 0
-    while attempts < 120:
+    while attempts < 20:
+        t1 = time.time()
        try:
-            return proxy.translate(param)
-        except:
-            print >>sys.stderr, "Waiting", proxy
-            attempts += 1
+            return proxy.translate(param) 
+
+        # except xmlrpclib.Fault as e:
+        # except xmlrpclib.ProtocolError as e:
+        # except xmlrpclib.ResponseError as e:
+        except xmlrpclib.Error as e:
+            time.sleep(2) # give all the stderr stuff a chance to be flushed
+            print >>sys.stderr," XMLRPC error:",e
+            print >>sys.stderr, "Input was"
+            print >>sys.stderr, param
+            sys.exit(1)
+
+        except IOError as e:
+            print >>sys.stderr,"I/O error({0}): {1}".format(e.errno, e.strerror)
            time.sleep(5)
+
+        except:
+            serverstatus = mserver.process.poll()
+            if serverstatus == None:
+                print >>sys.stderr, "Connection failed after %f seconds"%(time.time()-t1)
+                attempts += 1
+                if attempts > 10:
+                    time.sleep(10)
+                else:
+                    time.sleep(5)
+                    pass
+            else:
+                
+                print >>sys.stderr, "Oopsidaisy, server exited with code %d (signal %d)"\
+                    %(serverstatus/256,serverstatus%256)
+                pass
            pass
        pass
    raise Exception("Exception: could not reach translation server.")
@ -210,17 +237,25 @@ if __name__ == "__main__":
            pass
        pass

-    if args.url:
-        mserver.connect(args.url)
-    else:
-        mserver.start(args=mo_args,port=args.port,debug=args.debug)
-        pass
-
    ref = None
    aln = None
    if args.ref: ref = read_data(args.ref)
    if args.aln: aln = read_data(args.aln)

+    if ref and aln:
+        try:
+            mo_args.index("--serial")
+        except:
+            mo_args.append("--serial")
+            pass
+        pass
+
+    if args.url:
+        mserver.connect(args.url)
+    else:
+        mserver.start(args=mo_args, port=args.port, debug=args.debug)
+        pass
+
    if (args.input == "-"):
        line = sys.stdin.readline()
        idx = 0