Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Kenneth Heafield 2015-09-10 16:52:26 +02:00
commit 37f7326057
395 changed files with 11546 additions and 12722 deletions

View File

@ -21,6 +21,9 @@ mingw/MosesGUI/icons_rc.py
mingw/MosesGUI/Ui_credits.py
mingw/MosesGUI/Ui_mainWindow.py
moses/TranslationModel/UG
moses/server
moses/parameters
moses/thread_safe_container.h
phrase-extract/pcfg-common
phrase-extract/syntax-common
randlm
@ -32,3 +35,4 @@ srilm
util
xmlrpc-c
.git
util/ug_cache_with_timeout.h

View File

@ -133,7 +133,9 @@ if [ option.get "filter-warnings" : : "yes" ] {
requirements += <cxxflags>-Wno-unused-but-set-variable ;
requirements += <cxxflags>-Wno-unused-result ;
requirements += <cxxflags>-Wno-unused-variable ;
requirements += <cxxflags>-Wcomment ;
requirements += <cxxflags>-Wno-comment ;
requirements += <cxxflags>-Wno-strict-aliasing ;
requirements += <cxxflags>-Wno-overloaded-virtual ;
}
if [ option.get "debug-build" : : "yes" ] {
@ -179,7 +181,7 @@ if [ option.get "with-icu" : : "yes" ]
requirements += <library>icui18n/<link>shared ;
requirements += <cxxflags>-fPIC ;
requirements += <address-model>64 ;
requirements += <runtime-link>shared ;
# requirements += <runtime-link>shared ;
}
if [ option.get "with-probing-pt" : : "yes" ]
@ -301,5 +303,5 @@ if [ path.exists $(TOP)/dist ] && $(prefix) != dist {
#local temp = [ _shell "bash source ./s.sh" ] ;
local temp = [ _shell "mkdir -p $(TOP)/bin" ] ;
local temp = [ _shell "rm $(TOP)/bin/moses_chart" ] ;
local temp = [ _shell "rm -f $(TOP)/bin/moses_chart" ] ;
local temp = [ _shell "cd $(TOP)/bin && ln -s moses moses_chart" ] ;

View File

@ -21,6 +21,11 @@ SuffixArray::SuffixArray()
m_wordInSentence(NULL),
m_sentence(NULL),
m_sentenceLength(NULL),
m_document(NULL),
m_documentName(NULL),
m_documentNameLength(0),
m_documentCount(0),
m_useDocument(false),
m_vcb(),
m_size(0),
m_sentenceCount(0) { }
@ -32,6 +37,8 @@ SuffixArray::~SuffixArray()
free(m_wordInSentence);
free(m_sentence);
free(m_sentenceLength);
free(m_document);
free(m_documentName);
}
void SuffixArray::Create(const string& fileName )
@ -46,22 +53,32 @@ void SuffixArray::Create(const string& fileName )
textFile.open(fileName.c_str());
if (!textFile) {
cerr << "no such file or directory " << fileName << endl;
cerr << "Error: no such file or directory " << fileName << endl;
exit(1);
}
// first pass through data: get size
istream *fileP = &textFile;
m_size = 0;
m_sentenceCount = 0;
m_documentCount = 0;
while(!fileP->eof()) {
SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
if (fileP->eof()) break;
if (m_useDocument && ProcessDocumentLine(line,0)) continue;
vector< WORD_ID > words = m_vcb.Tokenize( line );
m_size += words.size() + 1;
m_sentenceCount++;
}
textFile.close();
cerr << m_size << " words (incl. sentence boundaries)" << endl;
if (m_useDocument) {
cerr << m_documentCount << " documents" << endl;
if (m_documentCount == 0) {
cerr << "Error: no documents found, aborting." << endl;
exit(1);
}
}
// allocate memory
m_array = (WORD_ID*) calloc( sizeof( WORD_ID ), m_size );
@ -69,21 +86,31 @@ void SuffixArray::Create(const string& fileName )
m_wordInSentence = (char*) calloc( sizeof( char ), m_size );
m_sentence = (INDEX*) calloc( sizeof( INDEX ), m_size );
m_sentenceLength = (char*) calloc( sizeof( char ), m_sentenceCount );
// fill the array
int wordIndex = 0;
int sentenceId = 0;
textFile.open(fileName.c_str());
if (!textFile) {
cerr << "no such file or directory " << fileName << endl;
exit(1);
CheckAllocation(m_array != NULL, "m_array");
CheckAllocation(m_index != NULL, "m_index");
CheckAllocation(m_wordInSentence != NULL, "m_wordInSentence");
CheckAllocation(m_sentence != NULL, "m_sentence");
CheckAllocation(m_sentenceLength != NULL, "m_sentenceLength");
if (m_useDocument) {
m_document = (INDEX*) calloc( sizeof( INDEX ), m_documentCount );
m_documentName = (INDEX*) calloc( sizeof( char ), m_documentCount );
m_documentNameBuffer = (char*) calloc( sizeof( char ), m_documentNameLength );
CheckAllocation(m_document != NULL, "m_document");
CheckAllocation(m_documentName != NULL, "m_documentName");
CheckAllocation(m_documentNameBuffer != NULL, "m_documentNameBuffer");
}
// second pass through data: fill the arrays
int wordIndex = 0;
int sentenceId = 0;
m_documentNameLength = 0; // re-use as counter
m_documentCount = 0; // re-use as counter
textFile.open(fileName.c_str());
fileP = &textFile;
while(!fileP->eof()) {
SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
if (fileP->eof()) break;
if (m_useDocument && ProcessDocumentLine(line,sentenceId)) continue;
vector< WORD_ID > words = m_vcb.Tokenize( line );
vector< WORD_ID >::const_iterator i;
@ -105,7 +132,7 @@ void SuffixArray::Create(const string& fileName )
m_buffer = (INDEX*) calloc( sizeof( INDEX ), m_size );
if (m_buffer == NULL) {
cerr << "cannot allocate memory to m_buffer" << endl;
cerr << "Error: cannot allocate memory to m_buffer" << endl;
exit(1);
}
@ -114,6 +141,45 @@ void SuffixArray::Create(const string& fileName )
cerr << "done sorting" << endl;
}
// very specific code to deal with common crawl document ids
// Checks whether a corpus line is a document-header line of the form
//   "<32-char lowercase hex hash> <float score> <url>"
// Returns true (and records the document boundary) if so; returns false so
// the caller treats the line as an ordinary sentence otherwise.
// Called once per line in each of the two passes over the corpus: in the
// first (counting) pass m_document is still NULL and only the counters are
// advanced; in the second pass the name buffer and sentence index are filled.
bool SuffixArray::ProcessDocumentLine( const char *line, const size_t sentenceId )
{
size_t i;
// first 32 characters are hex-hash
// (a line shorter than 32 chars also fails here, since '\0' < '0')
for(i=0; i<32; i++) {
if ((line[i] < '0' || line[i] > '9') && (line[i] < 'a' || line[i] > 'f')) {
return false;
}
}
if (line[i++] != ' ') return false;
// second token is float
for (; line[i] != ' ' && line[i] != 0; i++) {
if (line[i] != '.' && (line[i] < '0' || line[i] > '9')) {
return false;
}
}
i++;
// last token is url (=name)
size_t startName = i;
for (; line[i] != ' ' && line[i] != 0; i++) {}
// a space here means there is a fourth token -> not a header line
if (line[i] == ' ') return false;
size_t endName = i+1; // include '\0'
// second pass: record name and sentence number
// (m_document is allocated only before the second pass; NULL in the first)
if (m_document != NULL) {
// m_documentName[d] is the offset of document d's name in the buffer
m_documentName[m_documentCount] = m_documentNameLength;
// copy the url, including its terminating '\0', into the shared buffer
// (this inner size_t i deliberately shadows the outer i)
for(size_t i=startName; i<endName; i++) {
m_documentNameBuffer[m_documentNameLength + i-startName] = line[i];
}
// m_document[d] = id of the first sentence belonging to document d
m_document[m_documentCount] = sentenceId;
}
// the two members below double as counters during the first pass and as
// write cursors during the second pass (they are reset between passes)
m_documentNameLength += endName-startName;
m_documentCount++;
return true;
}
// good ol' quick sort
void SuffixArray::Sort(INDEX start, INDEX end)
{
@ -162,7 +228,6 @@ int SuffixArray::CompareIndex( INDEX a, INDEX b ) const
// Order two word ids by the lexicographic order of their surface strings,
// as looked up in the vocabulary. Returns <0, 0, or >0 like string::compare.
inline int SuffixArray::CompareWord( WORD_ID a, WORD_ID b ) const
{
  const WORD &surfaceA = m_vcb.GetWord( a );
  const WORD &surfaceB = m_vcb.GetWord( b );
  return surfaceA.compare( surfaceB );
}
@ -272,13 +337,73 @@ void SuffixArray::List(INDEX start, INDEX end)
}
}
// Print every corpus sentence containing the given phrase to stdout.
// Output format: a header "QUERY\t<phrase>\t<count> matches" line, then one
// line per match: optionally "<document name>\t" (when document tracking is
// enabled via UseDocument()), followed by the full matching sentence.
void SuffixArray::PrintSentenceMatches( const std::vector< WORD > &phrase )
{
cout << "QUERY\t";
for(size_t i=0; i<phrase.size(); i++) {
if (i>0) cout << " ";
cout << phrase[i];
}
cout << '\t';
INDEX start = 0;
INDEX end = m_size-1;
// FindFirst locates some occurrence in the suffix array;
// it returns m_size when the phrase does not occur (see branch below)
INDEX mid = FindFirst( phrase, start, end );
if (mid == m_size) { // no matches
cout << "0 matches" << endl;
return;
}
// matches are contiguous in the sorted suffix array: expand from the hit
// downwards (-1) and upwards (+1) to the full range of occurrences
INDEX firstMatch = FindLast( phrase, mid, start, -1 );
INDEX lastMatch = FindLast( phrase, mid, end, 1 );
// loop through all matches
cout << (lastMatch-firstMatch+1) << " matches" << endl;
for(INDEX i=firstMatch; i<=lastMatch; i++) {
// get sentence information
INDEX pos = GetPosition( i );
// NOTE(review): this `start` (first word of the sentence) shadows the
// outer `start` search bound above — intentional but easy to misread
INDEX start = pos - GetWordInSentence( pos );
// sentence lengths are stored as char, so lengths are limited by the
// range of char — presumably the corpus preprocessing guarantees this
char length = GetSentenceLength( GetSentence( pos ) );
// print document name
if (m_useDocument) {
INDEX sentence = GetSentence( pos );
INDEX document = GetDocument( sentence );
PrintDocumentName( document );
cout << '\t';
}
// print sentence
// (this char `i` shadows the INDEX match-loop `i` above)
for(char i=0; i<length; i++) {
if (i>0) cout << " ";
cout << GetWord( start + i );
}
cout << endl;
}
}
// Map a sentence id to the index of the document that contains it.
// m_document[] holds, in increasing order, the id of the first sentence of
// each document, so this is a binary search for the enclosing interval.
SuffixArray::INDEX SuffixArray::GetDocument( INDEX sentence ) const
{
// binary search
INDEX min = 0;
INDEX max = m_documentCount-1;
// sentences at or past the last document's start all belong to it
if (sentence >= m_document[max]) {
return max;
}
while(true) {
INDEX mid = (min + max) / 2;
if (sentence >= m_document[mid] && sentence < m_document[mid+1]) {
return mid;
}
if (sentence < m_document[mid]) {
// NOTE(review): if sentence < m_document[0] this decrement would
// underflow when mid == 0 and INDEX is unsigned; presumably the first
// document starts at sentence 0 so that cannot happen — confirm.
max = mid-1;
} else {
min = mid+1;
}
}
}
void SuffixArray::Save(const string& fileName ) const
{
FILE *pFile = fopen ( fileName.c_str() , "w" );
if (pFile == NULL) {
cerr << "Cannot open " << fileName << endl;
exit(1);
}
if (pFile == NULL) Error("cannot open",fileName);
fwrite( &m_size, sizeof(INDEX), 1, pFile );
fwrite( m_array, sizeof(WORD_ID), m_size, pFile ); // corpus
@ -288,6 +413,16 @@ void SuffixArray::Save(const string& fileName ) const
fwrite( &m_sentenceCount, sizeof(INDEX), 1, pFile );
fwrite( m_sentenceLength, sizeof(char), m_sentenceCount, pFile); // sentence length
char useDocument = m_useDocument; // not sure if that is needed
fwrite( &useDocument, sizeof(char), 1, pFile );
if (m_useDocument) {
fwrite( &m_documentCount, sizeof(INDEX), 1, pFile );
fwrite( m_document, sizeof(INDEX), m_documentCount, pFile );
fwrite( m_documentName, sizeof(INDEX), m_documentCount, pFile );
fwrite( &m_documentNameLength, sizeof(INDEX), 1, pFile );
fwrite( m_documentNameBuffer, sizeof(char), m_documentNameLength, pFile );
}
fclose( pFile );
m_vcb.Save( fileName + ".src-vcb" );
@ -296,56 +431,81 @@ void SuffixArray::Save(const string& fileName ) const
void SuffixArray::Load(const string& fileName )
{
FILE *pFile = fopen ( fileName.c_str() , "r" );
if (pFile == NULL) {
cerr << "no such file or directory " << fileName << endl;
exit(1);
}
if (pFile == NULL) Error("no such file or directory", fileName);
cerr << "loading from " << fileName << endl;
fread( &m_size, sizeof(INDEX), 1, pFile );
fread( &m_size, sizeof(INDEX), 1, pFile )
|| Error("could not read m_size from", fileName);
cerr << "words in corpus: " << m_size << endl;
m_array = (WORD_ID*) calloc( sizeof( WORD_ID ), m_size );
m_index = (INDEX*) calloc( sizeof( INDEX ), m_size );
m_wordInSentence = (char*) calloc( sizeof( char ), m_size );
m_sentence = (INDEX*) calloc( sizeof( INDEX ), m_size );
CheckAllocation(m_array != NULL, "m_array");
CheckAllocation(m_index != NULL, "m_index");
CheckAllocation(m_wordInSentence != NULL, "m_wordInSentence");
CheckAllocation(m_sentence != NULL, "m_sentence");
fread( m_array, sizeof(WORD_ID), m_size, pFile ) // corpus
|| Error("could not read m_array from", fileName);
fread( m_index, sizeof(INDEX), m_size, pFile ) // suffix array
|| Error("could not read m_index from", fileName);
fread( m_wordInSentence, sizeof(char), m_size, pFile) // word index
|| Error("could not read m_wordInSentence from", fileName);
fread( m_sentence, sizeof(INDEX), m_size, pFile ) // sentence index
|| Error("could not read m_sentence from", fileName);
if (m_array == NULL) {
cerr << "Error: cannot allocate memory to m_array" << endl;
exit(1);
}
if (m_index == NULL) {
cerr << "Error: cannot allocate memory to m_index" << endl;
exit(1);
}
if (m_wordInSentence == NULL) {
cerr << "Error: cannot allocate memory to m_wordInSentence" << endl;
exit(1);
}
if (m_sentence == NULL) {
cerr << "Error: cannot allocate memory to m_sentence" << endl;
exit(1);
}
fread( m_array, sizeof(WORD_ID), m_size, pFile ); // corpus
fread( m_index, sizeof(INDEX), m_size, pFile ); // suffix array
fread( m_wordInSentence, sizeof(char), m_size, pFile); // word index
fread( m_sentence, sizeof(INDEX), m_size, pFile); // sentence index
fread( &m_sentenceCount, sizeof(INDEX), 1, pFile );
fread( &m_sentenceCount, sizeof(INDEX), 1, pFile )
|| Error("could not read m_sentenceCount from", fileName);
cerr << "sentences in corpus: " << m_sentenceCount << endl;
m_sentenceLength = (char*) calloc( sizeof( char ), m_sentenceCount );
if (m_sentenceLength == NULL) {
cerr << "Error: cannot allocate memory to m_sentenceLength" << endl;
exit(1);
m_sentenceLength = (char*) calloc( sizeof( char ), m_sentenceCount );
CheckAllocation(m_sentenceLength != NULL, "m_sentenceLength");
fread( m_sentenceLength, sizeof(char), m_sentenceCount, pFile) // sentence length
|| Error("could not read m_sentenceLength from", fileName);
if (m_useDocument) { // do not read it when you do not need it
char useDocument;
fread( &useDocument, sizeof(char), 1, pFile )
|| Error("could not read m_useDocument from", fileName);
if (!useDocument) {
cerr << "Error: stored suffix array does not have a document index\n";
exit(1);
}
fread( &m_documentCount, sizeof(INDEX), 1, pFile )
|| Error("could not read m_documentCount from", fileName);
m_document = (INDEX*) calloc( sizeof( INDEX ), m_documentCount );
m_documentName = (INDEX*) calloc( sizeof( INDEX ), m_documentCount );
CheckAllocation(m_document != NULL, "m_document");
CheckAllocation(m_documentName != NULL, "m_documentName");
fread( m_document, sizeof(INDEX), m_documentCount, pFile )
|| Error("could not read m_document from", fileName);
fread( m_documentName, sizeof(INDEX), m_documentCount, pFile )
|| Error("could not read m_documentName from", fileName);
fread( &m_documentNameLength, sizeof(INDEX), 1, pFile )
|| Error("could not read m_documentNameLength from", fileName);
m_documentNameBuffer = (char*) calloc( sizeof( char ), m_documentNameLength );
CheckAllocation(m_documentNameBuffer != NULL, "m_documentNameBuffer");
fread( m_documentNameBuffer, sizeof(char), m_documentNameLength, pFile )
|| Error("could not read m_document from", fileName);
}
fread( m_sentenceLength, sizeof(char), m_sentenceCount, pFile); // sentence length
fclose( pFile );
m_vcb.Load( fileName + ".src-vcb" );
}
// Verify an allocation (or any boolean precondition); on failure, print a
// diagnostic naming the data structure and terminate the program.
void SuffixArray::CheckAllocation( bool check, const char *dataStructure ) const
{
  if (!check) {
    cerr << "Error: could not allocate memory for " << dataStructure << endl;
    exit(1);
  }
}
// Report a fatal error mentioning the offending file and terminate.
// Declared to return bool (though it never actually returns) so that it can
// be chained after fread() calls with ||, e.g.:
//   fread( &m_size, sizeof(INDEX), 1, pFile ) || Error("could not read", f);
bool SuffixArray::Error( const char *message, const string &fileName) const
{
cerr << "Error: " << message << " " << fileName << endl;
exit(1);
return true; // yeah, i know.
}

View File

@ -15,6 +15,12 @@ private:
INDEX *m_sentence;
char *m_sentenceLength;
WORD_ID m_endOfSentence;
INDEX *m_document;
INDEX *m_documentName;
char *m_documentNameBuffer;
size_t m_documentNameLength;
size_t m_documentCount;
bool m_useDocument;
Vocabulary m_vcb;
INDEX m_size;
INDEX m_sentenceCount;
@ -28,6 +34,7 @@ public:
~SuffixArray();
void Create(const std::string& fileName );
bool ProcessDocumentLine( const char* const, const size_t );
void Sort(INDEX start, INDEX end);
int CompareIndex( INDEX a, INDEX b ) const;
inline int CompareWord( WORD_ID a, WORD_ID b ) const;
@ -40,6 +47,7 @@ public:
INDEX FindLast( const std::vector< WORD > &phrase, INDEX start, INDEX end, int direction );
int Match( const std::vector< WORD > &phrase, INDEX index );
void List( INDEX start, INDEX end );
void PrintSentenceMatches( const std::vector< WORD > &phrase );
inline INDEX GetPosition( INDEX index ) const {
return m_index[ index ];
}
@ -58,6 +66,17 @@ public:
inline WORD GetWord( INDEX position ) const {
return m_vcb.GetWord( m_array[position] );
}
void UseDocument() {
m_useDocument = true;
}
INDEX GetDocument( INDEX sentence ) const;
void PrintDocumentName( INDEX document ) {
for(INDEX i=m_documentName[ document ]; m_documentNameBuffer[i] != 0; i++) {
std::cout << m_documentNameBuffer[ i ];
}
}
void Save(const std::string& fileName ) const;
void Load(const std::string& fileName );
void CheckAllocation(bool, const char *dataStructure) const;
bool Error( const char* message, const std::string& fileName) const;
};

View File

@ -1,4 +1,5 @@
#include "SuffixArray.h"
#include "../util/tokenize.hh"
#include <getopt.h>
using namespace std;
@ -13,10 +14,12 @@ int main(int argc, char* argv[])
string query;
string fileNameSuffix;
string fileNameSource;
int loadFlag = false;
int saveFlag = false;
int createFlag = false;
int queryFlag = false;
bool loadFlag = false;
bool saveFlag = false;
bool createFlag = false;
bool queryFlag = false;
bool querySentenceFlag = false;
int stdioFlag = false; // receive requests from STDIN, respond to STDOUT
string info = "usage: biconcor\n\t[--load model-file]\n\t[--save model-file]\n\t[--create corpus]\n\t[--query string]\n\t[--stdio]\n";
while(1) {
@ -25,11 +28,14 @@ int main(int argc, char* argv[])
{"save", required_argument, 0, 's'},
{"create", required_argument, 0, 'c'},
{"query", required_argument, 0, 'q'},
{"query-sentence", required_argument, 0, 'Q'},
{"document", required_argument, 0, 'd'},
{"stdio", no_argument, 0, 'i'},
{"stdio-sentence", no_argument, 0, 'I'},
{0, 0, 0, 0}
};
int option_index = 0;
int c = getopt_long (argc, argv, "l:s:c:q:i", long_options, &option_index);
int c = getopt_long (argc, argv, "l:s:c:q:Q:iId", long_options, &option_index);
if (c == -1) break;
switch (c) {
case 'l':
@ -48,17 +54,25 @@ int main(int argc, char* argv[])
query = string(optarg);
queryFlag = true;
break;
case 'Q':
query = string(optarg);
querySentenceFlag = true;
break;
case 'i':
stdioFlag = true;
break;
case 'I':
stdioFlag = true;
querySentenceFlag = true;
break;
case 'd':
suffixArray.UseDocument();
break;
default:
cerr << info;
exit(1);
}
}
if (stdioFlag) {
queryFlag = true;
}
// check if parameter settings are legal
if (saveFlag && !createFlag) {
@ -74,7 +88,7 @@ int main(int argc, char* argv[])
exit(1);
}
// do your thing
// get suffix array
if (createFlag) {
cerr << "will create\n";
cerr << "corpus is in " << fileNameSource << endl;
@ -88,16 +102,26 @@ int main(int argc, char* argv[])
cerr << "will load from " << fileNameSuffix << endl;
suffixArray.Load( fileNameSuffix );
}
// do something with it
if (stdioFlag) {
while(true) {
string query;
if (getline(cin, query, '\n').eof()) {
return 0;
}
cout << lookup( query ) << endl;
if (querySentenceFlag) {
vector< string > queryString = util::tokenize( query.c_str() );
suffixArray.PrintSentenceMatches( queryString );
} else {
cout << lookup( query ) << endl;
}
}
} else if (queryFlag) {
cout << lookup( query ) << endl;
} else if (querySentenceFlag) {
vector< string > queryString = util::tokenize( query.c_str() );
suffixArray.PrintSentenceMatches( queryString );
}
return 0;
}
@ -105,32 +129,6 @@ int main(int argc, char* argv[])
size_t lookup( string query )
{
cerr << "query is " << query << endl;
vector< string > queryString = tokenize( query.c_str() );
vector< string > queryString = util::tokenize( query.c_str() );
return suffixArray.Count( queryString );
}
// Duplicate of definition in util/tokenize.hh.
// TODO: Can we de-duplicate this? At the time of writing biconcor does not
// use util at all.
// Split a NUL-terminated C string on spaces and tabs, skipping empty runs,
// and return the resulting tokens in order.
vector<string> tokenize(const char input[])
{
  vector< string > token;
  size_t pos = 0;
  while (input[pos] != '\0') {
    // skip over a run of separator characters
    if (input[pos] == ' ' || input[pos] == '\t') {
      ++pos;
      continue;
    }
    // collect a maximal run of non-separator characters as one token
    const size_t begin = pos;
    while (input[pos] != '\0' && input[pos] != ' ' && input[pos] != '\t') {
      ++pos;
    }
    token.push_back( string( input + begin, pos - begin ) );
  }
  return token;
}

View File

@ -42,6 +42,7 @@
<option id="gnu.cpp.link.option.libs.1325292383" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="cmph"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
@ -59,6 +60,7 @@
</option>
<option id="gnu.cpp.link.option.paths.815001500" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>

View File

@ -11,12 +11,12 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -72,13 +72,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.701931933" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@ -102,9 +102,14 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h</locationURI>
</link>
<link>
<name>SyntaxTree.cpp</name>
<name>SyntaxNodeCollection.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.cpp</locationURI>
</link>
<link>
<name>SyntaxNodeCollection.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.h</locationURI>
</link>
<link>
<name>SyntaxTree.h</name>

View File

@ -81,9 +81,14 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h</locationURI>
</link>
<link>
<name>SyntaxTree.cpp</name>
<name>SyntaxNodeCollection.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.cpp</locationURI>
</link>
<link>
<name>SyntaxNodeCollection.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.h</locationURI>
</link>
<link>
<name>SyntaxTree.h</name>

View File

@ -83,6 +83,16 @@
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>InternalTree.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mert/InternalTree.cpp</locationURI>
</link>
<link>
<name>InternalTree.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mert/InternalTree.h</locationURI>
</link>
<link>
<name>bin</name>
<type>2</type>

View File

@ -546,26 +546,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/interpolate.hh</locationURI>
</link>
<link>
<name>builder/joint_order.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/joint_order.hh</locationURI>
</link>
<link>
<name>builder/lmplz_main.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/lmplz_main.cc</locationURI>
</link>
<link>
<name>builder/ngram.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/ngram.hh</locationURI>
</link>
<link>
<name>builder/ngram_stream.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/ngram_stream.hh</locationURI>
</link>
<link>
<name>builder/pipeline.cc</name>
<type>1</type>
@ -576,21 +561,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/pipeline.hh</locationURI>
</link>
<link>
<name>builder/print.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/print.cc</locationURI>
</link>
<link>
<name>builder/print.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/print.hh</locationURI>
</link>
<link>
<name>builder/sort.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/sort.hh</locationURI>
</link>
<link>
<name>filter/Jamfile</name>
<type>1</type>

View File

@ -11,15 +11,15 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.debug.1721952013" name="Debug" parent="cdt.managedbuild.config.gnu.lib.debug">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.debug.1721952013" name="Debug" parent="cdt.managedbuild.config.gnu.lib.debug">
<folderInfo id="cdt.managedbuild.config.gnu.lib.debug.1721952013." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.lib.debug.1932340583" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.lib.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.lib.debug.296711714" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.lib.debug"/>
@ -32,6 +32,9 @@
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.2072043013" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1183866856" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.debug.1365367786" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.debug">
@ -46,9 +49,6 @@
</tool>
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.lib.debug.1721952013.195400614" name="MeteorScorer.cpp" rcbsApplicability="disable" resourcePath="MeteorScorer.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537.307282660">
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537.307282660" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537"/>
</fileInfo>
<sourceEntries>
<entry excluding="mert/PreProcessFilter.h|mert/PreProcessFilter.cpp|mert/UtilTest.cpp|mert/TimerTest.cpp|mert/SingletonTest.cpp|mert/PointTest.cpp|mert/OptimizerFactoryTest.cpp|mert/NgramTest.cpp|mert/FeatureDataTest.cpp|mert/DataTest.cpp|mert/ReferenceTest.cpp|mert/VocabularyTest.cpp|mert/extractor.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
@ -66,15 +66,15 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.release.3250316" name="Release" parent="cdt.managedbuild.config.gnu.lib.release">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.release.3250316" name="Release" parent="cdt.managedbuild.config.gnu.lib.release">
<folderInfo id="cdt.managedbuild.config.gnu.lib.release.3250316." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.lib.release.1996805666" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.lib.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.lib.release.106685808" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.lib.release"/>

View File

@ -28,7 +28,7 @@
<listOptionValue builtIn="false" value="/opt/local/include/"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/include&quot;"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.849384962" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="WITH_THREADS"/>
@ -47,6 +47,7 @@
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1546774818" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.523170942" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
@ -56,6 +57,7 @@
</option>
<option id="gnu.cpp.link.option.libs.998577284" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="cmph"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>

View File

@ -11,15 +11,15 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1846963597" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1846963597" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1167373278" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.397694981" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
@ -31,8 +31,11 @@
<option id="gnu.cpp.compiler.option.include.paths.876218169" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/include&quot;"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.53427549" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="PT_UG"/>
<listOptionValue builtIn="false" value="HAVE_CMPH"/>
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="WITH_THREADS"/>
@ -58,18 +61,18 @@
</tool>
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1761300858" name="ParallelBackoff.h" rcbsApplicability="disable" resourcePath="LM/ParallelBackoff.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1815042864" name="SRI.h" rcbsApplicability="disable" resourcePath="LM/SRI.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1720439764" name="NeuralLMWrapper.h" rcbsApplicability="disable" resourcePath="LM/NeuralLMWrapper.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1094892289" name="MaxEntSRI.h" rcbsApplicability="disable" resourcePath="LM/MaxEntSRI.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1113398114" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1183410636" name="ORLM.h" rcbsApplicability="disable" resourcePath="LM/ORLM.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1448475064" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1459438132" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1094892289" name="MaxEntSRI.h" rcbsApplicability="disable" resourcePath="LM/MaxEntSRI.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1720439764" name="NeuralLMWrapper.h" rcbsApplicability="disable" resourcePath="LM/NeuralLMWrapper.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1272004353" name="BilingualLM.h" rcbsApplicability="disable" resourcePath="LM/BilingualLM.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1815042864" name="SRI.h" rcbsApplicability="disable" resourcePath="LM/SRI.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1459438132" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.871386239" name="LDHT.h" rcbsApplicability="disable" resourcePath="LM/LDHT.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1761300858" name="ParallelBackoff.h" rcbsApplicability="disable" resourcePath="LM/ParallelBackoff.h" toolsToInvoke=""/>
<sourceEntries>
<entry excluding="LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/bilingual-lm|LM/MaxEntSRI.h|LM/MaxEntSRI.cpp|LM/BilingualLM.h|LM/BilingualLM.cpp|TranslationModel/CompactPT|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp|LM/ORLM.h|LM/ORLM.cpp|LM/NeuralLMWrapper.h|LM/NeuralLMWrapper.cpp|LM/SRI.h|LM/SRI.cpp|LM/IRST.h|LM/IRST.cpp|LM/DALMWrapper.h|LM/DALMWrapper.cpp|LM/oxlm|TranslationModel/ProbingPT|TranslationModel/UG|TranslationModel/UG/util" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
<entry excluding="TranslationModel/UG/mm/test-http-client.cc|TranslationModel/UG/ptable-describe-features.cc|TranslationModel/UG/count-ptable-features.cc|TranslationModel/UG/try-align2.cc|TranslationModel/UG/try-align.cc|TranslationModel/UG/spe-check-coverage3.cc|TranslationModel/UG/spe-check-coverage2.cc|TranslationModel/UG/spe-check-coverage.cc|TranslationModel/UG/sim-pe.cc|TranslationModel/UG/generic/stringdist|TranslationModel/UG/mm/test-dynamic-im-tsa.cc|TranslationModel/UG/mm/mtt.count.cc|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/bilingual-lm|LM/MaxEntSRI.h|LM/MaxEntSRI.cpp|LM/BilingualLM.h|LM/BilingualLM.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp|LM/ORLM.h|LM/ORLM.cpp|LM/NeuralLMWrapper.h|LM/NeuralLMWrapper.cpp|LM/SRI.h|LM/SRI.cpp|LM/IRST.h|LM/IRST.cpp|LM/DALMWrapper.h|LM/DALMWrapper.cpp|LM/oxlm|TranslationModel/ProbingPT|TranslationModel/UG/util" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
@ -79,16 +82,16 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1911984684" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1911984684" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1911984684" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.1911984684." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1552241309" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.332871558" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
@ -141,10 +144,10 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/moses"/>
</configuration>
<configuration configurationName="Debug">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/moses"/>
</configuration>
</storageModule>

View File

@ -60,6 +60,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/AlignmentInfoTest.cpp</locationURI>
</link>
<link>
<name>AllOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.cpp</locationURI>
</link>
<link>
<name>AllOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.h</locationURI>
</link>
<link>
<name>BaseManager.cpp</name>
<type>1</type>
@ -70,6 +80,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/BaseManager.h</locationURI>
</link>
<link>
<name>BeamSearchOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BeamSearchOptions.h</locationURI>
</link>
<link>
<name>BitmapContainer.cpp</name>
<type>1</type>
@ -80,6 +95,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/BitmapContainer.h</locationURI>
</link>
<link>
<name>BookkeepingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.cpp</locationURI>
</link>
<link>
<name>BookkeepingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.h</locationURI>
</link>
<link>
<name>CMakeLists.txt</name>
<type>1</type>
@ -230,6 +255,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.h</locationURI>
</link>
<link>
<name>CubePruningOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.cpp</locationURI>
</link>
<link>
<name>CubePruningOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.h</locationURI>
</link>
<link>
<name>DecodeGraph.cpp</name>
<type>1</type>
@ -460,6 +495,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/InputFileStream.h</locationURI>
</link>
<link>
<name>InputOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.cpp</locationURI>
</link>
<link>
<name>InputOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.h</locationURI>
</link>
<link>
<name>InputPath.cpp</name>
<type>1</type>
@ -490,6 +535,16 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>LMBR_Options.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.cpp</locationURI>
</link>
<link>
<name>LMBR_Options.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.h</locationURI>
</link>
<link>
<name>LVoc.cpp</name>
<type>1</type>
@ -510,6 +565,21 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LatticeMBR.h</locationURI>
</link>
<link>
<name>LookupOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LookupOptions.h</locationURI>
</link>
<link>
<name>MBR_Options.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.cpp</locationURI>
</link>
<link>
<name>MBR_Options.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.h</locationURI>
</link>
<link>
<name>Manager.cpp</name>
<type>1</type>
@ -535,6 +605,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/MosesTest.cpp</locationURI>
</link>
<link>
<name>NBestOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.cpp</locationURI>
</link>
<link>
<name>NBestOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.h</locationURI>
</link>
<link>
<name>NonTerminal.cpp</name>
<type>1</type>
@ -550,6 +630,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/ObjectPool.h</locationURI>
</link>
<link>
<name>OptionsBaseClass.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.cpp</locationURI>
</link>
<link>
<name>OptionsBaseClass.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.h</locationURI>
</link>
<link>
<name>OutputCollector.h</name>
<type>1</type>
@ -635,6 +725,26 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/ReorderingConstraint.h</locationURI>
</link>
<link>
<name>ReorderingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.cpp</locationURI>
</link>
<link>
<name>ReorderingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.h</locationURI>
</link>
<link>
<name>ReportingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.cpp</locationURI>
</link>
<link>
<name>ReportingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.h</locationURI>
</link>
<link>
<name>RuleCube.cpp</name>
<type>1</type>
@ -711,14 +821,14 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/SearchNormal.h</locationURI>
</link>
<link>
<name>SearchNormalBatch.cpp</name>
<name>SearchOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/SearchNormalBatch.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.cpp</locationURI>
</link>
<link>
<name>SearchNormalBatch.h</name>
<name>SearchOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/SearchNormalBatch.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.h</locationURI>
</link>
<link>
<name>Sentence.cpp</name>
@ -740,6 +850,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/SentenceStats.h</locationURI>
</link>
<link>
<name>ServerOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.cpp</locationURI>
</link>
<link>
<name>ServerOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.h</locationURI>
</link>
<link>
<name>SquareMatrix.cpp</name>
<type>1</type>
@ -1065,6 +1185,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/mbr.h</locationURI>
</link>
<link>
<name>parameters</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>rule.proto</name>
<type>1</type>
@ -1360,16 +1485,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SetSourcePhrase.h</locationURI>
</link>
<link>
<name>FF/SkeletonChangeInput.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonChangeInput.cpp</locationURI>
</link>
<link>
<name>FF/SkeletonChangeInput.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonChangeInput.h</locationURI>
</link>
<link>
<name>FF/SkeletonStatefulFF.cpp</name>
<type>1</type>
@ -2020,16 +2135,6 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/BilingualDynSuffixArray.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/BilingualDynSuffixArray.cpp</locationURI>
</link>
<link>
<name>TranslationModel/BilingualDynSuffixArray.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/BilingualDynSuffixArray.h</locationURI>
</link>
<link>
<name>TranslationModel/CYKPlusParser</name>
<type>2</type>
@ -2040,21 +2145,6 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/DynSuffixArray.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSuffixArray.cpp</locationURI>
</link>
<link>
<name>TranslationModel/DynSuffixArray.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSuffixArray.h</locationURI>
</link>
<link>
<name>TranslationModel/PhraseDictionary.cpp</name>
<type>1</type>
@ -2070,16 +2160,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryDynSuffixArray.README</locationURI>
</link>
<link>
<name>TranslationModel/PhraseDictionaryDynSuffixArray.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryDynSuffixArray.cpp</locationURI>
</link>
<link>
<name>TranslationModel/PhraseDictionaryDynSuffixArray.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryDynSuffixArray.h</locationURI>
</link>
<link>
<name>TranslationModel/PhraseDictionaryDynamicCacheBased.cpp</name>
<type>1</type>
@ -2200,16 +2280,6 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/WordCoocTable.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/WordCoocTable.cpp</locationURI>
</link>
<link>
<name>TranslationModel/WordCoocTable.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/WordCoocTable.h</locationURI>
</link>
<link>
<name>TranslationModel/fuzzy-match</name>
<type>2</type>
@ -2240,6 +2310,146 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/PhraseOrientation.h</locationURI>
</link>
<link>
<name>parameters/AllOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.cpp</locationURI>
</link>
<link>
<name>parameters/AllOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.h</locationURI>
</link>
<link>
<name>parameters/BeamSearchOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BeamSearchOptions.h</locationURI>
</link>
<link>
<name>parameters/BookkeepingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.cpp</locationURI>
</link>
<link>
<name>parameters/BookkeepingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.h</locationURI>
</link>
<link>
<name>parameters/ContextParameters.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.cpp</locationURI>
</link>
<link>
<name>parameters/ContextParameters.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.h</locationURI>
</link>
<link>
<name>parameters/CubePruningOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.cpp</locationURI>
</link>
<link>
<name>parameters/CubePruningOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.h</locationURI>
</link>
<link>
<name>parameters/InputOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.cpp</locationURI>
</link>
<link>
<name>parameters/InputOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.h</locationURI>
</link>
<link>
<name>parameters/LMBR_Options.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.cpp</locationURI>
</link>
<link>
<name>parameters/LMBR_Options.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.h</locationURI>
</link>
<link>
<name>parameters/LookupOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LookupOptions.h</locationURI>
</link>
<link>
<name>parameters/MBR_Options.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.cpp</locationURI>
</link>
<link>
<name>parameters/MBR_Options.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.h</locationURI>
</link>
<link>
<name>parameters/NBestOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.cpp</locationURI>
</link>
<link>
<name>parameters/NBestOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.h</locationURI>
</link>
<link>
<name>parameters/OptionsBaseClass.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.cpp</locationURI>
</link>
<link>
<name>parameters/OptionsBaseClass.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.h</locationURI>
</link>
<link>
<name>parameters/ReorderingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.cpp</locationURI>
</link>
<link>
<name>parameters/ReorderingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.h</locationURI>
</link>
<link>
<name>parameters/ReportingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.cpp</locationURI>
</link>
<link>
<name>parameters/ReportingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.h</locationURI>
</link>
<link>
<name>parameters/SearchOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.cpp</locationURI>
</link>
<link>
<name>parameters/SearchOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.h</locationURI>
</link>
<link>
<name>parameters/ServerOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.cpp</locationURI>
</link>
<link>
<name>parameters/ServerOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.h</locationURI>
</link>
<link>
<name>FF/LexicalReordering/LexicalReordering.cpp</name>
<type>1</type>
@ -2935,86 +3145,6 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/FileHandler.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/FileHandler.cpp</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/FileHandler.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/FileHandler.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/Jamfile</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/Jamfile</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/RandLMCache.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/RandLMCache.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/RandLMFilter.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/RandLMFilter.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/fdstream.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/fdstream.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/hash.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/hash.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/onlineRLM.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/onlineRLM.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/params.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/params.cpp</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/params.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/params.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/perfectHash.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/perfectHash.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/quantizer.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/quantizer.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/types.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/types.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/utils.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/utils.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/vocab.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/vocab.cpp</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/vocab.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/vocab.h</locationURI>
</link>
<link>
<name>TranslationModel/ProbingPT/Jamfile</name>
<type>1</type>
@ -3285,6 +3415,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/Makefile</locationURI>
</link>
<link>
<name>TranslationModel/UG/TargetPhraseCollectionCache.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/TargetPhraseCollectionCache.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/TargetPhraseCollectionCache.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/bin</name>
<type>2</type>
@ -3330,11 +3470,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/ptable-lookup.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/sapt_phrase_key.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_phrase_key.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/sapt_phrase_scorers.h</name>
<type>1</type>
@ -3680,6 +3815,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/test-dynamic-im-tsa.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/test-http-client.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/test-http-client.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/test-xml-escaping.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/test-xml-escaping.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/tpt_pickler.cc</name>
<type>1</type>
@ -3725,6 +3870,56 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_agenda.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_agenda.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_agenda_job.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_agenda_job.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_agenda_worker.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_jstats.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_jstats.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_jstats.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_moses.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_moses.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_phrase_extraction_record.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_phrase_extraction_record.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_pstats.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_pstats.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_pstats.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_pstats.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_sampler.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_sampler.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_conll_bottom_up_token.h</name>
<type>1</type>
@ -3760,6 +3955,26 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_deptree.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_http_client.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_http_client.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_http_client.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_http_client.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_im_bitext.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_im_bitext.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_im_bitext.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_im_bitext.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_im_tsa.h</name>
<type>1</type>
@ -3780,6 +3995,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_lexical_reordering.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_lexical_reordering.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_lexical_reordering.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_load_primer.cc</name>
<type>1</type>
@ -3800,6 +4025,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mm_2d_table.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_mm_bitext.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mm_bitext.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_mm_tsa.h</name>
<type>1</type>
@ -3815,16 +4045,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mm_ttrack.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_mmbitext.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mmbitext.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_mmbitext.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mmbitext.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_phrasepair.cc</name>
<type>1</type>
@ -3835,6 +4055,21 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_phrasepair.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_prep_phrases.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_prep_phrases.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_sampling_bias.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_sampling_bias.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_sampling_bias.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_sampling_bias.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_tsa_array_entry.cc</name>
<type>1</type>
@ -4060,6 +4295,21 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/generic/threading/ug_ref_counter.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/threading/ug_ref_counter.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/generic/threading/ug_thread_pool.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/threading/ug_thread_pool.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/generic/threading/ug_thread_pool.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/threading/ug_thread_pool.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc</name>
<type>1</type>
@ -4070,6 +4320,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/gcc-4.8</name>
<type>2</type>
@ -4110,6 +4365,11 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/gcc-4.8/release</name>
<type>2</type>
@ -4145,6 +4405,11 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on</name>
<type>2</type>
@ -4720,6 +4985,11 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static</name>
<type>2</type>
@ -5315,6 +5585,11 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
@ -5570,6 +5845,201 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_thread_safe_counter.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/num_read_write.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/num_read_write.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_jstats.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_jstats.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_pstats.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_pstats.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_http_client.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_http_client.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_im_bitext.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_im_bitext.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_lexical_reordering.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_lexical_reordering.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_sampling_bias.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_sampling_bias.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage</name>
<type>1</type>

View File

@ -1,6 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="pruneGeneration" InternalType="Console">
<Plugins>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
@ -13,9 +16,6 @@
"parentProject": ""
}]]]>
</Plugin>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
@ -44,8 +44,10 @@
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
<Library Value="boost_filesystem"/>
<Library Value="boost_system"/>
<Library Value="boost_iostreams"/>
<Library Value="moses"/>
<Library Value="z"/>
<Library Value="bz2"/>
</Linker>
<ResourceCompiler Options="" Required="no"/>
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Debug" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>

View File

@ -44,6 +44,7 @@
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1443553047" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.1096041402" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../xmlrpc-c/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
@ -53,6 +54,7 @@
</option>
<option id="gnu.cpp.link.option.libs.1087215166" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="cmph"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
@ -75,6 +77,7 @@
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>

View File

@ -13,7 +13,7 @@ with-xmlrpc-c = [ option.get "with-xmlrpc-c" ] ;
if $(with-xmlrpc-c) {
echo While building mosesserver ... ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
echo "!!! You are linking the XMLRPC-C library; Do NOT use v.1.25.29 !!!" ;
echo "!!! You are linking the XMLRPC-C library; Must be v.1.32 (September 2012) or higher !!!" ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
build-moses-server = true ;

View File

@ -38,13 +38,12 @@ int main(int argc, char** argv)
#include "moses/StaticData.h"
#include "moses/ThreadPool.h"
#include "moses/TranslationTask.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#include "moses/FF/StatefulFeatureFunction.h"
#if PT_UG
#include "moses/TranslationModel/UG/mmsapt.h"
#endif
#include "moses/TreeInput.h"
#include "moses/LM/ORLM.h"
#include "moses/IOWrapper.h"
#include <boost/foreach.hpp>
@ -58,8 +57,8 @@ int main(int argc, char** argv)
#include <xmlrpc-c/server_abyss.hpp>
// using namespace Moses;
using Moses::TreeInput;
using namespace std;
using namespace Moses;
typedef std::map<std::string, xmlrpc_c::value> params_t;
@ -82,70 +81,16 @@ public:
Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(PhraseDictionary::GetColl()[0]);
pdsa->add(source_,target_,alignment_);
#else
const PhraseDictionary* pdf = PhraseDictionary::GetColl()[0];
PhraseDictionaryDynSuffixArray*
pdsa = (PhraseDictionaryDynSuffixArray*) pdf;
cerr << "Inserting into address " << pdsa << endl;
pdsa->insertSnt(source_, target_, alignment_);
std::string msg;
msg = "Server was compiled without a phrase table implementation that ";
msg += "supports updates.";
throw xmlrpc_c::fault(msg.c_str(), xmlrpc_c::fault::CODE_PARSE);
#endif
if(add2ORLM_) {
//updateORLM();
}
XVERBOSE(1,"Done inserting\n");
//PhraseDictionary* pdsa = (PhraseDictionary*) pdf->GetDictionary(*dummy);
map<string, xmlrpc_c::value> retData;
//*retvalP = xmlrpc_c::value_struct(retData);
#ifndef PT_UG
pdf = 0;
#endif
pdsa = 0;
*retvalP = xmlrpc_c::value_string("Phrase table updated");
}
string source_, target_, alignment_;
bool bounded_, add2ORLM_;
/*
void updateORLM() {
// TODO(level101): this belongs in the language model, not in moseserver.cpp
vector<string> vl;
map<vector<string>, int> ngSet;
LMList lms = StaticData::Instance().GetLMList(); // get LM
LMList::const_iterator lmIter = lms.begin();
LanguageModel *lm = *lmIter;
LanguageModelORLM* orlm = static_cast<LanguageModelORLM*>(lm);
if(orlm == 0) {
cerr << "WARNING: Unable to add target sentence to ORLM\n";
return;
}
// break out new ngrams from sentence
const int ngOrder(orlm->GetNGramOrder());
const std::string sBOS = orlm->GetSentenceStart()->GetString().as_string();
const std::string sEOS = orlm->GetSentenceEnd()->GetString().as_string();
Utils::splitToStr(target_, vl, " ");
// insert BOS and EOS
vl.insert(vl.begin(), sBOS);
vl.insert(vl.end(), sEOS);
for(int j=0; j < vl.size(); ++j) {
int i = (j<ngOrder) ? 0 : j-ngOrder+1;
for(int t=j; t >= i; --t) {
vector<string> ngVec;
for(int s=t; s<=j; ++s) {
ngVec.push_back(vl[s]);
//cerr << vl[s] << " ";
}
ngSet[ngVec]++;
//cerr << endl;
}
}
// insert into LM in order from 1grams up (for LM well-formedness)
cerr << "Inserting " << ngSet.size() << " ngrams into ORLM...\n";
for(int i=1; i <= ngOrder; ++i) {
iterate(ngSet, it) {
if(it->first.size() == i)
orlm->UpdateORLM(it->first, it->second);
}
}
}
*/
bool bounded_;
void breakOutParams(const params_t& params) {
params_t::const_iterator si = params.find("source");
@ -165,8 +110,6 @@ public:
XVERBOSE(1,"alignment = " << alignment_ << endl);
si = params.find("bounded");
bounded_ = (si != params.end());
si = params.find("updateORLM");
add2ORLM_ = (si != params.end());
}
};
@ -678,6 +621,14 @@ int main(int argc, char** argv)
bool isSerial = false;
size_t numThreads = 10; //for translation tasks
//Abyss server configuration: initial values reflect hard-coded default
//-> http://xmlrpc-c.sourceforge.net/doc/libxmlrpc_server_abyss.html#max_conn
size_t maxConn = 15;
size_t maxConnBacklog = 15;
size_t keepaliveTimeout = 15;
size_t keepaliveMaxConn = 30;
size_t timeout = 15;
for (int i = 0; i < argc; ++i) {
if (!strcmp(argv[i],"--server-port")) {
++i;
@ -695,6 +646,46 @@ int main(int argc, char** argv)
} else {
logfile = argv[i];
}
} else if (!strcmp(argv[i],"--server-maxconn")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-maxconn" << endl;
exit(1);
} else {
maxConn = atoi(argv[i]);
}
} else if (!strcmp(argv[i],"--server-maxconn-backlog")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-maxconn-backlog" << endl;
exit(1);
} else {
maxConnBacklog = atoi(argv[i]);
}
} else if (!strcmp(argv[i],"--server-keepalive-timeout")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-keepalive-timeout" << endl;
exit(1);
} else {
keepaliveTimeout = atoi(argv[i]);
}
} else if (!strcmp(argv[i],"--server-keepalive-maxconn")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-keepalive-maxconn" << endl;
exit(1);
} else {
keepaliveMaxConn = atoi(argv[i]);
}
} else if (!strcmp(argv[i],"--server-timeout")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-timeout" << endl;
exit(1);
} else {
timeout = atoi(argv[i]);
}
} else if (!strcmp(argv[i], "--threads")) {
++i;
if (i>=argc) {
@ -740,20 +731,27 @@ int main(int argc, char** argv)
myRegistry.addMethod("updater", updater);
myRegistry.addMethod("optimize", optimizer);
/* CODE FOR old xmlrpc-c v. 1.32 or lower
xmlrpc_c::serverAbyss myAbyssServer(
myRegistry,
port, // TCP port on which to listen
logfile
);
/* doesn't work with xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04
*/
/* doesn't work with xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04 */
xmlrpc_c::serverAbyss myAbyssServer(
xmlrpc_c::serverAbyss::constrOpt()
.registryPtr(&myRegistry)
.registryP(&myRegistry)
.portNumber(port) // TCP port on which to listen
.logFileName(logfile)
.allowOrigin("*")
.maxConn((unsigned int)maxConn)
.maxConnBacklog((unsigned int)maxConnBacklog)
.keepaliveTimeout((unsigned int)keepaliveTimeout)
.keepaliveMaxConn((unsigned int)keepaliveMaxConn)
.timeout((unsigned int)timeout)
);
*/
XVERBOSE(1,"Listening on port " << port << endl);
if (isSerial) {

View File

@ -2,7 +2,7 @@
# xmlrpc-c library (including the abyss server) that is needed for
# moses server functionality
if [ option.get "no-xmlrpc-c" ]
if [ option.get "no-xmlrpc-c" : : "yes" ]
{
rule xmlrpc ( what ? ) { } # never return anything
}

View File

@ -1,46 +1,139 @@
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
set(KENLM_MAX_ORDER 6)
add_definitions(-DKENLM_MAX_ORDER=${KENLM_MAX_ORDER})
# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
set(KENLM_SOURCE
bhiksha.cc
binary_format.cc
config.cc
lm_exception.cc
model.cc
quantize.cc
read_arpa.cc
search_hashed.cc
search_trie.cc
sizes.cc
trie.cc
trie_sort.cc
value_build.cc
virtual_interface.cc
vocab.cc
)
# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm OBJECT ${KENLM_SOURCE})
# This directory has children that need to be processed
add_subdirectory(builder)
add_subdirectory(common)
add_subdirectory(filter)
# Explicitly list the executable files to be compiled
set(EXE_LIST
query
fragment
build_binary
)
# Iterate through the executable list
foreach(exe ${EXE_LIST})
# Compile the executable, linking against the requisite dependent object files
add_executable(${exe} ${exe}_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>)
# Link the executable against boost
target_link_libraries(${exe} ${Boost_LIBRARIES})
# Group executables together
set_target_properties(${exe} PROPERTIES FOLDER executables)
# End for loop
endforeach(exe)
# Install the executable files
install(TARGETS ${EXE_LIST} DESTINATION bin)
if(BUILD_TESTING)
# Explicitly list the Boost test files to be compiled
set(KENLM_BOOST_TESTS_LIST
left_test
model_test
partial_test
)
# Iterate through the Boost tests list
foreach(test ${KENLM_BOOST_TESTS_LIST})
# Compile the executable, linking against the requisite dependent object files
add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>)
# Require the following compile flag
set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
# Link the executable against boost
target_link_libraries(${test} ${Boost_LIBRARIES})
# model_test requires an extra command line parameter
if ("${test}" STREQUAL "model_test")
set(test_params
${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa
)
else()
set(test_params
${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
)
endif()
# Specify command arguments for how to run each unit test
#
# Assuming that foo was defined via add_executable(foo ...),
# the syntax $<TARGET_FILE:foo> gives the full path to the executable.
#
add_test(NAME ${test}_test
COMMAND $<TARGET_FILE:${test}> ${test_params})
# Group unit tests together
set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
# End for loop
endforeach(test)
endif()
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/bhiksha.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/bhiksha.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/binary_format.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/binary_format.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/blank.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/config.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/config.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/enumerate_vocab.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/facade.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/left.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/lm_exception.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/lm_exception.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/max_order.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/model.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/model.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/model_type.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/ngram_query.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/partial.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/quantize.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/quantize.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/read_arpa.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/read_arpa.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/return.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_hashed.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_hashed.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_trie.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_trie.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/sizes.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/sizes.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/state.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie_sort.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie_sort.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/value.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/value_build.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/value_build.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/virtual_interface.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/virtual_interface.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/vocab.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/vocab.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/weights.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/word_index.hh")
add_library(kenlm OBJECT ${SOURCE_KENLM})

View File

@ -17,7 +17,7 @@ wrappers = ;
local with-nplm = [ option.get "with-nplm" ] ;
if $(with-nplm) {
lib nplm : : <search>$(with-nplm)/src ;
obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <cxxflags>-fopenmp ;
obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <cxxflags>-fopenmp <include>$(with-nplm)/3rdparty/eigen <define>NPLM_DOUBLE_PRECISION=0 ;
alias nplm-all : nplm.o nplm ..//boost_thread : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>WITH_NPLM <library>..//boost_thread ;
wrappers += nplm-all ;
}

87
lm/builder/CMakeLists.txt Normal file
View File

@ -0,0 +1,87 @@
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
#
# In order to set correct paths to these files
# in case this variable is referenced by CMake files in the parent directory,
# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
#
set(KENLM_BUILDER_SOURCE
${CMAKE_CURRENT_SOURCE_DIR}/adjust_counts.cc
${CMAKE_CURRENT_SOURCE_DIR}/corpus_count.cc
${CMAKE_CURRENT_SOURCE_DIR}/initial_probabilities.cc
${CMAKE_CURRENT_SOURCE_DIR}/interpolate.cc
${CMAKE_CURRENT_SOURCE_DIR}/output.cc
${CMAKE_CURRENT_SOURCE_DIR}/pipeline.cc
)
# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm_builder OBJECT ${KENLM_BUILDER_SOURCE})
# Compile the executable, linking against the requisite dependent object files
add_executable(lmplz lmplz_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_common> $<TARGET_OBJECTS:kenlm_builder> $<TARGET_OBJECTS:kenlm_util>)
# Link the executable against boost
target_link_libraries(lmplz ${Boost_LIBRARIES})
# Group executables together
set_target_properties(lmplz PROPERTIES FOLDER executables)
if(BUILD_TESTING)
# Explicitly list the Boost test files to be compiled
set(KENLM_BOOST_TESTS_LIST
adjust_counts_test
corpus_count_test
)
# Iterate through the Boost tests list
foreach(test ${KENLM_BOOST_TESTS_LIST})
# Compile the executable, linking against the requisite dependent object files
add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_common> $<TARGET_OBJECTS:kenlm_builder> $<TARGET_OBJECTS:kenlm_util>)
# Require the following compile flag
set_target_properties(${test} PROPERTIES COMPILE_FLAGS "-DBOOST_TEST_DYN_LINK -DBOOST_PROGRAM_OPTIONS_DYN_LINK")
# Link the executable against boost
target_link_libraries(${test} ${Boost_LIBRARIES})
# Specify command arguments for how to run each unit test
#
# Assuming that foo was defined via add_executable(foo ...),
# the syntax $<TARGET_FILE:foo> gives the full path to the executable.
#
add_test(NAME ${test}_test
COMMAND $<TARGET_FILE:${test}>)
# Group unit tests together
set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
# End for loop
endforeach(test)
endif()

View File

@ -15,9 +15,6 @@
#include "util/stream/timer.hh"
#include "util/tokenize_piece.hh"
#include <boost/unordered_set.hpp>
#include <boost/unordered_map.hpp>
#include <functional>
#include <stdint.h>

View File

@ -43,12 +43,13 @@ BOOST_AUTO_TEST_CASE(Short) {
util::scoped_fd vocab(util::MakeTemp("corpus_count_test_vocab"));
util::stream::Chain chain(config);
NGramStream<BuildingPayload> stream;
uint64_t token_count;
WordIndex type_count = 10;
std::vector<bool> prune_words;
CorpusCount counter(input_piece, vocab.get(), token_count, type_count, prune_words, "", chain.BlockSize() / chain.EntrySize(), SILENT);
chain >> boost::ref(counter) >> stream >> util::stream::kRecycle;
chain >> boost::ref(counter);
NGramStream<BuildingPayload> stream(chain.Add());
chain >> util::stream::kRecycle;
const char *v[] = {"<unk>", "<s>", "</s>", "looking", "on", "a", "little", "more", "loin", "foo", "bar"};

View File

@ -1,54 +1,18 @@
#ifndef LM_BUILDER_PRINT_H
#define LM_BUILDER_PRINT_H
#ifndef LM_BUILDER_DEBUG_PRINT_H
#define LM_BUILDER_DEBUG_PRINT_H
#include "lm/common/ngram_stream.hh"
#include "lm/builder/output.hh"
#include "lm/builder/payload.hh"
#include "lm/common/ngram.hh"
#include "lm/common/print.hh"
#include "lm/common/ngram_stream.hh"
#include "util/fake_ofstream.hh"
#include "util/file.hh"
#include "util/mmap.hh"
#include "util/string_piece.hh"
#include <boost/lexical_cast.hpp>
#include <ostream>
#include <cassert>
// Warning: print routines read all unigrams before all bigrams before all
// trigrams etc. So if other parts of the chain move jointly, you'll have to
// buffer.
namespace lm { namespace builder {
class VocabReconstitute {
public:
// fd must be alive for life of this object; does not take ownership.
explicit VocabReconstitute(int fd);
const char *Lookup(WordIndex index) const {
assert(index < map_.size() - 1);
return map_[index];
}
StringPiece LookupPiece(WordIndex index) const {
return StringPiece(map_[index], map_[index + 1] - 1 - map_[index]);
}
std::size_t Size() const {
// There's an extra entry to support StringPiece lengths.
return map_.size() - 1;
}
private:
util::scoped_memory memory_;
std::vector<const char*> map_;
};
// Not defined, only specialized.
template <class T> void PrintPayload(util::FakeOFStream &to, const BuildingPayload &payload);
template <> inline void PrintPayload<uint64_t>(util::FakeOFStream &to, const BuildingPayload &payload) {
// TODO slow
to << payload.count;
}
template <> inline void PrintPayload<Uninterpolated>(util::FakeOFStream &to, const BuildingPayload &payload) {
@ -101,19 +65,6 @@ template <class V> class Print {
int to_;
};
class PrintARPA : public OutputHook {
public:
explicit PrintARPA(int fd, bool verbose_header)
: OutputHook(PROB_SEQUENTIAL_HOOK), out_fd_(fd), verbose_header_(verbose_header) {}
void Sink(util::stream::Chains &chains);
void Run(const util::stream::ChainPositions &positions);
private:
util::scoped_fd out_fd_;
bool verbose_header_;
};
}} // namespaces
#endif // LM_BUILDER_PRINT_H
#endif // LM_BUILDER_DEBUG_PRINT_H

View File

@ -1,4 +1,4 @@
#include "lm/builder/print.hh"
#include "lm/common/print.hh"
#include "lm/word_index.hh"
#include "util/file.hh"
#include "util/read_compressed.hh"
@ -20,7 +20,7 @@ int main(int argc, char *argv[]) {
}
util::ReadCompressed counts(util::OpenReadOrThrow(argv[1]));
util::scoped_fd vocab_file(util::OpenReadOrThrow(argv[2]));
lm::builder::VocabReconstitute vocab(vocab_file.get());
lm::VocabReconstitute vocab(vocab_file.get());
unsigned int order = boost::lexical_cast<unsigned int>(argv[3]);
std::vector<char> record(sizeof(uint32_t) * order + sizeof(uint64_t));
while (std::size_t got = counts.ReadOrEOF(&*record.begin(), record.size())) {

View File

@ -5,6 +5,8 @@
#include <vector>
#include <stdint.h>
namespace lm { namespace builder {
// Some configuration info that is used to add
// comments to the beginning of an ARPA file
struct HeaderInfo {
@ -21,4 +23,6 @@ struct HeaderInfo {
// TODO: More info if multiple models were interpolated
};
}} // namespaces
#endif

View File

@ -1,9 +1,9 @@
#include "lm/builder/initial_probabilities.hh"
#include "lm/builder/discount.hh"
#include "lm/builder/special.hh"
#include "lm/builder/hash_gamma.hh"
#include "lm/builder/payload.hh"
#include "lm/common/special.hh"
#include "lm/common/ngram_stream.hh"
#include "util/murmur_hash.hh"
#include "util/file.hh"

View File

@ -10,9 +10,8 @@
namespace util { namespace stream { class Chains; } }
namespace lm {
namespace builder {
class SpecialVocab;
namespace builder {
struct InitialProbabilitiesConfig {
// These should be small buffers to keep the adder from getting too far ahead

View File

@ -1,16 +1,16 @@
#include "lm/builder/interpolate.hh"
#include "lm/builder/hash_gamma.hh"
#include "lm/builder/joint_order.hh"
#include "lm/common/ngram_stream.hh"
#include "lm/builder/payload.hh"
#include "lm/common/compare.hh"
#include "lm/common/joint_order.hh"
#include "lm/common/ngram_stream.hh"
#include "lm/lm_exception.hh"
#include "util/fixed_array.hh"
#include "util/murmur_hash.hh"
#include <cassert>
#include <cmath>
#include <iostream>
namespace lm { namespace builder {
namespace {
@ -91,7 +91,8 @@ template <class Output> class Callback {
}
}
void Enter(unsigned order_minus_1, NGram<BuildingPayload> &gram) {
void Enter(unsigned order_minus_1, void *data) {
NGram<BuildingPayload> gram(data, order_minus_1 + 1);
BuildingPayload &pay = gram.Value();
pay.complete.prob = pay.uninterp.prob + pay.uninterp.gamma * probs_[order_minus_1];
probs_[order_minus_1 + 1] = pay.complete.prob;
@ -125,7 +126,7 @@ template <class Output> class Callback {
output_.Gram(order_minus_1, out_backoff, pay.complete);
}
void Exit(unsigned, const NGram<BuildingPayload> &) const {}
void Exit(unsigned, void *) const {}
private:
util::FixedArray<util::stream::Stream> backoffs_;

View File

@ -1,7 +1,7 @@
#ifndef LM_BUILDER_INTERPOLATE_H
#define LM_BUILDER_INTERPOLATE_H
#include "lm/builder/special.hh"
#include "lm/common/special.hh"
#include "lm/word_index.hh"
#include "util/stream/multi_stream.hh"

View File

@ -1,6 +1,6 @@
#include "lm/builder/output.hh"
#include "lm/builder/pipeline.hh"
#include "lm/builder/print.hh"
#include "lm/common/size_option.hh"
#include "lm/lm_exception.hh"
#include "util/file.hh"
#include "util/file_piece.hh"
@ -13,21 +13,6 @@
#include <vector>
namespace {
class SizeNotify {
public:
SizeNotify(std::size_t &out) : behind_(out) {}
void operator()(const std::string &from) {
behind_ = util::ParseSize(from);
}
private:
std::size_t &behind_;
};
boost::program_options::typed_value<std::string> *SizeOption(std::size_t &to, const char *default_value) {
return boost::program_options::value<std::string>()->notifier(SizeNotify(to))->default_value(default_value);
}
// Parse and validate pruning thresholds then return vector of threshold counts
// for each n-grams order.
@ -106,17 +91,16 @@ int main(int argc, char *argv[]) {
("interpolate_unigrams", po::value<bool>(&pipeline.initial_probs.interpolate_unigrams)->default_value(true)->implicit_value(true), "Interpolate the unigrams (default) as opposed to giving lots of mass to <unk> like SRI. If you want SRI's behavior with a large <unk> and the old lmplz default, use --interpolate_unigrams 0.")
("skip_symbols", po::bool_switch(), "Treat <s>, </s>, and <unk> as whitespace instead of throwing an exception")
("temp_prefix,T", po::value<std::string>(&pipeline.sort.temp_prefix)->default_value("/tmp/lm"), "Temporary file prefix")
("memory,S", SizeOption(pipeline.sort.total_memory, util::GuessPhysicalMemory() ? "80%" : "1G"), "Sorting memory")
("minimum_block", SizeOption(pipeline.minimum_block, "8K"), "Minimum block size to allow")
("sort_block", SizeOption(pipeline.sort.buffer_size, "64M"), "Size of IO operations for sort (determines arity)")
("memory,S", lm:: SizeOption(pipeline.sort.total_memory, util::GuessPhysicalMemory() ? "80%" : "1G"), "Sorting memory")
("minimum_block", lm::SizeOption(pipeline.minimum_block, "8K"), "Minimum block size to allow")
("sort_block", lm::SizeOption(pipeline.sort.buffer_size, "64M"), "Size of IO operations for sort (determines arity)")
("block_count", po::value<std::size_t>(&pipeline.block_count)->default_value(2), "Block count (per order)")
("vocab_estimate", po::value<lm::WordIndex>(&pipeline.vocab_estimate)->default_value(1000000), "Assume this vocabulary size for purposes of calculating memory in step 1 (corpus count) and pre-sizing the hash table")
("vocab_file", po::value<std::string>(&pipeline.vocab_file)->default_value(""), "Location to write a file containing the unique vocabulary strings delimited by null bytes")
("vocab_pad", po::value<uint64_t>(&pipeline.vocab_size_for_unk)->default_value(0), "If the vocabulary is smaller than this value, pad with <unk> to reach this size. Requires --interpolate_unigrams")
("verbose_header", po::bool_switch(&verbose_header), "Add a verbose header to the ARPA file that includes information such as token count, smoothing type, etc.")
("text", po::value<std::string>(&text), "Read text from a file instead of stdin")
("arpa", po::value<std::string>(&arpa), "Write ARPA to a file instead of stdout")
("intermediate", po::value<std::string>(&intermediate), "Write ngrams to an intermediate file. Turns off ARPA output (which can be reactivated by --arpa file). Forces --renumber on. Implicitly makes --vocab_file be the provided name + .vocab.")
("intermediate", po::value<std::string>(&intermediate), "Write ngrams to intermediate files. Turns off ARPA output (which can be reactivated by --arpa file). Forces --renumber on.")
("renumber", po::bool_switch(&pipeline.renumber_vocabulary), "Rrenumber the vocabulary identifiers so that they are monotone with the hash of each string. This is consistent with the ordering used by the trie data structure.")
("collapse_values", po::bool_switch(&pipeline.output_q), "Collapse probability and backoff into a single value, q that yields the same sentence-level probabilities. See http://kheafield.com/professional/edinburgh/rest_paper.pdf for more details, including a proof.")
("prune", po::value<std::vector<std::string> >(&pruning)->multitoken(), "Prune n-grams with count less than or equal to the given threshold. Specify one value for each order i.e. 0 0 1 to prune singleton trigrams and above. The sequence of values must be non-decreasing and the last value applies to any remaining orders. Default is to not prune, which is equivalent to --prune 0.")
@ -217,15 +201,10 @@ int main(int argc, char *argv[]) {
bool writing_intermediate = vm.count("intermediate");
if (writing_intermediate) {
pipeline.renumber_vocabulary = true;
if (!pipeline.vocab_file.empty()) {
std::cerr << "--intermediate and --vocab_file are incompatible because --intermediate already makes a vocab file." << std::endl;
return 1;
}
pipeline.vocab_file = intermediate + ".vocab";
}
lm::builder::Output output(writing_intermediate ? intermediate : pipeline.sort.temp_prefix, writing_intermediate);
lm::builder::Output output(writing_intermediate ? intermediate : pipeline.sort.temp_prefix, writing_intermediate, pipeline.output_q);
if (!writing_intermediate || vm.count("arpa")) {
output.Add(new lm::builder::PrintARPA(out.release(), verbose_header));
output.Add(new lm::builder::PrintHook(out.release(), verbose_header));
}
lm::builder::Pipeline(pipeline, in.release(), output);
} catch (const util::MallocException &e) {

View File

@ -1,6 +1,8 @@
#include "lm/builder/output.hh"
#include "lm/common/model_buffer.hh"
#include "lm/common/print.hh"
#include "util/fake_ofstream.hh"
#include "util/stream/multi_stream.hh"
#include <iostream>
@ -9,23 +11,22 @@ namespace lm { namespace builder {
OutputHook::~OutputHook() {}
Output::Output(StringPiece file_base, bool keep_buffer)
: file_base_(file_base.data(), file_base.size()), keep_buffer_(keep_buffer) {}
Output::Output(StringPiece file_base, bool keep_buffer, bool output_q)
: buffer_(file_base, keep_buffer, output_q) {}
void Output::SinkProbs(util::stream::Chains &chains, bool output_q) {
void Output::SinkProbs(util::stream::Chains &chains) {
Apply(PROB_PARALLEL_HOOK, chains);
if (!keep_buffer_ && !Have(PROB_SEQUENTIAL_HOOK)) {
if (!buffer_.Keep() && !Have(PROB_SEQUENTIAL_HOOK)) {
chains >> util::stream::kRecycle;
chains.Wait(true);
return;
}
lm::common::ModelBuffer buf(file_base_, keep_buffer_, output_q);
buf.Sink(chains);
buffer_.Sink(chains, header_.counts_pruned);
chains >> util::stream::kRecycle;
chains.Wait(false);
if (Have(PROB_SEQUENTIAL_HOOK)) {
std::cerr << "=== 5/5 Writing ARPA model ===" << std::endl;
buf.Source(chains);
buffer_.Source(chains);
Apply(PROB_SEQUENTIAL_HOOK, chains);
chains >> util::stream::kRecycle;
chains.Wait(true);
@ -34,8 +35,18 @@ void Output::SinkProbs(util::stream::Chains &chains, bool output_q) {
void Output::Apply(HookType hook_type, util::stream::Chains &chains) {
for (boost::ptr_vector<OutputHook>::iterator entry = outputs_[hook_type].begin(); entry != outputs_[hook_type].end(); ++entry) {
entry->Sink(chains);
entry->Sink(header_, VocabFile(), chains);
}
}
void PrintHook::Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) {
if (verbose_header_) {
util::FakeOFStream out(file_.get(), 50);
out << "# Input file: " << info.input_file << '\n';
out << "# Token count: " << info.token_count << '\n';
out << "# Smoothing: Modified Kneser-Ney" << '\n';
}
chains >> PrintARPA(vocab_file, file_.get(), info.counts_pruned);
}
}} // namespaces

View File

@ -2,6 +2,7 @@
#define LM_BUILDER_OUTPUT_H
#include "lm/builder/header_info.hh"
#include "lm/common/model_buffer.hh"
#include "util/file.hh"
#include <boost/ptr_container/ptr_vector.hpp>
@ -20,69 +21,64 @@ enum HookType {
NUMBER_OF_HOOKS // Keep this last so we know how many values there are.
};
class Output;
class OutputHook {
public:
explicit OutputHook(HookType hook_type) : type_(hook_type), master_(NULL) {}
explicit OutputHook(HookType hook_type) : type_(hook_type) {}
virtual ~OutputHook();
virtual void Sink(util::stream::Chains &chains) = 0;
virtual void Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) = 0;
protected:
const HeaderInfo &GetHeader() const;
int GetVocabFD() const;
HookType Type() const { return type_; }
private:
friend class Output;
const HookType type_;
const Output *master_;
HookType type_;
};
class Output : boost::noncopyable {
public:
Output(StringPiece file_base, bool keep_buffer);
Output(StringPiece file_base, bool keep_buffer, bool output_q);
// Takes ownership.
void Add(OutputHook *hook) {
hook->master_ = this;
outputs_[hook->type_].push_back(hook);
outputs_[hook->Type()].push_back(hook);
}
bool Have(HookType hook_type) const {
return !outputs_[hook_type].empty();
}
void SetVocabFD(int to) { vocab_fd_ = to; }
int GetVocabFD() const { return vocab_fd_; }
int VocabFile() const { return buffer_.VocabFile(); }
void SetHeader(const HeaderInfo &header) { header_ = header; }
const HeaderInfo &GetHeader() const { return header_; }
// This is called by the pipeline.
void SinkProbs(util::stream::Chains &chains, bool output_q);
void SinkProbs(util::stream::Chains &chains);
unsigned int Steps() const { return Have(PROB_SEQUENTIAL_HOOK); }
private:
void Apply(HookType hook_type, util::stream::Chains &chains);
boost::ptr_vector<OutputHook> outputs_[NUMBER_OF_HOOKS];
int vocab_fd_;
HeaderInfo header_;
ModelBuffer buffer_;
std::string file_base_;
bool keep_buffer_;
boost::ptr_vector<OutputHook> outputs_[NUMBER_OF_HOOKS];
HeaderInfo header_;
};
inline const HeaderInfo &OutputHook::GetHeader() const {
return master_->GetHeader();
}
class PrintHook : public OutputHook {
public:
// Takes ownership
PrintHook(int write_fd, bool verbose_header)
: OutputHook(PROB_SEQUENTIAL_HOOK), file_(write_fd), verbose_header_(verbose_header) {}
inline int OutputHook::GetVocabFD() const {
return master_->GetVocabFD();
}
void Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains);
private:
util::scoped_fd file_;
bool verbose_header_;
};
}} // namespaces

View File

@ -277,27 +277,27 @@ void InterpolateProbabilities(const std::vector<uint64_t> &counts, Master &maste
}
master >> Interpolate(std::max(master.Config().vocab_size_for_unk, counts[0] - 1 /* <s> is not included */), util::stream::ChainPositions(gamma_chains), config.prune_thresholds, config.prune_vocab, config.output_q, specials);
gamma_chains >> util::stream::kRecycle;
output.SinkProbs(master.MutableChains(), config.output_q);
output.SinkProbs(master.MutableChains());
}
class VocabNumbering {
public:
VocabNumbering(StringPiece vocab_file, StringPiece temp_prefix, bool renumber)
: vocab_file_(vocab_file.data(), vocab_file.size()),
temp_prefix_(temp_prefix.data(), temp_prefix.size()),
VocabNumbering(int final_vocab, StringPiece temp_prefix, bool renumber)
: final_vocab_(final_vocab),
renumber_(renumber),
specials_(kBOS, kEOS) {
InitFile(renumber || vocab_file.empty());
if (renumber) {
temporary_.reset(util::MakeTemp(temp_prefix));
}
}
int File() const { return null_delimited_.get(); }
int WriteOnTheFly() const { return renumber_ ? temporary_.get() : final_vocab_; }
// Compute the vocabulary mapping and return the memory used.
std::size_t ComputeMapping(WordIndex type_count) {
if (!renumber_) return 0;
util::scoped_fd previous(null_delimited_.release());
InitFile(vocab_file_.empty());
ngram::SortedVocabulary::ComputeRenumbering(type_count, previous.get(), null_delimited_.get(), vocab_mapping_);
ngram::SortedVocabulary::ComputeRenumbering(type_count, temporary_.get(), final_vocab_, vocab_mapping_);
temporary_.reset();
return sizeof(WordIndex) * vocab_mapping_.size();
}
@ -312,15 +312,9 @@ class VocabNumbering {
const SpecialVocab &Specials() const { return specials_; }
private:
void InitFile(bool temp) {
null_delimited_.reset(temp ?
util::MakeTemp(temp_prefix_) :
util::CreateOrThrow(vocab_file_.c_str()));
}
std::string vocab_file_, temp_prefix_;
util::scoped_fd null_delimited_;
int final_vocab_;
// Out of order vocab file created on the fly.
util::scoped_fd temporary_;
bool renumber_;
@ -349,18 +343,17 @@ void Pipeline(PipelineConfig &config, int text_file, Output &output) {
// master's destructor will wait for chains. But they might be deadlocked if
// this thread dies because e.g. it ran out of memory.
try {
VocabNumbering numbering(config.vocab_file, config.TempPrefix(), config.renumber_vocabulary);
VocabNumbering numbering(output.VocabFile(), config.TempPrefix(), config.renumber_vocabulary);
uint64_t token_count;
WordIndex type_count;
std::string text_file_name;
std::vector<bool> prune_words;
util::scoped_ptr<util::stream::Sort<SuffixOrder, CombineCounts> > sorted_counts(
CountText(text_file, numbering.File(), master, token_count, type_count, text_file_name, prune_words));
CountText(text_file, numbering.WriteOnTheFly(), master, token_count, type_count, text_file_name, prune_words));
std::cerr << "Unigram tokens " << token_count << " types " << type_count << std::endl;
// Create vocab mapping, which uses temporary memory, while nothing else is happening.
std::size_t subtract_for_numbering = numbering.ComputeMapping(type_count);
output.SetVocabFD(numbering.File());
std::cerr << "=== 2/" << master.Steps() << " Calculating and sorting adjusted counts ===" << std::endl;
master.InitForAdjust(*sorted_counts, type_count, subtract_for_numbering);

View File

@ -18,7 +18,6 @@ class Output;
struct PipelineConfig {
std::size_t order;
std::string vocab_file;
util::stream::SortConfig sort;
InitialProbabilitiesConfig initial_probs;
util::stream::ChainConfig read_backoffs;

View File

@ -1,64 +0,0 @@
#include "lm/builder/print.hh"
#include "util/fake_ofstream.hh"
#include "util/file.hh"
#include "util/mmap.hh"
#include "util/scoped.hh"
#include "util/stream/timer.hh"
#include <sstream>
#include <cstring>
namespace lm { namespace builder {
VocabReconstitute::VocabReconstitute(int fd) {
uint64_t size = util::SizeOrThrow(fd);
util::MapRead(util::POPULATE_OR_READ, fd, 0, size, memory_);
const char *const start = static_cast<const char*>(memory_.get());
const char *i;
for (i = start; i != start + size; i += strlen(i) + 1) {
map_.push_back(i);
}
// Last one for LookupPiece.
map_.push_back(i);
}
void PrintARPA::Sink(util::stream::Chains &chains) {
chains >> boost::ref(*this);
}
void PrintARPA::Run(const util::stream::ChainPositions &positions) {
VocabReconstitute vocab(GetVocabFD());
util::FakeOFStream out(out_fd_.get());
// Write header.
if (verbose_header_) {
out << "# Input file: " << GetHeader().input_file << '\n';
out << "# Token count: " << GetHeader().token_count << '\n';
out << "# Smoothing: Modified Kneser-Ney" << '\n';
}
out << "\\data\\\n";
for (size_t i = 0; i < positions.size(); ++i) {
out << "ngram " << (i+1) << '=' << GetHeader().counts_pruned[i] << '\n';
}
out << '\n';
for (unsigned order = 1; order <= positions.size(); ++order) {
out << "\\" << order << "-grams:" << '\n';
for (NGramStream<BuildingPayload> stream(positions[order - 1]); stream; ++stream) {
// Correcting for numerical precision issues. Take that IRST.
out << stream->Value().complete.prob << '\t' << vocab.Lookup(*stream->begin());
for (const WordIndex *i = stream->begin() + 1; i != stream->end(); ++i) {
out << ' ' << vocab.Lookup(*i);
}
if (order != positions.size())
out << '\t' << stream->Value().complete.backoff;
out << '\n';
}
out << '\n';
}
out << "\\end\\\n";
}
}} // namespaces

40
lm/common/CMakeLists.txt Normal file
View File

@ -0,0 +1,40 @@
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
#
# In order to set correct paths to these files
# in case this variable is referenced by CMake files in the parent directory,
# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
#
set(KENLM_COMMON_SOURCE
${CMAKE_CURRENT_SOURCE_DIR}/model_buffer.cc
${CMAKE_CURRENT_SOURCE_DIR}/print.cc
${CMAKE_CURRENT_SOURCE_DIR}/renumber.cc
${CMAKE_CURRENT_SOURCE_DIR}/size_option.cc
)
# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm_common OBJECT ${KENLM_COMMON_SOURCE})

View File

@ -1,2 +1,2 @@
fakelib common : [ glob *.cc : *test.cc *main.cc ]
../../util//kenutil ../../util/stream//stream ../../util/double-conversion//double-conversion ..//kenlm ;
../../util//kenutil ../../util/stream//stream ../../util/double-conversion//double-conversion ..//kenlm /top//boost_program_options ;

View File

@ -1,8 +1,7 @@
#ifndef LM_BUILDER_JOINT_ORDER_H
#define LM_BUILDER_JOINT_ORDER_H
#ifndef LM_COMMON_JOINT_ORDER_H
#define LM_COMMON_JOINT_ORDER_H
#include "lm/common/ngram_stream.hh"
#include "lm/builder/payload.hh"
#include "lm/lm_exception.hh"
#ifdef DEBUG
@ -12,15 +11,19 @@
#include <cstring>
namespace lm { namespace builder {
namespace lm {
template <class Callback, class Compare> void JointOrder(const util::stream::ChainPositions &positions, Callback &callback) {
// Allow matching to reference streams[-1].
NGramStreams<BuildingPayload> streams_with_dummy;
streams_with_dummy.InitWithDummy(positions);
NGramStream<BuildingPayload> *streams = streams_with_dummy.begin() + 1;
util::FixedArray<ProxyStream<NGramHeader> > streams_with_dummy(positions.size() + 1);
// A bogus stream for [-1].
streams_with_dummy.push_back();
for (std::size_t i = 0; i < positions.size(); ++i) {
streams_with_dummy.push_back(positions[i], NGramHeader(NULL, i + 1));
}
ProxyStream<NGramHeader> *streams = streams_with_dummy.begin() + 1;
unsigned int order;
std::size_t order;
for (order = 0; order < positions.size() && streams[order]; ++order) {}
assert(order); // should always have <unk>.
@ -31,11 +34,11 @@ template <class Callback, class Compare> void JointOrder(const util::stream::Cha
less_compare.push_back(i + 1);
#endif // DEBUG
unsigned int current = 0;
std::size_t current = 0;
while (true) {
// Does the context match the lower one?
if (!memcmp(streams[static_cast<int>(current) - 1]->begin(), streams[current]->begin() + Compare::kMatchOffset, sizeof(WordIndex) * current)) {
callback.Enter(current, *streams[current]);
callback.Enter(current, streams[current].Get());
// Transition to looking for extensions.
if (++current < order) continue;
}
@ -51,7 +54,7 @@ template <class Callback, class Compare> void JointOrder(const util::stream::Cha
while(true) {
assert(current > 0);
--current;
callback.Exit(current, *streams[current]);
callback.Exit(current, streams[current].Get());
if (++streams[current]) break;
@ -63,6 +66,6 @@ template <class Callback, class Compare> void JointOrder(const util::stream::Cha
}
}
}} // namespaces
} // namespaces
#endif // LM_BUILDER_JOINT_ORDER_H
#endif // LM_COMMON_JOINT_ORDER_H

View File

@ -8,25 +8,30 @@
#include <boost/lexical_cast.hpp>
namespace lm { namespace common {
namespace lm {
namespace {
const char kMetadataHeader[] = "KenLM intermediate binary file";
} // namespace
ModelBuffer::ModelBuffer(const std::string &file_base, bool keep_buffer, bool output_q)
: file_base_(file_base), keep_buffer_(keep_buffer), output_q_(output_q) {}
ModelBuffer::ModelBuffer(const std::string &file_base)
: file_base_(file_base), keep_buffer_(false) {
ModelBuffer::ModelBuffer(StringPiece file_base, bool keep_buffer, bool output_q)
: file_base_(file_base.data(), file_base.size()), keep_buffer_(keep_buffer), output_q_(output_q),
vocab_file_(keep_buffer ? util::CreateOrThrow((file_base_ + ".vocab").c_str()) : util::MakeTemp(file_base_)) {}
ModelBuffer::ModelBuffer(StringPiece file_base)
: file_base_(file_base.data(), file_base.size()), keep_buffer_(false) {
const std::string full_name = file_base_ + ".kenlm_intermediate";
util::FilePiece in(full_name.c_str());
StringPiece token = in.ReadLine();
UTIL_THROW_IF2(token != kMetadataHeader, "File " << full_name << " begins with \"" << token << "\" not " << kMetadataHeader);
token = in.ReadDelimited();
UTIL_THROW_IF2(token != "Order", "Expected Order, got \"" << token << "\" in " << full_name);
unsigned long order = in.ReadULong();
UTIL_THROW_IF2(token != "Counts", "Expected Counts, got \"" << token << "\" in " << full_name);
char got;
while ((got = in.get()) == ' ') {
counts_.push_back(in.ReadULong());
}
UTIL_THROW_IF2(got != '\n', "Expected newline at end of counts.");
token = in.ReadDelimited();
UTIL_THROW_IF2(token != "Payload", "Expected Payload, got \"" << token << "\" in " << full_name);
@ -39,16 +44,16 @@ ModelBuffer::ModelBuffer(const std::string &file_base)
UTIL_THROW(util::Exception, "Unknown payload " << token);
}
files_.Init(order);
for (unsigned long i = 0; i < order; ++i) {
vocab_file_.reset(util::OpenReadOrThrow((file_base_ + ".vocab").c_str()));
files_.Init(counts_.size());
for (unsigned long i = 0; i < counts_.size(); ++i) {
files_.push_back(util::OpenReadOrThrow((file_base_ + '.' + boost::lexical_cast<std::string>(i + 1)).c_str()));
}
}
// virtual destructor
ModelBuffer::~ModelBuffer() {}
void ModelBuffer::Sink(util::stream::Chains &chains) {
void ModelBuffer::Sink(util::stream::Chains &chains, const std::vector<uint64_t> &counts) {
counts_ = counts;
// Open files.
files_.Init(chains.size());
for (std::size_t i = 0; i < chains.size(); ++i) {
@ -64,19 +69,23 @@ void ModelBuffer::Sink(util::stream::Chains &chains) {
if (keep_buffer_) {
util::scoped_fd metadata(util::CreateOrThrow((file_base_ + ".kenlm_intermediate").c_str()));
util::FakeOFStream meta(metadata.get(), 200);
meta << kMetadataHeader << "\nOrder " << chains.size() << "\nPayload " << (output_q_ ? "q" : "pb") << '\n';
meta << kMetadataHeader << "\nCounts";
for (std::vector<uint64_t>::const_iterator i = counts_.begin(); i != counts_.end(); ++i) {
meta << ' ' << *i;
}
meta << "\nPayload " << (output_q_ ? "q" : "pb") << '\n';
}
}
void ModelBuffer::Source(util::stream::Chains &chains) {
assert(chains.size() == files_.size());
for (unsigned int i = 0; i < files_.size(); ++i) {
assert(chains.size() <= files_.size());
for (unsigned int i = 0; i < chains.size(); ++i) {
chains[i] >> util::stream::PRead(files_[i].get());
}
}
std::size_t ModelBuffer::Order() const {
return files_.size();
void ModelBuffer::Source(std::size_t order_minus_1, util::stream::Chain &chain) {
chain >> util::stream::PRead(files_[order_minus_1].get());
}
}} // namespaces
} // namespace

View File

@ -1,5 +1,5 @@
#ifndef LM_BUILDER_MODEL_BUFFER_H
#define LM_BUILDER_MODEL_BUFFER_H
#ifndef LM_COMMON_MODEL_BUFFER_H
#define LM_COMMON_MODEL_BUFFER_H
/* Format with separate files in suffix order. Each file contains
* n-grams of the same order.
@ -9,37 +9,55 @@
#include "util/fixed_array.hh"
#include <string>
#include <vector>
namespace util { namespace stream { class Chains; } }
namespace util { namespace stream {
class Chains;
class Chain;
}} // namespaces
namespace lm { namespace common {
namespace lm {
class ModelBuffer {
public:
// Construct for writing.
ModelBuffer(const std::string &file_base, bool keep_buffer, bool output_q);
// Construct for writing. Must call VocabFile() and fill it with null-delimited vocab words.
ModelBuffer(StringPiece file_base, bool keep_buffer, bool output_q);
// Load from file.
explicit ModelBuffer(const std::string &file_base);
explicit ModelBuffer(StringPiece file_base);
// explicit for virtual destructor.
~ModelBuffer();
void Sink(util::stream::Chains &chains);
// Must call VocabFile and populate before calling this function.
void Sink(util::stream::Chains &chains, const std::vector<uint64_t> &counts);
// Read files and write to the given chains. If fewer chains are provided,
// only do the lower orders.
void Source(util::stream::Chains &chains);
void Source(std::size_t order_minus_1, util::stream::Chain &chain);
// The order of the n-gram model that is associated with the model buffer.
std::size_t Order() const;
std::size_t Order() const { return counts_.size(); }
// Requires Sink or load from file.
const std::vector<uint64_t> &Counts() const {
assert(!counts_.empty());
return counts_;
}
int VocabFile() const { return vocab_file_.get(); }
int StealVocabFile() { return vocab_file_.release(); }
bool Keep() const { return keep_buffer_; }
private:
const std::string file_base_;
const bool keep_buffer_;
bool output_q_;
std::vector<uint64_t> counts_;
util::scoped_fd vocab_file_;
util::FixedArray<util::scoped_fd> files_;
};
}} // namespaces
} // namespace lm
#endif // LM_BUILDER_MODEL_BUFFER_H
#endif // LM_COMMON_MODEL_BUFFER_H

View File

@ -16,6 +16,8 @@ class NGramHeader {
NGramHeader(void *begin, std::size_t order)
: begin_(static_cast<WordIndex*>(begin)), end_(begin_ + order) {}
NGramHeader() : begin_(NULL), end_(NULL) {}
const uint8_t *Base() const { return reinterpret_cast<const uint8_t*>(begin_); }
uint8_t *Base() { return reinterpret_cast<uint8_t*>(begin_); }
@ -32,6 +34,7 @@ class NGramHeader {
const WordIndex *end() const { return end_; }
WordIndex *end() { return end_; }
std::size_t size() const { return end_ - begin_; }
std::size_t Order() const { return end_ - begin_; }
private:
@ -42,6 +45,8 @@ template <class PayloadT> class NGram : public NGramHeader {
public:
typedef PayloadT Payload;
NGram() : NGramHeader(NULL, 0) {}
NGram(void *begin, std::size_t order) : NGramHeader(begin, order) {}
// Would do operator++ but that can get confusing for a stream.

View File

@ -10,24 +10,21 @@
namespace lm {
template <class Payload> class NGramStream {
template <class Proxy> class ProxyStream {
public:
NGramStream() : gram_(NULL, 0) {}
// Make an invalid stream.
ProxyStream() {}
NGramStream(const util::stream::ChainPosition &position) : gram_(NULL, 0) {
Init(position);
explicit ProxyStream(const util::stream::ChainPosition &position, const Proxy &proxy = Proxy())
: proxy_(proxy), stream_(position) {
proxy_.ReBase(stream_.Get());
}
void Init(const util::stream::ChainPosition &position) {
stream_.Init(position);
gram_ = NGram<Payload>(stream_.Get(), NGram<Payload>::OrderFromSize(position.GetChain().EntrySize()));
}
Proxy &operator*() { return proxy_; }
const Proxy &operator*() const { return proxy_; }
NGram<Payload> &operator*() { return gram_; }
const NGram<Payload> &operator*() const { return gram_; }
NGram<Payload> *operator->() { return &gram_; }
const NGram<Payload> *operator->() const { return &gram_; }
Proxy *operator->() { return &proxy_; }
const Proxy *operator->() const { return &proxy_; }
void *Get() { return stream_.Get(); }
const void *Get() const { return stream_.Get(); }
@ -36,21 +33,25 @@ template <class Payload> class NGramStream {
bool operator!() const { return !stream_; }
void Poison() { stream_.Poison(); }
NGramStream &operator++() {
ProxyStream<Proxy> &operator++() {
++stream_;
gram_.ReBase(stream_.Get());
proxy_.ReBase(stream_.Get());
return *this;
}
private:
NGram<Payload> gram_;
Proxy proxy_;
util::stream::Stream stream_;
};
template <class Payload> inline util::stream::Chain &operator>>(util::stream::Chain &chain, NGramStream<Payload> &str) {
str.Init(chain.Add());
return chain;
}
template <class Payload> class NGramStream : public ProxyStream<NGram<Payload> > {
public:
// Make an invalid stream.
NGramStream() {}
explicit NGramStream(const util::stream::ChainPosition &position) :
ProxyStream<NGram<Payload> >(position, NGram<Payload>(NULL, NGram<Payload>::OrderFromSize(position.GetChain().EntrySize()))) {}
};
template <class Payload> class NGramStreams : public util::stream::GenericStreams<NGramStream<Payload> > {
private:

62
lm/common/print.cc Normal file
View File

@ -0,0 +1,62 @@
#include "lm/common/print.hh"
#include "lm/common/ngram_stream.hh"
#include "util/fake_ofstream.hh"
#include "util/file.hh"
#include "util/mmap.hh"
#include "util/scoped.hh"
#include <sstream>
#include <cstring>
namespace lm {
// Memory-map the null-delimited vocabulary file and build an index from
// WordIndex to the start of each word's string.
VocabReconstitute::VocabReconstitute(int fd) {
  uint64_t size = util::SizeOrThrow(fd);
  util::MapRead(util::POPULATE_OR_READ, fd, 0, size, memory_);
  const char *const start = static_cast<const char*>(memory_.get());
  const char *i;
  // Words are '\0'-delimited; step over each word plus its terminator.
  for (i = start; i != start + size; i += strlen(i) + 1) {
    map_.push_back(i);
  }
  // Last one for LookupPiece.
  map_.push_back(i);
}
namespace {
// Write the probability and the space-separated words of the n-gram under
// the stream cursor.  The backoff (if any) and the trailing newline are
// appended by the caller, since they differ by payload type.
template <class Payload> void PrintLead(const VocabReconstitute &vocab, ProxyStream<Payload> &stream, util::FakeOFStream &out) {
  out << stream->Value().prob << '\t' << vocab.Lookup(*stream->begin());
  for (const WordIndex *i = stream->begin() + 1; i != stream->end(); ++i) {
    out << ' ' << vocab.Lookup(*i);
  }
}
} // namespace
// Stream the model out in ARPA format: the \data\ header with per-order
// counts, then one "\N-grams:" section per order.  Orders below the maximum
// carry ProbBackoff payloads (prob + backoff); the highest order carries
// Prob only, so it is handled by a separate loop.
void PrintARPA::Run(const util::stream::ChainPositions &positions) {
  VocabReconstitute vocab(vocab_fd_);
  util::FakeOFStream out(out_fd_);
  out << "\\data\\\n";
  for (size_t i = 0; i < positions.size(); ++i) {
    out << "ngram " << (i+1) << '=' << counts_[i] << '\n';
  }
  out << '\n';

  // All orders except the last: prob <tab> words <tab> backoff.
  for (unsigned order = 1; order < positions.size(); ++order) {
    out << "\\" << order << "-grams:" << '\n';
    for (ProxyStream<NGram<ProbBackoff> > stream(positions[order - 1], NGram<ProbBackoff>(NULL, order)); stream; ++stream) {
      PrintLead(vocab, stream, out);
      out << '\t' << stream->Value().backoff << '\n';
    }
    out << '\n';
  }

  // Highest order: no backoff field.
  out << "\\" << positions.size() << "-grams:" << '\n';
  for (ProxyStream<NGram<Prob> > stream(positions.back(), NGram<Prob>(NULL, positions.size())); stream; ++stream) {
    PrintLead(vocab, stream, out);
    out << '\n';
  }
  out << '\n';
  out << "\\end\\\n";
}
} // namespace lm

58
lm/common/print.hh Normal file
View File

@ -0,0 +1,58 @@
#ifndef LM_COMMON_PRINT_H
#define LM_COMMON_PRINT_H
#include "lm/word_index.hh"
#include "util/mmap.hh"
#include "util/string_piece.hh"
#include <cassert>
#include <vector>
namespace util { namespace stream { class ChainPositions; }}
// Warning: PrintARPA routines read all unigrams before all bigrams before all
// trigrams etc. So if other parts of the chain move jointly, you'll have to
// buffer.
namespace lm {
// Maps a WordIndex back to its surface string, using a memory-mapped
// null-delimited vocabulary file.
class VocabReconstitute {
  public:
    // fd must be alive for life of this object; does not take ownership.
    explicit VocabReconstitute(int fd);

    // Null-terminated word string.  index must be < Size().
    const char *Lookup(WordIndex index) const {
      assert(index < map_.size() - 1);
      return map_[index];
    }

    // Same lookup as a StringPiece; the length excludes the '\0' delimiter.
    // Bounds-checked the same way as Lookup, since this also reads the
    // following map_ entry to compute the length.
    StringPiece LookupPiece(WordIndex index) const {
      assert(index < map_.size() - 1);
      return StringPiece(map_[index], map_[index + 1] - 1 - map_[index]);
    }

    // Number of words in the vocabulary.
    std::size_t Size() const {
      // There's an extra entry to support StringPiece lengths.
      return map_.size() - 1;
    }

  private:
    util::scoped_memory memory_;          // mapping of the vocab file
    std::vector<const char*> map_;        // word starts + one-past-end sentinel
};
// Writes an ARPA file from streams of n-grams, one chain position per order.
// Per the file-level warning, it consumes all unigrams before bigrams, etc.
class PrintARPA {
  public:
    // Does not take ownership of vocab_fd or out_fd.
    explicit PrintARPA(int vocab_fd, int out_fd, const std::vector<uint64_t> &counts)
      : vocab_fd_(vocab_fd), out_fd_(out_fd), counts_(counts) {}

    void Run(const util::stream::ChainPositions &positions);

  private:
    int vocab_fd_;                   // null-delimited vocabulary file
    int out_fd_;                     // destination for the ARPA text
    std::vector<uint64_t> counts_;   // n-gram count per order, for the \data\ section
};
} // namespace lm
#endif // LM_COMMON_PRINT_H

24
lm/common/size_option.cc Normal file
View File

@ -0,0 +1,24 @@
#include <boost/program_options.hpp>
#include "util/usage.hh"
namespace lm {
namespace {

// Notifier functor for boost::program_options: parses a human-readable size
// string (e.g. "1T", "10k") with util::ParseSize and stores the resulting
// byte count in the referenced variable.
class SizeNotify {
  public:
    explicit SizeNotify(std::size_t &out) : behind_(out) {}

    void operator()(const std::string &from) {
      behind_ = util::ParseSize(from);
    }

  private:
    std::size_t &behind_;  // destination owned by the caller
};

}
// Build a boost::program_options value for data sizes: the option is read as
// a string, and the SizeNotify notifier converts it to bytes and writes `to`.
boost::program_options::typed_value<std::string> *SizeOption(std::size_t &to, const char *default_value) {
  return boost::program_options::value<std::string>()->notifier(SizeNotify(to))->default_value(default_value);
}
} // namespace lm

11
lm/common/size_option.hh Normal file
View File

@ -0,0 +1,11 @@
#include <boost/program_options.hpp>
#include <cstddef>
#include <string>
namespace lm {
// Create a boost program option for data sizes. This parses sizes like 1T and 10k.
boost::program_options::typed_value<std::string> *SizeOption(std::size_t &to, const char *default_value);
} // namespace lm

View File

@ -1,9 +1,9 @@
#ifndef LM_BUILDER_SPECIAL_H
#define LM_BUILDER_SPECIAL_H
#ifndef LM_COMMON_SPECIAL_H
#define LM_COMMON_SPECIAL_H
#include "lm/word_index.hh"
namespace lm { namespace builder {
namespace lm {
class SpecialVocab {
public:
@ -22,6 +22,6 @@ class SpecialVocab {
WordIndex eos_;
};
}} // namespaces
} // namespace lm
#endif // LM_BUILDER_SPECIAL_H
#endif // LM_COMMON_SPECIAL_H

62
lm/filter/CMakeLists.txt Normal file
View File

@ -0,0 +1,62 @@
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
#
# In order to set correct paths to these files
# in case this variable is referenced by CMake files in the parent directory,
# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
#
set(KENLM_FILTER_SOURCE
  ${CMAKE_CURRENT_SOURCE_DIR}/arpa_io.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/phrase.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/vocab.cc
)
# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm_filter OBJECT ${KENLM_FILTER_SOURCE})
# Explicitly list the executable files to be compiled
# (each entry corresponds to a <name>_main.cc file in this directory)
set(EXE_LIST
  filter
  phrase_table_vocab
)
# Iterate through the executable list
foreach(exe ${EXE_LIST})
  # Compile the executable, linking against the requisite dependent object files
  add_executable(${exe} ${exe}_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_filter> $<TARGET_OBJECTS:kenlm_util>)
  # Link the executable against boost
  target_link_libraries(${exe} ${Boost_LIBRARIES})
  # Group executables together
  set_target_properties(${exe} PROPERTIES FOLDER executables)
# End for loop
endforeach(exe)

View File

@ -5,10 +5,7 @@
#include <vector>
#include "StatisticsBasedScorer.h"
#include "moses/FF/InternalTree.h"
using Moses::TreePointer;
using Moses::InternalTree;
#include "InternalTree.h"
namespace MosesTuning
{

110
mert/InternalTree.cpp Normal file
View File

@ -0,0 +1,110 @@
#include "InternalTree.h"
namespace MosesTuning
{
// Construct a tree from its bracketed string form.  A line containing none
// of '[', ']' or ' ' is taken as a bare node label; anything else is parsed
// recursively via AddSubTree.
InternalTree::InternalTree(const std::string & line, const bool terminal):
  m_isTerminal(terminal)
{
  const bool bare_label = (line.find_first_of("[] ") == std::string::npos);
  if (bare_label) {
    m_value = line;
  } else {
    AddSubTree(line, 0);
  }
}
// Recursively parse the bracketed tree string from position pos, filling in
// this node's label (m_value) and its children.  Returns the position just
// past the consumed portion (line.size() if the input ended first).
size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
{
  std::string value;
  char token = 0;

  while (token != ']' && pos != std::string::npos) {
    size_t oldpos = pos;
    pos = line.find_first_of("[] ", pos);
    if (pos == std::string::npos) break;
    token = line[pos];
    value = line.substr(oldpos,pos-oldpos);
    if (token == '[') {
      if (m_value.size() > 0) {
        // Label already set: the bracket opens a new child subtree.
        m_children.push_back(boost::make_shared<InternalTree>(value,false));
        pos = m_children.back()->AddSubTree(line, pos+1);
      } else {
        // No label yet: text before '[' (if any) is this node's own label.
        if (value.size() > 0) {
          m_value = value;
        }
        pos = AddSubTree(line, pos+1);
      }
    } else if (token == ' ' || token == ']') {
      if (value.size() > 0 && !(m_value.size() > 0)) {
        // First bare token becomes this node's label.
        m_value = value;
      } else if (value.size() > 0) {
        // Subsequent bare tokens become terminal children.
        m_isTerminal = false;
        m_children.push_back(boost::make_shared<InternalTree>(value,true));
      }
      if (token == ' ') {
        pos++;  // skip the separator; ']' ends the loop via the condition
      }
    }
    // Having any child makes this node non-terminal.
    if (m_children.size() > 0) {
      m_isTerminal = false;
    }
  }
  if (pos == std::string::npos) {
    return line.size();
  }
  return std::min(line.size(),pos+1);
}
std::string InternalTree::GetString(bool start) const
{
std::string ret = "";
if (!start) {
ret += " ";
}
if (!m_isTerminal) {
ret += "[";
}
ret += m_value;
for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) {
ret += (*it)->GetString(false);
}
if (!m_isTerminal) {
ret += "]";
}
return ret;
}
// Replace this tree's nonterminal leaves, in left-to-right order, with the
// trees in 'previous' (one leaf consumed per entry).
void InternalTree::Combine(const std::vector<TreePointer> &previous)
{
  std::vector<TreePointer>::iterator it;  // filled in by the generator on each call
  bool found = false;
  leafNT next_leafNT(this);  // coroutine-style generator over nonterminal leaves
  for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
    found = next_leafNT(it);
    if (found) {
      // Substitute the subtree in place of the leaf nonterminal.
      *it = *it_prev;
    } else {
      // More subtrees supplied than leaf nonterminals available.
      std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
    }
  }
}
}

77
mert/InternalTree.h Normal file
View File

@ -0,0 +1,77 @@
#pragma once
#include <iostream>
#include <string>
#include <map>
#include <vector>
#include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp>
#include "util/generator.hh"
#include "util/exception.hh"
namespace MosesTuning
{
class InternalTree;
typedef boost::shared_ptr<InternalTree> TreePointer;
typedef int NTLabel;
// A parse-tree node used by tree-based scorers in mert (included by
// HwcmScorer).  Each node has a string label (m_value), child subtrees, and
// a terminal flag; trees are built from bracketed strings via AddSubTree.
class InternalTree
{
  std::string m_value;                 // node label
  std::vector<TreePointer> m_children; // ordered child subtrees
  bool m_isTerminal;                   // true for terminal (leaf word) nodes
public:
  InternalTree(const std::string & line, const bool terminal = false);
  // Deep copy: clones every child so the two trees share no nodes.
  InternalTree(const InternalTree & tree):
    m_value(tree.m_value),
    m_isTerminal(tree.m_isTerminal) {
    const std::vector<TreePointer> & children = tree.m_children;
    // Pre-increment avoids the needless iterator copy of post-increment.
    for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); ++it) {
      m_children.push_back(boost::make_shared<InternalTree>(**it));
    }
  }
  // Parse the bracketed substring of 'line' starting at 'start'; returns the
  // position just past the consumed portion.
  size_t AddSubTree(const std::string & line, size_t start);
  // Serialize back to the bracketed string representation.
  std::string GetString(bool start = true) const;
  // Substitute this tree's nonterminal leaves, left to right, with 'previous'.
  void Combine(const std::vector<TreePointer> &previous);
  const std::string & GetLabel() const {
    return m_value;
  }
  // Number of direct children.
  size_t GetLength() const {
    return m_children.size();
  }
  std::vector<TreePointer> & GetChildren() {
    return m_children;
  }
  bool IsTerminal() const {
    return m_isTerminal;
  }
  // Leaf nonterminal: not marked terminal and has no children.
  bool IsLeafNT() const {
    return (!m_isTerminal && m_children.size() == 0);
  }
};
// Python-like generator that yields next nonterminal leaf on every call.
// Implemented with the coroutine macros from util/generator.hh: each call to
// operator() resumes after the previous $yield.
$generator(leafNT)
{
  std::vector<TreePointer>::iterator it;  // current position in the children
  InternalTree* tree;                     // subtree currently being walked
  leafNT(InternalTree* root = 0): tree(root) {}
  $emit(std::vector<TreePointer>::iterator)
  for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
    if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
      // Leaf nonterminal: hand an iterator to it back to the caller.
      $yield(it);
    } else if ((*it)->GetLength() > 0) {
      // Node with children: descend into the subtree.
      if ((*it).get()) { // normal pointer to same object that TreePointer points to
        $restart(tree = (*it).get());
      }
    }
  }
  $stop;
};
}

View File

@ -30,7 +30,7 @@ InterpolatedScorer.cpp
Point.cpp
PerScorer.cpp
HwcmScorer.cpp
../moses/FF/InternalTree.cpp
InternalTree.cpp
Scorer.cpp
ScorerFactory.cpp
Optimizer.cpp

View File

@ -159,13 +159,15 @@ int main(int argc, char* argv[])
}
StaticData& SD = const_cast<StaticData&>(StaticData::Instance());
SD.SetUseLatticeMBR(true);
LMBR_Options& lmbr = SD.options().lmbr;
MBR_Options& mbr = SD.options().mbr;
lmbr.enabled = true;
boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper);
if (!ioWrapper) {
throw runtime_error("Failed to initialise IOWrapper");
}
size_t nBestSize = SD.GetMBRSize();
size_t nBestSize = mbr.size;
if (nBestSize <= 0) {
throw new runtime_error("Non-positive size specified for n-best list");
@ -187,13 +189,13 @@ int main(int argc, char* argv[])
manager.CalcNBest(nBestSize, nBestList,true);
//grid search
BOOST_FOREACH(float const& p, pgrid) {
SD.SetLatticeMBRPrecision(p);
lmbr.precision = p;
BOOST_FOREACH(float const& r, rgrid) {
SD.SetLatticeMBRPRatio(r);
lmbr.ratio = r;
BOOST_FOREACH(size_t const prune_i, prune_grid) {
SD.SetLatticeMBRPruningFactor(size_t(prune_i));
lmbr.pruning_factor = prune_i;
BOOST_FOREACH(float const& scale_i, scale_grid) {
SD.SetMBRScale(scale_i);
mbr.scale = scale_i;
size_t lineCount = source->GetTranslationId();
cout << lineCount << " ||| " << p << " "
<< r << " " << size_t(prune_i) << " " << scale_i

View File

@ -27,7 +27,7 @@ BaseManager::GetSource() const
return m_source;
}
const ttasksptr&
const ttasksptr
BaseManager::GetTtask() const
{
return m_ttask.lock();
@ -140,6 +140,14 @@ void BaseManager::WriteApplicationContext(std::ostream &out,
}
}
AllOptions const&
BaseManager::
options() const
{
return GetTtask()->options();
}
} // namespace

View File

@ -5,7 +5,7 @@
#include <string>
#include "ScoreComponentCollection.h"
#include "InputType.h"
#include "moses/parameters/AllOptions.h"
namespace Moses
{
class ScoreComponentCollection;
@ -50,7 +50,8 @@ public:
//! the input sentence being decoded
const InputType& GetSource() const;
const ttasksptr& GetTtask() const;
const ttasksptr GetTtask() const;
AllOptions const& options() const;
virtual void Decode() = 0;
// outputs

View File

@ -53,7 +53,7 @@ ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) :
ChartCellBase(startPos, endPos), m_manager(manager)
{
const StaticData &staticData = StaticData::Instance();
m_nBestIsEnabled = staticData.IsNBestEnabled();
m_nBestIsEnabled = staticData.options().nbest.enabled;
}
ChartCell::~ChartCell() {}
@ -100,7 +100,7 @@ void ChartCell::Decode(const ChartTranslationOptionList &transOptList
}
// pluck things out of queue and add to hypo collection
const size_t popLimit = staticData.GetCubePruningPopLimit();
const size_t popLimit = staticData.options().cube.pop_limit;
for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
ChartHypothesis *hypo = queue.Pop();
AddHypothesis(hypo);

View File

@ -287,8 +287,11 @@ void ChartHypothesis::CleanupArcList()
* so we'll keep all of arc list if nedd distinct n-best list
*/
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphHypergraph();
size_t nBestSize = staticData.options().nbest.nbest_size;
bool distinctNBest = (staticData.options().nbest.only_distinct
|| staticData.options().mbr.enabled
|| staticData.GetOutputSearchGraph()
|| staticData.GetOutputSearchGraphHypergraph());
if (!distinctNBest && m_arcList->size() > nBestSize) {
// prune arc list only if there too many arcs

View File

@ -38,8 +38,8 @@ ChartHypothesisCollection::ChartHypothesisCollection()
const StaticData &staticData = StaticData::Instance();
m_beamWidth = staticData.GetBeamWidth();
m_maxHypoStackSize = staticData.GetMaxHypoStackSize();
m_nBestIsEnabled = staticData.IsNBestEnabled();
m_maxHypoStackSize = staticData.options().search.stack_size;
m_nBestIsEnabled = staticData.options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
}

View File

@ -207,7 +207,7 @@ void ChartManager::CalcNBest(
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
// too many translations are identical.
const StaticData &staticData = StaticData::Instance();
const std::size_t nBestFactor = staticData.GetNBestFactor();
const std::size_t nBestFactor = staticData.options().nbest.factor;
std::size_t numDerivations = (nBestFactor == 0) ? n*1000 : n*nBestFactor;
// Extract the derivations.
@ -318,13 +318,14 @@ void ChartManager::OutputBest(OutputCollector *collector) const
void ChartManager::OutputNBest(OutputCollector *collector) const
{
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
size_t nBestSize = staticData.options().nbest.nbest_size;
if (nBestSize > 0) {
const size_t translationId = m_source.GetTranslationId();
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO "
<< staticData.options().nbest.output_file_path << endl);
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
CalcNBest(nBestSize, nBestList,staticData.options().nbest.only_distinct);
OutputNBestList(collector, nBestList, translationId);
IFVERBOSE(2) {
PrintUserTime("N-Best Hypotheses Generation Time:");
@ -348,10 +349,9 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
FixPrecision(out);
}
bool includeWordAlignment =
StaticData::Instance().PrintAlignmentInfoInNbest();
bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
NBestOptions const& nbo = StaticData::Instance().options().nbest;
bool includeWordAlignment = nbo.include_alignment_info;
bool PrintNBestTrees = nbo.print_trees;
for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
p != nBestList.end(); ++p) {
@ -620,9 +620,9 @@ void ChartManager::OutputDetailedTranslationReport(
if (staticData.IsDetailedAllTranslationReportingEnabled()) {
const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
size_t nBestSize = staticData.GetNBestSize();
size_t nBestSize = staticData.options().nbest.nbest_size;
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
CalcNBest(nBestSize, nBestList, staticData.options().nbest.nbest_size);
OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
}

View File

@ -106,7 +106,8 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
targetPhrase->SetTargetLHS(targetLHS);
targetPhrase->SetAlignmentInfo("0-0");
targetPhrase->EvaluateInIsolation(*unksrc);
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.PrintNBestTrees() || staticData.GetTreeStructure() != NULL) {
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.options().nbest.print_trees || staticData.GetTreeStructure() != NULL) {
targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
}

View File

@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id$
#include "ConfusionNet.h"
@ -65,9 +66,9 @@ ConfusionNet() : InputType()
{
stats.createOne();
const StaticData& staticData = StaticData::Instance();
if (staticData.IsSyntax()) {
m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
const StaticData& SD = StaticData::Instance();
if (SD.IsSyntax()) {
m_defaultLabelSet.insert(SD.GetInputDefaultNonTerminal());
}
UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
}

View File

@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id: ExportInterface.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
/***********************************************************************
@ -63,9 +64,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <xmlrpc-c/base.hpp>
#include <xmlrpc-c/registry.hpp>
#include <xmlrpc-c/server_abyss.hpp>
#include "server/Translator.h"
#include "server/Optimizer.h"
#include "server/Updater.h"
#include "server/Server.h"
#endif
using namespace std;
@ -147,41 +146,9 @@ int
run_as_server()
{
#ifdef HAVE_XMLRPC_C
int port;
params.SetParameter(port, "server-port", 8080);
bool isSerial;
params.SetParameter(isSerial, "serial", false);
string logfile;
params.SetParameter(logfile, "server-log", string(""));
size_t num_threads;
params.SetParameter(num_threads, "threads", size_t(10));
if (isSerial) VERBOSE(1,"Running server in serial mode." << endl);
xmlrpc_c::registry myRegistry;
xmlrpc_c::methodPtr const translator(new MosesServer::Translator(num_threads));
xmlrpc_c::methodPtr const updater(new MosesServer::Updater);
xmlrpc_c::methodPtr const optimizer(new MosesServer::Optimizer);
myRegistry.addMethod("translate", translator);
myRegistry.addMethod("updater", updater);
myRegistry.addMethod("optimize", optimizer);
xmlrpc_c::serverAbyss myAbyssServer(myRegistry, port, logfile);
XVERBOSE(1,"Listening on port " << port << endl);
if (isSerial) {
while(1) myAbyssServer.runOnce();
} else myAbyssServer.run();
std::cerr << "xmlrpc_c::serverAbyss.run() returned but should not." << std::endl;
// #pragma message("BUILDING MOSES WITH SERVER SUPPORT")
#else
// #pragma message("BUILDING MOSES WITHOUT SERVER SUPPORT")
std::cerr << "Moses was compiled without server support." << endl;
MosesServer::Server server(params);
return server.run(); // actually: don't return. see Server::run()
#endif
return 1;
}
int
@ -212,31 +179,58 @@ batch_run()
ThreadPool pool(staticData.ThreadCount());
#endif
// using context for adaptation:
// e.g., context words / strings from config file / cmd line
std::string context_string;
params.SetParameter(context_string,"context-string",string(""));
// ... or weights for documents/domains from config file / cmd. line
std::string context_weights;
params.SetParameter(context_weights,"context-weights",string(""));
// main loop over set of input sentences
// ... or the surrounding context (--context-window ...)
size_t size_t_max = std::numeric_limits<size_t>::max();
bool use_context_window = ioWrapper->GetLookAhead() || ioWrapper->GetLookBack();
bool use_context = use_context_window || context_string.size();
bool use_sliding_context_window = (use_context_window
&& ioWrapper->GetLookAhead() != size_t_max);
boost::shared_ptr<std::vector<std::string> > context_window;
boost::shared_ptr<std::vector<std::string> >* cw;
cw = use_context_window ? &context_window : NULL;
if (!cw && context_string.size())
context_window.reset(new std::vector<std::string>(1,context_string));
// global scope of caches, biases, etc., if any
boost::shared_ptr<ContextScope> gscope;
if (!use_sliding_context_window)
gscope.reset(new ContextScope);
// main loop over set of input sentences
boost::shared_ptr<InputType> source;
while ((source = ioWrapper->ReadInput()) != NULL) {
while ((source = ioWrapper->ReadInput(cw)) != NULL) {
IFVERBOSE(1) ResetUserTime();
// set up task of translating one sentence
boost::shared_ptr<TranslationTask>
task = TranslationTask::create(source, ioWrapper);
if (source->GetContext())
task->SetContextString(*source->GetContext());
else task->SetContextString(context_string);
boost::shared_ptr<ContextScope> lscope;
if (gscope) lscope = gscope;
else lscope.reset(new ContextScope);
//if (source->GetContextWeights().isEmpty())
// task->SetContextWeights(*source->GetContextWeights());
/*else //The context_weights will never be passed to the config file.*/
if (context_weights != "") {
task->SetContextWeights(context_weights);
boost::shared_ptr<TranslationTask> task;
task = TranslationTask::create(source, ioWrapper, lscope);
if (cw) {
if (context_string.size())
context_window->push_back(context_string);
if(!use_sliding_context_window)
cw = NULL;
}
if (context_window)
task->SetContextWindow(context_window);
if (context_weights != "")
task->SetContextWeights(context_weights);
// Allow for (sentence-)context-specific processing prior to
// decoding. This can be used, for example, for context-sensitive
// phrase lookup.

View File

@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
// $Id$

View File

@ -43,7 +43,9 @@ ConstrainedDecoding::ConstrainedDecoding(const std::string &line)
void ConstrainedDecoding::Load()
{
const StaticData &staticData = StaticData::Instance();
bool addBeginEndWord = (staticData.GetSearchAlgorithm() == CYKPlus) || (staticData.GetSearchAlgorithm() == ChartIncremental);
bool addBeginEndWord
= ((staticData.options().search.algo == CYKPlus)
|| (staticData.options().search.algo == ChartIncremental));
for(size_t i = 0; i < m_paths.size(); ++i) {
InputFileStream constraintFile(m_paths[i]);

View File

@ -6,7 +6,6 @@
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryScope3.h"
#include "moses/TranslationModel/PhraseDictionaryTransliteration.h"
#include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
@ -152,7 +151,7 @@ FeatureFactory
::DefaultSetup(F *feature)
{
StaticData &static_data = StaticData::InstanceNonConst();
const string &featureName = feature->GetScoreProducerDescription();
const std::string &featureName = feature->GetScoreProducerDescription();
std::vector<float> weights = static_data.GetParameter()->GetWeights(featureName);
@ -165,8 +164,8 @@ FeatureFactory
<< "WARNING: Auto-initializing all weights for this FF to 1.0");
weights.assign(feature->GetNumScoreComponents(),1.0);
} else {
TRACE_ERR("WARNING: No weights specified in config file for FF "
<< featureName << ". Using default values supplied by FF.");
VERBOSE(2,"WARNING: No weights specified in config file for FF "
<< featureName << ". Using default values supplied by FF.");
}
}
UTIL_THROW_IF2(weights.size() != feature->GetNumScoreComponents(),
@ -215,7 +214,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(PhraseDictionaryMultiModel);
MOSES_FNAME(PhraseDictionaryMultiModelCounts);
MOSES_FNAME(PhraseDictionaryALSuffixArray);
MOSES_FNAME(PhraseDictionaryDynSuffixArray);
// MOSES_FNAME(PhraseDictionaryDynSuffixArray);
MOSES_FNAME(PhraseDictionaryTransliteration);
MOSES_FNAME(PhraseDictionaryDynamicCacheBased);
MOSES_FNAME(PhraseDictionaryFuzzyMatch);
@ -353,18 +352,18 @@ void FeatureRegistry::Construct(const std::string &name, const std::string &line
void FeatureRegistry::PrintFF() const
{
vector<string> ffs;
std::vector<std::string> ffs;
std::cerr << "Available feature functions:" << std::endl;
Map::const_iterator iter;
for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
const string &ffName = iter->first;
const std::string &ffName = iter->first;
ffs.push_back(ffName);
}
vector<string>::const_iterator iterVec;
std::vector<std::string>::const_iterator iterVec;
std::sort(ffs.begin(), ffs.end());
for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
const string &ffName = *iterVec;
const std::string &ffName = *iterVec;
std::cerr << ffName << " ";
}

View File

@ -19,8 +19,8 @@ HyperParameterAsWeight::HyperParameterAsWeight(const std::string &line)
vector<float> weights = staticData.GetWeights(this);
staticData.m_maxHypoStackSize = weights[0] * 1000;
staticData.m_beamWidth = weights[1] * 10;
staticData.m_options.search.stack_size = weights[0] * 1000;
staticData.m_options.search.beam_width = weights[1] * 10;
}

View File

@ -1,27 +1,24 @@
#include "InternalTree.h"
#include "moses/StaticData.h"
namespace Moses
{
InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool terminal):
m_value_nt(0),
m_isTerminal(terminal)
InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool nonterminal)
{
if (len > 0) {
m_value.assign(line, start, len);
m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), StringPiece(line).substr(start, len), nonterminal);
}
}
InternalTree::InternalTree(const std::string & line, const bool terminal):
m_value_nt(0),
m_isTerminal(terminal)
InternalTree::InternalTree(const std::string & line, const bool nonterminal)
{
size_t found = line.find_first_of("[] ");
if (found == line.npos) {
m_value = line;
m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), line, nonterminal);
} else {
AddSubTree(line, 0);
}
@ -32,6 +29,7 @@ size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
char token = 0;
size_t len = 0;
bool has_value = false;
while (token != ']' && pos != std::string::npos) {
size_t oldpos = pos;
@ -41,30 +39,27 @@ size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
len = pos-oldpos;
if (token == '[') {
if (!m_value.empty()) {
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
if (has_value) {
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
pos = m_children.back()->AddSubTree(line, pos+1);
} else {
if (len > 0) {
m_value.assign(line, oldpos, len);
m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), StringPiece(line).substr(oldpos, len), false);
has_value = true;
}
pos = AddSubTree(line, pos+1);
}
} else if (token == ' ' || token == ']') {
if (len > 0 && m_value.empty()) {
m_value.assign(line, oldpos, len);
if (len > 0 && !has_value) {
m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), StringPiece(line).substr(oldpos, len), true);
has_value = true;
} else if (len > 0) {
m_isTerminal = false;
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
}
if (token == ' ') {
pos++;
}
}
if (!m_children.empty()) {
m_isTerminal = false;
}
}
if (pos == std::string::npos) {
@ -82,16 +77,16 @@ std::string InternalTree::GetString(bool start) const
ret += " ";
}
if (!m_isTerminal) {
if (!IsTerminal()) {
ret += "[";
}
ret += m_value;
ret += m_value.GetString(StaticData::Instance().GetOutputFactorOrder(), false);
for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) {
ret += (*it)->GetString(false);
}
if (!m_isTerminal) {
if (!IsTerminal()) {
ret += "]";
}
return ret;
@ -120,13 +115,13 @@ void InternalTree::Unbinarize()
{
// nodes with virtual label cannot be unbinarized
if (m_value.empty() || m_value[0] == '^') {
if (m_value.GetString(0).empty() || m_value.GetString(0).as_string()[0] == '^') {
return;
}
//if node has child that is virtual node, get unbinarized list of children
for (std::vector<TreePointer>::iterator it = m_children.begin(); it != m_children.end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLabel()[0] == '^') {
if (!(*it)->IsTerminal() && (*it)->GetLabel().GetString(0).as_string()[0] == '^') {
std::vector<TreePointer> new_children;
GetUnbinarizedChildren(new_children);
m_children = new_children;
@ -144,8 +139,8 @@ void InternalTree::Unbinarize()
void InternalTree::GetUnbinarizedChildren(std::vector<TreePointer> &ret) const
{
for (std::vector<TreePointer>::const_iterator itx = m_children.begin(); itx != m_children.end(); ++itx) {
const std::string &label = (*itx)->GetLabel();
if (!label.empty() && label[0] == '^') {
const StringPiece label = (*itx)->GetLabel().GetString(0);
if (!label.empty() && label.as_string()[0] == '^') {
(*itx)->GetUnbinarizedChildren(ret);
} else {
ret.push_back(*itx);
@ -153,7 +148,7 @@ void InternalTree::GetUnbinarizedChildren(std::vector<TreePointer> &ret) const
}
}
bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
bool InternalTree::FlatSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
@ -163,7 +158,7 @@ bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer
return false;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
bool InternalTree::RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
@ -178,7 +173,7 @@ bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePo
return false;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
bool InternalTree::RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
@ -194,88 +189,4 @@ bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePo
return false;
}
bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
}

View File

@ -5,30 +5,28 @@
#include <map>
#include <vector>
#include "FFState.h"
#include "moses/Word.h"
#include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp>
#include "util/generator.hh"
#include "util/exception.hh"
#include "util/string_piece.hh"
namespace Moses
{
class InternalTree;
typedef boost::shared_ptr<InternalTree> TreePointer;
typedef int NTLabel;
class InternalTree
{
std::string m_value;
NTLabel m_value_nt;
Word m_value;
std::vector<TreePointer> m_children;
bool m_isTerminal;
public:
InternalTree(const std::string & line, size_t start, size_t len, const bool terminal);
InternalTree(const std::string & line, const bool terminal = false);
InternalTree(const std::string & line, const bool nonterminal = true);
InternalTree(const InternalTree & tree):
m_value(tree.m_value),
m_isTerminal(tree.m_isTerminal) {
m_value(tree.m_value) {
const std::vector<TreePointer> & children = tree.m_children;
for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
m_children.push_back(boost::make_shared<InternalTree>(**it));
@ -40,20 +38,10 @@ public:
void Combine(const std::vector<TreePointer> &previous);
void Unbinarize();
void GetUnbinarizedChildren(std::vector<TreePointer> &children) const;
const std::string & GetLabel() const {
const Word & GetLabel() const {
return m_value;
}
// optionally identify label by int instead of string;
// allows abstraction if multiple nonterminal strings should map to same label.
const NTLabel & GetNTLabel() const {
return m_value_nt;
}
void SetNTLabel(NTLabel value) {
m_value_nt = value;
}
size_t GetLength() const {
return m_children.size();
}
@ -62,38 +50,22 @@ public:
}
bool IsTerminal() const {
return m_isTerminal;
return !m_value.IsNonTerminal();
}
bool IsLeafNT() const {
return (!m_isTerminal && m_children.size() == 0);
return (m_value.IsNonTerminal() && m_children.size() == 0);
}
// different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
// can be used for formulating syntax constraints.
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
bool FlatSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// pass vector of possible labels to search
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
bool RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// Python-like generator that yields next nonterminal leaf on every call
$generator(leafNT) {

View File

@ -1,4 +1,4 @@
// -*- c++ -*-
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#pragma once
#include <string>

View File

@ -1,6 +1,5 @@
// -*- c++ -*-
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#pragma once
#include <vector>
#include <string>
@ -12,7 +11,6 @@
#include "moses/WordsBitmap.h"
#include "moses/TranslationOption.h"
#include "moses/FF/FFState.h"
#include "ReorderingStack.h"
namespace Moses

View File

@ -75,7 +75,7 @@ void Model1Vocabulary::Load(const std::string& fileName)
++i;
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
unsigned id = Scan<unsigned>(tokens[0]);
unsigned id = atoll( tokens[0].c_str() );
if (! ( (id == 1) && (tokens[1] == "UNK") )) {
const Factor* factor = factorCollection.AddFactor(tokens[1],false); // TODO: can we assume that the vocabulary is know and filter the model on loading?
bool stored = Store(factor, id);
@ -86,7 +86,7 @@ void Model1Vocabulary::Load(const std::string& fileName)
++i;
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
unsigned id = Scan<unsigned>(tokens[0]);
unsigned id = atoll( tokens[0].c_str() );
const Factor* factor = factorCollection.AddFactor(tokens[1],false); // TODO: can we assume that the vocabulary is know and filter the model on loading?
bool stored = Store(factor, id);
UTIL_THROW_IF2(!stored, "Line " << i << " in " << fileName << " overwrites existing vocabulary entry.");
@ -105,11 +105,11 @@ void Model1LexicalTable::Load(const std::string &fileName, const Model1Vocabular
++i;
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
unsigned idS = Scan<unsigned>(tokens[0]);
unsigned idT = Scan<unsigned>(tokens[1]);
unsigned idS = atoll( tokens[0].c_str() );
unsigned idT = atoll( tokens[1].c_str() );
const Factor* wordS = vcbS.GetWord(idS);
const Factor* wordT = vcbT.GetWord(idT);
float prob = Scan<float>(tokens[2]);
float prob = std::atof( tokens[2].c_str() );
if ( (wordS != NULL) && (wordT != NULL) ) {
m_ltable[ wordS ][ wordT ] = prob;
}

View File

@ -16,21 +16,29 @@ namespace Moses
PhrasePairFeature::PhrasePairFeature(const std::string &line)
:StatelessFeatureFunction(0, line)
,m_unrestricted(false)
,m_simple(true)
,m_sourceContext(false)
,m_domainTrigger(false)
,m_ignorePunctuation(false)
{
std::cerr << "Initializing PhrasePairFeature.." << std::endl;
VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
ReadParameters();
if (m_simple == 1) std::cerr << "using simple phrase pairs.. ";
if (m_sourceContext == 1) std::cerr << "using source context.. ";
if (m_domainTrigger == 1) std::cerr << "using domain triggers.. ";
if (m_simple == 1) VERBOSE(1, " Using simple phrase pairs.");
if (m_sourceContext == 1) VERBOSE(1, " Using source context.");
if (m_domainTrigger == 1) VERBOSE(1, " Using domain triggers.");
// compile a list of punctuation characters
if (m_ignorePunctuation) {
std::cerr << "ignoring punctuation for triggers.. ";
VERBOSE(1, " Ignoring punctuation for triggers.");
char punctuation[] = "\"'!?¿·()#_,.:;•&@/\\0123456789~=";
for (size_t i=0; i < sizeof(punctuation)-1; ++i)
for (size_t i=0; i < sizeof(punctuation)-1; ++i) {
m_punctuationHash[punctuation[i]] = 1;
}
}
VERBOSE(1, " Done." << std::endl);
}
void PhrasePairFeature::SetParameter(const std::string& key, const std::string& value)
@ -76,7 +84,7 @@ void PhrasePairFeature::Load()
}
inFileSource.close();
} else {
} else if (!m_unrestricted) {
// restricted source word vocabulary
ifstream inFileSource(m_filePathSource.c_str());
UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);
@ -101,8 +109,6 @@ void PhrasePairFeature::Load()
}
inFileTarget.close();*/
m_unrestricted = false;
}
}
@ -114,25 +120,6 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
, ScoreComponentCollection *estimatedFutureScore) const
{
const Phrase& source = inputPath.GetPhrase();
if (m_simple) {
ostringstream namestr;
namestr << "pp_";
namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
namestr << ",";
namestr << sourceFactor->GetString();
}
namestr << "~";
namestr << targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString();
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
namestr << ",";
namestr << targetFactor->GetString();
}
scoreBreakdown.SparsePlusEquals(namestr.str(),1);
}
if (m_domainTrigger) {
const Sentence& isnt = static_cast<const Sentence&>(input);
const bool use_topicid = isnt.GetUseTopicId();
@ -140,18 +127,18 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
// compute pair
ostringstream pair;
pair << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
pair << ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() );
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
pair << ",";
pair << sourceFactor->GetString();
pair << "~";
pair << ReplaceTilde( sourceFactor->GetString() );
}
pair << "~";
pair << targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString();
pair << "~~";
pair << ReplaceTilde( targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString() );
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
pair << ",";
pair << targetFactor->GetString();
pair << "~";
pair << ReplaceTilde( targetFactor->GetString() );
}
if (use_topicid || use_topicid_prob) {
@ -159,7 +146,7 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
// use topicid as trigger
const long topicid = isnt.GetTopicId();
stringstream feature;
feature << "pp_";
feature << m_description << "_";
if (topicid == -1)
feature << "unk";
else
@ -173,13 +160,13 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
const vector<string> &topicid_prob = *(isnt.GetTopicIdAndProb());
if (atol(topicid_prob[0].c_str()) == -1) {
stringstream feature;
feature << "pp_unk_";
feature << m_description << "_unk_";
feature << pair.str();
scoreBreakdown.SparsePlusEquals(feature.str(), 1);
} else {
for (size_t i=0; i+1 < topicid_prob.size(); i+=2) {
stringstream feature;
feature << "pp_";
feature << m_description << "_";
feature << topicid_prob[i];
feature << "_";
feature << pair.str();
@ -193,7 +180,7 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
for (set<string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
string sourceTrigger = *p;
ostringstream namestr;
namestr << "pp_";
namestr << m_description << "_";
namestr << sourceTrigger;
namestr << "_";
namestr << pair.str();
@ -221,21 +208,21 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
if (m_unrestricted || sourceTriggerExists) {
ostringstream namestr;
namestr << "pp_";
namestr << m_description << "_";
namestr << sourceTrigger;
namestr << "~";
namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
namestr << ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() );
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
namestr << ",";
namestr << sourceFactor->GetString();
namestr << "~";
namestr << ReplaceTilde( sourceFactor->GetString() );
}
namestr << "~";
namestr << targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString();
namestr << "~~";
namestr << ReplaceTilde( targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString() );
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
namestr << ",";
namestr << targetFactor->GetString();
namestr << "~";
namestr << ReplaceTilde( targetFactor->GetString() );
}
scoreBreakdown.SparsePlusEquals(namestr.str(),1);
@ -244,6 +231,31 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
}
}
void PhrasePairFeature::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
if (m_simple) {
ostringstream namestr;
namestr << m_description << "_";
namestr << ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() );
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
namestr << "~";
namestr << ReplaceTilde( sourceFactor->GetString() );
}
namestr << "~~";
namestr << ReplaceTilde( targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString() );
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
namestr << "~";
namestr << ReplaceTilde( targetFactor->GetString() );
}
scoreBreakdown.SparsePlusEquals(namestr.str(),1);
}
}
bool PhrasePairFeature::IsUseable(const FactorMask &mask) const
{
bool ret = mask[m_targetFactorId];

View File

@ -1,5 +1,4 @@
#ifndef moses_PhrasePairFeature_h
#define moses_PhrasePairFeature_h
#pragma once
#include <stdexcept>
#include <boost/unordered_set.hpp>
@ -32,6 +31,16 @@ class PhrasePairFeature: public StatelessFeatureFunction
CharHash m_punctuationHash;
std::string m_filePathSource;
inline std::string ReplaceTilde(const StringPiece &str) const {
std::string out = str.as_string();
size_t pos = out.find('~');
while ( pos != std::string::npos ) {
out.replace(pos,1,"<TILDE>");
pos = out.find('~',pos);
}
return out;
};
public:
PhrasePairFeature(const std::string &line);
@ -43,8 +52,7 @@ public:
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {
}
, ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
@ -69,5 +77,3 @@ public:
}
#endif

View File

@ -12,7 +12,7 @@ namespace Moses
{
RulePairUnlexicalizedSource::RulePairUnlexicalizedSource(const std::string &line)
: StatelessFeatureFunction(0, line)
: StatelessFeatureFunction(1, line)
, m_glueRules(false)
, m_nonGlueRules(true)
, m_glueTargetLHSStr("Q")
@ -81,6 +81,9 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
}
scoreBreakdown.PlusEquals(this, namestr.str(), 1);
if ( targetPhraseLHS != m_glueTargetLHS ) {
scoreBreakdown.PlusEquals(this, 1);
}
}
}

View File

@ -34,7 +34,7 @@ public:
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
vector<float> newScores(m_numScoreComponents);
std::vector<float> newScores(m_numScoreComponents);
newScores[0] = translationOptionList.size();
TranslationOptionList::const_iterator iterTransOpt;

View File

@ -13,6 +13,7 @@ namespace Moses
SoftMatchingFeature::SoftMatchingFeature(const std::string &line)
: StatelessFeatureFunction(0, line)
, m_softMatches(moses_MaxNumNonterminals)
, m_scoreIdentical(true)
{
ReadParameters();
}
@ -26,6 +27,8 @@ void SoftMatchingFeature::SetParameter(const std::string& key, const std::string
} else if (key == "path") {
const std::string filePath = value;
Load(filePath);
} else if (key == "score-identical") {
m_scoreIdentical = Scan<bool>(value);
} else {
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
}
@ -80,8 +83,10 @@ void SoftMatchingFeature::EvaluateWhenApplied(const ChartHypothesis& hypo,
const ChartHypothesis* prevHypo = hypo.GetPrevHypo(nonTermInd);
const Word& prevLHS = prevHypo->GetTargetLHS();
const std::string &name = GetOrSetFeatureName(word, prevLHS);
accumulator->PlusEquals(this,name,1);
if ( (word != prevLHS) || m_scoreIdentical ) {
const std::string &name = GetOrSetFeatureName(word, prevLHS);
accumulator->PlusEquals(this,name,1);
}
}
}
}

View File

@ -55,6 +55,7 @@ public:
private:
mutable std::vector<std::vector<Word> > m_softMatches; // map RHS of new rule to list of possible LHS of old rule (subtree)
mutable std::vector<std::vector<std::string> > m_nameCache;
bool m_scoreIdentical;
#ifdef WITH_THREADS
//reader-writer lock

View File

@ -38,9 +38,8 @@ void SourceWordDeletionFeature::SetParameter(const std::string& key, const std::
void SourceWordDeletionFeature::Load()
{
if (m_filename == "") {
if (m_filename.empty())
return;
}
FEATUREVERBOSE(1, "Loading source word deletion word list from " << m_filename << std::endl);
ifstream inFile(m_filename.c_str());

View File

@ -13,33 +13,12 @@ void TreeStructureFeature::Load()
// syntactic constraints can be hooked in here.
m_constraints = NULL;
m_labelset = NULL;
StaticData &staticData = StaticData::InstanceNonConst();
staticData.SetTreeStructure(this);
}
// define NT labels (ints) that are mapped from strings for quicker comparison.
void TreeStructureFeature::AddNTLabels(TreePointer root) const
{
std::string label = root->GetLabel();
if (root->IsTerminal()) {
return;
}
std::map<std::string, NTLabel>::const_iterator it = m_labelset->string_to_label.find(label);
if (it != m_labelset->string_to_label.end()) {
root->SetNTLabel(it->second);
}
std::vector<TreePointer> children = root->GetChildren();
for (std::vector<TreePointer>::const_iterator it2 = children.begin(); it2 != children.end(); ++it2) {
AddNTLabels(*it2);
}
}
FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
, int featureID /* used to index the state in the previous hypotheses */
, ScoreComponentCollection* accumulator) const
@ -48,10 +27,6 @@ FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hy
const std::string *tree = property->GetValueString();
TreePointer mytree (boost::make_shared<InternalTree>(*tree));
if (m_labelset) {
AddNTLabels(mytree);
}
//get subtrees (in target order)
std::vector<TreePointer> previous_trees;
for (size_t pos = 0; pos < cur_hypo.GetCurrTargetPhrase().GetSize(); ++pos) {
@ -70,7 +45,7 @@ FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hy
}
mytree->Combine(previous_trees);
bool full_sentence = (mytree->GetChildren().back()->GetLabel() == "</s>" || (mytree->GetChildren().back()->GetLabel() == "SEND" && mytree->GetChildren().back()->GetChildren().back()->GetLabel() == "</s>"));
bool full_sentence = (mytree->GetChildren().back()->GetLabel() == m_send || (mytree->GetChildren().back()->GetLabel() == m_send_nt && mytree->GetChildren().back()->GetChildren().back()->GetLabel() == m_send));
if (m_binarized && full_sentence) {
mytree->Unbinarize();
}

View File

@ -4,6 +4,7 @@
#include <map>
#include "StatefulFeatureFunction.h"
#include "FFState.h"
#include "moses/Word.h"
#include "InternalTree.h"
namespace Moses
@ -35,11 +36,18 @@ class TreeStructureFeature : public StatefulFeatureFunction
SyntaxConstraints* m_constraints;
LabelSet* m_labelset;
bool m_binarized;
Word m_send;
Word m_send_nt;
public:
TreeStructureFeature(const std::string &line)
:StatefulFeatureFunction(0, line)
, m_binarized(false) {
ReadParameters();
std::vector<FactorType> factors;
factors.push_back(0);
m_send.CreateFromString(Output, factors, "</s>", false);
m_send_nt.CreateFromString(Output, factors, "SEND", true);
}
~TreeStructureFeature() {
delete m_constraints;
@ -49,8 +57,6 @@ public:
return new TreeState(TreePointer());
}
void AddNTLabels(TreePointer root) const;
bool IsUseable(const FactorMask &mask) const {
return true;
}

View File

@ -307,7 +307,7 @@ public:
}
virtual void InitializeForInput(ttasksptr const& ttask) {
InputType const& source = ttask->GetSource();
InputType const& source = *(ttask->GetSource().get());
// tabbed sentence is assumed only in training
if (! m_train)
return;

View File

@ -5,6 +5,7 @@
#include "vw/Classifier.h"
#include "moses/TypeDef.h"
#include "moses/TranslationTask.h"
#include "moses/Util.h"
#include "moses/FF/StatelessFeatureFunction.h"

View File

@ -40,7 +40,7 @@ public:
}
virtual void InitializeForInput(ttasksptr const& ttask) {
InputType const& source = ttask->GetSource();
InputType const& source = *(ttask->GetSource().get());
UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
"This feature function requires the TabbedSentence input type");

View File

@ -110,7 +110,8 @@ void WordTranslationFeature::Load()
}
inFileSource.close();
} else {
} else if (!m_filePathSource.empty() || !m_filePathTarget.empty()) {
return;
// restricted source word vocabulary
ifstream inFileSource(m_filePathSource.c_str());
UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);

View File

@ -110,7 +110,7 @@ public:
*/
const Factor *AddFactor(const StringPiece &factorString, bool isNonTerminal = false);
const size_t GetNumNonTerminals() {
size_t GetNumNonTerminals() {
return m_factorIdNonTerminal;
}

View File

@ -213,7 +213,8 @@ RecombineCompare(const Hypothesis &compare) const
for (unsigned i = 0; i < m_ffStates.size(); ++i) {
if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL) {
comp = m_ffStates[i] - compare.m_ffStates[i];
// TODO: Can this situation actually occur?
comp = int(m_ffStates[i] != NULL) - int(compare.m_ffStates[i] != NULL);
} else {
comp = m_ffStates[i]->Compare(*compare.m_ffStates[i]);
}
@ -361,14 +362,14 @@ CleanupArcList()
* so we'll keep all of arc list if nedd distinct n-best list
*/
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
bool distinctNBest = (staticData.GetDistinctNBest() ||
size_t nBestSize = staticData.options().nbest.nbest_size;
bool distinctNBest = (m_manager.options().nbest.only_distinct ||
staticData.GetLatticeSamplesSize() ||
staticData.UseMBR() ||
m_manager.options().mbr.enabled ||
staticData.GetOutputSearchGraph() ||
staticData.GetOutputSearchGraphSLF() ||
staticData.GetOutputSearchGraphHypergraph() ||
staticData.UseLatticeMBR());
m_manager.options().lmbr.enabled);
if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
// prune arc list only if there too many arcs
@ -585,7 +586,9 @@ OutputSurface(std::ostream &out, const Hypothesis &edge,
//preface surface form with UNK if marking unknowns
const Word &word = phrase.GetWord(pos);
if(markUnknown && word.IsOOV()) {
out << "UNK" << *factor;
out << StaticData::Instance().GetUnknownWordPrefix()
<< *factor
<< StaticData::Instance().GetUnknownWordSuffix();
} else {
out << *factor;
}

View File

@ -36,7 +36,7 @@ namespace Moses
HypothesisStackCubePruning::HypothesisStackCubePruning(Manager& manager) :
HypothesisStack(manager)
{
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
m_worstScore = -std::numeric_limits<float>::infinity();
}

View File

@ -36,7 +36,7 @@ namespace Moses
HypothesisStackNormal::HypothesisStackNormal(Manager& manager) :
HypothesisStack(manager)
{
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
m_worstScore = -std::numeric_limits<float>::infinity();
}

View File

@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <iostream>
#include <stack>
#include <boost/algorithm/string.hpp>
#include <boost/foreach.hpp>
#include "moses/Syntax/KBestExtractor.h"
#include "moses/Syntax/PVertex.h"
@ -78,12 +79,12 @@ namespace Moses
IOWrapper::IOWrapper()
: m_nBestStream(NULL)
, m_outputWordGraphStream(NULL)
, m_outputSearchGraphStream(NULL)
, m_detailedTranslationReportingStream(NULL)
, m_unknownsStream(NULL)
, m_alignmentInfoStream(NULL)
, m_latticeSamplesStream(NULL)
// , m_outputWordGraphStream(NULL)
// , m_outputSearchGraphStream(NULL)
// , m_detailedTranslationReportingStream(NULL)
// , m_unknownsStream(NULL)
// , m_alignmentInfoStream(NULL)
// , m_latticeSamplesStream(NULL)
, m_surpressSingleBestOutput(false)
, m_look_ahead(0)
, m_look_back(0)
@ -93,10 +94,11 @@ IOWrapper::IOWrapper()
, spe_aln(NULL)
{
const StaticData &staticData = StaticData::Instance();
Parameter const& P = staticData.GetParameter();
// context buffering for context-sensitive decoding
m_look_ahead = staticData.GetContextParameters().look_ahead;
m_look_back = staticData.GetContextParameters().look_back;
m_look_ahead = staticData.options().context.look_ahead;
m_look_back = staticData.options().context.look_back;
m_inputType = staticData.GetInputType();
@ -107,8 +109,8 @@ IOWrapper::IOWrapper()
m_inputFactorOrder = &staticData.GetInputFactorOrder();
size_t nBestSize = staticData.GetNBestSize();
string nBestFilePath = staticData.GetNBestFilePath();
size_t nBestSize = staticData.options().nbest.nbest_size;
string nBestFilePath = staticData.options().nbest.output_file_path;
staticData.GetParameter().SetParameter<string>(m_inputFilePath, "input-file", "");
if (m_inputFilePath.empty()) {
@ -121,95 +123,38 @@ IOWrapper::IOWrapper()
}
if (nBestSize > 0) {
if (nBestFilePath == "-" || nBestFilePath == "/dev/stdout") {
m_nBestStream = &std::cout;
m_nBestOutputCollector.reset(new Moses::OutputCollector(&std::cout));
m_nBestOutputCollector.reset(new Moses::OutputCollector(nBestFilePath));
if (m_nBestOutputCollector->OutputIsCout()) {
m_surpressSingleBestOutput = true;
} else {
std::ofstream *file = new std::ofstream;
file->open(nBestFilePath.c_str());
m_nBestStream = file;
m_nBestOutputCollector.reset(new Moses::OutputCollector(file));
//m_nBestOutputCollector->HoldOutputStream();
}
}
// search graph output
if (staticData.GetOutputSearchGraph()) {
string fileName;
if (staticData.GetOutputSearchGraphExtended()) {
staticData.GetParameter().SetParameter<string>(fileName, "output-search-graph-extended", "");
} else {
staticData.GetParameter().SetParameter<string>(fileName, "output-search-graph", "");
}
std::ofstream *file = new std::ofstream;
m_outputSearchGraphStream = file;
file->open(fileName.c_str());
}
std::string path;
P.SetParameter<std::string>(path, "output-search-graph-extended", "");
if (!path.size()) P.SetParameter<std::string>(path, "output-search-graph", "");
if (path.size()) m_searchGraphOutputCollector.reset(new OutputCollector(path));
if (!staticData.GetOutputUnknownsFile().empty()) {
m_unknownsStream = new std::ofstream(staticData.GetOutputUnknownsFile().c_str());
m_unknownsCollector.reset(new Moses::OutputCollector(m_unknownsStream));
UTIL_THROW_IF2(!m_unknownsStream->good(),
"File for unknowns words could not be opened: " <<
staticData.GetOutputUnknownsFile());
}
P.SetParameter<std::string>(path, "output-unknowns", "");
if (path.size()) m_unknownsCollector.reset(new OutputCollector(path));
if (!staticData.GetAlignmentOutputFile().empty()) {
m_alignmentInfoStream = new std::ofstream(staticData.GetAlignmentOutputFile().c_str());
m_alignmentInfoCollector.reset(new Moses::OutputCollector(m_alignmentInfoStream));
UTIL_THROW_IF2(!m_alignmentInfoStream->good(),
"File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile());
}
P.SetParameter<std::string>(path, "alignment-output-file", "");
if (path.size()) m_alignmentInfoCollector.reset(new OutputCollector(path));
if (staticData.GetOutputSearchGraph()) {
string fileName;
staticData.GetParameter().SetParameter<string>(fileName, "output-search-graph", "");
P.SetParameter<string>(path, "translation-details", "");
if (path.size()) m_detailedTranslationCollector.reset(new OutputCollector(path));
std::ofstream *file = new std::ofstream;
m_outputSearchGraphStream = file;
file->open(fileName.c_str());
m_searchGraphOutputCollector.reset(new Moses::OutputCollector(m_outputSearchGraphStream));
}
P.SetParameter<string>(path, "tree-translation-details", "");
if (path.size()) m_detailTreeFragmentsOutputCollector.reset(new OutputCollector(path));
// detailed translation reporting
if (staticData.IsDetailedTranslationReportingEnabled()) {
const std::string &path = staticData.GetDetailedTranslationReportingFilePath();
m_detailedTranslationReportingStream = new std::ofstream(path.c_str());
m_detailedTranslationCollector.reset(new Moses::OutputCollector(m_detailedTranslationReportingStream));
}
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
const std::string &path = staticData.GetDetailedTreeFragmentsTranslationReportingFilePath();
m_detailedTreeFragmentsTranslationReportingStream = new std::ofstream(path.c_str());
m_detailTreeFragmentsOutputCollector.reset(new Moses::OutputCollector(m_detailedTreeFragmentsTranslationReportingStream));
}
// wordgraph output
if (staticData.GetOutputWordGraph()) {
string fileName;
staticData.GetParameter().SetParameter<string>(fileName, "output-word-graph", "");
std::ofstream *file = new std::ofstream;
m_outputWordGraphStream = file;
file->open(fileName.c_str());
m_wordGraphCollector.reset(new OutputCollector(m_outputWordGraphStream));
}
P.SetParameter<string>(path, "output-word-graph", "");
if (path.size()) m_wordGraphCollector.reset(new OutputCollector(path));
size_t latticeSamplesSize = staticData.GetLatticeSamplesSize();
string latticeSamplesFile = staticData.GetLatticeSamplesFilePath();
if (latticeSamplesSize) {
if (latticeSamplesFile == "-" || latticeSamplesFile == "/dev/stdout") {
m_latticeSamplesCollector.reset(new OutputCollector());
m_latticeSamplesCollector.reset(new OutputCollector(latticeSamplesFile));
if (m_latticeSamplesCollector->OutputIsCout()) {
m_surpressSingleBestOutput = true;
} else {
m_latticeSamplesStream = new ofstream(latticeSamplesFile.c_str());
if (!m_latticeSamplesStream->good()) {
TRACE_ERR("ERROR: Failed to open " << latticeSamplesFile << " for lattice samples" << endl);
exit(1);
}
m_latticeSamplesCollector.reset(new OutputCollector(m_latticeSamplesStream));
}
}
@ -235,6 +180,7 @@ IOWrapper::IOWrapper()
<< "' for hypergraph output!");
fmt += string("%d.") + extension;
// input streams for simulated post-editing
if (staticData.GetParameter().GetParam("spe-src")) {
spe_src = new ifstream(staticData.GetParameter().GetParam("spe-src")->at(0).c_str());
spe_trg = new ifstream(staticData.GetParameter().GetParam("spe-trg")->at(0).c_str());
@ -246,17 +192,17 @@ IOWrapper::~IOWrapper()
{
if (m_inputFile != NULL)
delete m_inputFile;
if (m_nBestStream != NULL && !m_surpressSingleBestOutput) {
// outputting n-best to file, rather than stdout. need to close file and delete obj
delete m_nBestStream;
}
// if (m_nBestStream != NULL && !m_surpressSingleBestOutput) {
// outputting n-best to file, rather than stdout. need to close file and delete obj
// delete m_nBestStream;
// }
delete m_detailedTranslationReportingStream;
delete m_alignmentInfoStream;
delete m_unknownsStream;
delete m_outputSearchGraphStream;
delete m_outputWordGraphStream;
delete m_latticeSamplesStream;
// delete m_detailedTranslationReportingStream;
// delete m_alignmentInfoStream;
// delete m_unknownsStream;
// delete m_outputSearchGraphStream;
// delete m_outputWordGraphStream;
// delete m_latticeSamplesStream;
}
// InputType*
@ -297,7 +243,7 @@ GetBufferedInput()
boost::shared_ptr<InputType>
IOWrapper::
ReadInput()
ReadInput(boost::shared_ptr<std::vector<std::string> >* cw)
{
#ifdef WITH_THREADS
boost::lock_guard<boost::mutex> lock(m_lock);
@ -305,48 +251,32 @@ ReadInput()
boost::shared_ptr<InputType> source = GetBufferedInput();
if (source) {
source->SetTranslationId(m_currentLine++);
if (m_look_ahead || m_look_back)
this->set_context_for(*source);
// when using a sliding context window, remove obsolete past input from buffer:
if (m_past_input.size() && m_look_back != std::numeric_limits<size_t>::max()) {
list<boost::shared_ptr<InputType> >::iterator m = m_past_input.end();
for (size_t cnt = 0; cnt < m_look_back && --m != m_past_input.begin();)
cnt += (*m)->GetSize();
while (m_past_input.begin() != m) m_past_input.pop_front();
}
if (m_look_back)
m_past_input.push_back(source);
}
m_past_input.push_back(source);
if (cw) *cw = GetCurrentContextWindow();
return source;
}
void
boost::shared_ptr<std::vector<std::string> >
IOWrapper::
set_context_for(InputType& source)
GetCurrentContextWindow() const
{
boost::shared_ptr<string> context(new string);
list<boost::shared_ptr<InputType> >::iterator m = m_past_input.end();
// remove obsolete past input from buffer:
if (m_past_input.end() != m_past_input.begin()) {
for (size_t cnt = 0; cnt < m_look_back && --m != m_past_input.begin();
cnt += (*m)->GetSize());
while (m_past_input.begin() != m) m_past_input.pop_front();
}
// cerr << string(80,'=') << endl;
if (m_past_input.size()) {
m = m_past_input.begin();
*context += (*m)->ToString();
// cerr << (*m)->ToString() << endl;
for (++m; m != m_past_input.end(); ++m) {
// cerr << "\n" << (*m)->ToString() << endl;
*context += string(" ") + (*m)->ToString();
}
// cerr << string(80,'-') << endl;
}
// cerr << source.ToString() << endl;
if (m_future_input.size()) {
// cerr << string(80,'-') << endl;
for (m = m_future_input.begin(); m != m_future_input.end(); ++m) {
// if (m != m_future_input.begin()) cerr << "\n";
// cerr << (*m)->ToString() << endl;
if (context->size()) *context += " ";
*context += (*m)->ToString();
}
}
// cerr << string(80,'=') << endl;
if (context->size()) source.SetContext(context);
boost::shared_ptr<std::vector<string> > context(new std::vector<string>);
BOOST_FOREACH(boost::shared_ptr<InputType> const& i, m_past_input)
context->push_back(i->ToString());
BOOST_FOREACH(boost::shared_ptr<InputType> const& i, m_future_input)
context->push_back(i->ToString());
return context;
}

View File

@ -1,4 +1,4 @@
// -*- c++ -*-
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id$
/***********************************************************************
@ -45,6 +45,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <vector>
#include <list>
#include <iomanip>
#include <limits>
#include "moses/TypeDef.h"
#include "moses/Sentence.h"
@ -85,11 +86,11 @@ protected:
Moses::InputFileStream *m_inputFile;
std::istream *m_inputStream;
std::ostream *m_nBestStream;
std::ostream *m_outputWordGraphStream;
std::ostream *m_outputSearchGraphStream;
std::ostream *m_detailedTranslationReportingStream;
// std::ostream *m_outputWordGraphStream;
// std::auto_ptr<std::ostream> m_outputSearchGraphStream;
// std::ostream *m_detailedTranslationReportingStream;
std::ostream *m_unknownsStream;
std::ostream *m_detailedTreeFragmentsTranslationReportingStream;
// std::ostream *m_detailedTreeFragmentsTranslationReportingStream;
std::ofstream *m_alignmentInfoStream;
std::ofstream *m_latticeSamplesStream;
@ -127,7 +128,9 @@ public:
~IOWrapper();
// Moses::InputType* GetInput(Moses::InputType *inputType);
boost::shared_ptr<InputType> ReadInput();
boost::shared_ptr<InputType>
ReadInput(boost::shared_ptr<std::vector<std::string> >* cw = NULL);
Moses::OutputCollector *GetSingleBestOutputCollector() {
return m_singleBestOutputCollector.get();
@ -181,6 +184,21 @@ public:
// post editing
std::ifstream *spe_src, *spe_trg, *spe_aln;
std::list<boost::shared_ptr<InputType> > const& GetPastInput() const {
return m_past_input;
}
std::list<boost::shared_ptr<InputType> > const& GetFutureInput() const {
return m_future_input;
}
size_t GetLookAhead() const {
return m_look_ahead;
}
size_t GetLookBack() const {
return m_look_back;
}
private:
template<class itype>
boost::shared_ptr<InputType>
@ -189,8 +207,8 @@ private:
boost::shared_ptr<InputType>
GetBufferedInput();
void
set_context_for(InputType& source);
boost::shared_ptr<std::vector<std::string> >
GetCurrentContextWindow() const;
};
template<class itype>
@ -210,10 +228,10 @@ BufferInput()
return ret;
ret = source;
}
while (m_buffered_ahead < m_look_ahead) {
source.reset(new itype);
if (!source->Read(*m_inputStream, *m_inputFactorOrder)) break;
if (!source->Read(*m_inputStream, *m_inputFactorOrder))
break;
m_future_input.push_back(source);
m_buffered_ahead += source->GetSize();
}

View File

@ -208,7 +208,7 @@ Manager::Manager(ttasksptr const& ttask)
: BaseManager(ttask)
, cells_(m_source, ChartCellBaseFactory(), parser_)
, parser_(ttask, cells_)
, n_best_(search::NBestConfig(StaticData::Instance().GetNBestSize()))
, n_best_(search::NBestConfig(StaticData::Instance().options().nbest.nbest_size))
{ }
Manager::~Manager()
@ -223,12 +223,17 @@ namespace
const float log_10 = logf(10);
}
template <class Model, class Best> search::History Manager::PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best &out)
template <class Model, class Best>
search::History
Manager::
PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best &out)
{
const LanguageModel &abstract = LanguageModel::GetFirstLM();
const float oov_weight = abstract.OOVFeatureEnabled() ? abstract.GetOOVWeight() : 0.0;
const StaticData &data = StaticData::Instance();
search::Config config(abstract.GetWeight() * log_10, data.GetCubePruningPopLimit(), search::NBestConfig(data.GetNBestSize()));
size_t cpl = data.options().cube.pop_limit;
size_t nbs = data.options().nbest.nbest_size;
search::Config config(abstract.GetWeight() * log_10, cpl, search::NBestConfig(nbs));
search::Context<Model> context(config, model);
size_t size = m_source.GetSize();
@ -255,7 +260,7 @@ template <class Model, class Best> search::History Manager::PopulateBest(const M
template <class Model> void Manager::LMCallback(const Model &model, const std::vector<lm::WordIndex> &words)
{
std::size_t nbest = StaticData::Instance().GetNBestSize();
std::size_t nbest = StaticData::Instance().options().nbest.nbest_size;
if (nbest <= 1) {
search::History ret = PopulateBest(model, words, single_best_);
if (ret) {

View File

@ -58,7 +58,7 @@ protected:
ReorderingConstraint m_reorderingConstraint; /**< limits on reordering specified either by "-mp" switch or xml tags */
std::string m_textType;
std::string m_passthrough;
boost::shared_ptr<std::string> m_context;
boost::shared_ptr<std::vector<std::string> > m_context;
public:
// used in -continue-partial-translation
@ -173,13 +173,13 @@ public:
//! number of words in this sentence/confusion network
virtual size_t GetSize() const =0;
virtual boost::shared_ptr<std::string> const&
virtual boost::shared_ptr<std::vector<std::string> > const&
GetContext() const {
return m_context;
}
virtual void
SetContext(boost::shared_ptr<std::string> const& ctx) {
SetContext(boost::shared_ptr<std::vector<std::string> > const& ctx) {
m_context = ctx;
}

View File

@ -88,9 +88,9 @@ if $(with-ldhtlm) {
local with-nplm = [ option.get "with-nplm" ] ;
if $(with-nplm) {
lib nplm : : <search>$(with-nplm)/lib <search>$(with-nplm)/lib64 ;
obj NeuralLMWrapper.o : NeuralLMWrapper.cpp nplm ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen ;
obj BiLM_NPLM.o : bilingual-lm/BiLM_NPLM.cpp nplm ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen ;
obj RDLM.o : RDLM.cpp nplm ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen ;
obj NeuralLMWrapper.o : NeuralLMWrapper.cpp nplm ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen <define>NPLM_DOUBLE_PRECISION=0 ;
obj BiLM_NPLM.o : bilingual-lm/BiLM_NPLM.cpp nplm ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen <cxxflags>-fopenmp <define>NPLM_DOUBLE_PRECISION=0 ;
obj RDLM.o : RDLM.cpp nplm ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen <define>NPLM_DOUBLE_PRECISION=0 ;
alias neural : NeuralLMWrapper.o nplm : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>LM_NEURAL ;
alias bilinguallm : BiLM_NPLM.o nplm : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>LM_NEURAL ;
alias rdlm : RDLM.o nplm : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>LM_NEURAL ;
@ -134,11 +134,11 @@ if $(with-dalm) {
}
#ORLM is always compiled but needs special headers
obj ORLM.o : ORLM.cpp ..//headers ../TranslationModel/DynSAInclude//dynsa : : : <include>../TranslationModel/DynSAInclude ;
#obj ORLM.o : ORLM.cpp ..//headers ../TranslationModel/DynSAInclude//dynsa : : : <include>../TranslationModel/DynSAInclude ;
#Top-level LM library. If you've added a file that doesn't depend on external
#libraries, put it here.
alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp ORLM.o
alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
../../lm//kenlm ..//headers $(dependencies) ;
alias macros : : : : <define>$(lmmacros) ;

View File

@ -1,107 +0,0 @@
#include <limits>
#include <iostream>
#include <fstream>
#include "moses/FactorCollection.h"
#include "moses/Phrase.h"
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "ORLM.h"
using std::map;
namespace Moses
{
bool LanguageModelORLM::Load(const std::string &filePath, FactorType factorType,
size_t nGramOrder)
{
cerr << "Loading LanguageModelORLM..." << endl;
m_filePath = filePath;
m_factorType = factorType;
m_nGramOrder = nGramOrder;
FileHandler fLmIn(m_filePath, std::ios::in|std::ios::binary, true);
m_lm = new OnlineRLM<T>(&fLmIn, m_nGramOrder);
fLmIn.close();
//m_lm = new MultiOnlineRLM<T>(m_filePath, m_nGramOrder);
// get special word ids
m_oov_id = m_lm->vocab_->GetWordID("<unk>");
CreateFactors();
return true;
}
void LanguageModelORLM::CreateFactors()
{
FactorCollection &factorCollection = FactorCollection::Instance();
size_t maxFactorId = 0; // to create lookup vector later on
std::map<size_t, wordID_t> m_lmids_map; // map from factor id -> word id
for(std::map<Word, wordID_t>::const_iterator vIter = m_lm->vocab_->VocabStart();
vIter != m_lm->vocab_->VocabEnd(); vIter++) {
// get word from ORLM vocab and associate with (new) factor id
size_t factorId = factorCollection.AddFactor(Output,m_factorType,vIter->first.ToString())->GetId();
m_lmids_map[factorId] = vIter->second;
maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
}
// add factors for BOS and EOS and store bf word ids
size_t factorId;
m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, "<s>");
factorId = m_sentenceStart->GetId();
maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
m_sentenceStartWord[m_factorType] = m_sentenceStart;
m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, "</s>");
factorId = m_sentenceEnd->GetId();
maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
m_sentenceEndWord[m_factorType] = m_sentenceEnd;
// add to lookup vector in object
lm_ids_vec_.resize(maxFactorId+1);
// fill with OOV code
fill(lm_ids_vec_.begin(), lm_ids_vec_.end(), m_oov_id);
for (map<size_t, wordID_t>::const_iterator iter = m_lmids_map.begin();
iter != m_lmids_map.end() ; ++iter)
lm_ids_vec_[iter->first] = iter->second;
}
wordID_t LanguageModelORLM::GetLmID(const std::string& str) const
{
return m_lm->vocab_->GetWordID(str);
}
wordID_t LanguageModelORLM::GetLmID(const Factor* factor) const
{
size_t factorId = factor->GetId();
return (factorId >= lm_ids_vec_.size()) ? m_oov_id : lm_ids_vec_[factorId];
}
LMResult LanguageModelORLM::GetValue(const std::vector<const Word*> &contextFactor,
State* finalState) const
{
FactorType factorType = GetFactorType();
// set up context
//std::vector<long unsigned int> factor(1,0);
//std::vector<string> sngram;
wordID_t ngram[MAX_NGRAM_SIZE];
int count = contextFactor.size();
for (int i = 0; i < count; i++) {
ngram[i] = GetLmID((*contextFactor[i])[factorType]);
//sngram.push_back(contextFactor[i]->GetString(factor, false));
}
//float logprob = FloorScore(TransformLMScore(lm_->getProb(sngram, count, finalState)));
LMResult ret;
ret.score = FloorScore(TransformLMScore(m_lm->getProb(&ngram[0], count, finalState)));
ret.unknown = count && (ngram[count - 1] == m_oov_id);
/*if (finalState)
std::cout << " = " << logprob << "(" << *finalState << ", " << *len <<")"<< std::endl;
else
std::cout << " = " << logprob << std::endl;
*/
return ret;
}
bool LanguageModelORLM::UpdateORLM(const std::vector<string>& ngram, const int value)
{
/*cerr << "Inserting into ORLM: \"";
iterate(ngram, nit)
cerr << *nit << " ";
cerr << "\"\t" << value << endl; */
m_lm->vocab_->MakeOpen();
bool res = m_lm->update(ngram, value);
m_lm->vocab_->MakeClosed();
return res;
}
}

View File

@ -1,53 +0,0 @@
#pragma once
#include <string>
#include <vector>
#include "moses/Factor.h"
#include "moses/Util.h"
#include "SingleFactor.h"
#include "moses/TranslationModel/DynSAInclude/onlineRLM.h"
//#include "multiOnlineRLM.h"
#include "moses/TranslationModel/DynSAInclude/FileHandler.h"
#include "moses/TranslationModel/DynSAInclude/vocab.h"
namespace Moses
{
class Factor;
class Phrase;
/** @todo ask ollie
*/
class LanguageModelORLM : public LanguageModelSingleFactor
{
public:
typedef count_t T; // type for ORLM filter
LanguageModelORLM(const std::string &line)
:LanguageModelSingleFactor(line)
,m_lm(0) {
}
bool Load(const std::string &filePath, FactorType factorType, size_t nGramOrder);
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const;
~LanguageModelORLM() {
//save LM with markings
Utils::rtrim(m_filePath, ".gz");
FileHandler fout(m_filePath + ".marked.gz", std::ios::out|std::ios::binary, false);
m_lm->save(&fout);
fout.close();
delete m_lm;
}
void CleanUpAfterSentenceProcessing() {
m_lm->clearCache(); // clear caches
}
bool UpdateORLM(const std::vector<string>& ngram, const int value);
protected:
OnlineRLM<T>* m_lm;
//MultiOnlineRLM<T>* m_lm;
wordID_t m_oov_id;
std::vector<wordID_t> lm_ids_vec_;
void CreateFactors();
wordID_t GetLmID(const std::string &str) const;
wordID_t GetLmID(const Factor *factor) const;
};
} // end namespace

View File

@ -11,6 +11,26 @@
namespace Moses
{
namespace rdlm
{
ThreadLocal::ThreadLocal(nplm::neuralTM *lm_head_base_instance_, nplm::neuralTM *lm_label_base_instance_, bool normalizeHeadLM, bool normalizeLabelLM, int cacheSize)
{
lm_head = new nplm::neuralTM(*lm_head_base_instance_);
lm_label = new nplm::neuralTM(*lm_label_base_instance_);
lm_head->set_normalization(normalizeHeadLM);
lm_label->set_normalization(normalizeLabelLM);
lm_head->set_cache(cacheSize);
lm_label->set_cache(cacheSize);
}
ThreadLocal::~ThreadLocal()
{
delete lm_head;
delete lm_label;
}
}
typedef Eigen::Map<Eigen::Matrix<int,Eigen::Dynamic,1> > EigenMap;
RDLM::~RDLM()
@ -70,7 +90,7 @@ void RDLM::Load()
static_label_null[i] = lm_label_base_instance_->lookup_input_word(numstr);
}
static_dummy_head = lm_head_base_instance_->lookup_input_word(dummy_head);
static_dummy_head = lm_head_base_instance_->lookup_input_word(dummy_head.GetString(0).as_string());
static_start_head = lm_head_base_instance_->lookup_input_word("<start_head>");
static_start_label = lm_head_base_instance_->lookup_input_word("<start_label>");
@ -99,8 +119,16 @@ void RDLM::Load()
// TreePointer mytree4 (new InternalTree("[pred [det [ART die]] [attr [adv [adv [PTKNEG nicht]] [ADV fast]] [ADJA neue]] [attr [ADJA]] [NN Zeit]]]"));
// TreePointer mytree2 (new InternalTree("[vroot [subj [PPER ich]] [VAFIN bin] [pred]]"));
//
// std::vector<int> ancestor_heads;
// std::vector<int> ancestor_labels;
// rdlm::ThreadLocal *thread_objects = thread_objects_backend_.get();
// if (!thread_objects) {
// thread_objects = new rdlm::ThreadLocal(lm_head_base_instance_, lm_label_base_instance_, m_normalizeHeadLM, m_normalizeLabelLM, m_cacheSize);
// thread_objects_backend_.reset(thread_objects);
// }
//
// #ifdef WITH_THREADS
// //read-lock for cache; cache resizes are so rare that we want to minimize number of calls, not scope
// m_accessLock.lock_shared();
// #endif
//
// size_t boundary_hash(0);
// boost::array<float, 4> score;
@ -108,13 +136,13 @@ void RDLM::Load()
// std::cerr << "scoring: " << mytree3->GetString() << std::endl;
// std::vector<TreePointer> previous_trees;
// TreePointerMap back_pointers = AssociateLeafNTs(mytree3.get(), previous_trees);
// Score(mytree3.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree3.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// previous_trees.push_back(mytree3);
// back_pointers = AssociateLeafNTs(mytree4.get(), previous_trees);
// std::cerr << "scoring: " << mytree4->GetString() << std::endl;
// Score(mytree4.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree4.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// mytree4->Combine(previous_trees);
@ -125,7 +153,7 @@ void RDLM::Load()
//
// score[1] = 0;
// score[3] = 0;
// Score(mytree2.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree2.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// score[0] = 0;
@ -134,12 +162,12 @@ void RDLM::Load()
// score[3] = 0;
// std::cerr << "scoring: " << mytree->GetString() << std::endl;
//
// Score(mytree.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// }
// UTIL_THROW2("Finished");
//
// #ifdef WITH_THREADS
// m_accessLock.unlock_shared();
// #endif
// }
//
// {
@ -149,8 +177,16 @@ void RDLM::Load()
// TreePointer mytree4 (new InternalTree("[^pred [attr [adv [adv [PTKNEG nicht]] [ADV fast]] [ADJA neue]] [^pred [attr [ADJA]] [NN Zeit]]]"));
// TreePointer mytree2 (new InternalTree("[vroot [subj [PPER ich]] [^vroot [VAFIN bin] [pred [det [ART die]] [^pred]]]]"));
//
// std::vector<int> ancestor_heads;
// std::vector<int> ancestor_labels;
// rdlm::ThreadLocal *thread_objects = thread_objects_backend_.get();
// if (!thread_objects) {
// thread_objects = new rdlm::ThreadLocal(lm_head_base_instance_, lm_label_base_instance_, m_normalizeHeadLM, m_normalizeLabelLM, m_cacheSize);
// thread_objects_backend_.reset(thread_objects);
// }
//
// #ifdef WITH_THREADS
// //read-lock for cache; cache resizes are so rare that we want to minimize number of calls, not scope
// m_accessLock.lock_shared();
// #endif
//
// size_t boundary_hash(0);
// boost::array<float, 4> score;
@ -158,13 +194,13 @@ void RDLM::Load()
// std::cerr << "scoring: " << mytree3->GetString() << std::endl;
// std::vector<TreePointer> previous_trees;
// TreePointerMap back_pointers = AssociateLeafNTs(mytree3.get(), previous_trees);
// Score(mytree3.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree3.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << " label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// previous_trees.push_back(mytree3);
// back_pointers = AssociateLeafNTs(mytree4.get(), previous_trees);
// std::cerr << "scoring: " << mytree4->GetString() << std::endl;
// Score(mytree4.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree4.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << " label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// mytree4->Combine(previous_trees);
@ -175,7 +211,7 @@ void RDLM::Load()
//
// score[1] = 0;
// score[3] = 0;
// Score(mytree2.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree2.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << " label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// score[0] = 0;
@ -184,16 +220,20 @@ void RDLM::Load()
// score[3] = 0;
// std::cerr << "scoring: " << mytree->GetString() << std::endl;
//
// Score(mytree.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << " label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// #ifdef WITH_THREADS
// m_accessLock.unlock_shared();
// #endif
//
// }
// UTIL_THROW2("Finished");
}
void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost::array<float, 4> &score, std::vector<int> &ancestor_heads, std::vector<int> &ancestor_labels, size_t &boundary_hash, int num_virtual, int rescoring_levels) const
void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost::array<float, 4> &score, size_t &boundary_hash, rdlm::ThreadLocal &thread_objects, int num_virtual, int rescoring_levels) const
{
// ignore terminal nodes
@ -205,20 +245,23 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
if (root->GetLabel() == m_glueSymbol) {
// recursion
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels);
Score(it->get(), back_pointers, score, boundary_hash, thread_objects, num_virtual, rescoring_levels);
}
return;
}
std::vector<int> &ancestor_heads = thread_objects.ancestor_heads;
std::vector<int> &ancestor_labels = thread_objects.ancestor_labels;
// ignore virtual nodes (in binarization; except if it's the root)
if (m_binarized && root->GetLabel()[0] == '^' && !ancestor_heads.empty()) {
if (m_binarized && root->GetLabel().GetString(0).as_string()[0] == '^' && !ancestor_heads.empty()) {
// recursion
if (root->IsLeafNT() && m_context_up > 1 && ancestor_heads.size()) {
root = back_pointers.find(root)->second.get();
rescoring_levels = m_context_up-1;
}
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels);
Score(it->get(), back_pointers, score, boundary_hash, thread_objects, num_virtual, rescoring_levels);
}
return;
}
@ -228,25 +271,19 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
return;
}
nplm::neuralTM *lm_head = lm_head_backend_.get();
if (!lm_head) {
lm_head = new nplm::neuralTM(*lm_head_base_instance_);
lm_head->set_normalization(m_normalizeHeadLM);
lm_head->set_cache(m_cacheSize);
lm_head_backend_.reset(lm_head);
}
// ignore preterminal node (except if we're scoring root nodes)
if (root->GetLength() == 1 && root->GetChildren()[0]->IsTerminal()) {
// root of tree: score without context
if (ancestor_heads.empty() || (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head)) {
std::vector<int> ngram_head_null (static_head_null);
ngram_head_null.back() = lm_head->lookup_output_word(root->GetChildren()[0]->GetLabel());
if (m_isPretermBackoff && ngram_head_null.back() == 0) {
ngram_head_null.back() = lm_head->lookup_output_word(root->GetLabel());
std::vector<int> & ngram = thread_objects.ngram;
ngram = static_head_null;
ngram.back() = Factor2ID(root->GetChildren()[0]->GetLabel()[m_factorType], HEAD_OUTPUT);
if (m_isPretermBackoff && ngram.back() == 0) {
ngram.back() = Factor2ID(root->GetLabel()[m_factorType], HEAD_OUTPUT);
}
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head) {
std::vector<int>::iterator it = ngram_head_null.begin();
std::vector<int>::iterator it = ngram.begin();
std::fill_n(it, m_context_left, static_start_head);
it += m_context_left;
std::fill_n(it, m_context_left, static_start_label);
@ -260,10 +297,10 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
}
if (ancestor_labels.size() >= m_context_up && !num_virtual) {
score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
score[0] += FloorScore(thread_objects.lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
} else {
boost::hash_combine(boundary_hash, ngram_head_null.back());
score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
boost::hash_combine(boundary_hash, ngram.back());
score[1] += FloorScore(thread_objects.lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
}
}
return;
@ -281,22 +318,15 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
}
}
nplm::neuralTM *lm_label = lm_label_backend_.get();
if (!lm_label) {
lm_label = new nplm::neuralTM(*lm_label_base_instance_);
lm_label->set_normalization(m_normalizeLabelLM);
lm_label->set_cache(m_cacheSize);
lm_label_backend_.reset(lm_label);
}
std::pair<int,int> head_ids;
InternalTree* found = GetHead(root, back_pointers, head_ids);
if (found == NULL) {
bool found = GetHead(root, back_pointers, head_ids);
if (!found) {
head_ids = std::make_pair(static_dummy_head, static_dummy_head);
}
size_t context_up_nonempty = std::min(m_context_up, ancestor_heads.size());
const std::string & head_label = root->GetLabel();
const StringPiece & head_label = root->GetLabel().GetString(0);
bool virtual_head = false;
int reached_end = 0;
int label_idx, label_idx_out;
@ -308,45 +338,24 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
reached_end = 2; // indicate that we've seen the last symbol of the RHS
}
// with 'full' binarization, direction is encoded in 2nd char
std::string clipped_label = (m_binarized == 3) ? head_label.substr(2,head_label.size()-2) : head_label.substr(1,head_label.size()-1);
label_idx = lm_label->lookup_input_word(clipped_label);
label_idx_out = lm_label->lookup_output_word(clipped_label);
StringPiece clipped_label = (m_binarized == 3) ? head_label.substr(2,head_label.size()-2) : head_label.substr(1,head_label.size()-1);
label_idx = lm_label_base_instance_->lookup_input_word(clipped_label.as_string());
label_idx_out = lm_label_base_instance_->lookup_output_word(clipped_label.as_string());
} else {
reached_end = 3; // indicate that we've seen first and last symbol of the RHS
label_idx = lm_label->lookup_input_word(head_label);
label_idx_out = lm_label->lookup_output_word(head_label);
label_idx = Factor2ID(root->GetLabel()[0], LABEL_INPUT);
label_idx_out = Factor2ID(root->GetLabel()[0], LABEL_OUTPUT);
}
int head_idx = (virtual_head && head_ids.first == static_dummy_head) ? static_label_null[offset_up_head+m_context_up-1] : head_ids.first;
// root of tree: score without context
if (ancestor_heads.empty() || (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head)) {
if (head_idx != static_dummy_head && head_idx != static_head_head) {
std::vector<int> ngram_head_null (static_head_null);
*(ngram_head_null.end()-2) = label_idx;
ngram_head_null.back() = head_ids.second;
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) {
std::vector<int>::iterator it = ngram_head_null.begin();
std::fill_n(it, m_context_left, static_start_head);
it += m_context_left;
std::fill_n(it, m_context_left, static_start_label);
it += m_context_left;
std::fill_n(it, m_context_right, static_stop_head);
it += m_context_right;
std::fill_n(it, m_context_right, static_stop_label);
it += m_context_right;
it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
} else {
boost::hash_combine(boundary_hash, ngram_head_null.back());
score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
}
}
std::vector<int> ngram_label_null (static_label_null);
ngram_label_null.back() = label_idx_out;
std::vector<int> & ngram = thread_objects.ngram;
ngram = static_label_null;
ngram.back() = label_idx_out;
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) {
std::vector<int>::iterator it = ngram_label_null.begin();
std::vector<int>::iterator it = ngram.begin();
std::fill_n(it, m_context_left, static_start_head);
it += m_context_left;
std::fill_n(it, m_context_left, static_start_label);
@ -357,10 +366,20 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
it += m_context_right;
it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size())));
score[2] += FloorScore(thread_objects.lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
} else {
boost::hash_combine(boundary_hash, ngram_label_null.back());
score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size())));
boost::hash_combine(boundary_hash, ngram.back());
score[3] += FloorScore(thread_objects.lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
}
if (head_idx != static_dummy_head && head_idx != static_head_head) {
ngram.push_back(head_ids.second);
*(ngram.end()-2) = label_idx;
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) {
score[0] += FloorScore(thread_objects.lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
} else {
boost::hash_combine(boundary_hash, ngram.back());
score[1] += FloorScore(thread_objects.lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
}
}
}
@ -380,7 +399,8 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
}
size_t up_padding = m_context_up - context_up_nonempty;
std::vector<int> ngram (static_label_null);
std::vector<int> & ngram = thread_objects.ngram;
ngram = static_label_null;
std::vector<int>::iterator it = ngram.begin() + offset_up_head;
if (up_padding > 0) {
@ -401,21 +421,25 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
// get number of children after unbinarization
if (m_binarized) {
num_children = 0;
UnbinarizedChildren real_children(root, back_pointers, m_binarized);
for (std::vector<TreePointer>::const_iterator it = real_children.begin(); it != real_children.end(); it = ++real_children) {
UnbinarizedChildren real_children(root, back_pointers, m_binarized, thread_objects.stack);
for (std::vector<TreePointer>::const_iterator it = real_children.begin(); !real_children.ended(); it = ++real_children) {
num_children++;
}
}
if (m_context_right && (reached_end == 1 || reached_end == 3)) num_children++; //also predict start label
if (m_context_left && (reached_end == 2 || reached_end == 3)) num_children++; //also predict end label
std::vector<int> & heads = thread_objects.heads;
std::vector<int> & labels = thread_objects.labels;
std::vector<int> & heads_output = thread_objects.heads_output;
std::vector<int> & labels_output = thread_objects.labels_output;
std::vector<int> heads(num_children);
std::vector<int> labels(num_children);
std::vector<int> heads_output(num_children);
std::vector<int> labels_output(num_children);
heads.resize(num_children);
labels.resize(num_children);
heads_output.resize(num_children);
labels_output.resize(num_children);
GetChildHeadsAndLabels(root, back_pointers, reached_end, lm_head, lm_label, heads, labels, heads_output, labels_output);
GetChildHeadsAndLabels(root, back_pointers, reached_end, thread_objects);
//left padding; only need to add this initially
if (reached_end == 1 || reached_end == 3) {
@ -469,10 +493,10 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
ngram.back() = labels_output[i];
if (ancestor_labels.size() >= m_context_up && !num_virtual) {
score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
score[2] += FloorScore(thread_objects.lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
} else {
boost::hash_combine(boundary_hash, ngram.back());
score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
score[3] += FloorScore(thread_objects.lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
}
// construct context of head model and predict head
@ -482,10 +506,10 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
ngram.push_back(heads_output[i]);
if (ancestor_labels.size() >= m_context_up && !num_virtual) {
score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
score[0] += FloorScore(thread_objects.lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
} else {
boost::hash_combine(boundary_hash, ngram.back());
score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
score[1] += FloorScore(thread_objects.lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
}
ngram.pop_back();
}
@ -510,13 +534,13 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
}
// recursion
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels - 1);
Score(it->get(), back_pointers, score, boundary_hash, thread_objects, num_virtual, rescoring_levels - 1);
}
ancestor_heads.pop_back();
ancestor_labels.pop_back();
}
InternalTree* RDLM::GetHead(InternalTree* root, const TreePointerMap & back_pointers, std::pair<int,int> & IDs, InternalTree* head_ptr) const
bool RDLM::GetHead(InternalTree* root, const TreePointerMap & back_pointers, std::pair<int,int> & IDs) const
{
InternalTree *tree;
@ -527,54 +551,35 @@ InternalTree* RDLM::GetHead(InternalTree* root, const TreePointerMap & back_poin
tree = it->get();
}
if (m_binarized && tree->GetLabel()[0] == '^') {
head_ptr = GetHead(tree, back_pointers, IDs, head_ptr);
if (head_ptr != NULL && !m_isPTKVZ) {
return head_ptr;
if (m_binarized && tree->GetLabel().GetString(0).as_string()[0] == '^') {
bool found = GetHead(tree, back_pointers, IDs);
if (found) {
return true;
}
}
// assumption (only true for dependency parse): each constituent has a preterminal label, and corresponding terminal is head
// if constituent has multiple preterminals, first one is picked; if it has no preterminals, dummy_head is returned
else if (tree->GetLength() == 1 && tree->GetChildren()[0]->IsTerminal() && head_ptr == NULL) {
head_ptr = tree;
if (!m_isPTKVZ) {
GetIDs(head_ptr->GetChildren()[0]->GetLabel(), head_ptr->GetLabel(), IDs);
return head_ptr;
}
}
// add PTKVZ to lemma of verb
else if (m_isPTKVZ && head_ptr && tree->GetLabel() == "avz") {
InternalTree *tree2;
for (std::vector<TreePointer>::const_iterator it2 = tree->GetChildren().begin(); it2 != tree->GetChildren().end(); ++it2) {
if ((*it2)->IsLeafNT()) {
tree2 = back_pointers.find(it2->get())->second.get();
} else {
tree2 = it2->get();
}
if (tree2->GetLabel() == "PTKVZ" && tree2->GetLength() == 1 && tree2->GetChildren()[0]->IsTerminal()) {
std::string verb = tree2->GetChildren()[0]->GetLabel() + head_ptr->GetChildren()[0]->GetLabel();
GetIDs(verb, head_ptr->GetLabel(), IDs);
return head_ptr;
}
}
else if (tree->GetLength() == 1 && tree->GetChildren()[0]->IsTerminal()) {
GetIDs(tree->GetChildren()[0]->GetLabel(), tree->GetLabel(), IDs);
return true;
}
}
if (head_ptr != NULL) {
GetIDs(head_ptr->GetChildren()[0]->GetLabel(), head_ptr->GetLabel(), IDs);
}
return head_ptr;
return false;
}
void RDLM::GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & back_pointers, int reached_end, const nplm::neuralTM *lm_head, const nplm::neuralTM *lm_label, std::vector<int> & heads, std::vector<int> & labels, std::vector<int> & heads_output, std::vector<int> & labels_output) const
void RDLM::GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & back_pointers, int reached_end, rdlm::ThreadLocal &thread_objects) const
{
std::pair<int,int> child_ids;
InternalTree* found;
size_t j = 0;
std::vector<int> & heads = thread_objects.heads;
std::vector<int> & labels = thread_objects.labels;
std::vector<int> & heads_output = thread_objects.heads_output;
std::vector<int> & labels_output = thread_objects.labels_output;
// score start label (if enabled) for all nonterminal nodes (but not for terminal or preterminal nodes)
if (m_context_right && (reached_end == 1 || reached_end == 3)) {
heads[j] = static_start_head;
@ -583,10 +588,10 @@ void RDLM::GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & bac
j++;
}
UnbinarizedChildren real_children(root, back_pointers, m_binarized);
UnbinarizedChildren real_children(root, back_pointers, m_binarized, thread_objects.stack);
// extract head words / labels
for (std::vector<TreePointer>::const_iterator itx = real_children.begin(); itx != real_children.end(); itx = ++real_children) {
for (std::vector<TreePointer>::const_iterator itx = real_children.begin(); !real_children.ended(); itx = ++real_children) {
if ((*itx)->IsTerminal()) {
std::cerr << "non-terminal node " << root->GetLabel() << " has a mix of terminal and non-terminal children. This shouldn't happen..." << std::endl;
std::cerr << "children: ";
@ -616,13 +621,13 @@ void RDLM::GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & bac
continue;
}
found = GetHead(child, back_pointers, child_ids);
if (found == NULL) {
bool found = GetHead(child, back_pointers, child_ids);
if (!found) {
child_ids = std::make_pair(static_dummy_head, static_dummy_head);
}
labels[j] = lm_head->lookup_input_word(child->GetLabel());
labels_output[j] = lm_label->lookup_output_word(child->GetLabel());
labels[j] = Factor2ID(child->GetLabel()[0], LABEL_INPUT);
labels_output[j] = Factor2ID(child->GetLabel()[0], LABEL_OUTPUT);
heads[j] = child_ids.first;
heads_output[j] = child_ids.second;
j++;
@ -637,22 +642,78 @@ void RDLM::GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & bac
}
void RDLM::GetIDs(const std::string & head, const std::string & preterminal, std::pair<int,int> & IDs) const
void RDLM::GetIDs(const Word & head, const Word & preterminal, std::pair<int,int> & IDs) const
{
IDs.first = lm_head_base_instance_->lookup_input_word(head);
IDs.first = Factor2ID(head[m_factorType], HEAD_INPUT);
if (m_isPretermBackoff && IDs.first == 0) {
IDs.first = lm_head_base_instance_->lookup_input_word(preterminal);
IDs.first = Factor2ID(preterminal[0], HEAD_INPUT);
}
if (m_sharedVocab) {
IDs.second = IDs.first;
} else {
IDs.second = lm_head_base_instance_->lookup_output_word(head);
IDs.second = Factor2ID(head[m_factorType], HEAD_OUTPUT);
if (m_isPretermBackoff && IDs.second == 0) {
IDs.second = lm_head_base_instance_->lookup_output_word(preterminal);
IDs.second = Factor2ID(preterminal[0], HEAD_OUTPUT);
}
}
}
// Map a Moses Factor to its NPLM vocabulary ID for the requested model/direction.
// Uses per-direction vectors (indexed by Factor::GetId()) as a cache so the hot
// path avoids NPLM's string-based hash lookups. -1 marks "not yet looked up".
int RDLM::Factor2ID(const Factor * const factor, int model_type) const
{
  size_t ID = factor->GetId();
  int ret;

  // pick the cache that corresponds to the requested model/direction
  std::vector<int>* cache = NULL;
  switch(model_type) {
  case LABEL_INPUT:
    cache = &factor2id_label_input;
    break;
  case LABEL_OUTPUT:
    cache = &factor2id_label_output;
    break;
  case HEAD_INPUT:
    cache = &factor2id_head_input;
    break;
  case HEAD_OUTPUT:
    cache = &factor2id_head_output;
    break;
  }

  if (ID < cache->size()) {
    ret = (*cache)[ID];
  } else {
    // cache too small: grow it under the write lock
#ifdef WITH_THREADS //need to resize cache; write lock
    m_accessLock.unlock_shared();
    m_accessLock.lock();
#endif
    // re-check under the write lock: another thread may have grown the cache
    // while we were unlocked; an unconditional resize(ID*2) could then SHRINK
    // it and discard cached entries.
    if (ID >= cache->size()) {
      // grow to at least ID+1 elements; plain ID*2 is wrong for ID == 0
      // (resize(0) leaves the vector empty and the write below would be out of bounds)
      cache->resize(std::max(ID*2, ID+1), -1);
    }
#ifdef WITH_THREADS //go back to read lock
    m_accessLock.unlock();
    m_accessLock.lock_shared();
#endif
    ret = -1;
  }

  if (ret == -1) {
    // not cached yet: do the (expensive) string-based lookup in NPLM
    switch(model_type) {
    case LABEL_INPUT:
      ret = lm_label_base_instance_->lookup_input_word(factor->GetString().as_string());
      break;
    case LABEL_OUTPUT:
      ret = lm_label_base_instance_->lookup_output_word(factor->GetString().as_string());
      break;
    case HEAD_INPUT:
      ret = lm_head_base_instance_->lookup_input_word(factor->GetString().as_string());
      break;
    case HEAD_OUTPUT:
      ret = lm_head_base_instance_->lookup_output_word(factor->GetString().as_string());
      break;
    }
    // NOTE(review): this write happens under the shared (read) lock; concurrent
    // writers store the same value for the same ID, so the race is presumably
    // benign — confirm, or move the store under the write lock.
    (*cache)[ID] = ret;
  }
  return ret;
}
void RDLM::PrintInfo(std::vector<int> &ngram, nplm::neuralTM* lm) const
{
@ -689,18 +750,32 @@ RDLM::TreePointerMap RDLM::AssociateLeafNTs(InternalTree* root, const std::vecto
void RDLM::ScoreFile(std::string &path)
{
InputFileStream inStream(path);
rdlm::ThreadLocal *thread_objects = thread_objects_backend_.get();
if (!thread_objects) {
thread_objects = new rdlm::ThreadLocal(lm_head_base_instance_, lm_label_base_instance_, m_normalizeHeadLM, m_normalizeLabelLM, m_cacheSize);
thread_objects_backend_.reset(thread_objects);
}
std::string line, null;
std::vector<int> ancestor_heads(m_context_up, static_root_head);
std::vector<int> ancestor_labels(m_context_up, static_root_label);
thread_objects->ancestor_heads.resize(0);
thread_objects->ancestor_labels.resize(0);
thread_objects->ancestor_heads.resize(m_context_up, static_root_head);
thread_objects->ancestor_labels.resize(m_context_up, static_root_label);
#ifdef WITH_THREADS
//read-lock for cache; cache resizes are so rare that we want to minimize number of calls, not scope
m_accessLock.lock_shared();
#endif
while(getline(inStream, line)) {
TreePointerMap back_pointers;
boost::array<float, 4> score;
score.fill(0);
InternalTree* mytree (new InternalTree(line));
size_t boundary_hash = 0;
Score(mytree, back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
Score(mytree, back_pointers, score, boundary_hash, *thread_objects);
std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << std::endl;
}
#ifdef WITH_THREADS
m_accessLock.unlock_shared();
#endif
}
@ -714,8 +789,6 @@ void RDLM::SetParameter(const std::string& key, const std::string& value)
m_path_head_lm = value;
} else if (key == "path_label_lm") {
m_path_label_lm = value;
} else if (key == "ptkvz") {
m_isPTKVZ = Scan<bool>(value);
} else if (key == "backoff") {
m_isPretermBackoff = Scan<bool>(value);
} else if (key == "context_up") {
@ -744,7 +817,9 @@ void RDLM::SetParameter(const std::string& key, const std::string& value)
else
UTIL_THROW(util::Exception, "Unknown value for argument " << key << "=" << value);
} else if (key == "glue_symbol") {
m_glueSymbol = value;
m_glueSymbolString = value;
} else if (key == "factor") {
m_factorType = Scan<FactorType>(value);
} else if (key == "cache_size") {
m_cacheSize = Scan<int>(value);
} else {
@ -780,10 +855,6 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
accumulator->PlusEquals(ff_idx+1, prev_approx_label);
bool full_sentence = (mytree->GetChildren().back()->GetLabel() == m_endTag || (mytree->GetChildren().back()->GetLabel() == m_endSymbol && mytree->GetChildren().back()->GetChildren().back()->GetLabel() == m_endTag));
std::vector<int> ancestor_heads ((full_sentence ? m_context_up : 0), static_root_head);
std::vector<int> ancestor_labels ((full_sentence ? m_context_up : 0), static_root_label);
ancestor_heads.reserve(10);
ancestor_labels.reserve(10);
TreePointerMap back_pointers = AssociateLeafNTs(mytree.get(), previous_trees);
boost::array<float, 4> score; // score_head, approx_score_head, score_label, approx_score_label
@ -791,13 +862,45 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
//hash of all boundary symbols (symbols with incomplete context); trees with same hash share state for cube pruning.
size_t boundary_hash = 0;
if (!m_rerank) {
Score(mytree.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
#ifdef WITH_THREADS
//read-lock for cache; cache resizes are so rare that we want to minimize number of calls, not scope
m_accessLock.lock_shared();
#endif
rdlm::ThreadLocal *thread_objects = thread_objects_backend_.get();
if (!thread_objects) {
thread_objects = new rdlm::ThreadLocal(lm_head_base_instance_, lm_label_base_instance_, m_normalizeHeadLM, m_normalizeLabelLM, m_cacheSize);
thread_objects_backend_.reset(thread_objects);
}
thread_objects->ancestor_heads.resize(0);
thread_objects->ancestor_labels.resize(0);
thread_objects->ancestor_heads.resize((full_sentence ? m_context_up : 0), static_root_head);
thread_objects->ancestor_labels.resize((full_sentence ? m_context_up : 0), static_root_label);
Score(mytree.get(), back_pointers, score, boundary_hash, *thread_objects);
#ifdef WITH_THREADS
m_accessLock.unlock_shared();
#endif
accumulator->PlusEquals(ff_idx, score[0] + score[1]);
accumulator->PlusEquals(ff_idx+1, score[2] + score[3]);
}
mytree->Combine(previous_trees);
if (m_rerank && full_sentence) {
Score(mytree.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
#ifdef WITH_THREADS
//read-lock for cache; cache resizes are so rare that we want to minimize number of calls, not scope
m_accessLock.lock_shared();
#endif
rdlm::ThreadLocal *thread_objects = thread_objects_backend_.get();
if (!thread_objects) {
thread_objects = new rdlm::ThreadLocal(lm_head_base_instance_, lm_label_base_instance_, m_normalizeHeadLM, m_normalizeLabelLM, m_cacheSize);
thread_objects_backend_.reset(thread_objects);
}
thread_objects->ancestor_heads.resize(0);
thread_objects->ancestor_labels.resize(0);
thread_objects->ancestor_heads.resize((full_sentence ? m_context_up : 0), static_root_head);
thread_objects->ancestor_labels.resize((full_sentence ? m_context_up : 0), static_root_label);
Score(mytree.get(), back_pointers, score, boundary_hash, *thread_objects);
#ifdef WITH_THREADS
m_accessLock.unlock_shared();
#endif
accumulator->PlusEquals(ff_idx, score[0] + score[1]);
accumulator->PlusEquals(ff_idx+1, score[2] + score[3]);
}

View File

@ -3,10 +3,16 @@
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/FFState.h"
#include "moses/FF/InternalTree.h"
#include "moses/Word.h"
#include <boost/thread/tss.hpp>
#include <boost/array.hpp>
#ifdef WITH_THREADS
#include <boost/thread/shared_mutex.hpp>
#endif
// relational dependency language model, described in:
// Sennrich, Rico (2015). Modelling and Optimizing on Syntactic N-Grams for Statistical Machine Translation. Transactions of the Association for Computational Linguistics.
// see 'scripts/training/rdlm' for training scripts
@ -19,6 +25,31 @@ class neuralTM;
namespace Moses
{
namespace rdlm
{
// we re-use some short-lived objects to reduce the number of allocations;
// each thread gets its own instance to prevent collision
// [could be replaced with thread_local keyword in C++11]
// Per-thread scratch buffers and LM handles, reused across calls to avoid
// repeated allocations; obtained via boost::thread_specific_ptr in RDLM.
class ThreadLocal
{
public:
// context stacks of head-word / label IDs for the ancestors of the node being scored
std::vector<int> ancestor_heads;
std::vector<int> ancestor_labels;
// scratch n-gram buffer passed to the NPLM lookup
std::vector<int> ngram;
// per-child head/label IDs (input vocabulary) collected by GetChildHeadsAndLabels
std::vector<int> heads;
std::vector<int> labels;
// same, but in the output vocabulary of the respective model
std::vector<int> heads_output;
std::vector<int> labels_output;
// traversal stack reused by UnbinarizedChildren when expanding virtual (^-prefixed) nodes
std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > stack;
// thread-private NPLM instances (head model / label model); owned by this object
nplm::neuralTM* lm_head;
nplm::neuralTM* lm_label;
// clones the shared base models and applies normalization/cache settings (defined in RDLM.cpp)
ThreadLocal(nplm::neuralTM *lm_head_base_instance_, nplm::neuralTM *lm_label_base_instance_, bool normalizeHeadLM, bool normalizeLabelLM, int cacheSize);
~ThreadLocal();
};
}
class RDLMState : public TreeState
{
float m_approx_head; //score that was approximated due to lack of context
@ -56,19 +87,18 @@ class RDLM : public StatefulFeatureFunction
typedef std::map<InternalTree*,TreePointer> TreePointerMap;
nplm::neuralTM* lm_head_base_instance_;
mutable boost::thread_specific_ptr<nplm::neuralTM> lm_head_backend_;
nplm::neuralTM* lm_label_base_instance_;
mutable boost::thread_specific_ptr<nplm::neuralTM> lm_label_backend_;
std::string dummy_head;
std::string m_glueSymbol;
std::string m_startSymbol;
std::string m_endSymbol;
std::string m_endTag;
mutable boost::thread_specific_ptr<rdlm::ThreadLocal> thread_objects_backend_;
std::string m_glueSymbolString;
Word dummy_head;
Word m_glueSymbol;
Word m_startSymbol;
Word m_endSymbol;
Word m_endTag;
std::string m_path_head_lm;
std::string m_path_label_lm;
bool m_isPTKVZ;
bool m_isPretermBackoff;
size_t m_context_left;
size_t m_context_right;
@ -103,15 +133,26 @@ class RDLM : public StatefulFeatureFunction
int static_stop_label_output;
int static_start_label_output;
FactorType m_factorType;
static const int LABEL_INPUT = 0;
static const int LABEL_OUTPUT = 1;
static const int HEAD_INPUT = 2;
static const int HEAD_OUTPUT = 3;
mutable std::vector<int> factor2id_label_input;
mutable std::vector<int> factor2id_label_output;
mutable std::vector<int> factor2id_head_input;
mutable std::vector<int> factor2id_head_output;
#ifdef WITH_THREADS
//reader-writer lock
mutable boost::shared_mutex m_accessLock;
#endif
public:
RDLM(const std::string &line)
: StatefulFeatureFunction(2, line)
, dummy_head("<dummy_head>")
, m_glueSymbol("Q")
, m_startSymbol("SSTART")
, m_endSymbol("SEND")
, m_endTag("</s>")
, m_isPTKVZ(false)
, m_glueSymbolString("Q")
, m_isPretermBackoff(true)
, m_context_left(3)
, m_context_right(0)
@ -122,8 +163,16 @@ public:
, m_normalizeLabelLM(false)
, m_sharedVocab(false)
, m_binarized(0)
, m_cacheSize(1000000) {
, m_cacheSize(1000000)
, m_factorType(0) {
ReadParameters();
std::vector<FactorType> factors;
factors.push_back(0);
dummy_head.CreateFromString(Output, factors, "<dummy_head>", false);
m_glueSymbol.CreateFromString(Output, factors, m_glueSymbolString, true);
m_startSymbol.CreateFromString(Output, factors, "SSTART", true);
m_endSymbol.CreateFromString(Output, factors, "SEND", true);
m_endTag.CreateFromString(Output, factors, "</s>", false);
}
~RDLM();
@ -132,10 +181,11 @@ public:
return new RDLMState(TreePointer(), 0, 0, 0);
}
void Score(InternalTree* root, const TreePointerMap & back_pointers, boost::array<float,4> &score, std::vector<int> &ancestor_heads, std::vector<int> &ancestor_labels, size_t &boundary_hash, int num_virtual = 0, int rescoring_levels = 0) const;
InternalTree* GetHead(InternalTree* root, const TreePointerMap & back_pointers, std::pair<int,int> & IDs, InternalTree * head_ptr=NULL) const;
void GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & back_pointers, int reached_end, const nplm::neuralTM *lm_head, const nplm::neuralTM *lm_labels, std::vector<int> & heads, std::vector<int> & labels, std::vector<int> & heads_output, std::vector<int> & labels_output) const;
void GetIDs(const std::string & head, const std::string & preterminal, std::pair<int,int> & IDs) const;
void Score(InternalTree* root, const TreePointerMap & back_pointers, boost::array<float,4> &score, size_t &boundary_hash, rdlm::ThreadLocal &thread_objects, int num_virtual = 0, int rescoring_levels = 0) const;
bool GetHead(InternalTree* root, const TreePointerMap & back_pointers, std::pair<int,int> & IDs) const;
void GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & back_pointers, int reached_end, rdlm::ThreadLocal &thread_objects) const;
void GetIDs(const Word & head, const Word & preterminal, std::pair<int,int> & IDs) const;
int Factor2ID(const Factor * const factor, int model_type) const;
void ScoreFile(std::string &path); //for debugging
void PrintInfo(std::vector<int> &ngram, nplm::neuralTM* lm) const; //for debugging
@ -177,22 +227,23 @@ public:
private:
std::vector<TreePointer>::const_iterator iter;
std::vector<TreePointer>::const_iterator _begin;
std::vector<TreePointer>::const_iterator _end;
bool _ended;
InternalTree* current;
const TreePointerMap & back_pointers;
bool binarized;
std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > stack;
std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > &stack;
public:
UnbinarizedChildren(InternalTree* root, const TreePointerMap & pointers, bool binary):
UnbinarizedChildren(InternalTree* root, const TreePointerMap & pointers, bool binary, std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > & persistent_stack):
current(root),
back_pointers(pointers),
binarized(binary) {
stack.reserve(10);
_end = current->GetChildren().end();
binarized(binary),
stack(persistent_stack) {
stack.resize(0);
_ended = current->GetChildren().empty();
iter = current->GetChildren().begin();
// expand virtual node
while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') {
while (binarized && !(*iter)->GetLabel().GetString(0).empty() && (*iter)->GetLabel().GetString(0).data()[0] == '^') {
stack.push_back(std::make_pair(current, iter));
// also go through trees or previous hypotheses to rescore nodes for which more context has become available
if ((*iter)->IsLeafNT()) {
@ -208,8 +259,8 @@ public:
std::vector<TreePointer>::const_iterator begin() const {
return _begin;
}
std::vector<TreePointer>::const_iterator end() const {
return _end;
bool ended() const {
return _ended;
}
std::vector<TreePointer>::const_iterator operator++() {
@ -224,12 +275,13 @@ public:
break;
}
}
if (iter == _end) {
if (iter == current->GetChildren().end()) {
_ended = true;
return iter;
}
}
// expand virtual node
while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') {
while (binarized && !(*iter)->GetLabel().GetString(0).empty() && (*iter)->GetLabel().GetString(0).data()[0] == '^') {
stack.push_back(std::make_pair(current, iter));
// also go through trees or previous hypotheses to rescore nodes for which more context has become available
if ((*iter)->IsLeafNT()) {

Some files were not shown because too many files have changed in this diff Show More