This commit is contained in:
Ulrich Germann 2015-09-01 23:42:29 +01:00
commit 764780ea26
459 changed files with 12395 additions and 12732 deletions

View File

@ -21,6 +21,9 @@ mingw/MosesGUI/icons_rc.py
mingw/MosesGUI/Ui_credits.py
mingw/MosesGUI/Ui_mainWindow.py
moses/TranslationModel/UG
moses/server
moses/parameters
moses/thread_safe_container.h
phrase-extract/pcfg-common
phrase-extract/syntax-common
randlm
@ -32,3 +35,4 @@ srilm
util
xmlrpc-c
.git
util/ug_cache_with_timeout.h

10
Jamroot
View File

@ -108,7 +108,7 @@ external-lib z ;
#lib dl : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
#requirements += <library>dl ;
#requirements += <cxxflags>-std=c++0x ;
if ! [ option.get "without-tcmalloc" : : "yes" ] && [ test_library "tcmalloc_minimal" ] {
if [ option.get "full-tcmalloc" : : "yes" ] {
@ -133,7 +133,9 @@ if [ option.get "filter-warnings" : : "yes" ] {
requirements += <cxxflags>-Wno-unused-but-set-variable ;
requirements += <cxxflags>-Wno-unused-result ;
requirements += <cxxflags>-Wno-unused-variable ;
requirements += <cxxflags>-Wcomment ;
requirements += <cxxflags>-Wno-comment ;
requirements += <cxxflags>-Wno-strict-aliasing ;
requirements += <cxxflags>-Wno-overloaded-virtual ;
}
if [ option.get "debug-build" : : "yes" ] {
@ -179,7 +181,7 @@ if [ option.get "with-icu" : : "yes" ]
requirements += <library>icui18n/<link>shared ;
requirements += <cxxflags>-fPIC ;
requirements += <address-model>64 ;
requirements += <runtime-link>shared ;
# requirements += <runtime-link>shared ;
}
if [ option.get "with-probing-pt" : : "yes" ]
@ -301,5 +303,5 @@ if [ path.exists $(TOP)/dist ] && $(prefix) != dist {
#local temp = [ _shell "bash source ./s.sh" ] ;
local temp = [ _shell "mkdir -p $(TOP)/bin" ] ;
local temp = [ _shell "rm $(TOP)/bin/moses_chart" ] ;
local temp = [ _shell "rm -f $(TOP)/bin/moses_chart" ] ;
local temp = [ _shell "cd $(TOP)/bin && ln -s moses moses_chart" ] ;

View File

@ -21,6 +21,11 @@ SuffixArray::SuffixArray()
m_wordInSentence(NULL),
m_sentence(NULL),
m_sentenceLength(NULL),
m_document(NULL),
m_documentName(NULL),
m_documentNameLength(0),
m_documentCount(0),
m_useDocument(false),
m_vcb(),
m_size(0),
m_sentenceCount(0) { }
@ -32,6 +37,8 @@ SuffixArray::~SuffixArray()
free(m_wordInSentence);
free(m_sentence);
free(m_sentenceLength);
free(m_document);
free(m_documentName);
}
void SuffixArray::Create(const string& fileName )
@ -46,22 +53,32 @@ void SuffixArray::Create(const string& fileName )
textFile.open(fileName.c_str());
if (!textFile) {
cerr << "no such file or directory " << fileName << endl;
cerr << "Error: no such file or directory " << fileName << endl;
exit(1);
}
// first pass through data: get size
istream *fileP = &textFile;
m_size = 0;
m_sentenceCount = 0;
m_documentCount = 0;
while(!fileP->eof()) {
SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
if (fileP->eof()) break;
if (m_useDocument && ProcessDocumentLine(line,0)) continue;
vector< WORD_ID > words = m_vcb.Tokenize( line );
m_size += words.size() + 1;
m_sentenceCount++;
}
textFile.close();
cerr << m_size << " words (incl. sentence boundaries)" << endl;
if (m_useDocument) {
cerr << m_documentCount << " documents" << endl;
if (m_documentCount == 0) {
cerr << "Error: no documents found, aborting." << endl;
exit(1);
}
}
// allocate memory
m_array = (WORD_ID*) calloc( sizeof( WORD_ID ), m_size );
@ -69,21 +86,31 @@ void SuffixArray::Create(const string& fileName )
m_wordInSentence = (char*) calloc( sizeof( char ), m_size );
m_sentence = (INDEX*) calloc( sizeof( INDEX ), m_size );
m_sentenceLength = (char*) calloc( sizeof( char ), m_sentenceCount );
// fill the array
int wordIndex = 0;
int sentenceId = 0;
textFile.open(fileName.c_str());
if (!textFile) {
cerr << "no such file or directory " << fileName << endl;
exit(1);
CheckAllocation(m_array != NULL, "m_array");
CheckAllocation(m_index != NULL, "m_index");
CheckAllocation(m_wordInSentence != NULL, "m_wordInSentence");
CheckAllocation(m_sentence != NULL, "m_sentence");
CheckAllocation(m_sentenceLength != NULL, "m_sentenceLength");
if (m_useDocument) {
m_document = (INDEX*) calloc( sizeof( INDEX ), m_documentCount );
m_documentName = (INDEX*) calloc( sizeof( char ), m_documentCount );
m_documentNameBuffer = (char*) calloc( sizeof( char ), m_documentNameLength );
CheckAllocation(m_document != NULL, "m_document");
CheckAllocation(m_documentName != NULL, "m_documentName");
CheckAllocation(m_documentNameBuffer != NULL, "m_documentNameBuffer");
}
// second pass through data: fill the arrays
int wordIndex = 0;
int sentenceId = 0;
m_documentNameLength = 0; // re-use as counter
m_documentCount = 0; // re-use as counter
textFile.open(fileName.c_str());
fileP = &textFile;
while(!fileP->eof()) {
SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
if (fileP->eof()) break;
if (m_useDocument && ProcessDocumentLine(line,sentenceId)) continue;
vector< WORD_ID > words = m_vcb.Tokenize( line );
vector< WORD_ID >::const_iterator i;
@ -105,7 +132,7 @@ void SuffixArray::Create(const string& fileName )
m_buffer = (INDEX*) calloc( sizeof( INDEX ), m_size );
if (m_buffer == NULL) {
cerr << "cannot allocate memory to m_buffer" << endl;
cerr << "Error: cannot allocate memory to m_buffer" << endl;
exit(1);
}
@ -114,6 +141,45 @@ void SuffixArray::Create(const string& fileName )
cerr << "done sorting" << endl;
}
// very specific code to deal with common crawl document ids
// Checks whether a corpus line is a document-header line of the form
//   <32-hex-char hash> <float score> <url>
// If it is, records the document boundary (document name and the id of its
// first sentence) and returns true so the caller skips the line as corpus
// text; otherwise returns false.
// NOTE(review): called once per pass over the corpus — during the first pass
// (m_document == NULL) only the counters m_documentNameLength/m_documentCount
// grow; during the second pass the arrays allocated from those counts are
// filled. The counters are reset between passes by the caller.
bool SuffixArray::ProcessDocumentLine( const char *line, const size_t sentenceId )
{
  size_t i;
  // first 32 characters are hex-hash
  for(i=0; i<32; i++) {
    if ((line[i] < '0' || line[i] > '9') && (line[i] < 'a' || line[i] > 'f')) {
      return false;
    }
  }
  if (line[i++] != ' ') return false;
  // second token is float
  for (; line[i] != ' ' && line[i] != 0; i++) {
    if (line[i] != '.' && (line[i] < '0' || line[i] > '9')) {
      return false;
    }
  }
  i++;
  // last token is url (=name)
  size_t startName = i;
  for (; line[i] != ' ' && line[i] != 0; i++) {}
  // the url must be the final token on the line; a trailing space means
  // there is more content, so this is not a document-header line
  if (line[i] == ' ') return false;
  size_t endName = i+1; // include '\0'
  // second pass: record name and sentence number
  if (m_document != NULL) {
    // offset of this document's name within the shared name buffer
    m_documentName[m_documentCount] = m_documentNameLength;
    for(size_t i=startName; i<endName; i++) {
      m_documentNameBuffer[m_documentNameLength + i-startName] = line[i];
    }
    // first sentence id belonging to this document
    m_document[m_documentCount] = sentenceId;
  }
  m_documentNameLength += endName-startName;
  m_documentCount++;
  return true;
}
// good ol' quick sort
void SuffixArray::Sort(INDEX start, INDEX end)
{
@ -162,7 +228,6 @@ int SuffixArray::CompareIndex( INDEX a, INDEX b ) const
inline int SuffixArray::CompareWord( WORD_ID a, WORD_ID b ) const
{
// cerr << "c(" << m_vcb.GetWord(a) << ":" << m_vcb.GetWord(b) << ")=" << m_vcb.GetWord(a).compare( m_vcb.GetWord(b) ) << endl;
return m_vcb.GetWord(a).compare( m_vcb.GetWord(b) );
}
@ -272,13 +337,73 @@ void SuffixArray::List(INDEX start, INDEX end)
}
}
// Prints every corpus sentence that contains the given phrase.
// Output format (to stdout):
//   QUERY\t<phrase>\t<count> matches
// followed by one line per matching sentence; when document indexing is
// enabled each sentence line is prefixed with "<document-name>\t".
void SuffixArray::PrintSentenceMatches( const std::vector< WORD > &phrase )
{
  cout << "QUERY\t";
  for(size_t i=0; i<phrase.size(); i++) {
    if (i>0) cout << " ";
    cout << phrase[i];
  }
  cout << '\t';
  INDEX start = 0;
  INDEX end = m_size-1;
  // find any one occurrence of the phrase in the suffix array
  INDEX mid = FindFirst( phrase, start, end );
  if (mid == m_size) { // no matches
    cout << "0 matches" << endl;
    return;
  }
  // expand from that hit to the full contiguous range of matches
  INDEX firstMatch = FindLast( phrase, mid, start, -1 );
  INDEX lastMatch = FindLast( phrase, mid, end, 1 );
  // loop through all matches
  cout << (lastMatch-firstMatch+1) << " matches" << endl;
  for(INDEX i=firstMatch; i<=lastMatch; i++) {
    // get sentence information
    INDEX pos = GetPosition( i );
    // position of the sentence's first word in the corpus array
    INDEX start = pos - GetWordInSentence( pos );
    char length = GetSentenceLength( GetSentence( pos ) );
    // print document name
    if (m_useDocument) {
      INDEX sentence = GetSentence( pos );
      INDEX document = GetDocument( sentence );
      PrintDocumentName( document );
      cout << '\t';
    }
    // print sentence
    // NOTE(review): 'char i' shadows the outer INDEX i; also limits
    // printable sentence length to the range of char
    for(char i=0; i<length; i++) {
      if (i>0) cout << " ";
      cout << GetWord( start + i );
    }
    cout << endl;
  }
}
// Maps a sentence id to the index of the document that contains it.
// m_document is a sorted array holding each document's first sentence id,
// so the containing document d satisfies
//   m_document[d] <= sentence < m_document[d+1]
// Precondition: m_documentCount > 0 and sentence is a valid sentence id;
// otherwise the search does not terminate safely.
SuffixArray::INDEX SuffixArray::GetDocument( INDEX sentence ) const
{
  // binary search
  INDEX min = 0;
  INDEX max = m_documentCount-1;
  if (sentence >= m_document[max]) {
    // at or past the last document's first sentence -> last document
    return max;
  }
  while(true) {
    INDEX mid = (min + max) / 2;
    if (sentence >= m_document[mid] && sentence < m_document[mid+1]) {
      return mid;
    }
    if (sentence < m_document[mid]) {
      max = mid-1;
    } else {
      min = mid+1;
    }
  }
}
void SuffixArray::Save(const string& fileName ) const
{
FILE *pFile = fopen ( fileName.c_str() , "w" );
if (pFile == NULL) {
cerr << "Cannot open " << fileName << endl;
exit(1);
}
if (pFile == NULL) Error("cannot open",fileName);
fwrite( &m_size, sizeof(INDEX), 1, pFile );
fwrite( m_array, sizeof(WORD_ID), m_size, pFile ); // corpus
@ -288,6 +413,16 @@ void SuffixArray::Save(const string& fileName ) const
fwrite( &m_sentenceCount, sizeof(INDEX), 1, pFile );
fwrite( m_sentenceLength, sizeof(char), m_sentenceCount, pFile); // sentence length
char useDocument = m_useDocument; // not sure if that is needed
fwrite( &useDocument, sizeof(char), 1, pFile );
if (m_useDocument) {
fwrite( &m_documentCount, sizeof(INDEX), 1, pFile );
fwrite( m_document, sizeof(INDEX), m_documentCount, pFile );
fwrite( m_documentName, sizeof(INDEX), m_documentCount, pFile );
fwrite( &m_documentNameLength, sizeof(INDEX), 1, pFile );
fwrite( m_documentNameBuffer, sizeof(char), m_documentNameLength, pFile );
}
fclose( pFile );
m_vcb.Save( fileName + ".src-vcb" );
@ -296,56 +431,81 @@ void SuffixArray::Save(const string& fileName ) const
void SuffixArray::Load(const string& fileName )
{
FILE *pFile = fopen ( fileName.c_str() , "r" );
if (pFile == NULL) {
cerr << "no such file or directory " << fileName << endl;
exit(1);
}
if (pFile == NULL) Error("no such file or directory", fileName);
cerr << "loading from " << fileName << endl;
fread( &m_size, sizeof(INDEX), 1, pFile );
fread( &m_size, sizeof(INDEX), 1, pFile )
|| Error("could not read m_size from", fileName);
cerr << "words in corpus: " << m_size << endl;
m_array = (WORD_ID*) calloc( sizeof( WORD_ID ), m_size );
m_index = (INDEX*) calloc( sizeof( INDEX ), m_size );
m_wordInSentence = (char*) calloc( sizeof( char ), m_size );
m_sentence = (INDEX*) calloc( sizeof( INDEX ), m_size );
CheckAllocation(m_array != NULL, "m_array");
CheckAllocation(m_index != NULL, "m_index");
CheckAllocation(m_wordInSentence != NULL, "m_wordInSentence");
CheckAllocation(m_sentence != NULL, "m_sentence");
fread( m_array, sizeof(WORD_ID), m_size, pFile ) // corpus
|| Error("could not read m_array from", fileName);
fread( m_index, sizeof(INDEX), m_size, pFile ) // suffix array
|| Error("could not read m_index from", fileName);
fread( m_wordInSentence, sizeof(char), m_size, pFile) // word index
|| Error("could not read m_wordInSentence from", fileName);
fread( m_sentence, sizeof(INDEX), m_size, pFile ) // sentence index
|| Error("could not read m_sentence from", fileName);
if (m_array == NULL) {
cerr << "Error: cannot allocate memory to m_array" << endl;
exit(1);
}
if (m_index == NULL) {
cerr << "Error: cannot allocate memory to m_index" << endl;
exit(1);
}
if (m_wordInSentence == NULL) {
cerr << "Error: cannot allocate memory to m_wordInSentence" << endl;
exit(1);
}
if (m_sentence == NULL) {
cerr << "Error: cannot allocate memory to m_sentence" << endl;
exit(1);
}
fread( m_array, sizeof(WORD_ID), m_size, pFile ); // corpus
fread( m_index, sizeof(INDEX), m_size, pFile ); // suffix array
fread( m_wordInSentence, sizeof(char), m_size, pFile); // word index
fread( m_sentence, sizeof(INDEX), m_size, pFile); // sentence index
fread( &m_sentenceCount, sizeof(INDEX), 1, pFile );
fread( &m_sentenceCount, sizeof(INDEX), 1, pFile )
|| Error("could not read m_sentenceCount from", fileName);
cerr << "sentences in corpus: " << m_sentenceCount << endl;
m_sentenceLength = (char*) calloc( sizeof( char ), m_sentenceCount );
if (m_sentenceLength == NULL) {
cerr << "Error: cannot allocate memory to m_sentenceLength" << endl;
exit(1);
m_sentenceLength = (char*) calloc( sizeof( char ), m_sentenceCount );
CheckAllocation(m_sentenceLength != NULL, "m_sentenceLength");
fread( m_sentenceLength, sizeof(char), m_sentenceCount, pFile) // sentence length
|| Error("could not read m_sentenceLength from", fileName);
if (m_useDocument) { // do not read it when you do not need it
char useDocument;
fread( &useDocument, sizeof(char), 1, pFile )
|| Error("could not read m_useDocument from", fileName);
if (!useDocument) {
cerr << "Error: stored suffix array does not have a document index\n";
exit(1);
}
fread( &m_documentCount, sizeof(INDEX), 1, pFile )
|| Error("could not read m_documentCount from", fileName);
m_document = (INDEX*) calloc( sizeof( INDEX ), m_documentCount );
m_documentName = (INDEX*) calloc( sizeof( INDEX ), m_documentCount );
CheckAllocation(m_document != NULL, "m_document");
CheckAllocation(m_documentName != NULL, "m_documentName");
fread( m_document, sizeof(INDEX), m_documentCount, pFile )
|| Error("could not read m_document from", fileName);
fread( m_documentName, sizeof(INDEX), m_documentCount, pFile )
|| Error("could not read m_documentName from", fileName);
fread( &m_documentNameLength, sizeof(INDEX), 1, pFile )
|| Error("could not read m_documentNameLength from", fileName);
m_documentNameBuffer = (char*) calloc( sizeof( char ), m_documentNameLength );
CheckAllocation(m_documentNameBuffer != NULL, "m_documentNameBuffer");
fread( m_documentNameBuffer, sizeof(char), m_documentNameLength, pFile )
|| Error("could not read m_document from", fileName);
}
fread( m_sentenceLength, sizeof(char), m_sentenceCount, pFile); // sentence length
fclose( pFile );
m_vcb.Load( fileName + ".src-vcb" );
}
// Verifies that an allocation succeeded (check == true); if it failed,
// prints a diagnostic naming the data structure and aborts the process.
void SuffixArray::CheckAllocation( bool check, const char *dataStructure ) const
{
  if (!check) {
    cerr << "Error: could not allocate memory for " << dataStructure << endl;
    exit(1);
  }
}
// Reports a fatal error involving fileName and terminates the program.
// Declared bool so that calls can be chained into boolean expressions,
// e.g.:  fread(...) || Error("could not read m_size from", fileName);
bool SuffixArray::Error( const char *message, const string &fileName) const
{
  cerr << "Error: " << message << " " << fileName << endl;
  exit(1);
  return true; // yeah, i know. -- never reached; exit() does not return
}

View File

@ -15,6 +15,12 @@ private:
INDEX *m_sentence;
char *m_sentenceLength;
WORD_ID m_endOfSentence;
INDEX *m_document;
INDEX *m_documentName;
char *m_documentNameBuffer;
size_t m_documentNameLength;
size_t m_documentCount;
bool m_useDocument;
Vocabulary m_vcb;
INDEX m_size;
INDEX m_sentenceCount;
@ -28,6 +34,7 @@ public:
~SuffixArray();
void Create(const std::string& fileName );
bool ProcessDocumentLine( const char* const, const size_t );
void Sort(INDEX start, INDEX end);
int CompareIndex( INDEX a, INDEX b ) const;
inline int CompareWord( WORD_ID a, WORD_ID b ) const;
@ -40,6 +47,7 @@ public:
INDEX FindLast( const std::vector< WORD > &phrase, INDEX start, INDEX end, int direction );
int Match( const std::vector< WORD > &phrase, INDEX index );
void List( INDEX start, INDEX end );
void PrintSentenceMatches( const std::vector< WORD > &phrase );
inline INDEX GetPosition( INDEX index ) const {
return m_index[ index ];
}
@ -58,6 +66,17 @@ public:
inline WORD GetWord( INDEX position ) const {
return m_vcb.GetWord( m_array[position] );
}
void UseDocument() {
m_useDocument = true;
}
INDEX GetDocument( INDEX sentence ) const;
void PrintDocumentName( INDEX document ) {
for(INDEX i=m_documentName[ document ]; m_documentNameBuffer[i] != 0; i++) {
std::cout << m_documentNameBuffer[ i ];
}
}
void Save(const std::string& fileName ) const;
void Load(const std::string& fileName );
void CheckAllocation(bool, const char *dataStructure) const;
bool Error( const char* message, const std::string& fileName) const;
};

View File

@ -62,7 +62,7 @@ void Vocabulary::Save(const string& fileName ) const
vcbFile.open( fileName.c_str(), ios::out | ios::ate | ios::trunc);
if (!vcbFile) {
cerr << "Failed to open " << vcbFile << endl;
cerr << "Failed to open " << fileName << endl;
exit(1);
}
@ -81,7 +81,7 @@ void Vocabulary::Load(const string& fileName )
vcbFile.open(fileName.c_str());
if (!vcbFile) {
cerr << "no such file or directory: " << vcbFile << endl;
cerr << "no such file or directory: " << fileName << endl;
exit(1);
}

View File

@ -1,4 +1,5 @@
#include "SuffixArray.h"
#include "../util/tokenize.hh"
#include <getopt.h>
using namespace std;
@ -13,10 +14,12 @@ int main(int argc, char* argv[])
string query;
string fileNameSuffix;
string fileNameSource;
int loadFlag = false;
int saveFlag = false;
int createFlag = false;
int queryFlag = false;
bool loadFlag = false;
bool saveFlag = false;
bool createFlag = false;
bool queryFlag = false;
bool querySentenceFlag = false;
int stdioFlag = false; // receive requests from STDIN, respond to STDOUT
string info = "usage: biconcor\n\t[--load model-file]\n\t[--save model-file]\n\t[--create corpus]\n\t[--query string]\n\t[--stdio]\n";
while(1) {
@ -25,11 +28,14 @@ int main(int argc, char* argv[])
{"save", required_argument, 0, 's'},
{"create", required_argument, 0, 'c'},
{"query", required_argument, 0, 'q'},
{"query-sentence", required_argument, 0, 'Q'},
{"document", required_argument, 0, 'd'},
{"stdio", no_argument, 0, 'i'},
{"stdio-sentence", no_argument, 0, 'I'},
{0, 0, 0, 0}
};
int option_index = 0;
int c = getopt_long (argc, argv, "l:s:c:q:i", long_options, &option_index);
int c = getopt_long (argc, argv, "l:s:c:q:Q:iId", long_options, &option_index);
if (c == -1) break;
switch (c) {
case 'l':
@ -48,17 +54,25 @@ int main(int argc, char* argv[])
query = string(optarg);
queryFlag = true;
break;
case 'Q':
query = string(optarg);
querySentenceFlag = true;
break;
case 'i':
stdioFlag = true;
break;
case 'I':
stdioFlag = true;
querySentenceFlag = true;
break;
case 'd':
suffixArray.UseDocument();
break;
default:
cerr << info;
exit(1);
}
}
if (stdioFlag) {
queryFlag = true;
}
// check if parameter settings are legal
if (saveFlag && !createFlag) {
@ -74,7 +88,7 @@ int main(int argc, char* argv[])
exit(1);
}
// do your thing
// get suffix array
if (createFlag) {
cerr << "will create\n";
cerr << "corpus is in " << fileNameSource << endl;
@ -88,16 +102,26 @@ int main(int argc, char* argv[])
cerr << "will load from " << fileNameSuffix << endl;
suffixArray.Load( fileNameSuffix );
}
// do something with it
if (stdioFlag) {
while(true) {
string query;
if (getline(cin, query, '\n').eof()) {
return 0;
}
cout << lookup( query ) << endl;
if (querySentenceFlag) {
vector< string > queryString = util::tokenize( query.c_str() );
suffixArray.PrintSentenceMatches( queryString );
} else {
cout << lookup( query ) << endl;
}
}
} else if (queryFlag) {
cout << lookup( query ) << endl;
} else if (querySentenceFlag) {
vector< string > queryString = util::tokenize( query.c_str() );
suffixArray.PrintSentenceMatches( queryString );
}
return 0;
}
@ -105,32 +129,6 @@ int main(int argc, char* argv[])
size_t lookup( string query )
{
cerr << "query is " << query << endl;
vector< string > queryString = tokenize( query.c_str() );
vector< string > queryString = util::tokenize( query.c_str() );
return suffixArray.Count( queryString );
}
// Duplicate of definition in util/tokenize.hh.
// TODO: Can we de-duplicate this? At the time of writing biconcor does not
// use util at all.
// Splits a C-string into tokens separated by runs of spaces and/or tabs.
// Leading/trailing whitespace produces no empty tokens.
std::vector<std::string> tokenize(const char input[])
{
  std::vector<std::string> tokens;
  int tokenStart = 0;
  bool inToken = false;
  int pos = 0;
  for (; input[pos] != '\0'; ++pos) {
    const bool isSpace = (input[pos] == ' ' || input[pos] == '\t');
    if (!inToken && !isSpace) {
      // a new token begins at this character
      tokenStart = pos;
      inToken = true;
    } else if (inToken && isSpace) {
      // token just ended; copy it out
      tokens.push_back(std::string(input + tokenStart, pos - tokenStart));
      inToken = false;
    }
  }
  if (inToken) {
    // flush a token that runs to the end of the string
    tokens.push_back(std::string(input + tokenStart, pos - tokenStart));
  }
  return tokens;
}

View File

@ -28,14 +28,16 @@ TEST_DIR: /home/moses-speedtest/phrase_tables/tests
TEST_LOG_DIR: /home/moses-speedtest/phrase_tables/testlogs
BASEBRANCH: RELEASE-2.1.1
MOSES_PROFILER_REPO: /home/moses-speedtest/moses-standard/mosesdecoder-variant-prof
MOSES_GOOGLE_PROFILER_REPO: /home/moses-speedtest/moses-standard/mosesdecoder-variant-gperftools
</pre>
The _MOSES\_REPO\_PATH_ is the place where you have set up and built moses.
The _DROP\_CACHES\_COMM_ is the command that would b eused to drop caches. It should run without needing root access.
The _DROP\_CACHES\_COMM_ is the command that would be used to drop caches. It should run without needing root access.
_TEST\_DIR_ is the directory where all the tests will reside.
_TEST\_LOG\_DIR_ is the directory where the performance logs will be gathered. It should be created before running the testsuite for the first time.
_BASEBRANCH_ is the branch against which all new tests will be compared. It should normally be set to be the latest Moses stable release.
_MOSES\_PROFILER\_REPO_ is a path to a moses repository set up and built with profiling enabled. Optional if you want to produce profiling results.
_MOSES\_GOOGLE\_PROFILER\_REPO_ is a path to a moses repository set up with full tcmalloc and profiler, as well as shared linking, for use with gperftools.
### Creating tests
In order to create a test one should go into the TEST_DIR and create a new folder. That folder will be used for the name of the test.
@ -45,7 +47,7 @@ An example such configuration file is **test\_config**
<pre>
Command: moses -f ... -i fff #Looks for the command in the /bin directory of the repo specified in the testsuite_config
LDPRE: ldpreloads #Comma separated LD_LIBRARY_PATH:/,
Variants: vanilla, cached, ldpre, profile #Can't have cached without ldpre or vanilla
Variants: vanilla, cached, ldpre, profile, google-profiler #Can't have cached without ldpre or vanilla
</pre>
The _Command:_ line specifies the executable (which is looked up in the /bin directory of the repo.) and any arguments necessary. Before running the test, the script cds to the current test directory so you can use relative paths.
@ -61,11 +63,21 @@ The _Variants:_ line specifies what type of tests should we run. This particular
If you want to produce profiler results together in some tests you need to specify the _MOSES\_PROFILER\_REPO_ in the config
```bash
git clone https://github.com/moses-smt/mosesdecoder.git mosesdecoder-profile
cd mosesdecoder
cd mosesdecoder-profile
./bjam -j10 --with-cmph=/usr/include/ variant=profile
```
Afterwards for testcases which contain the **profile** keyword in **Variants** you will see a directory inside _TEST\_LOG\_DIR which contains the **gprof** output from every run.
Afterwards for testcases which contain the **profile** keyword in **Variants** you will see a directory inside _TEST\_LOG\_DIR which contains the **gprof** output from every run (files ending in **\_profile**).
#### Produce google profiler results.
If you want to produce profiler results for some tests you need to specify the _MOSES\_GOOGLE\_PROFILER\_REPO_ in the config
```bash
git clone https://github.com/moses-smt/mosesdecoder.git mosesdecoder-google-profile
cd mosesdecoder
./bjam link=shared -j10 --full-tcmalloc --with-cmph=/usr/include/
```
Afterwards for testcases which contain the **google-profiler** keyword in **Variants** you will see a directory inside _TEST\_LOG\_DIR which contains the **google-profiler** output from every run (files prefixed with **pprof**). To analyze the output you need to use [pprof](http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html).
### Running tests.
Running the tests is done through the **runtests.py** script.

View File

@ -2,6 +2,7 @@
import os
import subprocess
import time
import shutil
from argparse import ArgumentParser
from testsuite_common import processLogLine
@ -26,16 +27,21 @@ def parse_cmd():
arguments = parser.parse_args()
return arguments
def repoinit(testconfig, profiler=True):
def repoinit(testconfig, profiler=None):
"""Determines revision and sets up the repo. If given the profiler optional
argument, wil init the profiler repo instead of the default one."""
revision = ''
#Update the repo
if profiler:
if profiler == "gnu-profiler":
if testconfig.repo_prof is not None:
os.chdir(testconfig.repo_prof)
else:
raise ValueError('Profiling repo is not defined')
elif profiler == "google-profiler":
if testconfig.repo_gprof is not None:
os.chdir(testconfig.repo_gprof)
else:
raise ValueError('Profiling repo is not defined')
else:
os.chdir(testconfig.repo)
#Checkout specific branch, else maintain main branch
@ -61,9 +67,10 @@ def repoinit(testconfig, profiler=True):
class Configuration:
"""A simple class to hold all of the configuration constatns"""
def __init__(self, repo, drop_caches, tests, testlogs, basebranch, baserev, repo_prof=None):
def __init__(self, repo, drop_caches, tests, testlogs, basebranch, baserev, repo_prof=None, repo_gprof=None):
self.repo = repo
self.repo_prof = repo_prof
self.repo_gprof = repo_gprof
self.drop_caches = drop_caches
self.tests = tests
self.testlogs = testlogs
@ -88,16 +95,17 @@ class Configuration:
class Test:
"""A simple class to contain all information about tests"""
def __init__(self, name, command, ldopts, permutations, prof_command=None):
def __init__(self, name, command, ldopts, permutations, prof_command=None, gprof_command=None):
self.name = name
self.command = command
self.prof_command = prof_command
self.gprof_command = gprof_command
self.ldopts = ldopts.replace(' ', '').split(',') #Not tested yet
self.permutations = permutations
def parse_configfile(conffile, testdir, moses_repo, moses_prof_repo=None):
def parse_configfile(conffile, testdir, moses_repo, moses_prof_repo=None, moses_gprof_repo=None):
"""Parses the config file"""
command, ldopts, prof_command = '', '', None
command, ldopts, prof_command, gprof_command = '', '', None, None
permutations = []
fileopen = open(conffile, 'r')
for line in fileopen:
@ -108,8 +116,10 @@ def parse_configfile(conffile, testdir, moses_repo, moses_prof_repo=None):
if opt == 'Command:':
command = args.replace('\n', '')
if moses_prof is not None: # Get optional command for profiling
if moses_prof_repo is not None: # Get optional command for profiling
prof_command = moses_prof_repo + '/bin/' + command
if moses_gprof_repo is not None: # Get optional command for google-perftools
gprof_command = moses_gprof_repo + '/bin/' + command
command = moses_repo + '/bin/' + command
elif opt == 'LDPRE:':
ldopts = args.replace('\n', '')
@ -118,14 +128,14 @@ def parse_configfile(conffile, testdir, moses_repo, moses_prof_repo=None):
else:
raise ValueError('Unrecognized option ' + opt)
#We use the testdir as the name.
testcase = Test(testdir, command, ldopts, permutations, prof_command)
testcase = Test(testdir, command, ldopts, permutations, prof_command, gprof_command)
fileopen.close()
return testcase
def parse_testconfig(conffile):
"""Parses the config file for the whole testsuite."""
repo_path, drop_caches, tests_dir, testlog_dir = '', '', '', ''
basebranch, baserev, repo_prof_path = '', '', None
basebranch, baserev, repo_prof_path, repo_gprof_path = '', '', None, None
fileopen = open(conffile, 'r')
for line in fileopen:
line = line.split('#')[0] # Discard comments
@ -146,10 +156,12 @@ def parse_testconfig(conffile):
baserev = args.replace('\n', '')
elif opt == 'MOSES_PROFILER_REPO:': # Optional
repo_prof_path = args.replace('\n', '')
elif opt == 'MOSES_GOOGLE_PROFILER_REPO:': # Optional
repo_gprof_path = args.replace('\n', '')
else:
raise ValueError('Unrecognized option ' + opt)
config = Configuration(repo_path, drop_caches, tests_dir, testlog_dir,\
basebranch, baserev, repo_prof_path)
basebranch, baserev, repo_prof_path, repo_gprof_path)
fileopen.close()
return config
@ -160,7 +172,9 @@ def get_config():
config.additional_args(args.singletestdir, args.revision, args.branch)
revision = repoinit(config)
if config.repo_prof is not None:
repoinit(config, True)
repoinit(config, "gnu-profiler")
if config.repo_gprof is not None:
repoinit(config, "google-profiler")
config.set_revision(revision)
return config
@ -212,16 +226,27 @@ def write_gprof(command, name, variant, config):
executable_path = command.split(' ')[0] # Path to the moses binary
gprof_command = 'gprof ' + executable_path + ' ' + gmon_path + ' > ' + outputfile
subprocess.call([gprof_command], shell=True)
os.remove('gmon_path') # After we are done discard the gmon file
os.remove(gmon_path) # After we are done discard the gmon file
def execute_test(command, path, name, variant, config, profile=False):
def write_pprof(name, variant, config):
    """Copies the google-perftools profiler output to the corresponding test directory.

    The profiled moses run is expected to have written its CPU profile to
    /tmp/moses.prof (set via the CPUPROFILE environment variable); the file
    is moved into <config.testlogs>/<name>/ under a timestamped pprof_*
    filename so repeated runs of the same test/variant do not collide.
    """
    output_dir = config.testlogs + '/' + name
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    outputfile = output_dir + '/pprof_' + time.strftime("%d.%m.%Y_%H:%M:%S") + '_' + name + '_' + variant
    shutil.move("/tmp/moses.prof", outputfile)
def execute_test(command, path, name, variant, config, profile=None):
"""Executes a testcase given a whole command, path to the test file output,
name of the test and variant tested. Config is the global configuration"""
subprocess.Popen([command], stdout=None, stderr=subprocess.PIPE, shell=True).communicate()
if not profile:
if profile is None:
write_log(path, name + '_' + variant, config)
else: # Basically produce a gmon output
elif profile == "gnu-profiler": # Basically produce a gmon output
write_gprof(command, name, variant, config)
elif profile == "google-profiler":
write_pprof(name, variant, config)
def execute_tests(testcase, cur_directory, config):
@ -255,7 +280,7 @@ def execute_tests(testcase, cur_directory, config):
subprocess.call([config.drop_caches], shell=True)
#Create the command for executing moses:
whole_command = 'LD_PRELOAD ' + opt + time_command + testcase.command
whole_command = 'LD_PRELOAD=' + opt + time_command + testcase.command
variant = 'ldpre_' + opt
#test normal and cached
@ -271,9 +296,9 @@ def execute_tests(testcase, cur_directory, config):
if 'vanilla' in testcase.permutations:
whole_command = testcase.prof_command
execute_test(whole_command, time_path, testcase.name, 'profile', config, True)
execute_test(whole_command, time_path, testcase.name, 'profile', config, "gnu-profiler")
if 'cached' in testcase.permutations:
execute_test(whole_command, time_path, testcase.name, 'profile_cached', config, True)
execute_test(whole_command, time_path, testcase.name, 'profile_cached', config, "gnu-profiler")
if 'ldpre' in testcase.permutations:
for opt in testcase.ldopts:
@ -282,13 +307,42 @@ def execute_tests(testcase, cur_directory, config):
subprocess.call([config.drop_caches], shell=True)
#Create the command for executing moses:
whole_command = 'LD_PRELOAD ' + opt + testcase.prof_command
whole_command = 'LD_PRELOAD=' + opt + " " + testcase.prof_command
variant = 'profile_ldpre_' + opt
#test normal and cached
execute_test(whole_command, time_path, testcase.name, variant, config, True)
execute_test(whole_command, time_path, testcase.name, variant, config, "gnu-profiler")
if 'cached' in testcase.permutations:
execute_test(whole_command, time_path, testcase.name, variant + '_cached', config, True)
execute_test(whole_command, time_path, testcase.name, variant + '_cached', config, "gnu-profiler")
#Google-perftools profiler
if 'google-profiler' in testcase.permutations:
subprocess.call(['sync'], shell=True) # Drop caches first
subprocess.call([config.drop_caches], shell=True)
#Create the command for executing moses
whole_command = "CPUPROFILE=/tmp/moses.prof " + testcase.gprof_command
#test normal and cached
execute_test(whole_command, time_path, testcase.name, 'vanilla', config, 'google-profiler')
if 'cached' in testcase.permutations:
execute_test(whole_command, time_path, testcase.name, 'vanilla_cached', config, 'google-profiler')
#Now perform LD_PRELOAD tests
if 'ldpre' in testcase.permutations:
for opt in testcase.ldopts:
#Clear caches
subprocess.call(['sync'], shell=True)
subprocess.call([config.drop_caches], shell=True)
#Create the command for executing moses:
whole_command = 'LD_PRELOAD=' + opt + " " + whole_command
variant = 'ldpre_' + opt
#test normal and cached
execute_test(whole_command, time_path, testcase.name, variant, config, 'google-profiler')
if 'cached' in testcase.permutations:
execute_test(whole_command, time_path, testcase.name, variant + '_cached', config, 'google-profiler')
# Go through all the test directories and executes tests
@ -319,7 +373,7 @@ if __name__ == '__main__':
for logfile in os.listdir(CONFIG.testlogs):
logfile_name = CONFIG.testlogs + '/' + logfile
if not check_for_basever(logfile_name, CONFIG.basebranch):
if os.path.isfile(logfile_name) and not check_for_basever(logfile_name, CONFIG.basebranch):
logfile = logfile.replace('_vanilla', '')
logfile = logfile.replace('_cached', '')
logfile = logfile.replace('_ldpre', '')
@ -330,7 +384,7 @@ if __name__ == '__main__':
#Create a new configuration for base version tests:
BASECONFIG = Configuration(CONFIG.repo, CONFIG.drop_caches,\
CONFIG.tests, CONFIG.testlogs, CONFIG.basebranch,\
CONFIG.baserev, CONFIG.repo_prof)
CONFIG.baserev, CONFIG.repo_prof, CONFIG.repo_gprof)
BASECONFIG.additional_args(None, CONFIG.baserev, CONFIG.basebranch)
#Set up the repository and get its revision:
REVISION = repoinit(BASECONFIG)
@ -340,20 +394,28 @@ if __name__ == '__main__':
subprocess.call(['./previous.sh'], shell=True)
#If profiler configuration exists also init it
if BASECONFIG.repo_prof is not None:
repoinit(BASECONFIG, True)
repoinit(BASECONFIG, "gnu-profiler")
os.chdir(BASECONFIG.repo_prof)
subprocess.call(['./previous.sh'], shell=True)
if BASECONFIG.repo_gprof is not None:
repoinit(BASECONFIG, "google-profiler")
os.chdir(BASECONFIG.repo_gprof)
subprocess.call(['./previous.sh'], shell=True)
#Perform tests
for directory in FIRSTTIME:
cur_testcase = parse_configfile(BASECONFIG.tests + '/' + directory +\
'/config', directory, BASECONFIG.repo)
'/config', directory, BASECONFIG.repo, BASECONFIG.repo_prof, BASECONFIG.repo_gprof)
execute_tests(cur_testcase, directory, BASECONFIG)
#Reset back the repository to the normal configuration
repoinit(CONFIG)
if BASECONFIG.repo_prof is not None:
repoinit(CONFIG, True)
repoinit(CONFIG, "gnu-profiler")
if BASECONFIG.repo_gprof is not None:
repoinit(CONFIG, "google-profiler")
#Builds moses
os.chdir(CONFIG.repo)
@ -362,12 +424,16 @@ if __name__ == '__main__':
os.chdir(CONFIG.repo_prof)
subprocess.call(['./previous.sh'], shell=True)
if CONFIG.repo_gprof is not None:
os.chdir(CONFIG.repo_gprof)
subprocess.call(['./previous.sh'], shell=True)
if CONFIG.singletest:
TESTCASE = parse_configfile(CONFIG.tests + '/' +\
CONFIG.singletest + '/config', CONFIG.singletest, CONFIG.repo)
CONFIG.singletest + '/config', CONFIG.singletest, CONFIG.repo, CONFIG.repo_prof, CONFIG.repo_gprof)
execute_tests(TESTCASE, CONFIG.singletest, CONFIG)
else:
for directory in ALL_DIR:
cur_testcase = parse_configfile(CONFIG.tests + '/' + directory +\
'/config', directory, CONFIG.repo)
'/config', directory, CONFIG.repo, CONFIG.repo_prof, CONFIG.repo_gprof)
execute_tests(cur_testcase, directory, CONFIG)

View File

@ -11,12 +11,12 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -72,13 +72,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.701931933" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@ -1,5 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="OnDiskPt" InternalType="Library">
<Plugins>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
"enabled": false,
"buildDirectory": "build",
"sourceDirectory": "$(ProjectPath)",
"generator": "",
"buildType": "",
"arguments": [],
"parentProject": ""
}]]]>
</Plugin>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
<VirtualDirectory Name="src"/>
@ -27,6 +44,8 @@
<File Name="../../../OnDiskPt/Word.cpp"/>
<File Name="../../../OnDiskPt/Word.h"/>
</VirtualDirectory>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
<Settings Type="Static Library">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -40,9 +59,9 @@
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../.."/>
<IncludePath Value="../../../phrase-extract"/>
<IncludePath Value="../../../boost/include"/>
<Preprocessor Value="MAX_NUM_FACTORS=4"/>
</Compiler>
<Linker Options="" Required="yes"/>
@ -72,7 +91,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -110,7 +129,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -118,6 +137,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -6,10 +6,11 @@
<Project Name="lm" Path="lm/lm.project" Active="No"/>
<Project Name="OnDiskPt" Path="OnDiskPt/OnDiskPt.project" Active="No"/>
<Project Name="search" Path="search/search.project" Active="No"/>
<Project Name="moses-cmd" Path="moses-cmd/moses-cmd.project" Active="Yes"/>
<Project Name="moses-cmd" Path="moses-cmd/moses-cmd.project" Active="No"/>
<Project Name="score" Path="score/score.project" Active="No"/>
<Project Name="consolidate" Path="consolidate/consolidate.project" Active="No"/>
<Project Name="moses" Path="moses/moses.project" Active="No"/>
<Project Name="pruneGeneration" Path="pruneGeneration/pruneGeneration.project" Active="Yes"/>
<BuildMatrix>
<WorkspaceConfiguration Name="Debug" Selected="yes">
<Project Name="manual-label" ConfigName="Debug"/>
@ -23,6 +24,7 @@
<Project Name="score" ConfigName="Debug"/>
<Project Name="consolidate" ConfigName="Debug"/>
<Project Name="moses" ConfigName="Debug"/>
<Project Name="pruneGeneration" ConfigName="Debug"/>
</WorkspaceConfiguration>
<WorkspaceConfiguration Name="Release" Selected="yes">
<Project Name="manual-label" ConfigName="Release"/>
@ -36,6 +38,7 @@
<Project Name="score" ConfigName="Release"/>
<Project Name="consolidate" ConfigName="Release"/>
<Project Name="moses" ConfigName="Release"/>
<Project Name="pruneGeneration" ConfigName="Release"/>
</WorkspaceConfiguration>
</BuildMatrix>
</CodeLite_Workspace>

View File

@ -102,9 +102,14 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h</locationURI>
</link>
<link>
<name>SyntaxTree.cpp</name>
<name>SyntaxNodeCollection.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.cpp</locationURI>
</link>
<link>
<name>SyntaxNodeCollection.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.h</locationURI>
</link>
<link>
<name>SyntaxTree.h</name>

View File

@ -1,5 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="extract-mixed-syntax" InternalType="Console">
<Plugins>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
"enabled": false,
"buildDirectory": "build",
"sourceDirectory": "$(ProjectPath)",
"generator": "",
"buildType": "",
"arguments": [],
"parentProject": ""
}]]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
<VirtualDirectory Name="src"/>
@ -43,6 +60,10 @@
<File Name="../../../phrase-extract/OutputFileStream.cpp"/>
<File Name="../../../phrase-extract/OutputFileStream.h"/>
</VirtualDirectory>
<Dependencies Name="Debug">
<Project Name="util"/>
</Dependencies>
<Dependencies Name="Release"/>
<Settings Type="Executable">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -56,13 +77,14 @@
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../../"/>
<IncludePath Value="../../../phrase-extract"/>
<IncludePath Value="../../../boost/include"/>
</Compiler>
<Linker Options="" Required="yes">
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/util/Debug"/>
<LibraryPath Value="../../../boost/lib64"/>
<LibraryPath Value="../../../contrib/other-builds/util/Debug"/>
<LibraryPath Value="Debug"/>
<Library Value="util"/>
<Library Value="boost_iostreams"/>
<Library Value="boost_program_options"/>
@ -94,7 +116,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -133,7 +155,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -141,8 +163,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug">
<Project Name="util"/>
</Dependencies>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -26,6 +26,7 @@
<option id="gnu.cpp.compiler.option.include.paths.231971122" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.61884195" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>

View File

@ -81,9 +81,14 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h</locationURI>
</link>
<link>
<name>SyntaxTree.cpp</name>
<name>SyntaxNodeCollection.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.cpp</locationURI>
</link>
<link>
<name>SyntaxNodeCollection.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.h</locationURI>
</link>
<link>
<name>SyntaxTree.h</name>

View File

@ -5,16 +5,16 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.2119725657." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1708444053" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.645190133" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
@ -25,6 +25,7 @@
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.535775760" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.874182289" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1355287045" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
@ -61,16 +62,16 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1230189043" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1230189043" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1230189043" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.1230189043." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.280378247" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1881910636" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>

View File

@ -1,5 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="extract" InternalType="Console">
<Plugins>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
"enabled": false,
"buildDirectory": "build",
"sourceDirectory": "$(ProjectPath)",
"generator": "",
"buildType": "",
"arguments": [],
"parentProject": ""
}]]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
<VirtualDirectory Name="src">
@ -13,6 +30,8 @@
<File Name="../../../phrase-extract/tables-core.cpp"/>
<File Name="../../../phrase-extract/tables-core.h"/>
</VirtualDirectory>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
<Settings Type="Executable">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -26,11 +45,11 @@
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../../"/>
<IncludePath Value="../../../boost/include"/>
</Compiler>
<Linker Options="" Required="yes">
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
<LibraryPath Value="../../../boost/lib64"/>
<Library Value="boost_iostreams"/>
<Library Value="z"/>
</Linker>
@ -60,7 +79,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -99,7 +118,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -107,6 +126,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -83,6 +83,16 @@
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>InternalTree.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mert/InternalTree.cpp</locationURI>
</link>
<link>
<name>InternalTree.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mert/InternalTree.h</locationURI>
</link>
<link>
<name>bin</name>
<type>2</type>

View File

@ -546,26 +546,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/interpolate.hh</locationURI>
</link>
<link>
<name>builder/joint_order.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/joint_order.hh</locationURI>
</link>
<link>
<name>builder/lmplz_main.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/lmplz_main.cc</locationURI>
</link>
<link>
<name>builder/ngram.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/ngram.hh</locationURI>
</link>
<link>
<name>builder/ngram_stream.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/ngram_stream.hh</locationURI>
</link>
<link>
<name>builder/pipeline.cc</name>
<type>1</type>
@ -576,21 +561,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/pipeline.hh</locationURI>
</link>
<link>
<name>builder/print.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/print.cc</locationURI>
</link>
<link>
<name>builder/print.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/print.hh</locationURI>
</link>
<link>
<name>builder/sort.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/sort.hh</locationURI>
</link>
<link>
<name>filter/Jamfile</name>
<type>1</type>

View File

@ -1,5 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="lm" InternalType="Library">
<Plugins>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
"enabled": false,
"buildDirectory": "build",
"sourceDirectory": "$(ProjectPath)",
"generator": "",
"buildType": "",
"arguments": [],
"parentProject": ""
}]]]>
</Plugin>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
<VirtualDirectory Name="src"/>
@ -27,6 +44,8 @@
<File Name="../../../lm/virtual_interface.cc"/>
<File Name="../../../lm/vocab.cc"/>
</VirtualDirectory>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
<Settings Type="Static Library">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -40,9 +59,9 @@
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../.."/>
<IncludePath Value="../../../phrase-extract"/>
<IncludePath Value="../../../boost/include"/>
<Preprocessor Value="KENLM_MAX_ORDER=7"/>
</Compiler>
<Linker Options="" Required="yes"/>
@ -72,7 +91,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -110,7 +129,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -118,6 +137,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -11,15 +11,15 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.debug.1721952013" name="Debug" parent="cdt.managedbuild.config.gnu.lib.debug">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.debug.1721952013" name="Debug" parent="cdt.managedbuild.config.gnu.lib.debug">
<folderInfo id="cdt.managedbuild.config.gnu.lib.debug.1721952013." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.lib.debug.1932340583" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.lib.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.lib.debug.296711714" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.lib.debug"/>
@ -32,6 +32,9 @@
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.2072043013" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1183866856" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.debug.1365367786" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.debug">
@ -46,9 +49,6 @@
</tool>
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.lib.debug.1721952013.195400614" name="MeteorScorer.cpp" rcbsApplicability="disable" resourcePath="MeteorScorer.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537.307282660">
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537.307282660" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537"/>
</fileInfo>
<sourceEntries>
<entry excluding="mert/PreProcessFilter.h|mert/PreProcessFilter.cpp|mert/UtilTest.cpp|mert/TimerTest.cpp|mert/SingletonTest.cpp|mert/PointTest.cpp|mert/OptimizerFactoryTest.cpp|mert/NgramTest.cpp|mert/FeatureDataTest.cpp|mert/DataTest.cpp|mert/ReferenceTest.cpp|mert/VocabularyTest.cpp|mert/extractor.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
@ -66,15 +66,15 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.release.3250316" name="Release" parent="cdt.managedbuild.config.gnu.lib.release">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.release.3250316" name="Release" parent="cdt.managedbuild.config.gnu.lib.release">
<folderInfo id="cdt.managedbuild.config.gnu.lib.release.3250316." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.lib.release.1996805666" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.lib.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.lib.release.106685808" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.lib.release"/>

View File

@ -46,20 +46,20 @@
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../.."/>
<IncludePath Value="../../../phrase-extract"/>
<IncludePath Value="../../../boost/include"/>
<Preprocessor Value="MAX_NUM_FACTORS=4"/>
<Preprocessor Value="KENLM_MAX_ORDER=7"/>
<Preprocessor Value="TRACE_ENABLE=1"/>
</Compiler>
<Linker Options="" Required="yes">
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/lm/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/moses/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/OnDiskPt/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/search/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/util/Debug"/>
<LibraryPath Value="../../../boost/lib64"/>
<LibraryPath Value="../../../contrib/other-builds/lm/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/OnDiskPt/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/search/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/util/Debug"/>
<Library Value="util"/>
<Library Value="moses"/>
<Library Value="search"/>

View File

@ -11,11 +11,11 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -79,12 +79,12 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1911984684" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@ -60,6 +60,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/AlignmentInfoTest.cpp</locationURI>
</link>
<link>
<name>AllOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.cpp</locationURI>
</link>
<link>
<name>AllOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.h</locationURI>
</link>
<link>
<name>BaseManager.cpp</name>
<type>1</type>
@ -70,6 +80,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/BaseManager.h</locationURI>
</link>
<link>
<name>BeamSearchOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BeamSearchOptions.h</locationURI>
</link>
<link>
<name>BitmapContainer.cpp</name>
<type>1</type>
@ -80,6 +95,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/BitmapContainer.h</locationURI>
</link>
<link>
<name>BookkeepingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.cpp</locationURI>
</link>
<link>
<name>BookkeepingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.h</locationURI>
</link>
<link>
<name>CMakeLists.txt</name>
<type>1</type>
@ -230,6 +255,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.h</locationURI>
</link>
<link>
<name>CubePruningOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.cpp</locationURI>
</link>
<link>
<name>CubePruningOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.h</locationURI>
</link>
<link>
<name>DecodeGraph.cpp</name>
<type>1</type>
@ -460,6 +495,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/InputFileStream.h</locationURI>
</link>
<link>
<name>InputOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.cpp</locationURI>
</link>
<link>
<name>InputOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.h</locationURI>
</link>
<link>
<name>InputPath.cpp</name>
<type>1</type>
@ -490,6 +535,16 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>LMBR_Options.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.cpp</locationURI>
</link>
<link>
<name>LMBR_Options.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.h</locationURI>
</link>
<link>
<name>LVoc.cpp</name>
<type>1</type>
@ -510,6 +565,21 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LatticeMBR.h</locationURI>
</link>
<link>
<name>LookupOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LookupOptions.h</locationURI>
</link>
<link>
<name>MBR_Options.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.cpp</locationURI>
</link>
<link>
<name>MBR_Options.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.h</locationURI>
</link>
<link>
<name>Manager.cpp</name>
<type>1</type>
@ -535,6 +605,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/MosesTest.cpp</locationURI>
</link>
<link>
<name>NBestOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.cpp</locationURI>
</link>
<link>
<name>NBestOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.h</locationURI>
</link>
<link>
<name>NonTerminal.cpp</name>
<type>1</type>
@ -550,6 +630,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/ObjectPool.h</locationURI>
</link>
<link>
<name>OptionsBaseClass.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.cpp</locationURI>
</link>
<link>
<name>OptionsBaseClass.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.h</locationURI>
</link>
<link>
<name>OutputCollector.h</name>
<type>1</type>
@ -635,6 +725,26 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/ReorderingConstraint.h</locationURI>
</link>
<link>
<name>ReorderingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.cpp</locationURI>
</link>
<link>
<name>ReorderingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.h</locationURI>
</link>
<link>
<name>ReportingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.cpp</locationURI>
</link>
<link>
<name>ReportingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.h</locationURI>
</link>
<link>
<name>RuleCube.cpp</name>
<type>1</type>
@ -711,14 +821,14 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/SearchNormal.h</locationURI>
</link>
<link>
<name>SearchNormalBatch.cpp</name>
<name>SearchOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/SearchNormalBatch.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.cpp</locationURI>
</link>
<link>
<name>SearchNormalBatch.h</name>
<name>SearchOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/SearchNormalBatch.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.h</locationURI>
</link>
<link>
<name>Sentence.cpp</name>
@ -740,6 +850,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/SentenceStats.h</locationURI>
</link>
<link>
<name>ServerOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.cpp</locationURI>
</link>
<link>
<name>ServerOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.h</locationURI>
</link>
<link>
<name>SquareMatrix.cpp</name>
<type>1</type>
@ -1065,6 +1185,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/mbr.h</locationURI>
</link>
<link>
<name>parameters</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>rule.proto</name>
<type>1</type>
@ -1360,16 +1485,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SetSourcePhrase.h</locationURI>
</link>
<link>
<name>FF/SkeletonChangeInput.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonChangeInput.cpp</locationURI>
</link>
<link>
<name>FF/SkeletonChangeInput.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonChangeInput.h</locationURI>
</link>
<link>
<name>FF/SkeletonStatefulFF.cpp</name>
<type>1</type>
@ -2240,6 +2355,146 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/PhraseOrientation.h</locationURI>
</link>
<link>
<name>parameters/AllOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.cpp</locationURI>
</link>
<link>
<name>parameters/AllOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.h</locationURI>
</link>
<link>
<name>parameters/BeamSearchOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BeamSearchOptions.h</locationURI>
</link>
<link>
<name>parameters/BookkeepingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.cpp</locationURI>
</link>
<link>
<name>parameters/BookkeepingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.h</locationURI>
</link>
<link>
<name>parameters/ContextParameters.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.cpp</locationURI>
</link>
<link>
<name>parameters/ContextParameters.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.h</locationURI>
</link>
<link>
<name>parameters/CubePruningOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.cpp</locationURI>
</link>
<link>
<name>parameters/CubePruningOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.h</locationURI>
</link>
<link>
<name>parameters/InputOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.cpp</locationURI>
</link>
<link>
<name>parameters/InputOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.h</locationURI>
</link>
<link>
<name>parameters/LMBR_Options.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.cpp</locationURI>
</link>
<link>
<name>parameters/LMBR_Options.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.h</locationURI>
</link>
<link>
<name>parameters/LookupOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LookupOptions.h</locationURI>
</link>
<link>
<name>parameters/MBR_Options.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.cpp</locationURI>
</link>
<link>
<name>parameters/MBR_Options.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.h</locationURI>
</link>
<link>
<name>parameters/NBestOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.cpp</locationURI>
</link>
<link>
<name>parameters/NBestOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.h</locationURI>
</link>
<link>
<name>parameters/OptionsBaseClass.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.cpp</locationURI>
</link>
<link>
<name>parameters/OptionsBaseClass.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.h</locationURI>
</link>
<link>
<name>parameters/ReorderingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.cpp</locationURI>
</link>
<link>
<name>parameters/ReorderingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.h</locationURI>
</link>
<link>
<name>parameters/ReportingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.cpp</locationURI>
</link>
<link>
<name>parameters/ReportingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.h</locationURI>
</link>
<link>
<name>parameters/SearchOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.cpp</locationURI>
</link>
<link>
<name>parameters/SearchOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.h</locationURI>
</link>
<link>
<name>parameters/ServerOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.cpp</locationURI>
</link>
<link>
<name>parameters/ServerOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.h</locationURI>
</link>
<link>
<name>FF/LexicalReordering/LexicalReordering.cpp</name>
<type>1</type>

View File

@ -775,6 +775,8 @@
<File Name="../../../moses/WordsRange.h"/>
<File Name="../../../moses/XmlOption.cpp"/>
<File Name="../../../moses/XmlOption.h"/>
<File Name="../../../moses/OutputFileStream.cpp"/>
<File Name="../../../moses/OutputFileStream.h"/>
</VirtualDirectory>
<VirtualDirectory Name="PP">
<File Name="../../../moses/PP/CountsPhraseProperty.cpp"/>
@ -793,8 +795,6 @@
<File Name="../../../moses/PP/SpanLengthPhraseProperty.h"/>
<File Name="../../../moses/PP/TreeStructurePhraseProperty.h"/>
</VirtualDirectory>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
<VirtualDirectory Name="parameters">
<File Name="../../../moses/parameters/ContextParameters.cpp"/>
<File Name="../../../moses/parameters/ContextParameters.h"/>
@ -814,7 +814,7 @@
<ResourceCompiler Options=""/>
</GlobalSettings>
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<Compiler Options="-g -std=c++0x" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="../../../"/>
<IncludePath Value="../../../phrase-extract"/>
@ -897,4 +897,6 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -0,0 +1,125 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="pruneGeneration" InternalType="Console">
<Plugins>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
"enabled": false,
"buildDirectory": "build",
"sourceDirectory": "$(ProjectPath)",
"generator": "",
"buildType": "",
"arguments": [],
"parentProject": ""
}]]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
<VirtualDirectory Name="src">
<File Name="../../../misc/pruneGeneration.cpp"/>
<File Name="../../../misc/pruneGeneration.h"/>
</VirtualDirectory>
<Settings Type="Executable">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
<IncludePath Value="."/>
</Compiler>
<Linker Options="">
<LibraryPath Value="."/>
</Linker>
<ResourceCompiler Options=""/>
</GlobalSettings>
<Configuration Name="Debug" CompilerType="GCC ( XCode )" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="../../.."/>
<IncludePath Value="../../../boost/include"/>
</Compiler>
<Linker Options="" Required="yes">
<LibraryPath Value="../../../boost/lib64"/>
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
<Library Value="boost_filesystem"/>
<Library Value="boost_system"/>
<Library Value="boost_iostreams"/>
<Library Value="moses"/>
<Library Value="z"/>
<Library Value="bz2"/>
</Linker>
<ResourceCompiler Options="" Required="no"/>
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Debug" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>
<Environment EnvVarSetName="&lt;Use Defaults&gt;" DbgSetName="&lt;Use Defaults&gt;">
<![CDATA[]]>
</Environment>
<Debugger IsRemote="no" RemoteHostName="" RemoteHostPort="" DebuggerPath="" IsExtended="yes">
<DebuggerSearchPaths/>
<PostConnectCommands/>
<StartupCommands/>
</Debugger>
<PreBuild/>
<PostBuild/>
<CustomBuild Enabled="no">
<RebuildCommand/>
<CleanCommand/>
<BuildCommand/>
<PreprocessFileCommand/>
<SingleFileCommand/>
<MakefileGenerationCommand/>
<ThirdPartyToolName>None</ThirdPartyToolName>
<WorkingDirectory/>
</CustomBuild>
<AdditionalRules>
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
<SearchPaths/>
</Completion>
</Configuration>
<Configuration Name="Release" CompilerType="GCC ( XCode )" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-O2;-Wall" C_Options="-O2;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<Preprocessor Value="NDEBUG"/>
</Compiler>
<Linker Options="" Required="yes"/>
<ResourceCompiler Options="" Required="no"/>
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Release" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>
<Environment EnvVarSetName="&lt;Use Defaults&gt;" DbgSetName="&lt;Use Defaults&gt;">
<![CDATA[]]>
</Environment>
<Debugger IsRemote="no" RemoteHostName="" RemoteHostPort="" DebuggerPath="" IsExtended="no">
<DebuggerSearchPaths/>
<PostConnectCommands/>
<StartupCommands/>
</Debugger>
<PreBuild/>
<PostBuild/>
<CustomBuild Enabled="no">
<RebuildCommand/>
<CleanCommand/>
<BuildCommand/>
<PreprocessFileCommand/>
<SingleFileCommand/>
<MakefileGenerationCommand/>
<ThirdPartyToolName>None</ThirdPartyToolName>
<WorkingDirectory/>
</CustomBuild>
<AdditionalRules>
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
<SearchPaths/>
</Completion>
</Configuration>
</Settings>
</CodeLite_Project>

View File

@ -59,7 +59,6 @@
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>

View File

@ -19,6 +19,10 @@
<File Name="../../../phrase-extract/tables-core.cpp"/>
<File Name="../../../phrase-extract/tables-core.h"/>
</VirtualDirectory>
<Dependencies Name="Debug">
<Project Name="util"/>
</Dependencies>
<Dependencies Name="Release"/>
<Settings Type="Executable">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -32,17 +36,17 @@
<Configuration Name="Debug" CompilerType="clang( based on LLVM 3.5svn )" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../.."/>
<IncludePath Value="../../../phrase-extract"/>
<IncludePath Value="../../../boost/include"/>
</Compiler>
<Linker Options="" Required="yes">
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/lm/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/moses/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/OnDiskPt/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/search/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/util/Debug"/>
<LibraryPath Value="../../../boost/lib64"/>
<LibraryPath Value="../../../contrib/other-builds/lm/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/OnDiskPt/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/search/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/util/Debug"/>
<Library Value="moses"/>
<Library Value="search"/>
<Library Value="OnDiskPt"/>
@ -86,7 +90,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -125,7 +129,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -133,8 +137,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug">
<Project Name="util"/>
</Dependencies>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -10,6 +10,8 @@
<File Name="../../../search/rule.cc"/>
<File Name="../../../search/vertex.cc"/>
</VirtualDirectory>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
<Settings Type="Static Library">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -23,9 +25,9 @@
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../.."/>
<IncludePath Value="../../../phrase-extract"/>
<IncludePath Value="../../../boost/include"/>
<Preprocessor Value="KENLM_MAX_ORDER=7"/>
</Compiler>
<Linker Options="" Required="yes"/>
@ -55,7 +57,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -93,7 +95,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -101,6 +103,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -159,10 +159,10 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/server"/>
</configuration>
<configuration configurationName="Debug">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/server"/>
</configuration>
</storageModule>

View File

@ -62,6 +62,8 @@
<File Name="../../../util/stream/sort_test.cc" ExcludeProjConfig="Debug"/>
<File Name="../../../util/stream/stream_test.cc" ExcludeProjConfig="Debug"/>
</VirtualDirectory>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
<Settings Type="Static Library">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -75,8 +77,8 @@
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../.."/>
<IncludePath Value="../../../boost/include"/>
</Compiler>
<Linker Options="" Required="yes"/>
<ResourceCompiler Options="" Required="no"/>
@ -105,7 +107,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -143,7 +145,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -151,6 +153,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -13,7 +13,7 @@ with-xmlrpc-c = [ option.get "with-xmlrpc-c" ] ;
if $(with-xmlrpc-c) {
echo While building mosesserver ... ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
echo "!!! You are linking the XMLRPC-C library; Do NOT use v.1.25.29 !!!" ;
echo "!!! You are linking the XMLRPC-C library; Must be v.1.32 (September 2012) or higher !!!" ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
build-moses-server = true ;

View File

@ -38,13 +38,12 @@ int main(int argc, char** argv)
#include "moses/StaticData.h"
#include "moses/ThreadPool.h"
#include "moses/TranslationTask.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#include "moses/FF/StatefulFeatureFunction.h"
#if PT_UG
#include "moses/TranslationModel/UG/mmsapt.h"
#endif
#include "moses/TreeInput.h"
#include "moses/LM/ORLM.h"
#include "moses/IOWrapper.h"
#include <boost/foreach.hpp>
@ -58,8 +57,8 @@ int main(int argc, char** argv)
#include <xmlrpc-c/server_abyss.hpp>
// using namespace Moses;
using Moses::TreeInput;
using namespace std;
using namespace Moses;
typedef std::map<std::string, xmlrpc_c::value> params_t;
@ -82,70 +81,16 @@ public:
Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(PhraseDictionary::GetColl()[0]);
pdsa->add(source_,target_,alignment_);
#else
const PhraseDictionary* pdf = PhraseDictionary::GetColl()[0];
PhraseDictionaryDynSuffixArray*
pdsa = (PhraseDictionaryDynSuffixArray*) pdf;
cerr << "Inserting into address " << pdsa << endl;
pdsa->insertSnt(source_, target_, alignment_);
std::string msg;
msg = "Server was compiled without a phrase table implementation that ";
msg += "supports updates.";
throw xmlrpc_c::fault(msg.c_str(), xmlrpc_c::fault::CODE_PARSE);
#endif
if(add2ORLM_) {
//updateORLM();
}
XVERBOSE(1,"Done inserting\n");
//PhraseDictionary* pdsa = (PhraseDictionary*) pdf->GetDictionary(*dummy);
map<string, xmlrpc_c::value> retData;
//*retvalP = xmlrpc_c::value_struct(retData);
#ifndef PT_UG
pdf = 0;
#endif
pdsa = 0;
*retvalP = xmlrpc_c::value_string("Phrase table updated");
}
string source_, target_, alignment_;
bool bounded_, add2ORLM_;
/*
void updateORLM() {
// TODO(level101): this belongs in the language model, not in moseserver.cpp
vector<string> vl;
map<vector<string>, int> ngSet;
LMList lms = StaticData::Instance().GetLMList(); // get LM
LMList::const_iterator lmIter = lms.begin();
LanguageModel *lm = *lmIter;
LanguageModelORLM* orlm = static_cast<LanguageModelORLM*>(lm);
if(orlm == 0) {
cerr << "WARNING: Unable to add target sentence to ORLM\n";
return;
}
// break out new ngrams from sentence
const int ngOrder(orlm->GetNGramOrder());
const std::string sBOS = orlm->GetSentenceStart()->GetString().as_string();
const std::string sEOS = orlm->GetSentenceEnd()->GetString().as_string();
Utils::splitToStr(target_, vl, " ");
// insert BOS and EOS
vl.insert(vl.begin(), sBOS);
vl.insert(vl.end(), sEOS);
for(int j=0; j < vl.size(); ++j) {
int i = (j<ngOrder) ? 0 : j-ngOrder+1;
for(int t=j; t >= i; --t) {
vector<string> ngVec;
for(int s=t; s<=j; ++s) {
ngVec.push_back(vl[s]);
//cerr << vl[s] << " ";
}
ngSet[ngVec]++;
//cerr << endl;
}
}
// insert into LM in order from 1grams up (for LM well-formedness)
cerr << "Inserting " << ngSet.size() << " ngrams into ORLM...\n";
for(int i=1; i <= ngOrder; ++i) {
iterate(ngSet, it) {
if(it->first.size() == i)
orlm->UpdateORLM(it->first, it->second);
}
}
}
*/
bool bounded_;
void breakOutParams(const params_t& params) {
params_t::const_iterator si = params.find("source");
@ -165,8 +110,6 @@ public:
XVERBOSE(1,"alignment = " << alignment_ << endl);
si = params.find("bounded");
bounded_ = (si != params.end());
si = params.find("updateORLM");
add2ORLM_ = (si != params.end());
}
};
@ -678,6 +621,14 @@ int main(int argc, char** argv)
bool isSerial = false;
size_t numThreads = 10; //for translation tasks
//Abyss server configuration: initial values reflect hard-coded default
//-> http://xmlrpc-c.sourceforge.net/doc/libxmlrpc_server_abyss.html#max_conn
size_t maxConn = 15;
size_t maxConnBacklog = 15;
size_t keepaliveTimeout = 15;
size_t keepaliveMaxConn = 30;
size_t timeout = 15;
for (int i = 0; i < argc; ++i) {
if (!strcmp(argv[i],"--server-port")) {
++i;
@ -695,6 +646,46 @@ int main(int argc, char** argv)
} else {
logfile = argv[i];
}
} else if (!strcmp(argv[i],"--server-maxconn")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-maxconn" << endl;
exit(1);
} else {
maxConn = atoi(argv[i]);
}
} else if (!strcmp(argv[i],"--server-maxconn-backlog")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-maxconn-backlog" << endl;
exit(1);
} else {
maxConnBacklog = atoi(argv[i]);
}
} else if (!strcmp(argv[i],"--server-keepalive-timeout")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-keepalive-timeout" << endl;
exit(1);
} else {
keepaliveTimeout = atoi(argv[i]);
}
} else if (!strcmp(argv[i],"--server-keepalive-maxconn")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-keepalive-maxconn" << endl;
exit(1);
} else {
keepaliveMaxConn = atoi(argv[i]);
}
} else if (!strcmp(argv[i],"--server-timeout")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-timeout" << endl;
exit(1);
} else {
timeout = atoi(argv[i]);
}
} else if (!strcmp(argv[i], "--threads")) {
++i;
if (i>=argc) {
@ -740,20 +731,27 @@ int main(int argc, char** argv)
myRegistry.addMethod("updater", updater);
myRegistry.addMethod("optimize", optimizer);
/* CODE FOR old xmlrpc-c v. 1.32 or lower
xmlrpc_c::serverAbyss myAbyssServer(
myRegistry,
port, // TCP port on which to listen
logfile
);
/* doesn't work with xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04
*/
/* doesn't work with xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04 */
xmlrpc_c::serverAbyss myAbyssServer(
xmlrpc_c::serverAbyss::constrOpt()
.registryPtr(&myRegistry)
.registryP(&myRegistry)
.portNumber(port) // TCP port on which to listen
.logFileName(logfile)
.allowOrigin("*")
.maxConn((unsigned int)maxConn)
.maxConnBacklog((unsigned int)maxConnBacklog)
.keepaliveTimeout((unsigned int)keepaliveTimeout)
.keepaliveMaxConn((unsigned int)keepaliveMaxConn)
.timeout((unsigned int)timeout)
);
*/
XVERBOSE(1,"Listening on port " << port << endl);
if (isSerial) {

View File

@ -2,7 +2,7 @@
# xmlrpc-c library (including the abyss server) that is needed for
# moses server functionality
if [ option.get "no-xmlrpc-c" ]
if [ option.get "no-xmlrpc-c" : : "yes" ]
{
rule xmlrpc ( what ? ) { } # never return anything
}

View File

@ -1,46 +1,139 @@
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
set(KENLM_MAX_ORDER 6)
add_definitions(-DKENLM_MAX_ORDER=${KENLM_MAX_ORDER})
# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
set(KENLM_SOURCE
bhiksha.cc
binary_format.cc
config.cc
lm_exception.cc
model.cc
quantize.cc
read_arpa.cc
search_hashed.cc
search_trie.cc
sizes.cc
trie.cc
trie_sort.cc
value_build.cc
virtual_interface.cc
vocab.cc
)
# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm OBJECT ${KENLM_SOURCE})
# This directory has children that need to be processed
add_subdirectory(builder)
add_subdirectory(common)
add_subdirectory(filter)
# Explicitly list the executable files to be compiled
set(EXE_LIST
query
fragment
build_binary
)
# Iterate through the executable list
foreach(exe ${EXE_LIST})
# Compile the executable, linking against the requisite dependent object files
add_executable(${exe} ${exe}_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>)
# Link the executable against boost
target_link_libraries(${exe} ${Boost_LIBRARIES})
# Group executables together
set_target_properties(${exe} PROPERTIES FOLDER executables)
# End for loop
endforeach(exe)
# Install the executable files
install(TARGETS ${EXE_LIST} DESTINATION bin)
if(BUILD_TESTING)
# Explicitly list the Boost test files to be compiled
set(KENLM_BOOST_TESTS_LIST
left_test
model_test
partial_test
)
# Iterate through the Boost tests list
foreach(test ${KENLM_BOOST_TESTS_LIST})
# Compile the executable, linking against the requisite dependent object files
add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>)
# Require the following compile flag
set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
# Link the executable against boost
target_link_libraries(${test} ${Boost_LIBRARIES})
# model_test requires an extra command line parameter
if ("${test}" STREQUAL "model_test")
set(test_params
${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa
)
else()
set(test_params
${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
)
endif()
# Specify command arguments for how to run each unit test
#
# Assuming that foo was defined via add_executable(foo ...),
# the syntax $<TARGET_FILE:foo> gives the full path to the executable.
#
add_test(NAME ${test}_test
COMMAND $<TARGET_FILE:${test}> ${test_params})
# Group unit tests together
set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
# End for loop
endforeach(test)
endif()
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/bhiksha.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/bhiksha.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/binary_format.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/binary_format.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/blank.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/config.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/config.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/enumerate_vocab.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/facade.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/left.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/lm_exception.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/lm_exception.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/max_order.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/model.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/model.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/model_type.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/ngram_query.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/partial.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/quantize.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/quantize.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/read_arpa.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/read_arpa.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/return.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_hashed.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_hashed.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_trie.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_trie.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/sizes.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/sizes.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/state.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie_sort.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie_sort.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/value.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/value_build.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/value_build.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/virtual_interface.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/virtual_interface.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/vocab.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/vocab.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/weights.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/word_index.hh")
add_library(kenlm OBJECT ${SOURCE_KENLM})

87
lm/builder/CMakeLists.txt Normal file
View File

@ -0,0 +1,87 @@
# Build rules for the KenLM builder library (kenlm_builder), the lmplz
# executable, and the builder unit tests.
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
#
# In order to set correct paths to these files
# in case this variable is referenced by CMake files in the parent directory,
# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
#
set(KENLM_BUILDER_SOURCE
${CMAKE_CURRENT_SOURCE_DIR}/adjust_counts.cc
${CMAKE_CURRENT_SOURCE_DIR}/corpus_count.cc
${CMAKE_CURRENT_SOURCE_DIR}/initial_probabilities.cc
${CMAKE_CURRENT_SOURCE_DIR}/interpolate.cc
${CMAKE_CURRENT_SOURCE_DIR}/output.cc
${CMAKE_CURRENT_SOURCE_DIR}/pipeline.cc
)
# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm_builder OBJECT ${KENLM_BUILDER_SOURCE})
# Compile the executable, linking against the requisite dependent object files
# (lmplz pulls in the core lm, common, builder, and util object groups).
add_executable(lmplz lmplz_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_common> $<TARGET_OBJECTS:kenlm_builder> $<TARGET_OBJECTS:kenlm_util>)
# Link the executable against boost
target_link_libraries(lmplz ${Boost_LIBRARIES})
# Group executables together
set_target_properties(lmplz PROPERTIES FOLDER executables)
# Unit tests are compiled only when testing has been enabled in the
# top-level build (BUILD_TESTING).
if(BUILD_TESTING)
# Explicitly list the Boost test files to be compiled
set(KENLM_BOOST_TESTS_LIST
adjust_counts_test
corpus_count_test
)
# Iterate through the Boost tests list
foreach(test ${KENLM_BOOST_TESTS_LIST})
# Compile the executable, linking against the requisite dependent object files
add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_common> $<TARGET_OBJECTS:kenlm_builder> $<TARGET_OBJECTS:kenlm_util>)
# Require the following compile flag
# (dynamic linking for both Boost.Test and Boost.ProgramOptions)
set_target_properties(${test} PROPERTIES COMPILE_FLAGS "-DBOOST_TEST_DYN_LINK -DBOOST_PROGRAM_OPTIONS_DYN_LINK")
# Link the executable against boost
target_link_libraries(${test} ${Boost_LIBRARIES})
# Specify command arguments for how to run each unit test
#
# Assuming that foo was defined via add_executable(foo ...),
# the syntax $<TARGET_FILE:foo> gives the full path to the executable.
#
add_test(NAME ${test}_test
COMMAND $<TARGET_FILE:${test}>)
# Group unit tests together
set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
# End for loop
endforeach(test)
endif()

View File

@ -15,9 +15,6 @@
#include "util/stream/timer.hh"
#include "util/tokenize_piece.hh"
#include <boost/unordered_set.hpp>
#include <boost/unordered_map.hpp>
#include <functional>
#include <stdint.h>

View File

@ -43,12 +43,13 @@ BOOST_AUTO_TEST_CASE(Short) {
util::scoped_fd vocab(util::MakeTemp("corpus_count_test_vocab"));
util::stream::Chain chain(config);
NGramStream<BuildingPayload> stream;
uint64_t token_count;
WordIndex type_count = 10;
std::vector<bool> prune_words;
CorpusCount counter(input_piece, vocab.get(), token_count, type_count, prune_words, "", chain.BlockSize() / chain.EntrySize(), SILENT);
chain >> boost::ref(counter) >> stream >> util::stream::kRecycle;
chain >> boost::ref(counter);
NGramStream<BuildingPayload> stream(chain.Add());
chain >> util::stream::kRecycle;
const char *v[] = {"<unk>", "<s>", "</s>", "looking", "on", "a", "little", "more", "loin", "foo", "bar"};

View File

@ -1,54 +1,18 @@
#ifndef LM_BUILDER_PRINT_H
#define LM_BUILDER_PRINT_H
#ifndef LM_BUILDER_DEBUG_PRINT_H
#define LM_BUILDER_DEBUG_PRINT_H
#include "lm/common/ngram_stream.hh"
#include "lm/builder/output.hh"
#include "lm/builder/payload.hh"
#include "lm/common/ngram.hh"
#include "lm/common/print.hh"
#include "lm/common/ngram_stream.hh"
#include "util/fake_ofstream.hh"
#include "util/file.hh"
#include "util/mmap.hh"
#include "util/string_piece.hh"
#include <boost/lexical_cast.hpp>
#include <ostream>
#include <cassert>
// Warning: print routines read all unigrams before all bigrams before all
// trigrams etc. So if other parts of the chain move jointly, you'll have to
// buffer.
namespace lm { namespace builder {
class VocabReconstitute {
public:
// fd must be alive for life of this object; does not take ownership.
explicit VocabReconstitute(int fd);
const char *Lookup(WordIndex index) const {
assert(index < map_.size() - 1);
return map_[index];
}
StringPiece LookupPiece(WordIndex index) const {
return StringPiece(map_[index], map_[index + 1] - 1 - map_[index]);
}
std::size_t Size() const {
// There's an extra entry to support StringPiece lengths.
return map_.size() - 1;
}
private:
util::scoped_memory memory_;
std::vector<const char*> map_;
};
// Not defined, only specialized.
template <class T> void PrintPayload(util::FakeOFStream &to, const BuildingPayload &payload);
template <> inline void PrintPayload<uint64_t>(util::FakeOFStream &to, const BuildingPayload &payload) {
// TODO slow
to << payload.count;
}
template <> inline void PrintPayload<Uninterpolated>(util::FakeOFStream &to, const BuildingPayload &payload) {
@ -101,19 +65,6 @@ template <class V> class Print {
int to_;
};
class PrintARPA : public OutputHook {
public:
explicit PrintARPA(int fd, bool verbose_header)
: OutputHook(PROB_SEQUENTIAL_HOOK), out_fd_(fd), verbose_header_(verbose_header) {}
void Sink(util::stream::Chains &chains);
void Run(const util::stream::ChainPositions &positions);
private:
util::scoped_fd out_fd_;
bool verbose_header_;
};
}} // namespaces
#endif // LM_BUILDER_PRINT_H
#endif // LM_BUILDER_DEBUG_PRINT_H

View File

@ -1,4 +1,4 @@
#include "lm/builder/print.hh"
#include "lm/common/print.hh"
#include "lm/word_index.hh"
#include "util/file.hh"
#include "util/read_compressed.hh"
@ -20,7 +20,7 @@ int main(int argc, char *argv[]) {
}
util::ReadCompressed counts(util::OpenReadOrThrow(argv[1]));
util::scoped_fd vocab_file(util::OpenReadOrThrow(argv[2]));
lm::builder::VocabReconstitute vocab(vocab_file.get());
lm::VocabReconstitute vocab(vocab_file.get());
unsigned int order = boost::lexical_cast<unsigned int>(argv[3]);
std::vector<char> record(sizeof(uint32_t) * order + sizeof(uint64_t));
while (std::size_t got = counts.ReadOrEOF(&*record.begin(), record.size())) {

View File

@ -5,6 +5,8 @@
#include <vector>
#include <stdint.h>
namespace lm { namespace builder {
// Some configuration info that is used to add
// comments to the beginning of an ARPA file
struct HeaderInfo {
@ -21,4 +23,6 @@ struct HeaderInfo {
// TODO: More info if multiple models were interpolated
};
}} // namespaces
#endif

View File

@ -1,9 +1,9 @@
#include "lm/builder/initial_probabilities.hh"
#include "lm/builder/discount.hh"
#include "lm/builder/special.hh"
#include "lm/builder/hash_gamma.hh"
#include "lm/builder/payload.hh"
#include "lm/common/special.hh"
#include "lm/common/ngram_stream.hh"
#include "util/murmur_hash.hh"
#include "util/file.hh"

View File

@ -10,9 +10,8 @@
namespace util { namespace stream { class Chains; } }
namespace lm {
namespace builder {
class SpecialVocab;
namespace builder {
struct InitialProbabilitiesConfig {
// These should be small buffers to keep the adder from getting too far ahead

View File

@ -1,16 +1,16 @@
#include "lm/builder/interpolate.hh"
#include "lm/builder/hash_gamma.hh"
#include "lm/builder/joint_order.hh"
#include "lm/common/ngram_stream.hh"
#include "lm/builder/payload.hh"
#include "lm/common/compare.hh"
#include "lm/common/joint_order.hh"
#include "lm/common/ngram_stream.hh"
#include "lm/lm_exception.hh"
#include "util/fixed_array.hh"
#include "util/murmur_hash.hh"
#include <cassert>
#include <cmath>
#include <iostream>
namespace lm { namespace builder {
namespace {
@ -91,7 +91,8 @@ template <class Output> class Callback {
}
}
void Enter(unsigned order_minus_1, NGram<BuildingPayload> &gram) {
void Enter(unsigned order_minus_1, void *data) {
NGram<BuildingPayload> gram(data, order_minus_1 + 1);
BuildingPayload &pay = gram.Value();
pay.complete.prob = pay.uninterp.prob + pay.uninterp.gamma * probs_[order_minus_1];
probs_[order_minus_1 + 1] = pay.complete.prob;
@ -125,7 +126,7 @@ template <class Output> class Callback {
output_.Gram(order_minus_1, out_backoff, pay.complete);
}
void Exit(unsigned, const NGram<BuildingPayload> &) const {}
void Exit(unsigned, void *) const {}
private:
util::FixedArray<util::stream::Stream> backoffs_;

View File

@ -1,7 +1,7 @@
#ifndef LM_BUILDER_INTERPOLATE_H
#define LM_BUILDER_INTERPOLATE_H
#include "lm/builder/special.hh"
#include "lm/common/special.hh"
#include "lm/word_index.hh"
#include "util/stream/multi_stream.hh"

View File

@ -1,6 +1,6 @@
#include "lm/builder/output.hh"
#include "lm/builder/pipeline.hh"
#include "lm/builder/print.hh"
#include "lm/common/size_option.hh"
#include "lm/lm_exception.hh"
#include "util/file.hh"
#include "util/file_piece.hh"
@ -13,21 +13,6 @@
#include <vector>
namespace {
class SizeNotify {
public:
SizeNotify(std::size_t &out) : behind_(out) {}
void operator()(const std::string &from) {
behind_ = util::ParseSize(from);
}
private:
std::size_t &behind_;
};
boost::program_options::typed_value<std::string> *SizeOption(std::size_t &to, const char *default_value) {
return boost::program_options::value<std::string>()->notifier(SizeNotify(to))->default_value(default_value);
}
// Parse and validate pruning thresholds then return vector of threshold counts
// for each n-grams order.
@ -106,17 +91,16 @@ int main(int argc, char *argv[]) {
("interpolate_unigrams", po::value<bool>(&pipeline.initial_probs.interpolate_unigrams)->default_value(true)->implicit_value(true), "Interpolate the unigrams (default) as opposed to giving lots of mass to <unk> like SRI. If you want SRI's behavior with a large <unk> and the old lmplz default, use --interpolate_unigrams 0.")
("skip_symbols", po::bool_switch(), "Treat <s>, </s>, and <unk> as whitespace instead of throwing an exception")
("temp_prefix,T", po::value<std::string>(&pipeline.sort.temp_prefix)->default_value("/tmp/lm"), "Temporary file prefix")
("memory,S", SizeOption(pipeline.sort.total_memory, util::GuessPhysicalMemory() ? "80%" : "1G"), "Sorting memory")
("minimum_block", SizeOption(pipeline.minimum_block, "8K"), "Minimum block size to allow")
("sort_block", SizeOption(pipeline.sort.buffer_size, "64M"), "Size of IO operations for sort (determines arity)")
("memory,S", lm:: SizeOption(pipeline.sort.total_memory, util::GuessPhysicalMemory() ? "80%" : "1G"), "Sorting memory")
("minimum_block", lm::SizeOption(pipeline.minimum_block, "8K"), "Minimum block size to allow")
("sort_block", lm::SizeOption(pipeline.sort.buffer_size, "64M"), "Size of IO operations for sort (determines arity)")
("block_count", po::value<std::size_t>(&pipeline.block_count)->default_value(2), "Block count (per order)")
("vocab_estimate", po::value<lm::WordIndex>(&pipeline.vocab_estimate)->default_value(1000000), "Assume this vocabulary size for purposes of calculating memory in step 1 (corpus count) and pre-sizing the hash table")
("vocab_file", po::value<std::string>(&pipeline.vocab_file)->default_value(""), "Location to write a file containing the unique vocabulary strings delimited by null bytes")
("vocab_pad", po::value<uint64_t>(&pipeline.vocab_size_for_unk)->default_value(0), "If the vocabulary is smaller than this value, pad with <unk> to reach this size. Requires --interpolate_unigrams")
("verbose_header", po::bool_switch(&verbose_header), "Add a verbose header to the ARPA file that includes information such as token count, smoothing type, etc.")
("text", po::value<std::string>(&text), "Read text from a file instead of stdin")
("arpa", po::value<std::string>(&arpa), "Write ARPA to a file instead of stdout")
("intermediate", po::value<std::string>(&intermediate), "Write ngrams to an intermediate file. Turns off ARPA output (which can be reactivated by --arpa file). Forces --renumber on. Implicitly makes --vocab_file be the provided name + .vocab.")
("intermediate", po::value<std::string>(&intermediate), "Write ngrams to intermediate files. Turns off ARPA output (which can be reactivated by --arpa file). Forces --renumber on.")
("renumber", po::bool_switch(&pipeline.renumber_vocabulary), "Rrenumber the vocabulary identifiers so that they are monotone with the hash of each string. This is consistent with the ordering used by the trie data structure.")
("collapse_values", po::bool_switch(&pipeline.output_q), "Collapse probability and backoff into a single value, q that yields the same sentence-level probabilities. See http://kheafield.com/professional/edinburgh/rest_paper.pdf for more details, including a proof.")
("prune", po::value<std::vector<std::string> >(&pruning)->multitoken(), "Prune n-grams with count less than or equal to the given threshold. Specify one value for each order i.e. 0 0 1 to prune singleton trigrams and above. The sequence of values must be non-decreasing and the last value applies to any remaining orders. Default is to not prune, which is equivalent to --prune 0.")
@ -217,15 +201,10 @@ int main(int argc, char *argv[]) {
bool writing_intermediate = vm.count("intermediate");
if (writing_intermediate) {
pipeline.renumber_vocabulary = true;
if (!pipeline.vocab_file.empty()) {
std::cerr << "--intermediate and --vocab_file are incompatible because --intermediate already makes a vocab file." << std::endl;
return 1;
}
pipeline.vocab_file = intermediate + ".vocab";
}
lm::builder::Output output(writing_intermediate ? intermediate : pipeline.sort.temp_prefix, writing_intermediate);
lm::builder::Output output(writing_intermediate ? intermediate : pipeline.sort.temp_prefix, writing_intermediate, pipeline.output_q);
if (!writing_intermediate || vm.count("arpa")) {
output.Add(new lm::builder::PrintARPA(out.release(), verbose_header));
output.Add(new lm::builder::PrintHook(out.release(), verbose_header));
}
lm::builder::Pipeline(pipeline, in.release(), output);
} catch (const util::MallocException &e) {

View File

@ -1,6 +1,8 @@
#include "lm/builder/output.hh"
#include "lm/common/model_buffer.hh"
#include "lm/common/print.hh"
#include "util/fake_ofstream.hh"
#include "util/stream/multi_stream.hh"
#include <iostream>
@ -9,23 +11,22 @@ namespace lm { namespace builder {
OutputHook::~OutputHook() {}
Output::Output(StringPiece file_base, bool keep_buffer)
: file_base_(file_base.data(), file_base.size()), keep_buffer_(keep_buffer) {}
Output::Output(StringPiece file_base, bool keep_buffer, bool output_q)
: buffer_(file_base, keep_buffer, output_q) {}
void Output::SinkProbs(util::stream::Chains &chains, bool output_q) {
void Output::SinkProbs(util::stream::Chains &chains) {
Apply(PROB_PARALLEL_HOOK, chains);
if (!keep_buffer_ && !Have(PROB_SEQUENTIAL_HOOK)) {
if (!buffer_.Keep() && !Have(PROB_SEQUENTIAL_HOOK)) {
chains >> util::stream::kRecycle;
chains.Wait(true);
return;
}
lm::common::ModelBuffer buf(file_base_, keep_buffer_, output_q);
buf.Sink(chains);
buffer_.Sink(chains, header_.counts_pruned);
chains >> util::stream::kRecycle;
chains.Wait(false);
if (Have(PROB_SEQUENTIAL_HOOK)) {
std::cerr << "=== 5/5 Writing ARPA model ===" << std::endl;
buf.Source(chains);
buffer_.Source(chains);
Apply(PROB_SEQUENTIAL_HOOK, chains);
chains >> util::stream::kRecycle;
chains.Wait(true);
@ -34,8 +35,18 @@ void Output::SinkProbs(util::stream::Chains &chains, bool output_q) {
void Output::Apply(HookType hook_type, util::stream::Chains &chains) {
for (boost::ptr_vector<OutputHook>::iterator entry = outputs_[hook_type].begin(); entry != outputs_[hook_type].end(); ++entry) {
entry->Sink(chains);
entry->Sink(header_, VocabFile(), chains);
}
}
void PrintHook::Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) {
if (verbose_header_) {
util::FakeOFStream out(file_.get(), 50);
out << "# Input file: " << info.input_file << '\n';
out << "# Token count: " << info.token_count << '\n';
out << "# Smoothing: Modified Kneser-Ney" << '\n';
}
chains >> PrintARPA(vocab_file, file_.get(), info.counts_pruned);
}
}} // namespaces

View File

@ -2,6 +2,7 @@
#define LM_BUILDER_OUTPUT_H
#include "lm/builder/header_info.hh"
#include "lm/common/model_buffer.hh"
#include "util/file.hh"
#include <boost/ptr_container/ptr_vector.hpp>
@ -20,69 +21,64 @@ enum HookType {
NUMBER_OF_HOOKS // Keep this last so we know how many values there are.
};
class Output;
class OutputHook {
public:
explicit OutputHook(HookType hook_type) : type_(hook_type), master_(NULL) {}
explicit OutputHook(HookType hook_type) : type_(hook_type) {}
virtual ~OutputHook();
virtual void Sink(util::stream::Chains &chains) = 0;
virtual void Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) = 0;
protected:
const HeaderInfo &GetHeader() const;
int GetVocabFD() const;
HookType Type() const { return type_; }
private:
friend class Output;
const HookType type_;
const Output *master_;
HookType type_;
};
class Output : boost::noncopyable {
public:
Output(StringPiece file_base, bool keep_buffer);
Output(StringPiece file_base, bool keep_buffer, bool output_q);
// Takes ownership.
void Add(OutputHook *hook) {
hook->master_ = this;
outputs_[hook->type_].push_back(hook);
outputs_[hook->Type()].push_back(hook);
}
bool Have(HookType hook_type) const {
return !outputs_[hook_type].empty();
}
void SetVocabFD(int to) { vocab_fd_ = to; }
int GetVocabFD() const { return vocab_fd_; }
int VocabFile() const { return buffer_.VocabFile(); }
void SetHeader(const HeaderInfo &header) { header_ = header; }
const HeaderInfo &GetHeader() const { return header_; }
// This is called by the pipeline.
void SinkProbs(util::stream::Chains &chains, bool output_q);
void SinkProbs(util::stream::Chains &chains);
unsigned int Steps() const { return Have(PROB_SEQUENTIAL_HOOK); }
private:
void Apply(HookType hook_type, util::stream::Chains &chains);
boost::ptr_vector<OutputHook> outputs_[NUMBER_OF_HOOKS];
int vocab_fd_;
HeaderInfo header_;
ModelBuffer buffer_;
std::string file_base_;
bool keep_buffer_;
boost::ptr_vector<OutputHook> outputs_[NUMBER_OF_HOOKS];
HeaderInfo header_;
};
inline const HeaderInfo &OutputHook::GetHeader() const {
return master_->GetHeader();
}
class PrintHook : public OutputHook {
public:
// Takes ownership
PrintHook(int write_fd, bool verbose_header)
: OutputHook(PROB_SEQUENTIAL_HOOK), file_(write_fd), verbose_header_(verbose_header) {}
inline int OutputHook::GetVocabFD() const {
return master_->GetVocabFD();
}
void Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains);
private:
util::scoped_fd file_;
bool verbose_header_;
};
}} // namespaces

View File

@ -277,27 +277,27 @@ void InterpolateProbabilities(const std::vector<uint64_t> &counts, Master &maste
}
master >> Interpolate(std::max(master.Config().vocab_size_for_unk, counts[0] - 1 /* <s> is not included */), util::stream::ChainPositions(gamma_chains), config.prune_thresholds, config.prune_vocab, config.output_q, specials);
gamma_chains >> util::stream::kRecycle;
output.SinkProbs(master.MutableChains(), config.output_q);
output.SinkProbs(master.MutableChains());
}
class VocabNumbering {
public:
VocabNumbering(StringPiece vocab_file, StringPiece temp_prefix, bool renumber)
: vocab_file_(vocab_file.data(), vocab_file.size()),
temp_prefix_(temp_prefix.data(), temp_prefix.size()),
VocabNumbering(int final_vocab, StringPiece temp_prefix, bool renumber)
: final_vocab_(final_vocab),
renumber_(renumber),
specials_(kBOS, kEOS) {
InitFile(renumber || vocab_file.empty());
if (renumber) {
temporary_.reset(util::MakeTemp(temp_prefix));
}
}
int File() const { return null_delimited_.get(); }
int WriteOnTheFly() const { return renumber_ ? temporary_.get() : final_vocab_; }
// Compute the vocabulary mapping and return the memory used.
std::size_t ComputeMapping(WordIndex type_count) {
if (!renumber_) return 0;
util::scoped_fd previous(null_delimited_.release());
InitFile(vocab_file_.empty());
ngram::SortedVocabulary::ComputeRenumbering(type_count, previous.get(), null_delimited_.get(), vocab_mapping_);
ngram::SortedVocabulary::ComputeRenumbering(type_count, temporary_.get(), final_vocab_, vocab_mapping_);
temporary_.reset();
return sizeof(WordIndex) * vocab_mapping_.size();
}
@ -312,15 +312,9 @@ class VocabNumbering {
const SpecialVocab &Specials() const { return specials_; }
private:
void InitFile(bool temp) {
null_delimited_.reset(temp ?
util::MakeTemp(temp_prefix_) :
util::CreateOrThrow(vocab_file_.c_str()));
}
std::string vocab_file_, temp_prefix_;
util::scoped_fd null_delimited_;
int final_vocab_;
// Out of order vocab file created on the fly.
util::scoped_fd temporary_;
bool renumber_;
@ -349,18 +343,17 @@ void Pipeline(PipelineConfig &config, int text_file, Output &output) {
// master's destructor will wait for chains. But they might be deadlocked if
// this thread dies because e.g. it ran out of memory.
try {
VocabNumbering numbering(config.vocab_file, config.TempPrefix(), config.renumber_vocabulary);
VocabNumbering numbering(output.VocabFile(), config.TempPrefix(), config.renumber_vocabulary);
uint64_t token_count;
WordIndex type_count;
std::string text_file_name;
std::vector<bool> prune_words;
util::scoped_ptr<util::stream::Sort<SuffixOrder, CombineCounts> > sorted_counts(
CountText(text_file, numbering.File(), master, token_count, type_count, text_file_name, prune_words));
CountText(text_file, numbering.WriteOnTheFly(), master, token_count, type_count, text_file_name, prune_words));
std::cerr << "Unigram tokens " << token_count << " types " << type_count << std::endl;
// Create vocab mapping, which uses temporary memory, while nothing else is happening.
std::size_t subtract_for_numbering = numbering.ComputeMapping(type_count);
output.SetVocabFD(numbering.File());
std::cerr << "=== 2/" << master.Steps() << " Calculating and sorting adjusted counts ===" << std::endl;
master.InitForAdjust(*sorted_counts, type_count, subtract_for_numbering);

View File

@ -18,7 +18,6 @@ class Output;
struct PipelineConfig {
std::size_t order;
std::string vocab_file;
util::stream::SortConfig sort;
InitialProbabilitiesConfig initial_probs;
util::stream::ChainConfig read_backoffs;

View File

@ -1,64 +0,0 @@
#include "lm/builder/print.hh"
#include "util/fake_ofstream.hh"
#include "util/file.hh"
#include "util/mmap.hh"
#include "util/scoped.hh"
#include "util/stream/timer.hh"
#include <sstream>
#include <cstring>
namespace lm { namespace builder {
// Memory-map the null-delimited vocabulary file behind fd and index the start
// of every word so Lookup()/LookupPiece() can map a WordIndex back to its
// string.  fd must stay alive for the life of this object; ownership of the
// descriptor is not taken (see the declaration in print.hh).
VocabReconstitute::VocabReconstitute(int fd) {
uint64_t size = util::SizeOrThrow(fd);
util::MapRead(util::POPULATE_OR_READ, fd, 0, size, memory_);
const char *const start = static_cast<const char*>(memory_.get());
const char *i;
// Words are NUL-terminated strings laid end to end; record each start pointer.
for (i = start; i != start + size; i += strlen(i) + 1) {
map_.push_back(i);
}
// Last one for LookupPiece.
map_.push_back(i);
}
// Attach this hook as the consumer of all chains; boost::ref avoids copying
// *this into the chain (the stream framework then drives Run()).
void PrintARPA::Sink(util::stream::Chains &chains) {
chains >> boost::ref(*this);
}
// Write the model in ARPA format to out_fd_: optional "#"-comment header,
// the \data\ section with pruned counts per order, one \N-grams: section per
// order, and the closing \end\.  Per the warning in print.hh, this consumes
// all unigrams before all bigrams before all trigrams, so upstream chains
// must be able to buffer if they move jointly.
void PrintARPA::Run(const util::stream::ChainPositions &positions) {
VocabReconstitute vocab(GetVocabFD());
util::FakeOFStream out(out_fd_.get());
// Write header.
if (verbose_header_) {
out << "# Input file: " << GetHeader().input_file << '\n';
out << "# Token count: " << GetHeader().token_count << '\n';
out << "# Smoothing: Modified Kneser-Ney" << '\n';
}
out << "\\data\\\n";
// One "ngram N=count" line per order, using counts after pruning.
for (size_t i = 0; i < positions.size(); ++i) {
out << "ngram " << (i+1) << '=' << GetHeader().counts_pruned[i] << '\n';
}
out << '\n';
for (unsigned order = 1; order <= positions.size(); ++order) {
out << "\\" << order << "-grams:" << '\n';
for (NGramStream<BuildingPayload> stream(positions[order - 1]); stream; ++stream) {
// Correcting for numerical precision issues. Take that IRST.
out << stream->Value().complete.prob << '\t' << vocab.Lookup(*stream->begin());
for (const WordIndex *i = stream->begin() + 1; i != stream->end(); ++i) {
out << ' ' << vocab.Lookup(*i);
}
// The highest order carries no backoff weight in ARPA format.
if (order != positions.size())
out << '\t' << stream->Value().complete.backoff;
out << '\n';
}
out << '\n';
}
out << "\\end\\\n";
}
}} // namespaces

40
lm/common/CMakeLists.txt Normal file
View File

@ -0,0 +1,40 @@
# Build rules for the KenLM common object library (kenlm_common), shared by
# the builder, filter, and query code.
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
#
# In order to set correct paths to these files
# in case this variable is referenced by CMake files in the parent directory,
# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
#
set(KENLM_COMMON_SOURCE
${CMAKE_CURRENT_SOURCE_DIR}/model_buffer.cc
${CMAKE_CURRENT_SOURCE_DIR}/print.cc
${CMAKE_CURRENT_SOURCE_DIR}/renumber.cc
${CMAKE_CURRENT_SOURCE_DIR}/size_option.cc
)
# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm_common OBJECT ${KENLM_COMMON_SOURCE})

View File

@ -1,2 +1,2 @@
fakelib common : [ glob *.cc : *test.cc *main.cc ]
../../util//kenutil ../../util/stream//stream ../../util/double-conversion//double-conversion ..//kenlm ;
../../util//kenutil ../../util/stream//stream ../../util/double-conversion//double-conversion ..//kenlm /top//boost_program_options ;

View File

@ -1,8 +1,7 @@
#ifndef LM_BUILDER_JOINT_ORDER_H
#define LM_BUILDER_JOINT_ORDER_H
#ifndef LM_COMMON_JOINT_ORDER_H
#define LM_COMMON_JOINT_ORDER_H
#include "lm/common/ngram_stream.hh"
#include "lm/builder/payload.hh"
#include "lm/lm_exception.hh"
#ifdef DEBUG
@ -12,15 +11,19 @@
#include <cstring>
namespace lm { namespace builder {
namespace lm {
template <class Callback, class Compare> void JointOrder(const util::stream::ChainPositions &positions, Callback &callback) {
// Allow matching to reference streams[-1].
NGramStreams<BuildingPayload> streams_with_dummy;
streams_with_dummy.InitWithDummy(positions);
NGramStream<BuildingPayload> *streams = streams_with_dummy.begin() + 1;
util::FixedArray<ProxyStream<NGramHeader> > streams_with_dummy(positions.size() + 1);
// A bogus stream for [-1].
streams_with_dummy.push_back();
for (std::size_t i = 0; i < positions.size(); ++i) {
streams_with_dummy.push_back(positions[i], NGramHeader(NULL, i + 1));
}
ProxyStream<NGramHeader> *streams = streams_with_dummy.begin() + 1;
unsigned int order;
std::size_t order;
for (order = 0; order < positions.size() && streams[order]; ++order) {}
assert(order); // should always have <unk>.
@ -31,11 +34,11 @@ template <class Callback, class Compare> void JointOrder(const util::stream::Cha
less_compare.push_back(i + 1);
#endif // DEBUG
unsigned int current = 0;
std::size_t current = 0;
while (true) {
// Does the context match the lower one?
if (!memcmp(streams[static_cast<int>(current) - 1]->begin(), streams[current]->begin() + Compare::kMatchOffset, sizeof(WordIndex) * current)) {
callback.Enter(current, *streams[current]);
callback.Enter(current, streams[current].Get());
// Transition to looking for extensions.
if (++current < order) continue;
}
@ -51,7 +54,7 @@ template <class Callback, class Compare> void JointOrder(const util::stream::Cha
while(true) {
assert(current > 0);
--current;
callback.Exit(current, *streams[current]);
callback.Exit(current, streams[current].Get());
if (++streams[current]) break;
@ -63,6 +66,6 @@ template <class Callback, class Compare> void JointOrder(const util::stream::Cha
}
}
}} // namespaces
} // namespaces
#endif // LM_BUILDER_JOINT_ORDER_H
#endif // LM_COMMON_JOINT_ORDER_H

View File

@ -8,25 +8,30 @@
#include <boost/lexical_cast.hpp>
namespace lm { namespace common {
namespace lm {
namespace {
const char kMetadataHeader[] = "KenLM intermediate binary file";
} // namespace
ModelBuffer::ModelBuffer(const std::string &file_base, bool keep_buffer, bool output_q)
: file_base_(file_base), keep_buffer_(keep_buffer), output_q_(output_q) {}
ModelBuffer::ModelBuffer(const std::string &file_base)
: file_base_(file_base), keep_buffer_(false) {
ModelBuffer::ModelBuffer(StringPiece file_base, bool keep_buffer, bool output_q)
: file_base_(file_base.data(), file_base.size()), keep_buffer_(keep_buffer), output_q_(output_q),
vocab_file_(keep_buffer ? util::CreateOrThrow((file_base_ + ".vocab").c_str()) : util::MakeTemp(file_base_)) {}
ModelBuffer::ModelBuffer(StringPiece file_base)
: file_base_(file_base.data(), file_base.size()), keep_buffer_(false) {
const std::string full_name = file_base_ + ".kenlm_intermediate";
util::FilePiece in(full_name.c_str());
StringPiece token = in.ReadLine();
UTIL_THROW_IF2(token != kMetadataHeader, "File " << full_name << " begins with \"" << token << "\" not " << kMetadataHeader);
token = in.ReadDelimited();
UTIL_THROW_IF2(token != "Order", "Expected Order, got \"" << token << "\" in " << full_name);
unsigned long order = in.ReadULong();
UTIL_THROW_IF2(token != "Counts", "Expected Counts, got \"" << token << "\" in " << full_name);
char got;
while ((got = in.get()) == ' ') {
counts_.push_back(in.ReadULong());
}
UTIL_THROW_IF2(got != '\n', "Expected newline at end of counts.");
token = in.ReadDelimited();
UTIL_THROW_IF2(token != "Payload", "Expected Payload, got \"" << token << "\" in " << full_name);
@ -39,16 +44,16 @@ ModelBuffer::ModelBuffer(const std::string &file_base)
UTIL_THROW(util::Exception, "Unknown payload " << token);
}
files_.Init(order);
for (unsigned long i = 0; i < order; ++i) {
vocab_file_.reset(util::OpenReadOrThrow((file_base_ + ".vocab").c_str()));
files_.Init(counts_.size());
for (unsigned long i = 0; i < counts_.size(); ++i) {
files_.push_back(util::OpenReadOrThrow((file_base_ + '.' + boost::lexical_cast<std::string>(i + 1)).c_str()));
}
}
// virtual destructor
ModelBuffer::~ModelBuffer() {}
void ModelBuffer::Sink(util::stream::Chains &chains) {
void ModelBuffer::Sink(util::stream::Chains &chains, const std::vector<uint64_t> &counts) {
counts_ = counts;
// Open files.
files_.Init(chains.size());
for (std::size_t i = 0; i < chains.size(); ++i) {
@ -64,19 +69,23 @@ void ModelBuffer::Sink(util::stream::Chains &chains) {
if (keep_buffer_) {
util::scoped_fd metadata(util::CreateOrThrow((file_base_ + ".kenlm_intermediate").c_str()));
util::FakeOFStream meta(metadata.get(), 200);
meta << kMetadataHeader << "\nOrder " << chains.size() << "\nPayload " << (output_q_ ? "q" : "pb") << '\n';
meta << kMetadataHeader << "\nCounts";
for (std::vector<uint64_t>::const_iterator i = counts_.begin(); i != counts_.end(); ++i) {
meta << ' ' << *i;
}
meta << "\nPayload " << (output_q_ ? "q" : "pb") << '\n';
}
}
void ModelBuffer::Source(util::stream::Chains &chains) {
assert(chains.size() == files_.size());
for (unsigned int i = 0; i < files_.size(); ++i) {
assert(chains.size() <= files_.size());
for (unsigned int i = 0; i < chains.size(); ++i) {
chains[i] >> util::stream::PRead(files_[i].get());
}
}
std::size_t ModelBuffer::Order() const {
return files_.size();
void ModelBuffer::Source(std::size_t order_minus_1, util::stream::Chain &chain) {
chain >> util::stream::PRead(files_[order_minus_1].get());
}
}} // namespaces
} // namespace

View File

@ -1,5 +1,5 @@
#ifndef LM_BUILDER_MODEL_BUFFER_H
#define LM_BUILDER_MODEL_BUFFER_H
#ifndef LM_COMMON_MODEL_BUFFER_H
#define LM_COMMON_MODEL_BUFFER_H
/* Format with separate files in suffix order. Each file contains
* n-grams of the same order.
@ -9,37 +9,55 @@
#include "util/fixed_array.hh"
#include <string>
#include <vector>
namespace util { namespace stream { class Chains; } }
namespace util { namespace stream {
class Chains;
class Chain;
}} // namespaces
namespace lm { namespace common {
namespace lm {
class ModelBuffer {
public:
// Construct for writing.
ModelBuffer(const std::string &file_base, bool keep_buffer, bool output_q);
// Construct for writing. Must call VocabFile() and fill it with null-delimited vocab words.
ModelBuffer(StringPiece file_base, bool keep_buffer, bool output_q);
// Load from file.
explicit ModelBuffer(const std::string &file_base);
explicit ModelBuffer(StringPiece file_base);
// explicit for virtual destructor.
~ModelBuffer();
void Sink(util::stream::Chains &chains);
// Must call VocabFile and populate before calling this function.
void Sink(util::stream::Chains &chains, const std::vector<uint64_t> &counts);
// Read files and write to the given chains. If fewer chains are provided,
// only do the lower orders.
void Source(util::stream::Chains &chains);
void Source(std::size_t order_minus_1, util::stream::Chain &chain);
// The order of the n-gram model that is associated with the model buffer.
std::size_t Order() const;
std::size_t Order() const { return counts_.size(); }
// Requires Sink or load from file.
const std::vector<uint64_t> &Counts() const {
assert(!counts_.empty());
return counts_;
}
int VocabFile() const { return vocab_file_.get(); }
int StealVocabFile() { return vocab_file_.release(); }
bool Keep() const { return keep_buffer_; }
private:
const std::string file_base_;
const bool keep_buffer_;
bool output_q_;
std::vector<uint64_t> counts_;
util::scoped_fd vocab_file_;
util::FixedArray<util::scoped_fd> files_;
};
}} // namespaces
} // namespace lm
#endif // LM_BUILDER_MODEL_BUFFER_H
#endif // LM_COMMON_MODEL_BUFFER_H

View File

@ -16,6 +16,8 @@ class NGramHeader {
NGramHeader(void *begin, std::size_t order)
: begin_(static_cast<WordIndex*>(begin)), end_(begin_ + order) {}
NGramHeader() : begin_(NULL), end_(NULL) {}
const uint8_t *Base() const { return reinterpret_cast<const uint8_t*>(begin_); }
uint8_t *Base() { return reinterpret_cast<uint8_t*>(begin_); }
@ -32,6 +34,7 @@ class NGramHeader {
const WordIndex *end() const { return end_; }
WordIndex *end() { return end_; }
std::size_t size() const { return end_ - begin_; }
std::size_t Order() const { return end_ - begin_; }
private:
@ -42,6 +45,8 @@ template <class PayloadT> class NGram : public NGramHeader {
public:
typedef PayloadT Payload;
NGram() : NGramHeader(NULL, 0) {}
NGram(void *begin, std::size_t order) : NGramHeader(begin, order) {}
// Would do operator++ but that can get confusing for a stream.

View File

@ -10,24 +10,21 @@
namespace lm {
template <class Payload> class NGramStream {
template <class Proxy> class ProxyStream {
public:
NGramStream() : gram_(NULL, 0) {}
// Make an invalid stream.
ProxyStream() {}
NGramStream(const util::stream::ChainPosition &position) : gram_(NULL, 0) {
Init(position);
explicit ProxyStream(const util::stream::ChainPosition &position, const Proxy &proxy = Proxy())
: proxy_(proxy), stream_(position) {
proxy_.ReBase(stream_.Get());
}
void Init(const util::stream::ChainPosition &position) {
stream_.Init(position);
gram_ = NGram<Payload>(stream_.Get(), NGram<Payload>::OrderFromSize(position.GetChain().EntrySize()));
}
Proxy &operator*() { return proxy_; }
const Proxy &operator*() const { return proxy_; }
NGram<Payload> &operator*() { return gram_; }
const NGram<Payload> &operator*() const { return gram_; }
NGram<Payload> *operator->() { return &gram_; }
const NGram<Payload> *operator->() const { return &gram_; }
Proxy *operator->() { return &proxy_; }
const Proxy *operator->() const { return &proxy_; }
void *Get() { return stream_.Get(); }
const void *Get() const { return stream_.Get(); }
@ -36,21 +33,25 @@ template <class Payload> class NGramStream {
bool operator!() const { return !stream_; }
void Poison() { stream_.Poison(); }
NGramStream &operator++() {
ProxyStream<Proxy> &operator++() {
++stream_;
gram_.ReBase(stream_.Get());
proxy_.ReBase(stream_.Get());
return *this;
}
private:
NGram<Payload> gram_;
Proxy proxy_;
util::stream::Stream stream_;
};
template <class Payload> inline util::stream::Chain &operator>>(util::stream::Chain &chain, NGramStream<Payload> &str) {
str.Init(chain.Add());
return chain;
}
template <class Payload> class NGramStream : public ProxyStream<NGram<Payload> > {
public:
// Make an invalid stream.
NGramStream() {}
explicit NGramStream(const util::stream::ChainPosition &position) :
ProxyStream<NGram<Payload> >(position, NGram<Payload>(NULL, NGram<Payload>::OrderFromSize(position.GetChain().EntrySize()))) {}
};
template <class Payload> class NGramStreams : public util::stream::GenericStreams<NGramStream<Payload> > {
private:

62
lm/common/print.cc Normal file
View File

@ -0,0 +1,62 @@
#include "lm/common/print.hh"
#include "lm/common/ngram_stream.hh"
#include "util/fake_ofstream.hh"
#include "util/file.hh"
#include "util/mmap.hh"
#include "util/scoped.hh"
#include <sstream>
#include <cstring>
namespace lm {
VocabReconstitute::VocabReconstitute(int fd) {
uint64_t size = util::SizeOrThrow(fd);
util::MapRead(util::POPULATE_OR_READ, fd, 0, size, memory_);
const char *const start = static_cast<const char*>(memory_.get());
const char *i;
for (i = start; i != start + size; i += strlen(i) + 1) {
map_.push_back(i);
}
// Last one for LookupPiece.
map_.push_back(i);
}
namespace {
template <class Payload> void PrintLead(const VocabReconstitute &vocab, ProxyStream<Payload> &stream, util::FakeOFStream &out) {
out << stream->Value().prob << '\t' << vocab.Lookup(*stream->begin());
for (const WordIndex *i = stream->begin() + 1; i != stream->end(); ++i) {
out << ' ' << vocab.Lookup(*i);
}
}
} // namespace
void PrintARPA::Run(const util::stream::ChainPositions &positions) {
VocabReconstitute vocab(vocab_fd_);
util::FakeOFStream out(out_fd_);
out << "\\data\\\n";
for (size_t i = 0; i < positions.size(); ++i) {
out << "ngram " << (i+1) << '=' << counts_[i] << '\n';
}
out << '\n';
for (unsigned order = 1; order < positions.size(); ++order) {
out << "\\" << order << "-grams:" << '\n';
for (ProxyStream<NGram<ProbBackoff> > stream(positions[order - 1], NGram<ProbBackoff>(NULL, order)); stream; ++stream) {
PrintLead(vocab, stream, out);
out << '\t' << stream->Value().backoff << '\n';
}
out << '\n';
}
out << "\\" << positions.size() << "-grams:" << '\n';
for (ProxyStream<NGram<Prob> > stream(positions.back(), NGram<Prob>(NULL, positions.size())); stream; ++stream) {
PrintLead(vocab, stream, out);
out << '\n';
}
out << '\n';
out << "\\end\\\n";
}
} // namespace lm

58
lm/common/print.hh Normal file
View File

@ -0,0 +1,58 @@
#ifndef LM_COMMON_PRINT_H
#define LM_COMMON_PRINT_H
#include "lm/word_index.hh"
#include "util/mmap.hh"
#include "util/string_piece.hh"
#include <cassert>
#include <vector>
namespace util { namespace stream { class ChainPositions; }}
// Warning: PrintARPA routines read all unigrams before all bigrams before all
// trigrams etc. So if other parts of the chain move jointly, you'll have to
// buffer.
namespace lm {
class VocabReconstitute {
public:
// fd must be alive for life of this object; does not take ownership.
explicit VocabReconstitute(int fd);
const char *Lookup(WordIndex index) const {
assert(index < map_.size() - 1);
return map_[index];
}
StringPiece LookupPiece(WordIndex index) const {
return StringPiece(map_[index], map_[index + 1] - 1 - map_[index]);
}
std::size_t Size() const {
// There's an extra entry to support StringPiece lengths.
return map_.size() - 1;
}
private:
util::scoped_memory memory_;
std::vector<const char*> map_;
};
class PrintARPA {
public:
// Does not take ownership of vocab_fd or out_fd.
explicit PrintARPA(int vocab_fd, int out_fd, const std::vector<uint64_t> &counts)
: vocab_fd_(vocab_fd), out_fd_(out_fd), counts_(counts) {}
void Run(const util::stream::ChainPositions &positions);
private:
int vocab_fd_;
int out_fd_;
std::vector<uint64_t> counts_;
};
} // namespace lm
#endif // LM_COMMON_PRINT_H

24
lm/common/size_option.cc Normal file
View File

@ -0,0 +1,24 @@
#include <boost/program_options.hpp>
#include "util/usage.hh"
namespace lm {
namespace {
class SizeNotify {
public:
explicit SizeNotify(std::size_t &out) : behind_(out) {}
void operator()(const std::string &from) {
behind_ = util::ParseSize(from);
}
private:
std::size_t &behind_;
};
}
boost::program_options::typed_value<std::string> *SizeOption(std::size_t &to, const char *default_value) {
return boost::program_options::value<std::string>()->notifier(SizeNotify(to))->default_value(default_value);
}
} // namespace lm

11
lm/common/size_option.hh Normal file
View File

@ -0,0 +1,11 @@
#include <boost/program_options.hpp>
#include <cstddef>
#include <string>
namespace lm {
// Create a boost program option for data sizes. This parses sizes like 1T and 10k.
boost::program_options::typed_value<std::string> *SizeOption(std::size_t &to, const char *default_value);
} // namespace lm

View File

@ -1,9 +1,9 @@
#ifndef LM_BUILDER_SPECIAL_H
#define LM_BUILDER_SPECIAL_H
#ifndef LM_COMMON_SPECIAL_H
#define LM_COMMON_SPECIAL_H
#include "lm/word_index.hh"
namespace lm { namespace builder {
namespace lm {
class SpecialVocab {
public:
@ -22,6 +22,6 @@ class SpecialVocab {
WordIndex eos_;
};
}} // namespaces
} // namespace lm
#endif // LM_BUILDER_SPECIAL_H
#endif // LM_COMMON_SPECIAL_H

62
lm/filter/CMakeLists.txt Normal file
View File

@ -0,0 +1,62 @@
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
#
# In order to set correct paths to these files
# in case this variable is referenced by CMake files in the parent directory,
# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
#
set(KENLM_FILTER_SOURCE
${CMAKE_CURRENT_SOURCE_DIR}/arpa_io.cc
${CMAKE_CURRENT_SOURCE_DIR}/phrase.cc
${CMAKE_CURRENT_SOURCE_DIR}/vocab.cc
)
# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm_filter OBJECT ${KENLM_FILTER_SOURCE})
# Explicitly list the executable files to be compiled
set(EXE_LIST
filter
phrase_table_vocab
)
# Iterate through the executable list
foreach(exe ${EXE_LIST})
# Compile the executable, linking against the requisite dependent object files
add_executable(${exe} ${exe}_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_filter> $<TARGET_OBJECTS:kenlm_util>)
# Link the executable against boost
target_link_libraries(${exe} ${Boost_LIBRARIES})
# Group executables together
set_target_properties(${exe} PROPERTIES FOLDER executables)
# End for loop
endforeach(exe)

View File

@ -5,10 +5,7 @@
#include <vector>
#include "StatisticsBasedScorer.h"
#include "moses/FF/InternalTree.h"
using Moses::TreePointer;
using Moses::InternalTree;
#include "InternalTree.h"
namespace MosesTuning
{

110
mert/InternalTree.cpp Normal file
View File

@ -0,0 +1,110 @@
#include "InternalTree.h"
namespace MosesTuning
{
InternalTree::InternalTree(const std::string & line, const bool terminal):
m_isTerminal(terminal)
{
size_t found = line.find_first_of("[] ");
if (found == line.npos) {
m_value = line;
}
else {
AddSubTree(line, 0);
}
}
size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
{
std::string value;
char token = 0;
while (token != ']' && pos != std::string::npos) {
size_t oldpos = pos;
pos = line.find_first_of("[] ", pos);
if (pos == std::string::npos) break;
token = line[pos];
value = line.substr(oldpos,pos-oldpos);
if (token == '[') {
if (m_value.size() > 0) {
m_children.push_back(boost::make_shared<InternalTree>(value,false));
pos = m_children.back()->AddSubTree(line, pos+1);
} else {
if (value.size() > 0) {
m_value = value;
}
pos = AddSubTree(line, pos+1);
}
} else if (token == ' ' || token == ']') {
if (value.size() > 0 && !(m_value.size() > 0)) {
m_value = value;
} else if (value.size() > 0) {
m_isTerminal = false;
m_children.push_back(boost::make_shared<InternalTree>(value,true));
}
if (token == ' ') {
pos++;
}
}
if (m_children.size() > 0) {
m_isTerminal = false;
}
}
if (pos == std::string::npos) {
return line.size();
}
return std::min(line.size(),pos+1);
}
std::string InternalTree::GetString(bool start) const
{
std::string ret = "";
if (!start) {
ret += " ";
}
if (!m_isTerminal) {
ret += "[";
}
ret += m_value;
for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) {
ret += (*it)->GetString(false);
}
if (!m_isTerminal) {
ret += "]";
}
return ret;
}
void InternalTree::Combine(const std::vector<TreePointer> &previous)
{
std::vector<TreePointer>::iterator it;
bool found = false;
leafNT next_leafNT(this);
for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
found = next_leafNT(it);
if (found) {
*it = *it_prev;
} else {
std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
}
}
}
}

77
mert/InternalTree.h Normal file
View File

@ -0,0 +1,77 @@
#pragma once
#include <iostream>
#include <string>
#include <map>
#include <vector>
#include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp>
#include "util/generator.hh"
#include "util/exception.hh"
namespace MosesTuning
{
class InternalTree;
typedef boost::shared_ptr<InternalTree> TreePointer;
typedef int NTLabel;
class InternalTree
{
std::string m_value;
std::vector<TreePointer> m_children;
bool m_isTerminal;
public:
InternalTree(const std::string & line, const bool terminal = false);
InternalTree(const InternalTree & tree):
m_value(tree.m_value),
m_isTerminal(tree.m_isTerminal) {
const std::vector<TreePointer> & children = tree.m_children;
for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
m_children.push_back(boost::make_shared<InternalTree>(**it));
}
}
size_t AddSubTree(const std::string & line, size_t start);
std::string GetString(bool start = true) const;
void Combine(const std::vector<TreePointer> &previous);
const std::string & GetLabel() const {
return m_value;
}
size_t GetLength() const {
return m_children.size();
}
std::vector<TreePointer> & GetChildren() {
return m_children;
}
bool IsTerminal() const {
return m_isTerminal;
}
bool IsLeafNT() const {
return (!m_isTerminal && m_children.size() == 0);
}
};
// Python-like generator that yields next nonterminal leaf on every call
$generator(leafNT)
{
std::vector<TreePointer>::iterator it;
InternalTree* tree;
leafNT(InternalTree* root = 0): tree(root) {}
$emit(std::vector<TreePointer>::iterator)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(it);
} else if ((*it)->GetLength() > 0) {
if ((*it).get()) { // normal pointer to same object that TreePointer points to
$restart(tree = (*it).get());
}
}
}
$stop;
};
}

View File

@ -30,7 +30,7 @@ InterpolatedScorer.cpp
Point.cpp
PerScorer.cpp
HwcmScorer.cpp
../moses/FF/InternalTree.cpp
InternalTree.cpp
Scorer.cpp
ScorerFactory.cpp
Optimizer.cpp

View File

@ -14,6 +14,8 @@ exe 1-1-Extraction : 1-1-Extraction.cpp ..//boost_filesystem ../moses//moses ;
exe prunePhraseTable : prunePhraseTable.cpp ..//boost_filesystem ../moses//moses ..//boost_program_options ;
exe pruneGeneration : pruneGeneration.cpp ..//boost_filesystem ../moses//moses ..//boost_program_options ;
local with-cmph = [ option.get "with-cmph" ] ;
if $(with-cmph) {
exe processPhraseTableMin : processPhraseTableMin.cpp ..//boost_filesystem ../moses//moses ;
@ -46,6 +48,6 @@ $(TOP)//boost_iostreams
$(TOP)//boost_program_options
;
alias programs : 1-1-Extraction TMining generateSequences processLexicalTable queryLexicalTable programsMin programsProbing merge-sorted prunePhraseTable ;
alias programs : 1-1-Extraction TMining generateSequences processLexicalTable queryLexicalTable programsMin programsProbing merge-sorted prunePhraseTable pruneGeneration ;
#processPhraseTable queryPhraseTable

98
misc/pruneGeneration.cpp Normal file
View File

@ -0,0 +1,98 @@
#include <stdio.h>
#include <stdlib.h>
#include <cassert>
#include <algorithm>
#include <functional>
#include <boost/filesystem.hpp>
#include "pruneGeneration.h"
#include "moses/InputFileStream.h"
#include "moses/OutputFileStream.h"
using namespace std;
int main(int argc, char **argv)
{
cerr << "Starting" << endl;
int limit = atoi(argv[1]);
string inPathStem = argv[2];
string outPathStem = argv[3];
namespace fs = boost::filesystem;
//cerr << "inPathStem=" << inPathStem << endl;
fs::path p(inPathStem);
fs::path dir = p.parent_path();
//cerr << "dir=" << dir << endl;
fs::path fileStem = p.filename();
string fileStemStr = fileStem.native();
size_t fileStemStrSize = fileStemStr.size();
//cerr << "fileStem=" << fileStemStr << endl;
// loop thru each file in directory
fs::directory_iterator end_iter;
for( fs::directory_iterator dir_iter(dir) ; dir_iter != end_iter ; ++dir_iter) {
if (fs::is_regular_file(dir_iter->status())) {
fs::path currPath = *dir_iter;
string currPathStr = currPath.native();
//cerr << "currPathStr=" << currPathStr << endl;
fs::path currFile = currPath.filename();
string currFileStr = currFile.native();
if (currFileStr.find(fileStemStr) == 0) {
// found gen table we need
//cerr << "found=" << currPathStr << endl;
string suffix = currFileStr.substr(fileStemStrSize, currFileStr.size() - fileStemStrSize);
string outPath = outPathStem + suffix;
cerr << "PRUNING " << currPathStr << " TO " << outPath << endl;
Moses::InputFileStream inStrme(currPathStr);
Moses::OutputFileStream outStrme(outPath);
Process(limit, inStrme, outStrme);
}
}
}
cerr << "Finished" << endl;
}
void Process(int limit, istream &inStrme, ostream &outStrme)
{
vector<Rec> records;
string prevInWord;
string line;
while (getline(inStrme, line)) {
vector<string> toks;
Tokenize(toks, line);
assert(toks.size() == 4);
if (prevInWord != toks[0]) {
Output(outStrme, records, limit);
records.clear();
}
// add new record
float prob = atof(toks[2].c_str());
records.push_back(Rec(prob, line));
prevInWord = toks[0];
}
// last
Output(outStrme, records, limit);
records.clear();
}
void Output(ostream &outStrme, vector<Rec> &records, int limit)
{
std::sort(records.rbegin(), records.rend());
for (size_t i = 0; i < limit && i < records.size(); ++i) {
const Rec &rec = records[i];
outStrme << rec.line << endl;
}
}

46
misc/pruneGeneration.h Normal file
View File

@ -0,0 +1,46 @@
#pragma once
#include <vector>
#include <string>
#include <iostream>
class Rec
{
public:
float prob;
std::string line;
Rec(float aprob, const std::string &aline)
:prob(aprob)
,line(aline)
{}
inline bool operator< (const Rec &compare) const {
return prob < compare.prob;
}
};
////////////////////////////////////////////////////////////
void Process(int limit, std::istream &inStrme, std::ostream &outStrme);
void Output(std::ostream &outStrme, std::vector<Rec> &records, int limit);
////////////////////////////////////////////////////////////
inline void Tokenize(std::vector<std::string> &output
, const std::string& str
, const std::string& delimiters = " \t")
{
// Skip delimiters at beginning.
std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
// Find first "non-delimiter".
std::string::size_type pos = str.find_first_of(delimiters, lastPos);
while (std::string::npos != pos || std::string::npos != lastPos) {
// Found a token, add it to the vector.
output.push_back(str.substr(lastPos, pos - lastPos));
// Skip delimiters. Note the "not_of"
lastPos = str.find_first_not_of(delimiters, pos);
// Find next "non-delimiter"
pos = str.find_first_of(delimiters, lastPos);
}
}

View File

@ -159,13 +159,15 @@ int main(int argc, char* argv[])
}
StaticData& SD = const_cast<StaticData&>(StaticData::Instance());
SD.SetUseLatticeMBR(true);
LMBR_Options& lmbr = SD.options().lmbr;
MBR_Options& mbr = SD.options().mbr;
lmbr.enabled = true;
boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper);
if (!ioWrapper) {
throw runtime_error("Failed to initialise IOWrapper");
}
size_t nBestSize = SD.GetMBRSize();
size_t nBestSize = mbr.size;
if (nBestSize <= 0) {
throw new runtime_error("Non-positive size specified for n-best list");
@ -187,13 +189,13 @@ int main(int argc, char* argv[])
manager.CalcNBest(nBestSize, nBestList,true);
//grid search
BOOST_FOREACH(float const& p, pgrid) {
SD.SetLatticeMBRPrecision(p);
lmbr.precision = p;
BOOST_FOREACH(float const& r, rgrid) {
SD.SetLatticeMBRPRatio(r);
lmbr.ratio = r;
BOOST_FOREACH(size_t const prune_i, prune_grid) {
SD.SetLatticeMBRPruningFactor(size_t(prune_i));
lmbr.pruning_factor = prune_i;
BOOST_FOREACH(float const& scale_i, scale_grid) {
SD.SetMBRScale(scale_i);
mbr.scale = scale_i;
size_t lineCount = source->GetTranslationId();
cout << lineCount << " ||| " << p << " "
<< r << " " << size_t(prune_i) << " " << scale_i

View File

@ -27,6 +27,12 @@ BaseManager::GetSource() const
return m_source;
}
const ttasksptr
BaseManager::GetTtask() const
{
return m_ttask.lock();
}
void
BaseManager::
OutputSearchGraphAsHypergraph(std::ostream& out) const
@ -134,6 +140,14 @@ void BaseManager::WriteApplicationContext(std::ostream &out,
}
}
AllOptions const&
BaseManager::
options() const
{
return GetTtask()->options();
}
} // namespace

View File

@ -5,7 +5,7 @@
#include <string>
#include "ScoreComponentCollection.h"
#include "InputType.h"
#include "moses/parameters/AllOptions.h"
namespace Moses
{
class ScoreComponentCollection;
@ -50,6 +50,8 @@ public:
//! the input sentence being decoded
const InputType& GetSource() const;
const ttasksptr GetTtask() const;
AllOptions const& options() const;
virtual void Decode() = 0;
// outputs

View File

@ -53,7 +53,7 @@ ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) :
ChartCellBase(startPos, endPos), m_manager(manager)
{
const StaticData &staticData = StaticData::Instance();
m_nBestIsEnabled = staticData.IsNBestEnabled();
m_nBestIsEnabled = staticData.options().nbest.enabled;
}
ChartCell::~ChartCell() {}
@ -100,7 +100,7 @@ void ChartCell::Decode(const ChartTranslationOptionList &transOptList
}
// pluck things out of queue and add to hypo collection
const size_t popLimit = staticData.GetCubePruningPopLimit();
const size_t popLimit = staticData.options().cube.pop_limit;
for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
ChartHypothesis *hypo = queue.Pop();
AddHypothesis(hypo);

View File

@ -287,8 +287,11 @@ void ChartHypothesis::CleanupArcList()
* so we'll keep all of arc list if nedd distinct n-best list
*/
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphHypergraph();
size_t nBestSize = staticData.options().nbest.nbest_size;
bool distinctNBest = (staticData.options().nbest.only_distinct
|| staticData.options().mbr.enabled
|| staticData.GetOutputSearchGraph()
|| staticData.GetOutputSearchGraphHypergraph());
if (!distinctNBest && m_arcList->size() > nBestSize) {
// prune arc list only if there too many arcs

View File

@ -38,8 +38,8 @@ ChartHypothesisCollection::ChartHypothesisCollection()
const StaticData &staticData = StaticData::Instance();
m_beamWidth = staticData.GetBeamWidth();
m_maxHypoStackSize = staticData.GetMaxHypoStackSize();
m_nBestIsEnabled = staticData.IsNBestEnabled();
m_maxHypoStackSize = staticData.options().search.stack_size;
m_nBestIsEnabled = staticData.options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
}

View File

@ -52,11 +52,7 @@ public:
// shouldn't be mixing hypos with different lhs
assert(hypoA->GetTargetLHS() == hypoB->GetTargetLHS());
int ret = hypoA->RecombineCompare(*hypoB);
if (ret != 0)
return (ret < 0);
return false;
return (hypoA->RecombineCompare(*hypoB) < 0);
}
};

View File

@ -207,7 +207,7 @@ void ChartManager::CalcNBest(
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
// too many translations are identical.
const StaticData &staticData = StaticData::Instance();
const std::size_t nBestFactor = staticData.GetNBestFactor();
const std::size_t nBestFactor = staticData.options().nbest.factor;
std::size_t numDerivations = (nBestFactor == 0) ? n*1000 : n*nBestFactor;
// Extract the derivations.
@ -318,13 +318,14 @@ void ChartManager::OutputBest(OutputCollector *collector) const
void ChartManager::OutputNBest(OutputCollector *collector) const
{
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
size_t nBestSize = staticData.options().nbest.nbest_size;
if (nBestSize > 0) {
const size_t translationId = m_source.GetTranslationId();
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO "
<< staticData.options().nbest.output_file_path << endl);
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
CalcNBest(nBestSize, nBestList,staticData.options().nbest.only_distinct);
OutputNBestList(collector, nBestList, translationId);
IFVERBOSE(2) {
PrintUserTime("N-Best Hypotheses Generation Time:");
@ -348,10 +349,9 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
FixPrecision(out);
}
bool includeWordAlignment =
StaticData::Instance().PrintAlignmentInfoInNbest();
bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
NBestOptions const& nbo = StaticData::Instance().options().nbest;
bool includeWordAlignment = nbo.include_alignment_info;
bool PrintNBestTrees = nbo.print_trees;
for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
p != nBestList.end(); ++p) {
@ -620,9 +620,9 @@ void ChartManager::OutputDetailedTranslationReport(
if (staticData.IsDetailedAllTranslationReportingEnabled()) {
const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
size_t nBestSize = staticData.GetNBestSize();
size_t nBestSize = staticData.options().nbest.nbest_size;
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
CalcNBest(nBestSize, nBestList, staticData.options().nbest.nbest_size);
OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
}

View File

@ -106,7 +106,8 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
targetPhrase->SetTargetLHS(targetLHS);
targetPhrase->SetAlignmentInfo("0-0");
targetPhrase->EvaluateInIsolation(*unksrc);
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.PrintNBestTrees() || staticData.GetTreeStructure() != NULL) {
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.options().nbest.print_trees || staticData.GetTreeStructure() != NULL) {
targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
}

View File

@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id$
#include "ConfusionNet.h"
@ -65,9 +66,9 @@ ConfusionNet() : InputType()
{
stats.createOne();
const StaticData& staticData = StaticData::Instance();
if (staticData.IsSyntax()) {
m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
const StaticData& SD = StaticData::Instance();
if (SD.IsSyntax()) {
m_defaultLabelSet.insert(SD.GetInputDefaultNonTerminal());
}
UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
}

View File

@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id: ExportInterface.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
/***********************************************************************
@ -63,9 +64,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <xmlrpc-c/base.hpp>
#include <xmlrpc-c/registry.hpp>
#include <xmlrpc-c/server_abyss.hpp>
#include "server/Translator.h"
#include "server/Optimizer.h"
#include "server/Updater.h"
#include "server/Server.h"
#endif
using namespace std;
@ -147,41 +146,9 @@ int
run_as_server()
{
#ifdef HAVE_XMLRPC_C
int port;
params.SetParameter(port, "server-port", 8080);
bool isSerial;
params.SetParameter(isSerial, "serial", false);
string logfile;
params.SetParameter(logfile, "server-log", string(""));
size_t num_threads;
params.SetParameter(num_threads, "threads", size_t(10));
if (isSerial) VERBOSE(1,"Running server in serial mode." << endl);
xmlrpc_c::registry myRegistry;
xmlrpc_c::methodPtr const translator(new MosesServer::Translator(num_threads));
xmlrpc_c::methodPtr const updater(new MosesServer::Updater);
xmlrpc_c::methodPtr const optimizer(new MosesServer::Optimizer);
myRegistry.addMethod("translate", translator);
myRegistry.addMethod("updater", updater);
myRegistry.addMethod("optimize", optimizer);
xmlrpc_c::serverAbyss myAbyssServer(myRegistry, port, logfile);
XVERBOSE(1,"Listening on port " << port << endl);
if (isSerial) {
while(1) myAbyssServer.runOnce();
} else myAbyssServer.run();
std::cerr << "xmlrpc_c::serverAbyss.run() returned but should not." << std::endl;
// #pragma message("BUILDING MOSES WITH SERVER SUPPORT")
#else
// #pragma message("BUILDING MOSES WITHOUT SERVER SUPPORT")
std::cerr << "Moses was compiled without server support." << endl;
MosesServer::Server server(params);
return server.run(); // actually: don't return. see Server::run()
#endif
return 1;
}
int
@ -212,21 +179,57 @@ batch_run()
ThreadPool pool(staticData.ThreadCount());
#endif
// using context for adaptation:
// e.g., context words / strings from config file / cmd line
std::string context_string;
params.SetParameter(context_string,"context-string",string(""));
// main loop over set of input sentences
// ... or weights for documents/domains from config file / cmd. line
std::string context_weights;
params.SetParameter(context_weights,"context-weights",string(""));
// ... or the surrounding context (--context-window ...)
size_t size_t_max = std::numeric_limits<size_t>::max();
bool use_context_window = ioWrapper->GetLookAhead() || ioWrapper->GetLookBack();
bool use_context = use_context_window || context_string.size();
bool use_sliding_context_window = (use_context_window
&& ioWrapper->GetLookAhead() != size_t_max);
boost::shared_ptr<std::vector<std::string> > context_window;
boost::shared_ptr<std::vector<std::string> >* cw;
cw = use_context_window ? &context_window : NULL;
if (!cw && context_string.size())
context_window.reset(new std::vector<std::string>(1,context_string));
// global scope of caches, biases, etc., if any
boost::shared_ptr<ContextScope> gscope;
if (!use_sliding_context_window)
gscope.reset(new ContextScope);
// main loop over set of input sentences
boost::shared_ptr<InputType> source;
while ((source = ioWrapper->ReadInput()) != NULL) {
while ((source = ioWrapper->ReadInput(cw)) != NULL) {
IFVERBOSE(1) ResetUserTime();
// set up task of translating one sentence
boost::shared_ptr<TranslationTask>
task = TranslationTask::create(source, ioWrapper);
if (source->GetContext())
task->SetContextString(*source->GetContext());
else task->SetContextString(context_string);
boost::shared_ptr<ContextScope> lscope;
if (gscope) lscope = gscope;
else lscope.reset(new ContextScope);
boost::shared_ptr<TranslationTask> task;
task = TranslationTask::create(source, ioWrapper, lscope);
if (cw) {
if (context_string.size())
context_window->push_back(context_string);
if(!use_sliding_context_window)
cw = NULL;
}
if (context_window)
task->SetContextWindow(context_window);
if (context_weights != "")
task->SetContextWeights(context_weights);
// Allow for (sentence-)context-specific processing prior to
// decoding. This can be used, for example, for context-sensitive

View File

@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
// $Id$

View File

@ -43,7 +43,9 @@ ConstrainedDecoding::ConstrainedDecoding(const std::string &line)
void ConstrainedDecoding::Load()
{
const StaticData &staticData = StaticData::Instance();
bool addBeginEndWord = (staticData.GetSearchAlgorithm() == CYKPlus) || (staticData.GetSearchAlgorithm() == ChartIncremental);
bool addBeginEndWord
= ((staticData.options().search.algo == CYKPlus)
|| (staticData.options().search.algo == ChartIncremental));
for(size_t i = 0; i < m_paths.size(); ++i) {
InputFileStream constraintFile(m_paths[i]);

View File

@ -6,7 +6,6 @@
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryScope3.h"
#include "moses/TranslationModel/PhraseDictionaryTransliteration.h"
#include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
@ -152,7 +151,7 @@ FeatureFactory
::DefaultSetup(F *feature)
{
StaticData &static_data = StaticData::InstanceNonConst();
const string &featureName = feature->GetScoreProducerDescription();
const std::string &featureName = feature->GetScoreProducerDescription();
std::vector<float> weights = static_data.GetParameter()->GetWeights(featureName);
@ -165,8 +164,8 @@ FeatureFactory
<< "WARNING: Auto-initializing all weights for this FF to 1.0");
weights.assign(feature->GetNumScoreComponents(),1.0);
} else {
TRACE_ERR("WARNING: No weights specified in config file for FF "
<< featureName << ". Using default values supplied by FF.");
VERBOSE(2,"WARNING: No weights specified in config file for FF "
<< featureName << ". Using default values supplied by FF.");
}
}
UTIL_THROW_IF2(weights.size() != feature->GetNumScoreComponents(),
@ -215,7 +214,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(PhraseDictionaryMultiModel);
MOSES_FNAME(PhraseDictionaryMultiModelCounts);
MOSES_FNAME(PhraseDictionaryALSuffixArray);
MOSES_FNAME(PhraseDictionaryDynSuffixArray);
// MOSES_FNAME(PhraseDictionaryDynSuffixArray);
MOSES_FNAME(PhraseDictionaryTransliteration);
MOSES_FNAME(PhraseDictionaryDynamicCacheBased);
MOSES_FNAME(PhraseDictionaryFuzzyMatch);
@ -353,18 +352,18 @@ void FeatureRegistry::Construct(const std::string &name, const std::string &line
void FeatureRegistry::PrintFF() const
{
vector<string> ffs;
std::vector<std::string> ffs;
std::cerr << "Available feature functions:" << std::endl;
Map::const_iterator iter;
for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
const string &ffName = iter->first;
const std::string &ffName = iter->first;
ffs.push_back(ffName);
}
vector<string>::const_iterator iterVec;
std::vector<std::string>::const_iterator iterVec;
std::sort(ffs.begin(), ffs.end());
for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
const string &ffName = *iterVec;
const std::string &ffName = *iterVec;
std::cerr << ffName << " ";
}

View File

@ -19,8 +19,8 @@ HyperParameterAsWeight::HyperParameterAsWeight(const std::string &line)
vector<float> weights = staticData.GetWeights(this);
staticData.m_maxHypoStackSize = weights[0] * 1000;
staticData.m_beamWidth = weights[1] * 10;
staticData.m_options.search.stack_size = weights[0] * 1000;
staticData.m_options.search.beam_width = weights[1] * 10;
}

View File

@ -1,27 +1,24 @@
#include "InternalTree.h"
#include "moses/StaticData.h"
namespace Moses
{
InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool terminal):
m_value_nt(0),
m_isTerminal(terminal)
InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool nonterminal)
{
if (len > 0) {
m_value.assign(line, start, len);
m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), StringPiece(line).substr(start, len), nonterminal);
}
}
InternalTree::InternalTree(const std::string & line, const bool terminal):
m_value_nt(0),
m_isTerminal(terminal)
InternalTree::InternalTree(const std::string & line, const bool nonterminal)
{
size_t found = line.find_first_of("[] ");
if (found == line.npos) {
m_value = line;
m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), line, nonterminal);
} else {
AddSubTree(line, 0);
}
@ -32,6 +29,7 @@ size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
char token = 0;
size_t len = 0;
bool has_value = false;
while (token != ']' && pos != std::string::npos) {
size_t oldpos = pos;
@ -41,30 +39,27 @@ size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
len = pos-oldpos;
if (token == '[') {
if (!m_value.empty()) {
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
if (has_value) {
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
pos = m_children.back()->AddSubTree(line, pos+1);
} else {
if (len > 0) {
m_value.assign(line, oldpos, len);
m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), StringPiece(line).substr(oldpos, len), false);
has_value = true;
}
pos = AddSubTree(line, pos+1);
}
} else if (token == ' ' || token == ']') {
if (len > 0 && m_value.empty()) {
m_value.assign(line, oldpos, len);
if (len > 0 && !has_value) {
m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), StringPiece(line).substr(oldpos, len), true);
has_value = true;
} else if (len > 0) {
m_isTerminal = false;
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
}
if (token == ' ') {
pos++;
}
}
if (!m_children.empty()) {
m_isTerminal = false;
}
}
if (pos == std::string::npos) {
@ -82,16 +77,16 @@ std::string InternalTree::GetString(bool start) const
ret += " ";
}
if (!m_isTerminal) {
if (!IsTerminal()) {
ret += "[";
}
ret += m_value;
ret += m_value.GetString(StaticData::Instance().GetOutputFactorOrder(), false);
for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) {
ret += (*it)->GetString(false);
}
if (!m_isTerminal) {
if (!IsTerminal()) {
ret += "]";
}
return ret;
@ -120,13 +115,13 @@ void InternalTree::Unbinarize()
{
// nodes with virtual label cannot be unbinarized
if (m_value.empty() || m_value[0] == '^') {
if (m_value.GetString(0).empty() || m_value.GetString(0).as_string()[0] == '^') {
return;
}
//if node has child that is virtual node, get unbinarized list of children
for (std::vector<TreePointer>::iterator it = m_children.begin(); it != m_children.end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLabel()[0] == '^') {
if (!(*it)->IsTerminal() && (*it)->GetLabel().GetString(0).as_string()[0] == '^') {
std::vector<TreePointer> new_children;
GetUnbinarizedChildren(new_children);
m_children = new_children;
@ -144,8 +139,8 @@ void InternalTree::Unbinarize()
void InternalTree::GetUnbinarizedChildren(std::vector<TreePointer> &ret) const
{
for (std::vector<TreePointer>::const_iterator itx = m_children.begin(); itx != m_children.end(); ++itx) {
const std::string &label = (*itx)->GetLabel();
if (!label.empty() && label[0] == '^') {
const StringPiece label = (*itx)->GetLabel().GetString(0);
if (!label.empty() && label.as_string()[0] == '^') {
(*itx)->GetUnbinarizedChildren(ret);
} else {
ret.push_back(*itx);
@ -153,7 +148,7 @@ void InternalTree::GetUnbinarizedChildren(std::vector<TreePointer> &ret) const
}
}
bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
bool InternalTree::FlatSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
@ -163,7 +158,7 @@ bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer
return false;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
bool InternalTree::RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
@ -178,7 +173,7 @@ bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePo
return false;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
bool InternalTree::RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
@ -194,88 +189,4 @@ bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePo
return false;
}
bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
}

View File

@ -5,30 +5,28 @@
#include <map>
#include <vector>
#include "FFState.h"
#include "moses/Word.h"
#include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp>
#include "util/generator.hh"
#include "util/exception.hh"
#include "util/string_piece.hh"
namespace Moses
{
class InternalTree;
typedef boost::shared_ptr<InternalTree> TreePointer;
typedef int NTLabel;
class InternalTree
{
std::string m_value;
NTLabel m_value_nt;
Word m_value;
std::vector<TreePointer> m_children;
bool m_isTerminal;
public:
InternalTree(const std::string & line, size_t start, size_t len, const bool terminal);
InternalTree(const std::string & line, const bool terminal = false);
InternalTree(const std::string & line, const bool nonterminal = true);
InternalTree(const InternalTree & tree):
m_value(tree.m_value),
m_isTerminal(tree.m_isTerminal) {
m_value(tree.m_value) {
const std::vector<TreePointer> & children = tree.m_children;
for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
m_children.push_back(boost::make_shared<InternalTree>(**it));
@ -40,20 +38,10 @@ public:
void Combine(const std::vector<TreePointer> &previous);
void Unbinarize();
void GetUnbinarizedChildren(std::vector<TreePointer> &children) const;
const std::string & GetLabel() const {
const Word & GetLabel() const {
return m_value;
}
// optionally identify label by int instead of string;
// allows abstraction if multiple nonterminal strings should map to same label.
const NTLabel & GetNTLabel() const {
return m_value_nt;
}
void SetNTLabel(NTLabel value) {
m_value_nt = value;
}
size_t GetLength() const {
return m_children.size();
}
@ -62,38 +50,22 @@ public:
}
bool IsTerminal() const {
return m_isTerminal;
return !m_value.IsNonTerminal();
}
bool IsLeafNT() const {
return (!m_isTerminal && m_children.size() == 0);
return (m_value.IsNonTerminal() && m_children.size() == 0);
}
// different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
// can be used for formulating syntax constraints.
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
bool FlatSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// pass vector of possible labels to search
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
bool RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// Python-like generator that yields next nonterminal leaf on every call
$generator(leafNT) {

View File

@ -1,4 +1,4 @@
// -*- c++ -*-
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#pragma once
#include <string>

View File

@ -1,6 +1,5 @@
// -*- c++ -*-
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#pragma once
#include <vector>
#include <string>
@ -12,7 +11,6 @@
#include "moses/WordsBitmap.h"
#include "moses/TranslationOption.h"
#include "moses/FF/FFState.h"
#include "ReorderingStack.h"
namespace Moses

View File

@ -75,7 +75,7 @@ void Model1Vocabulary::Load(const std::string& fileName)
++i;
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
unsigned id = Scan<unsigned>(tokens[0]);
unsigned id = atoll( tokens[0].c_str() );
if (! ( (id == 1) && (tokens[1] == "UNK") )) {
const Factor* factor = factorCollection.AddFactor(tokens[1],false); // TODO: can we assume that the vocabulary is know and filter the model on loading?
bool stored = Store(factor, id);
@ -86,7 +86,7 @@ void Model1Vocabulary::Load(const std::string& fileName)
++i;
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
unsigned id = Scan<unsigned>(tokens[0]);
unsigned id = atoll( tokens[0].c_str() );
const Factor* factor = factorCollection.AddFactor(tokens[1],false); // TODO: can we assume that the vocabulary is know and filter the model on loading?
bool stored = Store(factor, id);
UTIL_THROW_IF2(!stored, "Line " << i << " in " << fileName << " overwrites existing vocabulary entry.");
@ -105,11 +105,11 @@ void Model1LexicalTable::Load(const std::string &fileName, const Model1Vocabular
++i;
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
unsigned idS = Scan<unsigned>(tokens[0]);
unsigned idT = Scan<unsigned>(tokens[1]);
unsigned idS = atoll( tokens[0].c_str() );
unsigned idT = atoll( tokens[1].c_str() );
const Factor* wordS = vcbS.GetWord(idS);
const Factor* wordT = vcbT.GetWord(idT);
float prob = Scan<float>(tokens[2]);
float prob = std::atof( tokens[2].c_str() );
if ( (wordS != NULL) && (wordT != NULL) ) {
m_ltable[ wordS ][ wordT ] = prob;
}

View File

@ -134,7 +134,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
if (targetPhrase.GetAlignNonTerm().GetSize() != 0) {
// Initialize phrase orientation scoring object
MosesTraining::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
MosesTraining::Syntax::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
targetPhrase.GetAlignTerm(), targetPhrase.GetAlignNonTerm());
PhraseOrientationFeature::ReoClassData* reoClassData = new PhraseOrientationFeature::ReoClassData();
@ -150,7 +150,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
// LEFT-TO-RIGHT DIRECTION
MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_L2R);
MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::Syntax::GHKM::PhraseOrientation::REO_DIR_L2R);
if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary)
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
@ -170,7 +170,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
if (reoClassData->firstNonTerminalPreviousSourceSpanIsAligned &&
reoClassData->firstNonTerminalFollowingSourceSpanIsAligned) {
// discontinuous
l2rOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
l2rOrientation = MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
} else {
reoClassData->firstNonTerminalIsBoundary = true;
}
@ -180,7 +180,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
// RIGHT-TO-LEFT DIRECTION
MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_R2L);
MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::Syntax::GHKM::PhraseOrientation::REO_DIR_R2L);
if ( ((targetIndex == targetPhrase.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,targetPhrase.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary)
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
@ -200,7 +200,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
if (reoClassData->lastNonTerminalPreviousSourceSpanIsAligned &&
reoClassData->lastNonTerminalFollowingSourceSpanIsAligned) {
// discontinuous
r2lOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
r2lOrientation = MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
} else {
reoClassData->lastNonTerminalIsBoundary = true;
}
@ -335,25 +335,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
// LEFT-TO-RIGHT DIRECTION
MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
IFFEATUREVERBOSE(2) {
FEATUREVERBOSE(2, "l2rOrientation ");
switch (l2rOrientation) {
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT:
FEATUREVERBOSE2(2, "mono" << std::endl);
break;
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
FEATUREVERBOSE2(2, "swap" << std::endl);
break;
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
FEATUREVERBOSE2(2, "dleft" << std::endl);
break;
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
FEATUREVERBOSE2(2, "dright" << std::endl);
break;
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_MSLR
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
break;
default:
@ -396,23 +396,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
} else {
if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
if ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
newScores[0] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono());
// if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
} else if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
} else if ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
newScores[1] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
// if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
} else if ( ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
} else if ( ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
// if sub-derivation has left-boundary non-terminal:
@ -437,25 +437,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
// RIGHT-TO-LEFT DIRECTION
MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
IFFEATUREVERBOSE(2) {
FEATUREVERBOSE(2, "r2lOrientation ");
switch (r2lOrientation) {
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT:
FEATUREVERBOSE2(2, "mono" << std::endl);
break;
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
FEATUREVERBOSE2(2, "swap" << std::endl);
break;
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
FEATUREVERBOSE2(2, "dleft" << std::endl);
break;
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
FEATUREVERBOSE2(2, "dright" << std::endl);
break;
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_MSLR
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
break;
default:
@ -498,23 +498,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
} else {
if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
if ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
newScores[m_offsetR2LScores+0] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono());
// if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
} else if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
} else if ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
newScores[m_offsetR2LScores+1] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
// if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
} else if ( ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
} else if ( ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
// if sub-derivation has right-boundary non-terminal:
@ -862,17 +862,17 @@ void PhraseOrientationFeature::SparseNonTerminalR2LScore(const Factor* nonTermin
}
const std::string* PhraseOrientationFeature::ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const
const std::string* PhraseOrientationFeature::ToString(const MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS o) const
{
if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
if ( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
return &MORIENT;
} else if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
} else if ( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
return &SORIENT;
} else if ( ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
} else if ( ( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
return &DORIENT;
} else {

View File

@ -302,8 +302,8 @@ public:
struct ReoClassData {
public:
std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
std::vector<MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
std::vector<MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
bool firstNonTerminalIsBoundary;
bool firstNonTerminalPreviousSourceSpanIsAligned;
bool firstNonTerminalFollowingSourceSpanIsAligned;
@ -401,7 +401,7 @@ protected:
ScoreComponentCollection* scoreBreakdown,
const std::string* o) const;
const std::string* ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const;
const std::string* ToString(const MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS o) const;
static const std::string MORIENT;
static const std::string SORIENT;

View File

@ -16,21 +16,29 @@ namespace Moses
PhrasePairFeature::PhrasePairFeature(const std::string &line)
:StatelessFeatureFunction(0, line)
,m_unrestricted(false)
,m_simple(true)
,m_sourceContext(false)
,m_domainTrigger(false)
,m_ignorePunctuation(false)
{
std::cerr << "Initializing PhrasePairFeature.." << std::endl;
VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
ReadParameters();
if (m_simple == 1) std::cerr << "using simple phrase pairs.. ";
if (m_sourceContext == 1) std::cerr << "using source context.. ";
if (m_domainTrigger == 1) std::cerr << "using domain triggers.. ";
if (m_simple == 1) VERBOSE(1, " Using simple phrase pairs.");
if (m_sourceContext == 1) VERBOSE(1, " Using source context.");
if (m_domainTrigger == 1) VERBOSE(1, " Using domain triggers.");
// compile a list of punctuation characters
if (m_ignorePunctuation) {
std::cerr << "ignoring punctuation for triggers.. ";
VERBOSE(1, " Ignoring punctuation for triggers.");
char punctuation[] = "\"'!?¿·()#_,.:;•&@/\\0123456789~=";
for (size_t i=0; i < sizeof(punctuation)-1; ++i)
for (size_t i=0; i < sizeof(punctuation)-1; ++i) {
m_punctuationHash[punctuation[i]] = 1;
}
}
VERBOSE(1, " Done." << std::endl);
}
void PhrasePairFeature::SetParameter(const std::string& key, const std::string& value)
@ -76,7 +84,7 @@ void PhrasePairFeature::Load()
}
inFileSource.close();
} else {
} else if (!m_unrestricted) {
// restricted source word vocabulary
ifstream inFileSource(m_filePathSource.c_str());
UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);
@ -101,8 +109,6 @@ void PhrasePairFeature::Load()
}
inFileTarget.close();*/
m_unrestricted = false;
}
}
@ -114,25 +120,6 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
, ScoreComponentCollection *estimatedFutureScore) const
{
const Phrase& source = inputPath.GetPhrase();
if (m_simple) {
ostringstream namestr;
namestr << "pp_";
namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
namestr << ",";
namestr << sourceFactor->GetString();
}
namestr << "~";
namestr << targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString();
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
namestr << ",";
namestr << targetFactor->GetString();
}
scoreBreakdown.SparsePlusEquals(namestr.str(),1);
}
if (m_domainTrigger) {
const Sentence& isnt = static_cast<const Sentence&>(input);
const bool use_topicid = isnt.GetUseTopicId();
@ -140,18 +127,18 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
// compute pair
ostringstream pair;
pair << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
pair << ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() );
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
pair << ",";
pair << sourceFactor->GetString();
pair << "~";
pair << ReplaceTilde( sourceFactor->GetString() );
}
pair << "~";
pair << targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString();
pair << "~~";
pair << ReplaceTilde( targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString() );
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
pair << ",";
pair << targetFactor->GetString();
pair << "~";
pair << ReplaceTilde( targetFactor->GetString() );
}
if (use_topicid || use_topicid_prob) {
@ -159,7 +146,7 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
// use topicid as trigger
const long topicid = isnt.GetTopicId();
stringstream feature;
feature << "pp_";
feature << m_description << "_";
if (topicid == -1)
feature << "unk";
else
@ -173,13 +160,13 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
const vector<string> &topicid_prob = *(isnt.GetTopicIdAndProb());
if (atol(topicid_prob[0].c_str()) == -1) {
stringstream feature;
feature << "pp_unk_";
feature << m_description << "_unk_";
feature << pair.str();
scoreBreakdown.SparsePlusEquals(feature.str(), 1);
} else {
for (size_t i=0; i+1 < topicid_prob.size(); i+=2) {
stringstream feature;
feature << "pp_";
feature << m_description << "_";
feature << topicid_prob[i];
feature << "_";
feature << pair.str();
@ -193,7 +180,7 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
for (set<string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
string sourceTrigger = *p;
ostringstream namestr;
namestr << "pp_";
namestr << m_description << "_";
namestr << sourceTrigger;
namestr << "_";
namestr << pair.str();
@ -221,21 +208,21 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
if (m_unrestricted || sourceTriggerExists) {
ostringstream namestr;
namestr << "pp_";
namestr << m_description << "_";
namestr << sourceTrigger;
namestr << "~";
namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
namestr << ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() );
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
namestr << ",";
namestr << sourceFactor->GetString();
namestr << "~";
namestr << ReplaceTilde( sourceFactor->GetString() );
}
namestr << "~";
namestr << targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString();
namestr << "~~";
namestr << ReplaceTilde( targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString() );
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
namestr << ",";
namestr << targetFactor->GetString();
namestr << "~";
namestr << ReplaceTilde( targetFactor->GetString() );
}
scoreBreakdown.SparsePlusEquals(namestr.str(),1);
@ -244,6 +231,31 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
}
}
void PhrasePairFeature::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
if (m_simple) {
ostringstream namestr;
namestr << m_description << "_";
namestr << ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() );
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
namestr << "~";
namestr << ReplaceTilde( sourceFactor->GetString() );
}
namestr << "~~";
namestr << ReplaceTilde( targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString() );
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
namestr << "~";
namestr << ReplaceTilde( targetFactor->GetString() );
}
scoreBreakdown.SparsePlusEquals(namestr.str(),1);
}
}
bool PhrasePairFeature::IsUseable(const FactorMask &mask) const
{
bool ret = mask[m_targetFactorId];

View File

@ -1,5 +1,4 @@
#ifndef moses_PhrasePairFeature_h
#define moses_PhrasePairFeature_h
#pragma once
#include <stdexcept>
#include <boost/unordered_set.hpp>
@ -32,6 +31,16 @@ class PhrasePairFeature: public StatelessFeatureFunction
CharHash m_punctuationHash;
std::string m_filePathSource;
inline std::string ReplaceTilde(const StringPiece &str) const {
std::string out = str.as_string();
size_t pos = out.find('~');
while ( pos != std::string::npos ) {
out.replace(pos,1,"<TILDE>");
pos = out.find('~',pos);
}
return out;
};
public:
PhrasePairFeature(const std::string &line);
@ -43,8 +52,7 @@ public:
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {
}
, ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
@ -69,5 +77,3 @@ public:
}
#endif

View File

@ -12,7 +12,7 @@ namespace Moses
{
RulePairUnlexicalizedSource::RulePairUnlexicalizedSource(const std::string &line)
: StatelessFeatureFunction(0, line)
: StatelessFeatureFunction(1, line)
, m_glueRules(false)
, m_nonGlueRules(true)
, m_glueTargetLHSStr("Q")
@ -81,6 +81,9 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
}
scoreBreakdown.PlusEquals(this, namestr.str(), 1);
if ( targetPhraseLHS != m_glueTargetLHS ) {
scoreBreakdown.PlusEquals(this, 1);
}
}
}

View File

@ -34,7 +34,7 @@ public:
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
vector<float> newScores(m_numScoreComponents);
std::vector<float> newScores(m_numScoreComponents);
newScores[0] = translationOptionList.size();
TranslationOptionList::const_iterator iterTransOpt;

View File

@ -13,6 +13,7 @@ namespace Moses
SoftMatchingFeature::SoftMatchingFeature(const std::string &line)
: StatelessFeatureFunction(0, line)
, m_softMatches(moses_MaxNumNonterminals)
, m_scoreIdentical(true)
{
ReadParameters();
}
@ -26,6 +27,8 @@ void SoftMatchingFeature::SetParameter(const std::string& key, const std::string
} else if (key == "path") {
const std::string filePath = value;
Load(filePath);
} else if (key == "score-identical") {
m_scoreIdentical = Scan<bool>(value);
} else {
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
}
@ -80,8 +83,10 @@ void SoftMatchingFeature::EvaluateWhenApplied(const ChartHypothesis& hypo,
const ChartHypothesis* prevHypo = hypo.GetPrevHypo(nonTermInd);
const Word& prevLHS = prevHypo->GetTargetLHS();
const std::string &name = GetOrSetFeatureName(word, prevLHS);
accumulator->PlusEquals(this,name,1);
if ( (word != prevLHS) || m_scoreIdentical ) {
const std::string &name = GetOrSetFeatureName(word, prevLHS);
accumulator->PlusEquals(this,name,1);
}
}
}
}

View File

@ -55,6 +55,7 @@ public:
private:
mutable std::vector<std::vector<Word> > m_softMatches; // map RHS of new rule to list of possible LHS of old rule (subtree)
mutable std::vector<std::vector<std::string> > m_nameCache;
bool m_scoreIdentical;
#ifdef WITH_THREADS
//reader-writer lock

View File

@ -38,9 +38,8 @@ void SourceWordDeletionFeature::SetParameter(const std::string& key, const std::
void SourceWordDeletionFeature::Load()
{
if (m_filename == "") {
if (m_filename.empty())
return;
}
FEATUREVERBOSE(1, "Loading source word deletion word list from " << m_filename << std::endl);
ifstream inFile(m_filename.c_str());

Some files were not shown because too many files have changed in this diff Show More