Merge branch 'master' into miramerge

Conflicts:
	moses-chart-cmd/src/Main.cpp
	moses-cmd/src/IOWrapper.cpp
	moses-cmd/src/Main.cpp
	moses/src/DummyScoreProducers.cpp
	moses/src/DummyScoreProducers.h
	moses/src/GenerationDictionary.cpp
	moses/src/GenerationDictionary.h
	moses/src/GlobalLexicalModel.h
	moses/src/LMList.h
	moses/src/LanguageModel.cpp
	moses/src/LanguageModel.h
	moses/src/LanguageModelImplementation.h
	moses/src/LanguageModelKen.h
	moses/src/LanguageModelMultiFactor.cpp
	moses/src/LanguageModelMultiFactor.h
	moses/src/LanguageModelSingleFactor.cpp
	moses/src/LanguageModelSingleFactor.h
	moses/src/LexicalReordering.h
	moses/src/PhraseDictionary.cpp
	moses/src/PhraseDictionary.h
	moses/src/ScoreIndexManager.cpp
	moses/src/ScoreProducer.h
	moses/src/StaticData.cpp
	moses/src/StaticData.h
	moses/src/TranslationSystem.cpp
This commit is contained in:
bhaddow 2011-09-20 11:23:38 +01:00
commit 5b7c5ebdb5
180 changed files with 27179 additions and 33753 deletions

27
cruise-control/README Normal file
View File

@ -0,0 +1,27 @@
A simple regular testing of Moses codebase, aka cruise control
Started by Ondrej Bojar
2011-08-28
Usage:
1. Checkout this directory somewhere on the computer where you want to run the
cruise control.
2. Create as many config files as you wish, an example is ondrej-nb.config
...hardcode all paths to your preferred external tools like LM toolkits.
3. Run ./test_all_new_commits.sh <CONFIGFILE>
TODO / KNOWN BUGS
- regression tests are not run yet
- regression tests always require SRILM, but we need to test all LMs that have
been compiled in
=> add separate regression tests, one for each LM?
=> modify regression tests to actually loop over all LMs?
- final status is FAIL if any regression test fails, but we should actually
allow to expect failures for the given set of ./configure parameters
(e.g. regression test requiring irstlm is bound to fail if we're not linking
against irstlm)

View File

@ -0,0 +1,128 @@
#!/bin/bash
# given a config file runs tests on all untested commits of the scanned branches
# storing detailed logs to logs/CONFIGNAME/commit
# and extending the file brief.log
#
# A commit is assumed to be tested, if logs/CONFIGNAME/commit exists
#
# Ondrej Bojar, 2011
# warn MESSAGE... : print a message on stderr and keep going
warn() { echo "$@" 1>&2; }
# die MESSAGE... : print a message on stderr and abort with exit code 1
die() { warn "$@"; exit 1; }
set -o pipefail # safer pipes
configf="$1"
[ -e "$configf" ] || die "usage: $0 configfile"
configname=$(basename $configf | sed 's/\.config$//')
source "$configf"
[ -z "$MCC_SCAN_BRANCHES" ] \
&& die "Bad config $configf; does not define MCC_SCAN_BRANCHES"
# use the given tempdir or make subdir tmp here
USE_TEMPDIR=$MCC_TEMPDIR
[ -d "$USE_TEMPDIR" ] || USE_TEMPDIR=./tmp
LOGDIR=$MCC_LOGDIR
[ -d "$LOGDIR" ] || LOGDIR=.
# ensure full path for logdir
LOGDIR=$(readlink -f "$LOGDIR")
[ -d "$LOGDIR" ] || die "Fatal: confusing readlink for $LOGDIR"
# this is where moses is cloned into
WORKDIR=$MCC_WORKDIR
[ -d "$WORKDIR" ] || WORKDIR=$USE_TEMPDIR/workdir
# this is where moses is taken from
GITREPO=$MCC_GITREPO
[ -d "$GITREPO" ] || GITREPO=/home/obo/moses-at-google-code
# Make sure we have a moses clone in $WORKDIR: clone on first run,
# otherwise just fetch the latest refs from $GITREPO.
if [ ! -d "$WORKDIR" ]; then
  # -p: the parent may already exist (e.g. ./tmp); plain mkdir would fail then
  mkdir -p "$(dirname "$WORKDIR")" || die "Failed to create workdir $WORKDIR"
  warn "Cloning $GITREPO into $WORKDIR"
  git clone "$GITREPO" "$WORKDIR" \
    || die "Failed to git clone into workdir $WORKDIR"
else
  ( cd "$WORKDIR" && git fetch ) \
    || die "Failed to update our clone at $WORKDIR"
fi
# per-config log directory; one log file per tested commit lives here
mkdir -p "$LOGDIR/logs/$configname" \
  || die "Failed to create dir $LOGDIR/logs/$configname"
#### How is one test performed
# run_single_test COMMIT
#   Checks out COMMIT in $WORKDIR, runs regenerate-makefiles / configure /
#   make, writes a detailed log to $LOGDIR/logs/$configname/COMMIT and
#   appends a one-line summary to $LOGDIR/brief.log.
#   A commit whose long log already exists is considered tested and skipped.
function run_single_test () {
  commit=$1
  longlog="$LOGDIR/logs/$configname/$commit"
  if [ -e "$longlog" ]; then
    # Commit already tested
    return
  fi
  warn "Testing commit $commit"

  # Get the version of this script (svn checkout, git checkout, or unknown)
  ccversion=$(svnversion 2>/dev/null)
  [ ! -z "$ccversion" ] || ccversion=$(git show 2>&1 | head -n 1)
  [ ! -z "$ccversion" ] || ccversion="unknown"

  # Create log header with computer details:
  echo "#### Moses Cruise Control Log for commit $commit" > $longlog
  date >> $longlog
  echo "## Cruise Control version" >> $longlog
  echo $ccversion >> $longlog
  echo "## Parameters" >> $longlog
  cat $configf >> $longlog
  # fixed typo in the log header (was "Envinronment")
  echo "## Environment" >> $longlog
  uname -a >> $longlog
  env >> $longlog

  pushd $WORKDIR 2>/dev/null >/dev/null || die "Failed to chdir to $WORKDIR"
  git checkout --force $commit 2>/dev/null || die "Failed to checkout commit $commit"

  # $err remembers the FIRST failing step; later steps are skipped.
  err=""
  echo "## regenerate-makefiles.sh" >> $longlog
  ./regenerate-makefiles.sh >> $longlog 2>&1 || err="regenerate-makefiles"

  echo "## make clean" >> $longlog
  make clean >> $longlog 2>&1 || warn "make clean failed, suspicious"

  echo "## ./configure $MCC_CONFIGURE_ARGS" >> $longlog
  # Use explicit if-blocks here: the previous '[ -z "$err" ] && step || err=...'
  # chains overwrote $err whenever an EARLIER step had failed, so brief.log
  # always blamed the last step instead of the first failing one.
  if [ -z "$err" ]; then
    ./configure $MCC_CONFIGURE_ARGS >> $longlog 2>&1 || err="configure"
  fi

  echo "## make" >> $longlog
  if [ -z "$err" ]; then
    make >> $longlog 2>&1 || err="make"
  fi

  cd regression-testing
  echo "## Not running any regression tests yet." >> $longlog

  echo "## Finished" >> $longlog
  date >> $longlog
  if [ -z "$err" ]; then
    status="OK"
  else
    status="FAIL:$err"
  fi
  echo "## Status: $status" >> $longlog
  nicedate=$(date +"%Y%m%d-%H%M%S")
  echo "$commit $status $configname $ccversion $nicedate" \
    >> "$LOGDIR/brief.log"
  popd > /dev/null 2> /dev/null
}
#### Main loop over all commits
# List every commit reachable from the configured branches and test each one;
# run_single_test itself skips commits that already have a log file.
# NOTE(review): the while loop runs in a pipeline subshell, so 'die' (exit 1)
# terminates the loop but presumably not the whole script -- confirm whether
# a failure here is meant to abort everything or just stop iterating.
( cd "$WORKDIR" && git rev-list $MCC_SCAN_BRANCHES ) \
| while read commit; do
run_single_test $commit || die "Testing failed, stopping the loop."
done

View File

@ -368,6 +368,10 @@
GCC_ENABLE_FIX_AND_CONTINUE = YES;
GCC_MODEL_TUNING = G5;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
_LARGE_FILES,
"_FILE_OFFSET_BITS=64",
);
INSTALL_PATH = /usr/local/lib;
PRODUCT_NAME = kenlm;
};
@ -379,6 +383,10 @@
ALWAYS_SEARCH_USER_PATHS = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
GCC_MODEL_TUNING = G5;
GCC_PREPROCESSOR_DEFINITIONS = (
_LARGE_FILES,
"_FILE_OFFSET_BITS=64",
);
INSTALL_PATH = /usr/local/lib;
PRODUCT_NAME = kenlm;
};

View File

@ -6,14 +6,18 @@
*
*/
#include <cassert>
#include <fstream>
#include "Scorer.h"
#include "ScorerFactory.h"
#include "Data.h"
#include "Util.h"
Data::Data(Scorer& ptr):
theScorer(&ptr)
theScorer(&ptr),
_sparse_flag(false)
{
score_type = (*theScorer).getName();
TRACE_ERR("Data::score_type " << score_type << std::endl);
@ -40,7 +44,6 @@ void Data::loadnbest(const std::string &file)
std::string theSentence;
std::string::size_type loc;
while (getline(inp,stringBuf,'\n')) {
if (stringBuf.empty()) continue;
@ -56,16 +59,15 @@ void Data::loadnbest(const std::string &file)
featentry.reset();
scoreentry.clear();
theScorer->prepareStats(sentence_index, theSentence, scoreentry);
scoredata->add(scoreentry, sentence_index);
getNextPound(stringBuf, substring, "|||"); //third field
// examine first line for name of features
if (!existsFeatureNames()) {
std::string stringsupport=substring;
// adding feature names
std::string features="";
std::string tmpname="";
@ -75,10 +77,17 @@ void Data::loadnbest(const std::string &file)
getNextPound(stringsupport, subsubstring);
// string ending with ":" are skipped, because they are the names of the features
if ((loc = subsubstring.find(":")) != subsubstring.length()-1) {
if ((loc = subsubstring.find_last_of(":")) != subsubstring.length()-1) {
features+=tmpname+"_"+stringify(tmpidx)+" ";
tmpidx++;
} else {
}
// ignore sparse feature name
else if (subsubstring.find("_") != string::npos) {
// also ignore its value
getNextPound(stringsupport, subsubstring);
}
// update current feature name
else {
tmpidx=0;
tmpname=subsubstring.substr(0,subsubstring.size() - 1);
}
@ -87,22 +96,36 @@ void Data::loadnbest(const std::string &file)
featdata->setFeatureMap(features);
}
// adding features
// adding features
while (!substring.empty()) {
// TRACE_ERR("Decompounding: " << substring << std::endl);
getNextPound(substring, subsubstring);
// string ending with ":" are skipped, because they are the names of the features
if ((loc = subsubstring.find(":")) != subsubstring.length()-1) {
// no ':' -> feature value that needs to be stored
if ((loc = subsubstring.find_last_of(":")) != subsubstring.length()-1) {
featentry.add(ATOFST(subsubstring.c_str()));
}
// sparse feature name? store as well
else if (subsubstring.find("_") != string::npos) {
std::string name = subsubstring;
getNextPound(substring, subsubstring);
featentry.addSparse( name, atof(subsubstring.c_str()) );
_sparse_flag = true;
}
}
//cerr << "number of sparse features: " << featentry.getSparse().size() << endl;
featdata->add(featentry,sentence_index);
}
inp.close();
}
// TODO: implement sparse-feature merging for classic MERT.
// Stub: classic MERT cannot train sparse features, so instead of merging
// this reports the limitation and terminates the program.
void Data::mergeSparseFeatures() {
std::cerr << "ERROR: sparse features can only be trained with pairwise ranked optimizer (PRO), not traditional MERT\n";
exit(1);
}
// really not the right place...
float sentenceLevelBleuPlusOne( ScoreStats &stats ) {
float logbleu = 0.0;
@ -144,7 +167,7 @@ public:
};
void Data::sample_ranked_pairs( const std::string &rankedpairfile ) {
void Data::sampleRankedPairs( const std::string &rankedpairfile ) {
cout << "Sampling ranked pairs." << endl;
ofstream *outFile = new ofstream();
@ -187,20 +210,15 @@ void Data::sample_ranked_pairs( const std::string &rankedpairfile ) {
for(unsigned int i=0; i<samples.size() && collected < n_samples; i++) {
if (samples[i]->getDiff() >= min_diff) {
collected++;
FeatureStats &f1 = featdata->get(S,samples[i]->getTranslation1());
FeatureStats &f2 = featdata->get(S,samples[i]->getTranslation2());
*out << "1";
for(unsigned int j=0; j<f1.size(); j++)
if (abs(f1.get(j)-f2.get(j)) > 0.00001)
*out << " F" << j << " " << (f1.get(j)-f2.get(j));
*out << endl;
outputSample( *out, featdata->get(S,samples[i]->getTranslation1()),
featdata->get(S,samples[i]->getTranslation2()) );
*out << endl;
*out << "0";
for(unsigned int j=0; j<f1.size(); j++)
if (abs(f1.get(j)-f2.get(j)) > 0.00001)
*out << " F" << j << " " << (f2.get(j)-f1.get(j));
*out << endl;
outputSample( *out, featdata->get(S,samples[i]->getTranslation2()),
featdata->get(S,samples[i]->getTranslation1()) );
*out << endl;
}
delete samples[i];
}
@ -209,3 +227,77 @@ void Data::sample_ranked_pairs( const std::string &rankedpairfile ) {
out->flush();
outFile->close();
}
// Write the feature-value differences (f1 - f2) of one sampled translation
// pair to 'out', for use as a PRO training example. Dense features are
// emitted as "F<index> <diff>", sparse features under their own names;
// differences below the 0.00001 threshold are suppressed.
void Data::outputSample( ostream &out, const FeatureStats &f1, const FeatureStats &f2 )
{
// difference in score in regular features
// NOTE(review): plain 'abs' on a floating-point difference may resolve to
// the integer overload depending on headers -- confirm fabs is not needed.
for(unsigned int j=0; j<f1.size(); j++)
if (abs(f1.get(j)-f2.get(j)) > 0.00001)
out << " F" << j << " " << (f1.get(j)-f2.get(j));
if (!hasSparseFeatures())
return;
// sparse features
const sparse_featstats_t &s1 = f1.getSparse();
const sparse_featstats_t &s2 = f2.getSparse();
// features present in f1: emit value (if missing in f2) or the difference
for( sparse_featstats_t::const_iterator i=s1.begin(); i!=s1.end(); i++) {
if (s2.find(i->first) == s2.end())
out << " " << i->first << " " << i->second;
else {
float diff = i->second - s2.find(i->first)->second;
if (abs(diff) > 0.00001)
out << " " << i->first << " " << diff;
}
}
// features only present in f2 contribute their negated value
for( sparse_featstats_t::const_iterator i=s2.begin(); i!=s2.end(); i++) {
if (s1.find(i->first) == s1.end())
out << " " << i->first << " " << (- i->second);
}
}
// Split this Data object into shard_count shards (see the header comment:
// shard_size == 0 gives equal-size disjoint shards that exhaust the data;
// 0 < shard_size <= 1 samples that proportion of sentences with replacement).
void Data::createShards(size_t shard_count, float shard_size, const string& scorerconfig,
std::vector<Data>& shards)
{
assert(shard_count);
assert(shard_size >=0);
assert(shard_size <= 1);
size_t data_size = scoredata->size();
assert(data_size == featdata->size());
// from here on shard_size is an absolute sample count, not a proportion
shard_size *= data_size;
for (size_t shard_id = 0; shard_id < shard_count; ++shard_id) {
vector<size_t> shard_contents;
if (shard_size == 0) {
//split into roughly equal size shards
size_t shard_start = floor(0.5 + shard_id * (float)data_size / shard_count);
size_t shard_end = floor(0.5 + (shard_id+1) * (float)data_size / shard_count);
for (size_t i = shard_start; i < shard_end; ++i) {
shard_contents.push_back(i);
}
} else {
//create shards by randomly sampling
for (size_t i = 0; i < floor(shard_size+0.5); ++i) {
shard_contents.push_back(rand() % data_size);
}
}
// each shard gets its own scorer instance
// NOTE(review): 'scorer' is heap-allocated and only referenced by the shard;
// nothing visible here deletes it -- confirm who owns it (possible leak).
ScorerFactory SF;
Scorer* scorer = SF.getScorer(score_type, scorerconfig);
shards.push_back(Data(*scorer));
shards.back().score_type = score_type;
shards.back().number_of_scores = number_of_scores;
shards.back()._sparse_flag = _sparse_flag;
// copy the selected sentences' feature and score entries into the shard
for (size_t i = 0; i < shard_contents.size(); ++i) {
shards.back().featdata->add(featdata->get(shard_contents[i]));
shards.back().scoredata->add(scoredata->get(shard_contents[i]));
}
//cerr << endl;
}
}

View File

@ -31,10 +31,10 @@ private:
Scorer* theScorer;
std::string score_type;
size_t number_of_scores; //number of scores
bool _sparse_flag;
public:
Data(Scorer& sc);
~Data() {};
inline void clear() {
@ -49,6 +49,10 @@ public:
return featdata;
};
Scorer* getScorer() {
return theScorer;
}
inline size_t NumberOfFeatures() const {
return featdata->NumberOfFeatures();
}
@ -62,11 +66,16 @@ public:
featdata->Features(f);
}
inline bool hasSparseFeatures() const { return _sparse_flag; }
void mergeSparseFeatures();
void loadnbest(const std::string &file);
void load(const std::string &featfile,const std::string &scorefile) {
featdata->load(featfile);
scoredata->load(scorefile);
if (featdata->hasSparseFeatures())
_sparse_flag = true;
}
void save(const std::string &featfile,const std::string &scorefile, bool bin=false) {
@ -90,8 +99,17 @@ public:
return featdata->getFeatureIndex(name);
};
void sample_ranked_pairs( const std::string &rankedPairFile );
void sampleRankedPairs( const std::string &rankedPairFile );
void outputSample( std::ostream &out, const FeatureStats &f1, const FeatureStats &f2 );
/**
* Create shard_count shards. If shard_size == 0, then the shards are non-overlapping
* and exhaust the data. If 0 < shard_size <= 1, then shards are chosen by sampling
* the data (with replacement) and shard_size is interpreted as the proportion
* of the total size.
*/
void createShards(size_t shard_count, float shard_size, const std::string& scorerconfig,
std::vector<Data>& shards);
};
#endif

View File

@ -11,7 +11,7 @@
#include "Util.h"
FeatureArray::FeatureArray(): idx("")
FeatureArray::FeatureArray(): idx(""), _sparse_flag(false)
{};
void FeatureArray::savetxt(std::ofstream& outFile)
@ -69,6 +69,8 @@ void FeatureArray::loadtxt(ifstream& inFile, size_t n)
for (size_t i=0 ; i < n; i++) {
entry.loadtxt(inFile);
add(entry);
if (entry.getSparse().size()>0)
_sparse_flag = true;
}
}

View File

@ -30,6 +30,7 @@ protected:
featarray_t array_;
size_t number_of_features;
std::string features;
bool _sparse_flag;
private:
std::string idx; // idx to identify the utterance, it can differ from the index inside the vector
@ -43,6 +44,10 @@ public:
array_.clear();
}
inline bool hasSparseFeatures() const {
return _sparse_flag;
}
inline std::string getIndex() {
return idx;
}

View File

@ -51,9 +51,12 @@ void FeatureData::load(ifstream& inFile)
if (entry.size() == 0)
break;
if (size() == 0) {
if (size() == 0)
setFeatureMap(entry.Features());
}
if (entry.hasSparseFeatures())
_sparse_flag = true;
add(entry);
}
}

View File

@ -26,10 +26,10 @@ protected:
idx2name idx2arrayname_; //map from index to name of array
name2idx arrayname2idx_; //map from name to index of array
private:
size_t number_of_features;
std::string features;
bool _sparse_flag;
map<std::string, size_t> featname2idx_; //map from name to index of features
map<size_t, std::string> idx2featname_; //map from index to name of features
@ -43,6 +43,9 @@ public:
array_.clear();
}
inline bool hasSparseFeatures() const {
return _sparse_flag;
}
inline FeatureArray get(const std::string& idx) {
return array_.at(getIndex(idx));
}

View File

@ -21,7 +21,7 @@ FeatureStats::FeatureStats()
FeatureStats::~FeatureStats()
{
delete array_;
delete[] array_;
};
FeatureStats::FeatureStats(const FeatureStats &stats)
@ -30,6 +30,7 @@ FeatureStats::FeatureStats(const FeatureStats &stats)
entries_ = stats.size();
array_ = new FeatureStatsType[available_];
memcpy(array_,stats.getArray(),featbytes_);
map_ = stats.getSparse();
};
FeatureStats::FeatureStats(const size_t size)
@ -61,6 +62,11 @@ void FeatureStats::add(FeatureStatsType v)
array_[entries_++]=v;
}
void FeatureStats::addSparse(string name, FeatureStatsType v)
{
map_[name]=v;
}
void FeatureStats::set(std::string &theString)
{
std::string substring, stringBuf;
@ -68,7 +74,15 @@ void FeatureStats::set(std::string &theString)
while (!theString.empty()) {
getNextPound(theString, substring);
add(ATOFST(substring.c_str()));
// regular feature
if (substring.find(":") == string::npos) {
add(ATOFST(substring.c_str()));
}
// sparse feature
else {
size_t separator = substring.find_last_of(":");
addSparse(substring.substr(0,separator), atof(substring.substr(separator+1).c_str()) );
}
}
}
@ -123,6 +137,7 @@ FeatureStats& FeatureStats::operator=(const FeatureStats &stats)
entries_ = stats.size();
array_ = new FeatureStatsType[available_];
memcpy(array_,stats.getArray(),featbytes_);
map_ = stats.getSparse();
return *this;
}
@ -131,7 +146,14 @@ FeatureStats& FeatureStats::operator=(const FeatureStats &stats)
/**write the whole object to a stream*/
ostream& operator<<(ostream& o, const FeatureStats& e)
{
for (size_t i=0; i< e.size(); i++)
// print regular features
for (size_t i=0; i< e.size(); i++) {
o << e.get(i) << " ";
}
// sparse features
const sparse_featstats_t &sparse = e.getSparse();
for(sparse_featstats_t::const_iterator i = sparse.begin(); i != sparse.end(); i++) {
o << i->first << i->second << " ";
}
return o;
}

View File

@ -26,6 +26,7 @@ class FeatureStats
{
private:
featstats_t array_;
sparse_featstats_t map_;
size_t entries_;
size_t available_;
@ -43,9 +44,11 @@ public:
}
void expand();
void add(FeatureStatsType v);
void addSparse(string name, FeatureStatsType v);
inline void clear() {
memset((void*) array_,0,featbytes_);
map_.clear();
}
inline FeatureStatsType get(size_t i) {
@ -57,6 +60,9 @@ public:
inline featstats_t getArray() const {
return array_;
}
inline sparse_featstats_t getSparse() const {
return map_;
}
void set(std::string &theString);

View File

@ -1,97 +1,36 @@
lib_LTLIBRARIES = libmert.la
bin_PROGRAMS = mert extractor evaluator
mert_SOURCES = Util.cpp \
Timer.cpp \
ScoreStats.cpp ScoreArray.cpp ScoreData.cpp \
FeatureStats.cpp FeatureArray.cpp FeatureData.cpp \
Data.cpp \
BleuScorer.cpp \
Point.cpp \
PerScorer.cpp \
Scorer.cpp \
Optimizer.cpp \
mert.cpp \
TERsrc/alignmentStruct.cpp \
TERsrc/hashMap.cpp \
TERsrc/hashMapStringInfos.cpp \
TERsrc/segmentStructure.cpp \
TERsrc/stringHasher.cpp \
TERsrc/terAlignment.cpp \
TERsrc/terShift.cpp \
TERsrc/tinyxml.cpp \
TERsrc/tinyxmlparser.cpp \
TERsrc/documentStructure.cpp \
TERsrc/hashMapInfos.cpp \
TERsrc/infosHasher.cpp \
TERsrc/stringInfosHasher.cpp \
TERsrc/tercalc.cpp \
TERsrc/tinystr.cpp \
TERsrc/tinyxmlerror.cpp \
TERsrc/tools.cpp \
TerScorer.cpp \
CderScorer.cpp
AM_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE $(BOOST_CPPFLAGS)
extractor_SOURCES = Util.cpp \
Timer.cpp \
ScoreStats.cpp ScoreArray.cpp ScoreData.cpp \
FeatureStats.cpp FeatureArray.cpp FeatureData.cpp \
Data.cpp \
BleuScorer.cpp \
Point.cpp \
PerScorer.cpp \
Scorer.cpp \
Optimizer.cpp \
extractor.cpp \
TERsrc/alignmentStruct.cpp \
TERsrc/hashMap.cpp \
TERsrc/hashMapStringInfos.cpp \
TERsrc/segmentStructure.cpp \
TERsrc/stringHasher.cpp \
TERsrc/terAlignment.cpp \
TERsrc/terShift.cpp \
TERsrc/tinyxml.cpp \
TERsrc/tinyxmlparser.cpp \
TERsrc/documentStructure.cpp \
TERsrc/hashMapInfos.cpp \
TERsrc/infosHasher.cpp \
TERsrc/stringInfosHasher.cpp \
TERsrc/tercalc.cpp \
TERsrc/tinystr.cpp \
TERsrc/tinyxmlerror.cpp \
TERsrc/tools.cpp \
TerScorer.cpp \
CderScorer.cpp
evaluator_SOURCES = Util.cpp \
evaluator.cpp \
libmert_la_SOURCES = \
Util.cpp \
Timer.cpp \
ScoreStats.cpp ScoreArray.cpp ScoreData.cpp \
FeatureStats.cpp FeatureArray.cpp FeatureData.cpp \
Data.cpp \
BleuScorer.cpp \
Point.cpp \
PerScorer.cpp \
Scorer.cpp \
Optimizer.cpp \
TERsrc/alignmentStruct.cpp \
TERsrc/hashMap.cpp \
TERsrc/hashMapStringInfos.cpp \
TERsrc/segmentStructure.cpp \
TERsrc/stringHasher.cpp \
TERsrc/terAlignment.cpp \
TERsrc/terShift.cpp \
TERsrc/tinyxml.cpp \
TERsrc/tinyxmlparser.cpp \
TERsrc/documentStructure.cpp \
TERsrc/hashMapInfos.cpp \
TERsrc/infosHasher.cpp \
TERsrc/stringInfosHasher.cpp \
TERsrc/tercalc.cpp \
TERsrc/tinystr.cpp \
TERsrc/tinyxmlerror.cpp \
TERsrc/tools.cpp \
TerScorer.cpp \
CderScorer.cpp
mert_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE
extractor_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE
evaluator_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE
mert_SOURCES = mert.cpp $(top_builddir)/moses/src/ThreadPool.cpp
extractor_SOURCES = extractor.cpp
evaluator_SOURCES = evaluator.cpp
extractor_LDADD = -lm -lz
mert_LDADD = -lm -lz
evaluator_LDADD = -lm -lz
extractor_LDADD = libmert.la -lm -lz
mert_LDADD = libmert.la -lm -lz $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
evaluator_LDADD = libmert.la -lm -lz

View File

@ -100,6 +100,15 @@ Point Point::operator+(const Point& p2)const
return Res;
};
// Element-wise in-place addition of another point's weights.
// Both points must have the same dimensionality.
void Point::operator+=(const Point& p2)
{
assert(p2.size()==size());
for(unsigned i=0; i<size(); i++)
operator[](i)+=p2[i];
// the cached score no longer matches the new weights; mark it stale
score=numeric_limits<statscore_t>::max();
};
Point Point::operator*(float l)const
{
Point Res(*this);

View File

@ -33,6 +33,12 @@ public:
static unsigned int getpdim() {
return pdim;
}
static void setpdim(size_t pd) {
pdim = pd;
}
static void setdim(size_t d) {
dim = d;
}
static bool OptimizeAll() {
return fixedweights.empty();
};
@ -46,6 +52,7 @@ public:
double operator*(const FeatureStats&)const;//compute the feature function
Point operator+(const Point&)const;
void operator+=(const Point&);
Point operator*(float)const;
/**write the Whole featureweight to a stream (ie pdim float)*/
friend ostream& operator<<(ostream& o,const Point& P);

View File

@ -21,7 +21,7 @@ ScoreStats::ScoreStats()
ScoreStats::~ScoreStats()
{
delete array_;
delete[] array_;
};
ScoreStats::ScoreStats(const ScoreStats &stats)

View File

@ -1,181 +0,0 @@
#include "documentStructure.h"
using namespace std;
namespace TERCpp
{
string documentStructure::toString()
{
stringstream s;
// s << "nword : " << vectorToString(nwords)<<endl;
// s << "alignment" << vectorToString(alignment)<<endl;
// s << "afterShift" << vectorToString(alignment)<<endl;
s << "Nothing to be printed" << endl;
return s.str();
}
string documentStructure::getDocId()
{
return docId;
}
vector< segmentStructure >* documentStructure::getSegments()
{
return &seg;
}
string documentStructure::getSysId()
{
return sysId;
}
// float documentStructure::getAverageLength()
// {
// return averageLength;
// }
// void documentStructure::setAverageLength(float f)
// {
// averageLength=f;
// }
void documentStructure::addSegments ( segmentStructure s )
{
seg.push_back ( s );
}
void documentStructure::addSegments ( string id, string text )
{
segmentStructure tmp_seg ( id, text );
seg.push_back ( tmp_seg );
}
segmentStructure* documentStructure::getLastSegments()
{
return & seg.at ( ( int ) seg.size() - 1 );
}
void documentStructure::setDocId ( string s )
{
docId = s;
}
void documentStructure::setSysId ( string s )
{
sysId = s;
}
segmentStructure* documentStructure::getSegment ( string id )
{
for ( int i = 0; i < ( int ) seg.size(); i++ ) {
if ( id.compare ( seg.at ( i ).getSegId() ) == 0 ) {
return & ( seg.at ( i ) );
}
}
cerr << "ERROR : documentStructure::getSegment : Segment " << id << " does not exist" <<endl;
cerr << "Segment size " << seg.size()<< endl;
for (int i=0; i<(int)seg.size(); i++) {
cerr <<seg.at(i).getSegId()<<endl;
}
exit(0);
}
int documentStructure::getSize()
{
return ( int ) seg.size();
}
// documentStructure::documentStructure()
// {
// // vector<string> ref;
// // vector<string> hyp;
// // vector<string> aftershift;
//
// // documentStructure[] allshifts = null;
//
// numEdits=0;
// numWords=0;
// bestRef="";
//
// numIns=0;
// numDel=0;
// numSub=0;
// numSft=0;
// numWsf=0;
// }
// documentStructure::documentStructure ()
// {
// start = 0;
// end = 0;
// moveto = 0;
// newloc = 0;
// cost=1.0;
// }
// documentStructure::documentStructure (int _start, int _end, int _moveto, int _newloc)
// {
// start = _start;
// end = _end;
// moveto = _moveto;
// newloc = _newloc;
// cost=1.0;
// }
// documentStructure::documentStructure (int _start, int _end, int _moveto, int _newloc, vector<string> _shifted)
// {
// start = _start;
// end = _end;
// moveto = _moveto;
// newloc = _newloc;
// shifted = _shifted;
// cost=1.0;
// }
// string documentStructure::vectorToString(vector<string> vec)
// {
// string retour("");
// for (vector<string>::iterator vecIter=vec.begin();vecIter!=vec.end(); vecIter++)
// {
// retour+=(*vecIter)+"\t";
// }
// return retour;
// }
// string documentStructure::toString()
// {
// stringstream s;
// s.str("");
// s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
// if ((int)shifted.size() > 0)
// {
// s << " (" << vectorToString(shifted) << ")";
// }
// return s.str();
// }
/* The distance of the shift. */
// int documentStructure::distance()
// {
// if (moveto < start)
// {
// return start - moveto;
// }
// else if (moveto > end)
// {
// return moveto - end;
// }
// else
// {
// return moveto - start;
// }
// }
//
// bool documentStructure::leftShift()
// {
// return (moveto < start);
// }
//
// int documentStructure::size()
// {
// return (end - start) + 1;
// }
// documentStructure documentStructure::operator=(documentStructure t)
// {
//
// return t;
// }
}

View File

@ -1,60 +0,0 @@
#ifndef __DOCUMENTSTRUCTURE_H__
#define __DOCUMENTSTRUCTURE_H__
#include <vector>
#include <stdio.h>
#include <string>
#include <sstream>
#include "tools.h"
#include "segmentStructure.h"
using namespace std;
using namespace Tools;
namespace TERCpp
{
class documentStructure
{
private:
string docId;
string sysId;
vector<segmentStructure> seg;
public:
string getDocId();
string getSysId();
vector<segmentStructure>* getSegments();
segmentStructure* getLastSegments();
void setDocId ( string s );
void setSysId ( string s );
void addSegments ( segmentStructure s );
void addSegments ( string id, string text );
segmentStructure* getSegment ( string id );
int getSize();
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
// alignmentStruct (int _start, int _end, int _moveto, int _newloc, vector<string> _shifted);
// string toString();
// int distance() ;
// bool leftShift();
// int size();
// alignmentStruct operator=(alignmentStruct t);
// string vectorToString(vector<string> vec);
// int start;
// int end;
// int moveto;
// int newloc;
// vector<string> nwords; // The words we shifted
// vector<char> alignment ; // for pra_more output
// vector<vecInt> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to
// calculate it multiple times.
// double cost;
string toString();
};
}
#endif

View File

@ -3,7 +3,6 @@
*/
#ifndef __HASHMAP_H__
#define __HASHMAP_H__
#include <boost/functional/hash.hpp>
#include "stringHasher.h"
#include <vector>
#include <string>

View File

@ -3,7 +3,6 @@
*/
#ifndef __HASHMAPINFOS_H__
#define __HASHMAPINFOS_H__
#include <boost/functional/hash.hpp>
#include "infosHasher.h"
#include <vector>
#include <string>

View File

@ -3,7 +3,6 @@
*/
#ifndef __HASHMAPSTRINGINFOS_H__
#define __HASHMAPSTRINGINFOS_H__
#include <boost/functional/hash.hpp>
#include "stringInfosHasher.h"
#include <vector>
#include <string>

View File

@ -1,332 +0,0 @@
#include "multiEvaluation.h"
// #include <iostream>
// #include <boost/filesystem/fstream.hpp>
// #include <boost/archive/xml_oarchive.hpp>
// #include <boost/archive/xml_iarchive.hpp>
// #include <boost/serialization/nvp.hpp>
// helper functions to allow us to load and save sandwiches to/from xml
namespace TERCpp
{
multiEvaluation::multiEvaluation()
{
evalParameters.debugMode = false;
evalParameters.caseOn = false;
evalParameters.noPunct = false;
evalParameters.normalize = false;
evalParameters.tercomLike = false;
evalParameters.sgmlInputs = false;
evalParameters.noTxtIds = false;
// referencesTxt=new multiTxtDocument();
// hypothesisTxt=new documentStructure();
}
multiEvaluation::multiEvaluation ( param p )
{
evalParameters.debugMode = false;
evalParameters.caseOn = false;
evalParameters.noPunct = false;
evalParameters.normalize = false;
evalParameters.tercomLike = false;
evalParameters.sgmlInputs = false;
evalParameters.noTxtIds = false;
evalParameters = Tools::copyParam ( p );
// referencesTxt=new multiTxtDocument();
// hypothesisTxt=new documentStructure();
}
void multiEvaluation::addReferences()
{
referencesTxt.loadRefFiles ( evalParameters );
}
// void multiEvaluation::addReferences(vector< string > vecRefecrences)
// {
// for (int i=0; i< (int) vecRefecrences.size(); i++)
// {
// referencesTxt.loadFile(vecRefecrences.at(i));
// }
// }
void multiEvaluation::setHypothesis()
{
multiTxtDocument l_multiTxtTmp;
l_multiTxtTmp.loadHypFile ( evalParameters );
hypothesisTxt = (*(l_multiTxtTmp.getDocument ( "0" )));
}
void multiEvaluation::setParameters ( param p )
{
evalParameters = Tools::copyParam ( p );
}
void multiEvaluation::launchTxtEvaluation()
{
if (evalParameters.debugMode) {
cerr <<"DEBUG tercpp : multiEvaluation::launchTxtEvaluation : before testing references and hypothesis size "<<endl<<"END DEBUG"<<endl;
}
if ( referencesTxt.getSize() == 0 ) {
cerr << "ERROR : multiEvaluation::launchTxtEvaluation : there is no references" << endl;
exit ( 0 );
}
if ( hypothesisTxt.getSize() == 0 ) {
cerr << "ERROR : multiEvaluation::launchTxtEvaluation : there is no hypothesis" << endl;
exit ( 0 );
}
if (evalParameters.debugMode) {
cerr <<"DEBUG tercpp : multiEvaluation::launchTxtEvaluation : testing references and hypothesis size "<<endl<<" number of references : "<< referencesTxt.getSize()<<endl;
vector <string> s =referencesTxt.getListDocuments();
cerr << " avaiable ids : ";
for (vector <string>::iterator iterS=s.begin(); iterS!=s.end(); iterS++) {
cerr << " " << (*iterS);
}
cerr << endl;
for (vector <string>::iterator iterSBis=s.begin(); iterSBis!=s.end(); iterSBis++) {
cerr << " reference : "+(*iterSBis)+"; size : "<< (referencesTxt.getDocument((*iterSBis)))->getSize() << endl;
}
cerr << " hypothesis size : "<< hypothesisTxt.getSize() << endl<<"END DEBUG"<<endl;
}
int incDocRefences = 0;
stringstream l_stream;
vector<float> editsResults;
vector<float> wordsResults;
int tot_ins = 0;
int tot_del = 0;
int tot_sub = 0;
int tot_sft = 0;
int tot_wsf = 0;
float tot_err = 0;
float tot_wds = 0;
// vector<stringInfosHasher> setOfHypothesis = hashHypothesis.getHashMap();
ofstream outputSum ( ( evalParameters.hypothesisFile + ".output.sum.log" ).c_str() );
outputSum << "Hypothesis File: " + evalParameters.hypothesisFile + "\nReference File: " + evalParameters.referenceFile + "\n" + "Ave-Reference File: " << endl;
char outputCharBuffer[200];
sprintf ( outputCharBuffer, "%19s | %4s | %4s | %4s | %4s | %4s | %6s | %8s | %8s", "Sent Id", "Ins", "Del", "Sub", "Shft", "WdSh", "NumEr", "AvNumWd", "TER");
outputSum << outputCharBuffer << endl;
outputSum << "-------------------------------------------------------------------------------------" << endl;
vector <string> referenceList =referencesTxt.getListDocuments();
for (vector <string>::iterator referenceListIter=referenceList.begin(); referenceListIter!=referenceList.end(); referenceListIter++) {
// cerr << " " << (*referenceListIter);
documentStructure l_reference = (*(referencesTxt.getDocument ( (*referenceListIter) )));
evaluate ( l_reference, hypothesisTxt );
// evaluate ( l_reference);
}
// for ( incDocRefences = 0; incDocRefences < referencesTxt.getSize();incDocRefences++ )
// {
// l_stream.str ( "" );
// l_stream << incDocRefences;
// }
for ( vector<segmentStructure>::iterator segHypIt = hypothesisTxt.getSegments()->begin(); segHypIt != hypothesisTxt.getSegments()->end(); segHypIt++ ) {
terAlignment l_result = segHypIt->getAlignment();
string bestDocId = segHypIt->getBestDocId();
string l_id=segHypIt->getSegId();
editsResults.push_back(l_result.numEdits);
wordsResults.push_back(l_result.numWords);
l_result.scoreDetails();
tot_ins += l_result.numIns;
tot_del += l_result.numDel;
tot_sub += l_result.numSub;
tot_sft += l_result.numSft;
tot_wsf += l_result.numWsf;
tot_err += l_result.numEdits;
tot_wds += l_result.averageWords;
char outputCharBufferTmp[200];
sprintf(outputCharBufferTmp, "%19s | %4d | %4d | %4d | %4d | %4d | %6.1f | %8.3f | %8.3f",(l_id+":"+bestDocId).c_str(), l_result.numIns, l_result.numDel, l_result.numSub, l_result.numSft, l_result.numWsf, l_result.numEdits, l_result.averageWords, l_result.scoreAv()*100.0);
outputSum<< outputCharBufferTmp<<endl;
if (evalParameters.debugMode) {
cerr <<"DEBUG tercpp : multiEvaluation::launchTxtEvaluation : Evaluation "<<endl<< l_result.toString() <<endl<<"END DEBUG"<<endl;
}
}
cout << "Total TER: " << scoreTER ( editsResults, wordsResults );
char outputCharBufferTmp[200];
outputSum << "-------------------------------------------------------------------------------------" << endl;
sprintf ( outputCharBufferTmp, "%19s | %4d | %4d | %4d | %4d | %4d | %6.1f | %8.3f | %8.3f", "TOTAL", tot_ins, tot_del, tot_sub, tot_sft, tot_wsf, tot_err, tot_wds, tot_err*100.0 / tot_wds );
outputSum << outputCharBufferTmp << endl;
outputSum.close();
}
/// Align every hypothesis segment against the segment with the same id in
/// one reference document, and keep per hypothesis segment the best
/// (lowest-scoring) TER alignment seen so far across all reference
/// documents, together with the id of the reference document it came from.
/// @param docStructReference one reference document
/// @param docStructhypothesis the hypothesis document (updated in place)
void multiEvaluation::evaluate ( documentStructure& docStructReference, documentStructure& docStructhypothesis )
{
  if (evalParameters.debugMode) {
    cerr <<"DEBUG tercpp : multiEvaluation::evaluate : launching evaluate on "<<endl<<" references size : "<< docStructReference.getSize() << endl << " hypothesis size : "<< docStructhypothesis.getSize() << endl<<"END DEBUG"<<endl;
  }
  if (evalParameters.debugMode) {
    cerr <<"DEBUG tercpp : multiEvaluation::evaluate : testing hypothesis "<<endl;
    cerr <<" segId : "<< docStructhypothesis.getSegments()->at(0).getSegId() << endl<<"END DEBUG"<<endl;
  }
  for ( vector<segmentStructure>::iterator segHypIt = docStructhypothesis.getSegments()->begin(); segHypIt != docStructhypothesis.getSegments()->end(); segHypIt++ ) {
    // BUG FIX: the calculator used to be heap-allocated with `new` and was
    // never deleted, leaking one terCalc per hypothesis segment.  A stack
    // instance has identical behaviour without the leak.
    terCalc l_evalTER;
    segmentStructure * l_segRef = docStructReference.getSegment ( segHypIt->getSegId() );
    terAlignment l_result = l_evalTER.TER ( segHypIt->getContent(), l_segRef->getContent() );
    // The TER denominator is the (average) reference length, not the
    // hypothesis length.
    l_result.averageWords = l_segRef->getAverageLength();
    if (l_result.averageWords==0.0) {
      cerr << "ERROR : tercpp : multiEvaluation::evaluate : averageWords is equal to zero" <<endl;
      exit(0);
    }
    l_segRef->setAlignment ( l_result );
    // A hypothesis segment with no alignment yet (0 words and 0 edits)
    // adopts this result unconditionally; afterwards a result only
    // replaces the stored one when its score is strictly better (lower).
    if ((segHypIt->getAlignment().numWords == 0) && (segHypIt->getAlignment().numEdits == 0 )) {
      segHypIt->setAlignment ( l_result );
      segHypIt->setBestDocId ( docStructReference.getDocId() );
    } else if ( l_result.scoreAv() < segHypIt->getAlignment().scoreAv() ) {
      segHypIt->setAlignment ( l_result );
      segHypIt->setBestDocId ( docStructReference.getDocId() );
    }
    if (evalParameters.debugMode) {
      cerr <<"DEBUG tercpp : multiEvaluation::evaluate : testing "<<endl<<" hypothesis : "<< segHypIt->getSegId() <<endl;
      cerr << "hypothesis score : "<< segHypIt->getAlignment().scoreAv() <<endl;
      cerr << "BestDoc Id : "<< segHypIt->getBestDocId() <<endl;
      cerr << "new score : "<< l_result.scoreAv() <<endl;
      cerr << "new BestDoc Id : "<< docStructReference.getDocId() <<endl;
      cerr << endl<<"END DEBUG"<<endl;
    }
  }
  if (evalParameters.debugMode) {
    cerr <<"DEBUG tercpp : multiEvaluation::evaluate : "<<endl<<"End of function"<<endl<<"END DEBUG"<<endl;
  }
}
/// Compute the corpus-level TER: sum of edit counts divided by the sum of
/// (average) word counts, formatted as "score (edits/words)\n".
/// A zero word count yields 1 when there are edits and 0 otherwise, so the
/// division never happens with a zero denominator.
/// @param numEdits per-segment edit counts
/// @param numWords per-segment word counts (parallel to numEdits)
/// @return the formatted score string
string multiEvaluation::scoreTER ( vector<float> numEdits, vector<float> numWords )
{
  if ( numWords.size() != numEdits.size() ) {
    cerr << "ERROR : tercpp:score, diffrent size of hyp and ref" << endl;
    exit ( 0 );
  }
  double totalEdits = 0.0;
  double totalWords = 0.0;
  for ( size_t i = 0; i < numEdits.size(); ++i ) {
    totalEdits += numEdits[i];
    totalWords += numWords[i];
  }
  // Pick the ratio first, then format once.
  double ratio;
  if ( totalWords > 0.0 ) {
    ratio = totalEdits / totalWords;
  } else if ( totalEdits > 0.0 ) {
    ratio = 1.0;
  } else {
    ratio = 0.0;
  }
  stringstream formatted;
  formatted << ratio << " (" << totalEdits << "/" << totalWords << ")" << endl;
  return formatted.str();
}
/// Run the TER evaluation for SGML input: score every hypothesis segment
/// against each reference document, then write a per-segment summary table
/// to "<hypothesisFile>.output.sum.log" and print the total TER to stdout.
void multiEvaluation::launchSGMLEvaluation()
{
if (evalParameters.debugMode) {
cerr <<"DEBUG tercpp : multiEvaluation::launchSGMLEvaluation : before testing references and hypothesis size "<<endl<<"END DEBUG"<<endl;
}
// Both sides must have been loaded beforehand (addSGMLReferences /
// setSGMLHypothesis); abort otherwise.
if ( referencesSGML.getSize() == 0 ) {
cerr << "ERROR : multiEvaluation::launchSGMLEvaluation : there is no references" << endl;
exit ( 0 );
}
if ( hypothesisSGML.getSize() == 0 ) {
cerr << "ERROR : multiEvaluation::launchSGMLEvaluation : there is no hypothesis" << endl;
exit ( 0 );
}
if (evalParameters.debugMode) {
cerr <<"DEBUG tercpp : multiEvaluation::launchSGMLEvaluation : testing references and hypothesis size "<<endl<<" references size : "<< referencesSGML.getSize() << endl << " hypothesis size : "<< hypothesisSGML.getSize() << endl<<"END DEBUG"<<endl;
}
int incDocRefences = 0;
stringstream l_stream;
// Per-segment inputs for the corpus-level score.
vector<float> editsResults;
vector<float> wordsResults;
// Corpus-wide accumulators for the TOTAL line of the summary table.
int tot_ins = 0;
int tot_del = 0;
int tot_sub = 0;
int tot_sft = 0;
int tot_wsf = 0;
float tot_err = 0;
float tot_wds = 0;
// vector<stringInfosHasher> setOfHypothesis = hashHypothesis.getHashMap();
ofstream outputSum ( ( evalParameters.hypothesisFile + ".output.sum.log" ).c_str() );
outputSum << "Hypothesis File: " + evalParameters.hypothesisFile + "\nReference File: " + evalParameters.referenceFile + "\n" + "Ave-Reference File: " << endl;
char outputCharBuffer[200];
sprintf ( outputCharBuffer, "%19s | %4s | %4s | %4s | %4s | %4s | %6s | %8s | %8s", "Sent Id", "Ins", "Del", "Sub", "Shft", "WdSh", "NumEr", "AvNumWd", "TER");
outputSum << outputCharBuffer << endl;
outputSum << "-------------------------------------------------------------------------------------" << endl;
// SGML reference documents are keyed by their insertion index ("0", "1", ...).
for ( incDocRefences = 0; incDocRefences < referencesSGML.getSize(); incDocRefences++ ) {
l_stream.str ( "" );
l_stream << incDocRefences;
documentStructure l_reference = (*(referencesSGML.getDocument ( l_stream.str() )));
evaluate ( l_reference, hypothesisSGML );
}
// After all references were seen, each hypothesis segment holds its best
// alignment; emit one table row per segment and accumulate the totals.
for ( vector<segmentStructure>::iterator segHypIt = hypothesisSGML.getSegments()->begin(); segHypIt != hypothesisSGML.getSegments()->end(); segHypIt++ ) {
terAlignment l_result = segHypIt->getAlignment();
string bestDocId = segHypIt->getBestDocId();
string l_id=segHypIt->getSegId();
editsResults.push_back(l_result.numEdits);
// NOTE(review): this pushes averageWords while the txt path pushes
// numWords here — confirm which denominator is intended.
wordsResults.push_back(l_result.averageWords);
l_result.scoreDetails();
tot_ins += l_result.numIns;
tot_del += l_result.numDel;
tot_sub += l_result.numSub;
tot_sft += l_result.numSft;
tot_wsf += l_result.numWsf;
tot_err += l_result.numEdits;
tot_wds += l_result.averageWords;
char outputCharBufferTmp[200];
sprintf(outputCharBufferTmp, "%19s | %4d | %4d | %4d | %4d | %4d | %6.1f | %8.3f | %8.3f",(l_id+":"+bestDocId).c_str(), l_result.numIns, l_result.numDel, l_result.numSub, l_result.numSft, l_result.numWsf, l_result.numEdits, l_result.averageWords, l_result.scoreAv()*100.0);
outputSum<< outputCharBufferTmp<<endl;
if (evalParameters.debugMode) {
cerr <<"DEBUG tercpp : multiEvaluation::launchSGMLEvaluation : Evaluation "<<endl<< l_result.toString() <<endl<<"END DEBUG"<<endl;
}
}
cout << "Total TER: " << scoreTER ( editsResults, wordsResults );
char outputCharBufferTmp[200];
outputSum << "-------------------------------------------------------------------------------------" << endl;
sprintf ( outputCharBufferTmp, "%19s | %4d | %4d | %4d | %4d | %4d | %6.1f | %8.3f | %8.3f", "TOTAL", tot_ins, tot_del, tot_sub, tot_sft, tot_wsf, tot_err, tot_wds, tot_err*100.0 / tot_wds );
outputSum << outputCharBufferTmp << endl;
outputSum.close();
}
void multiEvaluation::addSGMLReferences()
{
xmlStructure refStruct;
refStruct.xmlParams=copyParam(evalParameters);
referencesSGML=refStruct.dump_to_SGMLDocument(evalParameters.referenceFile);
}
void multiEvaluation::setSGMLHypothesis()
{
SGMLDocument sgmlHyp;
xmlStructure hypStruct;
hypStruct.xmlParams=copyParam(evalParameters);
hypStruct.xmlParams.tercomLike=false;
sgmlHyp=hypStruct.dump_to_SGMLDocument(evalParameters.hypothesisFile);
hypothesisSGML=(*(sgmlHyp.getFirstDocument()));
}
}

View File

@ -1,44 +0,0 @@
#ifndef __MULTIEVAL_DOCUMENT_H__
#define __MULTIEVAL_DOCUMENT_H__
#include "multiTxtDocument.h"
#include "tools.h"
#include <iostream>
#include <string>
#include "xmlStructure.h"
#include "sgmlDocument.h"
using namespace Tools;
namespace TERCpp
{
/// Drives a TER evaluation of one hypothesis against one or more reference
/// documents, for plain-text or SGML input.  Typical use: load references
/// and hypothesis, then call launchTxtEvaluation() or launchSGMLEvaluation().
class multiEvaluation
{
public:
multiEvaluation();
multiEvaluation(param p );
// void addReferences(string s);
// void addReferences(vector<string> vecRefecrences);
// void addReferences(documentStructure doc);
// void setHypothesis(string s);
// void setHypothesis(documentStructure doc);
// Load the reference/hypothesis file(s) named in the parameters (txt input).
void addReferences();
void setHypothesis();
// Same, for SGML input.
void addSGMLReferences();
void setSGMLHypothesis();
void setParameters ( param p );
// Run the full evaluation and write the per-segment summary log.
void launchTxtEvaluation();
void launchSGMLEvaluation();
// Score every hypothesis segment against one reference document, keeping
// the best alignment per segment.
void evaluate ( documentStructure & docStructReference, documentStructure & docStructhypothesis );
// Corpus-level TER string: sum(edits)/sum(words) plus the raw counts.
string scoreTER ( vector<float> numEdits, vector<float> numWords );
private:
param evalParameters;
multiTxtDocument referencesTxt;
documentStructure hypothesisTxt;
SGMLDocument referencesSGML;
documentStructure hypothesisSGML;
};
}
#endif //SANDWICH_DEFINED

View File

@ -1,347 +0,0 @@
#include "multiTxtDocument.h"
// #include <iostream>
// #include <boost/filesystem/fstream.hpp>
// #include <boost/archive/xml_oarchive.hpp>
// #include <boost/archive/xml_iarchive.hpp>
// #include <boost/serialization/nvp.hpp>
// helper functions to allow us to load and save sandwiches to/from xml
namespace TERCpp
{
// Default constructor: nothing to initialise, the document list starts
// empty.  The commented fields belonged to an older SGML-oriented version.
multiTxtDocument::multiTxtDocument()
{
// docType="";
// setId="";
// srcLang="";
// tgtLang="";
}
// multiTxtDocument::multiTxtDocument ( string FileName )
// {
// this=xmlStruct.copy_to_multiTxtDocument(FileName);
// }
// xmlStructure multiTxtDocument::getStructure()
// {
// return xmlStruct;
// }
// string multiTxtDocument::getDocType()
// {
// return docType;
// }
// string multiTxtDocument::getSetId()
// {
// return setId;
// }
// string multiTxtDocument::getSrcLang()
// {
// return srcLang;
// }
// string multiTxtDocument::getTgtLang()
// {
// return tgtLang;
// }
// void multiTxtDocument::setDocType ( string s )
// {
// docType=s;
// }
// void multiTxtDocument::setSetId ( string s )
// {
// setId=s;
// }
// void multiTxtDocument::setSrcLang ( string s )
// {
// srcLang=s;
// }
// void multiTxtDocument::setTgtLang ( string s )
// {
// tgtLang=s;
// }
// Append a document (taken by value) to the collection.
void multiTxtDocument::addDocument ( documentStructure doc )
{
documents.push_back ( doc );
}
// Pointer to the most recently added document.
// NOTE(review): with an empty list, (int)size()-1 is -1, which wraps to a
// huge index and makes at() throw std::out_of_range rather than invoke UB.
documentStructure* multiTxtDocument::getLastDocument()
{
return & ( documents.at ( ( int ) documents.size() - 1 ) );
}
// Return a copy of the whole document list.
vector< documentStructure > multiTxtDocument::getDocuments()
{
return documents;
}
/// Collect the ids of all loaded documents, in load order.
/// @return one id string per document
vector< string > multiTxtDocument::getListDocuments()
{
  vector< string > docIds;
  docIds.reserve ( documents.size() );
  for ( size_t docIdx = 0; docIdx < documents.size(); ++docIdx ) {
    docIds.push_back ( documents[docIdx].getDocId() );
  }
  return docIds;
}
/// Look up a document by id (linear search).  Aborts the program with an
/// error message when the id is unknown.
/// @param docId the id to search for
/// @return pointer to the stored document
documentStructure* multiTxtDocument::getDocument ( string docId )
{
  for ( vector<documentStructure>::iterator docIt = documents.begin(); docIt != documents.end(); ++docIt ) {
    if ( docIt->getDocId() == docId ) {
      return & ( *docIt );
    }
  }
  cerr << "ERROR : multiTxtDocument::getDocument : document " << docId << " does not exist !" << endl;
  exit ( 0 );
}
/// Load one plain-text file as a new document: one segment per line,
/// optionally normalised (tokenization, lower-casing, punctuation removal).
/// Segment ids are either sequential numbers (noTxtIds) or parsed from a
/// trailing "(id)" marker on each line.  The new document's id is its index
/// in the collection.  Aborts on a missing id marker or unreadable file.
void multiTxtDocument::loadFile ( string fileName, bool caseOn, bool noPunct, bool debugMode, bool noTxtIds, bool tercomLike )
{
if ( multiTxtDocumentParams.debugMode ) {
cerr << "DEBUG tercpp : multiTxtDocument::loadFile : loading files " << endl << fileName << endl << "END DEBUG" << endl;
cerr << "DEBUG tercpp : multiTxtDocument::loadFile : testing params " << endl << Tools::printParams ( multiTxtDocumentParams ) << endl << "END DEBUG" << endl;
cerr << "DEBUG tercpp : multiTxtDocument::loadFile : testing others params " << endl << "caseOn : " << caseOn << endl << "noPunct : " << noPunct << endl << "debugMode : " << debugMode << endl << "noTxtIds : " << noTxtIds << endl << "tercomLike : " << tercomLike << endl << "END DEBUG" << endl;
}
ifstream fichierLoad ( fileName.c_str(), ios::in );
string line;
documentStructure l_doc;
if ( fichierLoad ) {
int l_ids = 1;
stringstream l_stream;
while ( getline ( fichierLoad, line ) ) {
string l_key;
string line_mod;
l_stream.str ( "" );
if ( noTxtIds ) {
// No ids in the file: number segments sequentially starting at 1.
l_stream << l_ids;
l_key = l_stream.str();
line_mod = line;
l_ids++;
} else {
// Ids expected as a trailing "(id)"; abort if the marker is missing.
if ((int)line.rfind ( "(" )==-1) {
cerr << "ERROR : multiTxtDocument::loadFile : Id not found, maybe you should use the --noTxtIds Option ? " << endl;
exit ( 0 );
}
l_key = line.substr ( line.rfind ( "(" ), line.size() - 1 );
line_mod = line.substr ( 0, line.rfind ( "(" ) - 1 );
}
if ( multiTxtDocumentParams.debugMode ) {
cerr << "DEBUG multiTxtDocument::loadFile : line NOT tokenized |" << line_mod << "|" << endl << "END DEBUG" << endl;
}
// Normalisation pipeline: punctuation tokenization (unless tercom-like),
// then optional lower-casing, then optional punctuation removal.
if ( !tercomLike ) {
if ( multiTxtDocumentParams.debugMode ) {
cerr << "DEBUG tercpp : multiTxtDocument::loadFile : " << endl << "TERCOM AT FALSE " << endl << "END DEBUG" << endl;
}
line_mod = tokenizePunct ( line_mod );
}
if ( !caseOn ) {
if ( multiTxtDocumentParams.debugMode ) {
cerr << "DEBUG tercpp : multiTxtDocument::loadFile : " << endl << "CASEON AT FALSE " << endl << "END DEBUG" << endl;
}
line_mod = lowerCase ( line_mod );
}
if ( noPunct ) {
if ( multiTxtDocumentParams.debugMode ) {
cerr << "DEBUG tercpp : multiTxtDocument::loadFile : " << endl << "NOPUNCT AT TRUE " << endl << "END DEBUG" << endl;
}
if ( !tercomLike ) {
line_mod = removePunctTercom ( line_mod );
} else {
line_mod = removePunct ( line_mod );
}
}
if ( multiTxtDocumentParams.debugMode ) {
cerr << "DEBUG multiTxtDocument::loadFile : line tokenized |" << line_mod << "|" << endl << "END DEBUG" << endl;
}
// Split the normalised line on spaces and store it as one segment.
vector<string> vecDocLine = stringToVector ( line_mod, " " );
// string l_key;
// hashHypothesis.addValue(l_key,vecDocLine);
// l_key=(string)vecDocLine.at((int)vecDocLine.size()-1);
// vecDocLine.pop_back();
if ( multiTxtDocumentParams.debugMode ) {
cerr << "DEBUG tercpp multiTxtDocument::loadFile : " << l_key << "|" << vectorToString ( vecDocLine ) << "|" << endl << "Vector Size : " << vecDocLine.size() << endl << "Line length : " << ( int ) line_mod.length() << endl << "END DEBUG" << endl;
}
// hashHypothesis.addValue(l_key,vecDocLine);
segmentStructure l_seg ( l_key, vecDocLine );
l_doc.addSegments ( l_seg );
}
// Ref=line;
// getline ( fichierHyp, line );
// Hyp=line;
fichierLoad.close(); // close the input file
l_stream.str ( "" );
l_stream << ( int ) documents.size();
// The document id is its position in the collection.
l_doc.setDocId ( l_stream.str() );
addDocument ( l_doc );
if ( multiTxtDocumentParams.debugMode ) {
cerr << "DEBUG multiTxtDocument::loadFile : document " << l_doc.getDocId() << " added !!!" << endl << "END DEBUG" << endl;
}
} else { // otherwise the file could not be opened
cerr << "ERROR : multiTxtDocument::loadFile : can't open file : " + fileName + " !" << endl;
exit ( 0 );
}
}
// void save_sandwich(const multiTxtDocument &sw, const std::string &file_name);
// multiTxtDocument load_sandwich(const std::string &file_name);
// int callmultiTxtDocument()
// {
// // xml filename
// const std::string fn="JasonsSarnie.xml";
//
// // create a new sandwich and lets take a look at it!
// multiTxtDocument *s = new multiTxtDocument("Granary", "Brie", "Bacon", false); // mmmmm, Brie and bacon! ;)
// std::cout << "Created the following sandwich:" << std::endl;
// s->output();
//
// // Now lets save the sandwich out to an XML file....
// std::cout << std::endl << "Saving the sandwich to xml...." << std::endl;
// save_sandwich(*s, fn);
//
// // And then load it into another multiTxtDocument variable and take a look at what we've got
// std::cout << "Attempting to load the saved sandwich..." << std::endl;
// multiTxtDocument s2 = load_sandwich(fn);
// std::cout << "Contents of loaded multiTxtDocument:" << std::endl;
// s2.output();
//
// delete s;
// std::string dummy;
// std::getline(std::cin, dummy);
//
// }
/*
// Save a multiTxtDocument to XML...
void save_sandwich(const multiTxtDocument &sw, const std::string &file_name)
{
// Create a filestream object
boost::filesystem::fstream ofs(file_name, std::ios::trunc | std::ios::out);
// Now create an XML output file using our filestream
boost::archive::xml_oarchive xml(ofs);
// call serialization::make_nvp, passing our sandwich.
// make_nvp will eventually call the sandwich instance (sw) serialize function
// causing the contents of sw to be output to the xml file
xml << boost::serialization::make_nvp("multiTxtDocument", sw);
}
// The load function works in almost the exact same way as save_sandwich,
// The only differences are:
// 1. we create an XML input stream - the original example in AD's link created another xml_oarchive, causing a runtime error...doh!
// 2. the call to make_nvp populates the sandwich instance(sw) which is then returned...
multiTxtDocument load_sandwich(const std::string &file_name)
{
multiTxtDocument sw;
boost::filesystem::fstream ifs(file_name, std::ios::binary | std::ios::in);
boost::archive::xml_iarchive xml(ifs);
xml >> boost::serialization::make_nvp("multiTxtDocument", sw);
return sw;
}*/
/// For every segment id (taken from the first document), compute the mean
/// segment length across all documents and store it on the matching
/// segment of every document.  Segments whose average is already non-zero
/// are skipped.
void multiTxtDocument::setAverageLength()
{
if ( multiTxtDocumentParams.debugMode ) {
cerr << "DEBUG tercpp : multiTxtDocument::setAverageLength : Starting calculate Average length " << endl << "END DEBUG" << endl;
}
vecFloat l_avLength((*documents.begin()).getSize(),0.0);
vector< documentStructure >::iterator iter=documents.begin();
// for (vector< documentStructure >::iterator iter=documents.begin(); iter!=documents.end(); iter++)
// {
// string l_id=(*iter).getDocId();
// to_return.push_back(l_id);
// Iterate over the segments of the FIRST document only; every document is
// assumed to contain the same segment ids.
vector< segmentStructure > * l_vecSeg=(*iter).getSegments();
// vector< segmentStructure >::iterator iterSeg=l_vecSeg->begin();
for (vector< segmentStructure >::iterator iterSeg=l_vecSeg->begin(); iterSeg!=l_vecSeg->end(); iterSeg++) {
// NOTE(review): l_seg is a local COPY of the segment; writes to it below
// do not touch the stored data (the pointer-based loop does that).
segmentStructure l_seg=(*iterSeg);
// if ( multiTxtDocumentParams.debugMode )
// {
// cerr << "DEBUG tercpp : multiTxtDocument::setAverageLength : Average length: " << l_seg.getAverageLength() << endl << "END DEBUG" << endl;
// }
if (l_seg.getAverageLength()==0.0) {
// Mean of this segment's size over all documents.
float l_average=0.0;
for (int l_iter =0; l_iter < (int)documents.size(); l_iter++) {
l_average+=(float)(documents.at(l_iter).getSegment(l_seg.getSegId()))->getSize();
}
l_average=l_average/(float)documents.size();
l_seg.setAverageLength(l_average);
// Write the computed average through to the real segment in every document.
for (iter=documents.begin(); iter!=documents.end(); iter++) {
// if ( multiTxtDocumentParams.debugMode )
// {
// cerr << "DEBUG tercpp : multiTxtDocument::setAverageLength : average length BEFORE assignation: DocId, SegId, Average: " << (*iter).getDocId() << "\t"<< (*iter).getSegment(l_seg.getSegId())->getSegId() << "\t"<< (*iter).getSegment(l_seg.getSegId())->getAverageLength() << endl << "END DEBUG" << endl;
// }
(*iter).getSegment(l_seg.getSegId())->setAverageLength(l_average);
if ( multiTxtDocumentParams.debugMode ) {
cerr << "DEBUG tercpp : multiTxtDocument::setAverageLength : average length AFTER assignation: DocId, SegId, Average: " << (*iter).getDocId() << "\t"<< (*iter).getSegment(l_seg.getSegId())->getSegId() << "\t"<< (*iter).getSegment(l_seg.getSegId())->getAverageLength() << endl << "END DEBUG" << endl;
}
}
}
// Reset the document iterator for the next segment of the first document.
iter=documents.begin();
// if ( multiTxtDocumentParams.debugMode )
// {
// cerr << "DEBUG tercpp : multiTxtDocument::setAverageLength : average length verification: DocId, SegId, Average: " << (*iter).getDocId() << "\t"<< (*iter).getSegment(l_seg.getSegId())->getSegId() << "\t"<< (*iter).getSegment(l_seg.getSegId())->getAverageLength() << endl << "END DEBUG" << endl;
// }
}
if ( multiTxtDocumentParams.debugMode ) {
cerr << "DEBUG tercpp : multiTxtDocument::setAverageLength : End calculate Average length " << endl << "END DEBUG" << endl;
}
// }
}
/// Load one or more plain-text files (comma-separated list) as separate
/// documents, then compute the per-segment average lengths across them.
void multiTxtDocument::loadFiles ( string fileName, bool caseOn, bool noPunct, bool debugMode, bool noTxtIds, bool tercomLike )
{
  if ( multiTxtDocumentParams.debugMode ) {
    cerr << "DEBUG tercpp : multiTxtDocument::loadFiles : loading files " << endl << fileName << endl << "END DEBUG" << endl;
  }
  vector<string> fileList = stringToVector ( fileName, "," );
  for ( vector<string>::iterator fileIt = fileList.begin(); fileIt != fileList.end(); ++fileIt ) {
    loadFile ( *fileIt, caseOn, noPunct, debugMode, noTxtIds, tercomLike );
  }
  setAverageLength();
}
// Load a single reference file using the file name and options from p.
void multiTxtDocument::loadRefFile ( param p )
{
multiTxtDocumentParams = Tools::copyParam ( p );
if ( multiTxtDocumentParams.debugMode ) {
cerr << "DEBUG tercpp : multiTxtDocument::loadRefFile : loading references " << endl << multiTxtDocumentParams.referenceFile << endl << "END DEBUG" << endl;
}
loadFile ( multiTxtDocumentParams.referenceFile, multiTxtDocumentParams.caseOn, multiTxtDocumentParams.noPunct, multiTxtDocumentParams.debugMode, multiTxtDocumentParams.noTxtIds, multiTxtDocumentParams.tercomLike );
}
// Load a comma-separated list of reference files (and compute averages).
void multiTxtDocument::loadRefFiles ( param p )
{
multiTxtDocumentParams = Tools::copyParam ( p );
if ( multiTxtDocumentParams.debugMode ) {
cerr << "DEBUG tercpp : multiTxtDocument::loadRefFiles : loading references " << endl << multiTxtDocumentParams.referenceFile << endl << "END DEBUG" << endl;
}
loadFiles ( multiTxtDocumentParams.referenceFile, multiTxtDocumentParams.caseOn, multiTxtDocumentParams.noPunct, multiTxtDocumentParams.debugMode, multiTxtDocumentParams.noTxtIds, multiTxtDocumentParams.tercomLike );
}
// Load a single hypothesis file; tercom-like tokenization is always
// disabled for hypotheses.
void multiTxtDocument::loadHypFile ( param p )
{
multiTxtDocumentParams = Tools::copyParam ( p );
multiTxtDocumentParams.tercomLike = false;
if ( multiTxtDocumentParams.debugMode ) {
cerr << "DEBUG tercpp : multiTxtDocument::loadHypFile : loading hypothesis " << endl << multiTxtDocumentParams.hypothesisFile << endl << "END DEBUG" << endl;
}
loadFile ( multiTxtDocumentParams.hypothesisFile, multiTxtDocumentParams.caseOn, multiTxtDocumentParams.noPunct, multiTxtDocumentParams.debugMode, multiTxtDocumentParams.noTxtIds, multiTxtDocumentParams.tercomLike );
}
/// Load one or more hypothesis files (comma-separated list in
/// p.hypothesisFile); tercom-like tokenization is always disabled for
/// hypotheses.
/// BUG FIX: this plural loader previously called loadFile(), so a
/// comma-separated list was treated as one file name and
/// setAverageLength() was never run; loadFiles() mirrors loadRefFiles().
void multiTxtDocument::loadHypFiles ( param p )
{
  multiTxtDocumentParams = Tools::copyParam ( p );
  multiTxtDocumentParams.tercomLike = false;
  if ( multiTxtDocumentParams.debugMode ) {
    cerr << "DEBUG tercpp : multiTxtDocument::loadHypFiles : loading hypothesis " << endl << multiTxtDocumentParams.hypothesisFile << endl << "END DEBUG" << endl;
  }
  loadFiles ( multiTxtDocumentParams.hypothesisFile, multiTxtDocumentParams.caseOn, multiTxtDocumentParams.noPunct, multiTxtDocumentParams.debugMode, multiTxtDocumentParams.noTxtIds, multiTxtDocumentParams.tercomLike );
}
// Number of documents currently loaded.
int multiTxtDocument::getSize()
{
return ( int ) documents.size();
}
}

View File

@ -1,81 +0,0 @@
#ifndef __MULTITXT_DOCUMENT_H__
#define __MULTITXT_DOCUMENT_H__
#include "documentStructure.h"
#include "tools.h"
// #include "xmlStructure.h"
#include <iostream>
#include <string>
namespace TERCpp
{
/// A collection of plain-text documents (one per reference/hypothesis
/// file), each holding one segment per input line.  Provides loaders for
/// single files and comma-separated file lists, and computes per-segment
/// average lengths across the documents.
class multiTxtDocument
{
public:
multiTxtDocument();
// multiTxtDocument(string FileName);
// multiTxtDocument(const std::string &bread, const std::string &cheese, const std::string &meat, const bool pickle):
// m_bread(bread), m_cheese(cheese), m_meat(meat), m_pickle(pickle){};
// ~multiTxtDocument(){};
// void output()
// {
// std::cout << "Bread = " << m_bread << ", Cheese = " << m_cheese <<
// ", Meat = " << m_meat << ", Has Pickle = " << m_pickle << std::endl;
//
// }
// void setDocType(string s);
// void setSetId(string s);
// void setSrcLang(string s);
// void setTgtLang(string s);
// string getDocType();
// string getSetId();
// string getSrcLang();
// string getTgtLang();
// xmlStructure getStructure();
// Append a document to the collection.
void addDocument ( documentStructure doc );
// Accessors: last document, document by id (aborts on unknown id),
// all documents (copy), and the list of document ids.
documentStructure* getLastDocument();
documentStructure* getDocument ( string docId );
vector<documentStructure> getDocuments ();
vector<string> getListDocuments ();
// Load one file / a comma-separated list of files as new document(s).
void loadFile ( string fileName, bool caseOn, bool noPunct, bool debugMode, bool noTxtIds, bool tercomLike );
void loadFiles ( string fileName, bool caseOn, bool noPunct, bool debugMode, bool noTxtIds, bool tercomLike );
// Convenience loaders driven by a param struct.
void loadRefFile ( param p );
void loadRefFiles ( param p );
void loadHypFile ( param p );
void loadHypFiles ( param p );
// Store, on every segment, its mean length across all documents.
void setAverageLength();
// Number of documents loaded.
int getSize();
private:
// string docType;
// string setId;
// string srcLang;
// string tgtLang;
// xmlStructure xmlStruct;
param multiTxtDocumentParams;
vector<documentStructure> documents;
// vector<string> bestDocumentId;
// std::string m_bread, m_cheese, m_meat;
// bool m_pickle;
//
// // declare the boost::serialization::access class as a friend of multiTxtDocument
// friend class boost::serialization::access;
// // Create a serialize function for serialization::access to use, I guess you could regard this as a kind of callback function!
// template<class archive>
// void serialize(archive& ar, const unsigned int version)
// {
// // Note: As explained in the original tut. the & operator is overridden in boost to use
// // << or >> depending on the direction of the data (read/write)
// using boost::serialization::make_nvp;
// ar & make_nvp("Bread", m_bread);
// ar & make_nvp("Cheese", m_cheese);
// ar & make_nvp("Meats", m_meat);
// ar & make_nvp("HasPickle", m_pickle);
// // Also note: strings in the first parameter of make_nvp cannot contain spaces!
// }
};
}
#endif //SANDWICH_DEFINED

View File

@ -1,82 +0,0 @@
#include "segmentStructure.h"
using namespace std;
namespace TERCpp
{
// The segment's tokens (copy).
vecString segmentStructure::getContent()
{
return content;
}
// The segment's id.
string segmentStructure::getSegId()
{
return segId;
}
// NOTE(review): always returns the empty string; the real rendering is
// commented out.
string segmentStructure::toString()
{
// return vectorToString(content);
return "";
}
// Replace the token content and reset the average length.
void segmentStructure::addContent ( vecString vecS )
{
content = vecS;
averageLength=0.0;
}
// Set the segment id.
void segmentStructure::setSegId ( string s )
{
segId = s;
}
// Construct from an id and a ready-made token vector.
segmentStructure::segmentStructure ( string id, vecString vecS )
{
segId = id;
content = vecS;
averageLength=0.0;
}
// Construct from an id and a raw text line (split on spaces).
segmentStructure::segmentStructure ( string id, string txt )
{
segId = id;
content = stringToVector ( txt, " " );
averageLength=0.0;
}
// Replace the content from a raw text line (split on spaces) and reset the
// average length.
void segmentStructure::addContent ( string s )
{
content = stringToVector ( s, " " );
averageLength=0.0;
}
/// Default constructor: empty id and no content.
/// BUG FIX: averageLength is a plain float member and was left
/// uninitialized here, unlike in the other constructors; reading it before
/// a set would be undefined behaviour.  Initialise it to 0.0 as elsewhere.
segmentStructure::segmentStructure()
{
  segId = "";
  averageLength = 0.0;
}
// The stored TER alignment for this segment (copy).
terAlignment segmentStructure::getAlignment()
{
return evaluation;
}
// Store a TER alignment for this segment.
void segmentStructure::setAlignment ( terAlignment& l_align )
{
evaluation = l_align;
}
// Id of the reference document that produced the best alignment.
string segmentStructure::getBestDocId()
{
return bestDocId;
}
void segmentStructure::setBestDocId ( string s )
{
bestDocId = s;
}
// Mean length of this segment across all reference documents.
float segmentStructure::getAverageLength()
{
return averageLength;
}
void segmentStructure::setAverageLength(float f)
{
averageLength=f;
}
// Number of tokens in this segment.
int segmentStructure::getSize()
{
return (int)content.size();
}
}

View File

@ -1,73 +0,0 @@
#ifndef __SEGMENTSTRUCTURE_H__
#define __SEGMENTSTRUCTURE_H__
#include <vector>
#include <stdio.h>
#include <string>
#include <sstream>
#include "tools.h"
#include "tercalc.h"
using namespace std;
using namespace Tools;
namespace TERCpp
{
/// One sentence of a document: an id, its tokens, the TER alignment chosen
/// for it, the id of the reference document that produced that alignment,
/// and the mean segment length across references (TER denominator).
class segmentStructure
{
private:
string segId;
vecString content;
terAlignment evaluation;
string bestDocId;
float averageLength;
public:
segmentStructure();
segmentStructure ( string id, vecString vecS );
segmentStructure ( string id, string txt );
void setAverageLength(float f);
float getAverageLength();
string getSegId();
terAlignment getAlignment();
void setAlignment(terAlignment& l_align);
void setSegId ( string s );
void setBestDocId ( string s );
string getBestDocId();
// Replace the content (vector form, or raw line split on spaces).
void addContent ( vecString vecS );
void addContent ( string s );
// Number of tokens.
int getSize();
// {
// return segId;
// }
vecString getContent();
// {
// return content;
// }
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
// alignmentStruct (int _start, int _end, int _moveto, int _newloc, vector<string> _shifted);
// string toString();
// int distance() ;
// bool leftShift();
// int size();
// alignmentStruct operator=(alignmentStruct t);
// string vectorToString(vector<string> vec);
// int start;
// int end;
// int moveto;
// int newloc;
// NOTE(review): the public fields below look copied from alignmentStruct
// (see the comments above); nothing in the visible code uses them here.
vector<string> nwords; // The words we shifted
vector<char> alignment ; // for pra_more output
vector<vecInt> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to
// calculate it multiple times.
double cost;
string toString();
};
}
#endif

View File

@ -1,149 +0,0 @@
#include "sgmlDocument.h"
// #include <iostream>
// #include <boost/filesystem/fstream.hpp>
// #include <boost/archive/xml_oarchive.hpp>
// #include <boost/archive/xml_iarchive.hpp>
// #include <boost/serialization/nvp.hpp>
// helper functions to allow us to load and save sandwiches to/from xml
namespace TERCpp
{
// Default constructor: all metadata fields start empty.
SGMLDocument::SGMLDocument()
{
docType="";
setId="";
srcLang="";
tgtLang="";
}
// SGMLDocument::SGMLDocument ( string FileName )
// {
// this=xmlStruct.copy_to_SGMLDocument(FileName);
// }
// xmlStructure SGMLDocument::getStructure()
// {
// return xmlStruct;
// }
// Metadata accessors taken from the SGML header attributes.
string SGMLDocument::getDocType()
{
return docType;
}
string SGMLDocument::getSetId()
{
return setId;
}
string SGMLDocument::getSrcLang()
{
return srcLang;
}
string SGMLDocument::getTgtLang()
{
return tgtLang;
}
// Metadata setters for the SGML header attributes.
void SGMLDocument::setDocType ( string s )
{
docType=s;
}
void SGMLDocument::setSetId ( string s )
{
setId=s;
}
void SGMLDocument::setSrcLang ( string s )
{
srcLang=s;
}
void SGMLDocument::setTgtLang ( string s )
{
tgtLang=s;
}
// Append a document (by value) to the set.
void SGMLDocument::addDocument ( documentStructure doc )
{
documents.push_back(doc);
}
// Pointer to the most recently added document.
// NOTE(review): with an empty set, (int)size()-1 wraps to a huge index and
// at() throws std::out_of_range.
documentStructure* SGMLDocument::getLastDocument()
{
return &(documents.at((int)documents.size()-1));
}
// Pointer to the first document; at(0) throws if the set is empty.
documentStructure* SGMLDocument::getFirstDocument()
{
return &(documents.at(0));
}
// Number of documents in the set.
int SGMLDocument::getSize()
{
return (int)documents.size();
}
/// Look up a document by id (linear search).  Aborts the program with an
/// error message when the id is unknown.
/// @param docId the id to search for
/// @return pointer to the stored document
documentStructure* SGMLDocument::getDocument(string docId)
{
  for ( vector<documentStructure>::iterator docIt = documents.begin(); docIt != documents.end(); ++docIt ) {
    if ( docIt->getDocId() == docId ) {
      return & ( *docIt );
    }
  }
  cerr << "ERROR : SGMLDocument::getDocument : document " << docId << " does not exist !" << endl;
  exit ( 0 );
}
// void save_sandwich(const SGMLDocument &sw, const std::string &file_name);
// SGMLDocument load_sandwich(const std::string &file_name);
// int callSGMLDocument()
// {
// // xml filename
// const std::string fn="JasonsSarnie.xml";
//
// // create a new sandwich and lets take a look at it!
// SGMLDocument *s = new SGMLDocument("Granary", "Brie", "Bacon", false); // mmmmm, Brie and bacon! ;)
// std::cout << "Created the following sandwich:" << std::endl;
// s->output();
//
// // Now lets save the sandwich out to an XML file....
// std::cout << std::endl << "Saving the sandwich to xml...." << std::endl;
// save_sandwich(*s, fn);
//
// // And then load it into another SGMLDocument variable and take a look at what we've got
// std::cout << "Attempting to load the saved sandwich..." << std::endl;
// SGMLDocument s2 = load_sandwich(fn);
// std::cout << "Contents of loaded SGMLDocument:" << std::endl;
// s2.output();
//
// delete s;
// std::string dummy;
// std::getline(std::cin, dummy);
//
// }
/*
// Save a SGMLDocument to XML...
void save_sandwich(const SGMLDocument &sw, const std::string &file_name)
{
// Create a filestream object
boost::filesystem::fstream ofs(file_name, std::ios::trunc | std::ios::out);
// Now create an XML output file using our filestream
boost::archive::xml_oarchive xml(ofs);
// call serialization::make_nvp, passing our sandwich.
// make_nvp will eventually call the sandwich instance (sw) serialize function
// causing the contents of sw to be output to the xml file
xml << boost::serialization::make_nvp("SGMLDocument", sw);
}
// The load function works in almost the exact same way as save_sandwich,
// The only differences are:
// 1. we create an XML input stream - the original example in AD's link created another xml_oarchive, causing a runtime error...doh!
// 2. the call to make_nvp populates the sandwich instance(sw) which is then returned...
SGMLDocument load_sandwich(const std::string &file_name)
{
SGMLDocument sw;
boost::filesystem::fstream ifs(file_name, std::ios::binary | std::ios::in);
boost::archive::xml_iarchive xml(ifs);
xml >> boost::serialization::make_nvp("SGMLDocument", sw);
return sw;
}*/
}

View File

@ -1,69 +0,0 @@
#ifndef __SGML_DOCUMENT_H__
#define __SGML_DOCUMENT_H__
#include "documentStructure.h"
// #include "xmlStructure.h"
#include <iostream>
#include <string>
namespace TERCpp
{
// In-memory representation of an SGML evaluation file (a refset or tstset):
// the file-level attributes (doc type, set id, source/target language) plus
// the list of documents it contains.
class SGMLDocument
{
public:
SGMLDocument();
// SGMLDocument(string FileName);
// SGMLDocument(const std::string &bread, const std::string &cheese, const std::string &meat, const bool pickle):
// m_bread(bread), m_cheese(cheese), m_meat(meat), m_pickle(pickle){};
// ~SGMLDocument(){};
// void output()
// {
// std::cout << "Bread = " << m_bread << ", Cheese = " << m_cheese <<
// ", Meat = " << m_meat << ", Has Pickle = " << m_pickle << std::endl;
//
// }
// Setters for the file-level SGML attributes.
void setDocType ( string s );
void setSetId ( string s );
void setSrcLang ( string s );
void setTgtLang ( string s );
// Getters for the file-level SGML attributes.
string getDocType();
string getSetId();
string getSrcLang();
string getTgtLang();
// xmlStructure getStructure();
// Appends a document (by value) to the container.
void addDocument ( documentStructure doc );
// Pointers returned below reference elements of `documents`; they are
// invalidated when the vector reallocates (e.g. on addDocument).
documentStructure* getLastDocument();
documentStructure* getFirstDocument();
// Number of documents currently stored.
int getSize();
// Lookup by docid; see the .cpp for the error behaviour on a miss.
documentStructure* getDocument(string docId);
private:
string docType;
string setId;
string srcLang;
string tgtLang;
// xmlStructure xmlStruct;
vector<documentStructure> documents;
// std::string m_bread, m_cheese, m_meat;
// bool m_pickle;
//
// // declare the boost::serialization::access class as a friend of SGMLDocument
// friend class boost::serialization::access;
// // Create a serialize function for serialization::access to use, I guess you could regard this as a kind of callback function!
// template<class archive>
// void serialize(archive& ar, const unsigned int version)
// {
// // Note: As explained in the original tut. the & operator is overridden in boost to use
// // << or >> depending on the direction of the data (read/write)
// using boost::serialization::make_nvp;
// ar & make_nvp("Bread", m_bread);
// ar & make_nvp("Cheese", m_cheese);
// ar & make_nvp("Meats", m_meat);
// ar & make_nvp("HasPickle", m_pickle);
// // Also note: strings in the first parameter of make_nvp cannot contain spaces!
// }
};
}
#endif // __SGML_DOCUMENT_H__

View File

@ -1,40 +0,0 @@
/*
 * TinyXML-based helpers: dump an XML tree to stdout, or convert a parsed
 * SGML/XML file into a SGMLDocument.
 * (The previous header comment, "Generic hashmap manipulation functions",
 * was copied from another file and did not describe this header.)
 */
#ifndef __XMLSTRUCTURE_H__
#define __XMLSTRUCTURE_H__
#include "sgmlDocument.h"
#include "documentStructure.h"
#include "stdio.h"
#include <iostream>
#include <string>
#include "tinyxml.h"
using namespace std;
namespace TERCpp
{
class xmlStructure
{
private:
// Number of spaces per indentation level used by the dump helpers.
unsigned int NUM_INDENTS_PER_SPACE;
// void dump_attribs_to_SGMLDocuments ( SGMLDocument* arg1, const TiXmlElement* arg2 );
// Copies the attributes of `pElement` into the right part of `sgmlDoc`,
// dispatching on the tree depth `indent`.
void dump_attribs_to_SGMLDocuments ( SGMLDocument* sgmlDoc, TiXmlElement* pElement, unsigned int indent );
public:
xmlStructure();
// Indentation strings for pretty-printing (with and without a "+" marker).
const char * getIndent( unsigned int numIndents );
const char * getIndentAlt( unsigned int numIndents );
// Debug printers: dump attributes / a node subtree / a whole file to stdout.
int dump_attribs_to_stdout(TiXmlElement* pElement, unsigned int indent);
void dump_to_stdout( TiXmlNode* pParent, unsigned int indent );
void dump_to_stdout(const char* pFilename);
// Recursively copies a parsed TinyXML tree into a SGMLDocument.
void copy_to_SGMLDocument(SGMLDocument* sgmlDoc ,TiXmlNode* pParent, unsigned int indent );
// Parses `FileName` and returns the resulting SGMLDocument.
SGMLDocument dump_to_SGMLDocument(string FileName);
};
}
#endif // __XMLSTRUCTURE_H__

View File

@ -1,111 +0,0 @@
/*
www.sourceforge.net/projects/tinyxml
Original file by Yves Berquin.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any
damages arising from the use of this software.
Permission is granted to anyone to use this software for any
purpose, including commercial applications, and to alter it and
redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product documentation
would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and
must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source
distribution.
*/
/*
* THIS FILE WAS ALTERED BY Tyge Løvset, 7. April 2005.
*/
#ifndef TIXML_USE_STL
#include "tinystr.h"
// Error value for the find primitives: the largest possible size_type,
// mirroring std::string::npos.
const TiXmlString::size_type TiXmlString::npos = static_cast< TiXmlString::size_type >(-1);
// Shared representation for all empty strings (size 0, capacity 0, "").
TiXmlString::Rep TiXmlString::nullrep_ = { 0, 0, { '\0' } };
// Grows the internal buffer to at least `cap` bytes, preserving the current
// contents.  Never shrinks: a no-op when cap <= capacity().
void TiXmlString::reserve (size_type cap)
{
if (cap > capacity()) {
// Build a fresh string with the same length but larger capacity, copy
// the payload across, then take ownership of it via swap (RAII frees
// the old rep when `tmp` goes out of scope).
TiXmlString tmp;
tmp.init(length(), cap);
memcpy(tmp.start(), data(), length());
swap(tmp);
}
}
// Replaces the contents with the first `len` bytes of `str`.
// Reallocates when the buffer is too small, or when it is grossly
// oversized (cap > 3*(len+8)) so memory is returned on big shrinks;
// otherwise reuses the buffer in place.
TiXmlString& TiXmlString::assign(const char* str, size_type len)
{
size_type cap = capacity();
if (len > cap || cap > 3*(len + 8)) {
TiXmlString tmp;
tmp.init(len);
memcpy(tmp.start(), str, len);
swap(tmp);
} else {
// memmove (not memcpy): `str` may point into this string's own buffer.
memmove(start(), str, len);
set_size(len);
}
return *this;
}
// Appends the first `len` bytes of `str` to this string.
TiXmlString& TiXmlString::append(const char* str, size_type len)
{
size_type newsize = length() + len;
if (newsize > capacity()) {
// Grow geometrically (new size + old capacity) to amortise repeated appends.
reserve (newsize + capacity());
}
// memmove (not memcpy): `str` may alias this string's own buffer.
memmove(finish(), str, len);
set_size(newsize);
return *this;
}
// Concatenates two TiXmlStrings into a freshly allocated result.
TiXmlString operator + (const TiXmlString & a, const TiXmlString & b)
{
TiXmlString result;
result.reserve(a.length() + b.length());
result.append(a.data(), a.length());
result.append(b.data(), b.length());
return result;
}
// Concatenates a TiXmlString and a C string into a new TiXmlString.
TiXmlString operator + (const TiXmlString & a, const char* b)
{
const TiXmlString::size_type rhsLen = static_cast<TiXmlString::size_type>( strlen(b) );
TiXmlString result;
result.reserve(a.length() + rhsLen);
result.append(a.data(), a.length());
result.append(b, rhsLen);
return result;
}
// Concatenates a C string and a TiXmlString into a new TiXmlString.
TiXmlString operator + (const char* a, const TiXmlString & b)
{
const TiXmlString::size_type lhsLen = static_cast<TiXmlString::size_type>( strlen(a) );
TiXmlString result;
result.reserve(lhsLen + b.length());
result.append(a, lhsLen);
result.append(b.data(), b.length());
return result;
}
#endif // TIXML_USE_STL

View File

@ -1,337 +0,0 @@
/*
www.sourceforge.net/projects/tinyxml
Original file by Yves Berquin.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any
damages arising from the use of this software.
Permission is granted to anyone to use this software for any
purpose, including commercial applications, and to alter it and
redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product documentation
would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and
must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source
distribution.
*/
/*
* THIS FILE WAS ALTERED BY Tyge Lovset, 7. April 2005.
*
* - completely rewritten. compact, clean, and fast implementation.
* - sizeof(TiXmlString) = pointer size (4 bytes on 32-bit systems)
* - fixed reserve() to work as per specification.
* - fixed buggy compares operator==(), operator<(), and operator>()
* - fixed operator+=() to take a const ref argument, following spec.
* - added "copy" constructor with length, and most compare operators.
* - added swap(), clear(), size(), capacity(), operator+().
*/
#ifndef TIXML_USE_STL
#ifndef TIXML_STRING_INCLUDED
#define TIXML_STRING_INCLUDED
#include <assert.h>
#include <string.h>
/* The support for explicit isn't that universal, and it isn't really
required - it is used to check that the TiXmlString class isn't incorrectly
used. Be nice to old compilers and macro it here:
*/
#if defined(_MSC_VER) && (_MSC_VER >= 1200 )
// Microsoft visual studio, version 6 and higher.
#define TIXML_EXPLICIT explicit
#elif defined(__GNUC__) && (__GNUC__ >= 3 )
// GCC version 3 and higher.
#define TIXML_EXPLICIT explicit
#else
#define TIXML_EXPLICIT
#endif
/*
TiXmlString is an emulation of a subset of the std::string template.
Its purpose is to allow compiling TinyXML on compilers with no or poor STL support.
Only the member functions relevant to the TinyXML project have been implemented.
The buffer allocation is made by a simplistic power of 2 like mechanism : if we increase
a string and there's no more room, we allocate a buffer twice as big as we need.
*/
// Minimal std::string replacement used when TinyXML is built without STL.
// A single heap block (Rep) stores size, capacity and the characters; all
// empty strings share the static `nullrep_` and therefore never allocate.
class TiXmlString
{
public :
// The size type used
typedef size_t size_type;
// Error value for find primitive
static const size_type npos; // = -1;
// TiXmlString empty constructor — points at the shared null rep, no allocation.
TiXmlString () : rep_(&nullrep_) {
}
// TiXmlString copy constructor
TiXmlString ( const TiXmlString & copy) : rep_(0) {
init(copy.length());
memcpy(start(), copy.data(), length());
}
// TiXmlString constructor, based on a NUL-terminated C string
TIXML_EXPLICIT TiXmlString ( const char * copy) : rep_(0) {
init( static_cast<size_type>( strlen(copy) ));
memcpy(start(), copy, length());
}
// TiXmlString constructor, based on a buffer and explicit length
TIXML_EXPLICIT TiXmlString ( const char * str, size_type len) : rep_(0) {
init(len);
memcpy(start(), str, len);
}
// TiXmlString destructor — releases the rep unless it is the shared null rep.
~TiXmlString () {
quit();
}
// = operator
TiXmlString& operator = (const char * copy) {
return assign( copy, (size_type)strlen(copy));
}
// = operator
TiXmlString& operator = (const TiXmlString & copy) {
return assign(copy.start(), copy.length());
}
// += operator. Maps to append
TiXmlString& operator += (const char * suffix) {
return append(suffix, static_cast<size_type>( strlen(suffix) ));
}
// += operator. Maps to append
TiXmlString& operator += (char single) {
return append(&single, 1);
}
// += operator. Maps to append
TiXmlString& operator += (const TiXmlString & suffix) {
return append(suffix.data(), suffix.length());
}
// Convert a TiXmlString into a null-terminated char *
const char * c_str () const {
return rep_->str;
}
// Convert a TiXmlString into a char * (need not be null terminated).
const char * data () const {
return rep_->str;
}
// Return the length of a TiXmlString
size_type length () const {
return rep_->size;
}
// Alias for length()
size_type size () const {
return rep_->size;
}
// Checks if a TiXmlString is empty
bool empty () const {
return rep_->size == 0;
}
// Return capacity of string
size_type capacity () const {
return rep_->capacity;
}
// single char extraction (bounds checked only via assert in debug builds)
const char& at (size_type index) const {
assert( index < length() );
return rep_->str[ index ];
}
// [] operator
char& operator [] (size_type index) const {
assert( index < length() );
return rep_->str[ index ];
}
// find a char in a string. Return TiXmlString::npos if not found
size_type find (char lookup) const {
return find(lookup, 0);
}
// find a char in a string from an offset. Return TiXmlString::npos if not found
size_type find (char tofind, size_type offset) const {
if (offset >= length()) return npos;
for (const char* p = c_str() + offset; *p != '\0'; ++p) {
if (*p == tofind) return static_cast< size_type >( p - c_str() );
}
return npos;
}
// Resets to the empty string, freeing any owned buffer.
void clear () {
//Lee:
//The original was just too strange, though correct:
// TiXmlString().swap(*this);
//Instead use the quit & re-init:
quit();
init(0,0);
}
/* Function to reserve a big amount of data when we know we'll need it. Be aware that this
function DOES NOT clear the content of the TiXmlString if any exists.
*/
void reserve (size_type cap);
TiXmlString& assign (const char* str, size_type len);
TiXmlString& append (const char* str, size_type len);
// O(1) exchange of the two reps; never throws.
void swap (TiXmlString& other) {
Rep* r = rep_;
rep_ = other.rep_;
other.rep_ = r;
}
private:
void init(size_type sz) {
init(sz, sz);
}
// Stores the new size and writes the terminating NUL in one step.
void set_size(size_type sz) {
rep_->str[ rep_->size = sz ] = '\0';
}
char* start() const {
return rep_->str;
}
char* finish() const {
return rep_->str + rep_->size;
}
// Heap layout: header followed by the characters (str is over-allocated
// past its declared [1] size — see init()).
struct Rep {
size_type size, capacity;
char str[1];
};
void init(size_type sz, size_type cap) {
if (cap) {
// Lee: the original form:
// rep_ = static_cast<Rep*>(operator new(sizeof(Rep) + cap));
// doesn't work in some cases of new being overloaded. Switching
// to the normal allocation, although use an 'int' for systems
// that are overly picky about structure alignment.
const size_type bytesNeeded = sizeof(Rep) + cap;
const size_type intsNeeded = ( bytesNeeded + sizeof(int) - 1 ) / sizeof( int );
rep_ = reinterpret_cast<Rep*>( new int[ intsNeeded ] );
rep_->str[ rep_->size = sz ] = '\0';
rep_->capacity = cap;
} else {
// Zero capacity: share the static empty rep, nothing to allocate.
rep_ = &nullrep_;
}
}
void quit() {
if (rep_ != &nullrep_) {
// The rep_ is really an array of ints. (see the allocator, above).
// Cast it back before delete, so the compiler won't incorrectly call destructors.
delete [] ( reinterpret_cast<int*>( rep_ ) );
}
}
Rep * rep_;
static Rep nullrep_;
} ;
// Comparison operators.  ==, != compare content; <, >, <=, >= use strcmp
// (byte-wise) ordering.  All are defined in terms of == and < below.
inline bool operator == (const TiXmlString & a, const TiXmlString & b)
{
return ( a.length() == b.length() ) // optimization on some platforms
&& ( strcmp(a.c_str(), b.c_str()) == 0 ); // actual compare
}
inline bool operator < (const TiXmlString & a, const TiXmlString & b)
{
return strcmp(a.c_str(), b.c_str()) < 0;
}
inline bool operator != (const TiXmlString & a, const TiXmlString & b)
{
return !(a == b);
}
inline bool operator > (const TiXmlString & a, const TiXmlString & b)
{
return b < a;
}
inline bool operator <= (const TiXmlString & a, const TiXmlString & b)
{
return !(b < a);
}
inline bool operator >= (const TiXmlString & a, const TiXmlString & b)
{
return !(a < b);
}
// Mixed TiXmlString / C-string equality comparisons.
inline bool operator == (const TiXmlString & a, const char* b)
{
return strcmp(a.c_str(), b) == 0;
}
inline bool operator == (const char* a, const TiXmlString & b)
{
return b == a;
}
inline bool operator != (const TiXmlString & a, const char* b)
{
return !(a == b);
}
inline bool operator != (const char* a, const TiXmlString & b)
{
return !(b == a);
}
// Concatenation; implemented in tinystr.cpp.
TiXmlString operator + (const TiXmlString & a, const TiXmlString & b);
TiXmlString operator + (const TiXmlString & a, const char* b);
TiXmlString operator + (const char* a, const TiXmlString & b);
/*
TiXmlOutStream is an emulation of std::ostream. It is based on TiXmlString.
Only the operators that we need for TinyXML have been developped.
*/
// Emulation of std::ostream built on TiXmlString: operator<< simply appends.
class TiXmlOutStream : public TiXmlString
{
public :
// TiXmlOutStream << operator.
TiXmlOutStream & operator << (const TiXmlString & in) {
*this += in;
return *this;
}
// TiXmlOutStream << operator.
TiXmlOutStream & operator << (const char * in) {
*this += in;
return *this;
}
} ;
#endif // TIXML_STRING_INCLUDED
#endif // TIXML_USE_STL

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,52 +0,0 @@
/*
www.sourceforge.net/projects/tinyxml
Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com)
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any
damages arising from the use of this software.
Permission is granted to anyone to use this software for any
purpose, including commercial applications, and to alter it and
redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product documentation
would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and
must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source
distribution.
*/
#include "tinyxml.h"
// The goal of the separate error file is to make the first
// step towards localization. tinyxml (currently) only supports
// English error messages, but they could now be translated.
//
// It also cleans up the code a bit.
//
// English message for each TinyXML error code.
// NOTE(review): the order must stay in sync with the error-code enum that
// defines TIXML_ERROR_STRING_COUNT (declared in tinyxml.h) — the code
// value is used directly as an index into this array.
const char* TiXmlBase::errorString[ TIXML_ERROR_STRING_COUNT ] = {
"No error",
"Error",
"Failed to open file",
"Memory allocation failed.",
"Error parsing Element.",
"Failed to read Element name",
"Error reading Element value.",
"Error reading Attributes.",
"Error: empty tag.",
"Error reading end tag.",
"Error parsing Unknown.",
"Error parsing Comment.",
"Error parsing Declaration.",
"Error document empty.",
"Error null (0) or unexpected EOF found in input stream.",
"Error parsing CDATA.",
"Error when TiXmlDocument added to document, because TiXmlDocument can only be at the root.",
};

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,6 @@
#include "tools.h"
using namespace std;
using namespace boost::xpressive;
namespace Tools
{
@ -179,6 +178,8 @@ string lowerCase ( string str )
}
return str;
}
/*
string removePunctTercom ( string str )
{
string str_mod = str;
@ -504,6 +505,7 @@ string normalizeStd ( string str )
return str_mod;
}
*/
param copyParam ( param p )
{

View File

@ -8,7 +8,6 @@
#include <stdlib.h>
#include <string>
#include <sstream>
#include <boost/xpressive/xpressive.hpp>
using namespace std;
@ -63,4 +62,4 @@ string printParams(param p);
// };
param copyParam(param p);
}
#endif
#endif

View File

@ -1,332 +0,0 @@
#include "xmlStructure.h"
// The following class defines a hash function for strings
using namespace std;
namespace TERCpp
{
// tutorial demo program
// ----------------------------------------------------------------------
// STDOUT dump and indenting utility functions
// ----------------------------------------------------------------------
// const unsigned int NUM_INDENTS_PER_SPACE=2;
// Default constructor: two spaces per indentation level for the dumpers.
xmlStructure::xmlStructure()
{
NUM_INDENTS_PER_SPACE = 2;
}
// Returns an indentation string ending in "+ " for `numIndents` levels.
// Implemented by returning a pointer into a constant string, so the depth
// is silently capped at the string's length — no allocation ever happens.
const char * xmlStructure::getIndent ( unsigned int numIndents )
{
static const char * pINDENT = " + ";
static const unsigned int LENGTH = strlen ( pINDENT );
unsigned int n = numIndents * NUM_INDENTS_PER_SPACE;
if ( n > LENGTH )
n = LENGTH;
return &pINDENT[ LENGTH-n ];
}
// Same as getIndent but produces spaces only (no "+" marker at the end).
const char * xmlStructure::getIndentAlt ( unsigned int numIndents )
{
static const char * pINDENT = " ";
static const unsigned int LENGTH = strlen ( pINDENT );
unsigned int n = numIndents * NUM_INDENTS_PER_SPACE;
if ( n > LENGTH )
n = LENGTH;
return &pINDENT[ LENGTH-n ];
}
// Prints every attribute of `pElement` (name, raw value, and its int/double
// interpretation when the value parses as such), indented `indent` levels.
// Returns the number of attributes printed; 0 for a null element.
int xmlStructure::dump_attribs_to_stdout ( TiXmlElement* pElement, unsigned int indent )
{
if ( !pElement )
return 0;
TiXmlAttribute* pAttrib = pElement->FirstAttribute();
int i = 0;
int ival;
double dval;
const char* pIndent = getIndent ( indent );
printf ( "\n" );
while ( pAttrib ) {
printf ( "%s%s: value=[%s]", pIndent, pAttrib->Name(), pAttrib->Value() );
if ( pAttrib->QueryIntValue ( &ival ) == TIXML_SUCCESS )
printf ( " int=%d", ival );
if ( pAttrib->QueryDoubleValue ( &dval ) == TIXML_SUCCESS )
printf ( " d=%1.1f", dval );
printf ( "\n" );
i++;
pAttrib = pAttrib->Next();
}
return i;
}
// Recursively pretty-prints the subtree rooted at `pParent` to stdout,
// one node per line, dispatching on the TinyXML node type.
// Depth-first: each child is printed with indent + 1.
void xmlStructure::dump_to_stdout ( TiXmlNode* pParent, unsigned int indent = 0 )
{
if ( !pParent )
return;
TiXmlNode* pChild;
TiXmlText* pText;
int t = pParent->Type();
printf ( "%s", getIndent ( indent ) );
int num;
switch ( t ) {
case TiXmlNode::DOCUMENT:
printf ( "Document" );
break;
case TiXmlNode::ELEMENT:
printf ( "Element [%s]", pParent->Value() );
// Also print the element's attributes and report how many there were.
num = dump_attribs_to_stdout ( pParent->ToElement(), indent + 1 );
switch ( num ) {
case 0:
printf ( " (No attributes)" );
break;
case 1:
printf ( "%s1 attribute", getIndentAlt ( indent ) );
break;
default:
printf ( "%s%d attributes", getIndentAlt ( indent ), num );
break;
}
break;
case TiXmlNode::COMMENT:
printf ( "Comment: [%s]", pParent->Value() );
break;
case TiXmlNode::UNKNOWN:
printf ( "Unknown" );
break;
case TiXmlNode::TEXT:
pText = pParent->ToText();
printf ( "Text: [%s]", pText->Value() );
break;
case TiXmlNode::DECLARATION:
printf ( "Declaration" );
break;
default:
break;
}
printf ( "\n" );
// Recurse into all children.
for ( pChild = pParent->FirstChild(); pChild != 0; pChild = pChild->NextSibling() ) {
dump_to_stdout ( pChild, indent + 1 );
}
}
// Loads the named file with TinyXML and dumps its structure to STDOUT;
// prints a message (but does not abort) if the file cannot be loaded.
void xmlStructure::dump_to_stdout ( const char* pFilename )
{
TiXmlDocument doc ( pFilename );
bool loadOkay = doc.LoadFile();
if ( loadOkay ) {
printf ( "\n%s:\n", pFilename );
dump_to_stdout ( &doc ); // defined later in the tutorial
} else {
printf ( "Failed to load file \"%s\"\n", pFilename );
}
}
// Loads the XML file `FileName` with TinyXML and converts the parsed tree
// into a SGMLDocument via copy_to_SGMLDocument().
// @param FileName  path of the SGML/XML file to parse
// @return the populated SGMLDocument, by value
// Terminates the process when the file cannot be loaded/parsed.
SGMLDocument xmlStructure::dump_to_SGMLDocument ( string FileName )
{
TiXmlDocument doc ( FileName.c_str() );
SGMLDocument to_return;
bool isLoaded = doc.LoadFile();
if ( isLoaded ) {
copy_to_SGMLDocument ( &to_return, &doc, ( unsigned int ) 0 );
} else {
cerr << "ERROR : xmlStructure::dump_to_SGMLDocument : Failed to load file " << FileName << endl;
// Bug fix: exit(0) signals success to the calling shell/script even though
// this is a fatal error; exit with a failure status instead.
exit ( 1 );
}
return to_return;
}
// Recursively copies the TinyXML subtree `pParent` into `sgmlDoc`.
// Elements named refset/tstset set the document type; each <doc> adds a
// documentStructure; each <seg> adds a segmentStructure; attribute copying
// is delegated to dump_attribs_to_SGMLDocuments().  Text nodes found at
// tree depth 5 are treated as segment content (assumes the fixed layout
// root/set/doc/p?/seg/text — TODO confirm) and are normalised according to
// xmlParams (tokenisation, lower-casing, punctuation removal) before being
// stored.  `indent` is the current recursion depth, starting at 0.
void xmlStructure::copy_to_SGMLDocument ( SGMLDocument* sgmlDoc, TiXmlNode* pParent, unsigned int indent )
{
if ( !pParent )
return;
TiXmlNode* pChild;
TiXmlText* pText;
int t = pParent->Type();
// printf ( "%s", getIndent ( indent ) );
// int num;
string elementValue;
switch ( t ) {
case TiXmlNode::DOCUMENT:
// printf ( "Document" );
break;
case TiXmlNode::ELEMENT:
printf ( "Element [%s]", pParent->Value() );
elementValue = pParent->Value();
// Root element: remember whether this is a reference or a test set.
if ( ( ( int ) elementValue.compare ( "refset" ) == 0 ) || ( ( int ) elementValue.compare ( "tstset" ) == 0 ) ) {
sgmlDoc->setDocType ( elementValue );
} else if ( ( int ) elementValue.compare ( "doc" ) == 0 ) {
// New <doc>: append an empty documentStructure to fill in below.
documentStructure tmp_doc;
sgmlDoc->addDocument ( tmp_doc );
} else if ( ( int ) elementValue.compare ( "seg" ) == 0 ) {
// New <seg>: append an empty segment to the current document.
segmentStructure tmp_seg;
( sgmlDoc->getLastDocument() )->addSegments ( tmp_seg );
}
dump_attribs_to_SGMLDocuments ( sgmlDoc, pParent->ToElement(), indent + 1 );
// num = dump_attribs_to_stdout ( pParent->ToElement(), indent + 1 );
// switch ( num )
// {
// case 0:
// printf ( " (No attributes)" );
// break;
// case 1:
// printf ( "%s1 attribute", getIndentAlt ( indent ) );
// break;
// default:
// printf ( "%s%d attributes", getIndentAlt ( indent ), num );
// break;
// }
break;
// case TiXmlNode::COMMENT:
// printf ( "Comment: [%s]", pParent->Value() );
// break;
//
// case TiXmlNode::UNKNOWN:
// printf ( "Unknown" );
// break;
case TiXmlNode::TEXT:
pText = pParent->ToText();
// printf ( "Text: [%s]", pText->Value() );
if ( indent == 5 ) {
documentStructure * l_tmp_doc = sgmlDoc->getLastDocument();
segmentStructure * l_tmp_seg = l_tmp_doc->getLastSegments();
string l_text = pText->Value();
string line_mod=l_text;
// Unless tercom-compatible mode is on, tokenize punctuation.
if ( !xmlParams.tercomLike ) {
if ( xmlParams.debugMode ) {
cerr << "DEBUG xmlStructure::copy_to_SGMLDocument : line NOT tokenized |" << line_mod << "|" << endl << "END DEBUG" << endl;
}
if ( xmlParams.debugMode ) {
cerr << "DEBUG tercpp : xmlStructure::copy_to_SGMLDocument : " << endl << "TERCOM AT FALSE " << endl << "END DEBUG" << endl;
}
line_mod = tokenizePunct ( line_mod );
}
// Case-insensitive scoring: lower-case the segment text.
if ( !xmlParams.caseOn ) {
if ( xmlParams.debugMode ) {
cerr << "DEBUG tercpp : xmlStructure::copy_to_SGMLDocument : " << endl << "CASEON AT FALSE " << endl << "END DEBUG" << endl;
}
line_mod = lowerCase ( line_mod );
}
// Optionally strip punctuation (tercom-style or TERCpp-style).
if ( xmlParams.noPunct ) {
if ( xmlParams.debugMode ) {
cerr << "DEBUG tercpp : xmlStructure::copy_to_SGMLDocument : " << endl << "NOPUNCT AT TRUE " << endl << "END DEBUG" << endl;
}
if ( !xmlParams.tercomLike ) {
line_mod = removePunctTercom ( line_mod );
} else {
line_mod = removePunct ( line_mod );
}
}
if ( xmlParams.debugMode ) {
cerr << "DEBUG xmlStructure::copy_to_SGMLDocument : line tokenized |" << line_mod << "|" << endl << "END DEBUG" << endl;
}
l_tmp_seg->addContent ( line_mod );
}
break;
// case TiXmlNode::DECLARATION:
// printf ( "Declaration" );
// break;
default:
break;
}
// printf ( "\n" );
// Depth-first recursion over all children.
for ( pChild = pParent->FirstChild(); pChild != 0; pChild = pChild->NextSibling() ) {
copy_to_SGMLDocument ( sgmlDoc, pChild, indent + 1 );
}
}
// Copies the attributes of `pElement` into the appropriate part of
// `sgmlDoc`, dispatching on the tree depth `indent`:
//   1 -> root element attributes (setid, srclang, tgtlang)
//   2 -> <doc> attributes (docid, sysid) on the last added document
//   4 -> <seg> attributes (id) on the last added segment
// Attributes at other depths are ignored.
void xmlStructure::dump_attribs_to_SGMLDocuments ( SGMLDocument * sgmlDoc, TiXmlElement* pElement, unsigned int indent )
{
if ( !pElement )
return;
TiXmlAttribute* pAttrib = pElement->FirstAttribute();
// int i = 0;
// int ival;
// double dval;
// const char* pIndent = getIndent ( indent );
// printf ( "\n" );
while ( pAttrib ) {
string attribut = pAttrib->Name();
switch ( indent ) {
case 1 : {
if ( attribut.compare ( "setid" ) == 0 ) {
sgmlDoc->setSetId ( pAttrib->Value() );
}
if ( attribut.compare ( "srclang" ) == 0 ) {
sgmlDoc->setSrcLang ( pAttrib->Value() );
}
if ( attribut.compare ( "tgtlang" ) == 0 ) {
sgmlDoc->setTgtLang ( pAttrib->Value() );
}
}
break;
case 2: {
documentStructure * tmp_doc_bis = sgmlDoc->getLastDocument();
if ( attribut.compare ( "docid" ) == 0 ) {
tmp_doc_bis->setDocId ( pAttrib->Value() );
}
if ( attribut.compare ( "sysid" ) == 0 ) {
tmp_doc_bis->setSysId ( pAttrib->Value() );
}
}
break;
case 4: {
documentStructure * l_tmp_doc = sgmlDoc->getLastDocument();
segmentStructure * l_tmp_seg = l_tmp_doc->getLastSegments();
if ( attribut.compare ( "id" ) == 0 ) {
l_tmp_seg->setSegId ( pAttrib->Value() );
}
// else
// if (attribut.compare("Text")==0)
// {
// tmp_seg.addContent(pAttrib->Value());
// }
}
break;
default:
break;
}
// printf ( "%s%s: value=[%s]", pIndent, pAttrib->Name(), pAttrib->Value() );
// if ( pAttrib->QueryIntValue ( &ival ) == TIXML_SUCCESS )
// printf ( " int=%d", ival );
// if ( pAttrib->QueryDoubleValue ( &dval ) == TIXML_SUCCESS )
// printf ( " d=%1.1f", dval );
// printf ( "\n" );
// i++;
pAttrib = pAttrib->Next();
}
// return i;
}
// std::size_t hashValue(std::string key){}
}

View File

@ -1,40 +0,0 @@
/*
 * TinyXML-based helpers: dump an XML tree to stdout, or convert a parsed
 * SGML/XML file into a SGMLDocument.
 * (The previous header comment, "Generic hashmap manipulation functions",
 * was copied from another file and did not describe this header.)
 */
#ifndef __XMLSTRUCTURE_H__
#define __XMLSTRUCTURE_H__
#include "sgmlDocument.h"
#include "documentStructure.h"
#include "stdio.h"
#include <iostream>
#include <string>
#include "tinyxml.h"
using namespace std;
namespace TERCpp
{
class xmlStructure
{
private:
// Number of spaces per indentation level used by the dump helpers.
unsigned int NUM_INDENTS_PER_SPACE;
// void dump_attribs_to_SGMLDocuments ( SGMLDocument* arg1, const TiXmlElement* arg2 );
// Copies the attributes of `pElement` into the right part of `sgmlDoc`,
// dispatching on the tree depth `indent`.
void dump_attribs_to_SGMLDocuments ( SGMLDocument* sgmlDoc, TiXmlElement* pElement, unsigned int indent );
public:
xmlStructure();
// Indentation strings for pretty-printing (with and without a "+" marker).
const char * getIndent ( unsigned int numIndents );
const char * getIndentAlt ( unsigned int numIndents );
// Debug printers: dump attributes / a node subtree / a whole file to stdout.
int dump_attribs_to_stdout ( TiXmlElement* pElement, unsigned int indent );
void dump_to_stdout ( TiXmlNode* pParent, unsigned int indent );
void dump_to_stdout ( const char* pFilename );
// Recursively copies a parsed TinyXML tree into a SGMLDocument.
void copy_to_SGMLDocument ( SGMLDocument* sgmlDoc , TiXmlNode* pParent, unsigned int indent );
// Parses `FileName` and returns the resulting SGMLDocument.
SGMLDocument dump_to_SGMLDocument ( string FileName );
// Normalisation options (tokenisation, case, punctuation, debug) applied
// to segment text while copying; public so the caller can configure them.
param xmlParams;
};
}
#endif // __XMLSTRUCTURE_H__

View File

@ -78,7 +78,7 @@ void TerScorer::prepareStats ( size_t sid, const string& text, ScoreStats& entry
}
ostringstream stats;
stats << result.numEdits << " " << result.averageWords << " " << result.scoreAv() << " " ;
stats << result.numEdits*100.0 << " " << result.averageWords*100.0 << " " << result.scoreAv()*100.0 << " " ;
string stats_str = stats.str();
entry.set ( stats_str );
}
@ -94,6 +94,8 @@ float TerScorer::calculateScore ( const vector<int>& comps )
return (1.0+(num / denom));
}
}
/*
float TerScorer::calculateScore ( const vector<float>& comps )
{
float denom = 1.0 * comps[1];
@ -105,3 +107,4 @@ float TerScorer::calculateScore ( const vector<float>& comps )
return (1.0+(num / denom));
}
}
*/

View File

@ -43,9 +43,9 @@ public:
};
// protected:
protected:
float calculateScore(const vector<int>& comps);
float calculateScore(const vector<float>& comps);
// float calculateScore(const vector<float>& comps);
private:
string javaEnv;

View File

@ -26,6 +26,7 @@ typedef vector<statscore_t> statscores_t;
typedef float FeatureStatsType;
typedef FeatureStatsType* featstats_t;
typedef map<string,FeatureStatsType> sparse_featstats_t;
//typedef vector<FeatureStatsType> featstats_t;
typedef vector<FeatureStats> featarray_t;
typedef vector<FeatureArray> featdata_t;

View File

@ -23,12 +23,14 @@
#include "Timer.h"
#include "Util.h"
#include "../moses/src/ThreadPool.h"
float min_interval = 1e-3;
using namespace std;
void usage(void)
void usage(int ret)
{
cerr<<"usage: mert -d <dimensions> (mandatory )"<<endl;
cerr<<"[-n] retry ntimes (default 1)"<<endl;
@ -42,9 +44,14 @@ void usage(void)
cerr<<"[--scfile|-S] comma separated list of scorer data files (default score.data)"<<endl;
cerr<<"[--ffile|-F] comma separated list of feature data files (default feature.data)"<<endl;
cerr<<"[--ifile|-i] the starting point data file (default init.opt)"<<endl;
#ifdef WITH_THREADS
cerr<<"[--threads|-T] use multiple threads (default 1)"<<endl;
#endif
cerr<<"[--shard-count] Split data into shards, optimize for each shard and average"<<endl;
cerr<<"[--shard-size] Shard size as proportion of data. If 0, use non-overlapping shards"<<endl;
cerr<<"[-v] verbose level"<<endl;
cerr<<"[--help|-h] print this message and exit"<<endl;
exit(1);
exit(ret);
}
static struct option long_options[] = {
@ -60,12 +67,48 @@ static struct option long_options[] = {
{"scfile",1,0,'S'},
{"ffile",1,0,'F'},
{"ifile",1,0,'i'},
#ifdef WITH_THREADS
{"threads", required_argument,0,'T'},
#endif
{"shard-count", required_argument, 0, 'a'},
{"shard-size", required_argument, 0, 'b'},
{"verbose",1,0,'v'},
{"help",no_argument,0,'h'},
{0, 0, 0, 0}
};
int option_index;
/**
 * Runs an optimisation, or a random restart: wraps one Optimizer::Run call
 * (from the starting point m_point) so it can be scheduled on a thread pool.
 **/
class OptimizationTask : public Moses::Task
{
public:
// Takes a non-owning pointer to the optimizer and copies the starting point.
OptimizationTask(Optimizer* optimizer, const Point& point) :
m_optimizer(optimizer), m_point(point) {}
// The creator keeps ownership and reads the result afterwards, so the
// pool must not delete the task.
// NOTE(review): presumably overrides a virtual in Moses::Task — confirm.
bool DeleteAfterExecution() {
return false;
}
// Executes the optimisation and stores the resulting score.
void Run() {
m_score = m_optimizer->Run(m_point);
}
// Score produced by Run(); only meaningful after the task has executed.
statscore_t getScore() const {
return m_score;
}
const Point& getPoint() const {
return m_point;
}
private:
Optimizer* m_optimizer;
Point m_point;
statscore_t m_score;
};
int main (int argc, char **argv)
{
@ -83,6 +126,11 @@ int main (int argc, char **argv)
int nrandom=0;
int seed=0;
bool hasSeed = false;
#ifdef WITH_THREADS
size_t threads=1;
#endif
float shard_size = 0;
size_t shard_count = 0;
string type("powell");
string scorertype("BLEU");
string scorerconfig("");
@ -140,12 +188,37 @@ int main (int argc, char **argv)
case 'v':
setverboselevel(strtol(optarg,NULL,10));
break;
#ifdef WITH_THREADS
case 'T':
threads = strtol(optarg, NULL, 10);
if (threads < 1) threads = 1;
break;
#endif
case 'a':
shard_count = strtof(optarg,NULL);
break;
case 'b':
shard_size = strtof(optarg,NULL);
break;
case 'h':
usage(0);
break;
default:
usage();
usage(1);
}
}
if (pdim < 0)
usage();
usage(1);
cerr << "shard_size = " << shard_size << " shard_count = " << shard_count << endl;
if (shard_size && !shard_count) {
cerr << "Error: shard-size provided without shard-count" << endl;
exit(1);
}
if (shard_size > 1 || shard_size < 0) {
cerr << "Error: shard-size should be between 0 and 1" << endl;
exit(1);
}
if (hasSeed) {
cerr << "Seeding random numbers with " << seed << endl;
@ -230,6 +303,12 @@ int main (int argc, char **argv)
PrintUserTime("Data loaded");
// starting point score over latest n-best, accumulative n-best
//vector<unsigned> bests;
//compute bests with sparse features needs to be implemented
//currently sparse weights are not even loaded
//statscore_t score = TheScorer->score(bests);
if (tooptimizestr.length() > 0) {
cerr << "Weights to optimize: " << tooptimizestr << endl;
@ -257,63 +336,116 @@ int main (int argc, char **argv)
}
if (pairedrankfile.compare("") != 0) {
D.sample_ranked_pairs(pairedrankfile);
D.sampleRankedPairs(pairedrankfile);
PrintUserTime("Stopping...");
exit(0);
}
Optimizer *O=OptimizerFactory::BuildOptimizer(pdim,tooptimize,start_list[0],type,nrandom);
O->SetScorer(TheScorer);
O->SetFData(D.getFeatureData());
// run with specified starting points
stringstream oss;
statscore_t best=0, mean=0, var=0;
Point bestP;
for(int i=0;i<start_list.size();i++) {
Point P(start_list[i], min, max);//Generate from the full feature set. Warning: must be done after Optimizer initialization
statscore_t score=O->Run(P);
oss.str("");
oss << "Specified starting point number " << (1+i) << ", score: " << score;
if (i==0 || score>best) {
best=score;
bestP=P;
oss << " (new best)";
}
mean+=score;
var+=(score*score);
PrintUserTime(oss.str());
// treat sparse features just like regular features
if (D.hasSparseFeatures()) {
D.mergeSparseFeatures();
}
// run with random starting points
for(int i=0; i<ntry; i++) {
Point P(start_list[0], min, max);
P.Randomize(); // randomize within min and max as given to the constructor
statscore_t score=O->Run(P);
oss.str("");
oss << "Randomized starting point number " << (1+i) << ", score: " << score;
if(score>best) {
best=score;
bestP=P;
oss << " (new best)";
}
mean+=score;
var+=(score*score);
PrintUserTime(oss.str());
#ifdef WITH_THREADS
cerr << "Creating a pool of " << threads << " threads" << endl;
Moses::ThreadPool pool(threads);
#endif
Point::setpdim(pdim);
Point::setdim(tooptimize.size());
//starting points consist of specified points and random restarts
vector<Point> startingPoints;
for (size_t i = 0; i < start_list.size(); ++i) {
startingPoints.push_back(Point(start_list[i],min,max));
}
mean/=(float)ntry;
var/=(float)ntry;
var=sqrt(abs(var-mean*mean));
for (int i = 0; i < ntry; ++i) {
startingPoints.push_back(Point(start_list[0],min,max));
startingPoints.back().Randomize();
}
vector<vector<OptimizationTask*> > allTasks(1);
//optional sharding
vector<Data> shards;
if (shard_count) {
D.createShards(shard_count, shard_size, scorerconfig, shards);
allTasks.resize(shard_count);
}
//launch tasks
for (size_t i = 0 ; i < allTasks.size(); ++i) {
Data& data = D;
if (shard_count) data = shards[i]; //use the sharded data if it exists
vector<OptimizationTask*>& tasks = allTasks[i];
Optimizer *O=OptimizerFactory::BuildOptimizer(pdim,tooptimize,start_list[0],type,nrandom);
O->SetScorer(data.getScorer());
O->SetFData(data.getFeatureData());
//A task for each start point
for (size_t j = 0; j < startingPoints.size(); ++j) {
OptimizationTask* task = new OptimizationTask(O,startingPoints[j]);
tasks.push_back(task);
#ifdef WITH_THREADS
pool.Submit(task);
#else
task->Run();
#endif
}
}
//wait for all threads to finish
#ifdef WITH_THREADS
pool.Stop(true);
#endif
statscore_t total = 0;
Point totalP;
//collect results
for (size_t i = 0; i < allTasks.size(); ++i) {
statscore_t best=0, mean=0, var=0;
Point bestP;
for (size_t j = 0; j < allTasks[i].size(); ++j) {
statscore_t score = allTasks[i][j]->getScore();
mean += score;
var += score*score;
if (score > best) {
bestP = allTasks[i][j]->getPoint();
best = score;
}
delete allTasks[i][j];
}
mean/=(float)ntry;
var/=(float)ntry;
var=sqrt(abs(var-mean*mean));
if (verboselevel()>1)
cerr<<"shard " << i << " best score: "<< best << " variance of the score (for "<<ntry<<" try): "<<var<<endl;
totalP += bestP;
total += best;
if (verboselevel()>1)
cerr << "bestP " << bestP << endl;
}
//cerr << "totalP: " << totalP << endl;
Point finalP = totalP * (1.0 / allTasks.size());
statscore_t final = total / allTasks.size();
if (verboselevel()>1)
cerr<<"best score: "<< best << " variance of the score (for "<<ntry<<" try): "<<var<<endl;
cerr << "bestP: " << finalP << endl;
// L1-Normalization of the best Point
if ((int)tooptimize.size() == pdim)
bestP.NormalizeL1();
finalP.NormalizeL1();
cerr << "Best point: " << bestP << " => " << best << endl;
cerr << "Best point: " << finalP << " => " << final << endl;
ofstream res("weights.txt");
res<<bestP<<endl;
res<<finalP<<endl;
PrintUserTime("Stopping...");
}

View File

@ -1,46 +0,0 @@
package MertRegressionTesting;
use strict;
# if your tests need a new version of the test data, increment this
# and make sure that a moses-regression-tests-vX.Y is available
use constant TESTING_DATA_VERSION => '0.1';
# find the data directory in a few likely locations and make sure
# that it is the correct version
sub find_data_directory
{
my ($test_script_root, $data_dir) = @_;
my $data_version = TESTING_DATA_VERSION;
my @ds = ();
my $mrtp = "mert-reg-test-data-$data_version";
push @ds, $data_dir if defined $data_dir;
push @ds, "$test_script_root/$mrtp";
push @ds, "/tmp/$mrtp";
push @ds, "/var/tmp/$mrtp";
foreach my $d (@ds) {
next unless (-d $d);
return $d;
}
print STDERR<<EOT;
You do not appear to have the regression testing data installed.
You may either specify a non-standard location when running
the test suite with the --data-dir option,
or, you may install it in any one of the following
standard locations: $test_script_root, /tmp, or /var/tmp with these
commands:
cd <DESIRED_INSTALLATION_DIRECTORY>
MODIFY ACCORDING TO IRSTLM
wget http://www.statmt.org/moses/reg-testing/mert-regression-tests-v$data_version.tar
tar xf mert-regression-tests-v$data_version.tar
rm mert-regression-tests-v$data_version.tar
EOT
exit 1;
}
1;

View File

@ -1,88 +0,0 @@
#!/usr/bin/perl -w
use strict;
my ($results, $truth) = @ARGV;
my ($report, $pass, $fail) = compare_results("$results/results.dat", "$truth/results.dat");
open OUT, ">$results/Summary";
print OUT $report;
print $report;
close OUT;
if ($fail > 0) {
print <<EOT;
There were failures in this test run. Please analyze the results carefully.
EOT
exit 1;
}
exit 0;
sub compare_results {
my ($testf, $truthf) = @_;
my $test = read_results($testf);
my $truth = read_results($truthf);
my $ct1 = delete $truth->{'COMPARISON_TYPE'};
my $ct2 = delete $test->{'COMPARISON_TYPE'};
my $pass = 0;
my $fail = 0;
my $report = '';
foreach my $k (sort keys %$truth) {
$report .= "test-name=$k\tresult=";
if (!exists $test->{$k}) {
$report .= "missing from test results\n";
$fail++;
next;
}
my $truthv = $truth->{$k} || '';
my $testv = delete $test->{$k} || '';
if ($ct1->{$k} eq '=') {
if ($truthv eq $testv) {
$report .= "pass\n";
$pass++;
} else {
$report .= "fail\n\tTRUTH=$truthv\n\t TEST=$testv\n";
$fail++;
}
} else { # numeric difference
$testv=$testv?$testv:0;
$truthv=$truthv?$truthv:0;
my $diff = $testv - $truthv;
if ($diff == 0) { $report .= "identical\n"; next; }
$report .= "BASELINE=$truthv, TEST=$testv\t DELTA=$diff";
if ($truthv != 0) {
my $pct = $diff/$truthv;
my $t = sprintf "\t PCT CHANGE=%4.2f", $pct*100;
$report .= $t;
}
$report .= "\n";
}
}
foreach my $k (sort keys %$test) {
$fail++;
$report .= "test-name=$k\tfound in TEST but not in TRUTH.\n";
}
$report .= "\nTESTS PASSED=$pass\nTESTS FAILED=$fail\n";
return $report, $pass, $fail;
}
sub read_results {
my ($file) = @_;
open IN, "<$file" or die "Could not open $file!";
my %res;
while (my $l = <IN>) {
if ($l =~ /^([A-Za-z0-9_]+)\s*([=~])\s*(.+)$/) {
my ($key, $comparison_type, $value) = ($1, $2, $3);
$res{$key} = $value;
$res{'COMPARISON_TYPE'}->{$key}=$comparison_type;
}
}
close IN;
return \%res;
}

View File

@ -1,105 +0,0 @@
#!/usr/bin/perl -w
use strict;
my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
use Getopt::Long;
############################################################
my @tests = qw (
mert-basic
extractor-txt
extractor-bin
);
my @qsubtests = qw (
);
if (@qsubtests){
my $cmd=&getQsubCmd();
if (!defined($cmd)){
print STDERR "Regression tests (@qsubtests) can not run on $ENV{HOST}\nbecause SGE is not installed\n\n";
}else{
push @tests, @qsubtests;
}
}
###########################################################
use MertRegressionTesting;
use File::Temp qw ( tempfile );
use POSIX qw ( strftime );
my $test_dir;
my $BIN_TEST = $script_dir;
my $data_dir;
my $mert_scripts_dir;
GetOptions("data-dir=s" => \$data_dir,
"mert-scripts-dir=s"=> \$mert_scripts_dir,
) or exit 1;
$data_dir = MertRegressionTesting::find_data_directory($BIN_TEST, $data_dir);
my $test_run = "$BIN_TEST/run-single-test.pl --data-dir=$data_dir";
$test_dir = $script_dir . "/tests";
$test_run .= " --test-dir=$test_dir" if $test_dir;
$test_run .= " --mert-scripts-dir=$mert_scripts_dir" if $mert_scripts_dir;
print "Data directory: $data_dir\n";
print "Running tests: @tests\n\n";
print "TEST NAME STATUS PATH TO RESULTS\n";
my $lb = "---------------------------------------------------------------------------------------------------------\n";
print $lb;
my $fail = 0;
my @failed;
foreach my $test (@tests) {
my $cmd = "$test_run --test=$test";
my ($res, $output, $results_path) = do_test($cmd);
format STDOUT =
@<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$test, $res, $results_path
.
write;
if ($res eq 'FAIL') {
print "$lb$output$lb";
$fail++;
push @failed, $test;
} else {
# TOTAL_WALLTIME result=BASELINE=11, TEST=12 DELTA=1 PCT CHANGE=9.09
if ($output =~ /TOTAL_WALLTIME\s+result\s*=\s*([^\n]+)/o) {
print "\t\tTiming statistics: $1\n";
}
}
}
my $total = scalar @tests;
my $fail_percentage = int(100 * $fail / $total);
my $pass_percentage = int(100 * ($total-$fail) / $total);
print "\n$pass_percentage% of the tests passed.\n";
print "$fail_percentage% of the tests failed.\n";
if ($fail_percentage>0) { print "\nPLEASE INVESTIGATE THESE FAILED TESTS: @failed\n"; }
sub do_test {
my ($test) = @_;
my $o = `$test 2>&1`;
my $res = 'PASS';
$res = 'FAIL' if ($? > 0);
my $od = '';
if ($o =~ /RESULTS AVAILABLE IN: (.*)$/m) {
$od = $1;
$o =~ s/^RESULTS AVAIL.*$//mo;
}
return ($res, $o, $od);
}
sub getQsubCmd {
my $a =`which qsub | head -1 | awk '{print \$1}'`;
chomp($a);
if ($a && -e $a){ return $a; }
else{ return undef; }
}

View File

@ -1,9 +0,0 @@
#! /bin/sh -w
bin=$1; shift
testdir=$1; shift
cd $testdir
$bin/mert --scfile data/SCORESTAT.txt --ffile data/FEATSTAT.txt --ifile data/INIT -d 14 -n 20 -r 1000 2>&1 | grep -i "^Best"
$bin/mert --scfile data/SCORESTAT.bin --ffile data/FEATSTAT.bin --ifile data/INIT -d 14 -n 20 -r 1000 2>&1 | grep -i "^Best"

View File

@ -1,220 +0,0 @@
#!/usr/bin/env python
#
# Mert test suite.
# Created by Barry Haddow
#
# This script downloads data from www.statmt.org, and runs tests of mert,
# comparing weights against expected and producing timing information.
#
import ConfigParser
import logging
import optparse
import os
import os.path
import re
import string
import subprocess
import sys
import time
import urllib
import warnings
warnings.filterwarnings(action="ignore",message="tmpnam")
log = logging.getLogger("testmert")
dataurl = "http://www.statmt.org/moses/reg-testing/mert/"
def getMertDirectory():
scriptdir = os.path.dirname(__file__)
if not os.path.isabs(scriptdir):
scriptdir = os.path.join(os.getcwd(),scriptdir)
scriptdir = os.path.normpath(scriptdir)
return os.path.dirname(scriptdir)
class Mert:
"""Controls operation of mert loop"""
def __init__(self,weightfile,reffile,scorertype="BLEU",retries="20"):
self.reffile = reffile
self.scorertype = scorertype
self.workingdir = os.tmpnam()
os.mkdir(self.workingdir)
self.mertdir = getMertDirectory()
self.iteration = 1 # iteration number of inner loop
self.retries = retries
self.extractortimes = []
self.merttimes = []
os.system("cp %s %s" % \
(weightfile,self.getFileName("weights",self.iteration-1)))
# calculate dimension from weight file
weightfh = open(weightfile)
line = weightfh.readline()
self.dimension = repr(len(line.split()))
weightfh.close()
def innerLoop(self, nbestfile):
"""Perform iteration of the inner loop. Returns location of
weights file"""
log.debug("Inner loop: %d" % self.iteration)
# run extractor
scorefile = self.getFileName("scores",self.iteration)
featurefile = self.getFileName("features",self.iteration)
weightinfile = self.getFileName("weights",self.iteration-1)
cmd = [os.path.join(self.mertdir,"extractor"),"--reference",
self.reffile, "--nbest",nbestfile,"--sctype",self.scorertype,\
"--scfile", scorefile,"--ffile",featurefile]
if self.iteration > 1:
prevscorefile = self.getFileName("scores",self.iteration-1)
prevfeaturefile = self.getFileName("features",self.iteration-1)
cmd = cmd + ["--prev-scfile",prevscorefile , "--prev-ffile", prevfeaturefile ]
log.debug("Running: " + string.join(cmd))
start = time.time()
ret = subprocess.call(cmd)
self.extractortimes.append(time.time()-start)
if ret != 0:
raise RuntimeError("Failed to execute extractor: return code %d" % ret)
# run mert
cmd = [os.path.join(self.mertdir,"mert"),"--sctype",\
self.scorertype, "--scfile", scorefile, "--ffile", featurefile,\
"--ifile",weightinfile, "-d", self.dimension,"-n",self.retries]
log.debug("Running: " + string.join(cmd))
start = time.time()
ret = subprocess.call(cmd, cwd=self.workingdir)
self.merttimes.append(time.time()-start)
if ret != 0:
raise RuntimeError("Failed to execute mert: return code %d" % ret)
weightoutfile = self.getFileName("weights",self.iteration)
os.system("mv %s %s" % (os.path.join(self.workingdir,\
"weights.txt"), weightoutfile))
self.iteration = self.iteration + 1
return weightoutfile
def getFileName(self,stem,iteration):
return os.path.join(self.workingdir,stem+"."+repr(iteration))
def cleanup(self):
os.system("rm -rf %s" % self.workingdir)
class Test:
"""A mert test"""
def __init__(self,datadir):
self.datadir = datadir
config = ConfigParser.ConfigParser()
config.read(os.path.join(datadir,"config"))
self.iterations = config.getint("test","iterations")
log.debug("Test iterations: %d" % self.iterations)
self.tolerance = 0.00001
def run(self):
"""Run the test, return a boolean indicating success or failure"""
weightfile = os.path.join(self.datadir,"weights.0")
reffile = os.path.join(self.datadir,"reference")
self.mert = Mert(weightfile,reffile)
self.diffs = []
for i in range(self.iterations):
nbestfile = os.path.join(self.datadir,"nbest." + repr(i+1) + ".gz")
weightfile = self.mert.innerLoop(nbestfile)
expectedweightfile = os.path.join(self.datadir,"weights."+repr(i+1))
expectedweights = self.getWeights(expectedweightfile)
weights = self.getWeights(weightfile)
log.debug("Expected weights: " + repr(expectedweights))
log.debug("Actual weights: " + repr(weights))
diff = False
for j in range(len(weights)):
if abs(weights[j]-expectedweights[j]) > self.tolerance:
log.debug("Weight %d does not match: " % j)
diff = True
break
else:
log.debug("Weights match expected")
self.diffs.append(diff)
self.mert.cleanup()
def getWeights(self,weightfile):
"""Load a weight set from a file"""
weightfh = open(weightfile)
line = weightfh.readline()
weights = [float(w) for w in line.split()]
weightfh.close()
return weights
def printSummary(self):
"""Print a summary of the results"""
print "RESULTS: ", self.datadir
print "Weights matching expected: ",
for diff in self.diffs:
print not diff,
print
print "Extractor times: ",
for etime in self.mert.extractortimes:
print "%7.3f" % etime,
print "ave: %7.3f" % (sum(self.mert.extractortimes)/self.iterations)
print "Optimisation times: ",
for mtime in self.mert.merttimes:
print "%7.3f" % mtime,
print "ave: %7.3f" % (sum(self.mert.merttimes)/self.iterations)
def getTestList():
listfh = urllib.urlopen(os.path.join(dataurl,"tests.txt"))
tests = []
for line in listfh:
tests.append(line[:-1])
listfh.close()
return tests
def list():
"""List all available tests"""
tests = getTestList()
print "Available tests:"
for test in tests:
print test
def runAll(datadir):
"""Run all available tests"""
for test in getTestList():
runTest(test,datadir)
def runTest(testname,datadir):
log.info("Test started: " + testname)
if not os.path.isdir(datadir):
os.mkdir(datadir)
testdir = os.path.join(datadir,testname)
# Check if the test exists, download if necessary
if os.path.isdir(testdir):
log.debug("Directory %s already exists: not downloading" % testdir)
else:
testurl = os.path.join(dataurl,testname + ".tgz")
log.debug("Retrieving test data from " + testurl)
(arname,headers) = urllib.urlretrieve(testurl)
os.system("cd %s; tar zxf %s" % (datadir,arname))
log.debug("Done")
if not os.path.isdir(testdir):
raise RuntimeError("Test %s did not unpack properly" % testname)
test = Test(testdir)
test.run()
test.printSummary()
log.info("Test ended: " + testname)
def main():
logging.basicConfig(level = logging.DEBUG)
parser = optparse.OptionParser("usage: %prog [options] list|run|runall [testname]")
parser.add_option("-d", "--datadir", action="store", default="data",
dest="datadir", help="Data directory to use", metavar="DIR")
(options,args) = parser.parse_args()
if len(args) < 1:
parser.error("Need to specify an action")
if args[0] == "list":
list()
else:
datadir = options.datadir
if args[0] == "runall":
runAll(datadir)
elif args[0] == "run":
if len(args) < 2:
parser.error("The run action requires a test name")
runTest(args[1],datadir)
if __name__ == "__main__":
main()

View File

@ -297,6 +297,11 @@
GCC_ENABLE_FIX_AND_CONTINUE = YES;
GCC_MODEL_TUNING = G5;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
TRACE_ENABLE,
_LARGE_FILES,
"_FILE_OFFSET_BITS=64",
);
HEADER_SEARCH_PATHS = ../moses/src;
INSTALL_PATH = /usr/local/bin;
LIBRARY_SEARCH_PATHS = (
@ -325,6 +330,11 @@
ALWAYS_SEARCH_USER_PATHS = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
GCC_MODEL_TUNING = G5;
GCC_PREPROCESSOR_DEFINITIONS = (
TRACE_ENABLE,
_LARGE_FILES,
"_FILE_OFFSET_BITS=64",
);
HEADER_SEARCH_PATHS = ../moses/src;
INSTALL_PATH = /usr/local/bin;
LIBRARY_SEARCH_PATHS = (

View File

@ -361,77 +361,55 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
}
}
std::string lastName = "";
// translation components
if (StaticData::Instance().GetInputType()==SentenceInput) {
// translation components for text input
vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries();
if (pds.size() > 0) {
if (labeledOutput)
out << "tm: ";
vector<PhraseDictionaryFeature*>::iterator iter;
for (iter = pds.begin(); iter != pds.end(); ++iter) {
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
for (size_t j = 0; j<scores.size(); ++j)
out << scores[j] << " ";
}
}
} else {
// translation components for Confusion Network input
// first translation component has GetNumInputScores() scores from the input Confusion Network
// at the beginning of the vector
vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries();
if (pds.size() > 0) {
vector<PhraseDictionaryFeature*>::iterator iter;
const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
if (pds.size() > 0) {
iter = pds.begin();
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
for( size_t i=0; i<pds.size(); i++ ) {
size_t pd_numinputscore = pds[i]->GetNumInputScores();
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
for (size_t j = 0; j<scores.size(); ++j){
size_t pd_numinputscore = (*iter)->GetNumInputScores();
if (pd_numinputscore) {
if (labeledOutput)
out << "I: ";
for (size_t j = 0; j < pd_numinputscore; ++j)
out << scores[j] << " ";
}
for (iter = pds.begin() ; iter != pds.end(); ++iter) {
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
size_t pd_numinputscore = (*iter)->GetNumInputScores();
if (iter == pds.begin() && labeledOutput)
out << "tm: ";
for (size_t j = pd_numinputscore; j < scores.size() ; ++j)
out << scores[j] << " ";
}
if (labeledOutput && (i == 0) ){
if ((j == 0) || (j == pd_numinputscore)){
lastName = pds[i]->GetScoreProducerWeightShortName(j);
out << " " << lastName << ":";
}
}
out << " " << scores[j];
}
}
}
// word penalty
if (labeledOutput)
out << "w: ";
out << " w: ";
out << path.GetScoreBreakdown().GetScoreForProducer(system->GetWordPenaltyProducer()) << " ";
// generation
const vector<GenerationDictionary*> gds = system->GetGenerationDictionaries();
const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
if (gds.size() > 0) {
if (labeledOutput)
out << "g: ";
vector<GenerationDictionary*>::const_iterator iter;
for (iter = gds.begin(); iter != gds.end(); ++iter) {
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
for (size_t j = 0; j<scores.size(); j++) {
out << scores[j] << " ";
}
for( size_t i=0; i<gds.size(); i++ ) {
size_t pd_numinputscore = gds[i]->GetNumInputScores();
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
for (size_t j = 0; j<scores.size(); ++j){
if (labeledOutput && (i == 0) ){
if ((j == 0) || (j == pd_numinputscore)){
lastName = gds[i]->GetScoreProducerWeightShortName(j);
out << " " << lastName << ":";
}
}
out << " " << scores[j];
}
}
}
// total
out << "||| " << path.GetTotalScore();

View File

@ -207,22 +207,40 @@ void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<Fa
}
}
void OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<const Hypothesis *> &edges)
void OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset)
{
typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
AlignVec alignments = ai.GetSortedAlignments();
AlignVec::const_iterator it;
for (it = alignments.begin(); it != alignments.end(); ++it) {
const std::pair<size_t,size_t> &alignment = **it;
out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " ";
}
}
void OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
{
ostringstream out;
size_t targetOffset = 0;
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
const Hypothesis &edge = *edges[currEdge];
const TargetPhrase &tp = edge.GetCurrTargetPhrase();
size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
AlignmentInfo::const_iterator it;
for (it = tp.GetAlignmentInfo().begin(); it != tp.GetAlignmentInfo().end(); ++it) {
out << it->first + sourceOffset << "-" << it->second + targetOffset << " ";
}
OutputAlignment(out, tp.GetAlignmentInfo(), sourceOffset, targetOffset);
targetOffset += tp.GetSize();
}
out << std::endl;
}
void OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<const Hypothesis *> &edges)
{
ostringstream out;
OutputAlignment(out, edges);
collector->Write(lineNo,out.str());
}
@ -364,68 +382,45 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con
// print scores with feature names
OutputAllFeatureScores( out, system, path );
string lastName;
// translation components
if (StaticData::Instance().GetInputType()==SentenceInput) {
// translation components for text input
vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries();
if (pds.size() > 0) {
if (labeledOutput)
out << " tm:";
vector<PhraseDictionaryFeature*>::iterator iter;
for (iter = pds.begin(); iter != pds.end(); ++iter) {
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
for (size_t j = 0; j<scores.size(); ++j)
out << " " << scores[j];
}
}
} else {
// translation components for Confusion Network input
// first translation component has GetNumInputScores() scores from the input Confusion Network
// at the beginning of the vector
vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries();
if (pds.size() > 0) {
vector<PhraseDictionaryFeature*>::iterator iter;
const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
if (pds.size() > 0) {
iter = pds.begin();
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
for( size_t i=0; i<pds.size(); i++ ) {
size_t pd_numinputscore = pds[i]->GetNumInputScores();
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
for (size_t j = 0; j<scores.size(); ++j){
size_t pd_numinputscore = (*iter)->GetNumInputScores();
if (pd_numinputscore) {
if (labeledOutput)
out << " I:";
for (size_t j = 0; j < pd_numinputscore; ++j)
out << " " << scores[j];
}
for (iter = pds.begin() ; iter != pds.end(); ++iter) {
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
size_t pd_numinputscore = (*iter)->GetNumInputScores();
if (iter == pds.begin() && labeledOutput)
out << " tm:";
for (size_t j = pd_numinputscore; j < scores.size() ; ++j)
out << " " << scores[j];
}
if (labeledOutput && (i == 0) ){
if ((j == 0) || (j == pd_numinputscore)){
lastName = pds[i]->GetScoreProducerWeightShortName(j);
out << " " << lastName << ":";
}
}
out << " " << scores[j];
}
}
}
// generation
const vector<GenerationDictionary*> gds = system->GetGenerationDictionaries();
const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
if (gds.size() > 0) {
if (labeledOutput)
out << " g: ";
vector<GenerationDictionary*>::const_iterator iter;
for (iter = gds.begin(); iter != gds.end(); ++iter) {
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
for (size_t j = 0; j<scores.size(); j++) {
out << scores[j] << " ";
}
for( size_t i=0; i<gds.size(); i++ ) {
size_t pd_numinputscore = gds[i]->GetNumInputScores();
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
for (size_t j = 0; j<scores.size(); ++j){
if (labeledOutput && (i == 0) ){
if ((j == 0) || (j == pd_numinputscore)){
lastName = gds[i]->GetScoreProducerWeightShortName(j);
out << " " << lastName << ":";
}
}
out << " " << scores[j];
}
}
}
@ -451,18 +446,17 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con
}
if (includeWordAlignment) {
out << " |||";
out << " ||| ";
for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
const Hypothesis &edge = *edges[currEdge];
const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
WordsRange targetRange = path.GetTargetWordsRange(edge);
const int sourceOffset = sourceRange.GetStartPos();
const int targetOffset = targetRange.GetStartPos();
const AlignmentInfo AI = edge.GetCurrTargetPhrase().GetAlignmentInfo();
AlignmentInfo::const_iterator iter;
for (iter = AI.begin(); iter != AI.end(); ++iter) {
out << " " << iter->first+sourceOffset << "-" << iter->second+targetOffset;
}
const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignmentInfo();
OutputAlignment(out, ai, sourceOffset, targetOffset);
}
}

View File

@ -341,6 +341,7 @@ int main(int argc, char** argv)
exit(1);
}
// create threadpool, if using multi-threaded decoding
// note: multi-threading is done on sentence-level,
// each thread translates one sentence

View File

@ -16,10 +16,11 @@
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include <cassert>
#include "AlignmentInfo.h"
#include "TypeDef.h"
#include "StaticData.h"
namespace Moses
{
@ -41,8 +42,47 @@ void AlignmentInfo::BuildNonTermIndexMap()
for (p = begin(); p != end(); ++p) {
m_nonTermIndexMap[p->second] = i++;
}
}
bool compare_target(const std::pair<size_t,size_t> *a, const std::pair<size_t,size_t> *b) {
if(a->second < b->second) return true;
if(a->second == b->second) return (a->first < b->first);
return false;
}
std::vector< const std::pair<size_t,size_t>* > AlignmentInfo::GetSortedAlignments() const
{
std::vector< const std::pair<size_t,size_t>* > ret;
CollType::const_iterator iter;
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter)
{
const std::pair<size_t,size_t> &alignPair = *iter;
ret.push_back(&alignPair);
}
const StaticData &staticData = StaticData::Instance();
WordAlignmentSort wordAlignmentSort = staticData.GetWordAlignmentSort();
switch (wordAlignmentSort)
{
case NoSort:
break;
case TargetOrder:
std::sort(ret.begin(), ret.end(), compare_target);
break;
default:
assert(false);
}
return ret;
}
std::ostream& operator<<(std::ostream &out, const AlignmentInfo &alignmentInfo)
{
AlignmentInfo::const_iterator iter;

View File

@ -51,6 +51,8 @@ class AlignmentInfo
return m_nonTermIndexMap;
}
std::vector< const std::pair<size_t,size_t>* > GetSortedAlignments() const;
private:
// AlignmentInfo objects should only be created by an AlignmentInfoCollection
explicit AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs)

View File

@ -67,7 +67,7 @@ public:
return "BleuScoreFeature";
}
std::string GetScoreProducerWeightShortName() const
std::string GetScoreProducerWeightShortName(unsigned) const
{
return "bl";
}

View File

@ -190,7 +190,6 @@ void ChartCell::GetSearchGraph(long translationId, std::ostream &outputSearchGra
const ChartHypothesisCollection &coll = iterOutside->second;
coll.GetSearchGraph(translationId, outputSearchGraphStream, reachable);
}
}
std::ostream& operator<<(std::ostream &out, const ChartCell &cell)

View File

@ -259,7 +259,8 @@ void ChartHypothesisCollection::GetSearchGraph(long translationId, std::ostream
HCType::const_iterator iter;
for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) {
ChartHypothesis &mainHypo = **iter;
if (reachable.find(mainHypo.GetId()) != reachable.end()) {
if (StaticData::Instance().GetUnprunedSearchGraph() ||
reachable.find(mainHypo.GetId()) != reachable.end()) {
outputSearchGraphStream << translationId << " " << mainHypo << endl;
}

View File

@ -254,7 +254,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
delete tpcollBerkeleyDb;
m_cache[tpCollFilePos] = targetPhraseCollection;
} else {
// jsut get out of cache
// just get out of cache
targetPhraseCollection = iterCache->second;
}

View File

@ -44,7 +44,7 @@ size_t DistortionScoreProducer::GetNumScoreComponents() const
return 1;
}
std::string DistortionScoreProducer::GetScoreProducerWeightShortName() const
std::string DistortionScoreProducer::GetScoreProducerWeightShortName(unsigned) const
{
return "d";
}
@ -105,7 +105,7 @@ size_t WordPenaltyProducer::GetNumScoreComponents() const
return 1;
}
std::string WordPenaltyProducer::GetScoreProducerWeightShortName() const
std::string WordPenaltyProducer::GetScoreProducerWeightShortName(unsigned) const
{
return "w";
}
@ -126,7 +126,7 @@ size_t UnknownWordPenaltyProducer::GetNumScoreComponents() const
}
std::string UnknownWordPenaltyProducer::GetScoreProducerWeightShortName() const
std::string UnknownWordPenaltyProducer::GetScoreProducerWeightShortName(unsigned) const
{
return "u";
}

View File

@ -21,7 +21,7 @@ public:
const WordsRange &prev, const WordsRange &curr, const int FirstGapPosition) const;
size_t GetNumScoreComponents() const;
std::string GetScoreProducerWeightShortName() const;
std::string GetScoreProducerWeightShortName(unsigned) const;
size_t GetNumInputScores() const;
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
@ -49,7 +49,7 @@ public:
WordPenaltyProducer() : StatelessFeatureFunction("WordPenalty") {}
size_t GetNumScoreComponents() const;
std::string GetScoreProducerWeightShortName() const;
std::string GetScoreProducerWeightShortName(unsigned) const;
size_t GetNumInputScores() const;
virtual void Evaluate(
@ -64,7 +64,7 @@ public:
UnknownWordPenaltyProducer() : StatelessFeatureFunction("!UnknownWordPenalty") {}
size_t GetNumScoreComponents() const;
std::string GetScoreProducerWeightShortName() const;
std::string GetScoreProducerWeightShortName(unsigned) const;
size_t GetNumInputScores() const;
virtual bool ComputeValueInTranslationOption() const;

View File

@ -60,7 +60,7 @@ void DynSuffixArray::BuildAuxArrays()
int DynSuffixArray::Rank(unsigned word, unsigned idx)
{
/* use Gerlach's code to make rank faster */
// the number of word in L[0..i]
// the number of words in L[0..i] (minus 1 which is why 'i < idx', not '<=')
int r(0);
for(unsigned i=0; i < idx; ++i)
if(m_L->at(i) == word) ++r;
@ -140,25 +140,33 @@ void DynSuffixArray::Insert(vuint_t* newSent, unsigned newIndex)
}
// Begin stage 4
Reorder(true_pos, LastFirstFunc(kprime)); // actual position vs computed position of cycle (newIndex-1)
cerr << "GETS HERE 13\n";
}
void DynSuffixArray::Reorder(unsigned j, unsigned jprime)
{
//cerr << "j=" << j << "\tj'=" << jprime << endl;
set<pair<unsigned, unsigned> > seen;
while(j != jprime) {
// this 'seenit' check added for data with many loops. will remove after double
// checking.
bool seenit = seen.insert(std::make_pair(j, jprime)).second;
if(seenit) {
for(int i=1; i < m_SA->size(); ++i) {
if(m_corpus->at(m_SA->at(i)) < m_corpus->at(m_SA->at(i-1))) {
cerr << "PROBLEM WITH SUFFIX ARRAY REORDERING. EXITING...\n";
exit(1);
}
}
return;
}
//cerr << "j=" << j << "\tj'=" << jprime << endl;
int tmp, isaIdx(-1);
int isaIdx(-1);
int new_j = LastFirstFunc(j);
cerr << "new_j = " << new_j << endl;
// for SA, L, and F, the element at pos j is moved to j'
tmp = m_L->at(j); // L
m_L->at(j) = m_L->at(jprime);
m_L->at(jprime) = tmp;
tmp = m_SA->at(j); // SA
m_SA->at(j) = m_SA->at(jprime);
m_SA->at(jprime) = tmp;
assert(j <= jprime);
// for SA and L, the element at pos j is moved to pos j'
m_L->insert(m_L->begin() + jprime + 1, m_L->at(j));
m_L->erase(m_L->begin() + j);
m_SA->insert(m_SA->begin() + jprime + 1, m_SA->at(j));
m_SA->erase(m_SA->begin() + j);
// all ISA values between (j...j'] decremented
for(size_t i = 0; i < m_ISA->size(); ++i) {
if((m_ISA->at(i) == j) && (isaIdx == -1))
@ -180,8 +188,8 @@ void DynSuffixArray::Delete(unsigned index, unsigned num2del)
int true_pos = LastFirstFunc(m_ISA->at(index)); // track cycle shift (newIndex - 1)
for(size_t q = 0; q < num2del; ++q) {
int row = m_ISA->at(index); // gives the position of index in SA and m_F
std::cerr << "row = " << row << std::endl;
std::cerr << "SA[r]/index = " << m_SA->at(row) << "/" << index << std::endl;
//std::cerr << "row = " << row << std::endl;
//std::cerr << "SA[r]/index = " << m_SA->at(row) << "/" << index << std::endl;
true_pos -= (row <= true_pos ? 1 : 0); // track changes
m_L->erase(m_L->begin() + row);
m_F->erase(m_F->begin() + row);
@ -198,7 +206,7 @@ void DynSuffixArray::Delete(unsigned index, unsigned num2del)
}
m_L->at(m_ISA->at(index))= ltmp;
Reorder(LastFirstFunc(m_ISA->at(index)), true_pos);
PrintAuxArrays();
//PrintAuxArrays();
}
void DynSuffixArray::Substitute(vuint_t* /* newSents */, unsigned /* newIndex */)

View File

@ -71,7 +71,7 @@ public:
bool Load(const std::string &filePath, FactorDirection direction);
size_t GetNumScoreComponents() const;
std::string GetScoreProducerWeightShortName() const
std::string GetScoreProducerWeightShortName(unsigned) const
{
return "g";
}

View File

@ -57,7 +57,7 @@ public:
return 1;
};
virtual std::string GetScoreProducerWeightShortName() const {
virtual std::string GetScoreProducerWeightShortName(unsigned) const {
return "lex";
};

View File

@ -276,17 +276,13 @@ void Hypothesis::CalcScore(const SquareMatrix &futureScore)
// cached in the translation option-- there is no principled distinction
const vector<const StatelessFeatureFunction*>& sfs =
m_manager.GetTranslationSystem()->GetStatelessFeatureFunctions();
VERBOSE(3,"There are " << sfs.size() << " stateless feature functions" << endl);
for (unsigned i = 0; i < sfs.size(); ++i) {
VERBOSE(3,"\tStateless score producer:\t" << sfs[i]->GetScoreProducerDescription() << endl);
sfs[i]->Evaluate(m_targetPhrase, &m_scoreBreakdown);
}
const vector<const StatefulFeatureFunction*>& ffs =
m_manager.GetTranslationSystem()->GetStatefulFeatureFunctions();
VERBOSE(3,"There are " << ffs.size() << " stateful feature functions" << endl);
for (unsigned i = 0; i < ffs.size(); ++i) {
VERBOSE(3,"\tStateful score producer:\t" << ffs[i]->GetScoreProducerDescription() << endl);
m_ffStates[i] = ffs[i]->Evaluate(
*this,
m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,

View File

@ -39,22 +39,34 @@ void LMList::CleanUp()
RemoveAllInColl(m_coll);
}
void LMList::CalcScore(const Phrase &phrase, float &retFullScore, float &retNGramScore, ScoreComponentCollection* breakdown) const
void LMList::CalcScore(const Phrase &phrase, float &retFullScore, float &retNGramScore, float &retOOVScore, ScoreComponentCollection* breakdown) const
{
const_iterator lmIter;
for (lmIter = begin(); lmIter != end(); ++lmIter) {
const LanguageModel &lm = **lmIter;
const float weightLM = lm.GetWeight();
const float oovWeightLM = lm.GetOOVWeight();
float fullScore, nGramScore;
float fullScore, nGramScore;
size_t oovCount;
// do not process, if factors not defined yet (happens in partial translation options)
if (!lm.Useable(phrase))
continue;
lm.CalcScore(phrase, fullScore, nGramScore);
lm.CalcScore(phrase, fullScore, nGramScore, oovCount);
if (StaticData::Instance().GetLMEnableOOVFeature()) {
vector<float> scores(2);
scores[0] = nGramScore;
scores[1] = oovCount;
breakdown->Assign(&lm, scores);
retOOVScore += oovCount * oovWeightLM;
} else {
breakdown->Assign(&lm, nGramScore); // I'm not sure why += doesn't work here- it should be 0.0 right?
}
breakdown->Assign(&lm, nGramScore); // I'm not sure why += doesn't work here- it should be 0.0 right?
retFullScore += fullScore * weightLM;
retNGramScore += nGramScore * weightLM;
}

View File

@ -34,7 +34,7 @@ public:
void CleanUp();
~LMList();
void CalcScore(const Phrase &phrase, float &retFullScore, float &retNGramScore, ScoreComponentCollection* breakdown) const;
void CalcScore(const Phrase &phrase, float &retFullScore, float &retNGramScore, float &retOOVScore, ScoreComponentCollection* breakdown) const;
void CalcAllLMScores(const Phrase &phrase
, ScoreComponentCollection &nGramOnly
@ -45,7 +45,6 @@ public:
size_t GetMaxNGramOrder() const
{ return m_maxNGramOrder; }
};
}

View File

@ -46,6 +46,7 @@ LanguageModel::LanguageModel(LanguageModelImplementation *implementation) :
StatefulFeatureFunction("LM"),
m_implementation(implementation)
{
m_enableOOVFeature = StaticData::Instance().GetLMEnableOOVFeature();
#ifndef WITH_THREADS
// ref counting handled by boost otherwise
m_implementation->IncrementReferenceCount();
@ -56,6 +57,7 @@ LanguageModel::LanguageModel(LanguageModel *loadedLM) :
StatefulFeatureFunction("LM"),
m_implementation(loadedLM->m_implementation)
{
m_enableOOVFeature = StaticData::Instance().GetLMEnableOOVFeature();
#ifndef WITH_THREADS
// ref counting handled by boost otherwise
m_implementation->IncrementReferenceCount();
@ -73,16 +75,23 @@ LanguageModel::~LanguageModel()
// don't inline virtual funcs...
size_t LanguageModel::GetNumScoreComponents() const
{
return 1;
if (m_enableOOVFeature) {
return 2;
} else {
return 1;
}
}
void LanguageModel::CalcScore(const Phrase &phrase
, float &fullScore
, float &ngramScore) const
, float &ngramScore
, size_t &oovCount) const
{
fullScore = 0;
ngramScore = 0;
oovCount = 0;
size_t phraseSize = phrase.GetSize();
if (!phraseSize) return;
@ -110,10 +119,13 @@ void LanguageModel::CalcScore(const Phrase &phrase
// do nothing, don't include prob for <s> unigram
assert(currPos == 0);
} else {
float partScore = m_implementation->GetValueGivenState(contextFactor, *state).score;
fullScore += partScore;
LMResult result = m_implementation->GetValueGivenState(contextFactor, *state);
fullScore += result.score;
if (contextFactor.size() == GetNGramOrder())
ngramScore += partScore;
ngramScore += result.score;
if (contextFactor.size() == 1 && result.unknown)
++oovCount;
}
}
@ -219,7 +231,16 @@ FFState* LanguageModel::Evaluate(
m_implementation->GetState(contextFactor, *res);
}
}
out->PlusEquals(this, lmScore);
if (m_enableOOVFeature) {
vector<float> scores(2);
scores[0] = lmScore;
scores[1] = 0;
out->PlusEquals(this, scores);
} else {
out->PlusEquals(this, lmScore);
}
IFVERBOSE(2) {
hypo.GetManager().GetSentenceStats().AddTimeCalcLM( clock()-t );
}
@ -227,7 +248,15 @@ FFState* LanguageModel::Evaluate(
}
float LanguageModel::GetWeight() const {
return StaticData::Instance().GetAllWeights().GetScoreForProducer(this);
return StaticData::Instance().GetAllWeights().GetScoresForProducer(this)[0];
}
float LanguageModel::GetOOVWeight() const {
if (m_enableOOVFeature) {
return StaticData::Instance().GetAllWeights().GetScoresForProducer(this)[1];
} else {
return 0;
}
}
FFState* LanguageModel::EvaluateChart(

View File

@ -52,6 +52,8 @@ protected:
#else
LanguageModelImplementation *m_implementation;
#endif
bool m_enableOOVFeature;
public:
@ -85,11 +87,13 @@ public:
* Useable() should be called beforehand on the phrase
* \param fullScore scores of all unigram, bigram... of contiguous n-gram of the phrase
* \param ngramScore score of only n-gram of order m_nGramOrder
* \param oovCount number of LM OOVs
*/
void CalcScore(
const Phrase &phrase,
float &fullScore,
float &ngramScore) const;
float &ngramScore,
size_t &oovCount) const;
void CalcScoreChart(
const Phrase &phrase,
@ -104,8 +108,9 @@ public:
float GetWeight() const;
float GetOOVWeight() const;
std::string GetScoreProducerWeightShortName() const
std::string GetScoreProducerWeightShortName(unsigned) const
{
return "lm";
}

View File

@ -97,7 +97,7 @@ float LanguageModelDMapLM::GetValue(
return score;
}
FFState* LanguageModelDMapLM::GetNullContextState() const {
const FFState* LanguageModelDMapLM::GetNullContextState() const {
DMapLMState* state = new DMapLMState();
state->m_last_succeeding_order = GetNGramOrder();
return state;
@ -109,7 +109,7 @@ FFState* LanguageModelDMapLM::GetNewSentenceState() const {
return state;
}
FFState* LanguageModelDMapLM::GetBeginSentenceState() const {
const FFState* LanguageModelDMapLM::GetBeginSentenceState() const {
DMapLMState* state = new DMapLMState();
state->m_last_succeeding_order = GetNGramOrder();
return state;

View File

@ -37,9 +37,9 @@ public:
LMResult GetValueGivenState(const std::vector<const Word*>&, FFState&) const;
LMResult GetValueForgotState(const std::vector<const Word*>&, FFState&) const;
float GetValue(const std::vector<const Word*>&, size_t, size_t*) const;
FFState* GetNullContextState() const;
const FFState* GetNullContextState() const;
FFState* GetNewSentenceState() const;
FFState* GetBeginSentenceState() const;
const FFState* GetBeginSentenceState() const;
FFState* NewState(const FFState*) const;
void CleanUpAfterSentenceProcessing();
void InitializeBeforeSentenceProcessing();

View File

@ -92,8 +92,8 @@ public:
// This is here so models can implement a shortcut to GetValueAndState.
virtual void GetState(const std::vector<const Word*> &contextFactor, FFState &outState) const;
virtual FFState *GetNullContextState() const = 0;
virtual FFState *GetBeginSentenceState() const = 0;
virtual const FFState *GetNullContextState() const = 0;
virtual const FFState *GetBeginSentenceState() const = 0;
virtual FFState *NewState(const FFState *from = NULL) const = 0;
//! max n-gram order of LM
@ -109,6 +109,11 @@ public:
return m_sentenceEndArray;
}
std::string GetScoreProducerWeightShortName(unsigned) const {
return "lm";
}
//! overrideable funtions for IRST LM to cleanup. Maybe something to do with on demand/cache loading/unloading
virtual void InitializeBeforeSentenceProcessing() {};
virtual void CleanUpAfterSentenceProcessing() {};

View File

@ -119,11 +119,11 @@ public:
return ret;
}
FFState *GetNullContextState() const {
const FFState *GetNullContextState() const {
return m_lmImpl->GetNullContextState();
}
FFState *GetBeginSentenceState() const {
const FFState *GetBeginSentenceState() const {
return m_lmImpl->GetBeginSentenceState();
}

View File

@ -22,6 +22,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <cassert>
#include <cstring>
#include <iostream>
#include <stdlib.h>
#include "lm/binary_format.hh"
#include "lm/enumerate_vocab.hh"
#include "lm/model.hh"
@ -40,6 +41,8 @@ using namespace std;
namespace Moses
{
LanguageModelKenBase::~LanguageModelKenBase() {}
namespace
{
@ -78,14 +81,14 @@ struct KenLMState : public FFState {
/** Implementation of single factor LM using Ken's code.
*/
template <class Model> class LanguageModelKen : public LanguageModelSingleFactor
template <class Model> class LanguageModelKen : public LanguageModelKenBase
{
private:
Model *m_ngram;
std::vector<lm::WordIndex> m_lmIdLookup;
bool m_lazy;
FFState *m_nullContextState;
FFState *m_beginSentenceState;
KenLMState m_nullContextState;
KenLMState m_beginSentenceState;
void TranslateIDs(const std::vector<const Word*> &contextFactor, lm::WordIndex *indices) const;
@ -97,12 +100,20 @@ public:
, FactorType factorType
, size_t nGramOrder);
LMResult GetValueGivenState(const std::vector<const Word*> &contextFactor, FFState &state) const;
LMResult GetValueForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const;
LMResult GetValueGivenState(const std::vector<const Word*> &contextFactor, FFState &state) const {
return GetKenFullScoreGivenState(contextFactor, state);
}
LMKenResult GetKenFullScoreGivenState(const std::vector<const Word*> &contextFactor, FFState &state) const;
LMResult GetValueForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const {
return GetKenFullScoreForgotState(contextFactor, outState);
}
LMKenResult GetKenFullScoreForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const;
void GetState(const std::vector<const Word*> &contextFactor, FFState &outState) const;
FFState *GetNullContextState() const;
FFState *GetBeginSentenceState() const;
const FFState *GetNullContextState() const;
const FFState *GetBeginSentenceState() const;
FFState *NewState(const FFState *from = NULL) const;
lm::WordIndex GetLmID(const std::string &str) const;
@ -159,24 +170,26 @@ template <class Model> bool LanguageModelKen<Model>::Load(const std::string &fil
config.enumerate_vocab = &builder;
config.load_method = m_lazy ? util::LAZY : util::POPULATE_OR_READ;
m_ngram = new Model(filePath.c_str(), config);
try {
m_ngram = new Model(filePath.c_str(), config);
} catch (std::exception &e) {
std::cerr << e.what() << std::endl;
abort();
}
m_nGramOrder = m_ngram->Order();
KenLMState *tmp = new KenLMState();
tmp->state = m_ngram->NullContextState();
m_nullContextState = tmp;
tmp = new KenLMState();
tmp->state = m_ngram->BeginSentenceState();
m_beginSentenceState = tmp;
m_nullContextState.state = m_ngram->NullContextState();
m_beginSentenceState.state = m_ngram->BeginSentenceState();
return true;
}
template <class Model> LMResult LanguageModelKen<Model>::GetValueGivenState(const std::vector<const Word*> &contextFactor, FFState &state) const
template <class Model> LMKenResult LanguageModelKen<Model>::GetKenFullScoreGivenState(const std::vector<const Word*> &contextFactor, FFState &state) const
{
LMResult result;
LMKenResult result;
if (contextFactor.empty()) {
result.score = 0.0;
result.unknown = false;
result.ngram_length = 0;
return result;
}
lm::ngram::State &realState = static_cast<KenLMState&>(state).state;
@ -187,16 +200,18 @@ template <class Model> LMResult LanguageModelKen<Model>::GetValueGivenState(cons
result.score = TransformLMScore(ret.prob);
result.unknown = (new_word == 0);
result.ngram_length = ret.ngram_length;
return result;
}
template <class Model> LMResult LanguageModelKen<Model>::GetValueForgotState(const vector<const Word*> &contextFactor, FFState &outState) const
template <class Model> LMKenResult LanguageModelKen<Model>::GetKenFullScoreForgotState(const vector<const Word*> &contextFactor, FFState &outState) const
{
LMResult result;
LMKenResult result;
if (contextFactor.empty()) {
static_cast<KenLMState&>(outState).state = m_ngram->NullContextState();
result.score = 0.0;
result.unknown = false;
result.ngram_length = 0;
return result;
}
@ -207,6 +222,7 @@ template <class Model> LMResult LanguageModelKen<Model>::GetValueForgotState(con
result.score = TransformLMScore(ret.prob);
result.unknown = (indices[0] == 0);
result.ngram_length = ret.ngram_length;
return result;
}
@ -221,14 +237,14 @@ template <class Model> void LanguageModelKen<Model>::GetState(const std::vector<
m_ngram->GetState(indices, indices + contextFactor.size(), static_cast<KenLMState&>(outState).state);
}
template <class Model> FFState *LanguageModelKen<Model>::GetNullContextState() const
template <class Model> const FFState *LanguageModelKen<Model>::GetNullContextState() const
{
return m_nullContextState;
return &m_nullContextState;
}
template <class Model> FFState *LanguageModelKen<Model>::GetBeginSentenceState() const
template <class Model> const FFState *LanguageModelKen<Model>::GetBeginSentenceState() const
{
return m_beginSentenceState;
return &m_beginSentenceState;
}
template <class Model> FFState *LanguageModelKen<Model>::NewState(const FFState *from) const

View File

@ -28,10 +28,26 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
// Doesn't actually load; moses wants the Load method for that. It needs the file to autodetect binary format.
// kenlm specific score value
struct LMKenResult : public LMResult {
unsigned char ngram_length;
};
// base-class for the actual LanguageModelKen; only here to provide a specific behaviour without exposing the implementation
class LanguageModelKenBase : public LanguageModelSingleFactor {
public:
virtual ~LanguageModelKenBase();
// scoring functions which provide more info than the common interface of LanguageModel
virtual LMKenResult GetKenFullScoreGivenState(const std::vector<const Word*> &contextFactor, FFState &state) const = 0;
virtual LMKenResult GetKenFullScoreForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const = 0;
};
// Doesn't actually load; moses wants the Load method for that. It needs the file to autodetect binary format.
LanguageModelSingleFactor *ConstructKenLM(const std::string &file, bool lazy);
};
}
#endif

View File

@ -286,12 +286,12 @@ FFState *LanguageModelParallelBackoff::NewState(const FFState * /*from*/) const
return NULL;
}
FFState *LanguageModelParallelBackoff::GetNullContextState() const
const FFState *LanguageModelParallelBackoff::GetNullContextState() const
{
return NULL;
}
FFState *LanguageModelParallelBackoff::GetBeginSentenceState() const
const FFState *LanguageModelParallelBackoff::GetBeginSentenceState() const
{
return NULL;
}

View File

@ -90,8 +90,8 @@ public:
void CreateFactors();
LMResult GetValueForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const;
FFState *GetNullContextState() const;
FFState *GetBeginSentenceState() const;
const FFState *GetNullContextState() const;
const FFState *GetBeginSentenceState() const;
FFState *NewState(const FFState *from) const;
};

View File

@ -61,12 +61,12 @@ LanguageModelPointerState::LanguageModelPointerState()
LanguageModelPointerState::~LanguageModelPointerState() {}
FFState *LanguageModelPointerState::GetNullContextState() const
const FFState *LanguageModelPointerState::GetNullContextState() const
{
return m_nullContextState;
}
FFState *LanguageModelPointerState::GetBeginSentenceState() const
const FFState *LanguageModelPointerState::GetBeginSentenceState() const
{
return m_beginSentenceState;
}

View File

@ -83,8 +83,8 @@ protected:
virtual ~LanguageModelPointerState();
virtual FFState *GetNullContextState() const;
virtual FFState *GetBeginSentenceState() const;
virtual const FFState *GetNullContextState() const;
virtual const FFState *GetBeginSentenceState() const;
virtual FFState *NewState(const FFState *from = NULL) const;
virtual LMResult GetValueForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const;

View File

@ -69,11 +69,11 @@ public:
return m_lmImpl->Load(filePath, m_factorType, nGramOrder);
}
FFState *GetNullContextState() const {
const FFState *GetNullContextState() const {
return m_lmImpl->GetNullContextState();
}
FFState *GetBeginSentenceState() const {
const FFState *GetBeginSentenceState() const {
return m_lmImpl->GetBeginSentenceState();
}

View File

@ -42,7 +42,7 @@ public:
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
std::string GetScoreProducerWeightShortName() const {
std::string GetScoreProducerWeightShortName(unsigned) const {
return "d";
};

View File

@ -70,6 +70,7 @@ libmoses_la_HEADERS = \
LanguageModelRemote.h \
LanguageModelSingleFactor.h \
LanguageModelSkip.h \
LanguageModelKen.h \
LexicalReordering.h \
LexicalReorderingState.h \
LexicalReorderingTable.h \

View File

@ -57,6 +57,7 @@ Parameter::Parameter()
AddParam("include-alignment-in-n-best", "include word alignment in the n-best list. default is false");
AddParam("lmodel-file", "location and properties of the language models");
AddParam("lmodel-dub", "dictionary upper bounds of language models");
AddParam("lmodel-oov-feature", "add language model oov feature, one per model");
AddParam("mapping", "description of decoding steps");
AddParam("max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)");
AddParam("max-trans-opt-per-coverage", "maximum number of translation options per input span (after applying mapping steps)");
@ -123,6 +124,7 @@ Parameter::Parameter()
AddParam("time-out", "seconds after which is interrupted (-1=no time-out, default is -1)");
AddParam("output-search-graph", "osg", "Output connected hypotheses of search into specified filename");
AddParam("output-search-graph-extended", "osgx", "Output connected hypotheses of search into specified filename, in extended format");
AddParam("unpruned-search-graph", "usg", "When outputting chart search graph, do not exclude dead ends. Note: stack pruning may have eliminated some hypotheses");
#ifdef HAVE_PROTOBUF
AddParam("output-search-graph-pb", "pb", "Write phrase lattice to protocol buffer objects in the specified path.");
#endif
@ -155,6 +157,7 @@ Parameter::Parameter()
AddParam("translation-systems", "specify multiple translation systems, each consisting of an id, followed by a set of models ids, eg '0 T1 R1 L0'");
AddParam("show-weights", "print feature weights and exit");
AddParam("alignment-output-file", "print output word alignments into given file");
AddParam("sort-word-alignment", "Sort word alignments for more consistent display. 0=no sort (default), 1=target order");
}
Parameter::~Parameter()
@ -314,7 +317,8 @@ bool Parameter::Validate()
}
}
if (m_setting["lmodel-file"].size() != m_setting["weight-l"].size()) {
if (m_setting["lmodel-file"].size() * (m_setting.find("lmodel-oov-feature") != m_setting.end() ? 2 : 1)
!= m_setting["weight-l"].size()) {
stringstream errorMsg("");
errorMsg << "Config and parameters specify "
<< static_cast<int>(m_setting["lmodel-file"].size())
@ -322,6 +326,7 @@ bool Parameter::Validate()
<< static_cast<int>(m_setting["weight-l"].size())
<< " weights (weight-l)";
errorMsg << endl << "You might be giving '-lmodel-file TYPE FACTOR ORDER FILENAME' but you should be giving these four as a single argument, i.e. '-lmodel-file \"TYPE FACTOR ORDER FILENAME\"'";
errorMsg << endl << "You should also remember that each language model requires 2 weights, if and only if lmodel-oov-feature is on.";
UserMessage::Add(errorMsg.str());
noErrorFlag = false;
}

View File

@ -27,7 +27,7 @@ size_t PhraseBoundaryFeature::GetNumScoreComponents() const
return ScoreProducer::unlimited;
}
string PhraseBoundaryFeature::GetScoreProducerWeightShortName() const
string PhraseBoundaryFeature::GetScoreProducerWeightShortName(unsigned) const
{
return "pb";
}

View File

@ -34,7 +34,7 @@ public:
PhraseBoundaryFeature(const FactorList& sourceFactors, const FactorList& targetFactors);
size_t GetNumScoreComponents() const;
std::string GetScoreProducerWeightShortName() const;
std::string GetScoreProducerWeightShortName(unsigned) const;
size_t GetNumInputScores() const;
virtual const FFState* EmptyHypothesisState(const InputType &) const;

View File

@ -224,6 +224,14 @@ PhraseDictionaryFeature::~PhraseDictionaryFeature()
{}
std::string PhraseDictionaryFeature::GetScoreProducerWeightShortName(unsigned idx) const
{
if (idx < GetNumInputScores()){
return "I";
}else{
return "tm";
}
}
size_t PhraseDictionaryFeature::GetNumScoreComponents() const
{

View File

@ -113,9 +113,8 @@ public:
virtual bool ComputeValueInTranslationOption() const;
std::string GetScoreProducerWeightShortName() const {
return "tm";
}
std::string GetScoreProducerWeightShortName(unsigned idx=0) const;
size_t GetNumScoreComponents() const;
size_t GetNumInputScores() const;

View File

@ -68,7 +68,7 @@ public:
const LMList& languageModels,
const WordPenaltyProducer* wpProducer);
std::string GetScoreProducerDescription() const {
std::string GetScoreProducerDescription(unsigned) const {
return "BerkeleyPt";
}

View File

@ -66,7 +66,7 @@ public:
}
virtual ~PhraseDictionarySCFG();
std::string GetScoreProducerDescription() const {
std::string GetScoreProducerDescription(unsigned) const {
return "Hieu's Reordering Model";
}

View File

@ -661,7 +661,7 @@ GetTargetCandidates(PrefixPtr p,
imp->ConvertTgtCand(tcands,rv,wa);
}
std::string PhraseDictionaryTree::GetScoreProducerDescription() const
std::string PhraseDictionaryTree::GetScoreProducerDescription(unsigned) const
{
return "PhraseDictionaryTree";
}

View File

@ -122,8 +122,8 @@ public:
// print target candidates for a given prefix pointer to a stream, mainly
// for debugging
void PrintTargetCandidates(PrefixPtr p,std::ostream& out) const;
std::string GetScoreProducerDescription() const;
std::string GetScoreProducerWeightShortName() const {
std::string GetScoreProducerDescription(unsigned) const;
std::string GetScoreProducerWeightShortName(unsigned) const {
return "tm";
}
};

View File

@ -105,11 +105,21 @@ size_t PhraseDictionaryTreeAdaptor::GetNumInputScores() const
return imp->GetNumInputScores();
}
std::string PhraseDictionaryTreeAdaptor::GetScoreProducerDescription() const
std::string PhraseDictionaryTreeAdaptor::GetScoreProducerDescription(unsigned idx) const{
if (idx < imp->GetNumInputScores()){
return "InputScore";
}else{
return "PhraseModel";
}
}
std::string PhraseDictionaryTreeAdaptor::GetScoreProducerWeightShortName(unsigned idx) const
{
return "PhraseModel";
if (idx < imp->GetNumInputScores()){
return "I";
}else{
return "tm";
}
}
}

View File

@ -60,10 +60,8 @@ public:
// this function can be only used for UNKNOWN source phrases
void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase);
std::string GetScoreProducerDescription() const;
std::string GetScoreProducerWeightShortName() const {
return "tm";
}
std::string GetScoreProducerDescription(unsigned idx=0) const;
std::string GetScoreProducerWeightShortName(unsigned idx=0) const;
size_t GetNumInputScores() const;
virtual void InitializeForInput(InputType const& source);

View File

@ -25,7 +25,7 @@ public:
// basic properties
size_t GetNumScoreComponents() const { return ScoreProducer::unlimited; }
std::string GetScoreProducerWeightShortName() const { return "pl"; }
std::string GetScoreProducerWeightShortName(unsigned) const { return "pl"; }
size_t GetNumInputScores() const { return 0; }
};

View File

@ -19,7 +19,7 @@ size_t PhrasePairFeature::GetNumScoreComponents() const
return ScoreProducer::unlimited;
}
string PhrasePairFeature::GetScoreProducerWeightShortName() const
string PhrasePairFeature::GetScoreProducerWeightShortName(unsigned) const
{
return "pp";
}

View File

@ -23,7 +23,7 @@ class PhrasePairFeature: public StatelessFeatureFunction {
size_t GetNumScoreComponents() const;
std::string GetScoreProducerWeightShortName() const;
std::string GetScoreProducerWeightShortName(unsigned) const;
size_t GetNumInputScores() const;

View File

@ -43,7 +43,7 @@ public:
const std::string& GetScoreProducerDescription() const {return m_description;}
//! returns the weight parameter name of this producer (used in n-best list)
virtual std::string GetScoreProducerWeightShortName() const = 0;
virtual std::string GetScoreProducerWeightShortName(unsigned idx=0) const = 0;
//! returns the number of scores gathered from the input (0 by default)
virtual size_t GetNumInputScores() const {

View File

@ -31,7 +31,7 @@ public:
// basic properties
size_t GetNumScoreComponents() const { return ScoreProducer::unlimited; }
std::string GetScoreProducerWeightShortName() const { return "swd"; }
std::string GetScoreProducerWeightShortName(unsigned) const { return "swd"; }
size_t GetNumInputScores() const { return 0; }
};

View File

@ -95,6 +95,7 @@ StaticData::StaticData()
,m_detailedTranslationReportingFilePath()
,m_onlyDistinctNBest(false)
,m_factorDelimiter("|") // default delimiter between factors
,m_lmEnableOOVFeature(false)
,m_isAlwaysCreateDirectTranslationOption(false)
{
@ -145,6 +146,10 @@ bool StaticData::LoadData(Parameter *parameter)
}
}
if(m_parameter->GetParam("sort-word-alignment").size()) {
m_wordAlignmentSort = (WordAlignmentSort) Scan<size_t>(m_parameter->GetParam("sort-word-alignment")[0]);
}
// factor delimiter
if (m_parameter->GetParam("factor-delimiter").size() > 0) {
m_factorDelimiter = m_parameter->GetParam("factor-delimiter")[0];
@ -223,6 +228,7 @@ bool StaticData::LoadData(Parameter *parameter)
} else
m_outputSearchGraphPB = false;
#endif
SetBooleanParameter( &m_unprunedSearchGraph, "unpruned-search-graph", true );
// include feature names in the n-best list
SetBooleanParameter( &m_labeledNBestList, "labeled-n-best-list", true );
@ -359,6 +365,8 @@ bool StaticData::LoadData(Parameter *parameter)
// unknown word processing
SetBooleanParameter( &m_dropUnknown, "drop-unknown", false );
SetBooleanParameter( &m_lmEnableOOVFeature, "lmodel-oov-feature", false);
// minimum Bayes risk decoding
SetBooleanParameter( &m_mbr, "minimum-bayes-risk", false );
m_mbrSize = (m_parameter->GetParam("mbr-size").size() > 0) ?
@ -472,19 +480,19 @@ bool StaticData::LoadData(Parameter *parameter)
if (m_parameter->GetParam("report-sparse-features").size() > 0) {
for(size_t i=0; i<m_parameter->GetParam("report-sparse-features").size(); i++) {
const std::string &name = m_parameter->GetParam("report-sparse-features")[i];
if (m_targetBigramFeature && name.compare(m_targetBigramFeature->GetScoreProducerWeightShortName()) == 0)
if (m_targetBigramFeature && name.compare(m_targetBigramFeature->GetScoreProducerWeightShortName(0)) == 0)
m_targetBigramFeature->SetSparseFeatureReporting();
if (m_phrasePairFeature && name.compare(m_phrasePairFeature->GetScoreProducerWeightShortName()) == 0)
if (m_phrasePairFeature && name.compare(m_phrasePairFeature->GetScoreProducerWeightShortName(0)) == 0)
m_phrasePairFeature->SetSparseFeatureReporting();
if (m_phraseBoundaryFeature && name.compare(m_phraseBoundaryFeature->GetScoreProducerWeightShortName()) == 0)
if (m_phraseBoundaryFeature && name.compare(m_phraseBoundaryFeature->GetScoreProducerWeightShortName(0)) == 0)
m_phraseBoundaryFeature->SetSparseFeatureReporting();
if (m_phraseLengthFeature && name.compare(m_phraseLengthFeature->GetScoreProducerWeightShortName()) == 0)
if (m_phraseLengthFeature && name.compare(m_phraseLengthFeature->GetScoreProducerWeightShortName(0)) == 0)
m_phraseLengthFeature->SetSparseFeatureReporting();
if (m_targetWordInsertionFeature && name.compare(m_targetWordInsertionFeature->GetScoreProducerWeightShortName()) == 0)
if (m_targetWordInsertionFeature && name.compare(m_targetWordInsertionFeature->GetScoreProducerWeightShortName(0)) == 0)
m_targetWordInsertionFeature->SetSparseFeatureReporting();
if (m_sourceWordDeletionFeature && name.compare(m_sourceWordDeletionFeature->GetScoreProducerWeightShortName()) == 0)
if (m_sourceWordDeletionFeature && name.compare(m_sourceWordDeletionFeature->GetScoreProducerWeightShortName(0)) == 0)
m_sourceWordDeletionFeature->SetSparseFeatureReporting();
if (m_wordTranslationFeature && name.compare(m_wordTranslationFeature->GetScoreProducerWeightShortName()) == 0)
if (m_wordTranslationFeature && name.compare(m_wordTranslationFeature->GetScoreProducerWeightShortName(0)) == 0)
m_wordTranslationFeature->SetSparseFeatureReporting();
}
}

View File

@ -187,6 +187,7 @@ protected:
float m_lmbrMapWeight; //! Weight given to the map solution. See Kumar et al 09 for details
size_t m_lmcache_cleanup_threshold; //! number of translations after which LM claenup is performed (0=never, N=after N translations; default is 1)
bool m_lmEnableOOVFeature;
bool m_timeout; //! use timeout
size_t m_timeout_threshold; //! seconds after which time out is activated
@ -208,6 +209,7 @@ protected:
#ifdef HAVE_PROTOBUF
bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf
#endif
bool m_unprunedSearchGraph; //! do not exclude dead ends (chart decoder only)
size_t m_cubePruningPopLimit;
size_t m_cubePruningDiversity;
@ -220,7 +222,7 @@ protected:
Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal;
SourceLabelOverlap m_sourceLabelOverlap;
UnknownLHSList m_unknownLHS;
WordAlignmentSort m_wordAlignmentSort;
StaticData();
@ -568,6 +570,10 @@ public:
return m_lmcache_cleanup_threshold;
}
bool GetLMEnableOOVFeature() const {
return m_lmEnableOOVFeature;
}
bool GetOutputSearchGraph() const {
return m_outputSearchGraph;
}
@ -582,6 +588,9 @@ public:
return m_outputSearchGraphPB;
}
#endif
bool GetUnprunedSearchGraph() const {
return m_unprunedSearchGraph;
}
XmlInputType GetXmlInputType() const {
return m_xmlInputType;
@ -627,7 +636,6 @@ public:
return 999999; /* TODO wtf! */
}
bool ContinuePartialTranslation() const {
return m_continuePartialTranslation;
}
@ -639,6 +647,9 @@ public:
}
void SetAllWeightsScoreComponentCollection(const ScoreComponentCollection &weightsScoreComponentCollection);
WordAlignmentSort GetWordAlignmentSort() const {
return m_wordAlignmentSort;
}
};
}

View File

@ -36,11 +36,11 @@ namespace Moses
return m_NumScoreComponents;
}
std::string SyntacticLanguageModel::GetScoreProducerDescription() const {
std::string SyntacticLanguageModel::GetScoreProducerDescription(unsigned) const {
return "Syntactic Language Model";
}
std::string SyntacticLanguageModel::GetScoreProducerWeightShortName() const {
std::string SyntacticLanguageModel::GetScoreProducerWeightShortName(unsigned) const {
return "slm";
}

Some files were not shown because too many files have changed in this diff Show More