Merge remote branch 'github/master' into miramerge

Compiles, but not tested. Had to disable relent filter. Strangely, it seems to contain the
whole of moses-cmd.

Conflicts:
	Jamroot
	OnDiskPt/TargetPhrase.cpp
	moses-cmd/src/Main.cpp
	moses/src/AlignmentInfo.cpp
	moses/src/AlignmentInfo.h
	moses/src/ChartTranslationOptionCollection.cpp
	moses/src/ChartTranslationOptionCollection.h
	moses/src/GenerationDictionary.cpp
	moses/src/Jamfile
	moses/src/Parameter.cpp
	moses/src/PhraseDictionary.cpp
	moses/src/StaticData.cpp
	moses/src/StaticData.h
	moses/src/TargetPhrase.h
	moses/src/TranslationSystem.cpp
	moses/src/TranslationSystem.h
	moses/src/Word.cpp
	phrase-extract/score.cpp
	regression-testing/Jamfile
	scripts/ems/experiment.meta
	scripts/ems/experiment.perl
	scripts/training/train-model.perl
This commit is contained in:
Barry Haddow 2012-09-26 22:49:33 +01:00
commit 0a950ee9f4
324 changed files with 59117 additions and 3283 deletions

4
.gitignore vendored
View File

@ -61,3 +61,7 @@ scripts/training/train-model.perl
dist
bin
previous.sh
contrib/other-builds/*.xcodeproj/project.xcworkspace/
contrib/other-builds/*.xcodeproj/xcuserdata/
*/*.xcodeproj/project.xcworkspace
*/*.xcodeproj/xcuserdata

3
.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "regression-testing/tests"]
path = regression-testing/tests
url = ../moses-regression-tests.git

23
Jamroot
View File

@ -15,13 +15,15 @@
#Note that, like language models, this is the --prefix where the library was
#installed, not some executable within the library.
#
#Compact phrase table and compact lexical reordering table
#--with-cmph=/path/to/cmph
#
#Thread-caching malloc (optional):
#--with-tcmalloc
#
#REGRESSION TESTING
#--with-regtest=/path/to/moses-reg-test-data
#
#
#INSTALLATION
#--prefix=/path/to/prefix sets the install prefix [default is source root].
#--bindir=/path/to/prefix/bin sets the bin directory [PREFIX/bin]
@ -29,6 +31,7 @@
#--includedir=/path/to/prefix/include installs headers.
# Does not install if missing. No argument defaults to PREFIX/include .
#--install-scripts=/path/to/scripts copies scripts into a directory.
# Does not install if missing. No argument defaults to PREFIX/scripts .
#--git appends the git revision to the prefix directory.
#
#
@ -41,7 +44,9 @@
# variant=release|debug|profile builds optimized (default), for debug, or for
# profiling
#
# link=static|shared controls linking (default static)
# link=static|shared controls preferred linking (default static)
# --static forces static linking (the default will fall
# back to shared)
#
# debug-symbols=on|off include (default) or exclude debugging
# information also known as -g
@ -50,6 +55,9 @@
# --enable-boost-pool uses Boost pools for the memory SCFG table
#
# --enable-mpi switch on mpi
# --without-libsegfault does not link with libSegFault
#
# --max-kenlm-order maximum ngram order that kenlm can process (default 6)
#
#CONTROLLING THE BUILD
#-a to build from scratch
@ -84,6 +92,10 @@ if [ option.get "enable-mpi" : : "yes" ] {
requirements += [ option.get "notrace" : <define>TRACE_ENABLE=1 ] ;
requirements += [ option.get "enable-boost-pool" : : <define>USE_BOOST_POOL ] ;
if [ option.get "with-cmph" ] {
requirements += <define>HAVE_CMPH ;
}
project : default-build
<threading>multi
<warnings>on
@ -99,12 +111,13 @@ project : requirements
$(requirements)
;
build-projects util lm mert moses-cmd/src moses-chart-cmd/src mira scripts regression-testing ;
#Add directories here if you want their incidental targets too (i.e. tests).
build-projects util lm mert moses-cmd/src moses-chart-cmd/src mira scripts regression-testing ;
alias programs : lm//query lm//build_binary moses-chart-cmd/src//moses_chart moses-cmd/src//programs OnDiskPt//CreateOnDiskPt OnDiskPt//queryOnDiskPt mert//programs contrib/server//mosesserver misc//programs mira//programs symal phrase-extract phrase-extract//lexical-reordering phrase-extract//extract-ghkm phrase-extract//pcfg-extract phrase-extract//pcfg-score biconcor ;
alias programs : lm//query lm//build_binary lm//kenlm_max_order moses-chart-cmd/src//moses_chart moses-cmd/src//programs OnDiskPt//CreateOnDiskPt OnDiskPt//queryOnDiskPt mert//programs contrib/server//mosesserver misc//programs mira//programs symal phrase-extract phrase-extract//lexical-reordering phrase-extract//extract-ghkm phrase-extract//pcfg-extract phrase-extract//pcfg-score biconcor ;
install-bin-libs programs ;
install-headers headers-base : [ glob-tree *.h *.hh : jam-files dist bin lib include kenlm moses ] : . ;
install-headers headers-base : [ path.glob-tree biconcor contrib lm mert misc moses-chart-cmd moses-cmd OnDiskPt phrase-extract symal util : *.hh *.h ] : . ;
install-headers headers-moses : moses/src//headers-to-install : moses/src ;
alias install : prefix-bin prefix-lib headers-base headers-moses ;

View File

@ -240,9 +240,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
--phraseSize;
for (size_t pos = 0; pos < phraseSize; ++pos) {
Moses::Word *mosesWord = GetWord(pos).ConvertToMoses(Moses::Output, outputFactors, vocab);
ret->AddWord(*mosesWord);
delete mosesWord;
GetWord(pos).ConvertToMoses(outputFactors, vocab, ret->AddWord());
}
// scores
@ -261,16 +259,12 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
}
ret->SetAlignmentInfo(alignmentInfo, indicator);
Moses::Word *lhs = GetWord(GetSize() - 1).ConvertToMoses(Moses::Output, outputFactors, vocab);
ret->SetTargetLHS(*lhs);
delete lhs;
GetWord(GetSize() - 1).ConvertToMoses(outputFactors, vocab, ret->MutableTargetLHS());
// set source phrase
Moses::Phrase mosesSP(Moses::Input);
for (size_t pos = 0; pos < sp->GetSize(); ++pos) {
Moses::Word *mosesWord = sp->GetWord(pos).ConvertToMoses(Moses::Input, inputFactors, vocab);
mosesSP.AddWord(*mosesWord);
delete mosesWord;
sp->GetWord(pos).ConvertToMoses(inputFactors, vocab, mosesSP.AddWord());
}
ret->SetSourcePhrase(mosesSP);

View File

@ -23,6 +23,9 @@
#include "../moses/src/Word.h"
#include "Word.h"
#include "util/tokenize_piece.hh"
#include "util/exception.hh"
using namespace std;
namespace OnDiskPt
@ -94,23 +97,21 @@ size_t Word::ReadFromFile(std::fstream &file)
return memUsed;
}
Moses::Word *Word::ConvertToMoses(Moses::FactorDirection direction
, const std::vector<Moses::FactorType> &outputFactorsVec
, const Vocab &vocab) const
{
void Word::ConvertToMoses(
const std::vector<Moses::FactorType> &outputFactorsVec,
const Vocab &vocab,
Moses::Word &overwrite) const {
Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance();
Moses::Word *ret = new Moses::Word(m_isNonTerminal);
overwrite = Moses::Word(m_isNonTerminal);
const string &str = vocab.GetString(m_vocabId);
vector<string> toks = Moses::Tokenize(str, "|");
for (size_t ind = 0; ind < toks.size(); ++ind) {
Moses::FactorType factorType = outputFactorsVec[ind];
const Moses::Factor *factor = factorColl.AddFactor(direction, factorType, toks[ind]);
ret->SetFactor(factorType, factor);
// TODO: this conversion should have been done at load time.
util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
UTIL_THROW_IF(!tok, util::Exception, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
overwrite.SetFactor(*t, factorColl.AddFactor(*tok));
}
return ret;
UTIL_THROW_IF(tok, util::Exception, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
}
int Word::Compare(const Word &compare) const

View File

@ -71,9 +71,10 @@ public:
m_vocabId = vocabId;
}
Moses::Word *ConvertToMoses(Moses::FactorDirection direction
, const std::vector<Moses::FactorType> &outputFactorsVec
, const Vocab &vocab) const;
void ConvertToMoses(
const std::vector<Moses::FactorType> &outputFactorsVec,
const Vocab &vocab,
Moses::Word &overwrite) const;
virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const;

4
bjam
View File

@ -4,8 +4,8 @@ if
bjam="$(which bjam 2>/dev/null)" && #exists
[ ${#bjam} != 0 ] && #paranoia about which printing nothing then returning true
! grep UFIHGUFIHBDJKNCFZXAEVA "${bjam}" </dev/null >/dev/null && #bjam in path isn't this script
"${bjam}" --help >/dev/null 2>/dev/null && #bjam in path isn't broken (i.e. has boost-build)
"${bjam}" --version |grep "Boost.Build 201" >/dev/null 2>/dev/null #It's recent enough.
"${bjam}" --sanity-test 2>/dev/null |grep Sane >/dev/null && #The test in jam-files/sanity.jam passes
(cd jam-files/fail && ! "${bjam}") >/dev/null #Returns non-zero on failure
then
#Delegate to system bjam
exec "${bjam}" "$@"

View File

@ -0,0 +1,139 @@
`combine-ptables.pl`: fill-up and other techniques of translation models combination.
Author:
Arianna Bisazza bisazza[AT]fbk.eu
ABOUT
-----
This tool implements "fill-up" and other operations that are useful to combine translation and reordering tables.
In the "fill-up" approach, the weights of out-domain data sources are estimated directly by MERT along with the
other model weights.
This tool also supports linear interpolation, but weights must be provided by the user.
If you want to automatically estimate linear interpolation weights, use `contrib/tmcombine` instead.
REFERENCE
---------
When using this script, please cite:
Arianna Bisazza, Nick Ruiz, and Marcello Federico. 2011.
"Fill-up versus Interpolation Methods for Phrase-based SMT Adaptation."
In International Workshop on Spoken Language Translation (IWSLT), San Francisco, CA.
FILL-UP
-------
This combination technique is useful when the relevance of the models is known a priori,
e.g. when one is trained on in-domain data and the others on out-of-domain data.
This mode preserves all the entries and scores coming from the first model, and adds
entries from the other models only if new.
If more than two tables are provided, each entry is taken only from the first table
that contains it.
Moreover, a binary feature is added for each additional table to denote the provenance
of an entry. For in-domain entries, the binary features are all set to 1 (=exp(0)).
Entries coming from the 2nd table will have the 1st binary feature set to 2.718 (=exp(1)).
This technique was proposed in the following works:
Preslav Nakov. 2008.
"Improving English-Spanish Statistical Machine Translation: Experiments in Domain
Adaptation, Sentence Paraphrasing, Tokenization, and Recasing."
In Workshop on Statistical Machine Translation.
Arianna Bisazza, Nick Ruiz, and Marcello Federico. 2011.
"Fill-up versus Interpolation Methods for Phrase-based SMT Adaptation."
In International Workshop on Spoken Language Translation (IWSLT), San Francisco, CA.
The latter paper contains details about the present implementation as well as an empirical
evaluation of fill-up against other combination techniques.
Reordering model fill-up, cascaded fill-up and pruning criteria are also discussed in the
same paper.
Among the findings of this paper, pruning new (out-of-domain) phrases with more than 4
source words appeared to be beneficial on the Arabic-English TED task when combining the
in-domain models with MultiUn models.
This corresponds to the option:
`--newSourceMaxLength=4`
LINEAR INTERPOLATION
--------------------
This combination technique consists in linearly combining the feature values coming
from all tables. The combination weights should be provided by the user, otherwise
uniform weights are assumed.
When a phrase pair is absent from a table, a constant value (epsilon) is assumed for
the corresponding feature values. You may want to set your own epsilon.
See [Bisazza et al. 2011] for an empirical comparison of uniformly weighted linear
interpolation against fill-up and decoding-time log-linear interpolation. In that paper,
epsilon was always set to 1e-06.
UNION
-----
This combination technique creates the union of all phrase pairs and assigns to each
of them the concatenation of all tables scores.
INTERSECTION
------------
This combination technique creates the intersection of all phrase pairs: each phrase
pair that occurs in all phrase tables is output along with the feature vector taken
from the *first* table.
The intersection can be used to prune the reordering table in order to match the
entries of a corresponding pruned phrase table.
USAGE
-----
Get statistics about overlap of entries:
`combine-ptables.pl --mode=stats ptable1 ptable2 ... ptableN > ptables-overlap-stats`
Interpolate phrase tables...
- with uniform weights:
`combine-ptables.pl --mode=interp --phpenalty-at=4 ptable1 ptable2 ptable3 > interp-ptable.X`
- with custom weights:
`combine-ptables.pl --mode=interp --phpenalty-at=4 --weights=0.8,0.1,0.1 ptable1 ptable2 ptable3 > interp-ptable.Y`
- with custom epsilon:
`combine-ptables.pl --mode=interp --phpenalty-at=4 --epsilon=1e-05 ptable1 ptable2 ptable3 > interp-ptable.Z`
Fillup phrase tables...
- unpruned:
`combine-ptables.pl --mode=fillup ptable1 ptable2 ... ptableN > fillup-ptable`
- pruned (new phrases only with max. 4 source words):
`combine-ptables.pl --mode=fillup --newSourceMaxLength=4 ptable1 ptable2 ... ptableN > fillup-ptable`
Given a pruned phrase table, prune the corresponding reordering table:
`combine-ptables.pl --mode=intersect1 reotable1-unpruned ptable1-pruned > reotable1-pruned`
NOTES
-----
The script works only with textual (non-binarized) phrase or reordering tables
that were *previously sorted* with `LC_ALL=C sort`
The resulting combined tables are also textual and need to be binarized in the usual way.
The script combine-ptables.pl can be used on lexicalized reordering tables as well.
Input tables can be gzipped.
When integrating filled up models into a Moses system, remember to:
- specify the correct number of features (typically 6) under [ttable-file] in the configuration file `moses.ini`
- add a weight under [weight-t] in `moses.ini`
- if you binarize the models, provide the correct number of features to the command:
`$moses/bin/processPhraseTable -ttable 0 0 - -nscores $nbFeatures`

View File

@ -0,0 +1,425 @@
#! /usr/bin/perl
#******************************************************************************
# Arianna Bisazza @ FBK-irst. March 2012
#******************************************************************************
# combine-ptables.pl : Combine Moses-style phrase tables, using different approaches
use strict;
use open ':utf8';
binmode STDIN, ':utf8';
binmode STDOUT, ':utf8';
use Getopt::Long "GetOptions";
sub main {
my $usage = "
USAGE
-----
combine-ptables.pl --mode=(interp|union|fillup|intersect1|stats) ptable1 ptable2 ... ptableN > combined-ptable
combine-ptables.pl --mode=intersect1 reotable-unpruned ptable-pruned > reotable-pruned
-----
#
# This scripts reads two or more *sorted* phrase tables and combines them in different modes.
#
# (Note: if present, word alignments are ignored).
#
# ----------------
# OPTIONS
# ----------------
#
# Required:
# --mode fillup: Each entry is taken only from the first table that contains it.
# A binary feature is added from each table except the first.
# interp: Linear interpolation.
# union: Union of entries, feature vectors are concatenated.
# intersect1: Intersection of entries, feature vectors taken from the first table.
# stats: Only compute some statistics about tables overlap. No table is produced.
#
# NOTE: if present, additional fields such as word alignment, phrase counts etc. are always
# taken from the first table.
#
# Generic options:
# --phpenalty=FLOAT Constant value for phrase penalty. Default is exp(1)=2.718
# --phpenalty-at=N The (N+1)th score of each table is considered as phrase penalty with a constant value.
# In 'interp' mode, the corresponding feature is not interpolated but simply set to the constant.
# In 'union' mode, the ph.penalty (constant) is output only once, after all the other scores.
# By default, no score is considered as phrase penalty.
#
#
# Options for 'fillup':
# --newSourceMaxLength=INT Don't include \"new\" source phrases if longer than INT words.
#
# Options for 'interp':
# --weights=W1,W2,...WN Weights for interpolation. By default, uniform weights are applied.
# --epsilon=X Score to assume when a phrase pair is not contained in a table (in 'interp' and 'union' modes).
# Default epsilon is 1e-06.
#
# Options for 'union':
#
#
";
my $combination_mode = '';
my $debug = '';
my $weights_str = '';
my $epsilon = 0.000001;
my $phPenalty = 2.718; # exp(1)
my $phPenalty_idx = -1;
my $delim= " ||| ";
my $delim_RE= ' \|\|\| ';
my $exp_one = 2.718;
my $exp_zero = 1;
my $newSourceMaxLength = -1;
my $help = '';
GetOptions ('debug' => \$debug,
'mode=s' => \$combination_mode,
'weights=s' => \$weights_str,
'epsilon=f' => \$epsilon,
'phpenalty=f' => \$phPenalty,
'phpenalty-at=i' => \$phPenalty_idx,
'newSourceMaxLength=i' => \$newSourceMaxLength,
'help' => \$help);
if($help) { die "$usage\n\n"; }
if($combination_mode!~/(interp|union|fillup|intersect1|stats)/) {die "$usage\nUnknown combination mode!\n"};
if(@ARGV < 2) {die "$usage\n\n Please provide at least 2 tables to combine \n\n";}
print STDERR "
WARNING: Your phrase tables must be sorted (with LC_ALL=C) !!
******************************
Combination mode is [$combination_mode]
******************************
";
my @tables = @ARGV;
my $nbtables = scalar(@tables);
###########################################
# The newSourceMaxLength option requires reading all the first PT before starting the combination
my %sourcePhrasesPT1;
if($combination_mode eq "fillup" && $newSourceMaxLength>-1) {
my $table1=$tables[0];
$table1 =~ s/(.*\.gz)\s*$/gzip -dc < $1|/;
open(TABLE1, "$table1") or die "Cannot open $table1: ($!)\n";
while(my $line=<TABLE1>) {
$line=~m/^(.*?)$delim_RE/;
$sourcePhrasesPT1{$1}++;
}
close(TABLE1);
}
my @table_files=();
foreach my $table (@tables) {
$table =~ s/(.*\.gz)\s*$/gzip -dc < $1|/;
#localize the file glob, so FILE is unique to the inner loop.
local *FILE;
open(FILE, "$table") or die "Cannot open $table: ($!)\n";
push(@table_files, *FILE);
}
# Read first line from all tables to find number of weights (and sanity checks)
my @read_ppairs=();
my $nbscores = &read_line_from_tables(\@table_files, \@read_ppairs);
print STDERR "Each phrase table contains $nbscores features.\n";
###########################################
if($phPenalty_idx!=-1) {
if($phPenalty_idx<0 || $phPenalty_idx>=$nbscores) {
die "Invalid value for option phpenalty-at! Should be in the range [0,($nbscores-1)]\n\n";
}
else { print STDERR "Phrase penalty at index $phPenalty_idx\n"; }
}
#if($weights_str ne "") { die "Weights option NOT supported yet. Can only use uniform (1/nbscores)\n\n"; }
#my $unifw = 1/$nbtables;
my @weights=(); # Array of arrays each containing the feature weights for a phrase table
if($combination_mode eq "interp") {
my @table_level_weights=();
if($weights_str eq "") {
@table_level_weights= ((1/$nbtables) x $nbtables); # assuming uniform weights
}
else {
@table_level_weights= split(/,/, $weights_str);
if(scalar(@table_level_weights) != $nbtables) {
die "$usage\n Invalid string for option --weights! Must be a comma-separated list of floats, one per ph.table.\n";
}
}
for(my $i=0; $i<$nbtables; $i++) {
my @weights_pt = (($table_level_weights[$i]) x $nbscores);
if($phPenalty_idx!=-1) {
$weights_pt[$phPenalty_idx]=0;
}
print STDERR "WEIGHTS-PT_$i: ", join(" -- ", @weights_pt), "\n";
$weights[$i] = \@weights_pt;
}
print STDERR "EPSILON: $epsilon \n";
}
###########################################
my @empty_ppair=("");
my @epsilons = (($epsilon) x $nbscores);
if($phPenalty_idx>-1) {
pop @epsilons;
}
my $nbPpairs_inAll=0;
my @nbPairs_found_only_in=((0) x $nbtables);
my $MINSCORE=1;
print STDERR "Working...\n\n";
while(1) {
my $min_ppair="";
my $reached_end_of_tables=1;
my @tablesContainingPpair=((0) x $nbtables);
for(my $i=0; $i<$nbtables; $i++) {
my $ppair=$read_ppairs[$i]->[0];
if($ppair ne "") {
$reached_end_of_tables=0;
if($min_ppair eq "" || $ppair lt $min_ppair) {
$min_ppair=$ppair;
@tablesContainingPpair=((0) x $nbtables);
$tablesContainingPpair[$i]=1;
}
elsif($ppair eq $min_ppair) {
$tablesContainingPpair[$i]=1;
}
}
}
last if($reached_end_of_tables);
## Actual combination is performed here:
&combine_ppair(\@read_ppairs, \@tablesContainingPpair);
&read_line_from_tables(\@table_files, \@read_ppairs, \@tablesContainingPpair);
}
print STDERR "...done!\n";
print STDERR "The minimum score in all tables is $MINSCORE\n";
if($combination_mode eq "stats") {
my $tot_ppairs=0;
print "
# entries
found in all tables: $nbPpairs_inAll\n";
for(my $i=0; $i<$nbtables; $i++) {
print "found only in PT_$i: $nbPairs_found_only_in[$i]\n";
}
}
####################################
# Emit (or, in 'stats' mode, tally) the combined entry for the single
# phrase pair currently at the head of the per-table read buffers.
#   Arg 1: ref to array of per-table entries [key, \@scores, additional-info].
#   Arg 2: ref to array of 0/1 flags saying which tables contain this pair.
# Relies on the enclosing main()'s lexicals ($combination_mode, $nbtables,
# @weights, $epsilon, ...) being shared via closure.
# NOTE(review): the word list in the parentheses is not a valid Perl
# prototype; it only works because all calls go through &combine_ppair(...),
# which bypasses prototype checking.
sub combine_ppair(PPAIRS_REFARRAY, TABLE_INDICES_REFARRAY) {
my $ra_ppairs=shift; # 1st item: phrase-pair key (string);
# 2nd item: ref.array of scores;
# 3rd item: additional info (string, may be empty)
my $ra_toRead=shift; # Important: this says which phrase tables contain the ph.pair currently processed
my $ppair="";
my @scores=();
my $additional_info="";
my $to_print=1;
if($debug) {
print STDERR "combine_ppair:\n";
for(my $i=0; $i<$nbtables; $i++) {
if($ra_toRead->[$i]) {
print STDERR "ppair_$i= ", join (" // ", @{$ra_ppairs->[$i]}), "\n";
}
}
}
# 'stats' mode: only update the overlap counters; nothing is printed.
if($combination_mode eq "stats") {
$to_print=0;
my $found_in=-1;
my $nb_found=0;
for(my $i=0; $i<$nbtables; $i++) {
if($ra_toRead->[$i]) {
$found_in=$i;
$nb_found++;
}
}
if($nb_found==1) { $nbPairs_found_only_in[$found_in]++; }
elsif($nb_found==$nbtables) { $nbPpairs_inAll++; }
}
### Fill-up + additional binary feature
# Take the entry from the FIRST table that contains it (note the 'last'),
# and append one provenance feature per non-primary table.
elsif($combination_mode eq "fillup") {
my @bin_feats=(($exp_zero) x ($nbtables-1));
for(my $i=0; $i<$nbtables; $i++) {
if($ra_toRead->[$i]) {
$ppair= shift(@{$ra_ppairs->[$i]});
# pruning criteria are applied here:
# drop "new" (non-PT1) source phrases longer than newSourceMaxLength
if($i>0 && $newSourceMaxLength>-1) {
$ppair=~m/^(.*?)$delim_RE/;
if(scalar(split(/ +/, $1)) > $newSourceMaxLength &&
!defined($sourcePhrasesPT1{$1}))
{ $to_print=0; }
}
# @scores= @{$ra_ppairs->[$i]};
@scores = @{shift(@{$ra_ppairs->[$i]})};
# binary feature for ph.pair provenance fires here
if($i>0) { $bin_feats[$i-1]=$exp_one; }
$additional_info=shift(@{$ra_ppairs->[$i]});
last;
}
}
push(@scores, @bin_feats);
}
### Linear interpolation
# Weighted sum of each feature across tables; absent tables contribute
# $epsilon. The phrase penalty column (if declared) is kept constant.
elsif($combination_mode eq "interp") {
my $firstPpair=-1;
@scores=((0) x $nbscores);
for(my $i=0; $i<$nbtables; $i++) {
if($ra_toRead->[$i]) {
if($firstPpair==-1) { $firstPpair=$i; }
$ppair= shift(@{$ra_ppairs->[$i]});
my @scoresPT = @{shift(@{$ra_ppairs->[$i]})};
for(my $j=0; $j<$nbscores; $j++) {
# $scores[$j]+= $weights[$i]->[$j]* $ra_ppairs->[$i][$j];
$scores[$j]+= $weights[$i]->[$j]* $scoresPT[$j];
}
}
else {
for(my $j=0; $j<$nbscores; $j++) {
$scores[$j]+= $weights[$i]->[$j]* $epsilon;
}
}
if($phPenalty_idx!=-1) {
$scores[$phPenalty_idx]= $phPenalty;
}
}
if($debug) { print STDERR "..taking info from ptable_$firstPpair\n"; }
$additional_info= shift(@{$ra_ppairs->[$firstPpair]});
}
### Union + feature concatenation
# Concatenate every table's feature vector; absent tables contribute
# a vector of epsilons. A declared phrase penalty is emitted once, last.
elsif($combination_mode eq "union") {
my $firstPpair=-1;
for(my $i=0; $i<$nbtables; $i++) {
if($ra_toRead->[$i]) {
if($firstPpair==-1) { $firstPpair=$i; }
$ppair= shift(@{$ra_ppairs->[$i]});
my @scoresPT= @{shift(@{$ra_ppairs->[$i]})};
if($phPenalty_idx!=-1) {
# splice(@{$ra_ppairs->[$i]}, $phPenalty_idx, 1);
splice(@scoresPT, $phPenalty_idx, 1);
}
# push(@scores, @{$ra_ppairs->[$i]});
push(@scores, @scoresPT);
}
else {
push(@scores, @epsilons);
}
}
if($phPenalty_idx!=-1) {
push(@scores, $phPenalty);
}
if($debug) { print STDERR "..taking info from ptable_$firstPpair\n"; }
$additional_info= shift(@{$ra_ppairs->[$firstPpair]});
}
### Intersect + features from first table
# Print the entry only when ALL tables contain it; scores come from table 0.
elsif($combination_mode eq "intersect1") {
$to_print=0;
my $found_in_all=1;
for(my $i=0; $i<$nbtables; $i++) {
if(!$ra_toRead->[$i]) {
$found_in_all=0;
last;
}
}
if($found_in_all) {
$to_print=1;
$ppair= shift(@{$ra_ppairs->[0]});
# @scores= @{$ra_ppairs->[0]};
@scores= @{shift(@{$ra_ppairs->[0]})};
$additional_info= shift(@{$ra_ppairs->[0]});
}
}
else {
die "$usage\nUnknown combination mode!\n";
}
# $ppair already ends with the field delimiter, so scores follow directly.
if($to_print) {
if($additional_info eq "") {
print $ppair, join(" ", @scores), "\n";
}else {
print $ppair, join(" ", @scores), $delim, $additional_info, "\n";
}
}
}
####################################
# Read lines from all filehandles given in FILES_REFARRAY,
# or from the files whose indices are assigned 1 in the array TABLE_INDICES_REFARRAY
# Parse each of them as a phrase pair entry and stores it to the corresponding position of PPAIRS_REFARRAY
# Each stored entry is [key, \@scores, additional-info]; a table at EOF gets [""].
# Returns the number of scores seen on the lines just read (-1 if none were read).
# NOTE(review): the parenthesized word list is not a valid Perl prototype;
# calls go through &read_line_from_tables(...), which bypasses it.
sub read_line_from_tables(FILES_REFARRAY, PPAIRS_REFARRAY, TABLE_INDICES_REFARRAY) {
my $ra_files=shift;
my $ra_ppairs=shift;
my $ra_toRead=shift;
my @toRead=((1) x $nbtables); # by default read from all files
if($ra_toRead ne "") {
@toRead=@$ra_toRead;
}
my $nbscores=-1;
# NOTE(review): this outer $key is shadowed by the inner `my $key` below
# and is never used.
my $key=""; my $additional_info="";
for(my $i=0; $i<$nbtables; $i++) {
next if($toRead[$i]==0);
my @ppair=();
my $file=$ra_files->[$i];
if(my $line = <$file>) {
chomp $line;
# Fields: source ||| target ||| scores [||| extra fields...]
my @fields = split(/$delim_RE/, $line);
if(scalar(@fields)<3) {
die "Invalid phrase table entry:\n$line\n";
}
my @scores = split(/\s+/, $fields[2]);
# Track the global minimum score (reported at the end of main).
foreach my $score (@scores) {
if($score<$MINSCORE) { $MINSCORE=$score; }
}
# Get nb of scores from the 1st table. Check that all tables provide the same nb of scores,
# unless mode is 'intersect' (then it doesn't matter as scores are taken only from 1st table)
if($nbscores==-1) {
$nbscores=scalar(@scores);
} elsif($nbscores!=scalar(@scores) && $combination_mode ne "intersect1") {
die "Wrong number of scores in table-$i! Should be $nbscores\n";
}
# Get additional fields if any (word alignment, phrase counts etc.)
if(scalar(@fields)>3) {
$additional_info=join($delim, splice(@fields,3));
#print STDOUT "additional_info:__{$additional_info}__\n";
}
my $key = "$fields[0]$delim$fields[1]$delim"; ## IMPORTANT: the | delimiter at the end of the phrase pair is crucial to preserve sorting!!
push(@ppair, $key, \@scores, $additional_info);
}
else {
push(@ppair, "");
}
$ra_ppairs->[$i]=\@ppair;
}
return $nbscores;
}
#########
}
&main;

View File

@ -0,0 +1,16 @@
# Builds the fuzzy-match utilities: suffix-test, fuzzy-match, fuzzy-match2.
# NOTE(review): recipe lines below appear without leading tabs in this
# rendering; GNU make requires tab-indented recipe lines.
all: suffix-test fuzzy-match fuzzy-match2
clean:
rm -f *.o
# Old-style suffix rule: compile any .cpp to .o.
# NOTE(review): g++ clamps -O6 to -O3; kept as written.
.cpp.o:
g++ -O6 -g -c $<
suffix-test: Vocabulary.o SuffixArray.o suffix-test.o
g++ Vocabulary.o SuffixArray.o suffix-test.o -o suffix-test
# NOTE(review): the prerequisite names old/fuzzy-match.o but the link line
# uses fuzzy-match.o -- one of the two is stale; confirm where
# fuzzy-match.cpp actually lives before relying on this target.
fuzzy-match: Vocabulary.o SuffixArray.o old/fuzzy-match.o
g++ Vocabulary.o SuffixArray.o fuzzy-match.o -o fuzzy-match
fuzzy-match2: Vocabulary.o SuffixArray.o fuzzy-match2.o Util.o
g++ Vocabulary.o SuffixArray.o fuzzy-match2.o Util.o -o fuzzy-match2

View File

@ -0,0 +1,29 @@
//
// Match.h
// fuzzy-match
//
// Created by Hieu Hoang on 25/07/2012.
// Copyright 2012 __MyCompanyName__. All rights reserved.
//
#ifndef fuzzy_match_Match_h
#define fuzzy_match_Match_h
/* data structure for n-gram match between input and corpus */
// Records one n-gram match between the input sentence and a
// translation-memory (corpus) sentence, together with the cost bounds
// the fuzzy-match search associates with it.
// NOTE(review): field meanings are inferred from the names -- confirm
// against the fuzzy-match search code that fills these in.
class Match {
public:
  int input_start;   // first input position covered by the match
  int input_end;     // last input position covered by the match
  int tm_start;      // first matched position in the TM sentence
  int tm_end;        // last matched position in the TM sentence
  int min_cost;      // lower cost bound attached to this match
  int max_cost;      // upper cost bound attached to this match
  int internal_cost; // cost accumulated inside the matched span

  // Plain data holder: the constructor simply copies each argument into
  // the corresponding field.
  Match( int is, int ie, int ts, int te, int min, int max, int i )
  {
    input_start   = is;
    input_end     = ie;
    tm_start      = ts;
    tm_end        = te;
    min_cost      = min;
    max_cost      = max;
    internal_cost = i;
  }
};
#endif

View File

@ -0,0 +1,48 @@
//
// SentenceAlignment.h
// fuzzy-match
//
// Created by Hieu Hoang on 25/07/2012.
// Copyright 2012 __MyCompanyName__. All rights reserved.
//
#ifndef fuzzy_match_SentenceAlignment_h
#define fuzzy_match_SentenceAlignment_h
#include <sstream>
#include "Vocabulary.h"
extern Vocabulary vocabulary;
// Target side of one aligned sentence pair from the translation memory:
// the target words (as vocabulary IDs) plus the word alignment.
// NOTE(review): relies on `using namespace std` and the WORD/WORD_ID
// typedefs pulled in via Vocabulary.h, and on the global `vocabulary`.
struct SentenceAlignment
{
int count; // NOTE(review): presumably an occurrence count -- confirm against the code that fills this struct
vector< WORD_ID > target; // target sentence as vocabulary IDs
vector< pair<int,int> > alignment; // alignment point pairs -- TODO confirm which side is first
SentenceAlignment()
{}
// Render the target side as surface words (looked up in the global
// `vocabulary`), space-separated; note the trailing space.
string getTargetString() const
{
stringstream strme;
for (size_t i = 0; i < target.size(); ++i) {
const WORD &word = vocabulary.GetWord(target[i]);
strme << word << " ";
}
return strme.str();
}
// Render the alignment as "a-b" pairs, space-separated; note the
// trailing space.
string getAlignmentString() const
{
stringstream strme;
for (size_t i = 0; i < alignment.size(); ++i) {
const pair<int,int> &alignPair = alignment[i];
strme << alignPair.first << "-" << alignPair.second << " ";
}
return strme.str();
}
};
#endif

View File

@ -0,0 +1,244 @@
#include "SuffixArray.h"
#include <string>
#include <stdlib.h>
#include <cstring>
using namespace std;
// Build a suffix array over the tokenized corpus in fileName.
// Two passes: the first counts words (plus one end-of-sentence marker per
// sentence) to size the arrays; the second fills them in; finally the
// index is sorted so m_index lists corpus positions in suffix order.
SuffixArray::SuffixArray( string fileName )
{
// Reserve vocabulary IDs for unknown words and the sentence boundary.
// NOTE(review): the odd "<uNk>" capitalization looks deliberate (to avoid
// colliding with a real token) -- confirm against Vocabulary usage.
m_vcb.StoreIfNew( "<uNk>" );
m_endOfSentence = m_vcb.StoreIfNew( "<s>" );
ifstream extractFile;
char line[LINE_MAX_LENGTH];
// count the number of words first;
extractFile.open(fileName.c_str());
istream *fileP = &extractFile;
m_size = 0;
size_t sentenceCount = 0;
while(!fileP->eof()) {
// SAFE_GETLINE is a project macro (presumably from Vocabulary.h) that
// reads one line into the fixed buffer -- lines longer than
// LINE_MAX_LENGTH cannot be handled; TODO confirm its behavior.
SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
if (fileP->eof()) break;
vector< WORD_ID > words = m_vcb.Tokenize( line );
m_size += words.size() + 1; // +1 for the end-of-sentence marker
sentenceCount++;
}
extractFile.close();
cerr << m_size << " words (incl. sentence boundaries)" << endl;
// allocate memory
// All five buffers are calloc'd here; the destructor must free them.
m_array = (WORD_ID*) calloc( sizeof( WORD_ID ), m_size );
m_index = (INDEX*) calloc( sizeof( INDEX ), m_size );
m_wordInSentence = (char*) calloc( sizeof( char ), m_size );
m_sentence = (size_t*) calloc( sizeof( size_t ), m_size );
// NOTE(review): sentence lengths are stored in a char -- sentences longer
// than the char range will silently overflow.
m_sentenceLength = (char*) calloc( sizeof( char ), sentenceCount );
// fill the array
int wordIndex = 0;
int sentenceId = 0;
// NOTE(review): the stream is reopened after hitting EOF without
// clearing its state; pre-C++11 this can leave eofbit set and skip the
// second pass -- confirm the build mode this is compiled under.
extractFile.open(fileName.c_str());
fileP = &extractFile;
while(!fileP->eof()) {
SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
if (fileP->eof()) break;
vector< WORD_ID > words = m_vcb.Tokenize( line );
vector< WORD_ID >::const_iterator i;
for( i=words.begin(); i!=words.end(); i++)
{
m_index[ wordIndex ] = wordIndex;
m_sentence[ wordIndex ] = sentenceId;
m_wordInSentence[ wordIndex ] = i-words.begin();
m_array[ wordIndex++ ] = *i;
}
// One end-of-sentence marker terminates every sentence.
m_index[ wordIndex ] = wordIndex;
m_array[ wordIndex++ ] = m_endOfSentence;
m_sentenceLength[ sentenceId++ ] = words.size();
}
extractFile.close();
cerr << "done reading " << wordIndex << " words, " << sentenceId << " sentences." << endl;
// List(0,9);
// sort
// m_buffer is scratch space for the merge in Sort(); freed right after.
m_buffer = (INDEX*) calloc( sizeof( INDEX ), m_size );
Sort( 0, m_size-1 );
free( m_buffer );
cerr << "done sorting" << endl;
}
// Top-down merge sort of m_index[start..end] by suffix order (the original
// comment said "quick sort", but this recursively sorts two halves and
// merges them through m_buffer -- it is a merge sort).
// Precondition: m_buffer has room for end-start+1 entries.
// NOTE(review): assumes start <= end; callers pass Sort(0, m_size-1), so an
// empty corpus (m_size == 0) would wrap around with unsigned INDEX.
void SuffixArray::Sort(INDEX start, INDEX end) {
if (start == end) return;
INDEX mid = (start+end+1)/2;
Sort( start, mid-1 );
Sort( mid, end );
// merge the two sorted halves into m_buffer
int i = start;
int j = mid;
int k = 0;
int length = end-start+1;
while( k<length )
{
if (i == mid )
{
m_buffer[ k++ ] = m_index[ j++ ];
}
else if (j > end )
{
m_buffer[ k++ ] = m_index[ i++ ];
}
else {
if (CompareIndex( m_index[i], m_index[j] ) < 0)
{
m_buffer[ k++ ] = m_index[ i++ ];
}
else
{
m_buffer[ k++ ] = m_index[ j++ ];
}
}
}
// copy the merged run back into place
memcpy( ((char*)m_index) + sizeof( INDEX ) * start,
((char*)m_buffer), sizeof( INDEX ) * (end-start+1) );
}
// Release every buffer calloc'd in the constructor.
// Fix: the original freed only m_index and m_array, leaking
// m_wordInSentence, m_sentence and m_sentenceLength (all allocated
// alongside them in the constructor; m_buffer is already freed there).
SuffixArray::~SuffixArray()
{
  free(m_index);
  free(m_array);
  free(m_wordInSentence);
  free(m_sentence);
  free(m_sentenceLength);
}
// Lexicographically compare the corpus suffixes starting at positions a
// and b: negative if suffix(a) sorts first, positive if suffix(b) does.
// A suffix that reaches the end of the corpus first sorts first.
int SuffixArray::CompareIndex( INDEX a, INDEX b ) const
{
  // Walk both suffixes past their common prefix.
  INDEX i = a;
  INDEX j = b;
  while (i < m_size && j < m_size && m_array[i] == m_array[j]) {
    ++i;
    ++j;
  }
  // Shorter suffix (hit the corpus end) wins.
  if (i == m_size) return -1;
  if (j == m_size) return 1;
  // Otherwise the first differing word decides.
  return CompareWord(m_array[i], m_array[j]);
}
// Compare two vocabulary IDs by the string ordering of their surface
// forms (strcmp-style result from string::compare).
inline int SuffixArray::CompareWord( WORD_ID a, WORD_ID b ) const
{
// cerr << "c(" << m_vcb.GetWord(a) << ":" << m_vcb.GetWord(b) << ")=" << m_vcb.GetWord(a).compare( m_vcb.GetWord(b) ) << endl;
return m_vcb.GetWord(a).compare( m_vcb.GetWord(b) );
}
int SuffixArray::Count( const vector< WORD > &phrase )
{
INDEX dummy;
return LimitedCount( phrase, m_size, dummy, dummy, 0, m_size-1 );
}
bool SuffixArray::MinCount( const vector< WORD > &phrase, INDEX min )
{
INDEX dummy;
return LimitedCount( phrase, min, dummy, dummy, 0, m_size-1 ) >= min;
}
bool SuffixArray::Exists( const vector< WORD > &phrase )
{
INDEX dummy;
return LimitedCount( phrase, 1, dummy, dummy, 0, m_size-1 ) == 1;
}
// Count occurrences of `phrase` within the index range
// [search_start, search_end], returning the first and last matching
// index positions through the out-parameters. Uncapped (cap = m_size).
int SuffixArray::FindMatches( const vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start, INDEX search_end )
{
return LimitedCount( phrase, m_size, firstMatch, lastMatch, search_start, search_end );
}
// Count the occurrences of phrase within [search_start, search_end] of the
// suffix index. search_end == -1 (which wraps to the maximum INDEX, since
// INDEX is unsigned) means "to the end of the array". Returns 0 if the
// phrase does not occur; returns 1 immediately when min == 1 (existence
// check — note firstMatch/lastMatch are left unset in that case);
// otherwise returns the exact count and sets firstMatch/lastMatch to the
// contiguous block of matching suffix-index entries.
int SuffixArray::LimitedCount( const vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start, INDEX search_end )
{
  // locate any one matching entry by binary search
  // cerr << "FindFirst\n";
  INDEX start = search_start;
  INDEX end = (search_end == -1) ? (m_size-1) : search_end;
  INDEX mid = FindFirst( phrase, start, end );
  // cerr << "done\n";
  if (mid == m_size) return 0; // no matches
  if (min == 1) return 1; // only existance check

  int matchCount = 1;

  // expand from the known match to the first matching entry...
  //cerr << "before...\n";
  firstMatch = FindLast( phrase, mid, start, -1 );
  matchCount += mid - firstMatch;

  // ...and to the last matching entry
  //cerr << "after...\n";
  lastMatch = FindLast( phrase, mid, end, 1 );
  matchCount += lastMatch - mid;

  return matchCount;
}
// From a known matching entry at `start`, binary-search in the given
// direction (+1 towards `end`, -1 towards the front) for the outermost
// suffix-index entry that still matches the phrase.
// Precondition: Match(phrase, start) == 0 (start itself matches).
SuffixArray::INDEX SuffixArray::FindLast( const vector< WORD > &phrase, INDEX start, INDEX end, int direction )
{
  end += direction;
  while(true)
  {
    // round towards `start` so the loop always makes progress
    INDEX mid = ( start + end + (direction>0 ? 0 : 1) )/2;

    int match = Match( phrase, mid );
    int matchNext = Match( phrase, mid+direction );
    //cerr << "\t" << start << ";" << mid << ";" << end << " -> " << match << "," << matchNext << endl;

    // boundary found: mid matches but its neighbour does not
    if (match == 0 && matchNext != 0) return mid;

    if (match == 0) // mid point is a match
      start = mid;
    else
      end = mid;
  }
}
// Binary search for any one suffix-index entry in [start, end] whose suffix
// begins with the phrase; start/end are narrowed in place as a side effect.
// Returns the index of a matching entry, or m_size if there is none.
SuffixArray::INDEX SuffixArray::FindFirst( const vector< WORD > &phrase, INDEX &start, INDEX &end )
{
  while(true)
  {
    INDEX mid = ( start + end + 1 )/2;
    //cerr << "FindFirst(" << start << ";" << mid << ";" << end << ")\n";
    int match = Match( phrase, mid );

    if (match == 0) return mid;
    // range exhausted without a match
    if (start >= end && match != 0 ) return m_size;

    if (match > 0)
      start = mid+1;
    else
      end = mid-1;
  }
}
int SuffixArray::Match( const vector< WORD > &phrase, INDEX index )
{
INDEX pos = m_index[ index ];
for(INDEX i=0; i<phrase.size() && i+pos<m_size; i++)
{
int match = CompareWord( m_vcb.GetWordID( phrase[i] ), m_array[ pos+i ] );
// cerr << "{" << index << "+" << i << "," << pos+i << ":" << match << "}" << endl;
if (match != 0)
return match;
}
return 0;
}
// Debug helper: for each suffix-index entry in [start, end], print (to
// stdout) the first up-to-five words of the suffix it points at.
void SuffixArray::List(INDEX start, INDEX end)
{
  for(INDEX entry=start; entry<=end; entry++)
  {
    INDEX pos = m_index[ entry ];
    for(int w=0; w<5 && w+pos<m_size; w++)
    {
      cout << " " << m_vcb.GetWord( m_array[ pos+w ] );
    }
  }
}

View File

@ -0,0 +1,45 @@
#pragma once
#include "Vocabulary.h"

#define LINE_MAX_LENGTH 10000

// Suffix array over a corpus of word ids, supporting counted and
// range-limited phrase lookups.
class SuffixArray
{
public:
  typedef unsigned int INDEX;

private:
  WORD_ID *m_array;         // the corpus: one word id per token
  INDEX *m_index;           // suffix index: token positions, suffix-sorted
  INDEX *m_buffer;          // scratch space for the merge sort
  char *m_wordInSentence;   // per token: its position within its sentence
  size_t *m_sentence;       // per token: the id of its sentence
  char *m_sentenceLength;   // per sentence: its length
  WORD_ID m_endOfSentence;
  Vocabulary m_vcb;
  INDEX m_size;             // total number of tokens in the corpus

public:
  SuffixArray( string fileName );
  ~SuffixArray();

  void Sort(INDEX start, INDEX end);
  int CompareIndex( INDEX a, INDEX b ) const;
  inline int CompareWord( WORD_ID a, WORD_ID b ) const;
  int Count( const vector< WORD > &phrase );
  bool MinCount( const vector< WORD > &phrase, INDEX min );
  bool Exists( const vector< WORD > &phrase );
  int FindMatches( const vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = 0, INDEX search_end = -1 );
  // BUGFIX: the default arguments were swapped (search_start = -1,
  // search_end = 0), disagreeing with FindMatches and with the
  // implementation's convention that search_end == -1 (wrapped to the
  // unsigned INDEX maximum) means "search to the end".
  int LimitedCount( const vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = 0, INDEX search_end = -1 );
  INDEX FindFirst( const vector< WORD > &phrase, INDEX &start, INDEX &end );
  INDEX FindLast( const vector< WORD > &phrase, INDEX start, INDEX end, int direction );
  int Match( const vector< WORD > &phrase, INDEX index );
  void List( INDEX start, INDEX end );

  inline INDEX GetPosition( INDEX index ) { return m_index[ index ]; }
  inline size_t GetSentence( INDEX position ) { return m_sentence[position]; }
  inline char GetWordInSentence( INDEX position ) { return m_wordInSentence[position]; }
  inline char GetSentenceLength( size_t sentenceId ) { return m_sentenceLength[sentenceId]; }
  inline INDEX GetSize() { return m_size; }
};

View File

@ -0,0 +1,147 @@
//
// Util.cpp
// fuzzy-match
//
// Created by Hieu Hoang on 26/07/2012.
// Copyright 2012 __MyCompanyName__. All rights reserved.
//
#include <iostream>
#include <stdio.h>
#include "Util.h"
#include "SentenceAlignment.h"
#include "SuffixArray.h"
void load_corpus( const char* fileName, vector< vector< WORD_ID > > &corpus )
{ // source
ifstream fileStream;
fileStream.open(fileName);
if (!fileStream) {
cerr << "file not found: " << fileName << endl;
exit(1);
}
cerr << "loading " << fileName << endl;
istream *fileStreamP = &fileStream;
char line[LINE_MAX_LENGTH];
while(true)
{
SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
if (fileStreamP->eof()) break;
corpus.push_back( vocabulary.Tokenize( line ) );
}
}
// Read the target side of the translation memory. Each line holds one or
// more target sentences separated by "|||"; each sentence begins with an
// integer count followed by its words, i.e.
//   count w1 w2 ... ||| count w1 w2 ... ||| ...
// One vector<SentenceAlignment> is appended to `corpus` per line.
// NOTE(review): assumes every line has at least one token (toks[0] is read
// unconditionally) and that a word follows each "|||" — confirm the input
// format guarantees this.
void load_target( const char* fileName, vector< vector< SentenceAlignment > > &corpus)
{
  ifstream fileStream;
  fileStream.open(fileName);
  if (!fileStream) {
    cerr << "file not found: " << fileName << endl;
    exit(1);
  }
  cerr << "loading " << fileName << endl;

  istream *fileStreamP = &fileStream;

  // "|||" is stored in the vocabulary so it can be compared as a word id
  WORD_ID delimiter = vocabulary.StoreIfNew("|||");
  int lineNum = 0;
  char line[LINE_MAX_LENGTH];
  while(true)
  {
    SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
    if (fileStreamP->eof()) break;

    vector<WORD_ID> toks = vocabulary.Tokenize( line );

    corpus.push_back(vector< SentenceAlignment >());
    vector< SentenceAlignment > &vec = corpus.back();

    vec.push_back(SentenceAlignment());
    SentenceAlignment *sentence = &vec.back();

    // first token of the line is the count of the first sentence
    const WORD &countStr = vocabulary.GetWord(toks[0]);
    sentence->count = atoi(countStr.c_str());

    for (size_t i = 1; i < toks.size(); ++i) {
      WORD_ID wordId = toks[i];

      if (wordId == delimiter) {
        // target and alignments can have multiple sentences.
        vec.push_back(SentenceAlignment());
        sentence = &vec.back();

        // the token right after "|||" is the next sentence's count
        ++i;

        const WORD &countStr = vocabulary.GetWord(toks[i]);
        sentence->count = atoi(countStr.c_str());
      }
      else {
        // just a normal word, add
        sentence->target.push_back(wordId);
      }
    }

    ++lineNum;
  }
}
// Read the word alignments matching the target file: line i holds the
// alignments for the sentences in corpus[i], separated by "|||", with each
// alignment point written as "src-tgt".
// NOTE(review): this fills entries created by load_target(), so it indexes
// corpus[lineNum] directly — load_target() must have been called first on a
// file with the same number of lines and sentences per line; confirm.
void load_alignment( const char* fileName, vector< vector< SentenceAlignment > > &corpus )
{
  ifstream fileStream;
  fileStream.open(fileName);
  if (!fileStream) {
    cerr << "file not found: " << fileName << endl;
    exit(1);
  }
  cerr << "loading " << fileName << endl;

  istream *fileStreamP = &fileStream;

  string delimiter = "|||";
  int lineNum = 0;
  char line[LINE_MAX_LENGTH];
  while(true)
  {
    SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
    if (fileStreamP->eof()) break;

    vector< SentenceAlignment > &vec = corpus[lineNum];
    size_t targetInd = 0;
    SentenceAlignment *sentence = &vec[targetInd];

    vector<string> toks = Tokenize(line);

    for (size_t i = 0; i < toks.size(); ++i) {
      string &tok = toks[i];

      if (tok == delimiter) {
        // target and alignments can have multiple sentences.
        ++targetInd;
        sentence = &vec[targetInd];

        // skip the count token that follows "|||" in the target file format
        ++i;
      }
      else {
        // just a normal alignment, add
        vector<int> alignPoint = Tokenize<int>(tok, "-");
        assert(alignPoint.size() == 2);
        sentence->alignment.push_back(pair<int,int>(alignPoint[0], alignPoint[1]));
      }
    }

    ++lineNum;
  }
}

View File

@ -0,0 +1,87 @@
//
// Util.h
// fuzzy-match
//
// Created by Hieu Hoang on 25/07/2012.
// Copyright 2012 __MyCompanyName__. All rights reserved.
//
#ifndef fuzzy_match_Util_h
#define fuzzy_match_Util_h
#include <vector>
#include <sstream>
#include "Vocabulary.h"
class SentenceAlignment;

// Corpus loaders (defined in Util.cpp). load_alignment() fills alignments
// into the entries created by load_target(), so it is presumably meant to
// be called after it — confirm against the caller.
void load_corpus( const char* fileName, std::vector< std::vector< WORD_ID > > &corpus );
void load_target( const char* fileName, std::vector< std::vector< SentenceAlignment > > &corpus);
void load_alignment( const char* fileName, std::vector< std::vector< SentenceAlignment > > &corpus );
/**
 * Join the elements of a vector into one string, with `delimiter` between
 * consecutive elements. An empty vector yields the empty string.
 */
template <typename T>
std::string Join(const std::string& delimiter, const std::vector<T>& items)
{
  std::ostringstream joined;
  bool first = true;
  for (typename std::vector<T>::const_iterator it = items.begin(); it != items.end(); ++it) {
    if (!first)
      joined << delimiter;
    joined << *it;
    first = false;
  }
  return joined.str();
}
//! Convert a string to a value of type T (used for reading floats, ints
//! etc. from files); parsing is done via stream extraction.
template<typename T>
inline T Scan(const std::string &input)
{
  T parsed;
  std::stringstream reader(input);
  reader >> parsed;
  return parsed;
}
//! Convert a vector of strings element-wise to a vector of T values.
template<typename T>
inline std::vector<T> Scan(const std::vector< std::string > &input)
{
  std::vector<T> converted;
  converted.resize(input.size());
  for (size_t pos = 0 ; pos < input.size() ; pos++) {
    converted[pos] = Scan<T>( input[pos] );
  }
  return converted;
}
// Split `str` on any of the characters in `delimiters` (default: space and
// tab), skipping empty tokens; leading/trailing delimiters produce nothing.
inline std::vector<std::string> Tokenize(const std::string& str,
                                         const std::string& delimiters = " \t")
{
  std::vector<std::string> tokens;
  // position of the first character of the current token
  std::string::size_type tokenStart = str.find_first_not_of(delimiters, 0);
  // position one past the current token (next delimiter)
  std::string::size_type tokenEnd = str.find_first_of(delimiters, tokenStart);

  while (tokenStart != std::string::npos || tokenEnd != std::string::npos) {
    tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
    tokenStart = str.find_first_not_of(delimiters, tokenEnd);
    tokenEnd = str.find_first_of(delimiters, tokenStart);
  }
  return tokens;
}
// Split `input` on `delimiters`, then convert every token to type T.
template<typename T>
inline std::vector<T> Tokenize( const std::string &input
                               , const std::string& delimiters = " \t")
{
  return Scan<T>( Tokenize(input, delimiters) );
}
#endif

View File

@ -0,0 +1,45 @@
// $Id: Vocabulary.cpp 1565 2008-02-22 14:42:01Z bojar $
#include "Vocabulary.h"
// Split a C string on spaces/tabs and convert each token to its word id,
// storing previously unseen words in the vocabulary.
vector<WORD_ID> Vocabulary::Tokenize( const char input[] ) {
  vector< WORD_ID > token;
  int wordStart = -1;   // index of the current word's first char, -1 = none
  int i = 0;
  for(; input[i] != '\0'; i++) {
    bool isSpace = (input[i] == ' ' || input[i] == '\t');
    if (isSpace) {
      if (wordStart >= 0) {
        // word just ended: store it
        token.push_back( StoreIfNew ( string( input+wordStart, i-wordStart ) ) );
        wordStart = -1;
      }
    }
    else if (wordStart < 0) {
      // word just started
      wordStart = i;
    }
  }
  // flush a word that runs to the end of the input
  if (wordStart >= 0)
    token.push_back( StoreIfNew ( string( input+wordStart, i-wordStart ) ) );
  return token;
}
// Return the id of `word`, assigning and storing the next free id if the
// word has not been seen before.
WORD_ID Vocabulary::StoreIfNew( const WORD& word ) {
  map<WORD, WORD_ID>::iterator found = lookup.find( word );
  if( found != lookup.end() )
    return found->second;

  // unseen word: its id is its position in the vocab list
  WORD_ID newId = vocab.size();
  vocab.push_back( word );
  lookup[ word ] = newId;
  return newId;
}
// Look up the id of a word; returns 0 if the word is unknown.
// NOTE(review): 0 is also the id assigned to the first word ever stored
// (StoreIfNew uses vocab.size()), so "unknown" is indistinguishable from
// that word — confirm callers never rely on 0 meaning "not found".
WORD_ID Vocabulary::GetWordID( const WORD &word ) {
  map<WORD, WORD_ID>::iterator i = lookup.find( word );
  if( i == lookup.end() )
    return 0;
  WORD_ID w= (WORD_ID) i->second;
  return w;
}

View File

@ -0,0 +1,40 @@
// $Id: tables-core.h 1470 2007-10-02 21:43:54Z redpony $
#pragma once
#include <iostream>
#include <fstream>
#include <assert.h>
#include <stdlib.h>
#include <string>
#include <queue>
#include <map>
#include <vector>
#include <cmath>
// FIX: <vector> was missing even though vector<WORD>/vector<WORD_ID> are
// used below (it only compiled via another header's transitive include).

using namespace std;

#define MAX_LENGTH 10000

// Read one '\n'-terminated line into a fixed buffer; clears a recoverable
// stream failure, and aborts the program if the line would overflow.
#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) { \
    _IS.getline(_LINE, _SIZE, _DELIM); \
    if(_IS.fail() && !_IS.bad() && !_IS.eof()) _IS.clear(); \
    if (_IS.gcount() == _SIZE-1) { \
      cerr << "Line too long! Buffer overflow. Delete lines >=" \
           << _SIZE << " chars or raise MAX_LENGTH in phrase-extract/tables-core.cpp" \
           << endl; \
      exit(1); \
    } \
  }

typedef string WORD;
typedef unsigned int WORD_ID;

// Bidirectional word <-> id mapping: ids are positions in `vocab`,
// `lookup` maps surface strings back to ids.
class Vocabulary {
public:
  map<WORD, WORD_ID> lookup;   // word -> id
  vector< WORD > vocab;        // id -> word
  WORD_ID StoreIfNew( const WORD& );
  WORD_ID GetWordID( const WORD& );
  vector<WORD_ID> Tokenize( const char[] );
  // casts away const so a non-const reference can be handed out from a
  // const Vocabulary (kept as-is for caller compatibility)
  inline WORD &GetWord( WORD_ID id ) const { WORD &i = (WORD&) vocab[ id ]; return i; }
};

View File

@ -0,0 +1,460 @@
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <map>
#include <algorithm>
#include <iostream>
#include <fstream>
#include <cstring>
#include <time.h>
#include <fstream>
#include "SentenceAlignment.h"
#include "fuzzy-match2.h"
#include "SuffixArray.h"
/** This implementation is explained in
Koehn and Senellart: "Fast Approximate String Matching
with Suffix Arrays and A* Parsing" (AMTA 2010) ***/
using namespace std;
// Fuzzy-match driver: for every input sentence, find the translation-memory
// sentence(s) with the lowest (word-based, optionally letter-refined) edit
// distance, using suffix-array substring matches to prune candidates.
// Usage: fuzzy-match input source target alignment [--basic] [--word]
//        [--unrefined] [--nolengthfilter] [--noparse] [--multiple]
//        [--minmatch 1..100]
int main(int argc, char* argv[])
{
  vector< vector< WORD_ID > > source, input;
  vector< vector< SentenceAlignment > > targetAndAlignment;

  // parse command-line flags; most options just toggle the globals
  // declared in fuzzy-match2.h via getopt_long's flag mechanism
  while(1) {
    static struct option long_options[] = {
      {"basic", no_argument, &basic_flag, 1},
      {"word", no_argument, &lsed_flag, 0},
      {"unrefined", no_argument, &refined_flag, 0},
      {"nolengthfilter", no_argument, &length_filter_flag, 0},
      {"noparse", no_argument, &parse_flag, 0},
      {"multiple", no_argument, &multiple_flag, 1},
      {"minmatch", required_argument, 0, 'm'},
      {0, 0, 0, 0}
    };
    int option_index = 0;
    int c = getopt_long (argc, argv, "m:", long_options, &option_index);
    if (c == -1) break;
    switch (c) {
    case 0:
      // if (long_options[option_index].flag != 0)
      // break;
      // printf ("option %s", long_options[option_index].name);
      // if (optarg)
      // printf (" with arg %s", optarg);
      // printf ("\n");
      break;
    case 'm':
      min_match = atoi(optarg);
      if (min_match < 1 || min_match > 100) {
        cerr << "error: --minmatch must have value in range 1..100\n";
        exit(1);
      }
      cerr << "setting min match to " << min_match << endl;
      break;
    default:
      cerr << "usage: syntax: ./fuzzy-match input corpus [--basic] [--word] [--minmatch 1..100]\n";
      exit(1);
    }
  }
  if (lsed_flag) { cerr << "lsed\n"; }
  if (basic_flag) { cerr << "basic\n"; }
  if (refined_flag) { cerr << "refined\n"; }
  if (length_filter_flag) { cerr << "length filter\n"; }
  if (parse_flag) { cerr << "parse\n"; }
  // exit(1);

  if (optind+4 != argc) {
    cerr << "syntax: ./fuzzy-match input source target alignment [--basic] [--word] [--minmatch 1..100]\n";
    exit(1);
  }

  load_corpus(argv[optind], input);
  load_corpus(argv[optind+1], source);
  load_target(argv[optind+2], targetAndAlignment);
  load_alignment(argv[optind+3], targetAndAlignment);

  // ./fuzzy-match input corpus [-basic]
  // load_corpus("../corpus/tm.truecased.4.en", source);
  // load_corpus("../corpus/tm.truecased.4.it", target);
  // load_corpus("../evaluation/test.input.tc.4", input);
  // load_corpus("../../acquis-truecase/corpus/acquis.truecased.190.en", source);
  // load_corpus("../../acquis-truecase/evaluation/ac-test.input.tc.190", input);
  // load_corpus("../corpus/tm.truecased.16.en", source);
  // load_corpus("../evaluation/test.input.tc.16", input);

  // --basic: brute-force comparison against every TM sentence, then exit
  if (basic_flag) {
    cerr << "using basic method\n";
    clock_t start_main_clock2 = clock();
    basic_fuzzy_match( source, input );
    cerr << "total: " << (1000 * (clock()-start_main_clock2) / CLOCKS_PER_SEC) << endl;
    exit(1);
  }

  cerr << "number of input sentences " << input.size() << endl;

  cerr << "creating suffix array...\n";
  // SuffixArray suffixArray( "../corpus/tm.truecased.4.en" );
  // SuffixArray suffixArray( "../../acquis-truecase/corpus/acquis.truecased.190.en" );
  SuffixArray suffixArray( argv[optind+1] );

  clock_t start_main_clock = clock();

  // looping through all input sentences...
  cerr << "looping...\n";
  for(unsigned int sentenceInd = 0; sentenceInd < input.size(); sentenceInd++)
  {
    clock_t start_clock = clock();
    // if (i % 10 == 0) cerr << ".";

    // establish some basic statistics

    // int input_length = compute_length( input[i] );
    int input_length = input[sentenceInd].size();
    // worst edit distance still admissible under --minmatch
    int best_cost = input_length * (100-min_match) / 100 + 1;

    int match_count = 0; // how many substring matches to be considered
    //cerr << endl << "sentence " << i << ", length " << input_length << ", best_cost " << best_cost << endl;

    // find match ranges in suffix array:
    // match_range[start][len-1] = suffix-array index range of matches for
    // the input substring starting at `start` of length `len`
    vector< vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > > match_range;
    for(size_t start=0;start<input[sentenceInd].size();start++)
    {
      SuffixArray::INDEX prior_first_match = 0;
      SuffixArray::INDEX prior_last_match = suffixArray.GetSize()-1;
      vector< string > substring;
      bool stillMatched = true;
      vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > matchedAtThisStart;
      //cerr << "start: " << start;
      for(int word=start; stillMatched && word<input[sentenceInd].size(); word++)
      {
        substring.push_back( vocabulary.GetWord( input[sentenceInd][word] ) );
        // only look up, if needed (i.e. no unnecessary short gram lookups)
        // if (! word-start+1 <= short_match_max_length( input_length ) )
        // {
        SuffixArray::INDEX first_match, last_match;
        stillMatched = false;
        // each longer substring can only match within the previous range
        if (suffixArray.FindMatches( substring, first_match, last_match, prior_first_match, prior_last_match ) )
        {
          stillMatched = true;
          matchedAtThisStart.push_back( make_pair( first_match, last_match ) );
          //cerr << " (" << first_match << "," << last_match << ")";
          //cerr << " " << ( last_match - first_match + 1 );
          prior_first_match = first_match;
          prior_last_match = last_match;
        }
        //}
      }
      //cerr << endl;
      match_range.push_back( matchedAtThisStart );
    }

    clock_t clock_range = clock();

    map< int, vector< Match > > sentence_match;
    map< int, int > sentence_match_word_count;

    // go through all matches, longest first
    for(int length = input[sentenceInd].size(); length >= 1; length--)
    {
      // do not create matches, if these are handled by the short match function
      if (length <= short_match_max_length( input_length ) )
      {
        continue;
      }

      unsigned int count = 0;
      for(int start = 0; start <= input[sentenceInd].size() - length; start++)
      {
        if (match_range[start].size() >= length)
        {
          pair< SuffixArray::INDEX, SuffixArray::INDEX > &range = match_range[start][length-1];
          // cerr << " (" << range.first << "," << range.second << ")";
          count += range.second - range.first + 1;

          for(SuffixArray::INDEX i=range.first; i<=range.second; i++)
          {
            int position = suffixArray.GetPosition( i );

            // sentence length mismatch
            size_t sentence_id = suffixArray.GetSentence( position );
            int sentence_length = suffixArray.GetSentenceLength( sentence_id );
            int diff = abs( (int)sentence_length - (int)input_length );
            // cerr << endl << i << "\tsentence " << sentence_id << ", length " << sentence_length;
            //if (length <= 2 && input_length>=5 &&
            // sentence_match.find( sentence_id ) == sentence_match.end())
            // continue;

            if (diff > best_cost)
              continue;

            // compute minimal cost
            int start_pos = suffixArray.GetWordInSentence( position );
            int end_pos = start_pos + length-1;
            // cerr << endl << "\t" << start_pos << "-" << end_pos << " (" << sentence_length << ") vs. "
            // << start << "-" << (start+length-1) << " (" << input_length << ")";
            // different number of prior words -> cost is at least diff
            int min_cost = abs( start - start_pos );

            // same number of words, but not sent. start -> cost is at least 1
            if (start == start_pos && start>0)
              min_cost++;

            // different number of remaining words -> cost is at least diff
            min_cost += abs( ( sentence_length-1 - end_pos ) -
                             ( input_length-1 - (start+length-1) ) );

            // same number of words, but not sent. end -> cost is at least 1
            if ( sentence_length-1 - end_pos ==
                 input_length-1 - (start+length-1)
                 && end_pos != sentence_length-1 )
              min_cost++;

            // cerr << " -> min_cost " << min_cost;
            if (min_cost > best_cost)
              continue;

            // valid match
            match_count++;

            // compute maximal cost
            int max_cost = max( start, start_pos )
              + max( sentence_length-1 - end_pos,
                     input_length-1 - (start+length-1) );
            // cerr << ", max_cost " << max_cost;

            Match m = Match( start, start+length-1,
                             start_pos, start_pos+length-1,
                             min_cost, max_cost, 0);
            sentence_match[ sentence_id ].push_back( m );
            sentence_match_word_count[ sentence_id ] += length;

            // a match's max_cost is an upper bound on the true edit
            // distance, so it can tighten the global pruning threshold
            if (max_cost < best_cost)
            {
              best_cost = max_cost;
              if (best_cost == 0) break;
            }
            //if (match_count >= MAX_MATCH_COUNT) break;
          }
        }
        // cerr << endl;
        if (best_cost == 0) break;
        //if (match_count >= MAX_MATCH_COUNT) break;
      }
      // cerr << count << " matches at length " << length << " in " << sentence_match.size() << " tm." << endl;
      if (best_cost == 0) break;
      //if (match_count >= MAX_MATCH_COUNT) break;
    }
    cerr << match_count << " matches in " << sentence_match.size() << " sentences." << endl;

    clock_t clock_matches = clock();

    // consider each sentence for which we have matches
    int old_best_cost = best_cost;
    int tm_count_word_match = 0;
    int tm_count_word_match2 = 0;
    int pruned_match_count = 0;
    if (short_match_max_length( input_length ))
    {
      init_short_matches( input[sentenceInd] );
    }
    vector< int > best_tm;
    typedef map< int, vector< Match > >::iterator I;

    clock_t clock_validation_sum = 0;

    for(I tm=sentence_match.begin(); tm!=sentence_match.end(); tm++)
    {
      int tmID = tm->first;
      int tm_length = suffixArray.GetSentenceLength(tmID);
      vector< Match > &match = tm->second;
      add_short_matches( match, source[tmID], input_length, best_cost );

      //cerr << "match in sentence " << tmID << ": " << match.size() << " [" << tm_length << "]" << endl;

      // quick look: how many words are matched
      int words_matched = 0;
      for(int m=0;m<match.size();m++) {

        if (match[m].min_cost <= best_cost) // makes no difference
          words_matched += match[m].input_end - match[m].input_start + 1;
      }
      if (max(input_length,tm_length) - words_matched > best_cost)
      {
        if (length_filter_flag) continue;
      }
      tm_count_word_match++;

      // prune, check again how many words are matched
      vector< Match > pruned = prune_matches( match, best_cost );
      words_matched = 0;
      for(int p=0;p<pruned.size();p++) {
        words_matched += pruned[p].input_end - pruned[p].input_start + 1;
      }
      if (max(input_length,tm_length) - words_matched > best_cost)
      {
        if (length_filter_flag) continue;
      }
      tm_count_word_match2++;

      pruned_match_count += pruned.size();
      int prior_best_cost = best_cost;
      int cost;

      clock_t clock_validation_start = clock();
      // validate: either a full string edit distance, or (by default)
      // the A* parse over the pruned matches
      if (! parse_flag ||
          pruned.size()>=10) // to prevent worst cases
      {
        string path;
        cost = sed( input[sentenceInd], source[tmID], path, false );
        if (cost < best_cost)
        {
          best_cost = cost;
        }
      }
      else
      {
        cost = parse_matches( pruned, input_length, tm_length, best_cost );
        if (prior_best_cost != best_cost)
        {
          best_tm.clear();
        }
      }
      clock_validation_sum += clock() - clock_validation_start;
      if (cost == best_cost)
      {
        best_tm.push_back( tmID );
      }
    }
    cerr << "reduced best cost from " << old_best_cost << " to " << best_cost << endl;
    cerr << "tm considered: " << sentence_match.size()
         << " word-matched: " << tm_count_word_match
         << " word-matched2: " << tm_count_word_match2
         << " best: " << best_tm.size() << endl;

    cerr << "pruned matches: " << ((float)pruned_match_count/(float)tm_count_word_match2) << endl;

    // create xml and extract files
    string inputStr, sourceStr;
    for (size_t pos = 0; pos < input_length; ++pos) {
      inputStr += vocabulary.GetWord(input[sentenceInd][pos]) + " ";
    }

    // do not try to find the best ... report multiple matches
    if (multiple_flag) {
      int input_letter_length = compute_length( input[sentenceInd] );
      for(int si=0; si<best_tm.size(); si++) {
        int s = best_tm[si];
        string path;
        unsigned int letter_cost = sed( input[sentenceInd], source[s], path, true );
        // do not report multiple identical sentences, but just their count
        cout << sentenceInd << " "; // sentence number
        cout << letter_cost << "/" << input_letter_length << " ";
        cout << "(" << best_cost <<"/" << input_length <<") ";
        cout << "||| " << s << " ||| " << path << endl;

        vector<WORD_ID> &sourceSentence = source[s];
        vector<SentenceAlignment> &targets = targetAndAlignment[s];
        create_extract(sentenceInd, best_cost, sourceSentence, targets, inputStr, path);

      }
    } // if (multiple_flag)
    else {

      // find the best matches according to letter sed
      string best_path = "";
      int best_match = -1;
      int best_letter_cost;
      if (lsed_flag) {
        best_letter_cost = compute_length( input[sentenceInd] ) * min_match / 100 + 1;
        for(int si=0; si<best_tm.size(); si++)
        {
          int s = best_tm[si];
          string path;
          unsigned int letter_cost = sed( input[sentenceInd], source[s], path, true );
          if (letter_cost < best_letter_cost)
          {
            best_letter_cost = letter_cost;
            best_path = path;
            best_match = s;
          }
        }
      }
      // if letter sed turned off, just compute path for first match
      else {
        if (best_tm.size() > 0) {
          string path;
          sed( input[sentenceInd], source[best_tm[0]], path, false );
          best_path = path;
          best_match = best_tm[0];
        }
      }
      cerr << "elapsed: " << (1000 * (clock()-start_clock) / CLOCKS_PER_SEC)
           << " ( range: " << (1000 * (clock_range-start_clock) / CLOCKS_PER_SEC)
           << " match: " << (1000 * (clock_matches-clock_range) / CLOCKS_PER_SEC)
           << " tm: " << (1000 * (clock()-clock_matches) / CLOCKS_PER_SEC)
           << " (validation: " << (1000 * (clock_validation_sum) / CLOCKS_PER_SEC) << ")"
           << " )" << endl;
      if (lsed_flag) {
        cout << best_letter_cost << "/" << compute_length( input[sentenceInd] ) << " (";
      }
      cout << best_cost <<"/" << input_length;
      if (lsed_flag) cout << ")";
      cout << " ||| " << best_match << " ||| " << best_path << endl;

      // create xml & extracts
      // NOTE(review): if no candidate survived (best_tm empty, or none beat
      // the letter-cost threshold), best_match is still -1 here and
      // source[-1]/targetAndAlignment[-1] below is undefined behavior —
      // confirm whether that case can occur in practice.
      vector<WORD_ID> &sourceSentence = source[best_match];
      vector<SentenceAlignment> &targets = targetAndAlignment[best_match];
      create_extract(sentenceInd, best_cost, sourceSentence, targets, inputStr, best_path);

    } // else if (multiple_flag)
  }
  cerr << "total: " << (1000 * (clock()-start_main_clock) / CLOCKS_PER_SEC) << endl;
}
// Write one extraction record per target/alignment pair of the matched TM
// sentence to a temporary file: sentence number, cost, TM source, input,
// target, alignment, edit path and count, one item per line.
// NOTE(review): tmpnam() is insecure (race between name generation and
// open) — mkstemp would be safer. The "create_xml.perl" command is built
// and printed to cerr but never executed, and the temp file is never
// removed; presumably the post-processing step was deliberately disabled
// in this merge — confirm.
void create_extract(int sentenceInd, int cost, const vector< WORD_ID > &sourceSentence, const vector<SentenceAlignment> &targets, const string &inputStr, const string &path)
{
  // reconstruct the TM source sentence as a surface string
  string sourceStr;
  for (size_t pos = 0; pos < sourceSentence.size(); ++pos) {
    WORD_ID wordId = sourceSentence[pos];
    sourceStr += vocabulary.GetWord(wordId) + " ";
  }

  char *inputFileName = tmpnam(NULL);
  ofstream inputFile(inputFileName);

  for (size_t targetInd = 0; targetInd < targets.size(); ++targetInd) {
    const SentenceAlignment &sentenceAlignment = targets[targetInd];
    string targetStr = sentenceAlignment.getTargetString();
    string alignStr = sentenceAlignment.getAlignmentString();

    inputFile
      << sentenceInd << endl
      << cost << endl
      << sourceStr << endl
      << inputStr << endl
      << targetStr << endl
      << alignStr << endl
      << path << endl
      << sentenceAlignment.count << endl;
  }

  string cmd = string("perl create_xml.perl < ") + inputFileName;
  cerr << cmd << endl;
  inputFile.close();
}

View File

@ -0,0 +1,561 @@
//
// fuzzy-match2.h
// fuzzy-match
//
// Created by Hieu Hoang on 25/07/2012.
// Copyright 2012 __MyCompanyName__. All rights reserved.
//
#ifndef fuzzy_match_fuzzy_match2_h
#define fuzzy_match_fuzzy_match2_h
#include <string>
#include <sstream>
#include <vector>
#include "Vocabulary.h"
#include "SuffixArray.h"
#include "Util.h"
#include "Match.h"
#define MAX_MATCH_COUNT 10000000

// Global state shared by the matcher. NOTE(review): these are definitions
// in a header, so this header can be included from only one translation
// unit without multiple-definition link errors — confirm that is intended.
Vocabulary vocabulary;   // shared word <-> id mapping

int basic_flag = false;         // --basic: brute-force comparison against every TM sentence
int lsed_flag = true;           // letter-based edit distance refinement (--word turns it off)
int refined_flag = true;        // short-match refinement (--unrefined turns it off)
int length_filter_flag = true;  // length-based candidate pruning (--nolengthfilter turns it off)
int parse_flag = true;          // A* parse validation (--noparse turns it off)
int min_match = 70;             // --minmatch: required match percentage, 1..100
int multiple_flag = false;      // --multiple: report all best matches, not just one
int multiple_slack = 0;
int multiple_max = 100;

// positions of each input word, filled by init_short_matches()
map< WORD_ID,vector< int > > single_word_index;
// global cache for word pairs
map< pair< WORD_ID, WORD_ID >, unsigned int > lsed;

void create_extract(int sentenceInd, int cost, const vector< WORD_ID > &sourceSentence, const vector<SentenceAlignment> &targets, const string &inputStr, const string &path);
/* Letter string edit distance, e.g. sub 'their' to 'there' costs 2.
   Results are memoized per (word id, word id) pair in the global `lsed`
   cache. */
// FIX: the original allocated the DP matrix with calloc's count/size
// arguments reversed (calloc(sizeof(unsigned int*), a.size()+1)) — the
// byte total happened to be sufficient, but the call was wrong as written.
// The matrix is now a std::vector (RAII, no manual free); results are
// unchanged.
unsigned int letter_sed( WORD_ID aIdx, WORD_ID bIdx )
{
  // check if already computed -> lookup in cache
  pair< WORD_ID, WORD_ID > pIdx = make_pair( aIdx, bIdx );
  map< pair< WORD_ID, WORD_ID >, unsigned int >::const_iterator lookup = lsed.find( pIdx );
  if (lookup != lsed.end())
  {
    return (lookup->second);
  }

  // get surface strings for word indices
  const string &a = vocabulary.GetWord( aIdx );
  const string &b = vocabulary.GetWord( bIdx );

  // initialize cost matrix: first row/column = delete/insert everything
  vector< vector< unsigned int > > cost( a.size()+1, vector< unsigned int >( b.size()+1, 0 ) );
  for( unsigned int i=0; i<=a.size(); i++ ) {
    cost[i][0] = i;
  }
  for( unsigned int j=0; j<=b.size(); j++ ) {
    cost[0][j] = j;
  }

  // core string edit distance loop
  for( unsigned int i=1; i<=a.size(); i++ ) {
    for( unsigned int j=1; j<=b.size(); j++ ) {
      unsigned int ins = cost[i-1][j] + 1;
      unsigned int del = cost[i][j-1] + 1;
      bool match = (a[i-1] == b[j-1]);
      unsigned int diag = cost[i-1][j-1] + (match ? 0 : 1);

      unsigned int min = (ins < del) ? ins : del;
      min = (diag < min) ? diag : min;

      cost[i][j] = min;
    }
  }

  // cache and return result
  unsigned int final = cost[a.size()][b.size()];
  lsed[ pIdx ] = final;
  return final;
}
/* string edit distance implementation */
// Word-level edit distance between sentences a and b. If use_letter_sed is
// set, each operation is weighted by letters (insert/delete cost a word's
// length, substitution costs letter_sed of the pair); otherwise every
// operation costs 1. best_path is set to the backtraced operation string:
// 'I' consumes a word of a, 'D' a word of b, 'M' match, 'S' substitution.
// NOTE(review): the calloc calls pass (size, count) instead of
// (count, size); the byte total is the same, so behavior is unaffected.
unsigned int sed( const vector< WORD_ID > &a, const vector< WORD_ID > &b, string &best_path, bool use_letter_sed ) {

  // initialize cost and path matrices
  unsigned int **cost = (unsigned int**) calloc( sizeof( unsigned int* ), a.size()+1 );
  char **path = (char**) calloc( sizeof( char* ), a.size()+1 );

  // first column: delete every word of a
  for( unsigned int i=0; i<=a.size(); i++ ) {
    cost[i] = (unsigned int*) calloc( sizeof(unsigned int), b.size()+1 );
    path[i] = (char*) calloc( sizeof(char), b.size()+1 );
    if (i>0)
    {
      cost[i][0] = cost[i-1][0];
      if (use_letter_sed)
      {
        cost[i][0] += vocabulary.GetWord( a[i-1] ).size();
      }
      else
      {
        cost[i][0]++;
      }
    }
    else
    {
      cost[i][0] = 0;
    }
    path[i][0] = 'I';
  }

  // first row: insert every word of b
  for( unsigned int j=0; j<=b.size(); j++ ) {
    if (j>0)
    {
      cost[0][j] = cost[0][j-1];
      if (use_letter_sed)
      {
        cost[0][j] += vocabulary.GetWord( b[j-1] ).size();
      }
      else
      {
        cost[0][j]++;
      }
    }
    else
    {
      cost[0][j] = 0;
    }
    path[0][j] = 'D';
  }

  // core string edit distance algorithm
  for( unsigned int i=1; i<=a.size(); i++ ) {
    for( unsigned int j=1; j<=b.size(); j++ ) {
      unsigned int ins = cost[i-1][j];
      unsigned int del = cost[i][j-1];
      unsigned int match;
      if (use_letter_sed)
      {
        ins += vocabulary.GetWord( a[i-1] ).size();
        del += vocabulary.GetWord( b[j-1] ).size();
        match = letter_sed( a[i-1], b[j-1] );
      }
      else
      {
        ins++;
        del++;
        match = ( a[i-1] == b[j-1] ) ? 0 : 1;
      }
      unsigned int diag = cost[i-1][j-1] + match;

      char action = (ins < del) ? 'I' : 'D';
      unsigned int min = (ins < del) ? ins : del;
      if (diag < min)
      {
        action = (match>0) ? 'S' : 'M';
        min = diag;
      }

      cost[i][j] = min;
      path[i][j] = action;
    }
  }

  // construct string for best path (backtrace from the bottom-right cell;
  // char + string prepends the action)
  unsigned int i = a.size();
  unsigned int j = b.size();
  best_path = "";
  while( i>0 || j>0 )
  {
    best_path = path[i][j] + best_path;
    if (path[i][j] == 'I')
    {
      i--;
    }
    else if (path[i][j] == 'D')
    {
      j--;
    }
    else
    {
      i--;
      j--;
    }
  }

  // clear out memory
  unsigned int final = cost[a.size()][b.size()];
  for( unsigned int i=0; i<=a.size(); i++ ) {
    free( cost[i] );
    free( path[i] );
  }
  free( cost );
  free( path );

  // return result
  return final;
}
/* utility function: compute the length of a sentence in characters
   (spaces do not count) */
unsigned int compute_length( const vector< WORD_ID > &sentence )
{
  unsigned int total = 0;
  for( unsigned int w=0; w<sentence.size(); w++ )
  {
    total += vocabulary.GetWord( sentence[w] ).size();
  }
  return total;
}
/* brute force method: compare input to all corpus sentences */
int basic_fuzzy_match( vector< vector< WORD_ID > > source,
vector< vector< WORD_ID > > input )
{
// go through input set...
for(unsigned int i=0;i<input.size();i++)
{
bool use_letter_sed = false;
// compute sentence length and worst allowed cost
unsigned int input_length;
if (use_letter_sed)
{
input_length = compute_length( input[i] );
}
else
{
input_length = input[i].size();
}
unsigned int best_cost = input_length * (100-min_match) / 100 + 2;
string best_path = "";
int best_match = -1;
// go through all corpus sentences
for(unsigned int s=0;s<source.size();s++)
{
int source_length;
if (use_letter_sed)
{
source_length = compute_length( source[s] );
}
else
{
source_length = source[s].size();
}
int diff = abs((int)source_length - (int)input_length);
if (length_filter_flag && (diff >= best_cost))
{
continue;
}
// compute string edit distance
string path;
unsigned int cost = sed( input[i], source[s], path, use_letter_sed );
// update if new best
if (cost < best_cost)
{
best_cost = cost;
best_path = path;
best_match = s;
}
}
cout << best_cost << " ||| " << best_match << " ||| " << best_path << endl;
}
}
/* definition of short matches
   very short n-gram matches (1-grams) will not be looked up in
   the suffix array, since there are too many matches
   and for longer sentences, at least one 2-gram match must occur */
inline int short_match_max_length( int input_length )
{
  // refinement off -> no short matches at all; otherwise 1-grams count as
  // "short" once the sentence has at least 5 words
  if ( refined_flag && input_length >= 5 )
    return 1;
  return 0;
}
/* if we have non-short matches in a sentence, we need to
   take a closer look at it.
   this function rebuilds the global single_word_index: a hash map from each
   input word id to all of its positions in the input sentence
   (done here, because this has to be done only once per input sentence) */
void init_short_matches( const vector< WORD_ID > &input )
{
  // nothing to index if short matches are disabled for this length
  if (short_match_max_length( input.size() ) == 0)
    return;

  single_word_index.clear();

  // store input words and their positions in the hash map; operator[]
  // creates the (empty) position vector on first sight of a word
  for(int pos=0; pos<input.size(); pos++)
  {
    single_word_index[ input[pos] ].push_back( pos );
  }
}
/* add all short matches to list of matches for a sentence */
// For every TM word that also occurs in the input (per the
// single_word_index built by init_short_matches), create a 1-word Match
// with lower/upper edit-cost bounds, keeping only those whose minimal cost
// does not exceed best_cost.
void add_short_matches( vector< Match > &match, const vector< WORD_ID > &tm, int input_length, int best_cost )
{
  int max_length = short_match_max_length( input_length );
  if (max_length == 0)
    return;

  int tm_length = tm.size();
  map< WORD_ID,vector< int > >::iterator input_word_hit;
  for(int t_pos=0; t_pos<tm.size(); t_pos++)
  {
    input_word_hit = single_word_index.find( tm[t_pos] );
    if (input_word_hit != single_word_index.end())
    {
      // this TM word occurs in the input: one candidate per input position
      vector< int > &position_vector = input_word_hit->second;
      for(int j=0; j<position_vector.size(); j++)
      {
        int &i_pos = position_vector[j];

        // before match: at most everything before the match differs,
        // at least the position difference must be edited
        int max_cost = max( i_pos , t_pos );
        int min_cost = abs( i_pos - t_pos );
        // same offset but not at sentence start -> at least one edit
        if ( i_pos>0 && i_pos == t_pos )
          min_cost++;

        // after match: same reasoning for the remaining words
        max_cost += max( (input_length-i_pos) , (tm_length-t_pos));
        min_cost += abs( (input_length-i_pos) - (tm_length-t_pos));
        // same remainder but not at sentence end -> at least one edit
        if ( i_pos != input_length-1 && (input_length-i_pos) == (tm_length-t_pos))
          min_cost++;

        if (min_cost <= best_cost)
        {
          Match new_match( i_pos,i_pos, t_pos,t_pos, min_cost,max_cost,0 );
          match.push_back( new_match );
        }
      }
    }
  }
}
/* remove matches that are subsumed by a larger match */
/* A match i is subsumed if some other match j spans at least as many
   input words and shares either its start point or its end point (in
   both input and tm coordinates). Matches whose minimum cost exceeds
   best_cost are dropped as well. Surviving matches are returned in
   reverse order of the original list. */
vector< Match > prune_matches( const vector< Match > &match, int best_cost )
{
  vector< Match > pruned;
  for(int i=match.size()-1; i>=0; i--)
  {
    bool subsumed = false;
    for(int j=match.size()-1; j>=0; j--)
    {
      if (i!=j // do not compare match with itself
          && ( match[i].input_end - match[i].input_start <=
               match[j].input_end - match[j].input_start ) // i shorter than j
          && ((match[i].input_start == match[j].input_start &&
               match[i].tm_start == match[j].tm_start ) ||
              (match[i].input_end == match[j].input_end &&
               match[i].tm_end == match[j].tm_end) ) )
      {
        subsumed = true;
        break; // one subsuming match is enough; no need to keep scanning
      }
    }
    if (! subsumed && match[i].min_cost <= best_cost)
    {
      pruned.push_back( match[i] );
    }
  }
  return pruned;
}
/* A* parsing method to compute string edit distance */
/* Combines n-gram matches bottom-up into ever larger spans:
   a level-k span joins a level-i and a level-j span (i+j+1 == k)
   that overlap in neither input nor tm coordinates. Every span
   carries a lower bound (min_cost) and an upper bound (max_cost)
   on the full-sentence edit cost; combinations whose lower bound
   already exceeds best_cost are discarded, and best_cost is
   tightened in place whenever a new span's upper bound beats it.
   Returns the best upper bound found. */
int parse_matches( vector< Match > &match, int input_length, int tm_length, int &best_cost )
{
  // cerr << "sentence has " << match.size() << " matches, best cost: " << best_cost << ", lengths input: " << input_length << " tm: " << tm_length << endl;

  // trivial cases: a single match, or none at all
  if (match.size() == 1)
    return match[0].max_cost;
  if (match.size() == 0)
    return input_length+tm_length;

  // start from the tightest single-match upper bound
  int this_best_cost = input_length + tm_length;
  for(int i=0;i<match.size();i++)
  {
    this_best_cost = min( this_best_cost, match[i].max_cost );
  }
  // cerr << "\tthis best cost: " << this_best_cost << endl;

  // bottom up combination of spans
  vector< vector< Match > > multi_match;
  multi_match.push_back( match );

  int match_level = 1;
  while(multi_match[ match_level-1 ].size()>0)
  {
    // init vector
    vector< Match > empty;
    multi_match.push_back( empty );

    // iterating first_level only up to (match_level-1)/2 visits each
    // unordered level pair exactly once
    for(int first_level = 0; first_level <= (match_level-1)/2; first_level++)
    {
      int second_level = match_level - first_level -1;
      //cerr << "\tcombining level " << first_level << " and " << second_level << endl;

      vector< Match > &first_match = multi_match[ first_level ];
      vector< Match > &second_match = multi_match[ second_level ];

      for(int i1 = 0; i1 < first_match.size(); i1++) {
        for(int i2 = 0; i2 < second_match.size(); i2++) {

          // do not combine the same pair twice
          if (first_level == second_level && i2 <= i1)
          {
            continue;
          }

          // get sorted matches (first is before second)
          Match *first, *second;
          if (first_match[i1].input_start < second_match[i2].input_start )
          {
            first = &first_match[i1];
            second = &second_match[i2];
          }
          else
          {
            second = &first_match[i1];
            first = &second_match[i2];
          }

          //cerr << "\tcombining "
          //     << "(" << first->input_start << "," << first->input_end << "), "
          //     << first->tm_start << " [" << first->internal_cost << "]"
          //     << " with "
          //     << "(" << second->input_start << "," << second->input_end << "), "
          //     << second->tm_start<< " [" << second->internal_cost << "]"
          //     << endl;

          // do not process overlapping matches
          if (first->input_end >= second->input_start)
          {
            continue;
          }

          // no overlap / mismatch in tm
          if (first->tm_end >= second->tm_start)
          {
            continue;
          }

          // compute cost
          int min_cost = 0;
          int max_cost = 0;

          // initial: words before the first span
          min_cost += abs( first->input_start - first->tm_start );
          max_cost += max( first->input_start, first->tm_start );
          // same number of words, but not sent. start -> cost is at least 1
          if (first->input_start == first->tm_start && first->input_start > 0)
          {
            min_cost++;
          }

          // in-between: words between the two combined spans
          int skipped_words = second->input_start - first->input_end -1;
          int skipped_words_tm = second->tm_start - first->tm_end -1;
          int internal_cost = max( skipped_words, skipped_words_tm );
          internal_cost += first->internal_cost + second->internal_cost;
          min_cost += internal_cost;
          max_cost += internal_cost;

          // final: words after the second span
          min_cost += abs( (tm_length-1 - second->tm_end) -
                           (input_length-1 - second->input_end) );
          max_cost += max( (tm_length-1 - second->tm_end),
                           (input_length-1 - second->input_end) );
          // same number of words, but not sent. end -> cost is at least 1
          if ( ( input_length-1 - second->input_end
                 == tm_length-1 - second->tm_end )
               && input_length-1 != second->input_end )
          {
            min_cost++;
          }

          // cerr << "\tcost: " << min_cost << "-" << max_cost << endl;

          // if worse than best cost, forget it
          if (min_cost > best_cost)
          {
            continue;
          }

          // add match
          Match new_match( first->input_start,
                           second->input_end,
                           first->tm_start,
                           second->tm_end,
                           min_cost,
                           max_cost,
                           internal_cost);
          multi_match[ match_level ].push_back( new_match );
          // cerr << "\tstored\n";

          // possibly updating this_best_cost
          if (max_cost < this_best_cost)
          {
            // cerr << "\tupdating this best cost to " << max_cost << "\n";
            this_best_cost = max_cost;

            // possibly updating best_cost
            if (max_cost < best_cost)
            {
              // cerr << "\tupdating best cost to " << max_cost << "\n";
              best_cost = max_cost;
            }
          }
        }
      }
    }
    match_level++;
  }
  return this_best_cost;
}
#endif

View File

@ -0,0 +1,214 @@
#!/usr/bin/perl -w
use strict;

# Build an XML-frame input file for Moses from fuzzy-match output:
# for each input sentence that has a translation-memory match, emit an
# XML frame constructed from the matched source/target pair and its
# word alignment; otherwise pass the input sentence through unchanged.

# NOTE(review): set but never checked anywhere in this script -- the
# debug prints in create_xml are unconditional; confirm intent.
my $DEBUG = 1;

my $match_file = "tm/BEST.acquis-xml-escaped.4.uniq";
my $source_file = "data/acquis.truecased.4.en.uniq";
my $target_file = "data/acquis.truecased.4.fr.uniq.most-frequent";
my $alignment_file = "data/acquis.truecased.4.align.uniq.most-frequent";
my $out_file = "data/ac-test.input.xml.4.uniq";
my $in_file = "evaluation/ac-test.input.tc.4";

#my $match_file = "tm/BEST.acquis-xml-escaped.4";
#my $source_file = "corpus/acquis.truecased.4.en";
#my $target_file = "corpus/acquis.truecased.4.fr";
#my $alignment_file = "model/aligned.4.grow-diag-final-and";
#my $out_file = "data/ac-test.input.xml.4";
#my $in_file = "evaluation/ac-test.input.tc.4";

#my $match_file = "tm/BEST.acquis.with";
#my $source_file = "../acquis-truecase/corpus/acquis.truecased.190.en";
#my $target_file = "../acquis-truecase/corpus/acquis.truecased.190.fr";
#my $alignment_file = "../acquis-truecase/model/aligned.190.grow-diag-final-and";
#my $out_file = "data/ac-test.input.xml";
#my $in_file = "evaluation/ac-test.input.tc.1";

my @INPUT = `cat $in_file`; chop(@INPUT);
my @SOURCE = `cat $source_file`; chop(@SOURCE);
my @TARGET = `cat $target_file`; chop(@TARGET);
my @ALIGNMENT = `cat $alignment_file`; chop(@ALIGNMENT);

# fail early instead of silently reading/writing nothing
open(MATCH,$match_file) or die "cannot open match file $match_file: $!";
open(FRAME,">$out_file") or die "cannot write output file $out_file: $!";

# one line of match data per input sentence
# (was a hard-coded count of 4107, which only worked for one data set)
for(my $i=0;$i<scalar(@INPUT);$i++) {
  # get match data: "score ||| matched-sentence-id ||| edit-path"
  my $match = <MATCH>;
  chop($match);
  my ($score,$sentence,$path) = split(/ \|\|\| /,$match);

  # construct frame
  if ($sentence < 1e9 && $sentence >= 0) {
    my $frame = &create_xml($SOURCE[$sentence],
                            $INPUT[$i],
                            $TARGET[$sentence],
                            $ALIGNMENT[$sentence],
                            $path);
    print FRAME $frame."\n";
  }
  # no frame -> output source
  else {
    print FRAME $INPUT[$i]."\n";
  }
}
close(FRAME);
close(MATCH);
# Build an XML frame for one input sentence from its fuzzy match.
# Arguments:
#   $source    - matched tm source sentence (space-separated tokens)
#   $input     - input sentence to be translated
#   $target    - tm target sentence
#   $alignment - word alignment "s-t s-t ..." between source and target
#   $path      - edit path from input to source (letters such as M/I/D)
# Returns the frame string: kept target spans wrapped in
# <xml translation="..."> markup, interleaved with the input words
# that still need to be translated.
# NOTE(review): the prints below are unconditional debug output;
# the script's $DEBUG flag is never consulted -- confirm intent.
sub create_xml {
  my ($source,$input,$target,$alignment,$path) = @_;
  my @INPUT = split(/ /,$input);
  my @SOURCE = split(/ /,$source);
  my @TARGET = split(/ /,$target);
  my %ALIGN = &create_alignment($alignment);
  # %FRAME_INPUT: target position -> input words to insert before it
  my %FRAME_INPUT;
  # @TARGET_BITMAP: 1 = target word kept, 0 = removed by a mismatch
  my @TARGET_BITMAP;
  foreach (@TARGET) { push @TARGET_BITMAP,1 }

  ### STEP 1: FIND MISMATCHES
  # walk the edit path, tracking source position $s and input position $i
  my ($s,$i) = (0,0);
  my $currently_matching = 0;
  my ($start_s,$start_i) = (0,0);
  $path .= "X"; # indicate end
  print "$input\n$source\n$target\n$path\n";
  for(my $p=0;$p<length($path);$p++) {
    my $action = substr($path,$p,1);
    # beginning of a mismatch
    if ($currently_matching && $action ne "M" && $action ne "X") {
      $start_i = $i;
      $start_s = $s;
      $currently_matching = 0;
    }
    # end of a mismatch
    elsif (!$currently_matching &&
           ($action eq "M" || $action eq "X")) {
      # remove use of affected target words
      for(my $ss = $start_s; $ss<$s; $ss++) {
        foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
          $TARGET_BITMAP[$tt] = 0;
        }
        # also remove enclosed unaligned words?
      }
      # are there input words that need to be inserted ?
      print "($start_i<$i)?\n";
      if ($start_i<$i) {
        # take note of input words to be inserted
        my $insertion = "";
        for(my $ii = $start_i; $ii<$i; $ii++) {
          $insertion .= $INPUT[$ii]." ";
        }
        # find position for inserted input words
        # find first removed target word
        my $start_t = 1000;
        for(my $ss = $start_s; $ss<$s; $ss++) {
          foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
            $start_t = $tt if $tt < $start_t;
          }
        }
        # end of sentence? add to end
        if ($start_t == 1000 && $i > $#INPUT) {
          $start_t = $#TARGET;
        }
        # backtrack to previous words if unaligned
        if ($start_t == 1000) {
          $start_t = -1;
          for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
            foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
              $start_t = $tt if $tt > $start_t;
            }
          }
        }
        $FRAME_INPUT{$start_t} .= $insertion;
      }
      $currently_matching = 1;
    }
    # debug trace: action, positions, and aligned target words
    print "$action $s $i ($start_s $start_i) $currently_matching";
    if ($action ne "I") {
      print " ->";
      foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
        print " ".$tt;
      }
    }
    print "\n";
    # 'I' consumes no source word, 'D' consumes no input word
    $s++ unless $action eq "I";
    $i++ unless $action eq "D";
  }
  print $target."\n";
  foreach (@TARGET_BITMAP) { print $_; } print "\n";
  foreach (sort keys %FRAME_INPUT) {
    print "$_: $FRAME_INPUT{$_}\n";
  }

  ### STEP 2: BUILD FRAME
  # modify frame
  my $frame = "";
  # input words to be inserted before any target word
  $frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};
  my $currently_included = 0;
  my $start_t = -1;
  push @TARGET_BITMAP,0; # indicate end
  for(my $t=0;$t<=scalar(@TARGET);$t++) {
    # beginning of tm target inclusion
    if (!$currently_included && $TARGET_BITMAP[$t]) {
      $start_t = $t;
      $currently_included = 1;
    }
    # end of tm target inclusion (not included word or inserted input)
    elsif ($currently_included &&
           (!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
      # add xml (unless change is at the beginning of the sentence
      if ($start_t >= 0) {
        my $target = "";
        print "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
        for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
          $target .= $TARGET[$tt] . " ";
        }
        chop($target);
        $frame .= "<xml translation=\"$target\"> x </xml> ";
      }
      $currently_included = 0;
    }
    # splice in any input words queued for this target position
    $frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
    print "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
  }
  print $frame."\n-------------------------------------\n";
  return $frame;
}
# Parse a word-alignment string ("0-0 1-2 ...") into two lookup tables:
# 's' maps each source position to a hash of aligned target positions,
# 't' maps each target position to a hash of aligned source positions.
sub create_alignment {
  my ($alignment_string) = @_;
  my (@source_to_target,@target_to_source);
  foreach my $link (split(/ /,$alignment_string)) {
    my ($src,$tgt) = split(/\-/,$link);
    $source_to_target[$src]{$tgt}++;
    $target_to_source[$tgt]{$src}++;
  }
  return ( 's' => \@source_to_target, 't' => \@target_to_source );
}

View File

@ -0,0 +1,982 @@
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <vector>
#include <map>
#include <string>
#include <algorithm>
#include <iostream>
#include <fstream>
#include <cstring>
#include <time.h>
#include "Vocabulary.h"
#include "SuffixArray.h"
/** This implementation is explained in
Koehn and Senellart: "Fast Approximate String Matching
with Suffix Arrays and A* Parsing" (AMTA 2010) ***/
using namespace std;
Vocabulary vocabulary;          // global word <-> WORD_ID mapping

// runtime settings, toggled by the command-line options parsed in main()
int basic_flag = false;         // --basic: brute-force comparison against every corpus sentence
int lsed_flag = true;           // cleared by --word: word-level instead of letter-level edit distance
int refined_flag = true;        // cleared by --unrefined: disables the short-match refinement
int length_filter_flag = true;  // cleared by --nolengthfilter: skip the sentence-length pre-filter
int parse_flag = true;          // cleared by --noparse: disables A* parsing of matches
int min_match = 70;             // --minmatch 1..100: minimum match percentage
int multiple_flag = false;      // set by --multiple (semantics not visible in this chunk)
int multiple_slack = 0;         // NOTE(review): defined but unused in the visible code
int multiple_max = 100;         // NOTE(review): defined but unused in the visible code
/* Read a tokenized corpus file, one sentence per line; each line is
   mapped to vocabulary word IDs and appended to `corpus`.
   Exits the process if the file cannot be opened. */
void load_corpus( char* fileName, vector< vector< WORD_ID > > &corpus )
{
  ifstream fileStream;
  fileStream.open(fileName);
  if (!fileStream) {
    cerr << "file not found: " << fileName << endl;
    exit(1);
  }
  istream *fileStreamP = &fileStream;

  // SAFE_GETLINE and LINE_MAX_LENGTH come from the suffix-array headers;
  // lines longer than LINE_MAX_LENGTH cannot be represented here
  char line[LINE_MAX_LENGTH];
  while(true)
  {
    SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
    if (fileStreamP->eof()) break;
    corpus.push_back( vocabulary.Tokenize( line ) );
  }
}
/* Letter string edit distance, e.g. sub 'their' to 'there' costs 2 */
// global cache for word pairs
map< pair< WORD_ID, WORD_ID >, unsigned int > lsed;

/* Character-level Levenshtein distance between the surface forms of
   two vocabulary words. Results are memoized in the global `lsed`
   cache, keyed by the ordered word-ID pair. */
unsigned int letter_sed( WORD_ID aIdx, WORD_ID bIdx )
{
  // check if already computed -> lookup in cache
  pair< WORD_ID, WORD_ID > pIdx = make_pair( aIdx, bIdx );
  map< pair< WORD_ID, WORD_ID >, unsigned int >::const_iterator lookup = lsed.find( pIdx );
  if (lookup != lsed.end())
  {
    return (lookup->second);
  }

  // get surface strings for word indices
  const string &a = vocabulary.GetWord( aIdx );
  const string &b = vocabulary.GetWord( bIdx );

  // cost matrix; std::vector replaces the original calloc/free pairs
  // (RAII: no manual cleanup, exception safe)
  vector< vector< unsigned int > > cost( a.size()+1, vector< unsigned int >( b.size()+1, 0 ) );
  for( unsigned int i=0; i<=a.size(); i++ ) {
    cost[i][0] = i;
  }
  for( unsigned int j=0; j<=b.size(); j++ ) {
    cost[0][j] = j;
  }

  // core string edit distance loop
  for( unsigned int i=1; i<=a.size(); i++ ) {
    for( unsigned int j=1; j<=b.size(); j++ ) {
      unsigned int ins = cost[i-1][j] + 1;
      unsigned int del = cost[i][j-1] + 1;
      // direct char comparison, equivalent to the old substr().compare()
      bool match = ( a[i-1] == b[j-1] );
      unsigned int diag = cost[i-1][j-1] + (match ? 0 : 1);

      unsigned int best = (ins < del) ? ins : del;
      best = (diag < best) ? diag : best;
      cost[i][j] = best;
    }
  }

  // cache and return result
  unsigned int final = cost[a.size()][b.size()];
  lsed[ pIdx ] = final;
  return final;
}
/* string edit distance implementation */
unsigned int sed( const vector< WORD_ID > &a, const vector< WORD_ID > &b, string &best_path, bool use_letter_sed ) {
// initialize cost and path matrices
unsigned int **cost = (unsigned int**) calloc( sizeof( unsigned int* ), a.size()+1 );
char **path = (char**) calloc( sizeof( char* ), a.size()+1 );
for( unsigned int i=0; i<=a.size(); i++ ) {
cost[i] = (unsigned int*) calloc( sizeof(unsigned int), b.size()+1 );
path[i] = (char*) calloc( sizeof(char), b.size()+1 );
if (i>0)
{
cost[i][0] = cost[i-1][0];
if (use_letter_sed)
{
cost[i][0] += vocabulary.GetWord( a[i-1] ).size();
}
else
{
cost[i][0]++;
}
}
else
{
cost[i][0] = 0;
}
path[i][0] = 'I';
}
for( unsigned int j=0; j<=b.size(); j++ ) {
if (j>0)
{
cost[0][j] = cost[0][j-1];
if (use_letter_sed)
{
cost[0][j] += vocabulary.GetWord( b[j-1] ).size();
}
else
{
cost[0][j]++;
}
}
else
{
cost[0][j] = 0;
}
path[0][j] = 'D';
}
// core string edit distance algorithm
for( unsigned int i=1; i<=a.size(); i++ ) {
for( unsigned int j=1; j<=b.size(); j++ ) {
unsigned int ins = cost[i-1][j];
unsigned int del = cost[i][j-1];
unsigned int match;
if (use_letter_sed)
{
ins += vocabulary.GetWord( a[i-1] ).size();
del += vocabulary.GetWord( b[j-1] ).size();
match = letter_sed( a[i-1], b[j-1] );
}
else
{
ins++;
del++;
match = ( a[i-1] == b[j-1] ) ? 0 : 1;
}
unsigned int diag = cost[i-1][j-1] + match;
char action = (ins < del) ? 'I' : 'D';
unsigned int min = (ins < del) ? ins : del;
if (diag < min)
{
action = (match>0) ? 'S' : 'M';
min = diag;
}
cost[i][j] = min;
path[i][j] = action;
}
}
// construct string for best path
unsigned int i = a.size();
unsigned int j = b.size();
best_path = "";
while( i>0 || j>0 )
{
best_path = path[i][j] + best_path;
if (path[i][j] == 'I')
{
i--;
}
else if (path[i][j] == 'D')
{
j--;
}
else
{
i--;
j--;
}
}
// clear out memory
unsigned int final = cost[a.size()][b.size()];
for( unsigned int i=0; i<=a.size(); i++ ) {
free( cost[i] );
free( path[i] );
}
free( cost );
free( path );
// return result
return final;
}
/* utility function: number of characters in a sentence
   (spaces do not count) */
unsigned int compute_length( const vector< WORD_ID > &sentence )
{
  unsigned int total = 0;
  for( unsigned int w=0; w<sentence.size(); w++ )
  {
    total += vocabulary.GetWord( sentence[w] ).size();
  }
  return total;
}
/* brute force method: compare input to all corpus sentences */
/* For every input sentence, compute the string edit distance to every
   corpus sentence (subject to the length pre-filter) and print
   "best_cost ||| best_match_index ||| edit_path" to stdout
   (best_match_index is -1 if nothing beat the threshold).
   Returns 0; the previous version was declared int but fell off the
   end without a return statement (undefined behavior). */
int basic_fuzzy_match( vector< vector< WORD_ID > > source,
                       vector< vector< WORD_ID > > input )
{
  // go through input set...
  for(unsigned int i=0;i<input.size();i++)
  {
    // NOTE(review): letter-based SED is hard-wired off here, so the
    // global lsed_flag has no effect on this method -- confirm intended
    bool use_letter_sed = false;

    // compute sentence length and worst allowed cost
    unsigned int input_length;
    if (use_letter_sed)
    {
      input_length = compute_length( input[i] );
    }
    else
    {
      input_length = input[i].size();
    }
    unsigned int best_cost = input_length * (100-min_match) / 100 + 2;
    string best_path = "";
    int best_match = -1;

    // go through all corpus sentences
    for(unsigned int s=0;s<source.size();s++)
    {
      int source_length;
      if (use_letter_sed)
      {
        source_length = compute_length( source[s] );
      }
      else
      {
        source_length = source[s].size();
      }
      int diff = abs((int)source_length - (int)input_length);
      // length difference alone already exceeds best cost -> skip
      // (explicit cast avoids a signed/unsigned comparison)
      if (length_filter_flag && (diff >= (int)best_cost))
      {
        continue;
      }

      // compute string edit distance
      string path;
      unsigned int cost = sed( input[i], source[s], path, use_letter_sed );

      // update if new best
      if (cost < best_cost)
      {
        best_cost = cost;
        best_path = path;
        best_match = s;
      }
    }
    cout << best_cost << " ||| " << best_match << " ||| " << best_path << endl;
  }
  return 0;
}
#define MAX_MATCH_COUNT 10000000

/* data structure for n-gram match between input and corpus */
class Match {
public:
  int input_start;    // first matched word position in the input sentence
  int input_end;      // last matched word position in the input sentence (inclusive)
  int tm_start;       // first matched word position in the tm sentence
  int tm_end;         // last matched word position in the tm sentence (inclusive)
  int min_cost;       // lower bound on full-sentence edit cost implied by this match
  int max_cost;       // upper bound on full-sentence edit cost implied by this match
  int internal_cost;  // edit cost accumulated inside a combined span (0 for n-gram matches)
  Match( int is, int ie, int ts, int te, int min, int max, int i )
    :input_start(is), input_end(ie), tm_start(ts), tm_end(te), min_cost(min), max_cost(max), internal_cost(i)
  {}
};
// input word -> its positions in the current input sentence;
// built per sentence by init_short_matches(), read by add_short_matches()
map< WORD_ID,vector< int > > single_word_index;
/* Maximum n-gram length that counts as a "short" match.
   Very short n-gram matches (1-grams) are never looked up in the
   suffix array, since there are too many of them, and for longer
   sentences at least one 2-gram match must occur. */
inline int short_match_max_length( int input_length )
{
  // short-match handling only applies in refined mode, and only once
  // the input sentence is long enough (>= 5 words)
  return ( refined_flag && input_length >= 5 ) ? 1 : 0;
}
/* if we have non-short matches in a sentence, we need to
   take a closer look at it.
   this function creates a hash map for all input words and their positions
   (to be used by the next function)
   (done here, because this has to be done only once per input sentence) */
void init_short_matches( const vector< WORD_ID > &input )
{
  // nothing to do unless short matches are enabled for this length
  int max_length = short_match_max_length( input.size() );
  if (max_length == 0)
    return;

  single_word_index.clear();

  // store each input word's positions; map::operator[] default-constructs
  // the position vector on first access, so the previous explicit
  // find-then-insert sequence was redundant
  for(int i=0; i<(int)input.size(); i++)
  {
    single_word_index[ input[i] ].push_back( i );
  }
}
/* add all short matches to list of matches for a sentence */
/* For every tm (corpus) word that also occurs in the input sentence
   (per single_word_index, built by init_short_matches), create a
   one-word Match with lower/upper bounds on the full-sentence edit
   cost implied by the word positions; only matches whose lower bound
   does not exceed best_cost are appended to `match`. */
void add_short_matches( vector< Match > &match, const vector< WORD_ID > &tm, int input_length, int best_cost )
{
  // short matches disabled for this input length -> nothing to add
  int max_length = short_match_max_length( input_length );
  if (max_length == 0)
    return;

  int tm_length = tm.size();
  map< WORD_ID,vector< int > >::iterator input_word_hit;
  for(int t_pos=0; t_pos<tm.size(); t_pos++)
  {
    // does this tm word occur anywhere in the input sentence?
    input_word_hit = single_word_index.find( tm[t_pos] );
    if (input_word_hit != single_word_index.end())
    {
      // one candidate match per occurrence in the input
      vector< int > &position_vector = input_word_hit->second;
      for(int j=0; j<position_vector.size(); j++)
      {
        int &i_pos = position_vector[j];

        // before match: at least |i_pos - t_pos| edits are needed,
        // at most max(i_pos, t_pos) (rewrite the longer prefix)
        int max_cost = max( i_pos , t_pos );
        int min_cost = abs( i_pos - t_pos );
        // equal-length prefixes that are not at sentence start must
        // still differ somewhere -> cost is at least 1
        if ( i_pos>0 && i_pos == t_pos )
          min_cost++;

        // after match: same reasoning for the remaining suffixes
        max_cost += max( (input_length-i_pos) , (tm_length-t_pos));
        min_cost += abs( (input_length-i_pos) - (tm_length-t_pos));
        if ( i_pos != input_length-1 && (input_length-i_pos) == (tm_length-t_pos))
          min_cost++;

        // keep only matches that could still beat the best cost
        if (min_cost <= best_cost)
        {
          Match new_match( i_pos,i_pos, t_pos,t_pos, min_cost,max_cost,0 );
          match.push_back( new_match );
        }
      }
    }
  }
}
/* remove matches that are subsumed by a larger match */
/* A match i is subsumed if some other match j spans at least as many
   input words and shares either its start point or its end point (in
   both input and tm coordinates). Matches whose minimum cost exceeds
   best_cost are dropped as well. Surviving matches are returned in
   reverse order of the original list. */
vector< Match > prune_matches( const vector< Match > &match, int best_cost )
{
  vector< Match > pruned;
  for(int i=match.size()-1; i>=0; i--)
  {
    bool subsumed = false;
    for(int j=match.size()-1; j>=0; j--)
    {
      if (i!=j // do not compare match with itself
          && ( match[i].input_end - match[i].input_start <=
               match[j].input_end - match[j].input_start ) // i shorter than j
          && ((match[i].input_start == match[j].input_start &&
               match[i].tm_start == match[j].tm_start ) ||
              (match[i].input_end == match[j].input_end &&
               match[i].tm_end == match[j].tm_end) ) )
      {
        subsumed = true;
        break; // one subsuming match is enough; no need to keep scanning
      }
    }
    if (! subsumed && match[i].min_cost <= best_cost)
    {
      pruned.push_back( match[i] );
    }
  }
  return pruned;
}
/* A* parsing method to compute string edit distance */
/* Combines n-gram matches bottom-up into ever larger spans:
   a level-k span joins a level-i and a level-j span (i+j+1 == k)
   that overlap in neither input nor tm coordinates. Every span
   carries a lower bound (min_cost) and an upper bound (max_cost)
   on the full-sentence edit cost; combinations whose lower bound
   already exceeds best_cost are discarded, and best_cost is
   tightened in place whenever a new span's upper bound beats it.
   Returns the best upper bound found. */
int parse_matches( vector< Match > &match, int input_length, int tm_length, int &best_cost )
{
  // cerr << "sentence has " << match.size() << " matches, best cost: " << best_cost << ", lengths input: " << input_length << " tm: " << tm_length << endl;

  // trivial cases: a single match, or none at all
  if (match.size() == 1)
    return match[0].max_cost;
  if (match.size() == 0)
    return input_length+tm_length;

  // start from the tightest single-match upper bound
  int this_best_cost = input_length + tm_length;
  for(int i=0;i<match.size();i++)
  {
    this_best_cost = min( this_best_cost, match[i].max_cost );
  }
  // cerr << "\tthis best cost: " << this_best_cost << endl;

  // bottom up combination of spans
  vector< vector< Match > > multi_match;
  multi_match.push_back( match );

  int match_level = 1;
  while(multi_match[ match_level-1 ].size()>0)
  {
    // init vector
    vector< Match > empty;
    multi_match.push_back( empty );

    // iterating first_level only up to (match_level-1)/2 visits each
    // unordered level pair exactly once
    for(int first_level = 0; first_level <= (match_level-1)/2; first_level++)
    {
      int second_level = match_level - first_level -1;
      //cerr << "\tcombining level " << first_level << " and " << second_level << endl;

      vector< Match > &first_match = multi_match[ first_level ];
      vector< Match > &second_match = multi_match[ second_level ];

      for(int i1 = 0; i1 < first_match.size(); i1++) {
        for(int i2 = 0; i2 < second_match.size(); i2++) {

          // do not combine the same pair twice
          if (first_level == second_level && i2 <= i1)
          {
            continue;
          }

          // get sorted matches (first is before second)
          Match *first, *second;
          if (first_match[i1].input_start < second_match[i2].input_start )
          {
            first = &first_match[i1];
            second = &second_match[i2];
          }
          else
          {
            second = &first_match[i1];
            first = &second_match[i2];
          }

          //cerr << "\tcombining "
          //     << "(" << first->input_start << "," << first->input_end << "), "
          //     << first->tm_start << " [" << first->internal_cost << "]"
          //     << " with "
          //     << "(" << second->input_start << "," << second->input_end << "), "
          //     << second->tm_start<< " [" << second->internal_cost << "]"
          //     << endl;

          // do not process overlapping matches
          if (first->input_end >= second->input_start)
          {
            continue;
          }

          // no overlap / mismatch in tm
          if (first->tm_end >= second->tm_start)
          {
            continue;
          }

          // compute cost
          int min_cost = 0;
          int max_cost = 0;

          // initial: words before the first span
          min_cost += abs( first->input_start - first->tm_start );
          max_cost += max( first->input_start, first->tm_start );
          // same number of words, but not sent. start -> cost is at least 1
          if (first->input_start == first->tm_start && first->input_start > 0)
          {
            min_cost++;
          }

          // in-between: words between the two combined spans
          int skipped_words = second->input_start - first->input_end -1;
          int skipped_words_tm = second->tm_start - first->tm_end -1;
          int internal_cost = max( skipped_words, skipped_words_tm );
          internal_cost += first->internal_cost + second->internal_cost;
          min_cost += internal_cost;
          max_cost += internal_cost;

          // final: words after the second span
          min_cost += abs( (tm_length-1 - second->tm_end) -
                           (input_length-1 - second->input_end) );
          max_cost += max( (tm_length-1 - second->tm_end),
                           (input_length-1 - second->input_end) );
          // same number of words, but not sent. end -> cost is at least 1
          if ( ( input_length-1 - second->input_end
                 == tm_length-1 - second->tm_end )
               && input_length-1 != second->input_end )
          {
            min_cost++;
          }

          // cerr << "\tcost: " << min_cost << "-" << max_cost << endl;

          // if worse than best cost, forget it
          if (min_cost > best_cost)
          {
            continue;
          }

          // add match
          Match new_match( first->input_start,
                           second->input_end,
                           first->tm_start,
                           second->tm_end,
                           min_cost,
                           max_cost,
                           internal_cost);
          multi_match[ match_level ].push_back( new_match );
          // cerr << "\tstored\n";

          // possibly updating this_best_cost
          if (max_cost < this_best_cost)
          {
            // cerr << "\tupdating this best cost to " << max_cost << "\n";
            this_best_cost = max_cost;

            // possibly updating best_cost
            if (max_cost < best_cost)
            {
              // cerr << "\tupdating best cost to " << max_cost << "\n";
              best_cost = max_cost;
            }
          }
        }
      }
    }
    match_level++;
  }
  return this_best_cost;
}
int main(int argc, char* argv[])
{
vector< vector< WORD_ID > > source, input;
while(1) {
static struct option long_options[] = {
{"basic", no_argument, &basic_flag, 1},
{"word", no_argument, &lsed_flag, 0},
{"unrefined", no_argument, &refined_flag, 0},
{"nolengthfilter", no_argument, &length_filter_flag, 0},
{"noparse", no_argument, &parse_flag, 0},
{"multiple", no_argument, &multiple_flag, 1},
{"minmatch", required_argument, 0, 'm'},
{0, 0, 0, 0}
};
int option_index = 0;
int c = getopt_long (argc, argv, "m:", long_options, &option_index);
if (c == -1) break;
switch (c) {
case 0:
// if (long_options[option_index].flag != 0)
// break;
// printf ("option %s", long_options[option_index].name);
// if (optarg)
// printf (" with arg %s", optarg);
// printf ("\n");
break;
case 'm':
min_match = atoi(optarg);
if (min_match < 1 || min_match > 100) {
cerr << "error: --minmatch must have value in range 1..100\n";
exit(1);
}
cerr << "setting min match to " << min_match << endl;
break;
default:
cerr << "usage: syntax: ./fuzzy-match input corpus [--basic] [--word] [--minmatch 1..100]\n";
exit(1);
}
}
if (lsed_flag) { cerr << "lsed\n"; }
if (basic_flag) { cerr << "basic\n"; }
if (refined_flag) { cerr << "refined\n"; }
if (length_filter_flag) { cerr << "length filter\n"; }
if (parse_flag) { cerr << "parse\n"; }
// exit(1);
if (optind+2 != argc) {
cerr << "syntax: ./fuzzy-match input corpus [--basic] [--word] [--minmatch 1..100]\n";
exit(1);
}
cerr << "loading corpus...\n";
load_corpus(argv[optind], input);
load_corpus(argv[optind+1], source);
// ./fuzzy-match input corpus [-basic]
// load_corpus("../corpus/tm.truecased.4.en", source);
// load_corpus("../corpus/tm.truecased.4.it", target);
// load_corpus("../evaluation/test.input.tc.4", input);
// load_corpus("../../acquis-truecase/corpus/acquis.truecased.190.en", source);
// load_corpus("../../acquis-truecase/evaluation/ac-test.input.tc.190", input);
// load_corpus("../corpus/tm.truecased.16.en", source);
// load_corpus("../evaluation/test.input.tc.16", input);
if (basic_flag) {
cerr << "using basic method\n";
clock_t start_main_clock2 = clock();
basic_fuzzy_match( source, input );
cerr << "total: " << (1000 * (clock()-start_main_clock2) / CLOCKS_PER_SEC) << endl;
exit(1);
}
cerr << "number of input sentences " << input.size() << endl;
cerr << "creating suffix array...\n";
// SuffixArray suffixArray( "../corpus/tm.truecased.4.en" );
// SuffixArray suffixArray( "../../acquis-truecase/corpus/acquis.truecased.190.en" );
SuffixArray suffixArray( argv[optind+1] );
clock_t start_main_clock = clock();
// looping through all input sentences...
cerr << "looping...\n";
for(unsigned int i=0;i<input.size();i++)
{
clock_t start_clock = clock();
// if (i % 10 == 0) cerr << ".";
int input_id = i; // clean up this mess!
// establish some basic statistics
// int input_length = compute_length( input[i] );
int input_length = input[i].size();
int best_cost = input_length * (100-min_match) / 100 + 1;
int match_count = 0; // how many substring matches to be considered
//cerr << endl << "sentence " << i << ", length " << input_length << ", best_cost " << best_cost << endl;
// find match ranges in suffix array
vector< vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > > match_range;
for(size_t start=0;start<input[i].size();start++)
{
SuffixArray::INDEX prior_first_match = 0;
SuffixArray::INDEX prior_last_match = suffixArray.GetSize()-1;
vector< string > substring;
bool stillMatched = true;
vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > matchedAtThisStart;
//cerr << "start: " << start;
for(int word=start; stillMatched && word<input[i].size(); word++)
{
substring.push_back( vocabulary.GetWord( input[i][word] ) );
// only look up, if needed (i.e. no unnecessary short gram lookups)
// if (! word-start+1 <= short_match_max_length( input_length ) )
// {
SuffixArray::INDEX first_match, last_match;
stillMatched = false;
if (suffixArray.FindMatches( substring, first_match, last_match, prior_first_match, prior_last_match ) )
{
stillMatched = true;
matchedAtThisStart.push_back( make_pair( first_match, last_match ) );
//cerr << " (" << first_match << "," << last_match << ")";
//cerr << " " << ( last_match - first_match + 1 );
prior_first_match = first_match;
prior_last_match = last_match;
}
//}
}
//cerr << endl;
match_range.push_back( matchedAtThisStart );
}
clock_t clock_range = clock();
map< int, vector< Match > > sentence_match;
map< int, int > sentence_match_word_count;
// go through all matches, longest first
for(int length = input[i].size(); length >= 1; length--)
{
// do not create matches, if these are handled by the short match function
if (length <= short_match_max_length( input_length ) )
{
continue;
}
unsigned int count = 0;
for(int start = 0; start <= input[i].size() - length; start++)
{
if (match_range[start].size() >= length)
{
pair< SuffixArray::INDEX, SuffixArray::INDEX > &range = match_range[start][length-1];
// cerr << " (" << range.first << "," << range.second << ")";
count += range.second - range.first + 1;
for(SuffixArray::INDEX i=range.first; i<=range.second; i++)
{
int position = suffixArray.GetPosition( i );
// sentence length mismatch
size_t sentence_id = suffixArray.GetSentence( position );
int sentence_length = suffixArray.GetSentenceLength( sentence_id );
int diff = abs( (int)sentence_length - (int)input_length );
// cerr << endl << i << "\tsentence " << sentence_id << ", length " << sentence_length;
//if (length <= 2 && input_length>=5 &&
// sentence_match.find( sentence_id ) == sentence_match.end())
// continue;
if (diff > best_cost)
continue;
// compute minimal cost
int start_pos = suffixArray.GetWordInSentence( position );
int end_pos = start_pos + length-1;
// cerr << endl << "\t" << start_pos << "-" << end_pos << " (" << sentence_length << ") vs. "
// << start << "-" << (start+length-1) << " (" << input_length << ")";
// different number of prior words -> cost is at least diff
int min_cost = abs( start - start_pos );
// same number of words, but not sent. start -> cost is at least 1
if (start == start_pos && start>0)
min_cost++;
// different number of remaining words -> cost is at least diff
min_cost += abs( ( sentence_length-1 - end_pos ) -
( input_length-1 - (start+length-1) ) );
// same number of words, but not sent. end -> cost is at least 1
if ( sentence_length-1 - end_pos ==
input_length-1 - (start+length-1)
&& end_pos != sentence_length-1 )
min_cost++;
// cerr << " -> min_cost " << min_cost;
if (min_cost > best_cost)
continue;
// valid match
match_count++;
// compute maximal cost
int max_cost = max( start, start_pos )
+ max( sentence_length-1 - end_pos,
input_length-1 - (start+length-1) );
// cerr << ", max_cost " << max_cost;
Match m = Match( start, start+length-1,
start_pos, start_pos+length-1,
min_cost, max_cost, 0);
sentence_match[ sentence_id ].push_back( m );
sentence_match_word_count[ sentence_id ] += length;
if (max_cost < best_cost)
{
best_cost = max_cost;
if (best_cost == 0) break;
}
//if (match_count >= MAX_MATCH_COUNT) break;
}
}
// cerr << endl;
if (best_cost == 0) break;
//if (match_count >= MAX_MATCH_COUNT) break;
}
// cerr << count << " matches at length " << length << " in " << sentence_match.size() << " tm." << endl;
if (best_cost == 0) break;
//if (match_count >= MAX_MATCH_COUNT) break;
}
cerr << match_count << " matches in " << sentence_match.size() << " sentences." << endl;
clock_t clock_matches = clock();
// consider each sentence for which we have matches
int old_best_cost = best_cost;
int tm_count_word_match = 0;
int tm_count_word_match2 = 0;
int pruned_match_count = 0;
if (short_match_max_length( input_length ))
{
init_short_matches( input[i] );
}
vector< int > best_tm;
typedef map< int, vector< Match > >::iterator I;
clock_t clock_validation_sum = 0;
for(I tm=sentence_match.begin(); tm!=sentence_match.end(); tm++)
{
int tmID = tm->first;
int tm_length = suffixArray.GetSentenceLength(tmID);
vector< Match > &match = tm->second;
add_short_matches( match, source[tmID], input_length, best_cost );
//cerr << "match in sentence " << tmID << ": " << match.size() << " [" << tm_length << "]" << endl;
// quick look: how many words are matched
int words_matched = 0;
for(int m=0;m<match.size();m++) {
if (match[m].min_cost <= best_cost) // makes no difference
words_matched += match[m].input_end - match[m].input_start + 1;
}
if (max(input_length,tm_length) - words_matched > best_cost)
{
if (length_filter_flag) continue;
}
tm_count_word_match++;
// prune, check again how many words are matched
vector< Match > pruned = prune_matches( match, best_cost );
words_matched = 0;
for(int p=0;p<pruned.size();p++) {
words_matched += pruned[p].input_end - pruned[p].input_start + 1;
}
if (max(input_length,tm_length) - words_matched > best_cost)
{
if (length_filter_flag) continue;
}
tm_count_word_match2++;
pruned_match_count += pruned.size();
int prior_best_cost = best_cost;
int cost;
clock_t clock_validation_start = clock();
if (! parse_flag ||
pruned.size()>=10) // to prevent worst cases
{
string path;
cost = sed( input[input_id], source[tmID], path, false );
if (cost < best_cost)
{
best_cost = cost;
}
}
else
{
cost = parse_matches( pruned, input_length, tm_length, best_cost );
if (prior_best_cost != best_cost)
{
best_tm.clear();
}
}
clock_validation_sum += clock() - clock_validation_start;
if (cost == best_cost)
{
best_tm.push_back( tmID );
}
}
cerr << "reduced best cost from " << old_best_cost << " to " << best_cost << endl;
cerr << "tm considered: " << sentence_match.size()
<< " word-matched: " << tm_count_word_match
<< " word-matched2: " << tm_count_word_match2
<< " best: " << best_tm.size() << endl;
cerr << "pruned matches: " << ((float)pruned_match_count/(float)tm_count_word_match2) << endl;
// do not try to find the best ... report multiple matches
if (multiple_flag) {
int input_letter_length = compute_length( input[input_id] );
for(int si=0; si<best_tm.size(); si++) {
int s = best_tm[si];
string path;
unsigned int letter_cost = sed( input[input_id], source[s], path, true );
// do not report multiple identical sentences, but just their count
cout << i << " "; // sentence number
cout << letter_cost << "/" << input_letter_length << " ";
cout << "(" << best_cost <<"/" << input_length <<") ";
cout << "||| " << s << " ||| " << path << endl;
}
continue;
}
// find the best matches according to letter sed
string best_path = "";
int best_match = -1;
int best_letter_cost;
if (lsed_flag) {
best_letter_cost = compute_length( input[input_id] ) * min_match / 100 + 1;
for(int si=0; si<best_tm.size(); si++)
{
int s = best_tm[si];
string path;
unsigned int letter_cost = sed( input[input_id], source[s], path, true );
if (letter_cost < best_letter_cost)
{
best_letter_cost = letter_cost;
best_path = path;
best_match = s;
}
}
}
// if letter sed turned off, just compute path for first match
else {
if (best_tm.size() > 0) {
string path;
sed( input[input_id], source[best_tm[0]], path, false );
best_path = path;
best_match = best_tm[0];
}
}
cerr << "elapsed: " << (1000 * (clock()-start_clock) / CLOCKS_PER_SEC)
<< " ( range: " << (1000 * (clock_range-start_clock) / CLOCKS_PER_SEC)
<< " match: " << (1000 * (clock_matches-clock_range) / CLOCKS_PER_SEC)
<< " tm: " << (1000 * (clock()-clock_matches) / CLOCKS_PER_SEC)
<< " (validation: " << (1000 * (clock_validation_sum) / CLOCKS_PER_SEC) << ")"
<< " )" << endl;
if (lsed_flag) {
cout << best_letter_cost << "/" << compute_length( input[input_id] ) << " (";
}
cout << best_cost <<"/" << input_length;
if (lsed_flag) cout << ")";
cout << " ||| " << best_match << " ||| " << best_path << endl;
}
cerr << "total: " << (1000 * (clock()-start_main_clock) / CLOCKS_PER_SEC) << endl;
}

View File

@ -0,0 +1,58 @@
#!/usr/bin/perl -w
use strict;

# Collapse a parallel corpus to unique source sentences.
# For every distinct source line, collect all of its target translations
# (with occurrence counts) and their word alignments.  Outputs:
#   $src_out   - unique source sentences
#   $tgt_out   - per source: "count target" entries joined by " ||| "
#   $tgt_mf    - per source: the most frequent target only
#   $align_out - alignments, parallel to the entries in $tgt_out
#   $align_mf  - alignment of the most frequent target
# NOTE(review): input/output paths are hard-coded for the acquis data set.

my $src_in = "corpus/acquis.truecased.4.en";
my $tgt_in = "corpus/acquis.truecased.4.fr";
my $align_in = "model/aligned.4.grow-diag-final-and";

my $src_out = "data/acquis.truecased.4.en.uniq";
my $tgt_out = "data/acquis.truecased.4.fr.uniq";
my $tgt_mf = "data/acquis.truecased.4.fr.uniq.most-frequent";
my $align_out = "data/acquis.truecased.4.align.uniq";
my $align_mf = "data/acquis.truecased.4.align.uniq.most-frequent";

my (%TRANS,%ALIGN);
# fail loudly instead of silently producing empty output (opens were unchecked)
open(SRC,$src_in) or die "Cannot open $src_in: $!";
open(TGT,$tgt_in) or die "Cannot open $tgt_in: $!";
open(ALIGN,$align_in) or die "Cannot open $align_in: $!";
while(my $src = <SRC>) {
  my $tgt = <TGT>;
  my $align = <ALIGN>;
  chop($tgt);
  chop($align);
  # $src deliberately keeps its trailing newline: it is used verbatim as the
  # hash key and printed unchanged to SRC_OUT below
  $TRANS{$src}{$tgt}++;
  # keep one alignment (the last seen) per source/target pair
  $ALIGN{$src}{$tgt} = $align;
}
close(SRC);
close(TGT);
close(ALIGN);  # was missing: close the alignment input handle too

open(SRC_OUT,">$src_out") or die "Cannot open $src_out: $!";
open(TGT_OUT,">$tgt_out") or die "Cannot open $tgt_out: $!";
open(TGT_MF, ">$tgt_mf") or die "Cannot open $tgt_mf: $!";
open(ALIGN_OUT,">$align_out") or die "Cannot open $align_out: $!";
open(ALIGN_MF, ">$align_mf") or die "Cannot open $align_mf: $!";
foreach my $src (keys %TRANS) {
  print SRC_OUT $src;
  my $first = 1;
  # $best is always assigned: every source has at least one target with
  # count >= 1 > 0 (initializing it avoids an undef warning under -w)
  my ($max,$best) = (0,"");
  foreach my $tgt (keys %{$TRANS{$src}}) {
    print TGT_OUT " ||| " unless $first;
    print TGT_OUT $TRANS{$src}{$tgt}." ".$tgt;
    print ALIGN_OUT " ||| " unless $first;
    print ALIGN_OUT $ALIGN{$src}{$tgt};
    if ($TRANS{$src}{$tgt} > $max) {
      $max = $TRANS{$src}{$tgt};
      $best = $tgt;
    }
    $first = 0;
  }
  print TGT_OUT "\n";
  print ALIGN_OUT "\n";
  print TGT_MF $best."\n";
  print ALIGN_MF $ALIGN{$src}{$best}."\n";
}
close(SRC_OUT);
close(TGT_OUT);
close(TGT_MF);     # was missing
close(ALIGN_OUT);  # was missing
close(ALIGN_MF);   # was missing

View File

@ -0,0 +1,308 @@
#!/usr/bin/perl -w
use strict;
use FindBin qw($RealBin);
use File::Basename;

# Driver: fuzzy-match an input file against a translation memory, then turn
# the matches into (a) an XML frame file for constrained decoding and
# (b) a hierarchical phrase table via train-model.perl.
#
# usage: <script> <input> <tm-source> <tm-target> <tm-alignment> <lex> <pt-out>

my $DEBUG = 1;
my $OUTPUT_RULES = 1;

#my $data_root = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/";
my $in_file = $ARGV[0]; #"$data_root/in/ac-test.input.tc.4";
my $source_file = $ARGV[1]; #"$data_root/in/acquis.truecased.4.en.uniq";
my $target_file = $ARGV[2]; #"$data_root/in/acquis.truecased.4.fr.uniq";
my $alignment_file = $ARGV[3]; #"$data_root/in/acquis.truecased.4.align.uniq";
my $lex_file = $ARGV[4]; #$data_root/in/lex.4;
my $pt_file = $ARGV[5]; #"$data_root/out/pt";

my $cmd;

# scratch directory next to the output phrase table, unique per process
my $TMPDIR=dirname($pt_file) ."/tmp.$$";
$cmd = "mkdir -p $TMPDIR";
`$cmd`;

my $match_file = "$TMPDIR/match";

# suffix array creation and extraction
$cmd = "$RealBin/fuzzy-match --multiple $in_file $source_file > $match_file";
print STDERR "$cmd \n";
`$cmd`;

# make into xml and pt
my $out_file = "$TMPDIR/ac-test.input.xml.4.uniq.multi.tuning";

# slurp the corpora into memory; array indices correspond to sentence ids
my @INPUT = `cat $in_file`; chop(@INPUT);
my @ALL_SOURCE = `cat $source_file`; chop(@ALL_SOURCE);
my @ALL_TARGET = `cat $target_file`; chop(@ALL_TARGET);
my @ALL_ALIGNMENT = `cat $alignment_file`; chop(@ALL_ALIGNMENT);

# fail loudly on unreadable/unwritable files instead of producing empty output
open(MATCH,$match_file) or die "Cannot open $match_file: $!";
open(FRAME,">$out_file") or die "Cannot open $out_file: $!";
open(RULE,">$out_file.extract") or die "Cannot open $out_file.extract: $!" if $OUTPUT_RULES;
open(RULE_INV,">$out_file.extract.inv") or die "Cannot open $out_file.extract.inv: $!" if $OUTPUT_RULES;
open(INFO,">$out_file.info") or die "Cannot open $out_file.info: $!";
while( my $match = <MATCH> ) {
  chop($match);
  # each match line: "<input-id> <match-score> ||| <tm-sentence-id> ||| <edit-path>"
  my ($score,$sentence,$path) = split(/ \|\|\| /,$match);
  $score =~ /^(\d+) (.+)/ || die;
  my ($i,$match_score) = ($1,$2);
  print STDERR "i=$i match_score=$match_score\n";

  # construct frame (skip sentinel/out-of-range sentence ids)
  if ($sentence < 1e9 && $sentence >= 0) {
    my $SOURCE = $ALL_SOURCE[$sentence];
    my @ALIGNMENT = split(/ \|\|\| /,$ALL_ALIGNMENT[$sentence]);
    my @TARGET = split(/ \|\|\| /,$ALL_TARGET[$sentence]);
    # one frame/rule per alternative target translation of the TM sentence
    for(my $j=0;$j<scalar(@TARGET);$j++) {
      $TARGET[$j] =~ /^(\d+) (.+)$/ || die;
      my ($target_count,$target) = ($1,$2);
      my ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv) =
        &create_xml($SOURCE,
                    $INPUT[$i],
                    $target,
                    $ALIGNMENT[$j],
                    $path);
      print FRAME $frame."\n";
      print RULE "$rule_s [X] ||| $rule_t [X] ||| $rule_alignment ||| $target_count\n" if $OUTPUT_RULES;
      print RULE_INV "$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $target_count\n" if $OUTPUT_RULES;
      print INFO "$i ||| $match_score ||| $target_count\n";
    }
  }
}
close(FRAME);
close(MATCH);
close(RULE) if $OUTPUT_RULES;
close(RULE_INV) if $OUTPUT_RULES;
close(INFO);  # was missing

`LC_ALL=C sort $out_file.extract | gzip -c > $out_file.extract.sorted.gz`;
`LC_ALL=C sort $out_file.extract.inv | gzip -c > $out_file.extract.inv.sorted.gz`;

if ($OUTPUT_RULES)
{
  $cmd = "$RealBin/../../scripts/training/train-model.perl -dont-zip -first-step 6 -last-step 6 -f en -e fr -hierarchical -extract-file $out_file.extract -lexical-file $lex_file -phrase-translation-table $pt_file";
  print STDERR "Executing: $cmd \n";
  `$cmd`;
}

#$cmd = "rm -rf $TMPDIR";
#`$cmd`;
#######################################################
# Build an XML frame plus a hierarchical rule from one fuzzy match.
# Arguments:
#   $source    - TM source sentence (space-separated words)
#   $input     - input sentence to be translated
#   $target    - one TM target translation of $source
#   $alignment - source-target word alignment ("s-t s-t ...")
#   $path      - edit path from the sed alignment of input vs. TM source,
#                a string over M(atch)/S(ubst)/I(nsert)/D(elete)
# Returns ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv):
#   $frame          - TM target with mismatched regions replaced by input
#                     words, matched regions wrapped in <xml translation="...">
#   $rule_s/$rule_t - source/target side of a hierarchical rule, with
#                     [X][X] non-terminals at the mismatch positions
#   $rule_alignment / $rule_alignment_inv - rule-internal word alignment
#                     and its inverse
# NOTE(review): all prints below are debug traces on STDERR (incl. "HIEU").
sub create_xml {
my ($source,$input,$target,$alignment,$path) = @_;
print STDERR " HIEU \n $source \n $input \n $target \n $alignment \n $path \n";
my @INPUT = split(/ /,$input);
my @SOURCE = split(/ /,$source);
my @TARGET = split(/ /,$target);
my %ALIGN = &create_alignment($alignment);
# input words to be inserted into the frame, keyed by preceding target position
my %FRAME_INPUT;
# @NT: one record per non-terminal; bitmaps mark which input/target words are kept
my (@NT,@INPUT_BITMAP,@TARGET_BITMAP,%ALIGNMENT_I_TO_S);
foreach (@TARGET) { push @TARGET_BITMAP,1 }
### STEP 1: FIND MISMATCHES
# walk the edit path, tracking positions in TM source ($s) and input ($i)
my ($s,$i) = (0,0);
my $currently_matching = 0;
my ($start_s,$start_i) = (0,0);
$path .= "X"; # indicate end
print STDERR "$input\n$source\n$target\n$path\n";
for(my $p=0;$p<length($path);$p++) {
my $action = substr($path,$p,1);
# beginning of a mismatch
if ($currently_matching && $action ne "M" && $action ne "X") {
$start_i = $i;
$start_s = $s;
$currently_matching = 0;
}
# end of a mismatch
elsif (!$currently_matching &&
($action eq "M" || $action eq "X")) {
# remove use of affected target words
for(my $ss = $start_s; $ss<$s; $ss++) {
foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
$TARGET_BITMAP[$tt] = 0;
}
# also remove enclosed unaligned words?
}
# are there input words that need to be inserted ?
print STDERR "($start_i<$i)?\n";
if ($start_i<$i) {
# take note of input words to be inserted
my $insertion = "";
for(my $ii = $start_i; $ii<$i; $ii++) {
$insertion .= $INPUT[$ii]." ";
}
# find position for inserted input words
# find first removed target word
# 1000 acts as a sentinel for "no aligned target word found"
my $start_t = 1000;
for(my $ss = $start_s; $ss<$s; $ss++) {
foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
$start_t = $tt if $tt < $start_t;
}
}
# end of sentence? add to end
if ($start_t == 1000 && $i > $#INPUT) {
$start_t = $#TARGET;
}
# backtrack to previous words if unaligned
if ($start_t == 1000) {
$start_t = -1;
for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
$start_t = $tt if $tt > $start_t;
}
}
}
$FRAME_INPUT{$start_t} .= $insertion;
my %NT = ("start_t" => $start_t,
"start_i" => $start_i );
push @NT,\%NT;
}
$currently_matching = 1;
}
print STDERR "$action $s $i ($start_s $start_i) $currently_matching";
if ($action ne "I") {
print STDERR " ->";
foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
print STDERR " ".$tt;
}
}
print STDERR "\n";
# advance positions: insertions consume no source word, deletions no input word
$s++ unless $action eq "I";
$i++ unless $action eq "D";
$ALIGNMENT_I_TO_S{$i} = $s unless $action eq "D";
push @INPUT_BITMAP, 1 if $action eq "M";
push @INPUT_BITMAP, 0 if $action eq "I" || $action eq "S";
}
print STDERR $target."\n";
foreach (@TARGET_BITMAP) { print STDERR $_; } print STDERR "\n";
foreach (sort keys %FRAME_INPUT) {
print STDERR "$_: $FRAME_INPUT{$_}\n";
}
### STEP 2: BUILD RULE AND FRAME
# hierarchical rule
# source side: matched input words plus [X][X] at each non-terminal position
my $rule_s = "";
my $rule_pos_s = 0;
my %RULE_ALIGNMENT_S;
for(my $i=0;$i<scalar(@INPUT_BITMAP);$i++) {
if ($INPUT_BITMAP[$i]) {
$rule_s .= $INPUT[$i]." ";
$RULE_ALIGNMENT_S{$ALIGNMENT_I_TO_S{$i}} = $rule_pos_s++;
}
foreach my $NT (@NT) {
if ($i == $$NT{"start_i"}) {
$rule_s .= "[X][X] ";
$$NT{"rule_pos_s"} = $rule_pos_s++;
}
}
}
# target side: kept TM target words plus [X][X] (loop starts at -1 so a
# non-terminal anchored before the first word is also emitted)
my $rule_t = "";
my $rule_pos_t = 0;
my %RULE_ALIGNMENT_T;
for(my $t=-1;$t<scalar(@TARGET_BITMAP);$t++) {
if ($t>=0 && $TARGET_BITMAP[$t]) {
$rule_t .= $TARGET[$t]." ";
$RULE_ALIGNMENT_T{$t} = $rule_pos_t++;
}
foreach my $NT (@NT) {
if ($t == $$NT{"start_t"}) {
$rule_t .= "[X][X] ";
$$NT{"rule_pos_t"} = $rule_pos_t++;
}
}
}
# rule-internal alignment: word-word links first, then NT-NT links
my $rule_alignment = "";
foreach my $s (sort { $a <=> $b} keys %RULE_ALIGNMENT_S) {
foreach my $t (keys %{$ALIGN{"s"}[$s]}) {
next unless defined($RULE_ALIGNMENT_T{$t});
$rule_alignment .= $RULE_ALIGNMENT_S{$s}."-".$RULE_ALIGNMENT_T{$t}." ";
}
}
foreach my $NT (@NT) {
$rule_alignment .= $$NT{"rule_pos_s"}."-".$$NT{"rule_pos_t"}." ";
}
chop($rule_s);
chop($rule_t);
chop($rule_alignment);
my $rule_alignment_inv = "";
foreach (split(/ /,$rule_alignment)) {
/^(\d+)\-(\d+)$/;
$rule_alignment_inv .= "$2-$1 ";
}
chop($rule_alignment_inv);
# frame
my $frame = "";
$frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};
my $currently_included = 0;
my $start_t = -1;
push @TARGET_BITMAP,0; # indicate end
for(my $t=0;$t<=scalar(@TARGET);$t++) {
# beginning of tm target inclusion
if (!$currently_included && $TARGET_BITMAP[$t]) {
$start_t = $t;
$currently_included = 1;
}
# end of tm target inclusion (not included word or inserted input)
elsif ($currently_included &&
(!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
# add xml (unless change is at the beginning of the sentence
if ($start_t >= 0) {
my $target = "";
print STDERR "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
$target .= $TARGET[$tt] . " ";
}
chop($target);
$frame .= "<xml translation=\"$target\"> x </xml> ";
}
$currently_included = 0;
}
$frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
print STDERR "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
}
print STDERR $frame."\n-------------------------------------\n";
return ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv);
}
# Parse a word-alignment string ("s-t s-t ...") into two lookup tables.
# Returns a hash with keys 's' and 't': under 's', index by source position
# to get a hash whose keys are the aligned target positions (and vice versa
# under 't').  Values count how often a link appeared.
sub create_alignment {
  my ($alignment_string) = @_;
  my @source_to_target;
  my @target_to_source;
  for my $pair (split / /, $alignment_string) {
    my ($src_pos, $tgt_pos) = split /\-/, $pair;
    $source_to_target[$src_pos]{$tgt_pos}++;
    $target_to_source[$tgt_pos]{$src_pos}++;
  }
  return ( 's' => \@source_to_target, 't' => \@target_to_source );
}

View File

@ -0,0 +1,300 @@
#!/usr/bin/perl -w -d
use strict;
use FindBin qw($RealBin);
use File::Basename;

# Debugger-enabled variant of the fuzzy-match driver: match input against a
# translation memory, write XML frames and extract a hierarchical phrase
# table via train-model.perl.
# NOTE(review): shebang runs under the Perl debugger (-d) — presumably a
# development leftover; confirm before using in a pipeline.
#
# usage: <script> <input> <tm-source> <tm-target> <tm-alignment> <lex> <pt-out>

my $DEBUG = 1;
my $OUTPUT_RULES = 1;

#my $data_root = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/";
my $in_file = $ARGV[0]; #"$data_root/in/ac-test.input.tc.4";
my $source_file = $ARGV[1]; #"$data_root/in/acquis.truecased.4.en.uniq";
my $target_file = $ARGV[2]; #"$data_root/in/acquis.truecased.4.fr.uniq";
my $alignment_file = $ARGV[3]; #"$data_root/in/acquis.truecased.4.align.uniq";
my $lex_file = $ARGV[4]; #$data_root/in/lex.4;
my $pt_file = $ARGV[5]; #"$data_root/out/pt";

my $cmd;

my $TMPDIR= "/tmp/tmp.$$";
$cmd = "mkdir -p $TMPDIR";
`$cmd`;
# BUG(review): this override pointed at a developer-specific directory and
# defeated the freshly created $TMPDIR above; disabled.
#$TMPDIR = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/out/tmp.3196";

my $match_file = "$TMPDIR/match";

# suffix array creation and extraction
$cmd = "$RealBin/fuzzy-match --multiple $in_file $source_file > $match_file";
`$cmd`;

# make into xml and pt
my $out_file = "$TMPDIR/ac-test.input.xml.4.uniq.multi.tuning";

# these loads were missing: @INPUT/@ALL_* are referenced in the loop below,
# so the script died under "use strict" without them
my @INPUT = `cat $in_file`; chop(@INPUT);
my @ALL_SOURCE = `cat $source_file`; chop(@ALL_SOURCE);
my @ALL_TARGET = `cat $target_file`; chop(@ALL_TARGET);
my @ALL_ALIGNMENT = `cat $alignment_file`; chop(@ALL_ALIGNMENT);

# fail loudly on unreadable/unwritable files instead of producing empty output
open(MATCH,$match_file) or die "Cannot open $match_file: $!";
open(FRAME,">$out_file") or die "Cannot open $out_file: $!";
open(RULE,">$out_file.extract") or die "Cannot open $out_file.extract: $!" if $OUTPUT_RULES;
open(RULE_INV,">$out_file.extract.inv") or die "Cannot open $out_file.extract.inv: $!" if $OUTPUT_RULES;
open(INFO,">$out_file.info") or die "Cannot open $out_file.info: $!";
while( my $match = <MATCH> ) {
  chop($match);
  # each match line: "<input-id> <match-score> ||| <tm-sentence-id> ||| <edit-path>"
  my ($score,$sentence,$path) = split(/ \|\|\| /,$match);
  $score =~ /^(\d+) (.+)/ || die;
  my ($i,$match_score) = ($1,$2);

  # construct frame (skip sentinel/out-of-range sentence ids)
  if ($sentence < 1e9 && $sentence >= 0) {
    my $SOURCE = $ALL_SOURCE[$sentence];
    my @ALIGNMENT = split(/ \|\|\| /,$ALL_ALIGNMENT[$sentence]);
    my @TARGET = split(/ \|\|\| /,$ALL_TARGET[$sentence]);
    # one frame/rule per alternative target translation of the TM sentence
    for(my $j=0;$j<scalar(@TARGET);$j++) {
      $TARGET[$j] =~ /^(\d+) (.+)$/ || die;
      my ($target_count,$target) = ($1,$2);
      my ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv) =
        &create_xml($SOURCE,
                    $INPUT[$i],
                    $target,
                    $ALIGNMENT[$j],
                    $path);
      print FRAME $frame."\n";
      print RULE "$rule_s [X] ||| $rule_t [X] ||| $rule_alignment ||| $target_count\n" if $OUTPUT_RULES;
      print RULE_INV "$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $target_count\n" if $OUTPUT_RULES;
      print INFO "$i ||| $match_score ||| $target_count\n";
    }
  }
}
close(FRAME);
close(MATCH);
close(RULE) if $OUTPUT_RULES;
close(RULE_INV) if $OUTPUT_RULES;
close(INFO);  # was missing

`LC_ALL=C sort $out_file.extract | gzip -c > $out_file.extract.sorted.gz`;
`LC_ALL=C sort $out_file.extract.inv | gzip -c > $out_file.extract.inv.sorted.gz`;

if ($OUTPUT_RULES)
{
  $cmd = "$RealBin/../../scripts/training/train-model.perl -dont-zip -first-step 6 -last-step 6 -f en -e fr -hierarchical -extract-file $out_file.extract -lexical-file $lex_file -phrase-translation-table $pt_file";
  print STDERR "Executing: $cmd \n";
  `$cmd`;
}

#$cmd = "rm -rf $TMPDIR";
#`$cmd`;
#######################################################
# Build an XML frame plus a hierarchical rule from one fuzzy match.
# Arguments:
#   $source    - TM source sentence (space-separated words)
#   $input     - input sentence to be translated
#   $target    - one TM target translation of $source
#   $alignment - source-target word alignment ("s-t s-t ...")
#   $path      - edit path from the sed alignment of input vs. TM source,
#                a string over M(atch)/S(ubst)/I(nsert)/D(elete)
# Returns ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv):
#   $frame          - TM target with mismatched regions replaced by input
#                     words, matched regions wrapped in <xml translation="...">
#   $rule_s/$rule_t - source/target side of a hierarchical rule, with
#                     [X][X] non-terminals at the mismatch positions
#   $rule_alignment / $rule_alignment_inv - rule-internal word alignment
#                     and its inverse
# NOTE(review): all prints below are debug traces on STDERR.
sub create_xml {
my ($source,$input,$target,$alignment,$path) = @_;
my @INPUT = split(/ /,$input);
my @SOURCE = split(/ /,$source);
my @TARGET = split(/ /,$target);
my %ALIGN = &create_alignment($alignment);
# input words to be inserted into the frame, keyed by preceding target position
my %FRAME_INPUT;
# @NT: one record per non-terminal; bitmaps mark which input/target words are kept
my (@NT,@INPUT_BITMAP,@TARGET_BITMAP,%ALIGNMENT_I_TO_S);
foreach (@TARGET) { push @TARGET_BITMAP,1 }
### STEP 1: FIND MISMATCHES
# walk the edit path, tracking positions in TM source ($s) and input ($i)
my ($s,$i) = (0,0);
my $currently_matching = 0;
my ($start_s,$start_i) = (0,0);
$path .= "X"; # indicate end
print STDERR "$input\n$source\n$target\n$path\n";
for(my $p=0;$p<length($path);$p++) {
my $action = substr($path,$p,1);
# beginning of a mismatch
if ($currently_matching && $action ne "M" && $action ne "X") {
$start_i = $i;
$start_s = $s;
$currently_matching = 0;
}
# end of a mismatch
elsif (!$currently_matching &&
($action eq "M" || $action eq "X")) {
# remove use of affected target words
for(my $ss = $start_s; $ss<$s; $ss++) {
foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
$TARGET_BITMAP[$tt] = 0;
}
# also remove enclosed unaligned words?
}
# are there input words that need to be inserted ?
print STDERR "($start_i<$i)?\n";
if ($start_i<$i) {
# take note of input words to be inserted
my $insertion = "";
for(my $ii = $start_i; $ii<$i; $ii++) {
$insertion .= $INPUT[$ii]." ";
}
# find position for inserted input words
# find first removed target word
# 1000 acts as a sentinel for "no aligned target word found"
my $start_t = 1000;
for(my $ss = $start_s; $ss<$s; $ss++) {
foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
$start_t = $tt if $tt < $start_t;
}
}
# end of sentence? add to end
if ($start_t == 1000 && $i > $#INPUT) {
$start_t = $#TARGET;
}
# backtrack to previous words if unaligned
if ($start_t == 1000) {
$start_t = -1;
for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
$start_t = $tt if $tt > $start_t;
}
}
}
$FRAME_INPUT{$start_t} .= $insertion;
my %NT = ("start_t" => $start_t,
"start_i" => $start_i );
push @NT,\%NT;
}
$currently_matching = 1;
}
print STDERR "$action $s $i ($start_s $start_i) $currently_matching";
if ($action ne "I") {
print STDERR " ->";
foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
print STDERR " ".$tt;
}
}
print STDERR "\n";
# advance positions: insertions consume no source word, deletions no input word
$s++ unless $action eq "I";
$i++ unless $action eq "D";
$ALIGNMENT_I_TO_S{$i} = $s unless $action eq "D";
push @INPUT_BITMAP, 1 if $action eq "M";
push @INPUT_BITMAP, 0 if $action eq "I" || $action eq "S";
}
print STDERR $target."\n";
foreach (@TARGET_BITMAP) { print STDERR $_; } print STDERR "\n";
foreach (sort keys %FRAME_INPUT) {
print STDERR "$_: $FRAME_INPUT{$_}\n";
}
### STEP 2: BUILD RULE AND FRAME
# hierarchical rule
# source side: matched input words plus [X][X] at each non-terminal position
my $rule_s = "";
my $rule_pos_s = 0;
my %RULE_ALIGNMENT_S;
for(my $i=0;$i<scalar(@INPUT_BITMAP);$i++) {
if ($INPUT_BITMAP[$i]) {
$rule_s .= $INPUT[$i]." ";
$RULE_ALIGNMENT_S{$ALIGNMENT_I_TO_S{$i}} = $rule_pos_s++;
}
foreach my $NT (@NT) {
if ($i == $$NT{"start_i"}) {
$rule_s .= "[X][X] ";
$$NT{"rule_pos_s"} = $rule_pos_s++;
}
}
}
# target side: kept TM target words plus [X][X] (loop starts at -1 so a
# non-terminal anchored before the first word is also emitted)
my $rule_t = "";
my $rule_pos_t = 0;
my %RULE_ALIGNMENT_T;
for(my $t=-1;$t<scalar(@TARGET_BITMAP);$t++) {
if ($t>=0 && $TARGET_BITMAP[$t]) {
$rule_t .= $TARGET[$t]." ";
$RULE_ALIGNMENT_T{$t} = $rule_pos_t++;
}
foreach my $NT (@NT) {
if ($t == $$NT{"start_t"}) {
$rule_t .= "[X][X] ";
$$NT{"rule_pos_t"} = $rule_pos_t++;
}
}
}
# rule-internal alignment: word-word links first, then NT-NT links
my $rule_alignment = "";
foreach my $s (sort { $a <=> $b} keys %RULE_ALIGNMENT_S) {
foreach my $t (keys %{$ALIGN{"s"}[$s]}) {
next unless defined($RULE_ALIGNMENT_T{$t});
$rule_alignment .= $RULE_ALIGNMENT_S{$s}."-".$RULE_ALIGNMENT_T{$t}." ";
}
}
foreach my $NT (@NT) {
$rule_alignment .= $$NT{"rule_pos_s"}."-".$$NT{"rule_pos_t"}." ";
}
chop($rule_s);
chop($rule_t);
chop($rule_alignment);
my $rule_alignment_inv = "";
foreach (split(/ /,$rule_alignment)) {
/^(\d+)\-(\d+)$/;
$rule_alignment_inv .= "$2-$1 ";
}
chop($rule_alignment_inv);
# frame
my $frame = "";
$frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};
my $currently_included = 0;
my $start_t = -1;
push @TARGET_BITMAP,0; # indicate end
for(my $t=0;$t<=scalar(@TARGET);$t++) {
# beginning of tm target inclusion
if (!$currently_included && $TARGET_BITMAP[$t]) {
$start_t = $t;
$currently_included = 1;
}
# end of tm target inclusion (not included word or inserted input)
elsif ($currently_included &&
(!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
# add xml (unless change is at the beginning of the sentence
if ($start_t >= 0) {
my $target = "";
print STDERR "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
$target .= $TARGET[$tt] . " ";
}
chop($target);
$frame .= "<xml translation=\"$target\"> x </xml> ";
}
$currently_included = 0;
}
$frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
print STDERR "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
}
print STDERR $frame."\n-------------------------------------\n";
return ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv);
}
# Parse a word-alignment string ("s-t s-t ...") into two lookup tables.
# Returns a hash with keys 's' and 't': under 's', index by source position
# to get a hash whose keys are the aligned target positions (and vice versa
# under 't').  Values count how often a link appeared.
sub create_alignment {
  my ($alignment_string) = @_;
  my @source_to_target;
  my @target_to_source;
  for my $pair (split / /, $alignment_string) {
    my ($src_pos, $tgt_pos) = split /\-/, $pair;
    $source_to_target[$src_pos]{$tgt_pos}++;
    $target_to_source[$tgt_pos]{$src_pos}++;
  }
  return ( 's' => \@source_to_target, 't' => \@target_to_source );
}

View File

@ -0,0 +1,288 @@
#!/usr/bin/perl -w
use strict;

# Turn precomputed BEST fuzzy matches into XML frames and (optionally) a
# hierarchical phrase table via train-model.perl.
# NOTE(review): all paths below are hard-coded to a developer machine;
# parameterize before reuse.

my $DEBUG = 1;
my $OUTPUT_RULES = 1;

my $scripts_root_dir = "/Users/hieuhoang/workspace/github/hieuhoang/scripts";
my $data_root = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/";

#my $match_file = "$data_root/in/BEST.acquis-xml-escaped.4.uniq.multi.tuning";
my $match_file = "$data_root/out/BEST";
my $source_file = "$data_root/in/acquis.truecased.4.en.uniq";
my $target_file = "$data_root/in/acquis.truecased.4.fr.uniq";
my $alignment_file = "$data_root/in/acquis.truecased.4.align.uniq";
my $out_file = "$data_root/out/ac-test.input.xml.4.uniq.multi.tuning";
my $in_file = "$data_root/in/ac-test.input.tc.4";

#my $match_file = "tm/BEST.acquis-xml-escaped.4.uniq.multi";
#my $source_file = "data/acquis.truecased.4.en.uniq";
#my $target_file = "data/acquis.truecased.4.fr.uniq";
#my $alignment_file = "data/acquis.truecased.4.align.uniq";
#my $out_file = "data/ac-test.input.xml.4.uniq.multi.xxx";
#my $in_file = "evaluation/ac-test.input.tc.4";

# slurp the corpora into memory; array indices correspond to sentence ids
my @INPUT = `cat $in_file`; chop(@INPUT);
my @ALL_SOURCE = `cat $source_file`; chop(@ALL_SOURCE);
my @ALL_TARGET = `cat $target_file`; chop(@ALL_TARGET);
my @ALL_ALIGNMENT = `cat $alignment_file`; chop(@ALL_ALIGNMENT);

# fail loudly on unreadable/unwritable files instead of producing empty output
open(MATCH,$match_file) or die "Cannot open $match_file: $!";
open(FRAME,">$out_file") or die "Cannot open $out_file: $!";
open(RULE,">$out_file.extract") or die "Cannot open $out_file.extract: $!" if $OUTPUT_RULES;
open(RULE_INV,">$out_file.extract.inv") or die "Cannot open $out_file.extract.inv: $!" if $OUTPUT_RULES;
open(INFO,">$out_file.info") or die "Cannot open $out_file.info: $!";
while( my $match = <MATCH> ) {
  chop($match);
  # each match line: "<input-id> <match-score> ||| <tm-sentence-id> ||| <edit-path>"
  my ($score,$sentence,$path) = split(/ \|\|\| /,$match);
  $score =~ /^(\d+) (.+)/ || die;
  my ($i,$match_score) = ($1,$2);

  # construct frame (skip sentinel/out-of-range sentence ids)
  if ($sentence < 1e9 && $sentence >= 0) {
    my $SOURCE = $ALL_SOURCE[$sentence];
    my @ALIGNMENT = split(/ \|\|\| /,$ALL_ALIGNMENT[$sentence]);
    my @TARGET = split(/ \|\|\| /,$ALL_TARGET[$sentence]);
    # one frame/rule per alternative target translation of the TM sentence
    for(my $j=0;$j<scalar(@TARGET);$j++) {
      $TARGET[$j] =~ /^(\d+) (.+)$/ || die;
      my ($target_count,$target) = ($1,$2);
      my ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv) =
        &create_xml($SOURCE,
                    $INPUT[$i],
                    $target,
                    $ALIGNMENT[$j],
                    $path);
      print FRAME $frame."\n";
      print RULE "$rule_s [X] ||| $rule_t [X] ||| $rule_alignment ||| $target_count\n" if $OUTPUT_RULES;
      print RULE_INV "$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $target_count\n" if $OUTPUT_RULES;
      print INFO "$i ||| $match_score ||| $target_count\n";
    }
  }
}
close(FRAME);
close(MATCH);
close(RULE) if $OUTPUT_RULES;
close(RULE_INV) if $OUTPUT_RULES;
close(INFO);  # was missing

`LC_ALL=C sort $out_file.extract | gzip -c > $out_file.extract.sorted.gz`;
`LC_ALL=C sort $out_file.extract.inv | gzip -c > $out_file.extract.inv.sorted.gz`;

`$scripts_root_dir/training/train-model.perl -dont-zip -first-step 6 -last-step 6 -f en -e fr -hierarchical -extract-file $out_file.extract -lexical-file $data_root/in/lex.4 -phrase-translation-table $out_file.phrase-table` if $OUTPUT_RULES;
# Build an XML frame plus a hierarchical rule from one fuzzy match.
# Arguments:
#   $source    - TM source sentence (space-separated words)
#   $input     - input sentence to be translated
#   $target    - one TM target translation of $source
#   $alignment - source-target word alignment ("s-t s-t ...")
#   $path      - edit path from the sed alignment of input vs. TM source,
#                a string over M(atch)/S(ubst)/I(nsert)/D(elete)
# Returns ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv):
#   $frame          - TM target with mismatched regions replaced by input
#                     words, matched regions wrapped in <xml translation="...">
#   $rule_s/$rule_t - source/target side of a hierarchical rule, with
#                     [X][X] non-terminals at the mismatch positions
#   $rule_alignment / $rule_alignment_inv - rule-internal word alignment
#                     and its inverse
# NOTE(review): this variant prints its debug traces to STDOUT, unlike the
# STDERR-printing sibling implementations.
sub create_xml {
my ($source,$input,$target,$alignment,$path) = @_;
my @INPUT = split(/ /,$input);
my @SOURCE = split(/ /,$source);
my @TARGET = split(/ /,$target);
my %ALIGN = &create_alignment($alignment);
# input words to be inserted into the frame, keyed by preceding target position
my %FRAME_INPUT;
# @NT: one record per non-terminal; bitmaps mark which input/target words are kept
my (@NT,@INPUT_BITMAP,@TARGET_BITMAP,%ALIGNMENT_I_TO_S);
foreach (@TARGET) { push @TARGET_BITMAP,1 }
### STEP 1: FIND MISMATCHES
# walk the edit path, tracking positions in TM source ($s) and input ($i)
my ($s,$i) = (0,0);
my $currently_matching = 0;
my ($start_s,$start_i) = (0,0);
$path .= "X"; # indicate end
print "$input\n$source\n$target\n$path\n";
for(my $p=0;$p<length($path);$p++) {
my $action = substr($path,$p,1);
# beginning of a mismatch
if ($currently_matching && $action ne "M" && $action ne "X") {
$start_i = $i;
$start_s = $s;
$currently_matching = 0;
}
# end of a mismatch
elsif (!$currently_matching &&
($action eq "M" || $action eq "X")) {
# remove use of affected target words
for(my $ss = $start_s; $ss<$s; $ss++) {
foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
$TARGET_BITMAP[$tt] = 0;
}
# also remove enclosed unaligned words?
}
# are there input words that need to be inserted ?
print "($start_i<$i)?\n";
if ($start_i<$i) {
# take note of input words to be inserted
my $insertion = "";
for(my $ii = $start_i; $ii<$i; $ii++) {
$insertion .= $INPUT[$ii]." ";
}
# find position for inserted input words
# find first removed target word
# 1000 acts as a sentinel for "no aligned target word found"
my $start_t = 1000;
for(my $ss = $start_s; $ss<$s; $ss++) {
foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
$start_t = $tt if $tt < $start_t;
}
}
# end of sentence? add to end
if ($start_t == 1000 && $i > $#INPUT) {
$start_t = $#TARGET;
}
# backtrack to previous words if unaligned
if ($start_t == 1000) {
$start_t = -1;
for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
$start_t = $tt if $tt > $start_t;
}
}
}
$FRAME_INPUT{$start_t} .= $insertion;
my %NT = ("start_t" => $start_t,
"start_i" => $start_i );
push @NT,\%NT;
}
$currently_matching = 1;
}
print "$action $s $i ($start_s $start_i) $currently_matching";
if ($action ne "I") {
print " ->";
foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
print " ".$tt;
}
}
print "\n";
# advance positions: insertions consume no source word, deletions no input word
$s++ unless $action eq "I";
$i++ unless $action eq "D";
$ALIGNMENT_I_TO_S{$i} = $s unless $action eq "D";
push @INPUT_BITMAP, 1 if $action eq "M";
push @INPUT_BITMAP, 0 if $action eq "I" || $action eq "S";
}
print $target."\n";
foreach (@TARGET_BITMAP) { print $_; } print "\n";
foreach (sort keys %FRAME_INPUT) {
print "$_: $FRAME_INPUT{$_}\n";
}
### STEP 2: BUILD RULE AND FRAME
# hierarchical rule
# source side: matched input words plus [X][X] at each non-terminal position
my $rule_s = "";
my $rule_pos_s = 0;
my %RULE_ALIGNMENT_S;
for(my $i=0;$i<scalar(@INPUT_BITMAP);$i++) {
if ($INPUT_BITMAP[$i]) {
$rule_s .= $INPUT[$i]." ";
$RULE_ALIGNMENT_S{$ALIGNMENT_I_TO_S{$i}} = $rule_pos_s++;
}
foreach my $NT (@NT) {
if ($i == $$NT{"start_i"}) {
$rule_s .= "[X][X] ";
$$NT{"rule_pos_s"} = $rule_pos_s++;
}
}
}
# target side: kept TM target words plus [X][X] (loop starts at -1 so a
# non-terminal anchored before the first word is also emitted)
my $rule_t = "";
my $rule_pos_t = 0;
my %RULE_ALIGNMENT_T;
for(my $t=-1;$t<scalar(@TARGET_BITMAP);$t++) {
if ($t>=0 && $TARGET_BITMAP[$t]) {
$rule_t .= $TARGET[$t]." ";
$RULE_ALIGNMENT_T{$t} = $rule_pos_t++;
}
foreach my $NT (@NT) {
if ($t == $$NT{"start_t"}) {
$rule_t .= "[X][X] ";
$$NT{"rule_pos_t"} = $rule_pos_t++;
}
}
}
# rule-internal alignment: word-word links first, then NT-NT links
my $rule_alignment = "";
foreach my $s (sort { $a <=> $b} keys %RULE_ALIGNMENT_S) {
foreach my $t (keys %{$ALIGN{"s"}[$s]}) {
next unless defined($RULE_ALIGNMENT_T{$t});
$rule_alignment .= $RULE_ALIGNMENT_S{$s}."-".$RULE_ALIGNMENT_T{$t}." ";
}
}
foreach my $NT (@NT) {
$rule_alignment .= $$NT{"rule_pos_s"}."-".$$NT{"rule_pos_t"}." ";
}
chop($rule_s);
chop($rule_t);
chop($rule_alignment);
my $rule_alignment_inv = "";
foreach (split(/ /,$rule_alignment)) {
/^(\d+)\-(\d+)$/;
$rule_alignment_inv .= "$2-$1 ";
}
chop($rule_alignment_inv);
# frame
my $frame = "";
$frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};
my $currently_included = 0;
my $start_t = -1;
push @TARGET_BITMAP,0; # indicate end
for(my $t=0;$t<=scalar(@TARGET);$t++) {
# beginning of tm target inclusion
if (!$currently_included && $TARGET_BITMAP[$t]) {
$start_t = $t;
$currently_included = 1;
}
# end of tm target inclusion (not included word or inserted input)
elsif ($currently_included &&
(!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
# add xml (unless change is at the beginning of the sentence
if ($start_t >= 0) {
my $target = "";
print "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
$target .= $TARGET[$tt] . " ";
}
chop($target);
$frame .= "<xml translation=\"$target\"> x </xml> ";
}
$currently_included = 0;
}
$frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
print "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
}
print $frame."\n-------------------------------------\n";
return ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv);
}
# Parse a word-alignment string ("s-t s-t ...") into two lookup tables.
# Returns a hash with keys 's' and 't': under 's', index by source position
# to get a hash whose keys are the aligned target positions (and vice versa
# under 't').  Values count how often a link appeared.
sub create_alignment {
  my ($alignment_string) = @_;
  my @source_to_target;
  my @target_to_source;
  for my $pair (split / /, $alignment_string) {
    my ($src_pos, $tgt_pos) = split /\-/, $pair;
    $source_to_target[$src_pos]{$tgt_pos}++;
    $target_to_source[$tgt_pos]{$src_pos}++;
  }
  return ( 's' => \@source_to_target, 't' => \@target_to_source );
}

View File

@ -0,0 +1,27 @@
#include "SuffixArray.h"
using namespace std;
int main(int argc, char* argv[])
{
SuffixArray suffixArray( "/home/pkoehn/syntax/grammars/wmt09-de-en/corpus.1k.de" );
//suffixArray.List(10,20);
vector< string > der;
der.push_back("der");
vector< string > inDer;
inDer.push_back("in");
inDer.push_back("der");
vector< string > zzz;
zzz.push_back("zzz");
vector< string > derDer;
derDer.push_back("der");
derDer.push_back("der");
cout << "count of 'der' " << suffixArray.Count( der ) << endl;
cout << "limited count of 'der' " << suffixArray.MinCount( der, 2 ) << endl;
cout << "count of 'in der' " << suffixArray.Count( inDer ) << endl;
cout << "count of 'der der' " << suffixArray.Count( derDer ) << endl;
cout << "limited count of 'der der' " << suffixArray.MinCount( derDer, 1 ) << endl;
// cout << "count of 'zzz' " << suffixArray.Count( zzz ) << endl;
// cout << "limited count of 'zzz' " << suffixArray.LimitedCount( zzz, 1 ) << endl;
}

View File

@ -1 +0,0 @@
/usr/share/automake-1.9/INSTALL

View File

@ -43,6 +43,8 @@
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
@ -58,10 +60,11 @@
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>zdll.lib;$(SolutionDir)/$(Configuration)/moses.lib;$(SolutionDir)/$(Configuration)/kenlm.lib;$(SolutionDir)/$(Configuration)/OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>C:\GnuWin32\lib\zlib.lib;$(SolutionDir)/$(Configuration)/moses.lib;$(SolutionDir)/$(Configuration)/kenlm.lib;$(SolutionDir)/$(Configuration)/OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<TargetMachine>MachineX86</TargetMachine>
<AdditionalLibraryDirectories>C:\boost\boost_1_47\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
@ -69,7 +72,7 @@
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeader>
</PrecompiledHeader>
@ -78,12 +81,13 @@
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>zdll.lib;$(SolutionDir)/$(Configuration)/moses.lib;$(SolutionDir)/$(Configuration)/kenlm.lib;$(SolutionDir)/$(Configuration)/OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>C:\GnuWin32\lib\zlib.lib;$(SolutionDir)/$(Configuration)/moses.lib;$(SolutionDir)/$(Configuration)/kenlm.lib;$(SolutionDir)/$(Configuration)/OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<OptimizeReferences>true</OptimizeReferences>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<TargetMachine>MachineX86</TargetMachine>
<AdditionalLibraryDirectories>C:\boost\boost_1_47\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemGroup>

View File

@ -69,7 +69,7 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
@ -84,7 +84,7 @@
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeader>

View File

@ -41,9 +41,13 @@
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.676959181" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.1484480101" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1556683035" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../moses/src"/>
<listOptionValue builtIn="false" value="/opt/local/include"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.1052680347" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1930757481" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1161943634" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
@ -128,4 +132,5 @@
<storageModule moduleId="refreshScope" versionNumber="1">
<resource resourceType="PROJECT" workspacePath="/OnDiskPt"/>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>

View File

@ -0,0 +1,292 @@
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 46;
objects = {
/* Begin PBXBuildFile section */
1E42EFB615BEFAEB00E937EB /* fuzzy-match2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E42EFA515BEFABD00E937EB /* fuzzy-match2.cpp */; };
1E42EFB715BEFAEB00E937EB /* SuffixArray.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DCF15BED3D4001914A2 /* SuffixArray.cpp */; };
1E42EFB815BEFAEB00E937EB /* SuffixArray.h in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DD015BED3D4001914A2 /* SuffixArray.h */; };
1E42EFB915BEFAEB00E937EB /* Vocabulary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DCA15BED3AC001914A2 /* Vocabulary.cpp */; };
1E42EFBA15BEFAEB00E937EB /* Vocabulary.h in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DCB15BED3AC001914A2 /* Vocabulary.h */; };
1E806DCC15BED3AC001914A2 /* Vocabulary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DCA15BED3AC001914A2 /* Vocabulary.cpp */; };
1E806DD115BED3D4001914A2 /* SuffixArray.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DCF15BED3D4001914A2 /* SuffixArray.cpp */; };
1ECD60A815C15E28004172A4 /* Util.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ECD60A515C15D3A004172A4 /* Util.cpp */; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
1E42EFAA15BEFAD300E937EB /* CopyFiles */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = /usr/share/man/man1/;
dstSubfolderSpec = 0;
files = (
);
runOnlyForDeploymentPostprocessing = 1;
};
1ED87EEB15BED331003E47AA /* CopyFiles */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = /usr/share/man/man1/;
dstSubfolderSpec = 0;
files = (
);
runOnlyForDeploymentPostprocessing = 1;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
1E42EFA515BEFABD00E937EB /* fuzzy-match2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = "fuzzy-match2.cpp"; path = "../tm-mt-integration/fuzzy-match2.cpp"; sourceTree = "<group>"; };
1E42EFAC15BEFAD300E937EB /* fuzzy-match2 */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "fuzzy-match2"; sourceTree = BUILT_PRODUCTS_DIR; };
1E42EFD115C00AC100E937EB /* fuzzy-match2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "fuzzy-match2.h"; path = "../tm-mt-integration/fuzzy-match2.h"; sourceTree = "<group>"; };
1E42EFD215C00BAE00E937EB /* Util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Util.h; path = "../tm-mt-integration/Util.h"; sourceTree = "<group>"; };
1E42EFD315C00C0A00E937EB /* SentenceAlignment.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SentenceAlignment.h; path = "../tm-mt-integration/SentenceAlignment.h"; sourceTree = "<group>"; };
1E42EFD715C00D6300E937EB /* Match.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Match.h; path = "../tm-mt-integration/Match.h"; sourceTree = "<group>"; };
1E806DCA15BED3AC001914A2 /* Vocabulary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Vocabulary.cpp; path = "../tm-mt-integration/Vocabulary.cpp"; sourceTree = "<group>"; };
1E806DCB15BED3AC001914A2 /* Vocabulary.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Vocabulary.h; path = "../tm-mt-integration/Vocabulary.h"; sourceTree = "<group>"; };
1E806DCF15BED3D4001914A2 /* SuffixArray.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SuffixArray.cpp; path = "../tm-mt-integration/SuffixArray.cpp"; sourceTree = "<group>"; };
1E806DD015BED3D4001914A2 /* SuffixArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SuffixArray.h; path = "../tm-mt-integration/SuffixArray.h"; sourceTree = "<group>"; };
1ECD60A515C15D3A004172A4 /* Util.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Util.cpp; path = "../tm-mt-integration/Util.cpp"; sourceTree = "<group>"; };
1ED87EED15BED331003E47AA /* fuzzy-match */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "fuzzy-match"; sourceTree = BUILT_PRODUCTS_DIR; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
1E42EFA915BEFAD300E937EB /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
1ED87EEA15BED331003E47AA /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
1ED87EE215BED32F003E47AA = {
isa = PBXGroup;
children = (
1E42EFD715C00D6300E937EB /* Match.h */,
1E42EFD315C00C0A00E937EB /* SentenceAlignment.h */,
1E42EFD215C00BAE00E937EB /* Util.h */,
1ECD60A515C15D3A004172A4 /* Util.cpp */,
1E806DCF15BED3D4001914A2 /* SuffixArray.cpp */,
1E806DD015BED3D4001914A2 /* SuffixArray.h */,
1E42EFD115C00AC100E937EB /* fuzzy-match2.h */,
1E42EFA515BEFABD00E937EB /* fuzzy-match2.cpp */,
1E806DCA15BED3AC001914A2 /* Vocabulary.cpp */,
1E806DCB15BED3AC001914A2 /* Vocabulary.h */,
1ED87EEE15BED331003E47AA /* Products */,
);
sourceTree = "<group>";
};
1ED87EEE15BED331003E47AA /* Products */ = {
isa = PBXGroup;
children = (
1ED87EED15BED331003E47AA /* fuzzy-match */,
1E42EFAC15BEFAD300E937EB /* fuzzy-match2 */,
);
name = Products;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
1E42EFAB15BEFAD300E937EB /* fuzzy-match2 */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1E42EFB315BEFAD300E937EB /* Build configuration list for PBXNativeTarget "fuzzy-match2" */;
buildPhases = (
1E42EFA815BEFAD300E937EB /* Sources */,
1E42EFA915BEFAD300E937EB /* Frameworks */,
1E42EFAA15BEFAD300E937EB /* CopyFiles */,
);
buildRules = (
);
dependencies = (
);
name = "fuzzy-match2";
productName = "fuzzy-match2";
productReference = 1E42EFAC15BEFAD300E937EB /* fuzzy-match2 */;
productType = "com.apple.product-type.tool";
};
1ED87EEC15BED331003E47AA /* fuzzy-match */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1ED87EF715BED331003E47AA /* Build configuration list for PBXNativeTarget "fuzzy-match" */;
buildPhases = (
1ED87EE915BED331003E47AA /* Sources */,
1ED87EEA15BED331003E47AA /* Frameworks */,
1ED87EEB15BED331003E47AA /* CopyFiles */,
);
buildRules = (
);
dependencies = (
);
name = "fuzzy-match";
productName = "fuzzy-match";
productReference = 1ED87EED15BED331003E47AA /* fuzzy-match */;
productType = "com.apple.product-type.tool";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
1ED87EE415BED32F003E47AA /* Project object */ = {
isa = PBXProject;
buildConfigurationList = 1ED87EE715BED32F003E47AA /* Build configuration list for PBXProject "fuzzy-match" */;
compatibilityVersion = "Xcode 3.2";
developmentRegion = English;
hasScannedForEncodings = 0;
knownRegions = (
en,
);
mainGroup = 1ED87EE215BED32F003E47AA;
productRefGroup = 1ED87EEE15BED331003E47AA /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
1ED87EEC15BED331003E47AA /* fuzzy-match */,
1E42EFAB15BEFAD300E937EB /* fuzzy-match2 */,
);
};
/* End PBXProject section */
/* Begin PBXSourcesBuildPhase section */
1E42EFA815BEFAD300E937EB /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
1ECD60A815C15E28004172A4 /* Util.cpp in Sources */,
1E42EFB615BEFAEB00E937EB /* fuzzy-match2.cpp in Sources */,
1E42EFB715BEFAEB00E937EB /* SuffixArray.cpp in Sources */,
1E42EFB815BEFAEB00E937EB /* SuffixArray.h in Sources */,
1E42EFB915BEFAEB00E937EB /* Vocabulary.cpp in Sources */,
1E42EFBA15BEFAEB00E937EB /* Vocabulary.h in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
1ED87EE915BED331003E47AA /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
1E806DCC15BED3AC001914A2 /* Vocabulary.cpp in Sources */,
1E806DD115BED3D4001914A2 /* SuffixArray.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
1E42EFB415BEFAD300E937EB /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Debug;
};
1E42EFB515BEFAD300E937EB /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Release;
};
1ED87EF515BED331003E47AA /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
COPY_PHASE_STRIP = NO;
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_DYNAMIC_NO_PIC = NO;
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_SYMBOLS_PRIVATE_EXTERN = NO;
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
MACOSX_DEPLOYMENT_TARGET = 10.7;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = macosx;
};
name = Debug;
};
1ED87EF615BED331003E47AA /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
COPY_PHASE_STRIP = YES;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
MACOSX_DEPLOYMENT_TARGET = 10.7;
SDKROOT = macosx;
};
name = Release;
};
1ED87EF815BED331003E47AA /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Debug;
};
1ED87EF915BED331003E47AA /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
1E42EFB315BEFAD300E937EB /* Build configuration list for PBXNativeTarget "fuzzy-match2" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1E42EFB415BEFAD300E937EB /* Debug */,
1E42EFB515BEFAD300E937EB /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1ED87EE715BED32F003E47AA /* Build configuration list for PBXProject "fuzzy-match" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1ED87EF515BED331003E47AA /* Debug */,
1ED87EF615BED331003E47AA /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1ED87EF715BED331003E47AA /* Build configuration list for PBXNativeTarget "fuzzy-match" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1ED87EF815BED331003E47AA /* Debug */,
1ED87EF915BED331003E47AA /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 1ED87EE415BED32F003E47AA /* Project object */;
}

View File

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<Bucket
type = "1"
version = "1.0">
<FileBreakpoints>
<FileBreakpoint
shouldBeEnabled = "Yes"
ignoreCount = "0"
continueAfterRunningActions = "No"
isPathRelative = "0"
filePath = "/Users/hieuhoang/unison/workspace/github/hieuhoang/contrib/tm-mt-integration/fuzzy-match2.cpp"
timestampString = "364996019.762643"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
startingLineNumber = "456"
endingLineNumber = "456"
landmarkName = "create_extract(int sentenceInd, int cost, const vector&lt; WORD_ID &gt; &amp;sourceSentence, const vector&lt;SentenceAlignment&gt; &amp;targets, const string &amp;inputStr, const string &amp;path)"
landmarkType = "7">
</FileBreakpoint>
</FileBreakpoints>
</Bucket>

View File

@ -0,0 +1,78 @@
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "1ED87EEC15BED331003E47AA"
BuildableName = "fuzzy-match"
BlueprintName = "fuzzy-match"
ReferencedContainer = "container:fuzzy-match.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
shouldUseLaunchSchemeArgsEnv = "YES"
buildConfiguration = "Debug">
<Testables>
</Testables>
</TestAction>
<LaunchAction
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
buildConfiguration = "Debug">
<BuildableProductRunnable>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "1ED87EEC15BED331003E47AA"
BuildableName = "fuzzy-match"
BlueprintName = "fuzzy-match"
ReferencedContainer = "container:fuzzy-match.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
<CommandLineArguments>
<CommandLineArgument
argument = "--multiple /Users/hieuhoang/workspace/experiment/data/tm-mt-integration//in/ac-test.input.tc.4 /Users/hieuhoang/workspace/experiment/data/tm-mt-integration//in/acquis.truecased.4.en.uniq"
isEnabled = "YES">
</CommandLineArgument>
</CommandLineArguments>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
buildConfiguration = "Release">
<BuildableProductRunnable>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "1ED87EEC15BED331003E47AA"
BuildableName = "fuzzy-match"
BlueprintName = "fuzzy-match"
ReferencedContainer = "container:fuzzy-match.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>

View File

@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "1E42EFAB15BEFAD300E937EB"
BuildableName = "fuzzy-match2"
BlueprintName = "fuzzy-match2"
ReferencedContainer = "container:fuzzy-match.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
shouldUseLaunchSchemeArgsEnv = "YES"
buildConfiguration = "Debug">
<Testables>
</Testables>
</TestAction>
<LaunchAction
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
launchStyle = "0"
useCustomWorkingDirectory = "YES"
customWorkingDirectory = "/Users/hieuhoang/unison/workspace/experiment/data/tm-mt-integration/in"
buildConfiguration = "Debug">
<BuildableProductRunnable>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "1E42EFAB15BEFAD300E937EB"
BuildableName = "fuzzy-match2"
BlueprintName = "fuzzy-match2"
ReferencedContainer = "container:fuzzy-match.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
<CommandLineArguments>
<CommandLineArgument
argument = "--multiple ac-test.input.tc.4 acquis.truecased.4.en.uniq acquis.truecased.4.fr.uniq acquis.truecased.4.align.uniq"
isEnabled = "YES">
</CommandLineArgument>
</CommandLineArguments>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
buildConfiguration = "Release">
<BuildableProductRunnable>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "1E42EFAB15BEFAD300E937EB"
BuildableName = "fuzzy-match2"
BlueprintName = "fuzzy-match2"
ReferencedContainer = "container:fuzzy-match.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>

View File

@ -0,0 +1,32 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>SchemeUserState</key>
<dict>
<key>fuzzy-match.xcscheme</key>
<dict>
<key>orderHint</key>
<integer>0</integer>
</dict>
<key>fuzzy-match2.xcscheme</key>
<dict>
<key>orderHint</key>
<integer>1</integer>
</dict>
</dict>
<key>SuppressBuildableAutocreation</key>
<dict>
<key>1E42EFAB15BEFAD300E937EB</key>
<dict>
<key>primary</key>
<true/>
</dict>
<key>1ED87EEC15BED331003E47AA</key>
<dict>
<key>primary</key>
<true/>
</dict>
</dict>
</dict>
</plist>

View File

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
@ -123,7 +123,12 @@
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<IncludePath>C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<IncludePath>C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<PrecompiledHeader>
@ -131,7 +136,7 @@
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)\..\..\lm\msinttypes;C:\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
@ -147,7 +152,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)\..\..\lm\msinttypes;C:\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
</ClCompile>
<Link>

View File

@ -405,6 +405,9 @@
/* Begin PBXProject section */
1EE8C2E01476A48E002496F2 /* Project object */ = {
isa = PBXProject;
attributes = {
LastUpgradeCheck = 0420;
};
buildConfigurationList = 1EE8C2E31476A48E002496F2 /* Build configuration list for PBXProject "lm" */;
compatibilityVersion = "Xcode 3.2";
developmentRegion = English;
@ -539,6 +542,7 @@
isa = XCBuildConfiguration;
buildSettings = {
EXECUTABLE_PREFIX = lib;
GCC_PREPROCESSOR_DEFINITIONS = "KENLM_MAX_ORDER=7";
LIBRARY_SEARCH_PATHS = (
"$(inherited)",
"\"$(SRCROOT)/../../lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
@ -556,6 +560,7 @@
isa = XCBuildConfiguration;
buildSettings = {
EXECUTABLE_PREFIX = lib;
GCC_PREPROCESSOR_DEFINITIONS = "KENLM_MAX_ORDER=7";
LIBRARY_SEARCH_PATHS = (
"$(inherited)",
"\"$(SRCROOT)/../../lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",

View File

@ -42,7 +42,11 @@
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.7139692" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1988092227" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.1980966336" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.20502600" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
@ -53,6 +57,9 @@
</tool>
</toolChain>
</folderInfo>
<sourceEntries>
<entry excluding="left_test.cc|model_test.cc" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
@ -122,4 +129,5 @@
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="refreshScope"/>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>

View File

@ -326,6 +326,21 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/trie_sort.hh</locationURI>
</link>
<link>
<name>value.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/value.hh</locationURI>
</link>
<link>
<name>value_build.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/value_build.cc</locationURI>
</link>
<link>
<name>value_build.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/value_build.hh</locationURI>
</link>
<link>
<name>virtual_interface.cc</name>
<type>1</type>

View File

@ -312,6 +312,7 @@
1E1D826815AC640800FE42E9 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1EB0AEFF1593A2180007E2A4 /* Build configuration list for PBXProject "mert" */ = {
isa = XCConfigurationList;

View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
version = "1.0">
<FileRef
location = "self:mert.xcodeproj">
</FileRef>
</Workspace>

View File

@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<Bucket
type = "1"
version = "1.0">
<FileBreakpoints>
<FileBreakpoint
shouldBeEnabled = "Yes"
ignoreCount = "0"
continueAfterRunningActions = "No"
isPathRelative = "0"
filePath = "/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/mert.cpp"
timestampString = "363625029.073606"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
startingLineNumber = "316"
endingLineNumber = "316"
landmarkName = "main(int argc, char **argv)"
landmarkType = "7">
</FileBreakpoint>
<FileBreakpoint
shouldBeEnabled = "Yes"
ignoreCount = "0"
continueAfterRunningActions = "No"
isPathRelative = "0"
filePath = "/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/mert.cpp"
timestampString = "363625081.848519"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
startingLineNumber = "326"
endingLineNumber = "326"
landmarkName = "main(int argc, char **argv)"
landmarkType = "7">
</FileBreakpoint>
</FileBreakpoints>
</Bucket>

View File

@ -13,10 +13,10 @@
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "D2AAC045055464E500DB518D"
BuildableName = "libmoses.a"
BlueprintName = "moses"
ReferencedContainer = "container:moses.xcodeproj">
BlueprintIdentifier = "1E1D825E15AC640800FE42E9"
BuildableName = "extractor"
BlueprintName = "extractor"
ReferencedContainer = "container:mert.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
@ -35,6 +35,15 @@
launchStyle = "0"
useCustomWorkingDirectory = "NO"
buildConfiguration = "Debug">
<BuildableProductRunnable>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "1E1D825E15AC640800FE42E9"
BuildableName = "extractor"
BlueprintName = "extractor"
ReferencedContainer = "container:mert.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
@ -43,6 +52,15 @@
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
buildConfiguration = "Release">
<BuildableProductRunnable>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "1E1D825E15AC640800FE42E9"
BuildableName = "extractor"
BlueprintName = "extractor"
ReferencedContainer = "container:mert.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">

View File

@ -13,10 +13,10 @@
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "D2AAC045055464E500DB518D"
BuildableName = "libOnDiskPt.a"
BlueprintName = "OnDiskPt"
ReferencedContainer = "container:OnDiskPt.xcodeproj">
BlueprintIdentifier = "1EB0AF041593A2180007E2A4"
BuildableName = "mert"
BlueprintName = "mert"
ReferencedContainer = "container:mert.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
@ -35,6 +35,15 @@
launchStyle = "0"
useCustomWorkingDirectory = "NO"
buildConfiguration = "Debug">
<BuildableProductRunnable>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "1EB0AF041593A2180007E2A4"
BuildableName = "mert"
BlueprintName = "mert"
ReferencedContainer = "container:mert.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
@ -43,6 +52,15 @@
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
buildConfiguration = "Release">
<BuildableProductRunnable>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "1EB0AF041593A2180007E2A4"
BuildableName = "mert"
BlueprintName = "mert"
ReferencedContainer = "container:mert.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">

View File

@ -0,0 +1,32 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>SchemeUserState</key>
<dict>
<key>extractor.xcscheme</key>
<dict>
<key>orderHint</key>
<integer>1</integer>
</dict>
<key>mert.xcscheme</key>
<dict>
<key>orderHint</key>
<integer>2</integer>
</dict>
</dict>
<key>SuppressBuildableAutocreation</key>
<dict>
<key>1E1D825E15AC640800FE42E9</key>
<dict>
<key>primary</key>
<true/>
</dict>
<key>1EB0AF041593A2180007E2A4</key>
<dict>
<key>primary</key>
<true/>
</dict>
</dict>
</dict>
</plist>

View File

@ -308,6 +308,7 @@
../../irstlm/lib,
../../srilm/lib/macosx,
/opt/local/lib,
../../cmph/lib,
);
OTHER_LDFLAGS = (
"-lz",
@ -318,6 +319,9 @@
"-lflm",
"-llattice",
"-lboost_thread-mt",
"-lboost_filesystem-mt",
"-lboost_system-mt",
"-lcmph",
);
PRODUCT_NAME = "moses-chart-cmd";
USER_HEADER_SEARCH_PATHS = "../../ ../../moses/src";
@ -341,6 +345,7 @@
../../irstlm/lib,
../../srilm/lib/macosx,
/opt/local/lib,
../../cmph/lib,
);
OTHER_LDFLAGS = (
"-lz",
@ -351,6 +356,9 @@
"-lflm",
"-llattice",
"-lboost_thread-mt",
"-lboost_filesystem-mt",
"-lboost_system-mt",
"-lcmph",
);
PRODUCT_NAME = "moses-chart-cmd";
USER_HEADER_SEARCH_PATHS = "../../ ../../moses/src";

View File

@ -43,6 +43,10 @@
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
<LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47\lib;$(LibraryPath)</LibraryPath>
<LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47\lib;$(LibraryPath)</LibraryPath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>

View File

@ -326,15 +326,20 @@
../../irstlm/lib,
../../srilm/lib/macosx,
/opt/local/lib,
../../cmph/lib,
);
OTHER_LDFLAGS = (
"-lflm",
"-lmisc",
"-loolm",
"-ldstruct",
"-lz",
"-lirstlm",
"-lmisc",
"-ldstruct",
"-loolm",
"-lflm",
"-llattice",
"-lboost_thread-mt",
"-lboost_filesystem-mt",
"-lboost_system-mt",
"-lcmph",
);
PREBINDING = NO;
PRODUCT_NAME = "moses-cmd";
@ -369,15 +374,20 @@
../../irstlm/lib,
../../srilm/lib/macosx,
/opt/local/lib,
../../cmph/lib,
);
OTHER_LDFLAGS = (
"-lflm",
"-lmisc",
"-loolm",
"-ldstruct",
"-lz",
"-lirstlm",
"-lmisc",
"-ldstruct",
"-loolm",
"-lflm",
"-llattice",
"-lboost_thread-mt",
"-lboost_filesystem-mt",
"-lboost_system-mt",
"-lcmph",
);
PREBINDING = NO;
PRODUCT_NAME = "moses-cmd";
@ -409,15 +419,20 @@
../../irstlm/lib,
../../srilm/lib/macosx,
/opt/local/lib,
../../cmph/lib,
);
OTHER_LDFLAGS = (
"-lflm",
"-lmisc",
"-loolm",
"-ldstruct",
"-lz",
"-lirstlm",
"-lmisc",
"-ldstruct",
"-loolm",
"-lflm",
"-llattice",
"-lboost_thread-mt",
"-lboost_filesystem-mt",
"-lboost_system-mt",
"-lcmph",
);
PREBINDING = NO;
PRODUCT_NAME = "moses-cmd";

View File

@ -0,0 +1,72 @@
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "8DD76F620486A84900D96B5E"
BuildableName = "moses-cmd"
BlueprintName = "moses-cmd"
ReferencedContainer = "container:moses-cmd.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
shouldUseLaunchSchemeArgsEnv = "YES"
buildConfiguration = "Debug">
<Testables>
</Testables>
</TestAction>
<LaunchAction
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
buildConfiguration = "Debug">
<BuildableProductRunnable>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "8DD76F620486A84900D96B5E"
BuildableName = "moses-cmd"
BlueprintName = "moses-cmd"
ReferencedContainer = "container:moses-cmd.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
buildConfiguration = "Release">
<BuildableProductRunnable>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "8DD76F620486A84900D96B5E"
BuildableName = "moses-cmd"
BlueprintName = "moses-cmd"
ReferencedContainer = "container:moses-cmd.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>

View File

@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>SchemeUserState</key>
<dict>
<key>moses-cmd.xcscheme</key>
<dict>
<key>orderHint</key>
<integer>2</integer>
</dict>
</dict>
<key>SuppressBuildableAutocreation</key>
<dict>
<key>8DD76F620486A84900D96B5E</key>
<dict>
<key>primary</key>
<true/>
</dict>
</dict>
</dict>
</plist>

View File

@ -25,17 +25,27 @@
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.84059290" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
<option id="macosx.cpp.link.option.libs.1641794848" name="Libraries (-l)" superClass="macosx.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="rt"/>
<listOptionValue builtIn="false" value="misc"/>
<listOptionValue builtIn="false" value="dstruct"/>
<listOptionValue builtIn="false" value="oolm"/>
<listOptionValue builtIn="false" value="flm"/>
<listOptionValue builtIn="false" value="lattice"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_filesystem"/>
</option>
<option id="macosx.cpp.link.option.paths.1615268628" name="Library search path (-L)" superClass="macosx.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/moses/Debug"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/OnDiskPt/Debug"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/lm/Debug"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/util/Debug"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/irstlm/lib"/>
<listOptionValue builtIn="false" value="${workspace_loc:/moses}/Debug"/>
<listOptionValue builtIn="false" value="${workspace_loc:}/../../srilm/lib/i686-m64"/>
<listOptionValue builtIn="false" value="${workspace_loc:/OnDiskPt}/Debug"/>
<listOptionValue builtIn="false" value="${workspace_loc:/lm}/Debug"/>
<listOptionValue builtIn="false" value="${workspace_loc:/util}/Debug"/>
<listOptionValue builtIn="false" value="${workspace_loc:}/../../irstlm/lib"/>
</option>
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.412058804" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
@ -51,8 +61,11 @@
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.1176009559" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1024398579" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/moses/src"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../moses/src"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.491464216" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.240921565" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
@ -122,12 +135,13 @@
<storageModule moduleId="refreshScope" versionNumber="1">
<resource resourceType="PROJECT" workspacePath="/moses-cmd"/>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150;cdt.managedbuild.config.gnu.macosx.exe.debug.341255150.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1201400609;cdt.managedbuild.tool.gnu.c.compiler.input.2031799877">
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.1916112479;cdt.managedbuild.config.macosx.exe.release.1916112479.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.759110223;cdt.managedbuild.tool.gnu.c.compiler.input.1452105399">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.1916112479;cdt.managedbuild.config.macosx.exe.release.1916112479.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.759110223;cdt.managedbuild.tool.gnu.c.compiler.input.1452105399">
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150;cdt.managedbuild.config.gnu.macosx.exe.debug.341255150.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1201400609;cdt.managedbuild.tool.gnu.c.compiler.input.2031799877">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.1916112479;cdt.managedbuild.config.macosx.exe.release.1916112479.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1219375865;cdt.managedbuild.tool.gnu.cpp.compiler.input.604224475">

View File

@ -20,6 +20,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CreateOnDisk", "CreateOnDis
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "kenlm", "kenlm.vcxproj", "{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mosesserver", "mosesserver.vcxproj", "{85811FDF-8AD1-4490-A545-B2F51931A18C}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
@ -39,11 +41,17 @@ Global
{E2233DB1-5592-46FE-9420-E529420612FA}.Release|Win32.ActiveCfg = Release|Win32
{E2233DB1-5592-46FE-9420-E529420612FA}.Release|Win32.Build.0 = Release|Win32
{88AE90C9-72D2-42ED-8389-770ACDCD4308}.Debug|Win32.ActiveCfg = Debug|Win32
{88AE90C9-72D2-42ED-8389-770ACDCD4308}.Debug|Win32.Build.0 = Debug|Win32
{88AE90C9-72D2-42ED-8389-770ACDCD4308}.Release|Win32.ActiveCfg = Release|Win32
{88AE90C9-72D2-42ED-8389-770ACDCD4308}.Release|Win32.Build.0 = Release|Win32
{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Debug|Win32.ActiveCfg = Debug|Win32
{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Debug|Win32.Build.0 = Debug|Win32
{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Release|Win32.ActiveCfg = Release|Win32
{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Release|Win32.Build.0 = Release|Win32
{85811FDF-8AD1-4490-A545-B2F51931A18C}.Debug|Win32.ActiveCfg = Debug|Win32
{85811FDF-8AD1-4490-A545-B2F51931A18C}.Debug|Win32.Build.0 = Debug|Win32
{85811FDF-8AD1-4490-A545-B2F51931A18C}.Release|Win32.ActiveCfg = Release|Win32
{85811FDF-8AD1-4490-A545-B2F51931A18C}.Release|Win32.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@ -13,6 +13,7 @@
<ItemGroup>
<ClInclude Include="..\..\moses\src\AlignmentInfo.h" />
<ClInclude Include="..\..\moses\src\AlignmentInfoCollection.h" />
<ClInclude Include="..\..\moses\src\BilingualDynSuffixArray.h" />
<ClInclude Include="..\..\moses\src\BitmapContainer.h" />
<ClInclude Include="..\..\moses\src\CellCollection.h" />
<ClInclude Include="..\..\moses\src\ChartCell.h" />
@ -162,6 +163,7 @@
<ItemGroup>
<ClCompile Include="..\..\moses\src\AlignmentInfo.cpp" />
<ClCompile Include="..\..\moses\src\AlignmentInfoCollection.cpp" />
<ClCompile Include="..\..\moses\src\BilingualDynSuffixArray.cpp" />
<ClCompile Include="..\..\moses\src\BitmapContainer.cpp" />
<ClCompile Include="..\..\moses\src\ChartCell.cpp" />
<ClCompile Include="..\..\moses\src\ChartCellCollection.cpp" />
@ -319,13 +321,13 @@
<IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\GnuWin32\include;C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\GnuWin32\include;C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47;C:\GnuWin32\include;$(IncludePath)</IncludePath>
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47;C:\GnuWin32\include;$(IncludePath)</IncludePath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>$(SolutionDir)\..\..\lm\msinttypes;C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_CONSOLE;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
@ -344,7 +346,7 @@
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<AdditionalIncludeDirectories>$(SolutionDir)\..\..\lm\msinttypes;C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_CONSOLE;LM_INTERNAL;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<PrecompiledHeader>

View File

@ -7,8 +7,38 @@
objects = {
/* Begin PBXBuildFile section */
1E0BA41815B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E0BA41615B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.cpp */; };
1E0BA41915B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E0BA41715B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.h */; };
1E1D824015AC29BB00FE42E9 /* FileHandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E1D823E15AC29BB00FE42E9 /* FileHandler.cpp */; };
1E1D824115AC29BB00FE42E9 /* FileHandler.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E1D823F15AC29BB00FE42E9 /* FileHandler.h */; };
1E365EEA16120F4600BA335B /* ChartTranslationOptions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E365EE816120F4600BA335B /* ChartTranslationOptions.cpp */; };
1E365EEB16120F4600BA335B /* ChartTranslationOptions.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E365EE916120F4600BA335B /* ChartTranslationOptions.h */; };
1E619EA115B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E619E9F15B8713600C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.cpp */; };
1E619EA215B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E619EA015B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.h */; };
1E6D9FD615D027560064D436 /* BlockHashIndex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FBD15D027560064D436 /* BlockHashIndex.cpp */; };
1E6D9FD715D027560064D436 /* BlockHashIndex.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FBE15D027560064D436 /* BlockHashIndex.h */; };
1E6D9FD815D027560064D436 /* CanonicalHuffman.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FBF15D027560064D436 /* CanonicalHuffman.h */; };
1E6D9FD915D027560064D436 /* CmphStringVectorAdapter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FC015D027560064D436 /* CmphStringVectorAdapter.cpp */; };
1E6D9FDA15D027560064D436 /* CmphStringVectorAdapter.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC115D027560064D436 /* CmphStringVectorAdapter.h */; };
1E6D9FDB15D027560064D436 /* ConsistantPhrases.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC215D027560064D436 /* ConsistantPhrases.h */; };
1E6D9FDD15D027560064D436 /* LexicalReorderingTableCompact.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FC415D027560064D436 /* LexicalReorderingTableCompact.cpp */; };
1E6D9FDE15D027560064D436 /* LexicalReorderingTableCompact.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC515D027560064D436 /* LexicalReorderingTableCompact.h */; };
1E6D9FDF15D027560064D436 /* LexicalReorderingTableCreator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FC615D027560064D436 /* LexicalReorderingTableCreator.cpp */; };
1E6D9FE015D027560064D436 /* LexicalReorderingTableCreator.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC715D027560064D436 /* LexicalReorderingTableCreator.h */; };
1E6D9FE115D027560064D436 /* ListCoders.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC815D027560064D436 /* ListCoders.h */; };
1E6D9FE215D027560064D436 /* MmapAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC915D027560064D436 /* MmapAllocator.h */; };
1E6D9FE315D027560064D436 /* MonotonicVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FCA15D027560064D436 /* MonotonicVector.h */; };
1E6D9FE415D027560064D436 /* MurmurHash3.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FCB15D027560064D436 /* MurmurHash3.cpp */; };
1E6D9FE515D027560064D436 /* MurmurHash3.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FCC15D027560064D436 /* MurmurHash3.h */; };
1E6D9FE615D027560064D436 /* PackedArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FCD15D027560064D436 /* PackedArray.h */; };
1E6D9FE715D027560064D436 /* PhraseDecoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FCE15D027560064D436 /* PhraseDecoder.cpp */; };
1E6D9FE815D027560064D436 /* PhraseDecoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FCF15D027560064D436 /* PhraseDecoder.h */; };
1E6D9FE915D027560064D436 /* PhraseDictionaryCompact.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FD015D027560064D436 /* PhraseDictionaryCompact.cpp */; };
1E6D9FEA15D027560064D436 /* PhraseDictionaryCompact.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FD115D027560064D436 /* PhraseDictionaryCompact.h */; };
1E6D9FEB15D027560064D436 /* PhraseTableCreator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FD215D027560064D436 /* PhraseTableCreator.cpp */; };
1E6D9FEC15D027560064D436 /* PhraseTableCreator.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FD315D027560064D436 /* PhraseTableCreator.h */; };
1E6D9FED15D027560064D436 /* StringVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FD415D027560064D436 /* StringVector.h */; };
1E6D9FEE15D027560064D436 /* TargetPhraseCollectionCache.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FD515D027560064D436 /* TargetPhraseCollectionCache.h */; };
1E879EA715A346F90051F346 /* SearchNormalBatch.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E879EA515A346F90051F346 /* SearchNormalBatch.cpp */; };
1E879EA815A346F90051F346 /* SearchNormalBatch.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E879EA615A346F90051F346 /* SearchNormalBatch.h */; };
1EAC363514CDC79300DF97C3 /* Loader.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EAC362C14CDC79300DF97C3 /* Loader.h */; };
@ -20,6 +50,8 @@
1EAC363B14CDC79300DF97C3 /* LoaderHiero.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EAC363214CDC79300DF97C3 /* LoaderHiero.h */; };
1EAC363C14CDC79300DF97C3 /* LoaderStandard.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EAC363314CDC79300DF97C3 /* LoaderStandard.cpp */; };
1EAC363D14CDC79300DF97C3 /* LoaderStandard.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EAC363414CDC79300DF97C3 /* LoaderStandard.h */; };
1EC32DB815D2D90700A313B1 /* ThrowingFwrite.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC32DB615D2D90700A313B1 /* ThrowingFwrite.cpp */; };
1EC32DB915D2D90700A313B1 /* ThrowingFwrite.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC32DB715D2D90700A313B1 /* ThrowingFwrite.h */; };
1EC7374614B977AB00238410 /* AlignmentInfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735D314B977AA00238410 /* AlignmentInfo.cpp */; };
1EC7374714B977AB00238410 /* AlignmentInfo.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735D414B977AA00238410 /* AlignmentInfo.h */; };
1EC7374814B977AB00238410 /* AlignmentInfoCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735D514B977AA00238410 /* AlignmentInfoCollection.cpp */; };
@ -28,7 +60,6 @@
1EC7374B14B977AB00238410 /* BilingualDynSuffixArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735D814B977AA00238410 /* BilingualDynSuffixArray.h */; };
1EC7374C14B977AB00238410 /* BitmapContainer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735D914B977AA00238410 /* BitmapContainer.cpp */; };
1EC7374D14B977AB00238410 /* BitmapContainer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735DA14B977AA00238410 /* BitmapContainer.h */; };
1EC7374E14B977AB00238410 /* CellCollection.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735DB14B977AA00238410 /* CellCollection.h */; };
1EC7374F14B977AB00238410 /* ChartCell.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735DC14B977AA00238410 /* ChartCell.cpp */; };
1EC7375014B977AB00238410 /* ChartCell.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735DD14B977AA00238410 /* ChartCell.h */; };
1EC7375114B977AB00238410 /* ChartCellCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735DE14B977AA00238410 /* ChartCellCollection.cpp */; };
@ -42,10 +73,6 @@
1EC7375914B977AB00238410 /* ChartManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735E614B977AA00238410 /* ChartManager.cpp */; };
1EC7375A14B977AB00238410 /* ChartManager.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735E714B977AA00238410 /* ChartManager.h */; };
1EC7375C14B977AB00238410 /* ChartRuleLookupManager.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735E914B977AA00238410 /* ChartRuleLookupManager.h */; };
1EC7376114B977AB00238410 /* ChartTranslationOption.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735EE14B977AA00238410 /* ChartTranslationOption.cpp */; };
1EC7376214B977AB00238410 /* ChartTranslationOption.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735EF14B977AA00238410 /* ChartTranslationOption.h */; };
1EC7376314B977AB00238410 /* ChartTranslationOptionCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735F014B977AA00238410 /* ChartTranslationOptionCollection.cpp */; };
1EC7376414B977AB00238410 /* ChartTranslationOptionCollection.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735F114B977AA00238410 /* ChartTranslationOptionCollection.h */; };
1EC7376514B977AB00238410 /* ChartTranslationOptionList.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735F214B977AA00238410 /* ChartTranslationOptionList.cpp */; };
1EC7376614B977AB00238410 /* ChartTranslationOptionList.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735F314B977AA00238410 /* ChartTranslationOptionList.h */; };
1EC7376714B977AB00238410 /* ChartTrellisDetour.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735F414B977AA00238410 /* ChartTrellisDetour.cpp */; };
@ -295,14 +322,53 @@
1EDA809114D19FBF003D2191 /* UTrie.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EDA808314D19FBF003D2191 /* UTrie.h */; };
1EDA809214D19FBF003D2191 /* UTrieNode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EDA808414D19FBF003D2191 /* UTrieNode.cpp */; };
1EDA809314D19FBF003D2191 /* UTrieNode.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EDA808514D19FBF003D2191 /* UTrieNode.h */; };
1EE418ED15C7FDCB0028F9AB /* Match.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE418E415C7FDCB0028F9AB /* Match.h */; };
1EE418EE15C7FDCB0028F9AB /* SentenceAlignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EE418E515C7FDCB0028F9AB /* SentenceAlignment.cpp */; };
1EE418EF15C7FDCB0028F9AB /* SentenceAlignment.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE418E615C7FDCB0028F9AB /* SentenceAlignment.h */; };
1EE418F015C7FDCB0028F9AB /* SuffixArray.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EE418E715C7FDCB0028F9AB /* SuffixArray.cpp */; };
1EE418F115C7FDCB0028F9AB /* SuffixArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE418E815C7FDCB0028F9AB /* SuffixArray.h */; };
1EE418F215C7FDCB0028F9AB /* FuzzyMatchWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EE418E915C7FDCB0028F9AB /* FuzzyMatchWrapper.cpp */; };
1EE418F315C7FDCB0028F9AB /* FuzzyMatchWrapper.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE418EA15C7FDCB0028F9AB /* FuzzyMatchWrapper.h */; };
1EE418F415C7FDCB0028F9AB /* Vocabulary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EE418EB15C7FDCB0028F9AB /* Vocabulary.cpp */; };
1EE418F515C7FDCB0028F9AB /* Vocabulary.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE418EC15C7FDCB0028F9AB /* Vocabulary.h */; };
1EF0709314B9EFCC0052152A /* ParallelBackoff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EF0709114B9EFCC0052152A /* ParallelBackoff.cpp */; };
1EF0709414B9EFCC0052152A /* ParallelBackoff.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EF0709214B9EFCC0052152A /* ParallelBackoff.h */; };
1EF8F2C4159A61970047B613 /* HypoList.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EF8F2C3159A61970047B613 /* HypoList.h */; };
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
1E0BA41615B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryFuzzyMatch.cpp; path = ../../moses/src/RuleTable/PhraseDictionaryFuzzyMatch.cpp; sourceTree = "<group>"; };
1E0BA41715B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryFuzzyMatch.h; path = ../../moses/src/RuleTable/PhraseDictionaryFuzzyMatch.h; sourceTree = "<group>"; };
1E1D823E15AC29BB00FE42E9 /* FileHandler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FileHandler.cpp; sourceTree = "<group>"; };
1E1D823F15AC29BB00FE42E9 /* FileHandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FileHandler.h; sourceTree = "<group>"; };
1E365EE816120F4600BA335B /* ChartTranslationOptions.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartTranslationOptions.cpp; path = ../../moses/src/ChartTranslationOptions.cpp; sourceTree = "<group>"; };
1E365EE916120F4600BA335B /* ChartTranslationOptions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartTranslationOptions.h; path = ../../moses/src/ChartTranslationOptions.h; sourceTree = "<group>"; };
1E619E9F15B8713600C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartRuleLookupManagerMemoryPerSentence.cpp; path = ../../moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp; sourceTree = "<group>"; };
1E619EA015B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartRuleLookupManagerMemoryPerSentence.h; path = ../../moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h; sourceTree = "<group>"; };
1E6D9FBD15D027560064D436 /* BlockHashIndex.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = BlockHashIndex.cpp; path = ../../moses/src/CompactPT/BlockHashIndex.cpp; sourceTree = "<group>"; };
1E6D9FBE15D027560064D436 /* BlockHashIndex.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = BlockHashIndex.h; path = ../../moses/src/CompactPT/BlockHashIndex.h; sourceTree = "<group>"; };
1E6D9FBF15D027560064D436 /* CanonicalHuffman.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CanonicalHuffman.h; path = ../../moses/src/CompactPT/CanonicalHuffman.h; sourceTree = "<group>"; };
1E6D9FC015D027560064D436 /* CmphStringVectorAdapter.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = CmphStringVectorAdapter.cpp; path = ../../moses/src/CompactPT/CmphStringVectorAdapter.cpp; sourceTree = "<group>"; };
1E6D9FC115D027560064D436 /* CmphStringVectorAdapter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CmphStringVectorAdapter.h; path = ../../moses/src/CompactPT/CmphStringVectorAdapter.h; sourceTree = "<group>"; };
1E6D9FC215D027560064D436 /* ConsistantPhrases.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ConsistantPhrases.h; path = ../../moses/src/CompactPT/ConsistantPhrases.h; sourceTree = "<group>"; };
1E6D9FC415D027560064D436 /* LexicalReorderingTableCompact.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LexicalReorderingTableCompact.cpp; path = ../../moses/src/CompactPT/LexicalReorderingTableCompact.cpp; sourceTree = "<group>"; };
1E6D9FC515D027560064D436 /* LexicalReorderingTableCompact.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LexicalReorderingTableCompact.h; path = ../../moses/src/CompactPT/LexicalReorderingTableCompact.h; sourceTree = "<group>"; };
1E6D9FC615D027560064D436 /* LexicalReorderingTableCreator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LexicalReorderingTableCreator.cpp; path = ../../moses/src/CompactPT/LexicalReorderingTableCreator.cpp; sourceTree = "<group>"; };
1E6D9FC715D027560064D436 /* LexicalReorderingTableCreator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LexicalReorderingTableCreator.h; path = ../../moses/src/CompactPT/LexicalReorderingTableCreator.h; sourceTree = "<group>"; };
1E6D9FC815D027560064D436 /* ListCoders.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ListCoders.h; path = ../../moses/src/CompactPT/ListCoders.h; sourceTree = "<group>"; };
1E6D9FC915D027560064D436 /* MmapAllocator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MmapAllocator.h; path = ../../moses/src/CompactPT/MmapAllocator.h; sourceTree = "<group>"; };
1E6D9FCA15D027560064D436 /* MonotonicVector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MonotonicVector.h; path = ../../moses/src/CompactPT/MonotonicVector.h; sourceTree = "<group>"; };
1E6D9FCB15D027560064D436 /* MurmurHash3.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = MurmurHash3.cpp; path = ../../moses/src/CompactPT/MurmurHash3.cpp; sourceTree = "<group>"; };
1E6D9FCC15D027560064D436 /* MurmurHash3.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MurmurHash3.h; path = ../../moses/src/CompactPT/MurmurHash3.h; sourceTree = "<group>"; };
1E6D9FCD15D027560064D436 /* PackedArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PackedArray.h; path = ../../moses/src/CompactPT/PackedArray.h; sourceTree = "<group>"; };
1E6D9FCE15D027560064D436 /* PhraseDecoder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDecoder.cpp; path = ../../moses/src/CompactPT/PhraseDecoder.cpp; sourceTree = "<group>"; };
1E6D9FCF15D027560064D436 /* PhraseDecoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDecoder.h; path = ../../moses/src/CompactPT/PhraseDecoder.h; sourceTree = "<group>"; };
1E6D9FD015D027560064D436 /* PhraseDictionaryCompact.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryCompact.cpp; path = ../../moses/src/CompactPT/PhraseDictionaryCompact.cpp; sourceTree = "<group>"; };
1E6D9FD115D027560064D436 /* PhraseDictionaryCompact.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryCompact.h; path = ../../moses/src/CompactPT/PhraseDictionaryCompact.h; sourceTree = "<group>"; };
1E6D9FD215D027560064D436 /* PhraseTableCreator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseTableCreator.cpp; path = ../../moses/src/CompactPT/PhraseTableCreator.cpp; sourceTree = "<group>"; };
1E6D9FD315D027560064D436 /* PhraseTableCreator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseTableCreator.h; path = ../../moses/src/CompactPT/PhraseTableCreator.h; sourceTree = "<group>"; };
1E6D9FD415D027560064D436 /* StringVector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = StringVector.h; path = ../../moses/src/CompactPT/StringVector.h; sourceTree = "<group>"; };
1E6D9FD515D027560064D436 /* TargetPhraseCollectionCache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TargetPhraseCollectionCache.h; path = ../../moses/src/CompactPT/TargetPhraseCollectionCache.h; sourceTree = "<group>"; };
1E879EA515A346F90051F346 /* SearchNormalBatch.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SearchNormalBatch.cpp; path = ../../moses/src/SearchNormalBatch.cpp; sourceTree = "<group>"; };
1E879EA615A346F90051F346 /* SearchNormalBatch.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SearchNormalBatch.h; path = ../../moses/src/SearchNormalBatch.h; sourceTree = "<group>"; };
1EAC362C14CDC79300DF97C3 /* Loader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Loader.h; path = ../../moses/src/RuleTable/Loader.h; sourceTree = "<group>"; };
@ -314,6 +380,8 @@
1EAC363214CDC79300DF97C3 /* LoaderHiero.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LoaderHiero.h; path = ../../moses/src/RuleTable/LoaderHiero.h; sourceTree = "<group>"; };
1EAC363314CDC79300DF97C3 /* LoaderStandard.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LoaderStandard.cpp; path = ../../moses/src/RuleTable/LoaderStandard.cpp; sourceTree = "<group>"; };
1EAC363414CDC79300DF97C3 /* LoaderStandard.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LoaderStandard.h; path = ../../moses/src/RuleTable/LoaderStandard.h; sourceTree = "<group>"; };
1EC32DB615D2D90700A313B1 /* ThrowingFwrite.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ThrowingFwrite.cpp; path = ../../moses/src/CompactPT/ThrowingFwrite.cpp; sourceTree = "<group>"; };
1EC32DB715D2D90700A313B1 /* ThrowingFwrite.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ThrowingFwrite.h; path = ../../moses/src/CompactPT/ThrowingFwrite.h; sourceTree = "<group>"; };
1EC735D314B977AA00238410 /* AlignmentInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = AlignmentInfo.cpp; path = ../../moses/src/AlignmentInfo.cpp; sourceTree = "<group>"; };
1EC735D414B977AA00238410 /* AlignmentInfo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = AlignmentInfo.h; path = ../../moses/src/AlignmentInfo.h; sourceTree = "<group>"; };
1EC735D514B977AA00238410 /* AlignmentInfoCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = AlignmentInfoCollection.cpp; path = ../../moses/src/AlignmentInfoCollection.cpp; sourceTree = "<group>"; };
@ -322,7 +390,6 @@
1EC735D814B977AA00238410 /* BilingualDynSuffixArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = BilingualDynSuffixArray.h; path = ../../moses/src/BilingualDynSuffixArray.h; sourceTree = "<group>"; };
1EC735D914B977AA00238410 /* BitmapContainer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = BitmapContainer.cpp; path = ../../moses/src/BitmapContainer.cpp; sourceTree = "<group>"; };
1EC735DA14B977AA00238410 /* BitmapContainer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = BitmapContainer.h; path = ../../moses/src/BitmapContainer.h; sourceTree = "<group>"; };
1EC735DB14B977AA00238410 /* CellCollection.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CellCollection.h; path = ../../moses/src/CellCollection.h; sourceTree = "<group>"; };
1EC735DC14B977AA00238410 /* ChartCell.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartCell.cpp; path = ../../moses/src/ChartCell.cpp; sourceTree = "<group>"; };
1EC735DD14B977AA00238410 /* ChartCell.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartCell.h; path = ../../moses/src/ChartCell.h; sourceTree = "<group>"; };
1EC735DE14B977AA00238410 /* ChartCellCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartCellCollection.cpp; path = ../../moses/src/ChartCellCollection.cpp; sourceTree = "<group>"; };
@ -336,10 +403,6 @@
1EC735E614B977AA00238410 /* ChartManager.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartManager.cpp; path = ../../moses/src/ChartManager.cpp; sourceTree = "<group>"; };
1EC735E714B977AA00238410 /* ChartManager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartManager.h; path = ../../moses/src/ChartManager.h; sourceTree = "<group>"; };
1EC735E914B977AA00238410 /* ChartRuleLookupManager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartRuleLookupManager.h; path = ../../moses/src/ChartRuleLookupManager.h; sourceTree = "<group>"; };
1EC735EE14B977AA00238410 /* ChartTranslationOption.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartTranslationOption.cpp; path = ../../moses/src/ChartTranslationOption.cpp; sourceTree = "<group>"; };
1EC735EF14B977AA00238410 /* ChartTranslationOption.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartTranslationOption.h; path = ../../moses/src/ChartTranslationOption.h; sourceTree = "<group>"; };
1EC735F014B977AA00238410 /* ChartTranslationOptionCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartTranslationOptionCollection.cpp; path = ../../moses/src/ChartTranslationOptionCollection.cpp; sourceTree = "<group>"; };
1EC735F114B977AA00238410 /* ChartTranslationOptionCollection.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartTranslationOptionCollection.h; path = ../../moses/src/ChartTranslationOptionCollection.h; sourceTree = "<group>"; };
1EC735F214B977AA00238410 /* ChartTranslationOptionList.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartTranslationOptionList.cpp; path = ../../moses/src/ChartTranslationOptionList.cpp; sourceTree = "<group>"; };
1EC735F314B977AA00238410 /* ChartTranslationOptionList.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartTranslationOptionList.h; path = ../../moses/src/ChartTranslationOptionList.h; sourceTree = "<group>"; };
1EC735F414B977AA00238410 /* ChartTrellisDetour.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartTrellisDetour.cpp; path = ../../moses/src/ChartTrellisDetour.cpp; sourceTree = "<group>"; };
@ -591,6 +654,15 @@
1EDA808314D19FBF003D2191 /* UTrie.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = UTrie.h; path = ../../moses/src/RuleTable/UTrie.h; sourceTree = "<group>"; };
1EDA808414D19FBF003D2191 /* UTrieNode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = UTrieNode.cpp; path = ../../moses/src/RuleTable/UTrieNode.cpp; sourceTree = "<group>"; };
1EDA808514D19FBF003D2191 /* UTrieNode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = UTrieNode.h; path = ../../moses/src/RuleTable/UTrieNode.h; sourceTree = "<group>"; };
1EE418E415C7FDCB0028F9AB /* Match.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Match.h; path = "../../moses/src/fuzzy-match/Match.h"; sourceTree = "<group>"; };
1EE418E515C7FDCB0028F9AB /* SentenceAlignment.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SentenceAlignment.cpp; path = "../../moses/src/fuzzy-match/SentenceAlignment.cpp"; sourceTree = "<group>"; };
1EE418E615C7FDCB0028F9AB /* SentenceAlignment.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SentenceAlignment.h; path = "../../moses/src/fuzzy-match/SentenceAlignment.h"; sourceTree = "<group>"; };
1EE418E715C7FDCB0028F9AB /* SuffixArray.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SuffixArray.cpp; path = "../../moses/src/fuzzy-match/SuffixArray.cpp"; sourceTree = "<group>"; };
1EE418E815C7FDCB0028F9AB /* SuffixArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SuffixArray.h; path = "../../moses/src/fuzzy-match/SuffixArray.h"; sourceTree = "<group>"; };
1EE418E915C7FDCB0028F9AB /* FuzzyMatchWrapper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = FuzzyMatchWrapper.cpp; path = "../../moses/src/fuzzy-match/FuzzyMatchWrapper.cpp"; sourceTree = "<group>"; };
1EE418EA15C7FDCB0028F9AB /* FuzzyMatchWrapper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = FuzzyMatchWrapper.h; path = "../../moses/src/fuzzy-match/FuzzyMatchWrapper.h"; sourceTree = "<group>"; };
1EE418EB15C7FDCB0028F9AB /* Vocabulary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Vocabulary.cpp; path = "../../moses/src/fuzzy-match/Vocabulary.cpp"; sourceTree = "<group>"; };
1EE418EC15C7FDCB0028F9AB /* Vocabulary.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Vocabulary.h; path = "../../moses/src/fuzzy-match/Vocabulary.h"; sourceTree = "<group>"; };
1EF0709114B9EFCC0052152A /* ParallelBackoff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ParallelBackoff.cpp; sourceTree = "<group>"; };
1EF0709214B9EFCC0052152A /* ParallelBackoff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ParallelBackoff.h; sourceTree = "<group>"; };
1EF8F2C3159A61970047B613 /* HypoList.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = HypoList.h; path = ../../moses/src/HypoList.h; sourceTree = "<group>"; };
@ -621,8 +693,8 @@
08FB7795FE84155DC02AAC07 /* Source */ = {
isa = PBXGroup;
children = (
1E879EA515A346F90051F346 /* SearchNormalBatch.cpp */,
1E879EA615A346F90051F346 /* SearchNormalBatch.h */,
1E6D9FF015D027680064D436 /* CompactPT */,
1ECF13DE15C1A82400EA1DCE /* fuzzy-match */,
1EDA803514D19ECD003D2191 /* Scope3Parser */,
1EDA803414D19EB8003D2191 /* CYKPlusParser */,
1EC7365B14B977AA00238410 /* LM */,
@ -636,7 +708,6 @@
1EC735D814B977AA00238410 /* BilingualDynSuffixArray.h */,
1EC735D914B977AA00238410 /* BitmapContainer.cpp */,
1EC735DA14B977AA00238410 /* BitmapContainer.h */,
1EC735DB14B977AA00238410 /* CellCollection.h */,
1EC735DC14B977AA00238410 /* ChartCell.cpp */,
1EC735DD14B977AA00238410 /* ChartCell.h */,
1EC735DE14B977AA00238410 /* ChartCellCollection.cpp */,
@ -650,10 +721,8 @@
1EC735E614B977AA00238410 /* ChartManager.cpp */,
1EC735E714B977AA00238410 /* ChartManager.h */,
1EC735E914B977AA00238410 /* ChartRuleLookupManager.h */,
1EC735EE14B977AA00238410 /* ChartTranslationOption.cpp */,
1EC735EF14B977AA00238410 /* ChartTranslationOption.h */,
1EC735F014B977AA00238410 /* ChartTranslationOptionCollection.cpp */,
1EC735F114B977AA00238410 /* ChartTranslationOptionCollection.h */,
1E365EE816120F4600BA335B /* ChartTranslationOptions.cpp */,
1E365EE916120F4600BA335B /* ChartTranslationOptions.h */,
1EC735F214B977AA00238410 /* ChartTranslationOptionList.cpp */,
1EC735F314B977AA00238410 /* ChartTranslationOptionList.h */,
1EC735F414B977AA00238410 /* ChartTrellisDetour.cpp */,
@ -782,6 +851,8 @@
1EC736F414B977AB00238410 /* SearchCubePruning.h */,
1EC736F514B977AB00238410 /* SearchNormal.cpp */,
1EC736F614B977AB00238410 /* SearchNormal.h */,
1E879EA515A346F90051F346 /* SearchNormalBatch.cpp */,
1E879EA615A346F90051F346 /* SearchNormalBatch.h */,
1EC736F714B977AB00238410 /* Sentence.cpp */,
1EC736F814B977AB00238410 /* Sentence.h */,
1EC736F914B977AB00238410 /* SentenceStats.cpp */,
@ -845,6 +916,39 @@
name = Products;
sourceTree = "<group>";
};
1E6D9FF015D027680064D436 /* CompactPT */ = {
isa = PBXGroup;
children = (
1EC32DB615D2D90700A313B1 /* ThrowingFwrite.cpp */,
1EC32DB715D2D90700A313B1 /* ThrowingFwrite.h */,
1E6D9FBD15D027560064D436 /* BlockHashIndex.cpp */,
1E6D9FBE15D027560064D436 /* BlockHashIndex.h */,
1E6D9FBF15D027560064D436 /* CanonicalHuffman.h */,
1E6D9FC015D027560064D436 /* CmphStringVectorAdapter.cpp */,
1E6D9FC115D027560064D436 /* CmphStringVectorAdapter.h */,
1E6D9FC215D027560064D436 /* ConsistantPhrases.h */,
1E6D9FC415D027560064D436 /* LexicalReorderingTableCompact.cpp */,
1E6D9FC515D027560064D436 /* LexicalReorderingTableCompact.h */,
1E6D9FC615D027560064D436 /* LexicalReorderingTableCreator.cpp */,
1E6D9FC715D027560064D436 /* LexicalReorderingTableCreator.h */,
1E6D9FC815D027560064D436 /* ListCoders.h */,
1E6D9FC915D027560064D436 /* MmapAllocator.h */,
1E6D9FCA15D027560064D436 /* MonotonicVector.h */,
1E6D9FCB15D027560064D436 /* MurmurHash3.cpp */,
1E6D9FCC15D027560064D436 /* MurmurHash3.h */,
1E6D9FCD15D027560064D436 /* PackedArray.h */,
1E6D9FCE15D027560064D436 /* PhraseDecoder.cpp */,
1E6D9FCF15D027560064D436 /* PhraseDecoder.h */,
1E6D9FD015D027560064D436 /* PhraseDictionaryCompact.cpp */,
1E6D9FD115D027560064D436 /* PhraseDictionaryCompact.h */,
1E6D9FD215D027560064D436 /* PhraseTableCreator.cpp */,
1E6D9FD315D027560064D436 /* PhraseTableCreator.h */,
1E6D9FD415D027560064D436 /* StringVector.h */,
1E6D9FD515D027560064D436 /* TargetPhraseCollectionCache.h */,
);
name = CompactPT;
sourceTree = "<group>";
};
1EAC362B14CDC76200DF97C3 /* RuleTable */ = {
isa = PBXGroup;
children = (
@ -856,6 +960,8 @@
1EDA807D14D19FBF003D2191 /* PhraseDictionaryOnDisk.h */,
1EDA807E14D19FBF003D2191 /* PhraseDictionarySCFG.cpp */,
1EDA807F14D19FBF003D2191 /* PhraseDictionarySCFG.h */,
1E0BA41615B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.cpp */,
1E0BA41715B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.h */,
1EDA808014D19FBF003D2191 /* Trie.cpp */,
1EDA808114D19FBF003D2191 /* Trie.h */,
1EDA808214D19FBF003D2191 /* UTrie.cpp */,
@ -930,9 +1036,27 @@
path = ../../moses/src/LM;
sourceTree = "<group>";
};
1ECF13DE15C1A82400EA1DCE /* fuzzy-match */ = {
isa = PBXGroup;
children = (
1EE418E415C7FDCB0028F9AB /* Match.h */,
1EE418E515C7FDCB0028F9AB /* SentenceAlignment.cpp */,
1EE418E615C7FDCB0028F9AB /* SentenceAlignment.h */,
1EE418E715C7FDCB0028F9AB /* SuffixArray.cpp */,
1EE418E815C7FDCB0028F9AB /* SuffixArray.h */,
1EE418E915C7FDCB0028F9AB /* FuzzyMatchWrapper.cpp */,
1EE418EA15C7FDCB0028F9AB /* FuzzyMatchWrapper.h */,
1EE418EB15C7FDCB0028F9AB /* Vocabulary.cpp */,
1EE418EC15C7FDCB0028F9AB /* Vocabulary.h */,
);
name = "fuzzy-match";
sourceTree = "<group>";
};
1EDA803414D19EB8003D2191 /* CYKPlusParser */ = {
isa = PBXGroup;
children = (
1E619E9F15B8713600C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.cpp */,
1E619EA015B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.h */,
1EDA806214D19F12003D2191 /* ChartRuleLookupManagerCYKPlus.cpp */,
1EDA806314D19F12003D2191 /* ChartRuleLookupManagerCYKPlus.h */,
1EDA806414D19F12003D2191 /* ChartRuleLookupManagerMemory.cpp */,
@ -986,7 +1110,6 @@
1EC7374914B977AB00238410 /* AlignmentInfoCollection.h in Headers */,
1EC7374B14B977AB00238410 /* BilingualDynSuffixArray.h in Headers */,
1EC7374D14B977AB00238410 /* BitmapContainer.h in Headers */,
1EC7374E14B977AB00238410 /* CellCollection.h in Headers */,
1EC7375014B977AB00238410 /* ChartCell.h in Headers */,
1EC7375214B977AB00238410 /* ChartCellCollection.h in Headers */,
1EC7375314B977AB00238410 /* ChartCellLabel.h in Headers */,
@ -995,8 +1118,6 @@
1EC7375814B977AB00238410 /* ChartHypothesisCollection.h in Headers */,
1EC7375A14B977AB00238410 /* ChartManager.h in Headers */,
1EC7375C14B977AB00238410 /* ChartRuleLookupManager.h in Headers */,
1EC7376214B977AB00238410 /* ChartTranslationOption.h in Headers */,
1EC7376414B977AB00238410 /* ChartTranslationOptionCollection.h in Headers */,
1EC7376614B977AB00238410 /* ChartTranslationOptionList.h in Headers */,
1EC7376814B977AB00238410 /* ChartTrellisDetour.h in Headers */,
1EC7376A14B977AB00238410 /* ChartTrellisDetourQueue.h in Headers */,
@ -1143,6 +1264,31 @@
1EF8F2C4159A61970047B613 /* HypoList.h in Headers */,
1E879EA815A346F90051F346 /* SearchNormalBatch.h in Headers */,
1E1D824115AC29BB00FE42E9 /* FileHandler.h in Headers */,
1E0BA41915B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.h in Headers */,
1E619EA215B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.h in Headers */,
1EE418ED15C7FDCB0028F9AB /* Match.h in Headers */,
1EE418EF15C7FDCB0028F9AB /* SentenceAlignment.h in Headers */,
1EE418F115C7FDCB0028F9AB /* SuffixArray.h in Headers */,
1EE418F315C7FDCB0028F9AB /* FuzzyMatchWrapper.h in Headers */,
1EE418F515C7FDCB0028F9AB /* Vocabulary.h in Headers */,
1E6D9FD715D027560064D436 /* BlockHashIndex.h in Headers */,
1E6D9FD815D027560064D436 /* CanonicalHuffman.h in Headers */,
1E6D9FDA15D027560064D436 /* CmphStringVectorAdapter.h in Headers */,
1E6D9FDB15D027560064D436 /* ConsistantPhrases.h in Headers */,
1E6D9FDE15D027560064D436 /* LexicalReorderingTableCompact.h in Headers */,
1E6D9FE015D027560064D436 /* LexicalReorderingTableCreator.h in Headers */,
1E6D9FE115D027560064D436 /* ListCoders.h in Headers */,
1E6D9FE215D027560064D436 /* MmapAllocator.h in Headers */,
1E6D9FE315D027560064D436 /* MonotonicVector.h in Headers */,
1E6D9FE515D027560064D436 /* MurmurHash3.h in Headers */,
1E6D9FE615D027560064D436 /* PackedArray.h in Headers */,
1E6D9FE815D027560064D436 /* PhraseDecoder.h in Headers */,
1E6D9FEA15D027560064D436 /* PhraseDictionaryCompact.h in Headers */,
1E6D9FEC15D027560064D436 /* PhraseTableCreator.h in Headers */,
1E6D9FED15D027560064D436 /* StringVector.h in Headers */,
1E6D9FEE15D027560064D436 /* TargetPhraseCollectionCache.h in Headers */,
1EC32DB915D2D90700A313B1 /* ThrowingFwrite.h in Headers */,
1E365EEB16120F4600BA335B /* ChartTranslationOptions.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -1172,7 +1318,7 @@
08FB7793FE84155DC02AAC07 /* Project object */ = {
isa = PBXProject;
attributes = {
LastUpgradeCheck = 0410;
LastUpgradeCheck = 0420;
};
buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "moses" */;
compatibilityVersion = "Xcode 3.2";
@ -1207,8 +1353,6 @@
1EC7375514B977AB00238410 /* ChartHypothesis.cpp in Sources */,
1EC7375714B977AB00238410 /* ChartHypothesisCollection.cpp in Sources */,
1EC7375914B977AB00238410 /* ChartManager.cpp in Sources */,
1EC7376114B977AB00238410 /* ChartTranslationOption.cpp in Sources */,
1EC7376314B977AB00238410 /* ChartTranslationOptionCollection.cpp in Sources */,
1EC7376514B977AB00238410 /* ChartTranslationOptionList.cpp in Sources */,
1EC7376714B977AB00238410 /* ChartTrellisDetour.cpp in Sources */,
1EC7376914B977AB00238410 /* ChartTrellisDetourQueue.cpp in Sources */,
@ -1328,6 +1472,22 @@
1EDA809214D19FBF003D2191 /* UTrieNode.cpp in Sources */,
1E879EA715A346F90051F346 /* SearchNormalBatch.cpp in Sources */,
1E1D824015AC29BB00FE42E9 /* FileHandler.cpp in Sources */,
1E0BA41815B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.cpp in Sources */,
1E619EA115B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.cpp in Sources */,
1EE418EE15C7FDCB0028F9AB /* SentenceAlignment.cpp in Sources */,
1EE418F015C7FDCB0028F9AB /* SuffixArray.cpp in Sources */,
1EE418F215C7FDCB0028F9AB /* FuzzyMatchWrapper.cpp in Sources */,
1EE418F415C7FDCB0028F9AB /* Vocabulary.cpp in Sources */,
1E6D9FD615D027560064D436 /* BlockHashIndex.cpp in Sources */,
1E6D9FD915D027560064D436 /* CmphStringVectorAdapter.cpp in Sources */,
1E6D9FDD15D027560064D436 /* LexicalReorderingTableCompact.cpp in Sources */,
1E6D9FDF15D027560064D436 /* LexicalReorderingTableCreator.cpp in Sources */,
1E6D9FE415D027560064D436 /* MurmurHash3.cpp in Sources */,
1E6D9FE715D027560064D436 /* PhraseDecoder.cpp in Sources */,
1E6D9FE915D027560064D436 /* PhraseDictionaryCompact.cpp in Sources */,
1E6D9FEB15D027560064D436 /* PhraseTableCreator.cpp in Sources */,
1EC32DB815D2D90700A313B1 /* ThrowingFwrite.cpp in Sources */,
1E365EEA16120F4600BA335B /* ChartTranslationOptions.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -1338,6 +1498,7 @@
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
COPY_PHASE_STRIP = NO;
GCC_DYNAMIC_NO_PIC = NO;
GCC_MODEL_TUNING = G5;
@ -1352,6 +1513,9 @@
"_FILE_OFFSET_BITS=64",
_LARGE_FILES,
WITH_THREADS,
IS_XCODE,
HAVE_CMPH,
"KENLM_MAX_ORDER=7",
);
HEADER_SEARCH_PATHS = (
../..,
@ -1376,6 +1540,7 @@
"\"$(SRCROOT)/../../moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
);
PRODUCT_NAME = moses;
USER_HEADER_SEARCH_PATHS = "../.. ../../moses/src ../../irstlm/include ../../srilm/include ../../kenlm ../../randlm/include /opt/local/include ../../synlm/hhmm/wsjparse/include ../../synlm/hhmm/rvtl/include/ ../.. ../../cmph/include";
};
name = Debug;
};
@ -1383,6 +1548,7 @@
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
GCC_MODEL_TUNING = G5;
GCC_PREPROCESSOR_DEFINITIONS = (
@ -1395,6 +1561,9 @@
"_FILE_OFFSET_BITS=64",
_LARGE_FILES,
WITH_THREADS,
IS_XCODE,
HAVE_CMPH,
"KENLM_MAX_ORDER=7",
);
HEADER_SEARCH_PATHS = (
../..,
@ -1419,6 +1588,7 @@
"\"$(SRCROOT)/../../moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
);
PRODUCT_NAME = moses;
USER_HEADER_SEARCH_PATHS = "../.. ../../moses/src ../../irstlm/include ../../srilm/include ../../kenlm ../../randlm/include /opt/local/include ../../synlm/hhmm/wsjparse/include ../../synlm/hhmm/rvtl/include/ ../.. ../../cmph/include";
};
name = Release;
};

View File

@ -3,8 +3,8 @@
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.656913512">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.656913512" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings>
<externalSetting>
<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/moses"/>
@ -13,7 +13,7 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@ -21,65 +21,70 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.497902212" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.1820609450" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
<builder buildPath="${workspace_loc:/moses/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.1998579330" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.1330311562" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.1226580551" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.102127808" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.656913512" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1793369992" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.1051650049" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/moses/Debug}" id="cdt.managedbuild.target.gnu.builder.exe.debug.505583888" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1976472988" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1759650532" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.2123672332" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.57896781" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include/"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../irstlm/include"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../srilm/include"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../moses/src"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.752586397" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
<listOptionValue builtIn="false" value="LM_IRST"/>
<listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
<listOptionValue builtIn="false" value="_LARGE_FILES"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1905116220" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2126314903" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1524900118" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.debug.option.debugging.level.581728958" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.877210753" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1168585173" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.2074660557" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.340054018" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool command="as" commandLinePattern="${COMMAND} ${FLAGS} ${OUTPUT_FLAG} ${OUTPUT_PREFIX}${OUTPUT} ${INPUTS}" id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.1556759720" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.897776351" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1820797229" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1867588805" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.1898625650" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.806998992" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1819917957" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/moses/src"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/srilm/include"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/irstlm/include"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.1569452418" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="LM_SRI"/>
<listOptionValue builtIn="false" value="LM_IRST"/>
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1110302565" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.401409202" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.753046525" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.1396911098" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1919272901" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.933467113" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.99047750" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1722029461" name="SyntacticLanguageModelState.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelState.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1432960145" name="SyntacticLanguageModelFiles.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelFiles.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1906856645" name="SyntacticLanguageModel.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModel.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.460380900" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1692203139" name="ORLM.h" rcbsApplicability="disable" resourcePath="LM/ORLM.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.538301588" name="Remote.h" rcbsApplicability="disable" resourcePath="LM/Remote.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.854427429" name="LDHT.h" rcbsApplicability="disable" resourcePath="LM/LDHT.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.558758254" name="SyntacticLanguageModelState.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelState.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1930327037" name="SyntacticLanguageModelFiles.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelFiles.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1751563578" name="PhraseTableCreator.cpp" rcbsApplicability="disable" resourcePath="CompactPT/PhraseTableCreator.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1652631861">
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1652631861" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327"/>
</fileInfo>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1174630266" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.707830535" name="SRI.h" rcbsApplicability="disable" resourcePath="LM/SRI.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.160366559" name="LDHT.h" rcbsApplicability="disable" resourcePath="LM/LDHT.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.622077510" name="ParallelBackoff.h" rcbsApplicability="disable" resourcePath="LM/ParallelBackoff.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1084194539" name="SyntacticLanguageModel.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModel.h" toolsToInvoke=""/>
<sourceEntries>
<entry excluding="SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.cpp|LM/LDHT.h|LM/Remote.h|LM/Remote.cpp|LM/Rand.h|LM/Rand.cpp|LM/ORLM.h|LM/ORLM.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
<entry excluding="CompactPT/PhraseTableCreator.cpp|CompactPT/LexicalReorderingTableCreator.cpp|LM/SRI.h|LM/SRI.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.722580523">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.722580523" moduleId="org.eclipse.cdt.core.settings" name="Release">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.401150096">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.401150096" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@ -88,59 +93,41 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.722580523" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.722580523." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.2070671582" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.503591386" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
<builder buildPath="${workspace_loc:/moses/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.108117223" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.1203406445" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.1539915639" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1333560300" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.401150096" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.401150096." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.36295137" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.538725710" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/moses/Release}" id="cdt.managedbuild.target.gnu.builder.exe.release.1875953334" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1633496039" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.2060881562" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.1375372870" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.815283803" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1020483420" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.85324871" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1137534635" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.release.option.debugging.level.143589037" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.304912704" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.283583965" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.2059280959" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2020956494" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.1693865756" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.2000339940" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.505919286" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1662892925" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1036481202" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.484015287" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.preprocessor.def.1089615214" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="LM_SRI"/>
<listOptionValue builtIn="false" value="LM_IRST"/>
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
</option>
<option id="gnu.cpp.compiler.option.include.paths.1722702487" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/moses/src"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/srilm/include"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/irstlm/include"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.936283391" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1404156839" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1487222992" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1171203697" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1172147378" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.782286837" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1766138143" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.macosx.exe.release.722580523.1831545277" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.macosx.exe.release.722580523.1743378025" name="ORLM.h" rcbsApplicability="disable" resourcePath="LM/ORLM.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.macosx.exe.release.722580523.1490362543" name="Remote.h" rcbsApplicability="disable" resourcePath="LM/Remote.h" toolsToInvoke=""/>
<sourceEntries>
<entry excluding="LM/LDHT.cpp|LM/Rand.h|LM/Rand.cpp|LM/ORLM.h|LM/ORLM.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="moses.cdt.managedbuild.target.macosx.exe.1209017164" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
<project id="moses.cdt.managedbuild.target.gnu.exe.1375079569" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
@ -150,12 +137,24 @@
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.722580523;cdt.managedbuild.config.macosx.exe.release.722580523.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1404156839;cdt.managedbuild.tool.gnu.c.compiler.input.1172147378">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.401150096;cdt.managedbuild.config.gnu.exe.release.401150096.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.85324871;cdt.managedbuild.tool.gnu.c.compiler.input.304912704">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.656913512;cdt.managedbuild.config.gnu.exe.debug.656913512.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327;cdt.managedbuild.tool.gnu.cpp.compiler.input.1905116220">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426;cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1867588805;cdt.managedbuild.tool.gnu.cpp.compiler.input.1110302565">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.722580523;cdt.managedbuild.config.macosx.exe.release.722580523.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1662892925;cdt.managedbuild.tool.gnu.cpp.compiler.input.936283391">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.656913512;cdt.managedbuild.config.gnu.exe.debug.656913512.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2126314903;cdt.managedbuild.tool.gnu.c.compiler.input.877210753">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.401150096;cdt.managedbuild.config.gnu.exe.release.401150096.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.2060881562;cdt.managedbuild.tool.gnu.cpp.compiler.input.1020483420">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="refreshScope" versionNumber="1">
<resource resourceType="PROJECT" workspacePath="/moses"/>

View File

@ -101,6 +101,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/AlignmentInfoCollection.h</locationURI>
</link>
<link>
<name>ApplicableRuleTrie.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/ApplicableRuleTrie.cpp</locationURI>
</link>
<link>
<name>ApplicableRuleTrie.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/ApplicableRuleTrie.h</locationURI>
</link>
<link>
<name>BilingualDynSuffixArray.cpp</name>
<type>1</type>
@ -271,6 +281,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTrellisPathList.h</locationURI>
</link>
<link>
<name>CompactPT</name>
<type>2</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CompactPT</locationURI>
</link>
<link>
<name>ConfusionNet.cpp</name>
<type>1</type>
@ -441,6 +456,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/FloydWarshall.h</locationURI>
</link>
<link>
<name>FuzzyMatchWrapper.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/FuzzyMatchWrapper.cpp</locationURI>
</link>
<link>
<name>FuzzyMatchWrapper.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/FuzzyMatchWrapper.h</locationURI>
</link>
<link>
<name>GenerationDictionary.cpp</name>
<type>1</type>
@ -536,6 +561,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/InputType.h</locationURI>
</link>
<link>
<name>IntermediateVarSpanNode.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/IntermediateVarSpanNode.h</locationURI>
</link>
<link>
<name>Jamfile</name>
<type>1</type>
@ -606,6 +636,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Manager.h</locationURI>
</link>
<link>
<name>Match.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/Match.h</locationURI>
</link>
<link>
<name>NonTerminal.cpp</name>
<type>1</type>
@ -661,6 +696,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Parameter.h</locationURI>
</link>
<link>
<name>Parser.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/Parser.cpp</locationURI>
</link>
<link>
<name>Parser.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/Parser.h</locationURI>
</link>
<link>
<name>PartialTranslOptColl.cpp</name>
<type>1</type>
@ -809,7 +854,7 @@
<link>
<name>RuleTable</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable</locationURI>
</link>
<link>
<name>SRI.lo</name>
@ -821,11 +866,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/SRI.o</locationURI>
</link>
<link>
<name>Scope3Parser</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>ScoreComponentCollection.cpp</name>
<type>1</type>
@ -886,6 +926,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/SearchNormal.h</locationURI>
</link>
<link>
<name>SearchNormalBatch.cpp</name>
<type>1</type>
<locationURI>PARENT-1-ECLIPSE_HOME/workspace/github/hieuhoang/moses/src/SearchNormalBatch.cpp</locationURI>
</link>
<link>
<name>SearchNormalBatch.h</name>
<type>1</type>
<locationURI>PARENT-1-ECLIPSE_HOME/workspace/github/hieuhoang/moses/src/SearchNormalBatch.h</locationURI>
</link>
<link>
<name>Sentence.cpp</name>
<type>1</type>
@ -896,6 +946,21 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Sentence.h</locationURI>
</link>
<link>
<name>SentenceAlignment.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/SentenceAlignment.cpp</locationURI>
</link>
<link>
<name>SentenceAlignment.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/SentenceAlignment.h</locationURI>
</link>
<link>
<name>SentenceMap.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/SentenceMap.h</locationURI>
</link>
<link>
<name>SentenceStats.cpp</name>
<type>1</type>
@ -916,6 +981,26 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/SquareMatrix.h</locationURI>
</link>
<link>
<name>StackLattice.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLattice.h</locationURI>
</link>
<link>
<name>StackLatticeBuilder.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLatticeBuilder.cpp</locationURI>
</link>
<link>
<name>StackLatticeBuilder.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLatticeBuilder.h</locationURI>
</link>
<link>
<name>StackLatticeSearcher.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLatticeSearcher.h</locationURI>
</link>
<link>
<name>StackVec.h</name>
<type>1</type>
@ -941,6 +1026,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/StaticData.o</locationURI>
</link>
<link>
<name>SuffixArray.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/SuffixArray.cpp</locationURI>
</link>
<link>
<name>SuffixArray.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/SuffixArray.h</locationURI>
</link>
<link>
<name>SyntacticLanguageModel.cpp</name>
<type>1</type>
@ -1181,6 +1276,31 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Util.o</locationURI>
</link>
<link>
<name>VarSpanNode.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/VarSpanNode.h</locationURI>
</link>
<link>
<name>VarSpanTrieBuilder.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/VarSpanTrieBuilder.cpp</locationURI>
</link>
<link>
<name>VarSpanTrieBuilder.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/VarSpanTrieBuilder.h</locationURI>
</link>
<link>
<name>Vocabulary.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/Vocabulary.cpp</locationURI>
</link>
<link>
<name>Vocabulary.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/Vocabulary.h</locationURI>
</link>
<link>
<name>Word.cpp</name>
<type>1</type>
@ -1336,6 +1456,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/ChartRuleLookupManagerMemory.h</locationURI>
</link>
<link>
<name>CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp</locationURI>
</link>
<link>
<name>CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h</locationURI>
</link>
<link>
<name>CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp</name>
<type>1</type>
@ -1381,6 +1511,16 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>DynSAInclude/FileHandler.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/FileHandler.cpp</locationURI>
</link>
<link>
<name>DynSAInclude/FileHandler.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/FileHandler.h</locationURI>
</link>
<link>
<name>DynSAInclude/Jamfile</name>
<type>1</type>
@ -1396,26 +1536,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/RandLMFilter.h</locationURI>
</link>
<link>
<name>DynSAInclude/bin</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>DynSAInclude/fdstream.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/fdstream.h</locationURI>
</link>
<link>
<name>DynSAInclude/file.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/file.cpp</locationURI>
</link>
<link>
<name>DynSAInclude/file.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/file.h</locationURI>
</link>
<link>
<name>DynSAInclude/hash.h</name>
<type>1</type>
@ -1616,211 +1741,16 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>RuleTable/Jamfile</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/Jamfile</locationURI>
</link>
<link>
<name>RuleTable/Loader.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/Loader.h</locationURI>
</link>
<link>
<name>RuleTable/LoaderCompact.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderCompact.cpp</locationURI>
</link>
<link>
<name>RuleTable/LoaderCompact.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderCompact.h</locationURI>
</link>
<link>
<name>RuleTable/LoaderFactory.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderFactory.cpp</locationURI>
</link>
<link>
<name>RuleTable/LoaderFactory.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderFactory.h</locationURI>
</link>
<link>
<name>RuleTable/LoaderHiero.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderHiero.cpp</locationURI>
</link>
<link>
<name>RuleTable/LoaderHiero.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderHiero.h</locationURI>
</link>
<link>
<name>RuleTable/LoaderStandard.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderStandard.cpp</locationURI>
</link>
<link>
<name>RuleTable/LoaderStandard.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderStandard.h</locationURI>
</link>
<link>
<name>RuleTable/PhraseDictionaryALSuffixArray.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryALSuffixArray.cpp</locationURI>
</link>
<link>
<name>RuleTable/PhraseDictionaryALSuffixArray.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryALSuffixArray.h</locationURI>
</link>
<link>
<name>RuleTable/PhraseDictionaryNodeSCFG.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryNodeSCFG.cpp</locationURI>
</link>
<link>
<name>RuleTable/PhraseDictionaryNodeSCFG.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryNodeSCFG.h</locationURI>
</link>
<link>
<name>RuleTable/PhraseDictionaryOnDisk.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryOnDisk.cpp</locationURI>
</link>
<link>
<name>RuleTable/PhraseDictionaryOnDisk.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryOnDisk.h</locationURI>
</link>
<link>
<name>RuleTable/PhraseDictionarySCFG.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionarySCFG.cpp</locationURI>
</link>
<link>
<name>RuleTable/PhraseDictionarySCFG.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionarySCFG.h</locationURI>
</link>
<link>
<name>RuleTable/Trie.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/Trie.cpp</locationURI>
</link>
<link>
<name>RuleTable/Trie.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/Trie.h</locationURI>
</link>
<link>
<name>RuleTable/UTrie.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/UTrie.cpp</locationURI>
</link>
<link>
<name>RuleTable/UTrie.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/UTrie.h</locationURI>
</link>
<link>
<name>RuleTable/UTrieNode.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/UTrieNode.cpp</locationURI>
</link>
<link>
<name>RuleTable/UTrieNode.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/UTrieNode.h</locationURI>
</link>
<link>
<name>RuleTable/bin</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>Scope3Parser/ApplicableRuleTrie.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/ApplicableRuleTrie.cpp</locationURI>
</link>
<link>
<name>Scope3Parser/ApplicableRuleTrie.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/ApplicableRuleTrie.h</locationURI>
</link>
<link>
<name>Scope3Parser/IntermediateVarSpanNode.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/IntermediateVarSpanNode.h</locationURI>
</link>
<link>
<name>Scope3Parser/Jamfile</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/Jamfile</locationURI>
</link>
<link>
<name>Scope3Parser/Parser.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/Parser.cpp</locationURI>
</link>
<link>
<name>Scope3Parser/Parser.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/Parser.h</locationURI>
</link>
<link>
<name>Scope3Parser/SentenceMap.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/SentenceMap.h</locationURI>
</link>
<link>
<name>Scope3Parser/StackLattice.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLattice.h</locationURI>
</link>
<link>
<name>Scope3Parser/StackLatticeBuilder.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLatticeBuilder.cpp</locationURI>
</link>
<link>
<name>Scope3Parser/StackLatticeBuilder.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLatticeBuilder.h</locationURI>
</link>
<link>
<name>Scope3Parser/StackLatticeSearcher.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLatticeSearcher.h</locationURI>
</link>
<link>
<name>Scope3Parser/VarSpanNode.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/VarSpanNode.h</locationURI>
</link>
<link>
<name>Scope3Parser/VarSpanTrieBuilder.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/VarSpanTrieBuilder.cpp</locationURI>
</link>
<link>
<name>Scope3Parser/VarSpanTrieBuilder.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/VarSpanTrieBuilder.h</locationURI>
</link>
<link>
<name>Scope3Parser/bin</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/darwin-4.2.1</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/gcc-4.6</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>CYKPlusParser/bin/clang-darwin-4.2.1</name>
<type>2</type>
@ -1832,12 +1762,7 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>DynSAInclude/bin/clang-darwin-4.2.1</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>DynSAInclude/bin/darwin-4.2.1</name>
<name>CYKPlusParser/bin/gcc-4.6</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@ -1856,21 +1781,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/lm.log</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>Scope3Parser/bin/darwin-4.2.1</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/darwin-4.2.1/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>CYKPlusParser/bin/clang-darwin-4.2.1/release</name>
<type>2</type>
@ -1882,12 +1802,7 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>DynSAInclude/bin/clang-darwin-4.2.1/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>DynSAInclude/bin/darwin-4.2.1/release</name>
<name>CYKPlusParser/bin/gcc-4.6/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@ -1901,21 +1816,16 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>Scope3Parser/bin/darwin-4.2.1/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/darwin-4.2.1/release/debug-symbols-on</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>CYKPlusParser/bin/clang-darwin-4.2.1/release/debug-symbols-on</name>
<type>2</type>
@ -1927,12 +1837,7 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on</name>
<name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@ -1951,21 +1856,16 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>CYKPlusParser/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static</name>
<type>2</type>
@ -1982,12 +1882,7 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
<name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@ -2011,31 +1906,16 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>CYKPlusParser/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
@ -2072,12 +1952,7 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/DotChartOnDisk.o</locationURI>
</link>
<link>
<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
<name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@ -2191,91 +2066,6 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderCompact.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderCompact.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderFactory.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderFactory.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderHiero.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderHiero.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderStandard.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderStandard.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryALSuffixArray.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryALSuffixArray.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryNodeSCFG.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryNodeSCFG.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryOnDisk.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryOnDisk.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionarySCFG.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionarySCFG.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Trie.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Trie.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/UTrie.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/UTrie.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/UTrieNode.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/UTrieNode.o</locationURI>
</link>
<link>
<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/ApplicableRuleTrie.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/ApplicableRuleTrie.o</locationURI>
</link>
<link>
<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Parser.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Parser.o</locationURI>
</link>
<link>
<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/StackLatticeBuilder.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/StackLatticeBuilder.o</locationURI>
</link>
<link>
<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/VarSpanTrieBuilder.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/VarSpanTrieBuilder.o</locationURI>
</link>
<link>
<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/AlignmentInfo.o</name>
<type>1</type>
@ -2751,6 +2541,56 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libmoses_internal.a</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ApplicableRuleTrie.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ApplicableRuleTrie.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/FuzzyMatchWrapper.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/FuzzyMatchWrapper.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/Parser.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/Parser.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/SentenceAlignment.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/SentenceAlignment.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/StackLatticeBuilder.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/StackLatticeBuilder.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/SuffixArray.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/SuffixArray.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/VarSpanTrieBuilder.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/VarSpanTrieBuilder.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/Vocabulary.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/Vocabulary.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/libScope3Parser.a</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/libScope3Parser.a</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/libfuzzy-match.a</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/libfuzzy-match.a</locationURI>
</link>
<link>
<name>CYKPlusParser/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DotChartOnDisk.o</name>
<type>1</type>
@ -2787,24 +2627,39 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libCYKPlusParser.a</locationURI>
</link>
<link>
<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libdynsa.a</name>
<name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerCYKPlus.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libdynsa.a</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerCYKPlus.o</locationURI>
</link>
<link>
<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libdynsa.a</name>
<name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerMemory.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libdynsa.a</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerMemory.o</locationURI>
</link>
<link>
<name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerMemoryPerSentence.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerMemoryPerSentence.o</locationURI>
</link>
<link>
<name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerOnDisk.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerOnDisk.o</locationURI>
</link>
<link>
<name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/DotChartInMemory.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/DotChartInMemory.o</locationURI>
</link>
<link>
<name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/DotChartOnDisk.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/DotChartOnDisk.o</locationURI>
</link>
<link>
<name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/libCYKPlusParser.a</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/libCYKPlusParser.a</locationURI>
</link>
<link>
<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Base.o</name>
@ -2921,91 +2776,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/libLM.a</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderCompact.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderCompact.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderFactory.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderFactory.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderHiero.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderHiero.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderStandard.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderStandard.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryALSuffixArray.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryALSuffixArray.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryNodeSCFG.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryNodeSCFG.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryOnDisk.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryOnDisk.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionarySCFG.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionarySCFG.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Trie.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Trie.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/UTrie.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/UTrie.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/UTrieNode.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/UTrieNode.o</locationURI>
</link>
<link>
<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libRuleTable.a</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libRuleTable.a</locationURI>
</link>
<link>
<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ApplicableRuleTrie.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ApplicableRuleTrie.o</locationURI>
</link>
<link>
<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Parser.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Parser.o</locationURI>
</link>
<link>
<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/StackLatticeBuilder.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/StackLatticeBuilder.o</locationURI>
</link>
<link>
<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/VarSpanTrieBuilder.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/VarSpanTrieBuilder.o</locationURI>
</link>
<link>
<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libScope3Parser.a</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libScope3Parser.a</locationURI>
</link>
<link>
<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</name>
<type>1</type>
@ -3021,35 +2791,5 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</locationURI>
</link>
<link>
<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</locationURI>
</link>
<link>
<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/params.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/params.o</locationURI>
</link>
<link>
<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</locationURI>
</link>
<link>
<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</locationURI>
</link>
<link>
<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/params.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/params.o</locationURI>
</link>
<link>
<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</locationURI>
</link>
</linkedResources>
</projectDescription>

View File

@ -0,0 +1,102 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{85811FDF-8AD1-4490-A545-B2F51931A18C}</ProjectGuid>
<RootNamespace>mosescmd</RootNamespace>
<Keyword>Win32Proj</Keyword>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>Unicode</CharacterSet>
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
<OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
<IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>C:\xmlrpc-c\include;C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
</ClCompile>
<Link>
<AdditionalDependencies>libxmlrpc_server_abyss.lib;libxmlrpc_server.lib;libxmlrpc_abyss.lib;libxmlrpc.lib;libxmlrpc_util.lib;libxmlrpc_xmlparse.lib;libxmlrpc_xmltok.lib;libxmlrpc++.lib;C:\GnuWin32\lib\zlib.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention>
</DataExecutionPrevention>
<TargetMachine>MachineX86</TargetMachine>
<AdditionalLibraryDirectories>C:\xmlrpc-c\bin\Debug-Static-Win32;C:\boost\boost_1_47\lib</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<AdditionalIncludeDirectories>C:\xmlrpc-c\include;C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<Link>
<AdditionalDependencies>libxmlrpc_server_abyss.lib;libxmlrpc_server.lib;libxmlrpc_abyss.lib;libxmlrpc.lib;libxmlrpc_util.lib;libxmlrpc_xmlparse.lib;libxmlrpc_xmltok.lib;libxmlrpc++.lib;C:\GnuWin32\lib\zlib.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<OptimizeReferences>true</OptimizeReferences>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention>
</DataExecutionPrevention>
<TargetMachine>MachineX86</TargetMachine>
<AdditionalLibraryDirectories>C:\xmlrpc-c\bin\Release-Static-Win32;C:\boost\boost_1_47\lib</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\server\mosesserver.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@ -0,0 +1,297 @@
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 46;
objects = {
/* Begin PBXBuildFile section */
1E6D9FF115D027F00064D436 /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EB3EBD515D0269B006B9CF1 /* libmoses.a */; };
1EB3EBB315D024C7006B9CF1 /* processLexicalTableMin.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EB3EBB215D024C7006B9CF1 /* processLexicalTableMin.cpp */; };
/* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */
1E6D9FF215D0292D0064D436 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1EB3EBD015D0269B006B9CF1 /* moses.xcodeproj */;
proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = moses;
};
1EB3EBD415D0269B006B9CF1 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1EB3EBD015D0269B006B9CF1 /* moses.xcodeproj */;
proxyType = 2;
remoteGlobalIDString = D2AAC046055464E500DB518D;
remoteInfo = moses;
};
/* End PBXContainerItemProxy section */
/* Begin PBXCopyFilesBuildPhase section */
1E3A0AEA15D0242A003EF9B4 /* CopyFiles */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = /usr/share/man/man1/;
dstSubfolderSpec = 0;
files = (
);
runOnlyForDeploymentPostprocessing = 1;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
1E3A0AEC15D0242A003EF9B4 /* processLexicalTableMin */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = processLexicalTableMin; sourceTree = BUILT_PRODUCTS_DIR; };
1EB3EBB215D024C7006B9CF1 /* processLexicalTableMin.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = processLexicalTableMin.cpp; path = ../../misc/processLexicalTableMin.cpp; sourceTree = "<group>"; };
1EB3EBD015D0269B006B9CF1 /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = moses.xcodeproj; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
1E3A0AE915D0242A003EF9B4 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
1E6D9FF115D027F00064D436 /* libmoses.a in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
1E3A0AE115D02427003EF9B4 = {
isa = PBXGroup;
children = (
1EB3EBB215D024C7006B9CF1 /* processLexicalTableMin.cpp */,
1E3A0AED15D0242A003EF9B4 /* Products */,
1EB3EBD015D0269B006B9CF1 /* moses.xcodeproj */,
);
sourceTree = "<group>";
};
1E3A0AED15D0242A003EF9B4 /* Products */ = {
isa = PBXGroup;
children = (
1E3A0AEC15D0242A003EF9B4 /* processLexicalTableMin */,
);
name = Products;
sourceTree = "<group>";
};
1EB3EBD115D0269B006B9CF1 /* Products */ = {
isa = PBXGroup;
children = (
1EB3EBD515D0269B006B9CF1 /* libmoses.a */,
);
name = Products;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
1E3A0AEB15D0242A003EF9B4 /* processLexicalTableMin */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1E3A0AF615D0242B003EF9B4 /* Build configuration list for PBXNativeTarget "processLexicalTableMin" */;
buildPhases = (
1E3A0AE815D0242A003EF9B4 /* Sources */,
1E3A0AE915D0242A003EF9B4 /* Frameworks */,
1E3A0AEA15D0242A003EF9B4 /* CopyFiles */,
);
buildRules = (
);
dependencies = (
1E6D9FF315D0292D0064D436 /* PBXTargetDependency */,
);
name = processLexicalTableMin;
productName = processLexicalTableMin;
productReference = 1E3A0AEC15D0242A003EF9B4 /* processLexicalTableMin */;
productType = "com.apple.product-type.tool";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
1E3A0AE315D02427003EF9B4 /* Project object */ = {
isa = PBXProject;
buildConfigurationList = 1E3A0AE615D02427003EF9B4 /* Build configuration list for PBXProject "processLexicalTableMin" */;
compatibilityVersion = "Xcode 3.2";
developmentRegion = English;
hasScannedForEncodings = 0;
knownRegions = (
en,
);
mainGroup = 1E3A0AE115D02427003EF9B4;
productRefGroup = 1E3A0AED15D0242A003EF9B4 /* Products */;
projectDirPath = "";
projectReferences = (
{
ProductGroup = 1EB3EBD115D0269B006B9CF1 /* Products */;
ProjectRef = 1EB3EBD015D0269B006B9CF1 /* moses.xcodeproj */;
},
);
projectRoot = "";
targets = (
1E3A0AEB15D0242A003EF9B4 /* processLexicalTableMin */,
);
};
/* End PBXProject section */
/* Begin PBXReferenceProxy section */
1EB3EBD515D0269B006B9CF1 /* libmoses.a */ = {
isa = PBXReferenceProxy;
fileType = archive.ar;
path = libmoses.a;
remoteRef = 1EB3EBD415D0269B006B9CF1 /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR;
};
/* End PBXReferenceProxy section */
/* Begin PBXSourcesBuildPhase section */
1E3A0AE815D0242A003EF9B4 /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
1EB3EBB315D024C7006B9CF1 /* processLexicalTableMin.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin PBXTargetDependency section */
1E6D9FF315D0292D0064D436 /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
name = moses;
targetProxy = 1E6D9FF215D0292D0064D436 /* PBXContainerItemProxy */;
};
/* End PBXTargetDependency section */
/* Begin XCBuildConfiguration section */
1E3A0AF415D0242B003EF9B4 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
COPY_PHASE_STRIP = NO;
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_DYNAMIC_NO_PIC = NO;
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_SYMBOLS_PRIVATE_EXTERN = NO;
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = (
../../,
../../irstlm/include,
/opt/local/include,
);
MACOSX_DEPLOYMENT_TARGET = 10.7;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = macosx;
USER_HEADER_SEARCH_PATHS = "../../ ../../irstlm/include /opt/local/include ../../moses/src";
};
name = Debug;
};
1E3A0AF515D0242B003EF9B4 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
COPY_PHASE_STRIP = YES;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = (
../../,
../../irstlm/include,
/opt/local/include,
);
MACOSX_DEPLOYMENT_TARGET = 10.7;
SDKROOT = macosx;
USER_HEADER_SEARCH_PATHS = "../../ ../../irstlm/include /opt/local/include ../../moses/src";
};
name = Release;
};
1E3A0AF715D0242B003EF9B4 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
GCC_PREPROCESSOR_DEFINITIONS = WITH_THREADS;
"GCC_PREPROCESSOR_DEFINITIONS[arch=*]" = WITH_THREADS;
LIBRARY_SEARCH_PATHS = (
../../irstlm/lib,
../../srilm/lib/macosx,
../../randlm/lib,
/opt/local/lib,
);
OTHER_LDFLAGS = (
"-lz",
"-lirstlm",
"-lmisc",
"-ldstruct",
"-loolm",
"-lflm",
"-llattice",
"-lrandlm",
"-lboost_thread-mt",
);
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Debug;
};
1E3A0AF815D0242B003EF9B4 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
GCC_PREPROCESSOR_DEFINITIONS = WITH_THREADS;
LIBRARY_SEARCH_PATHS = (
../../irstlm/lib,
../../srilm/lib/macosx,
../../randlm/lib,
/opt/local/lib,
);
OTHER_LDFLAGS = (
"-lz",
"-lirstlm",
"-lmisc",
"-ldstruct",
"-loolm",
"-lflm",
"-llattice",
"-lrandlm",
"-lboost_thread-mt",
);
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
1E3A0AE615D02427003EF9B4 /* Build configuration list for PBXProject "processLexicalTableMin" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1E3A0AF415D0242B003EF9B4 /* Debug */,
1E3A0AF515D0242B003EF9B4 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1E3A0AF615D0242B003EF9B4 /* Build configuration list for PBXNativeTarget "processLexicalTableMin" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1E3A0AF715D0242B003EF9B4 /* Debug */,
1E3A0AF815D0242B003EF9B4 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 1E3A0AE315D02427003EF9B4 /* Project object */;
}

View File

@ -0,0 +1,304 @@
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 46;
objects = {
/* Begin PBXBuildFile section */
1EF3D68A15D02AEF00969478 /* processPhraseTableMin.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EF3D68915D02AEF00969478 /* processPhraseTableMin.cpp */; };
1EF3D6A415D02B6400969478 /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EF3D69915D02B4400969478 /* libmoses.a */; };
/* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */
1EF3D69815D02B4400969478 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1EF3D69415D02B4400969478 /* moses.xcodeproj */;
proxyType = 2;
remoteGlobalIDString = D2AAC046055464E500DB518D;
remoteInfo = moses;
};
1EF3D6A515D02B6B00969478 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1EF3D69415D02B4400969478 /* moses.xcodeproj */;
proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = moses;
};
/* End PBXContainerItemProxy section */
/* Begin PBXCopyFilesBuildPhase section */
1E6D9FFD15D02A8D0064D436 /* CopyFiles */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = /usr/share/man/man1/;
dstSubfolderSpec = 0;
files = (
);
runOnlyForDeploymentPostprocessing = 1;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
1E6D9FFF15D02A8D0064D436 /* processPhraseTableMin */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = processPhraseTableMin; sourceTree = BUILT_PRODUCTS_DIR; };
1EF3D68915D02AEF00969478 /* processPhraseTableMin.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = processPhraseTableMin.cpp; path = ../../misc/processPhraseTableMin.cpp; sourceTree = "<group>"; };
1EF3D69415D02B4400969478 /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = moses.xcodeproj; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
1E6D9FFC15D02A8D0064D436 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
1EF3D6A415D02B6400969478 /* libmoses.a in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
1E6D9FF415D02A8C0064D436 = {
isa = PBXGroup;
children = (
1EF3D68915D02AEF00969478 /* processPhraseTableMin.cpp */,
1E6DA00015D02A8D0064D436 /* Products */,
1EF3D69415D02B4400969478 /* moses.xcodeproj */,
);
sourceTree = "<group>";
};
1E6DA00015D02A8D0064D436 /* Products */ = {
isa = PBXGroup;
children = (
1E6D9FFF15D02A8D0064D436 /* processPhraseTableMin */,
);
name = Products;
sourceTree = "<group>";
};
1EF3D69515D02B4400969478 /* Products */ = {
isa = PBXGroup;
children = (
1EF3D69915D02B4400969478 /* libmoses.a */,
);
name = Products;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
1E6D9FFE15D02A8D0064D436 /* processPhraseTableMin */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1E6DA00915D02A8D0064D436 /* Build configuration list for PBXNativeTarget "processPhraseTableMin" */;
buildPhases = (
1E6D9FFB15D02A8D0064D436 /* Sources */,
1E6D9FFC15D02A8D0064D436 /* Frameworks */,
1E6D9FFD15D02A8D0064D436 /* CopyFiles */,
);
buildRules = (
);
dependencies = (
1EF3D6A615D02B6B00969478 /* PBXTargetDependency */,
);
name = processPhraseTableMin;
productName = processPhraseTableMin;
productReference = 1E6D9FFF15D02A8D0064D436 /* processPhraseTableMin */;
productType = "com.apple.product-type.tool";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
1E6D9FF615D02A8C0064D436 /* Project object */ = {
isa = PBXProject;
buildConfigurationList = 1E6D9FF915D02A8C0064D436 /* Build configuration list for PBXProject "processPhraseTableMin" */;
compatibilityVersion = "Xcode 3.2";
developmentRegion = English;
hasScannedForEncodings = 0;
knownRegions = (
en,
);
mainGroup = 1E6D9FF415D02A8C0064D436;
productRefGroup = 1E6DA00015D02A8D0064D436 /* Products */;
projectDirPath = "";
projectReferences = (
{
ProductGroup = 1EF3D69515D02B4400969478 /* Products */;
ProjectRef = 1EF3D69415D02B4400969478 /* moses.xcodeproj */;
},
);
projectRoot = "";
targets = (
1E6D9FFE15D02A8D0064D436 /* processPhraseTableMin */,
);
};
/* End PBXProject section */
/* Begin PBXReferenceProxy section */
1EF3D69915D02B4400969478 /* libmoses.a */ = {
isa = PBXReferenceProxy;
fileType = archive.ar;
path = libmoses.a;
remoteRef = 1EF3D69815D02B4400969478 /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR;
};
/* End PBXReferenceProxy section */
/* Begin PBXSourcesBuildPhase section */
1E6D9FFB15D02A8D0064D436 /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
1EF3D68A15D02AEF00969478 /* processPhraseTableMin.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin PBXTargetDependency section */
1EF3D6A615D02B6B00969478 /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
name = moses;
targetProxy = 1EF3D6A515D02B6B00969478 /* PBXContainerItemProxy */;
};
/* End PBXTargetDependency section */
/* Begin XCBuildConfiguration section */
1E6DA00715D02A8D0064D436 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
COPY_PHASE_STRIP = NO;
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_DYNAMIC_NO_PIC = NO;
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_SYMBOLS_PRIVATE_EXTERN = NO;
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
LIBRARY_SEARCH_PATHS = "";
MACOSX_DEPLOYMENT_TARGET = 10.7;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = macosx;
};
name = Debug;
};
1E6DA00815D02A8D0064D436 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
COPY_PHASE_STRIP = YES;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
LIBRARY_SEARCH_PATHS = "";
MACOSX_DEPLOYMENT_TARGET = 10.7;
SDKROOT = macosx;
};
name = Release;
};
1E6DA00A15D02A8D0064D436 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
GCC_PREPROCESSOR_DEFINITIONS = WITH_THREADS;
HEADER_SEARCH_PATHS = (
../../,
../../irstlm/include,
/opt/local/include,
../../moses/src,
../../cmph/include,
);
LIBRARY_SEARCH_PATHS = (
../../irstlm/lib,
../../srilm/lib/macosx,
../../randlm/lib,
/opt/local/lib,
../../cmph/lib,
);
OTHER_LDFLAGS = (
"-lz",
"-lirstlm",
"-lmisc",
"-ldstruct",
"-loolm",
"-lflm",
"-llattice",
"-lrandlm",
"-lboost_thread-mt",
"-lcmph",
);
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Debug;
};
1E6DA00B15D02A8D0064D436 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
GCC_PREPROCESSOR_DEFINITIONS = WITH_THREADS;
HEADER_SEARCH_PATHS = (
../../,
../../irstlm/include,
/opt/local/include,
../../moses/src,
../../cmph/include,
);
LIBRARY_SEARCH_PATHS = (
../../irstlm/lib,
../../srilm/lib/macosx,
../../randlm/lib,
/opt/local/lib,
../../cmph/lib,
);
OTHER_LDFLAGS = (
"-lz",
"-lirstlm",
"-lmisc",
"-ldstruct",
"-loolm",
"-lflm",
"-llattice",
"-lrandlm",
"-lboost_thread-mt",
"-lcmph",
);
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
1E6D9FF915D02A8C0064D436 /* Build configuration list for PBXProject "processPhraseTableMin" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1E6DA00715D02A8D0064D436 /* Debug */,
1E6DA00815D02A8D0064D436 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1E6DA00915D02A8D0064D436 /* Build configuration list for PBXNativeTarget "processPhraseTableMin" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1E6DA00A15D02A8D0064D436 /* Debug */,
1E6DA00B15D02A8D0064D436 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 1E6D9FF615D02A8C0064D436 /* Project object */;
}

View File

@ -41,9 +41,12 @@
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.623959371" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.892917290" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1401298824" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
<listOptionValue builtIn="false" value="/opt/local/include"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.1952961175" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1420621104" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1724141901" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
@ -130,4 +133,5 @@
<storageModule moduleId="refreshScope" versionNumber="1">
<resource resourceType="PROJECT" workspacePath="/util"/>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>

28
contrib/python/README.md Normal file
View File

@ -0,0 +1,28 @@
# Python interface to Moses
The idea is to have some of Moses' internals exposed to Python (inspired on pycdec).
## What's been interfaced?
* Binary phrase table:
Moses::PhraseDictionaryTree.h
## Building
1. Build the python extension
python setup.py build_ext -i [--with-cmph]
2. Check the example code
echo "casa" | python example.py examples/phrase-table 5 1
echo "essa casa" | python example.py examples/phrase-table 5 1
## Changing the code
If you want to add your changes you are going to have to recompile the cython code.
1. Compile the cython code (use Cython 0.16): this will generate binpt/binpt.cpp
cython --cplus binpt/binpt.pyx

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,25 @@
from libcpp.string cimport string
from libcpp.vector cimport vector
from libcpp.pair cimport pair
# Convenience alias: Moses stores target words as pointers to strings.
ctypedef string* str_pointer

# Moses typedefs re-declared for Cython: a target candidate is a pair of
# (pointers to target words, score vector).
cdef extern from 'TypeDef.h' namespace 'Moses':
    ctypedef vector[float] Scores
    ctypedef pair[vector[str_pointer], Scores] StringTgtCand

# Minimal interface to the Moses binary phrase-table reader.
cdef extern from 'PhraseDictionaryTree.h' namespace 'Moses':
    cdef cppclass PhraseDictionaryTree:
        PhraseDictionaryTree(unsigned nscores)
        # Enable/disable reading of word-alignment info from the table.
        void UseWordAlignment(bint use)
        bint UseWordAlignment()
        # Loads the table files whose stem is 'path'.
        int Read(string& path)
        # Fills 'rv' with the target candidates for source phrase 'fs'.
        void GetTargetCandidates(vector[string]& fs,
                                 vector[StringTgtCand]& rv)
        # Overload that additionally returns word-alignment strings in 'wa'.
        void GetTargetCandidates(vector[string]& fs,
                                 vector[StringTgtCand]& rv,
                                 vector[string]& wa)

# Moses' tokenizer, used here to split a query line into source words.
cdef extern from 'Util.h' namespace 'Moses':
    cdef vector[string] Tokenize(string& text, string& delimiters)

View File

@ -0,0 +1,166 @@
from libcpp.string cimport string
from libcpp.vector cimport vector
import os
import cython
cpdef int fsign(float x):
    '''Return the sign of float x as an int: +1 when x >= 0 (zero counts
    as positive), -1 otherwise. Declared cpdef so the static typing makes
    the comparison cheap.'''
    if x >= 0:
        return 1
    return -1
cdef bytes as_str(data):
    '''Coerce a text object to a byte string: unicode is UTF-8 encoded,
    bytes pass through unchanged, anything else raises TypeError.'''
    if isinstance(data, unicode):
        return data.encode('UTF-8')
    if isinstance(data, bytes):
        return data
    raise TypeError('Cannot convert %s to string' % type(data))
cdef class QueryResult(object):
    '''This class represents a query result, that is,
    a target phrase (tuple of words/strings),
    a feature vector (tuple of floats)
    and possibly an alignment info (string).
    Here we don't bother parsing the alignment info, as it's often only
    used as is, therefore saving some time.'''

    # Target phrase as a tuple of byte strings.
    cdef tuple _words
    # Feature/score vector as a tuple of floats.
    cdef tuple _scores
    # Raw word-alignment string, or None when the table has no alignment info.
    cdef bytes _wa

    def __cinit__(self, words, scores, wa = None):
        '''Requires a tuple of words (as strings) and a tuple of scores (as floats).
        Word-alignment info (as string) may be provided'''
        self._words = words
        self._scores = scores
        self._wa = wa

    @property
    def words(self):
        '''Tuple of words (as strings)'''
        return self._words

    @property
    def scores(self):
        '''Tuple of scores (as floats)'''
        return self._scores

    @property
    def wa(self):
        '''Word-alignment info (as string)'''
        return self._wa

    @staticmethod
    def desc(x, y, keys = lambda r: r.scores[0]):
        '''Returns the sign of keys(y) - keys(x), i.e. a descending-order
        comparator for sorting results.
        Can only be used if scores is not an empty vector as
        keys defaults to scores[0]'''
        return fsign(keys(y) - keys(x))

    def __str__(self):
        '''Returns a string such as: <words> ||| <scores> [||| word-alignment info]'''
        if self._wa:
            return ' ||| '.join( (' '.join(self._words),
                ' '.join([str(x) for x in self._scores]),
                self._wa) )
        else:
            return ' ||| '.join( (' '.join(self._words),
                ' '.join([str(x) for x in self._scores]) ) )

    def __repr__(self):
        return repr((repr(self._words), repr(self._scores), repr(self._wa)))
cdef QueryResult get_query_result(StringTgtCand& cand, object wa = None):
    '''Converts a StringTgtCandidate (c++ object) and possibly a word-alignment info (string)
    to a QueryResult (python object).'''
    # cand.first holds pointers to the target words; c_str() copies each one
    # into an independent Python byte string, so the result does not alias C++ memory.
    cdef tuple words = tuple([cand.first[i].c_str() for i in range(cand.first.size())])
    cdef tuple scores = tuple([cand.second[i] for i in range(cand.second.size())])
    return QueryResult(words, scores, wa)
cdef class BinaryPhraseTable(object):
    '''This class encapsulates a Moses::PhraseDictionaryTree for operations over
    binary phrase tables.'''

    # Owned pointer to the underlying C++ dictionary, allocated in __cinit__
    # and released in __dealloc__.
    cdef PhraseDictionaryTree* __tree
    # Stem path of the table files (e.g. 'europarl.fr-en' for europarl.fr-en.binphr.*).
    cdef bytes _path
    # Number of scores per target candidate (usually 5).
    cdef unsigned _nscores
    # Whether the table carries word-alignment info.
    cdef bint _wa
    # Token delimiters handed to Moses::Tokenize when splitting a query line.
    cdef bytes _delimiters

    def __cinit__(self, bytes path, unsigned nscores = 5, bint wa = False, delimiters = ' \t'):
        '''It requires a path to a binary phrase table (stem of the table, e.g europarl.fr-en
        is the stem for europarl.fr-en.binphr.*).
        Moses::PhraseDictionaryTree also needs to be aware of the number of scores (usually 5),
        and whether or not there is word-alignment info in the table (usually not).
        One can also specify the token delimiters, for Moses::Tokenize(text, delimiters), which is space or tab by default.'''
        # Fail fast with a clear message rather than letting the C++ reader choke.
        if not BinaryPhraseTable.isValidBinaryTable(path, wa):
            raise ValueError, "'%s' doesn't seem a valid binary table." % path
        self._path = path
        self._nscores = nscores
        self._wa = wa
        self._delimiters = delimiters
        self.__tree = new PhraseDictionaryTree(nscores)
        self.__tree.UseWordAlignment(wa)
        self.__tree.Read(string(path))

    def __dealloc__(self):
        # Release the C++ object allocated in __cinit__.
        del self.__tree

    @staticmethod
    def isValidBinaryTable(stem, bint wa = False):
        '''This sanity check was added to the constructor, but you can access it from outside this class
        to determine whether or not you are providing a valid stem to BinaryPhraseTable.'''
        # Tables built with word-alignment info use the '.wa' variants of
        # the srctree/tgtdata files.
        if wa:
            return os.path.isfile(stem + ".binphr.idx") \
                and os.path.isfile(stem + ".binphr.srctree.wa") \
                and os.path.isfile(stem + ".binphr.srcvoc") \
                and os.path.isfile(stem + ".binphr.tgtdata.wa") \
                and os.path.isfile(stem + ".binphr.tgtvoc")
        else:
            return os.path.isfile(stem + ".binphr.idx") \
                and os.path.isfile(stem + ".binphr.srctree") \
                and os.path.isfile(stem + ".binphr.srcvoc") \
                and os.path.isfile(stem + ".binphr.tgtdata") \
                and os.path.isfile(stem + ".binphr.tgtvoc")

    @property
    def path(self):
        '''Stem path of the binary table (as bytes).'''
        return self._path

    @property
    def nscores(self):
        '''Number of scores per target candidate.'''
        return self._nscores

    @property
    def wa(self):
        '''True if the table contains word-alignment info.'''
        return self._wa

    @property
    def delimiters(self):
        '''Delimiters used when tokenizing source phrases.'''
        return self._delimiters

    def query(self, line, cmp = None, top = 0):
        '''Queries the phrase table and returns a list of matches.
        Each match is a QueryResult.
        If 'cmp' is defined the return list is sorted.
        If 'top' is defined, only the top elements will be returned.'''
        cdef bytes text = as_str(line)
        cdef vector[string] fphrase = Tokenize(string(text), string(self._delimiters))
        # Heap-allocated so the C++ API can fill them in place; freed below.
        cdef vector[StringTgtCand]* rv = new vector[StringTgtCand]()
        cdef vector[string]* wa = NULL
        cdef list phrases
        if not self.__tree.UseWordAlignment():
            self.__tree.GetTargetCandidates(fphrase, rv[0])
            phrases = [get_query_result(rv[0][i]) for i in range(rv.size())]
        else:
            # With word alignment enabled, wa[0][i] is the alignment string
            # for candidate i.
            wa = new vector[string]()
            self.__tree.GetTargetCandidates(fphrase, rv[0], wa[0])
            phrases = [get_query_result(rv[0][i], wa[0][i].c_str()) for i in range(rv.size())]
            del wa
        del rv
        # NOTE: 'sort(cmp=...)' is Python 2 only; this module predates Python 3.
        if cmp:
            phrases.sort(cmp=cmp)
        if top > 0:
            return phrases[0:top]
        else:
            return phrases

31
contrib/python/example.py Normal file
View File

@ -0,0 +1,31 @@
import binpt
#from binpt import QueryResult
import sys
if len(sys.argv) < 3:
print "Usage: %s phrase-table nscores [wa] < query > result" % (sys.argv[0])
sys.exit(0)
pt_file = sys.argv[1]
nscores = int(sys.argv[2])
wa = len(sys.argv) == 4
pt = binpt.BinaryPhraseTable(pt_file, nscores, wa)
print >> sys.stderr, "-ttable %s -nscores %d -alignment-info %s -delimiter '%s'\n" %(pt.path, pt.nscores, str(pt.wa), pt.delimiters)
for line in sys.stdin:
f = line.strip()
matches = pt.query(f, cmp = binpt.QueryResult.desc, top = 20)
print '\n'.join([' ||| '.join((f, str(e))) for e in matches])
'''
# This is how one would use the QueryResult object
for e in matches:
print ' '.join(e.words) # tuple of strings
print e.scores # tuple of floats
if e.wa:
print e.wa # string
'''

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,2 @@
1 essa
0 casa

Binary file not shown.

View File

@ -0,0 +1,4 @@
3 this
2 location
1 house
0 building

View File

@ -0,0 +1,4 @@
casa ||| building ||| 0.6 0.75 0.35 0.35 2.718 ||| 0-0 ||| 2 2
casa ||| house ||| 0.7 0.75 0.35 0.35 2.718 ||| 0-0 ||| 2 2
casa ||| location ||| 0.5 0.75 0.35 0.35 2.718 ||| 0-0 ||| 2 2
essa casa ||| this house ||| 0.7 0.5 0.8 0.6 2.718 ||| 0-0 1-1 ||| 2 2

47
contrib/python/setup.py Normal file
View File

@ -0,0 +1,47 @@
from distutils.core import setup
from distutils.extension import Extension
import os
import sys

# Home-grown switches are consumed from the end of sys.argv so that
# distutils never sees them.
available_switches = ['--with-cmph']
with_cmph = False
while sys.argv[-1] in available_switches:
    switch = sys.argv.pop()
    if switch == '--with-cmph':
        with_cmph = True

#### From here you probably don't need to change anything
#### unless a new dependency shows up in Moses
mosesdir = os.path.abspath('../../')
includes = [mosesdir, os.path.join(mosesdir, 'moses/src'), os.path.join(mosesdir, 'util')]
libdir = os.path.join(mosesdir, 'lib')

# System/runtime libraries, Moses libraries, and optional extras.
basic = ['z', 'stdc++', 'pthread', 'm', 'gcc_s', 'c',
         'boost_system', 'boost_thread', 'boost_filesystem', 'rt']
moses = ['OnDiskPt', 'kenutil', 'kenlm', 'LM', 'mert_lib', 'moses_internal',
         'CYKPlusParser', 'Scope3Parser', 'fuzzy-match', 'RuleTable',
         'CompactPT', 'moses', 'dynsa', 'pcfg_common' ]
additional = []
if with_cmph:
    additional.append('cmph')

# Link the Moses shared objects directly in addition to -l flags.
exobj = [os.path.join(libdir, 'lib' + name + '.so') for name in moses]

ext_modules = [
    Extension(name = 'binpt',
              sources = ['binpt/binpt.cpp'],
              language = 'C++',
              include_dirs = includes,
              extra_objects = exobj,
              library_dirs = [libdir],
              runtime_library_dirs = [libdir],
              libraries = basic + moses + additional,
              extra_compile_args = ['-O3', '-DNDEBUG'],
              )
]

setup(
    name='binpt',
    ext_modules=ext_modules
)

View File

@ -0,0 +1 @@
Wang Ling - lingwang at cs dot cmu dot edu

View File

@ -0,0 +1,91 @@
Implementation of the Relative Entropy-based Phrase table filtering algorithm by Wang Ling (Ling et al, 2012).
This implementation also calculates the significance scores for the phrase tables based on Fisher's exact test (Johnson et al., 2007). It uses a slightly modified version of the "sigtest-filter" by Chris Dyer.
-------BUILD INSTRUCTIONS-------
1 - Build the sigtest-filter binary
1.1 - Download and build SALM available at http://projectile.sv.cmu.edu/research/public/tools/salm/salm.htm
1.2 - Run "make SALMDIR=<path_to_salm>" in "<path_to_moses>/contrib/relent-filter/sigtest-filter" to create the executable filter-pt
2 - Build moses project by running "./bjam <options>", this will create the executables for relent filtering
-------USAGE INSTRUCTIONS-------
Required files:
s_train - source training file
t_train - target training file
moses_ini - path to the moses configuration file ( after tuning )
pruning_binaries - path to the relent pruning binaries ( should be "<path_to_moses>/bin" )
pruning_scripts - path to the relent pruning scripts ( should be "<path_to_moses>/contrib/relent-filter/scripts" )
sigbin - path to the sigtest filter binaries ( should be "<path_to_moses>/contrib/relent-filter/sigtest-filter" )
output_dir - path to write the output
1 - build suffix arrays for the source and target parallel training data
1.1 - run "<path to salm>/Bin/Linux/Index/IndexSA.O32 <s_train>" (or IndexSA.O64)
1.2 - run "<path to salm>/Bin/Linux/Index/IndexSA.O32 <t_train>" (or IndexSA.O64)
2 - calculate phrase pair scores by running:
perl <pruning_scripts>/calcPruningScores.pl -moses_ini <moses_ini> -training_s <s_train> -training_t <t_train> -prune_bin <pruning_binaries> -prune_scripts <pruning_scripts> -moses_scripts <path_to_moses>/scripts/training/ -workdir <output_dir> -dec_size 10000
this will create the following files in the <output_dir/scores/> dir:
count.txt - counts of the phrase pairs for N(s,t) N(s,*) and N(*,t)
divergence.txt - negative log of the divergence of the phrase pair
empirical.txt - empirical distribution of the phrase pairs N(s,t)/N(*,*)
rel_ent.txt - relative entropy of the phrase pairs
significance.txt - significance of the phrase pairs
You can use any one of these files for pruning and also combine these scores using <pruning_scripts>/interpolateScores.pl
3 - To actually prune a phrase table you should run <pruning_scripts>/prunePT.pl
For instance, to prune 30% of the phrase table using rel_ent run:
perl <pruning_scripts>/prunePT.pl -table <phrase_table_file> -scores <output_dir>/scores/rel_ent.txt -percentage 70 > <pruned_phrase_table_file>
You can also prune by threshold
perl <pruning_scripts>/prunePT.pl -table <phrase_table_file> -scores <output_dir>/scores/rel_ent.txt -threshold 0.1 > <pruned_phrase_table_file>
The same must be done for the reordering table by replacing <phrase_table_file> with the <reord_table_file>
perl <pruning_scripts>/prunePT.pl -table <reord_table_file> -scores <output_dir>/scores/rel_ent.txt -percentage 70 > <pruned_reord_table_file>
-------RUNNING STEP 2 IN PARALLEL-------
Step 2 requires the forced decoding of the whole set of phrase pairs in the table, so unless you test it on a small corpus, it usually requires a large amount of time to process.
Thus, we recommend users to run multiple instances of "<pruning_scripts>/calcPruningScores.pl" in parallel to process different parts of the phrase table.
To do this, run:
perl <pruning_scripts>/calcPruningScores.pl -moses_ini <moses_ini> -training_s <s_train> -training_t <t_train> -prune_bin <pruning_binaries> -prune_scripts <pruning_scripts> -moses_scripts <path_to_moses>/scripts/training/ -workdir <output_dir> -dec_size 10000 -start 0 -end 100000
The -start and -end tags tell the script to only calculate the results for phrase pairs between 0 and 99999.
Thus, an example of a shell script to run for the whole phrase table would be:
size=`wc <phrase_table_file> | gawk '{print $1}'`
phrases_per_process=100000
for i in $(seq 0 $phrases_per_process $size)
do
end=`expr $i + $phrases_per_process`
perl <pruning_scripts>/calcPruningScores.pl -moses_ini <moses_ini> -training_s <s_train> -training_t <t_train> -prune_bin <pruning_binaries> -prune_scripts <pruning_scripts> -moses_scripts <path_to_moses>/scripts/training/ -workdir <output_dir>.$i-$end -dec_size 10000 -start $i -end $end
done
After all processes finish, simply join the partial score files together in the same order.
-------REFERENCES-------
Ling, W., Graça, J., Trancoso, I., and Black, A. (2012). Entropy-based pruning for phrase-based
machine translation. In Proceedings of the 2012
Joint Conference on Empirical Methods in Natural Language Processing and
Computational Natural Language Learning (EMNLP-CoNLL), pp. 962-971.
H. Johnson, J. Martin, G. Foster and R. Kuhn. (2007) Improving Translation
Quality by Discarding Most of the Phrasetable. In Proceedings of the 2007
Joint Conference on Empirical Methods in Natural Language Processing and
Computational Natural Language Learning (EMNLP-CoNLL), pp. 967-975.

View File

@ -0,0 +1,53 @@
#!/usr/bin/perl -w
# Computes the empirical distribution N(s,t)/N(*,*) for a list of phrase-pair
# counts: every input line starts with a count, and the output is that count
# divided by the sum of all counts (one score per line, same order).
# read arguments
my $countFile = $ARGV[0];
my $ZCAT = "gzip -cd";
my $BZCAT = "bzcat";
&process_count_file($countFile);
sub process_count_file {
  # Two passes over the count file: first sum all counts to obtain the
  # normalizer N(*,*), then print count/N(*,*) for every line.
  $file = $_[0];
  open(COUNT_READER, &open_compressed($file)) or die "ERROR: Can't read $file";
  # Bug fix: the progress messages lacked a trailing newline, so successive
  # STDERR lines ran together.
  print STDERR "reading file to calculate normalizer\n";
  $normalizer=0;
  while(<COUNT_READER>) {
    my $line = $_;
    chomp($line);
    my @line_array = split(/\s+/, $line);
    my $count = $line_array[0];
    $normalizer+=$count;
  }
  close(COUNT_READER);
  # Guard against an empty file or all-zero counts: dividing by zero below
  # would only produce warnings and bogus scores.
  die "ERROR: normalizer is zero (empty count file '$file'?)\n" if $normalizer == 0;
  print STDERR "reading file again to print the counts\n";
  open(COUNT_READER, &open_compressed($file)) or die "ERROR: Can't read $file";
  while(<COUNT_READER>) {
    my $line = $_;
    chomp($line);
    my @line_array = split(/\s+/, $line);
    my $score = $line_array[0]/$normalizer;
    print $score."\n";
  }
  close(COUNT_READER);
}
sub open_compressed {
  # Resolve $file to something open() understands: fall back to a .bz2/.gz
  # sibling when the plain file is missing, and return a decompression pipe
  # for compressed inputs.
  my ($file) = @_;
  print STDERR "FILE: $file\n";
  # add extensions, if necessary
  unless (-e $file) {
    if    (-e "$file.bz2") { $file = "$file.bz2"; }
    elsif (-e "$file.gz")  { $file = "$file.gz"; }
  }
  # pipe zipped, if necessary
  if ($file =~ /\.bz2$/) { return "$BZCAT $file|"; }
  if ($file =~ /\.gz$/)  { return "$ZCAT $file|"; }
  return $file;
}

View File

@ -0,0 +1,351 @@
#!/usr/bin/perl -w
# Driver for relative-entropy phrase-table pruning: for every phrase pair of
# a Moses model it computes counts/significance, the empirical distribution,
# the divergence, and finally the relative entropy (see the relent-filter
# README for the overall workflow).
use Getopt::Long;
use File::Basename;
use POSIX;
# read arguments
my $line_start = 0;
my $line_end = LONG_MAX;
my $tmp_dir = "";
my $dec_size = LONG_MAX;
$_HELP = 1 if (@ARGV < 1 or !GetOptions ("moses_ini=s" => \$moses_ini, #moses conf file
  "start:i" => \$line_start, #first phrase to process
  "end:i" => \$line_end, #last sentence to process (not including)
  "training_s=s" => \$training_s, #source training file
  "training_t=s" => \$training_t, #target training file
  "prune_bin=s" => \$prune_bin, #binary files in the pruning toolkit
  "prune_scripts=s" => \$prune_scripts, #scripts in the pruning toolkit
  "sig_bin=s" => \$sig_bin, #binary files to calculate significance
  "moses_scripts=s" => \$moses_scripts, #dir with the moses scripts
  "tmp_dir:s" => \$tmp_dir, #dir for temporary files (defaults to <workdir>/tmp)
  "dec_size:i" => \$dec_size, #number of phrase pairs force-decoded per batch
  "workdir=s" => \$workdir)); #directory to put all the output files
# help message if arguments are not correct
if ($_HELP) {
  print "
Usage: perl calcPruningScores.pl [PARAMS]
Function: Calculates relative entropy for each phrase pair in a translation model.
Authors: Wang Ling ( lingwang at cs dot cmu dot edu )
PARAMS:
  -moses_ini : moses configuration file with the model to prune (phrase table, reordering table, weights etc...)
  -training_s : source training file, please run salm first
  -training_t : target training file, please run salm first
  -prune_bin : path to the binaries for pruning (probably <PATH_TO_MOSES>/bin)
  -prune_scripts : path to the scripts for pruning (probably the directory where this script is)
  -sig_bin : path to the binary for significance testing included in this toolkit
  -moses_scripts : path to the moses training scripts (where filter-model-given-input.pl is)
  -workdir : directory to produce the output
  -tmp_dir : directory to store temporary files (improve performance if stored in a local disk), omit to store in workdir
  -dec_size : number of phrase pairs to be decoded at a time, omit to decode all selected phrase pairs at once
  -start and -end : starting and ending phrase pairs to process, to be used if you want to launch multiple processes in parallel for different parts of the phrase table. If specified the process will process the phrase pairs from <start> to <end-1>
For any questions contact lingwang at cs dot cmu dot edu
";
  exit(1);
}
# setting up working dirs
my $TMP_DIR = $tmp_dir;
if ($tmp_dir eq ""){
  # Default the scratch space to a subdirectory of the work dir.
  $TMP_DIR = "$workdir/tmp";
}
my $SCORE_DIR = "$workdir/scores";
my $FILTER_DIR = "$TMP_DIR/filter";
# files for divergence module
my $SOURCE_FILE = "$TMP_DIR/source.txt";
my $CONSTRAINT_FILE = "$TMP_DIR/constraint.txt";
my $DIVERGENCE_FILE = "$SCORE_DIR/divergence.txt";
# files for significance module
my $SIG_TABLE_FILE = "$TMP_DIR/source_target.txt";
my $SIG_MOD_OUTPUT = "$TMP_DIR/sig_mod.out";
my $SIG_FILE = "$SCORE_DIR/significance.txt";
my $COUNT_FILE = "$SCORE_DIR/count.txt";
my $EMP_DIST_FILE= "$SCORE_DIR/empirical.txt";
my $REL_ENT_FILE= "$SCORE_DIR/rel_ent.txt";
# setting up executables
my $ZCAT = "gzip -cd";
my $BZCAT = "bzcat";
my $CP = "cp";
my $SED = "sed";
my $RM = "rm";
my $SORT_EXEC = "sort";
my $PRUNE_EXEC = "$prune_bin/calcDivergence";
my $SIG_EXEC = "$sig_bin/filter-pt";
my $FILTER_EXEC = "perl $moses_scripts/filter-model-given-input.pl";
my $CALC_EMP_EXEC ="perl $prune_scripts/calcEmpiricalDistribution.pl";
my $INT_TABLE_EXEC = "perl $prune_scripts/interpolateScores.pl";
# moses ini variables
my ($TRANSLATION_TABLE_FILE, $REORDERING_TABLE_FILE);
# phrase table variables
my ($N_PHRASES, $N_PHRASES_TO_PROCESS);
# main functions: each stage writes its results under $SCORE_DIR.
&prepare();
&calc_sig_and_counts();
&calc_div();
&clear_up();
# (1) preparing data
sub prepare {
  # Create the working directories, read the table paths from moses.ini,
  # copy the tables to scratch space, dump the source/constraint/table
  # files, and clamp the [start, end) range to the table size.
  print STDERR "(1) preparing data @ ".`date`;
  safesystem("mkdir -p $workdir") or die("ERROR: could not create work dir $workdir");
  safesystem("mkdir -p $TMP_DIR") or die("ERROR: could not create work dir $TMP_DIR");
  safesystem("mkdir -p $SCORE_DIR") or die("ERROR: could not create work dir $SCORE_DIR");
  &get_moses_ini_params();
  &copy_tables_to_tmp_dir();
  &write_data_files();
  $N_PHRASES = &get_number_of_phrases();
  $line_end = ($line_end > $N_PHRASES) ? $N_PHRASES : $line_end;
  $N_PHRASES_TO_PROCESS = $line_end - $line_start;
}
sub write_data_files {
  # From the phrase table, write three parallel files restricted to the
  # [start, end) range: the source phrases (decoder input), the target
  # phrases (forced-decoding constraints), and the src|||tgt|||scores
  # triples fed to the significance filter.
  open(SOURCE_WRITER,">".$SOURCE_FILE) or die "ERROR: Can't write $SOURCE_FILE";
  open(CONSTRAINT_WRITER,">".$CONSTRAINT_FILE) or die "ERROR: Can't write $CONSTRAINT_FILE";
  open(TABLE_WRITER,">".$SIG_TABLE_FILE) or die "ERROR: Can't write $SIG_TABLE_FILE";
  open(TTABLE_READER, &open_compressed($TRANSLATION_TABLE_FILE)) or die "ERROR: Can't read $TRANSLATION_TABLE_FILE";
  # Skip the phrase pairs before the requested start line.
  $line_number = 0;
  while($line_number < $line_start && !eof(TTABLE_READER)){
    <TTABLE_READER>;
    $line_number++;
  }
  while($line_number < $line_end && !eof(TTABLE_READER)) {
    my $line = <TTABLE_READER>;
    chomp($line);
    my @line_array = split(/\s+\|\|\|\s+/, $line);
    my $source = $line_array[0];
    my $target = $line_array[1];
    my $scores = $line_array[2];
    print TABLE_WRITER $source." ||| ".$target." ||| ".$scores."\n";
    print SOURCE_WRITER $source."\n";
    print CONSTRAINT_WRITER $target."\n";
    $line_number++;
  }
  close(SOURCE_WRITER);
  close(CONSTRAINT_WRITER);
  close(TABLE_WRITER);
  close(TTABLE_READER);
}
sub copy_tables_to_tmp_dir {
  # Copy the phrase and reordering tables into scratch space and rewrite
  # moses.ini (via sed) to point at the copies; all later stages then work
  # on the scratch copies only.  The three globals are updated in place.
  $tmp_t_table = "$TMP_DIR/".basename($TRANSLATION_TABLE_FILE);
  $tmp_r_table = "$TMP_DIR/".basename($REORDERING_TABLE_FILE);
  $tmp_moses_ini = "$TMP_DIR/moses.ini";
  $cp_t_cmd = "$CP $TRANSLATION_TABLE_FILE $TMP_DIR";
  $cp_r_cmd = "$CP $REORDERING_TABLE_FILE $TMP_DIR";
  safesystem("$cp_t_cmd") or die("ERROR: could not run:\n $cp_t_cmd");
  safesystem("$cp_r_cmd") or die("ERROR: could not run:\n $cp_r_cmd");
  # NOTE(review): the table paths are substituted with sed using '#' as the
  # delimiter — paths containing '#' or regex metacharacters would break this.
  $sed_cmd = "$SED s#$TRANSLATION_TABLE_FILE#$tmp_t_table#g $moses_ini | $SED s#$REORDERING_TABLE_FILE#$tmp_r_table#g > $tmp_moses_ini";
  safesystem("$sed_cmd") or die("ERROR: could not run:\n $sed_cmd");
  $TRANSLATION_TABLE_FILE = $tmp_t_table;
  $REORDERING_TABLE_FILE = $tmp_r_table;
  $moses_ini = $tmp_moses_ini;
}
# (2) calculating sig and counts
sub calc_sig_and_counts {
  print STDERR "(2) calculating counts and significance".`date`;
  print STDERR "(2.1) running significance module".`date`;
  &run_significance_module();
  print STDERR "(2.2) writing counts and significance tables".`date`;
  &write_counts_and_significance_table();
  # NOTE(review): this "(2.3)" message is printed here, but the empirical
  # distribution is actually computed in step (3.1) by calc_div.
  print STDERR "(2.3) calculating empirical distribution".`date`;
}
sub write_counts_and_significance_table {
  # Split the significance module output ("count ||| sig" per line) into
  # two parallel one-column files: counts and significance scores.
  open(COUNT_WRITER,">".$COUNT_FILE) or die "ERROR: Can't write $COUNT_FILE";
  open(SIG_WRITER,">".$SIG_FILE) or die "ERROR: Can't write $SIG_FILE";
  open(SIG_MOD_READER, &open_compressed($SIG_MOD_OUTPUT)) or die "ERROR: Can't read $SIG_MOD_OUTPUT";
  while(<SIG_MOD_READER>) {
    my($line) = $_;
    chomp($line);
    my @line_array = split(/\s+\|\|\|\s+/, $line);
    my $count = $line_array[0];
    my $sig = $line_array[1];
    print COUNT_WRITER $count."\n";
    print SIG_WRITER $sig."\n";
  }
  close(SIG_MOD_READER);
  close(COUNT_WRITER);
  close(SIG_WRITER);
}
sub run_significance_module {
  # Pipe the src|||tgt|||scores file through the sigtest filter-pt binary;
  # -l -10000 disables threshold filtering so every pair gets a score,
  # -p -c request the significance value and the counts in the output.
  my $sig_cmd = "cat $SIG_TABLE_FILE | $SIG_EXEC -e $training_t -f $training_s -l -10000 -p -c > $SIG_MOD_OUTPUT";
  safesystem("$sig_cmd") or die("ERROR: could not run:\n $sig_cmd");
}
# (3) calculating divergence
sub calc_div {
  # Compute the empirical distribution, force-decode every phrase pair to
  # get its divergence (in batches if the table is larger than $dec_size),
  # then combine both into the relative-entropy score file.
  print STDERR "(3) calculating relative entropy".`date`;
  print STDERR "(3.1) calculating empirical distribution".`date`;
  &calculate_empirical_distribution();
  print STDERR "(3.2) calculating divergence (this might take a while)".`date`;
  if($N_PHRASES_TO_PROCESS > $dec_size) {
    &calculate_divergence_shared("$FILTER_DIR");
  }
  else{
    &calculate_divergence($moses_ini);
  }
  print STDERR "(3.3) calculating relative entropy from empirical and divergence distributions".`date`;
  &calculate_relative_entropy();
}
sub calculate_empirical_distribution {
  # Delegates to calcEmpiricalDistribution.pl: count / total count per line.
  my $emp_cmd = "$CALC_EMP_EXEC $COUNT_FILE > $EMP_DIST_FILE";
  safesystem("$emp_cmd") or die("ERROR: could not run:\n $emp_cmd");
}
sub get_fragmented_file_name {
  # Name of the chunk file covering phrases [$frag, $frag + $interval).
  my ($name, $frag, $interval) = @_;
  my $upper = $frag + $interval;
  return "$name-$frag-$upper";
}
sub calculate_divergence {
  # Force-decode every source phrase against its target constraint with the
  # calcDivergence decoder; pruning options are disabled (-early-discarding
  # 0, huge stack, unlimited ttable) so every pair is scored.
  my $moses_ini_file = $_[0];
  print STDERR "force decoding phrase pairs\n";
  my $prune_cmd = "cat $SOURCE_FILE | $PRUNE_EXEC -f $moses_ini_file -constraint $CONSTRAINT_FILE -early-discarding-threshold 0 -s 100000 -ttable-limit 0 > $DIVERGENCE_FILE 2> /dev/null";
  safesystem("$prune_cmd") or die("ERROR: could not run:\n $prune_cmd");
}
sub calculate_divergence_shared {
  # Batched variant of calculate_divergence: split the source/constraint
  # files into $dec_size-line chunks, filter the model for each chunk
  # (keeps the per-batch table small), force-decode it, append the scores
  # to $DIVERGENCE_FILE, and remove the per-chunk filtered model.
  my $filter_dir = $_[0];
  &split_file_into_chunks($SOURCE_FILE, $dec_size, $N_PHRASES_TO_PROCESS);
  &split_file_into_chunks($CONSTRAINT_FILE, $dec_size, $N_PHRASES_TO_PROCESS);
  for(my $i = 0; $i < $N_PHRASES_TO_PROCESS; $i = $i + $dec_size) {
    my $filter_cmd = "$FILTER_EXEC ".&get_fragmented_file_name($FILTER_DIR, $i, $dec_size)." $moses_ini ".&get_fragmented_file_name($SOURCE_FILE, $i, $dec_size);
    safesystem("$filter_cmd") or die("ERROR: could not run:\n $filter_cmd");
    my $moses_ini_file = &get_fragmented_file_name($filter_dir, $i, $dec_size)."/moses.ini";
    my $source_file = &get_fragmented_file_name($SOURCE_FILE, $i, $dec_size);
    my $constraint_file = &get_fragmented_file_name($CONSTRAINT_FILE, $i, $dec_size);
    my $prune_cmd;
    print STDERR "force decoding phrase pairs $i to ".($i + $dec_size)."\n";
    # First batch truncates the output file (>), later batches append (>>).
    if($i == 0){
      $prune_cmd = "cat $source_file | $PRUNE_EXEC -f $moses_ini_file -constraint $constraint_file -early-discarding-threshold 0 -s 100000 -ttable-limit 0 > $DIVERGENCE_FILE 2> /dev/null";
    }
    else{
      $prune_cmd = "cat $source_file | $PRUNE_EXEC -f $moses_ini_file -constraint $constraint_file -early-discarding-threshold 0 -s 100000 -ttable-limit 0 >> $DIVERGENCE_FILE 2> /dev/null";
    }
    safesystem("$prune_cmd") or die("ERROR: could not run:\n $prune_cmd");
    my $rm_cmd = "$RM -r ".&get_fragmented_file_name($FILTER_DIR, $i, $dec_size);
    safesystem("$rm_cmd") or die("ERROR: could not run:\n $rm_cmd");
  }
}
sub calculate_relative_entropy {
  # rel_ent = empirical * divergence (per line), computed by
  # interpolateScores.pl with the "*" operation and unit weights.
  my $int_cmd = "$INT_TABLE_EXEC -files \"$EMP_DIST_FILE $DIVERGENCE_FILE\" -weights \"1 1\" -operation \"*\" > $REL_ENT_FILE";
  safesystem("$int_cmd") or die("ERROR: could not run:\n $int_cmd");
}
# (4) clear up stuff that is not needed
sub clear_up {
  # Remove the scratch directory (table copies, chunk files, filter dirs).
  print STDERR "(4) removing tmp dir".`date`;
  $rm_cmd = "$RM -r $TMP_DIR";
  safesystem("$rm_cmd") or die("ERROR: could not run:\n $rm_cmd");
}
# utility functions
sub safesystem {
  # Run a shell command, report abnormal termination on STDERR, and return
  # true iff the command exited with status 0 (Moses' usual helper).
  print STDERR "Executing: @_\n";
  system(@_);
  my $status = $?;
  if ($status == -1) {
    print STDERR "ERROR: Failed to execute: @_\n $!\n";
    exit(1);
  }
  if ($status & 127) {
    printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n",
      ($status & 127), ($status & 128) ? 'with' : 'without';
    exit(1);
  }
  my $exitcode = $status >> 8;
  print STDERR "Exit code: $exitcode\n" if $exitcode;
  return ! $exitcode;
}
sub open_compressed {
  # Resolve $file to something open() understands: fall back to a .bz2/.gz
  # sibling when the plain file is missing, and return a decompression pipe
  # for compressed inputs.
  my ($file) = @_;
  print STDERR "FILE: $file\n";
  # add extensions, if necessary
  unless (-e $file) {
    if    (-e "$file.bz2") { $file = "$file.bz2"; }
    elsif (-e "$file.gz")  { $file = "$file.gz"; }
  }
  # pipe zipped, if necessary
  if ($file =~ /\.bz2$/) { return "$BZCAT $file|"; }
  if ($file =~ /\.gz$/)  { return "$ZCAT $file|"; }
  return $file;
}
sub get_moses_ini_params {
  # Extract the phrase-table and reordering-table paths from moses.ini into
  # the $TRANSLATION_TABLE_FILE / $REORDERING_TABLE_FILE globals.
  # NOTE(review): this assumes the path is the 5th (ttable) / 4th
  # (distortion) whitespace-separated field on the single line that follows
  # the section header — confirm against the ini format actually in use.
  open(MOSES_READER, $moses_ini);
  while(<MOSES_READER>) {
    my($line) = $_;
    chomp($line);
    if($line eq "[ttable-file]"){
      $tableLine = <MOSES_READER>;
      chomp($tableLine);
      ($_,$_,$_,$_,$TRANSLATION_TABLE_FILE) = split(" ",$tableLine); # put the other parameters there if needed
    }
    if($line eq "[distortion-file]"){
      $tableLine = <MOSES_READER>;
      chomp($tableLine);
      ($_,$_,$_,$REORDERING_TABLE_FILE) = split(" ",$tableLine); # put the other parameters there if needed
    }
  }
  close(MOSES_READER);
}
sub get_number_of_phrases {
  # Count the lines (= phrase pairs) of the translation table.
  my $count = 0;
  open(TABLE_READER, &open_compressed($TRANSLATION_TABLE_FILE)) or die "ERROR: Can't read $TRANSLATION_TABLE_FILE";
  $count++ while <TABLE_READER>;
  close (TABLE_READER);
  return $count;
}
sub split_file_into_chunks {
  # Split $file_to_split into consecutive chunk files of $chunk_size lines
  # (the last chunk may be shorter), stopping after
  # $number_of_phrases_to_process lines; chunk files are named by
  # &get_fragmented_file_name.
  my ($file_to_split, $chunk_size, $number_of_phrases_to_process) = @_;
  open(SOURCE_READER, &open_compressed($file_to_split)) or die "ERROR: Can't read $file_to_split";
  my $FRAG_SOURCE_WRITER;
  for(my $i = 0; $i < $number_of_phrases_to_process && !eof(SOURCE_READER); $i++) {
    if(($i % $chunk_size) == 0){ # open fragmented file to write
      my $frag_file = &get_fragmented_file_name($file_to_split, $i, $chunk_size);
      open(FRAG_SOURCE_WRITER, ">".$frag_file) or die "ERROR: Can't write $frag_file";
    }
    my $line = <SOURCE_READER>;
    print FRAG_SOURCE_WRITER $line;
    # BUG FIX: the original tested "%i" (an empty hash, numerically 0)
    # instead of "$i", so the close condition was wrong and chunk files were
    # only flushed by the implicit close on re-open or script exit.  Close
    # explicitly at the end of each chunk and after the final line.
    if(($i % $chunk_size) == $chunk_size - 1 || $i == $number_of_phrases_to_process - 1){
      close(FRAG_SOURCE_WRITER);
    }
  }
  # Also release the read handle (the original leaked it until exit).
  close(SOURCE_READER);
}

View File

@ -0,0 +1,94 @@
#!/usr/bin/perl -w
# Combine several per-line score files into a single score per line, giving
# each file a weight and joining the weighted values with +, * or min.
use Getopt::Long;
use File::Basename;
use POSIX;
$operation="+";
# read arguments (the original comments were copy-pasted from another
# script and described the wrong options)
$_HELP = 1 if (@ARGV < 1 or !GetOptions ("files=s" => \$files, #space-separated list of score files
  "weights=s" => \$weights, #one interpolation weight per file
  "operation=s" => \$operation)); #+, * or min
# help message if arguments are not correct
if ($_HELP) {
  # Typo fix in the user-facing help: "interlated" -> proper wording.
  print "Relative Entropy Pruning
Usage: perl interpolateScores.pl [PARAMS]
Function: interpolates any number of score files weighted by their respective weights
Authors: Wang Ling ( lingwang at cs dot cmu dot edu )
PARAMS:
  -files=s : table files to interpolate separated by a space (Ex \"file1 file2 file3\")
  -weights : interpolation weights separated by a space (Ex \"0.3 0.3 0.4\")
  -operation : +,* or min depending on the operation to perform to combine scores
For any questions contact lingwang at cs dot cmu dot edu
";
  exit(1);
}
# Parallel arrays: one input file per weight, in the same order.
@FILES = split(/\s+/, $files);
@WEIGHTS = split(/\s+/, $weights);
my $ZCAT = "gzip -cd";
my $BZCAT = "bzcat";
&interpolate();
sub interpolate {
  # Open one reader per score file and combine them line by line according
  # to $operation, printing one combined score per line.
  my @READERS;
  for($i = 0; $i < @FILES; $i++){
    local *FILE;
    open(FILE, &open_compressed($FILES[$i])) or die "ERROR: Can't read $FILES[$i]";
    push(@READERS, *FILE);
  }
  # BUG FIX: an unsupported -operation used to make the loop below spin
  # forever, because no branch consumed any input and eof() never advanced.
  # Fail fast instead.
  die "ERROR: unknown operation '$operation' (expected +, * or min)\n"
    unless $operation eq "+" || $operation eq "*" || $operation eq "min";
  $FIRST = $READERS[0];
  while(!eof($FIRST)) {
    if($operation eq "+"){
      # weighted sum of the scores
      my $score = 0;
      for($i = 0; $i < @FILES; $i++){
        my $READER = $READERS[$i];
        my $line = <$READER>;
        chomp($line);
        $score += $line*$WEIGHTS[$i];
      }
      print "$score\n";
    }
    if($operation eq "*"){
      # weighted geometric combination: product of score^weight
      my $score = 1;
      for($i = 0; $i < @FILES; $i++){
        my $READER = $READERS[$i];
        my $line = <$READER>;
        chomp($line);
        $score *= $line ** $WEIGHTS[$i];
      }
      print "$score\n";
    }
    if($operation eq "min"){
      # minimum of the weighted scores
      my $score = 99999;
      for($i = 0; $i < @FILES; $i++){
        my $READER = $READERS[$i];
        my $line = <$READER>;
        chomp($line);
        if ($score > $line*$WEIGHTS[$i]){
          $score = $line*$WEIGHTS[$i];
        }
      }
      print "$score\n";
    }
  }
}
sub open_compressed {
  # Resolve $file to something open() understands: fall back to a .bz2/.gz
  # sibling when the plain file is missing, and return a decompression pipe
  # for compressed inputs.
  my ($file) = @_;
  print STDERR "FILE: $file\n";
  # add extensions, if necessary
  unless (-e $file) {
    if    (-e "$file.bz2") { $file = "$file.bz2"; }
    elsif (-e "$file.gz")  { $file = "$file.gz"; }
  }
  # pipe zipped, if necessary
  if ($file =~ /\.bz2$/) { return "$BZCAT $file|"; }
  if ($file =~ /\.gz$/)  { return "$ZCAT $file|"; }
  return $file;
}

View File

@ -0,0 +1,114 @@
#!/usr/bin/perl -w
# Prune a phrase (or reordering) table: keep only the entries whose score,
# read line-by-line from a parallel score file, reaches a threshold given
# directly (-threshold) or derived from a retention percentage (-percentage).
# read arguments
my $tmp_dir = "";
my $percentage = -1;
my $threshold = -1;
use Getopt::Long;
# BUG FIX: "threshold=f" (was "=i") — relative-entropy thresholds such as
# 0.1 (see the relent-filter README) are fractional, and Getopt::Long's
# integer spec rejected them outright.
$_HELP = 1 if (@ARGV < 1 or !GetOptions ("table=s" => \$table, #table to filter
  "scores=s" => \$scores_file, #scores of each phrase pair, should have same size as the table to filter
  "percentage=i" => \$percentage, # percentage of phrase table to remain
  "threshold=f" => \$threshold)); # threshold (score < threshold equals prune entry)
# help message if arguments are not correct
if ($_HELP) {
  # The help now also documents the mandatory -scores option, which the
  # original text omitted.
  print "Relative Entropy Pruning
Usage: perl prunePT.pl [PARAMS]
Function: prunes a phrase table given a score file
Authors: Wang Ling ( lingwang at cs dot cmu dot edu )
PARAMS:
  -table : table to prune
  -scores : file with one score per line, parallel to the table entries
  -percentage : percentage of phrase table to remain (if the scores do not allow the exact percentage if multiple entries have the same threshold, the script chooses to retain more than the given percentage)
  -threshold : threshold to prune (score < threshold equals prune entry), do not use this if percentage is specified
For any questions contact lingwang at cs dot cmu dot edu
";
  exit(1);
}
my $THRESHOLD = $threshold;
if ($percentage != -1){
  $THRESHOLD = &get_threshold_by_percentage($percentage);
}
my $ZCAT = "gzip -cd";
my $BZCAT = "bzcat";
&prune_by_threshold($THRESHOLD);
sub prune_by_threshold {
  # Stream the table and its parallel score file together; print only the
  # table lines whose score reaches the threshold, and report how many
  # entries were dropped.
  my $th = $_[0];
  print STDERR "pruning using threshold $th \n";
  open (SCORE_READER, &open_compressed($scores_file));
  open (TABLE_READER, &open_compressed($table));
  $number_of_phrases=0;
  $number_of_unpruned_phrases=0;
  while(!eof(SCORE_READER) && !eof(TABLE_READER)){
    $score_line = <SCORE_READER>;
    $table_line = <TABLE_READER>;
    chomp($score_line);
    # Keep entries at or above the threshold (score < threshold == prune).
    if($score_line >= $th){
      print $table_line;
      $number_of_unpruned_phrases++;
    }
    $number_of_phrases++;
  }
  print STDERR "pruned ".($number_of_phrases - $number_of_unpruned_phrases)." phrase pairs out of $number_of_phrases\n";
}
sub get_threshold_by_percentage {
  # Derive the pruning threshold from a retention percentage by sorting the
  # scores ascending and taking the score at the cut index.
  # NOTE(review): with $stop_phrase = percentage*N/100, entries kept are
  # those ABOVE the percentage-th quantile, i.e. (100 - percentage)% of the
  # table — verify against the intended "-percentage = percent to remain"
  # semantics before relying on exact retention rates.
  my ($percentage) = @_;
  $ret = 0;
  $number_of_phrases = &get_number_of_phrases();
  $stop_phrase = ($percentage * $number_of_phrases) / 100;
  $phrase_number = 0;
  # The original made a useless extra pass here, reading the whole score
  # file and discarding every line; that dead O(n) pass is removed.
  # Sort numerically (-g handles scientific notation) in the C locale for a
  # deterministic order; the original spelled the locale "c", which is not
  # a valid POSIX locale name.
  open (SCORE_READER, "cat $scores_file | LC_ALL=C sort -g |");
  while(<SCORE_READER>) {
    my $line = $_;
    if($phrase_number >= $stop_phrase){
      chomp($line);
      $ret = $line;
      last;
    }
    $phrase_number++;
  }
  close (SCORE_READER);
  return $ret;
}
sub get_number_of_phrases {
  # Number of lines in the score file == number of phrase pairs.
  my $count = 0;
  open (SCORE_READER, $scores_file);
  $count++ while <SCORE_READER>;
  close (SCORE_READER);
  return $count;
}
sub open_compressed {
  # Resolve $file to something open() understands: fall back to a .bz2/.gz
  # sibling when the plain file is missing, and return a decompression pipe
  # for compressed inputs.
  my ($file) = @_;
  print STDERR "FILE: $file\n";
  # add extensions, if necessary
  unless (-e $file) {
    if    (-e "$file.bz2") { $file = "$file.bz2"; }
    elsif (-e "$file.gz")  { $file = "$file.gz"; }
  }
  # pipe zipped, if necessary
  if ($file =~ /\.bz2$/) { return "$BZCAT $file|"; }
  if ($file =~ /\.gz$/)  { return "$ZCAT $file|"; }
  return $file;
}

View File

@ -0,0 +1,10 @@
# Build the sigtest-filter binary against Joy Zhang's SALM toolkit.
# Override on the command line: make SALMDIR=/path/to/salm [FLAVOR=o32]
SALMDIR=/Users/hieuhoang/workspace/salm
# Suffix of the prebuilt SALM object files (o32 / o64 builds).
FLAVOR?=o64
INC=-I$(SALMDIR)/Src/Shared -I$(SALMDIR)/Src/SuffixArrayApplications -I$(SALMDIR)/Src/SuffixArrayApplications/SuffixArraySearch
OBJS=$(SALMDIR)/Distribution/Linux/Objs/Search/_SuffixArrayApplicationBase.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Search/_SuffixArraySearchApplicationBase.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Shared/_String.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Shared/_IDVocabulary.$(FLAVOR)

all: filter-pt

# check-install verifies SALMDIR before compiling.
# Note: -O6 is non-standard; GCC silently clamps it to -O3.
filter-pt: filter-pt.cpp
	./check-install $(SALMDIR)
	$(CXX) -O6 $(INC) $(OBJS) -o filter-pt filter-pt.cpp

View File

@ -0,0 +1,42 @@
Re-implementation of Johnson et al. (2007)'s phrasetable filtering strategy.
This implementation relies on Joy Zhang's SALM Suffix Array toolkit. It is
available here:
http://projectile.sv.cmu.edu/research/public/tools/salm/salm.htm
--Chris Dyer <redpony@umd.edu>
BUILD INSTRUCTIONS
---------------------------------
1. Download and build SALM.
2. make SALMDIR=/path/to/SALM
USAGE INSTRUCTIONS
---------------------------------
1. Using the SALM/Bin/Linux/Index/IndexSA.O32, create a suffix array index
of the source and target sides of your training bitext.
2. cat phrase-table.txt | ./filter-pt -e TARG.suffix -f SOURCE.suffix \
-l <FILTER-VALUE>
FILTER-VALUE is the -log prob threshold described in Johnson et al.
(2007)'s paper. It may be either 'a+e', 'a-e', or a positive real
value. 'a+e' is a good setting- it filters out <1,1,1> phrase pairs.
I also recommend using -n 30, which filters out all but the top
30 phrase pairs, sorted by P(e|f). This was used in the paper.
3. Run with no options to see more use-cases.
REFERENCES
---------------------------------
H. Johnson, J. Martin, G. Foster and R. Kuhn. (2007) Improving Translation
Quality by Discarding Most of the Phrasetable. In Proceedings of the 2007
Joint Conference on Empirical Methods in Natural Language Processing and
Computational Natural Language Learning (EMNLP-CoNLL), pp. 967-975.

View File

@ -0,0 +1,231 @@
// XGetopt.cpp Version 1.2
//
// Author: Hans Dietrich
// hdietrich2@hotmail.com
//
// Description:
// XGetopt.cpp implements getopt(), a function to parse command lines.
//
// History
// Version 1.2 - 2003 May 17
// - Added Unicode support
//
// Version 1.1 - 2002 March 10
// - Added example to XGetopt.cpp module header
//
// This software is released into the public domain.
// You are free to use it in any way you like.
//
// This software is provided "as is" with no expressed
// or implied warranty. I accept no liability for any
// damage or loss of business that this software may cause.
//
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// if you are using precompiled headers then include this line:
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// if you are not using precompiled headers then include these lines:
//#include <windows.h>
//#include <stdio.h>
//#include <tchar.h>
///////////////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include <string.h>
#include <math.h>
#include "WIN32_functions.h"
///////////////////////////////////////////////////////////////////////////////
//
// X G e t o p t . c p p
//
//
// NAME
// getopt -- parse command line options
//
// SYNOPSIS
// int getopt(int argc, char *argv[], char *optstring)
//
// extern char *optarg;
// extern int optind;
//
// DESCRIPTION
// The getopt() function parses the command line arguments. Its
// arguments argc and argv are the argument count and array as
// passed into the application on program invocation. In the case
// of Visual C++ programs, argc and argv are available via the
// variables __argc and __argv (double underscores), respectively.
// getopt returns the next option letter in argv that matches a
// letter in optstring. (Note: Unicode programs should use
// __targv instead of __argv. Also, all character and string
// literals should be enclosed in ( ) ).
//
// optstring is a string of recognized option letters; if a letter
// is followed by a colon, the option is expected to have an argument
// that may or may not be separated from it by white space. optarg
// is set to point to the start of the option argument on return from
// getopt.
//
// Option letters may be combined, e.g., "-ab" is equivalent to
// "-a -b". Option letters are case sensitive.
//
// getopt places in the external variable optind the argv index
// of the next argument to be processed. optind is initialized
// to 0 before the first call to getopt.
//
// When all options have been processed (i.e., up to the first
// non-option argument), getopt returns EOF, optarg will point
// to the argument, and optind will be set to the argv index of
// the argument. If there are no non-option arguments, optarg
// will be set to NULL.
//
// The special option "--" may be used to delimit the end of the
// options; EOF will be returned, and "--" (and everything after it)
// will be skipped.
//
// RETURN VALUE
// For option letters contained in the string optstring, getopt
// will return the option letter. getopt returns a question mark (?)
// when it encounters an option letter not included in optstring.
// EOF is returned when processing is finished.
//
// BUGS
// 1) Long options are not supported.
// 2) The GNU double-colon extension is not supported.
// 3) The environment variable POSIXLY_CORRECT is not supported.
// 4) The + syntax is not supported.
// 5) The automatic permutation of arguments is not supported.
// 6) This implementation of getopt() returns EOF if an error is
// encountered, instead of -1 as the latest standard requires.
//
// EXAMPLE
// BOOL CMyApp::ProcessCommandLine(int argc, char *argv[])
// {
// int c;
//
// while ((c = getopt(argc, argv, ("aBn:"))) != EOF)
// {
// switch (c)
// {
// case ('a'):
// TRACE(("option a\n"));
// //
// // set some flag here
// //
// break;
//
// case ('B'):
// TRACE( ("option B\n"));
// //
// // set some other flag here
// //
// break;
//
// case ('n'):
// TRACE(("option n: value=%d\n"), atoi(optarg));
// //
// // do something with value here
// //
// break;
//
// case ('?'):
// TRACE(("ERROR: illegal option %s\n"), argv[optind-1]);
// return FALSE;
// break;
//
// default:
// TRACE(("WARNING: no handler for option %c\n"), c);
// return FALSE;
// break;
// }
// }
// //
// // check for non-option args here
// //
// return TRUE;
// }
//
///////////////////////////////////////////////////////////////////////////////
char *optarg; // global argument pointer
int optind = 0; // global argv index

// Parse the next command-line option from argv (Windows replacement for
// POSIX getopt; see the file header comment for the full contract).
// Returns the option letter, '?' for an unrecognized option or a missing
// required argument, and EOF when option processing is finished.
// Resetting optind to 0 restarts the scan from the beginning.
int getopt(int argc, char *argv[], char *optstring)
{
  static char *next = NULL;  // cursor inside a bundled group such as "-ab"
  if (optind == 0)
    next = NULL;
  optarg = NULL;
  if (next == NULL || *next =='\0') {
    // start scanning a new argv element
    if (optind == 0)
      optind++;
    // stop at end of argv, at a non-option word, or at a bare "-";
    // leave optarg pointing at the first non-option argument (if any)
    if (optind >= argc || argv[optind][0] != ('-') || argv[optind][1] == ('\0')) {
      optarg = NULL;
      if (optind < argc)
        optarg = argv[optind];
      return EOF;
    }
    // "--" explicitly terminates option processing
    if (strcmp(argv[optind], "--") == 0) {
      optind++;
      optarg = NULL;
      if (optind < argc)
        optarg = argv[optind];
      return EOF;
    }
    next = argv[optind];
    next++; // skip past -
    optind++;
  }
  char c = *next++;
  char *cp = strchr(optstring, c);
  if (cp == NULL || c == (':'))
    return ('?');  // option letter not in optstring
  cp++;
  if (*cp == (':')) {
    // option requires an argument: either the remainder of this token
    // ("-ofile") or the following argv element ("-o file")
    if (*next != ('\0')) {
      optarg = next;
      next = NULL;
    } else if (optind < argc) {
      optarg = argv[optind];
      optind++;
    } else {
      return ('?');  // required argument is missing
    }
  }
  return c;
}
// for an overview, see
// W. Press, S. Teukolsky and W. Vetterling. (1992) Numerical Recipes in C. Chapter 6.1.
//
// Log-gamma for positive integer arguments via the Lanczos series, i.e.
// lgamma(x) == log((x-1)!). For x <= 2 the exact value is 0 (0! = 1! = 1);
// smaller (invalid) arguments are clamped to 0.0 as well.
double lgamma(int x)
{
  if (x <= 2) {
    return 0.0;
  }
  static double coefs[6] = {76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5};
  const double xd = (double)x;
  // Stirling-like leading term of the Lanczos formula
  double lead = xd + 5.5;
  lead -= (xd + 0.5) * log(lead);
  // rational series; denominators are x+1, x+2, ..., x+6
  double denom = xd;
  double series = 1.000000000190015;
  for (size_t term = 0; term < 6; ++term) {
    series += coefs[term] / ++denom;
  }
  return -lead + log(2.5066282746310005 * series / xd);
}

View File

@ -0,0 +1,24 @@
// XGetopt.h Version 1.2
//
// Author: Hans Dietrich
// hdietrich2@hotmail.com
//
// This software is released into the public domain.
// You are free to use it in any way you like.
//
// This software is provided "as is" with no expressed
// or implied warranty. I accept no liability for any
// damage or loss of business that this software may cause.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef XGETOPT_H
#define XGETOPT_H

// Minimal getopt() replacement for Windows builds (implemented in XGetopt.cpp).
// NOTE(review): opterr is declared here but never defined in XGetopt.cpp;
// referencing it will fail at link time — confirm whether it is needed.
extern int optind, opterr;
extern char *optarg;

int getopt(int argc, char *argv[], char *optstring);

// log-gamma for positive integer arguments (Lanczos approximation)
double lgamma(int x);

#endif //XGETOPT_H

View File

@ -0,0 +1,5 @@
#!/usr/bin/perl -w
# Sanity check used by the build: verify that the SALM installation
# directory given as the first argument exists, else abort with advice.
use strict;
my $path = shift @ARGV;
if (! -d $path) {
  die "Can't find SALM installation path: $path\nPlease use:\n\n make SALMDIR=/path/to/SALM\n\n";
}
exit 0;

View File

@ -0,0 +1,377 @@
#include <cstring>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include "_SuffixArraySearchApplicationBase.h"
#include <vector>
#include <iostream>
#include <set>
#ifdef WIN32
#include "WIN32_functions.h"
#else
#include <unistd.h>
#endif
typedef std::set<TextLenType> SentIdSet;
typedef std::map<std::string, SentIdSet> PhraseSetMap;
#undef min
// constants
const size_t MINIMUM_SIZE_TO_KEEP = 10000; // reduce this to improve memory usage,
// increase for speed
const std::string SEPARATOR = " ||| ";
const double ALPHA_PLUS_EPS = -1000.0; // dummy value
const double ALPHA_MINUS_EPS = -2000.0; // dummy value
// configuration params
int pfe_filter_limit = 0; // 0 = don't filter anything based on P(f|e)
bool print_cooc_counts = false; // add cooc counts to phrase table?
bool print_neglog_significance = false; // add -log(p) to phrase table?
double sig_filter_limit = 0; // keep phrase pairs with -log(sig) > sig_filter_limit
// higher = filter-more
bool pef_filter_only = false; // only filter based on pef
// globals
PhraseSetMap esets;
double p_111 = 0.0; // alpha
size_t nremoved_sigfilter = 0;
size_t nremoved_pfefilter = 0;
C_SuffixArraySearchApplicationBase e_sa;
C_SuffixArraySearchApplicationBase f_sa;
int num_lines;
// Print the command-line help text to stderr and terminate with status 1.
void usage()
{
  static const char kHelp[] =
    "\nFilter phrase table using significance testing as described\n"
    "in H. Johnson, et al. (2007) Improving Translation Quality\n"
    "by Discarding Most of the Phrasetable. EMNLP 2007.\n"
    "\nUsage:\n"
    "\n filter-pt -e english.suf-arr -f french.suf-arr\n"
    " [-c] [-p] [-l threshold] [-n num] < PHRASE-TABLE > FILTERED-PHRASE-TABLE\n\n"
    " [-l threshold] >0.0, a+e, or a-e: keep values that have a -log significance > this\n"
    " [-n num ] 0, 1...: 0=no filtering, >0 sort by P(e|f) and keep the top num elements\n"
    " [-c ] add the cooccurence counts to the phrase table\n"
    " [-p ] add -log(significance) to the phrasetable\n\n";
  std::cerr << kHelp;
  exit(1);
}
// One phrase-table line, split into its fields, plus the co-occurrence
// statistics attached later by compute_cooc_stats_and_filter().
struct PTEntry {
  PTEntry(const std::string& str, int index);
  std::string f_phrase;  // source phrase
  std::string e_phrase;  // target phrase
  std::string extra;     // everything after the third separator (e.g. alignments)
  std::string scores;    // raw space-separated score field
  float pfe;             // P(f|e), extracted from `scores` at position `index`
  int cf;                // # sentences containing the source phrase
  int ce;                // # sentences containing the target phrase
  int cfe;               // # sentences containing both
  float nlog_pte;        // -log(significance) from Fisher's exact test
  // Record the co-occurrence counts and significance for this entry.
  void set_cooc_stats(int _cef, int _cf, int _ce, float nlp) {
    cfe = _cef;
    cf = _cf;
    ce = _ce;
    nlog_pte = nlp;
  }
};
// Parse one phrase-table line of the form
//   "f-phrase ||| e-phrase ||| scores ||| extra"
// and extract P(f|e) as the `index`-th space-separated field of `scores`.
PTEntry::PTEntry(const std::string& str, int index) :
  cf(0), ce(0), cfe(0), nlog_pte(0.0)
{
  size_t pos = 0;
  std::string::size_type nextPos = str.find(SEPARATOR, pos);
  this->f_phrase = str.substr(pos,nextPos);
  pos = nextPos + SEPARATOR.size();
  nextPos = str.find(SEPARATOR, pos);
  this->e_phrase = str.substr(pos,nextPos-pos);
  pos = nextPos + SEPARATOR.size();
  nextPos = str.find(SEPARATOR, pos);
  this->scores = str.substr(pos,nextPos-pos);
  pos = nextPos + SEPARATOR.size();
  this->extra = str.substr(pos);
  // advance the iterator to the space preceding the wanted score field
  int c = 0;
  std::string::iterator i=scores.begin();
  if (index > 0) {
    for (; i != scores.end(); ++i) {
      if ((*i) == ' ') {
        c++;
        if (c == index) break;
      }
    }
    // step past the separating space (bug fix: the unconditional ++i also
    // skipped the first digit of the score when index == 0)
    if (i != scores.end()) {
      ++i;
    }
  }
  // collect the token into a std::string (bug fix: the old fixed char[24]
  // buffer overflowed on score fields of 24 characters or more)
  std::string token;
  while (i != scores.end() && *i != ' ') {
    token += *i++;
  }
  this->pfe = atof(token.c_str());
}
struct PfeComparer {
bool operator()(const PTEntry* a, const PTEntry* b) const {
return a->pfe > b->pfe;
}
};
// Predicate for std::remove_if: an entry whose negative log-significance
// falls below the threshold is freed and reported as removable.
struct NlogSigThresholder {
  NlogSigThresholder(float threshold) : t(threshold) {}
  float t;
  bool operator()(const PTEntry* a) const {
    if (a->nlog_pte >= t) {
      return false;  // significant enough: keep it
    }
    delete a;
    return true;
  }
};
// Stream one phrase-table entry in the output format.
// NOTE(review): the phrase, score and extra fields below are commented out,
// so only the optional co-occurrence counts and significance are printed —
// this looks like a merge leftover; confirm the intended output format.
std::ostream& operator << (std::ostream& os, const PTEntry& pp)
{
  //os << pp.f_phrase << " ||| " << pp.e_phrase;
  //os << " ||| " << pp.scores;
  //if (pp.extra.size()>0) os << " ||| " << pp.extra;
  if (print_cooc_counts) os << pp.cfe << " " << pp.cf << " " << pp.ce;
  if (print_neglog_significance) os << " ||| " << pp.nlog_pte;
  return os;
}
// Debug helper: dump one 2x2 contingency table plus the ratio used to step
// to the next hypergeometric term, to stderr.
void print(int a, int b, int c, int d, float p)
{
  const double ratio = (double)(b) * (double)(c) / (double)(a + 1) / (double)(d + 1);
  std::cerr << a << "\t" << b << "\t P=" << p << "\n"
            << c << "\t" << d << "\t xf=" << ratio << "\n\n";
}
// 2x2 (one-sided) Fisher's exact test
// see B. Moore. (2004) On Log Likelihood and the Significance of Rare Events
//
// Returns the one-sided p-value P(X >= cfe) for the 2x2 contingency table
// built from the pair counts, using the global num_lines as the corpus size.
// Log-gamma is used so factorials never overflow.
double fisher_exact(int cfe, int ce, int cf)
{
  assert(cfe <= ce);
  assert(cfe <= cf);
  // contingency-table cells
  int a = cfe;                          // sentences containing both phrases
  int b = (cf - cfe);                   // f without e
  int c = (ce - cfe);                   // e without f
  int d = (num_lines - ce - cf + cfe);  // neither
  int n = a + b + c + d;                // total sentence pairs
  // hypergeometric probability of the observed table
  double cp = exp(lgamma(1+a+c) + lgamma(1+b+d) + lgamma(1+a+b) + lgamma(1+c+d) - lgamma(1+n) - lgamma(1+a) - lgamma(1+b) - lgamma(1+c) - lgamma(1+d));
  double total_p = 0.0;
  int tc = std::min(b,c);  // number of more-extreme tables in the tail
  // Sum the tail. Each successive term is derived from the previous one via
  // the ratio of hypergeometric terms, so the gammas are evaluated only once.
  for (int i=0; i<=tc; i++) {
    total_p += cp;
    // double lg = lgamma(1+a+c) + lgamma(1+b+d) + lgamma(1+a+b) + lgamma(1+c+d) - lgamma(1+n) - lgamma(1+a) - lgamma(1+b) - lgamma(1+c) - lgamma(1+d); double cp = exp(lg);
    // print(a,b,c,d,cp);
    double coef = (double)(b)*(double)(c)/(double)(a+1)/(double)(d+1);
    cp *= coef;
    // shift one count from the off-diagonal to the diagonal cells
    ++a;
    --c;
    ++d;
    --b;
  }
  return total_p;
}
// input: unordered list of translation options for a single source phrase
//
// Attaches co-occurrence counts and Fisher-test significance to every entry,
// after optionally pruning `options` down to the pfe_filter_limit best
// entries by P(f|e). Entries failing the significance threshold are deleted
// and removed from `options` in place.
void compute_cooc_stats_and_filter(std::vector<PTEntry*>& options)
{
  // Step 1: optional count-based pruning by P(f|e).
  if (pfe_filter_limit>0 && options.size() > pfe_filter_limit) {
    nremoved_pfefilter += (options.size() - pfe_filter_limit);
    std::nth_element(options.begin(), options.begin()+pfe_filter_limit, options.end(), PfeComparer());
    for (std::vector<PTEntry*>::iterator i=options.begin()+pfe_filter_limit; i != options.end(); ++i)
      delete *i;
    options.erase(options.begin()+pfe_filter_limit,options.end());
  }
  if (pef_filter_only) return;
  // Step 2: collect the set of sentence ids containing the source phrase
  // via a suffix-array lookup; its size is the marginal count cf.
  SentIdSet fset;
  vector<S_SimplePhraseLocationElement> locations;
  //std::cerr << "Looking up f-phrase: " << options.front()->f_phrase << "\n";
  locations = f_sa.locateExactPhraseInCorpus(options.front()->f_phrase.c_str());
  if(locations.size()==0) {
    cerr<<"No occurrences found!!\n";
  }
  for (vector<S_SimplePhraseLocationElement>::iterator i=locations.begin();
       i != locations.end();
       ++i) {
    fset.insert(i->sentIdInCorpus);
  }
  size_t cf = fset.size();
  for (std::vector<PTEntry*>::iterator i=options.begin(); i != options.end(); ++i) {
    const std::string& e_phrase = (*i)->e_phrase;
    size_t cef=0;
    // esets caches target-phrase sentence sets across source phrases;
    // an empty entry means "not looked up yet"
    SentIdSet& eset = esets[(*i)->e_phrase];
    if (eset.empty()) {
      //std::cerr << "Looking up e-phrase: " << e_phrase << "\n";
      vector<S_SimplePhraseLocationElement> locations = e_sa.locateExactPhraseInCorpus(e_phrase.c_str());
      for (vector<S_SimplePhraseLocationElement>::iterator i=locations.begin(); i!= locations.end(); ++i) {
        TextLenType curSentId = i->sentIdInCorpus;
        eset.insert(curSentId);
      }
    }
    size_t ce=eset.size();
    // count co-occurring sentence ids by scanning the smaller of the two sets
    if (ce < cf) {
      for (SentIdSet::iterator i=eset.begin(); i != eset.end(); ++i) {
        if (fset.find(*i) != fset.end()) cef++;
      }
    } else {
      for (SentIdSet::iterator i=fset.begin(); i != fset.end(); ++i) {
        if (eset.find(*i) != eset.end()) cef++;
      }
    }
    double nlp = -log(fisher_exact(cef, cf, ce));
    (*i)->set_cooc_stats(cef, cf, ce, nlp);
    // evict small cached sets to bound memory use (cheap to recompute)
    if (ce < MINIMUM_SIZE_TO_KEEP) {
      esets.erase(e_phrase);
    }
  }
  // Step 3: significance pruning; the predicate deletes rejected entries.
  std::vector<PTEntry*>::iterator new_end =
    std::remove_if(options.begin(), options.end(), NlogSigThresholder(sig_filter_limit));
  nremoved_sigfilter += (options.end() - new_end);
  options.erase(new_end,options.end());
}
// Driver: parse options, load the two suffix-array corpora, then stream the
// phrase table from stdin, grouping consecutive lines by source phrase,
// filtering each group and writing survivors to stdout. Progress and a final
// summary go to stderr.
int main(int argc, char * argv[])
{
  int c;
  const char* efile=0;   // target-side suffix-array corpus
  const char* ffile=0;   // source-side suffix-array corpus
  int pfe_index = 2;     // position of P(f|e) among the score fields
  // ---- option parsing ----
  while ((c = getopt(argc, argv, "cpf:e:i:n:l:")) != -1) {
    switch (c) {
    case 'e':
      efile = optarg;
      break;
    case 'f':
      ffile = optarg;
      break;
    case 'i': // index of pfe in phrase table
      pfe_index = atoi(optarg);
      break;
    case 'n': // keep only the top n entries in phrase table sorted by p(f|e) (0=all)
      pfe_filter_limit = atoi(optarg);
      std::cerr << "P(f|e) filter limit: " << pfe_filter_limit << std::endl;
      break;
    case 'c':
      print_cooc_counts = true;
      break;
    case 'p':
      print_neglog_significance = true;
      break;
    case 'l':
      // threshold: a numeric value, or "a+e"/"a-e" for alpha +/- epsilon
      // (resolved after alpha is computed below)
      std::cerr << "-l = " << optarg << "\n";
      if (strcmp(optarg,"a+e") == 0) {
        sig_filter_limit = ALPHA_PLUS_EPS;
      } else if (strcmp(optarg,"a-e") == 0) {
        sig_filter_limit = ALPHA_MINUS_EPS;
      } else {
        char *x;
        sig_filter_limit = strtod(optarg, &x);
      }
      break;
    default:
      usage();
    }
  }
  //-----------------------------------------------------------------------------
  if (optind != argc || ((!efile || !ffile) && !pef_filter_only)) {
    usage();
  }
  //load the indexed corpus with vocabulary(noVoc=false) and with offset(noOffset=false)
  if (!pef_filter_only) {
    e_sa.loadData_forSearch(efile, false, false);
    f_sa.loadData_forSearch(ffile, false, false);
    size_t elines = e_sa.returnTotalSentNumber();
    size_t flines = f_sa.returnTotalSentNumber();
    if (elines != flines) {
      std::cerr << "Number of lines in e-corpus != number of lines in f-corpus!\n";
      usage();
    } else {
      std::cerr << "Training corpus: " << elines << " lines\n";
      num_lines = elines;
    }
    // alpha = -log significance of a (1,1,1) pair, the weakest possible evidence
    p_111 = -log(fisher_exact(1,1,1));
    std::cerr << "\\alpha = " << p_111 << "\n";
    if (sig_filter_limit == ALPHA_MINUS_EPS) {
      sig_filter_limit = p_111 - 0.001;
    } else if (sig_filter_limit == ALPHA_PLUS_EPS) {
      sig_filter_limit = p_111 + 0.001;
    }
    std::cerr << "Sig filter threshold is = " << sig_filter_limit << "\n";
  } else {
    std::cerr << "Filtering using P(e|f) only. n=" << pfe_filter_limit << std::endl;
  }
  // ---- main loop: group phrase-table lines by source phrase ----
  char tmpString[10000];
  std::string prev = "";
  std::vector<PTEntry*> options;   // pending entries for the current source phrase
  size_t pt_lines = 0;
  while(!cin.eof()) {
    cin.getline(tmpString,10000,'\n');
    if(++pt_lines%10000==0) {
      std::cerr << ".";
      if(pt_lines%500000==0) std::cerr << "[n:"<<pt_lines<<"]\n";
    }
    if(strlen(tmpString)>0) {
      PTEntry* pp = new PTEntry(tmpString, pfe_index);
      if (prev != pp->f_phrase) {
        // source phrase changed: filter and flush the previous group
        prev = pp->f_phrase;
        if (!options.empty()) { // always true after first line
          compute_cooc_stats_and_filter(options);
        }
        for (std::vector<PTEntry*>::iterator i=options.begin(); i != options.end(); ++i) {
          std::cout << **i << std::endl;
          delete *i;
        }
        options.clear();
        options.push_back(pp);
      } else {
        options.push_back(pp);
      }
      // for(int i=0;i<locations.size(); i++){
      // cout<<"SentId="<<locations[i].sentIdInCorpus<<" Pos="<<(int)locations[i].posInSentInCorpus<<endl;
      // }
    }
  }
  // flush the final group
  compute_cooc_stats_and_filter(options);
  for (std::vector<PTEntry*>::iterator i=options.begin(); i != options.end(); ++i) {
    std::cout << **i << std::endl;
    delete *i;
  }
  // ---- summary statistics ----
  float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines;
  float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines;
  std::cerr << "\n\n------------------------------------------------------\n"
            << " unfiltered phrases pairs: " << pt_lines << "\n"
            << "\n"
            << " P(f|e) filter [first]: " << nremoved_pfefilter << " (" << pfefper << "%)\n"
            << " significance filter: " << nremoved_sigfilter << " (" << sigfper << "%)\n"
            << " TOTAL FILTERED: " << (nremoved_pfefilter + nremoved_sigfilter) << " (" << (sigfper + pfefper) << "%)\n"
            << "\n"
            << " FILTERED phrase pairs: " << (pt_lines - nremoved_pfefilter - nremoved_sigfilter) << " (" << (100.0-sigfper - pfefper) << "%)\n"
            << "------------------------------------------------------\n";
  return 0;
}

View File

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 9.00
# Visual Studio 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "sigtest-filter", "sigtest-filter.vcproj", "{FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Release|Win32 = Release|Win32
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}.Debug|Win32.ActiveCfg = Debug|Win32
{FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}.Debug|Win32.Build.0 = Debug|Win32
{FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}.Release|Win32.ActiveCfg = Release|Win32
{FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}.Release|Win32.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,580 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (c) 2006 University of Edinburgh
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the University of Edinburgh nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
// example file on how to use moses library
#include <iostream>
#include <stack>
#include "TypeDef.h"
#include "Util.h"
#include "IOWrapper.h"
#include "Hypothesis.h"
#include "WordsRange.h"
#include "TrellisPathList.h"
#include "StaticData.h"
#include "DummyScoreProducers.h"
#include "InputFileStream.h"
using namespace std;
using namespace Moses;
namespace MosesCmd
{
// Construct an IOWrapper that reads input sentences from standard input
// (no input file path is given); output streams are set up by Initialization().
IOWrapper::IOWrapper(
  const vector<FactorType> &inputFactorOrder
  , const vector<FactorType> &outputFactorOrder
  , const FactorMask &inputFactorUsed
  , size_t nBestSize
  , const string &nBestFilePath)
  :m_inputFactorOrder(inputFactorOrder)
  ,m_outputFactorOrder(outputFactorOrder)
  ,m_inputFactorUsed(inputFactorUsed)
  ,m_inputFile(NULL)
  ,m_inputStream(&std::cin)  // read from stdin; stream is not owned
  ,m_nBestStream(NULL)
  ,m_outputWordGraphStream(NULL)
  ,m_outputSearchGraphStream(NULL)
  ,m_detailedTranslationReportingStream(NULL)
  ,m_alignmentOutputStream(NULL)
{
  Initialization(inputFactorOrder, outputFactorOrder
                 , inputFactorUsed
                 , nBestSize, nBestFilePath);
}
// Construct an IOWrapper that reads input sentences from the given file;
// the InputFileStream is owned and released by the destructor.
IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
                     , const std::vector<FactorType> &outputFactorOrder
                     , const FactorMask &inputFactorUsed
                     , size_t nBestSize
                     , const std::string &nBestFilePath
                     , const std::string &inputFilePath)
  :m_inputFactorOrder(inputFactorOrder)
  ,m_outputFactorOrder(outputFactorOrder)
  ,m_inputFactorUsed(inputFactorUsed)
  ,m_inputFilePath(inputFilePath)
  ,m_inputFile(new InputFileStream(inputFilePath))  // owned
  ,m_nBestStream(NULL)
  ,m_outputWordGraphStream(NULL)
  ,m_outputSearchGraphStream(NULL)
  ,m_detailedTranslationReportingStream(NULL)
  ,m_alignmentOutputStream(NULL)
{
  Initialization(inputFactorOrder, outputFactorOrder
                 , inputFactorUsed
                 , nBestSize, nBestFilePath);
  // read from the opened file instead of stdin
  m_inputStream = m_inputFile;
}
// Release all owned streams. Note that when single-best output is
// suppressed, m_nBestStream aliases std::cout and must NOT be deleted —
// hence the m_surpressSingleBestOutput guard below.
IOWrapper::~IOWrapper()
{
  if (m_inputFile != NULL)
    delete m_inputFile;
  if (m_nBestStream != NULL && !m_surpressSingleBestOutput) {
    // outputting n-best to file, rather than stdout. need to close file and delete obj
    delete m_nBestStream;
  }
  if (m_outputWordGraphStream != NULL) {
    delete m_outputWordGraphStream;
  }
  if (m_outputSearchGraphStream != NULL) {
    delete m_outputSearchGraphStream;
  }
  // deleting NULL is harmless, so these need no guard
  delete m_detailedTranslationReportingStream;
  delete m_alignmentOutputStream;
}
// Shared constructor body: open every output stream requested by the
// configuration (n-best list, word graph, search graph, detailed
// translation report, alignment file).
// NOTE(review): ofstream::open success is only CHECKed for the last two
// streams; n-best/word-graph/search-graph opens are unchecked — confirm.
void IOWrapper::Initialization(const std::vector<FactorType> &/*inputFactorOrder*/
                               , const std::vector<FactorType> &/*outputFactorOrder*/
                               , const FactorMask &/*inputFactorUsed*/
                               , size_t nBestSize
                               , const std::string &nBestFilePath)
{
  const StaticData &staticData = StaticData::Instance();
  // n-best
  m_surpressSingleBestOutput = false;
  if (nBestSize > 0) {
    if (nBestFilePath == "-" || nBestFilePath == "/dev/stdout") {
      // n-best goes to stdout, so the single-best line must be suppressed
      // to keep the output parseable
      m_nBestStream = &std::cout;
      m_surpressSingleBestOutput = true;
    } else {
      std::ofstream *file = new std::ofstream;
      m_nBestStream = file;
      file->open(nBestFilePath.c_str());
    }
  }
  // wordgraph output
  if (staticData.GetOutputWordGraph()) {
    string fileName = staticData.GetParam("output-word-graph")[0];
    std::ofstream *file = new std::ofstream;
    m_outputWordGraphStream = file;
    file->open(fileName.c_str());
  }
  // search graph output
  if (staticData.GetOutputSearchGraph()) {
    string fileName;
    if (staticData.GetOutputSearchGraphExtended())
      fileName = staticData.GetParam("output-search-graph-extended")[0];
    else
      fileName = staticData.GetParam("output-search-graph")[0];
    std::ofstream *file = new std::ofstream;
    m_outputSearchGraphStream = file;
    file->open(fileName.c_str());
  }
  // detailed translation reporting
  if (staticData.IsDetailedTranslationReportingEnabled()) {
    const std::string &path = staticData.GetDetailedTranslationReportingFilePath();
    m_detailedTranslationReportingStream = new std::ofstream(path.c_str());
    CHECK(m_detailedTranslationReportingStream->good());
  }
  // sentence alignment output
  if (! staticData.GetAlignmentOutputFile().empty()) {
    m_alignmentOutputStream = new ofstream(staticData.GetAlignmentOutputFile().c_str());
    CHECK(m_alignmentOutputStream->good());
  }
}
// Read the next input sentence into `inputType`. Returns the populated
// object on success; on end of input, frees it and returns NULL.
// Sentences carrying an explicit id advance the internal counter past it;
// otherwise the next sequential id is assigned.
// NOTE(review): an explicit id of 0 is indistinguishable from "no id" here
// and would be renumbered — confirm id 0 is never used explicitly.
InputType*IOWrapper::GetInput(InputType* inputType)
{
  if(inputType->Read(*m_inputStream, m_inputFactorOrder)) {
    if (long x = inputType->GetTranslationId()) {
      if (x>=m_translationId) m_translationId = x+1;
    } else inputType->SetTranslationId(m_translationId++);
    return inputType;
  } else {
    delete inputType;
    return NULL;
  }
}
/***
 * print surface factor only for the given phrase
 *
 * Writes the target phrase of one hypothesis to `out`: either all factors
 * (reportAllFactors) or the requested factors joined with '|', one word per
 * token followed by a space. With reportSegmentation, appends the covered
 * source range as "|start-end| ".
 */
void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
                   bool reportSegmentation, bool reportAllFactors)
{
  CHECK(outputFactorOrder.size() > 0);
  const Phrase& phrase = edge.GetCurrTargetPhrase();
  if (reportAllFactors == true) {
    out << phrase;
  } else {
    size_t size = phrase.GetSize();
    for (size_t pos = 0 ; pos < size ; pos++) {
      const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
      // bug fix: validate the pointer BEFORE dereferencing it
      // (was: out << *factor; CHECK(factor);)
      CHECK(factor);
      out << *factor;
      for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
        const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
        CHECK(factor);
        out << "|" << *factor;
      }
      out << " ";
    }
  }
  // trace option "-t"
  if (reportSegmentation == true && phrase.GetSize() > 0) {
    out << "|" << edge.GetCurrSourceWordsRange().GetStartPos()
        << "-" << edge.GetCurrSourceWordsRange().GetEndPos() << "| ";
  }
}
void OutputBestSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder,
bool reportSegmentation, bool reportAllFactors)
{
if (hypo != NULL) {
// recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
OutputBestSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
OutputSurface(out, *hypo, outputFactorOrder, reportSegmentation, reportAllFactors);
}
}
// Print one phrase pair's word alignments as "src-tgt " pairs, with the
// given offsets added to map phrase-internal positions to sentence positions.
void OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset)
{
  typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
  const AlignVec sorted = ai.GetSortedAlignments();
  for (AlignVec::const_iterator iter = sorted.begin(); iter != sorted.end(); ++iter) {
    const std::pair<size_t,size_t> &point = **iter;
    out << point.first + sourceOffset << "-" << point.second + targetOffset << " ";
  }
}
// Print the word alignments of a full path (edges stored last-to-first),
// walking the hypotheses in source order and accumulating target offsets.
void OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
{
  size_t targetOffset = 0;
  for (vector<const Hypothesis *>::const_reverse_iterator it = edges.rbegin(); it != edges.rend(); ++it) {
    const Hypothesis &edge = **it;
    const TargetPhrase &tp = edge.GetCurrTargetPhrase();
    const size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
    OutputAlignment(out, tp.GetAlignmentInfo(), sourceOffset, targetOffset);
    targetOffset += tp.GetSize();
  }
  out << std::endl;
}
// Render the path's alignments to a string and hand it to the collector
// keyed by line number (keeps multi-threaded output in input order).
void OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<const Hypothesis *> &edges)
{
  ostringstream buffer;
  OutputAlignment(buffer, edges);
  collector->Write(lineNo, buffer.str());
}
// Collect the back-pointer chain of the best hypothesis and emit its
// alignments; a NULL collector disables alignment output entirely.
void OutputAlignment(OutputCollector* collector, size_t lineNo , const Hypothesis *hypo)
{
  if (!collector) {
    return;
  }
  std::vector<const Hypothesis *> chain;
  for (const Hypothesis *h = hypo; h != NULL; h = h->GetPrevHypo()) {
    chain.push_back(h);
  }
  OutputAlignment(collector, lineNo, chain);
}
// Emit the alignments of an n-best path; NULL collector means disabled.
void OutputAlignment(OutputCollector* collector, size_t lineNo , const TrellisPath &path)
{
  if (!collector) {
    return;
  }
  OutputAlignment(collector, lineNo, path.GetEdges());
}
// Print the surface string of an n-best path (edges stored last-to-first)
// followed by a newline, using the globally configured output factors.
void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, bool reportSegmentation, bool reportAllFactors, std::ostream &out)
{
  const std::vector<const Hypothesis *> &edges = path.GetEdges();
  for (std::vector<const Hypothesis *>::const_reverse_iterator it = edges.rbegin(); it != edges.rend(); ++it) {
    OutputSurface(out, **it, StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors);
  }
  out << endl;
}
void IOWrapper::Backtrack(const Hypothesis *hypo)
{
if (hypo->GetPrevHypo() != NULL) {
VERBOSE(3,hypo->GetId() << " <= ");
Backtrack(hypo->GetPrevHypo());
}
}
// Print an MBR-decoded best hypothesis (a plain word sequence) as
// space-separated primary-factor strings, followed by a newline.
void OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*translationId*/, bool /*reportSegmentation*/, bool /*reportAllFactors*/, ostream& out)
{
  for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
    const Factor *factor = mbrBestHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
    CHECK(factor);
    if (i > 0) {
      out << " ";
    }
    out << *factor;
  }
  out << endl;
}
// Walk the back-pointer chain and record each covered source phrase at its
// start position in `map`; the initial (empty) hypothesis is skipped.
void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
{
  const Hypothesis* prev = hypo->GetPrevHypo();
  if (prev == NULL) {
    return;
  }
  OutputInput(map, prev);
  map[hypo->GetCurrSourceWordsRange().GetStartPos()] = hypo->GetSourcePhrase();
}
// Print the source phrases covered by the best path, in source order.
void OutputInput(std::ostream& os, const Hypothesis* hypo)
{
  const size_t len = hypo->GetInput().GetSize();
  std::vector<const Phrase*> phrases(len, 0);
  OutputInput(phrases, hypo);
  for (size_t pos = 0; pos < len; ++pos) {
    if (phrases[pos] != 0) {
      os << *phrases[pos];
    }
  }
}
// Emit the single-best translation for one sentence to stdout, unless the
// n-best list is already going to stdout (m_surpressSingleBestOutput).
// A NULL hypothesis means decoding failed; an empty line is emitted so the
// output stays line-aligned with the input.
void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors)
{
  if (hypo != NULL) {
    VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
    VERBOSE(3,"Best path: ");
    Backtrack(hypo);
    VERBOSE(3,"0" << std::endl);
    if (!m_surpressSingleBestOutput) {
      if (StaticData::Instance().IsPathRecoveryEnabled()) {
        // prefix the translation with the segmented source it was built from
        OutputInput(cout, hypo);
        cout << "||| ";
      }
      OutputBestSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
      cout << endl;
    }
  } else {
    VERBOSE(1, "NO BEST TRANSLATION" << endl);
    if (!m_surpressSingleBestOutput) {
      cout << endl;
    }
  }
}
// Write an n-best list in the standard Moses format:
//   id ||| surface ||| feature scores ||| total [||| alignments] [||| input]
// Feature scores are grouped per producer; with labeled output each group is
// prefixed by the producer's short weight name.
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder, const TranslationSystem* system, long translationId, bool reportSegmentation)
{
  const StaticData &staticData = StaticData::Instance();
  bool labeledOutput = staticData.IsLabeledNBestList();
  bool reportAllFactors = staticData.GetReportAllFactorsNBest();
  bool includeAlignment = staticData.NBestIncludesAlignment();
  bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
  TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const TrellisPath &path = **iter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();
    // print the surface factor of the translation
    out << translationId << " ||| ";
    for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
    }
    out << " |||";
    // feature scores: stateful, then stateless producers; a label is printed
    // whenever the producer's short name changes
    std::string lastName = "";
    const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
    for( size_t i=0; i<sff.size(); i++ ) {
      if( labeledOutput && lastName != sff[i]->GetScoreProducerWeightShortName() ) {
        lastName = sff[i]->GetScoreProducerWeightShortName();
        out << " " << lastName << ":";
      }
      vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( sff[i] );
      for (size_t j = 0; j<scores.size(); ++j) {
        out << " " << scores[j];
      }
    }
    const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
    for( size_t i=0; i<slf.size(); i++ ) {
      if( labeledOutput && lastName != slf[i]->GetScoreProducerWeightShortName() ) {
        lastName = slf[i]->GetScoreProducerWeightShortName();
        out << " " << lastName << ":";
      }
      vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( slf[i] );
      for (size_t j = 0; j<scores.size(); ++j) {
        out << " " << scores[j];
      }
    }
    // translation components
    const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
    if (pds.size() > 0) {
      for( size_t i=0; i<pds.size(); i++ ) {
        // a new label is emitted at the boundary between input scores and
        // regular translation scores (index pd_numinputscore)
        size_t pd_numinputscore = pds[i]->GetNumInputScores();
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
        for (size_t j = 0; j<scores.size(); ++j){
          if (labeledOutput && (i == 0) ){
            if ((j == 0) || (j == pd_numinputscore)){
              lastName = pds[i]->GetScoreProducerWeightShortName(j);
              out << " " << lastName << ":";
            }
          }
          out << " " << scores[j];
        }
      }
    }
    // generation
    const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
    if (gds.size() > 0) {
      for( size_t i=0; i<gds.size(); i++ ) {
        size_t pd_numinputscore = gds[i]->GetNumInputScores();
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
        for (size_t j = 0; j<scores.size(); ++j){
          if (labeledOutput && (i == 0) ){
            if ((j == 0) || (j == pd_numinputscore)){
              lastName = gds[i]->GetScoreProducerWeightShortName(j);
              out << " " << lastName << ":";
            }
          }
          out << " " << scores[j];
        }
      }
    }
    // total
    out << " ||| " << path.GetTotalScore();
    //phrase-to-phrase alignment
    if (includeAlignment) {
      out << " |||";
      // size()-2 skips the initial empty hypothesis, which covers nothing
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        out << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out<< "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out<< "-" << targetRange.GetEndPos();
        }
      }
    }
    if (includeWordAlignment) {
      out << " ||| ";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        const int sourceOffset = sourceRange.GetStartPos();
        const int targetOffset = targetRange.GetStartPos();
        const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignmentInfo();
        OutputAlignment(out, ai, sourceOffset, targetOffset);
      }
    }
    if (StaticData::Instance().IsPathRecoveryEnabled()) {
      out << "|||";
      OutputInput(out, edges[0]);
    }
    out << endl;
  }
  out <<std::flush;
}
void OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>& solutions,long translationId)
{
for (vector<LatticeMBRSolution>::const_iterator si = solutions.begin(); si != solutions.end(); ++si) {
out << translationId;
out << " |||";
const vector<Word> mbrHypo = si->GetWords();
for (size_t i = 0 ; i < mbrHypo.size() ; i++) {
const Factor *factor = mbrHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
if (i>0) out << " " << *factor;
else out << *factor;
}
out << " |||";
out << " map: " << si->GetMapScore();
out << " w: " << mbrHypo.size();
const vector<float>& ngramScores = si->GetNgramScores();
for (size_t i = 0; i < ngramScores.size(); ++i) {
out << " " << ngramScores[i];
}
out << " ||| " << si->GetScore();
out << endl;
}
}
/** Write the lattice-MBR n-best list to the configured n-best output stream. */
void IOWrapper::OutputLatticeMBRNBestList(const vector<LatticeMBRSolution>& solutions,long translationId)
{
  // Delegates to the free function with this wrapper's n-best stream.
  OutputLatticeMBRNBest(*m_nBestStream, solutions,translationId);
}
/** Read the next input unit of the requested type into `source`,
 *  freeing whatever `source` pointed to before.
 *  @param ioWrapper  the IO object to read from
 *  @param inputType  kind of input expected (sentence, confusion net, lattice)
 *  @param source     in/out: previous input (deleted), new input (or NULL)
 *  @return true if an input was read, false on end of input or unknown type.
 */
bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source)
{
  delete source;
  // Reset immediately: the old code left `source` dangling on an unknown
  // input type and then reported success via the non-NULL (freed) pointer.
  source = NULL;
  switch(inputType) {
  case SentenceInput:
    source = ioWrapper.GetInput(new Sentence);
    break;
  case ConfusionNetworkInput:
    source = ioWrapper.GetInput(new ConfusionNet);
    break;
  case WordLatticeInput:
    source = ioWrapper.GetInput(new WordLattice);
    break;
  default:
    TRACE_ERR("Unknown input type: " << inputType << "\n");
  }
  // GetInput returns NULL at end of input, which also yields false here.
  return (source ? true : false);
}
/** Build the process-wide IOWrapper: reads from the file named by the
 *  single --input-file parameter when given, otherwise from stdin/stdout. */
IOWrapper *GetIOWrapper(const StaticData &staticData)
{
  const std::vector<FactorType> &inFactors = staticData.GetInputFactorOrder();
  const std::vector<FactorType> &outFactors = staticData.GetOutputFactorOrder();
  FactorMask inFactorsUsed(inFactors);

  // Choose file-based or stream-based IO.
  IOWrapper *wrapper = NULL;
  if (staticData.GetParam("input-file").size() == 1) {
    VERBOSE(2,"IO from File" << endl);
    string filePath = staticData.GetParam("input-file")[0];
    wrapper = new IOWrapper(inFactors, outFactors, inFactorsUsed
                            , staticData.GetNBestSize()
                            , staticData.GetNBestFilePath()
                            , filePath);
  } else {
    VERBOSE(1,"IO from STDOUT/STDIN" << endl);
    wrapper = new IOWrapper(inFactors, outFactors, inFactorsUsed
                            , staticData.GetNBestSize()
                            , staticData.GetNBestFilePath());
  }

  wrapper->ResetTranslationId();

  IFVERBOSE(1)
  PrintUserTime("Created input-output object");

  return wrapper;
}
}

View File

@ -0,0 +1,142 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (c) 2006 University of Edinburgh
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the University of Edinburgh nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
// example file on how to use moses library
#ifndef moses_cmd_IOWrapper_h
#define moses_cmd_IOWrapper_h
#include <cassert>
#include <fstream>
#include <ostream>
#include <vector>
#include "util/check.hh"
#include "TypeDef.h"
#include "Sentence.h"
#include "FactorTypeSet.h"
#include "FactorCollection.h"
#include "Hypothesis.h"
#include "OutputCollector.h"
#include "TrellisPathList.h"
#include "InputFileStream.h"
#include "InputType.h"
#include "WordLattice.h"
#include "LatticeMBR.h"
namespace MosesCmd
{
/** Helper class that holds misc variables to write data out to command line.
*/
class IOWrapper
{
protected:
  long m_translationId;  // id of the sentence currently being processed

  // Factor configuration; references owned by StaticData.
  const std::vector<Moses::FactorType> &m_inputFactorOrder;
  const std::vector<Moses::FactorType> &m_outputFactorOrder;
  const Moses::FactorMask &m_inputFactorUsed;

  // Input source: a file when m_inputFilePath is set, otherwise stdin.
  std::string m_inputFilePath;
  Moses::InputFileStream *m_inputFile;
  std::istream *m_inputStream;

  // Optional output streams; NOTE(review): presumably NULL/unused when the
  // corresponding feature is switched off — confirm in Initialization().
  std::ostream *m_nBestStream
  ,*m_outputWordGraphStream,*m_outputSearchGraphStream;
  std::ostream *m_detailedTranslationReportingStream;
  std::ofstream *m_alignmentOutputStream;
  bool m_surpressSingleBestOutput;

  // Shared setup used by both public constructors.
  void Initialization(const std::vector<Moses::FactorType> &inputFactorOrder
                      , const std::vector<Moses::FactorType> &outputFactorOrder
                      , const Moses::FactorMask &inputFactorUsed
                      , size_t nBestSize
                      , const std::string &nBestFilePath);

public:
  /// Construct for reading input from stdin.
  IOWrapper(const std::vector<Moses::FactorType> &inputFactorOrder
            , const std::vector<Moses::FactorType> &outputFactorOrder
            , const Moses::FactorMask &inputFactorUsed
            , size_t nBestSize
            , const std::string &nBestFilePath);

  /// Construct for reading input from the file at infilePath.
  IOWrapper(const std::vector<Moses::FactorType> &inputFactorOrder
            , const std::vector<Moses::FactorType> &outputFactorOrder
            , const Moses::FactorMask &inputFactorUsed
            , size_t nBestSize
            , const std::string &nBestFilePath
            , const std::string &infilePath);
  ~IOWrapper();

  /// Read the next input into the given (caller-allocated) InputType object.
  Moses::InputType* GetInput(Moses::InputType *inputType);

  void OutputBestHypo(const Moses::Hypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors);
  void OutputLatticeMBRNBestList(const std::vector<LatticeMBRSolution>& solutions,long translationId);
  void Backtrack(const Moses::Hypothesis *hypo);

  /// Restart sentence numbering at 0 (e.g. for a new input stream).
  void ResetTranslationId() {
    m_translationId = 0;
  }

  std::ofstream *GetAlignmentOutputStream() {
    return m_alignmentOutputStream;
  }

  std::ostream &GetOutputWordGraphStream() {
    return *m_outputWordGraphStream;
  }
  std::ostream &GetOutputSearchGraphStream() {
    return *m_outputSearchGraphStream;
  }

  std::ostream &GetDetailedTranslationReportingStream() {
    // Only valid when detailed reporting was configured.
    assert (m_detailedTranslationReportingStream);
    return *m_detailedTranslationReportingStream;
  }
};
/// Factory: build the IOWrapper according to the static configuration.
IOWrapper *GetIOWrapper(const Moses::StaticData &staticData);
/// Read the next input of the given type; returns false when none is available.
bool ReadInput(IOWrapper &ioWrapper, Moses::InputTypeEnum inputType, Moses::InputType*& source);
/// Print the best hypothesis' surface string to `out`.
void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder, bool reportSegmentation, bool reportAllFactors);
/// Print a standard n-best list for one sentence.
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>&,
                 const Moses::TranslationSystem* system, long translationId, bool reportSegmentation);
/// Print the n-best list produced by lattice MBR decoding.
void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId);
/// Print a best hypothesis given as a word sequence (MBR output path).
void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, long /*translationId*/,
                    bool reportSegmentation, bool reportAllFactors, std::ostream& out);
/// Print a best hypothesis given as a trellis path (consensus decoding path).
void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,bool reportSegmentation, bool reportAllFactors, std::ostream &out);
/// Echo the source segment covered by a hypothesis chain.
void OutputInput(std::ostream& os, const Moses::Hypothesis* hypo);
/// Word alignment output for a single hypothesis / a whole trellis path.
void OutputAlignment(Moses::OutputCollector* collector, size_t lineNo, const Moses::Hypothesis *hypo);
void OutputAlignment(Moses::OutputCollector* collector, size_t lineNo, const Moses::TrellisPath &path);
}
#endif

View File

@ -0,0 +1,6 @@
# Link against the main moses library.
alias deps : ../../../moses/src//moses ;
# Divergence-calculator executable.
# NOTE(review): this target compiles its own copies of IOWrapper/LatticeMBR —
# verify these are intentionally duplicated from moses-cmd.
exe calcDivergence : Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp LatticeMBR.cpp RelativeEntropyCalc.cpp deps ;
alias programs : calcDivergence ;

View File

@ -0,0 +1,669 @@
/*
* LatticeMBR.cpp
* moses-cmd
*
* Created by Abhishek Arun on 26/01/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include "LatticeMBR.h"
#include "StaticData.h"
#include <algorithm>
#include <set>
using namespace std;
using namespace Moses;
namespace MosesCmd
{
size_t bleu_order = 4;        // maximum n-gram length used in the BLEU-style MBR loss
float UNKNGRAMLOGPROB = -20;  // floor log-probability for n-grams not found in the lattice
void GetOutputWords(const TrellisPath &path, vector <Word> &translation)
{
const std::vector<const Hypothesis *> &edges = path.GetEdges();
// print the surface factor of the translation
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
const Hypothesis &edge = *edges[currEdge];
const Phrase &phrase = edge.GetCurrTargetPhrase();
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++) {
translation.push_back(phrase.GetWord(pos));
}
}
}
/** Count every n-gram of the sentence, for n = 1..bleu_order, into allngrams. */
void extract_ngrams(const vector<Word >& sentence, map < Phrase, int > & allngrams)
{
  const int sentSize = (int)sentence.size();
  // Enumerate by n-gram length, then by start position.
  for (int len = 1; len <= (int)bleu_order; ++len) {
    for (int start = 0; start + len <= sentSize; ++start) {
      Phrase ngram(len);
      for (int pos = start; pos < start + len; ++pos) {
        ngram.AddWord(sentence[pos]);
      }
      ++allngrams[ngram];
    }
  }
}
void NgramScores::addScore(const Hypothesis* node, const Phrase& ngram, float score)
{
set<Phrase>::const_iterator ngramIter = m_ngrams.find(ngram);
if (ngramIter == m_ngrams.end()) {
ngramIter = m_ngrams.insert(ngram).first;
}
map<const Phrase*,float>& ngramScores = m_scores[node];
map<const Phrase*,float>::iterator scoreIter = ngramScores.find(&(*ngramIter));
if (scoreIter == ngramScores.end()) {
ngramScores[&(*ngramIter)] = score;
} else {
ngramScores[&(*ngramIter)] = log_sum(score,scoreIter->second);
}
}
/** First (ngram, score) entry recorded for `node`.
 *  Note: operator[] inserts an empty map for a node never scored. */
NgramScores::NodeScoreIterator NgramScores::nodeBegin(const Hypothesis* node)
{
  return m_scores[node].begin();
}
/** Past-the-end iterator for the (ngram, score) entries of `node`.
 *  Note: operator[] inserts an empty map for a node never scored. */
NgramScores::NodeScoreIterator NgramScores::nodeEnd(const Hypothesis* node)
{
  return m_scores[node].end();
}
/** Read the surface words off the trellis path; remember the model (MAP)
 *  score only when this path is the MAP hypothesis. */
LatticeMBRSolution::LatticeMBRSolution(const TrellisPath& path, bool isMap) :
  m_score(0.0f)
{
  const std::vector<const Hypothesis *> &edges = path.GetEdges();

  // Edges are stored last-to-first; reverse iteration restores word order.
  for (size_t i = edges.size(); i > 0; --i) {
    const Phrase &phrase = edges[i-1]->GetCurrTargetPhrase();
    const size_t phraseSize = phrase.GetSize();
    for (size_t pos = 0; pos < phraseSize; ++pos) {
      m_words.push_back(phrase.GetWord(pos));
    }
  }

  m_mapScore = isMap ? path.GetTotalScore() : 0;
}
/** Score this solution against the lattice n-gram posteriors:
 *  m_score = thetas[0]*length + sum_i thetas[i+1]*ngramScore_i + mapWeight*mapScore.
 *  @param finalNgramScores  log-posteriors of n-grams, keyed by phrase
 *  @param thetas            weights: [0] length, [i] n-grams of order i
 *  @param mapWeight         weight given to the MAP model score
 */
void LatticeMBRSolution::CalcScore(map<Phrase, float>& finalNgramScores, const vector<float>& thetas, float mapWeight)
{
  // One accumulator per n-gram order; -10000 acts as log(~0).
  m_ngramScores.assign(thetas.size()-1, -10000);

  map < Phrase, int > counts;
  extract_ngrams(m_words,counts);

  //Now score this translation
  m_score = thetas[0] * m_words.size();

  //Calculate the ngramScores, working in log space at first
  for (map < Phrase, int >::iterator ngrams = counts.begin(); ngrams != counts.end(); ++ngrams) {
    // N-grams absent from the lattice get the floor log-probability.
    float ngramPosterior = UNKNGRAMLOGPROB;
    map<Phrase,float>::const_iterator ngramPosteriorIt = finalNgramScores.find(ngrams->first);
    if (ngramPosteriorIt != finalNgramScores.end()) {
      ngramPosterior = ngramPosteriorIt->second;
    }
    size_t ngramSize = ngrams->first.GetSize();
    // Accumulate count * posterior in log space, bucketed by order.
    m_ngramScores[ngramSize-1] = log_sum(log((float)ngrams->second) + ngramPosterior,m_ngramScores[ngramSize-1]);
  }

  //convert from log to probability and create weighted sum
  for (size_t i = 0; i < m_ngramScores.size(); ++i) {
    m_ngramScores[i] = exp(m_ngramScores[i]);
    m_score += thetas[i+1] * m_ngramScores[i];
  }

  //The map score
  m_score += m_mapScore*mapWeight;
}
/** Prune the search lattice down to roughly (edgeDensity * MAP-length) edges,
 *  admitting hypotheses in decreasing order of estimated score.
 *  On return: incomingEdges holds the surviving edges per head node, and
 *  connectedHyp is rewritten to contain only the surviving hypotheses.
 *  @param scale  multiplier applied to every edge score
 */
void pruneLatticeFB(Lattice & connectedHyp, map < const Hypothesis*, set <const Hypothesis* > > & outgoingHyps, map<const Hypothesis*, vector<Edge> >& incomingEdges,
                    const vector< float> & estimatedScores, const Hypothesis* bestHypo, size_t edgeDensity, float scale)
{
  //Need hyp 0 in connectedHyp - Find empty hypothesis
  VERBOSE(2,"Pruning lattice to edge density " << edgeDensity << endl);
  const Hypothesis* emptyHyp = connectedHyp.at(0);
  while (emptyHyp->GetId() != 0) {
    emptyHyp = emptyHyp->GetPrevHypo();
  }
  connectedHyp.push_back(emptyHyp); //Add it to list of hyps

  //Need hyp 0's outgoing Hyps
  for (size_t i = 0; i < connectedHyp.size(); ++i) {
    if (connectedHyp[i]->GetId() > 0 && connectedHyp[i]->GetPrevHypo()->GetId() == 0)
      outgoingHyps[emptyHyp].insert(connectedHyp[i]);
  }

  //sort hyps based on estimated scores - do so by copying to multimap
  multimap<float, const Hypothesis*> sortHypsByVal;
  for (size_t i =0; i < estimatedScores.size(); ++i) {
    sortHypsByVal.insert(make_pair(estimatedScores[i], connectedHyp[i]));
  }

  multimap<float, const Hypothesis*>::const_iterator it = --sortHypsByVal.end();
  float bestScore = it->first;
  //store best score as score of hyp 0
  sortHypsByVal.insert(make_pair(bestScore, emptyHyp));

  IFVERBOSE(3) {
    for (multimap<float, const Hypothesis*>::const_iterator it = --sortHypsByVal.end(); it != --sortHypsByVal.begin(); --it) {
      const Hypothesis* currHyp = it->second;
      cerr << "Hyp " << currHyp->GetId() << ", estimated score: " << it->first << endl;
    }
  }

  set <const Hypothesis*> survivingHyps; //store hyps that make the cut in this

  VERBOSE(2, "BEST HYPO TARGET LENGTH : " << bestHypo->GetSize() << endl)
  size_t numEdgesTotal = edgeDensity * bestHypo->GetSize(); //as per Shankar, aim for (density * target length of MAP solution) arcs
  size_t numEdgesCreated = 0;
  VERBOSE(2, "Target edge count: " << numEdgesTotal << endl);

  float prevScore = -999999;

  //now iterate over multimap, from highest estimated score downwards
  for (multimap<float, const Hypothesis*>::const_iterator it = --sortHypsByVal.end(); it != --sortHypsByVal.begin(); --it) {
    float currEstimatedScore = it->first;
    const Hypothesis* currHyp = it->second;
    if (numEdgesCreated >= numEdgesTotal && prevScore > currEstimatedScore) //if this hyp has equal estimated score to previous, include its edges too
      break;
    prevScore = currEstimatedScore;
    VERBOSE(3, "Num edges created : "<< numEdgesCreated << ", numEdges wanted " << numEdgesTotal << endl)
    VERBOSE(3, "Considering hyp " << currHyp->GetId() << ", estimated score: " << it->first << endl)

    survivingHyps.insert(currHyp); //CurrHyp made the cut

    // is its best predecessor already included ?
    if (survivingHyps.find(currHyp->GetPrevHypo()) != survivingHyps.end()) { //yes, then add an edge
      vector <Edge>& edges = incomingEdges[currHyp];
      // Edge score is the score delta between the two hypotheses, scaled.
      Edge winningEdge(currHyp->GetPrevHypo(),currHyp,scale*(currHyp->GetScore() - currHyp->GetPrevHypo()->GetScore()),currHyp->GetCurrTargetPhrase());
      edges.push_back(winningEdge);
      ++numEdgesCreated;
    }

    //let's try the arcs too (recombined hypotheses attached to currHyp)
    const ArcList *arcList = currHyp->GetArcList();
    if (arcList != NULL) {
      ArcList::const_iterator iterArcList;
      for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
        const Hypothesis *loserHypo = *iterArcList;
        const Hypothesis* loserPrevHypo = loserHypo->GetPrevHypo();
        if (survivingHyps.find(loserPrevHypo) != survivingHyps.end()) { //found it, add edge
          double arcScore = loserHypo->GetScore() - loserPrevHypo->GetScore();
          Edge losingEdge(loserPrevHypo, currHyp, arcScore*scale, loserHypo->GetCurrTargetPhrase());
          vector <Edge>& edges = incomingEdges[currHyp];
          edges.push_back(losingEdge);
          ++numEdgesCreated;
        }
      }
    }

    //Now if a successor node has already been visited, add an edge connecting the two
    map < const Hypothesis*, set < const Hypothesis* > >::const_iterator outgoingIt = outgoingHyps.find(currHyp);

    if (outgoingIt != outgoingHyps.end()) {//currHyp does have successors
      const set<const Hypothesis*> & outHyps = outgoingIt->second; //the successors
      for (set<const Hypothesis*>::const_iterator outHypIts = outHyps.begin(); outHypIts != outHyps.end(); ++outHypIts) {
        const Hypothesis* succHyp = *outHypIts;

        if (survivingHyps.find(succHyp) == survivingHyps.end()) //Have we encountered the successor yet?
          continue; //No, move on to next

        //Curr Hyp can be : a) the best predecessor of succ b) or an arc attached to succ
        if (succHyp->GetPrevHypo() == currHyp) { //best predecessor
          vector <Edge>& succEdges = incomingEdges[succHyp];
          Edge succWinningEdge(currHyp, succHyp, scale*(succHyp->GetScore() - currHyp->GetScore()), succHyp->GetCurrTargetPhrase());
          succEdges.push_back(succWinningEdge);
          survivingHyps.insert(succHyp);
          ++numEdgesCreated;
        }

        //now, let's find an arc
        const ArcList *arcList = succHyp->GetArcList();
        if (arcList != NULL) {
          ArcList::const_iterator iterArcList;
          //QUESTION: What happens if there's more than one loserPrevHypo?
          for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
            const Hypothesis *loserHypo = *iterArcList;
            const Hypothesis* loserPrevHypo = loserHypo->GetPrevHypo();
            if (loserPrevHypo == currHyp) { //found it
              vector <Edge>& succEdges = incomingEdges[succHyp];
              double arcScore = loserHypo->GetScore() - currHyp->GetScore();
              Edge losingEdge(currHyp, succHyp,scale* arcScore, loserHypo->GetCurrTargetPhrase());
              succEdges.push_back(losingEdge);
              ++numEdgesCreated;
            }
          }
        }
      }
    }
  }

  // Replace the node list with the survivors only.
  connectedHyp.clear();
  for (set <const Hypothesis*>::iterator it = survivingHyps.begin(); it != survivingHyps.end(); ++it) {
    connectedHyp.push_back(*it);
  }

  VERBOSE(2, "Done! Num edges created : "<< numEdgesCreated << ", numEdges wanted " << numEdgesTotal << endl)

  IFVERBOSE(3) {
    cerr << "Surviving hyps: " ;
    for (set <const Hypothesis*>::iterator it = survivingHyps.begin(); it != survivingHyps.end(); ++it) {
      cerr << (*it)->GetId() << " ";
    }
    cerr << endl;
  }
}
/** Forward pass over the pruned lattice: compute, for each complete
 *  hypothesis, the log-score mass of every n-gram reaching it, then
 *  normalise by the total lattice mass Z to obtain n-gram
 *  posteriors (posteriors==true) or expectations (posteriors==false)
 *  in finalNgramScores.
 *  Nodes must be processed in topological-ish order, hence the sort. */
void calcNgramExpectations(Lattice & connectedHyp, map<const Hypothesis*, vector<Edge> >& incomingEdges,
                           map<Phrase, float>& finalNgramScores, bool posteriors)
{
  sort(connectedHyp.begin(),connectedHyp.end(),ascendingCoverageCmp); //sort by increasing source word cov

  /*cerr << "Lattice:" << endl;
  for (Lattice::const_iterator i = connectedHyp.begin(); i != connectedHyp.end(); ++i) {
    const Hypothesis* h = *i;
    cerr << *h << endl;
    const vector<Edge>& edges = incomingEdges[h];
    for (size_t e = 0; e < edges.size(); ++e) {
      cerr << edges[e];
    }
  }*/

  map<const Hypothesis*, float> forwardScore;
  forwardScore[connectedHyp[0]] = 0.0f; //forward score of hyp 0 is 1 (or 0 in logprob space)
  set< const Hypothesis *> finalHyps; //store completed hyps

  NgramScores ngramScores;//ngram scores for each hyp

  for (size_t i = 1; i < connectedHyp.size(); ++i) {
    const Hypothesis* currHyp = connectedHyp[i];
    if (currHyp->GetWordsBitmap().IsComplete()) {
      finalHyps.insert(currHyp);
    }

    VERBOSE(3, "Processing hyp: " << currHyp->GetId() << ", num words cov= " << currHyp->GetWordsBitmap().GetNumWordsCovered() <<  endl)

    // Forward score = log-sum over all incoming edges.
    vector <Edge> & edges = incomingEdges[currHyp];
    for (size_t e = 0; e < edges.size(); ++e) {
      const Edge& edge = edges[e];
      if (forwardScore.find(currHyp) == forwardScore.end()) {
        forwardScore[currHyp] = forwardScore[edge.GetTailNode()] + edge.GetScore();
        VERBOSE(3, "Fwd score["<<currHyp->GetId()<<"] = fwdScore["<<edge.GetTailNode()->GetId() << "] + edge Score: " << edge.GetScore() << endl)
      } else {
        forwardScore[currHyp] = log_sum(forwardScore[currHyp], forwardScore[edge.GetTailNode()] + edge.GetScore());
        VERBOSE(3, "Fwd score["<<currHyp->GetId()<<"] += fwdScore["<<edge.GetTailNode()->GetId() << "] + edge Score: " << edge.GetScore() << endl)
      }
    }

    //Process ngrams now
    for (size_t j =0 ; j < edges.size(); ++j) {
      Edge& edge = edges[j];
      const NgramHistory & incomingPhrases = edge.GetNgrams(incomingEdges);

      //let's first score ngrams introduced by this edge
      for (NgramHistory::const_iterator it = incomingPhrases.begin(); it != incomingPhrases.end(); ++it) {
        const Phrase& ngram = it->first;
        const PathCounts& pathCounts = it->second;
        VERBOSE(4, "Calculating score for: " << it->first << endl)

        for (PathCounts::const_iterator pathCountIt = pathCounts.begin(); pathCountIt != pathCounts.end(); ++pathCountIt) {
          //Score of an n-gram is forward score of head node of leftmost edge + all edge scores
          const Path& path = pathCountIt->first;
          //cerr << "path count for " << ngram << " is " << pathCountIt->second << endl;
          float score = forwardScore[path[0]->GetTailNode()];
          for (size_t i = 0; i < path.size(); ++i) {
            score += path[i]->GetScore();
          }
          //if we're doing expectations, then the number of times the ngram
          //appears on the path is relevant.
          size_t count = posteriors ? 1 : pathCountIt->second;
          for (size_t k = 0; k < count; ++k) {
            ngramScores.addScore(currHyp,ngram,score);
          }
        }
      }

      //Now score ngrams that are just being propagated from the history
      for (NgramScores::NodeScoreIterator it = ngramScores.nodeBegin(edge.GetTailNode());
           it != ngramScores.nodeEnd(edge.GetTailNode()); ++it) {
        const Phrase & currNgram = *(it->first);
        float currNgramScore = it->second;
        VERBOSE(4, "Calculating score for: " << currNgram << endl)

        // For posteriors, don't double count ngrams
        if (!posteriors || incomingPhrases.find(currNgram) == incomingPhrases.end()) {
          float score = edge.GetScore() + currNgramScore;
          ngramScores.addScore(currHyp,currNgram,score);
        }
      }
    }
  }

  float Z = 9999999; //the total score of the lattice (sentinel until first final hyp)

  //Done - Print out ngram posteriors for final hyps
  for (set< const Hypothesis *>::iterator finalHyp = finalHyps.begin(); finalHyp != finalHyps.end(); ++finalHyp) {
    const Hypothesis* hyp = *finalHyp;

    for (NgramScores::NodeScoreIterator it = ngramScores.nodeBegin(hyp); it != ngramScores.nodeEnd(hyp); ++it) {
      const Phrase& ngram = *(it->first);
      if (finalNgramScores.find(ngram) == finalNgramScores.end()) {
        finalNgramScores[ngram] = it->second;
      } else {
        finalNgramScores[ngram] = log_sum(it->second, finalNgramScores[ngram]);
      }
    }

    if (Z == 9999999) {
      Z = forwardScore[hyp];
    } else {
      Z = log_sum(Z, forwardScore[hyp]);
    }
  }

  //Z *= scale;  //scale the score

  // Normalise every accumulated score by the total lattice mass.
  for (map<Phrase, float>::iterator finalScoresIt = finalNgramScores.begin(); finalScoresIt != finalNgramScores.end(); ++finalScoresIt) {
    finalScoresIt->second =  finalScoresIt->second - Z;
    IFVERBOSE(2) {
      VERBOSE(2,finalScoresIt->first << " [" << finalScoresIt->second << "]" << endl);
    }
  }
}
/** Lazily compute (and memoise in m_ngrams) all n-grams ending on this edge:
 *  those fully contained in this edge's phrase, plus those straddling this
 *  edge and its incoming edges, found by matching suffixes recursively. */
const NgramHistory& Edge::GetNgrams(map<const Hypothesis*, vector<Edge> > & incomingEdges)
{
  // Memoised: non-empty means we've already been computed.
  if (m_ngrams.size() > 0)
    return m_ngrams;

  const Phrase& currPhrase = GetWords();
  //Extract the n-grams local to this edge
  for (size_t start = 0; start < currPhrase.GetSize(); ++start) {
    for (size_t end = start; end < start + bleu_order; ++end) {
      if (end < currPhrase.GetSize()) {
        Phrase edgeNgram(end-start+1);
        for (size_t index = start; index <= end; ++index) {
          edgeNgram.AddWord(currPhrase.GetWord(index));
        }
        //cout << "Inserting Phrase : " << edgeNgram << endl;
        vector<const Edge*> edgeHistory;
        edgeHistory.push_back(this);
        storeNgramHistory(edgeNgram, edgeHistory);
      } else {
        break;
      }
    }
  }

  map<const Hypothesis*, vector<Edge> >::iterator it = incomingEdges.find(m_tailNode);
  if (it != incomingEdges.end()) { //node has incoming edges
    vector<Edge> & inEdges = it->second;

    for (vector<Edge>::iterator edge = inEdges.begin(); edge != inEdges.end(); ++edge) {//add the ngrams straddling prev and curr edge
      const NgramHistory & edgeIncomingNgrams = edge->GetNgrams(incomingEdges);
      for (NgramHistory::const_iterator edgeInNgramHist = edgeIncomingNgrams.begin(); edgeInNgramHist != edgeIncomingNgrams.end(); ++edgeInNgramHist) {
        const Phrase& edgeIncomingNgram = edgeInNgramHist->first;
        const PathCounts &  edgeIncomingNgramPaths = edgeInNgramHist->second;
        // Compare only the overlapping suffix region.
        size_t back = min(edgeIncomingNgram.GetSize(), edge->GetWordsSize());
        const Phrase& edgeWords = edge->GetWords();
        IFVERBOSE(3) {
          cerr << "Edge: "<< *edge <<endl;
          cerr << "edgeWords: " << edgeWords << endl;
          cerr << "edgeInNgram: " << edgeIncomingNgram << endl;
        }

        Phrase edgeSuffix(ARRAY_SIZE_INCR);
        Phrase ngramSuffix(ARRAY_SIZE_INCR);
        GetPhraseSuffix(edgeWords,back,edgeSuffix);
        GetPhraseSuffix(edgeIncomingNgram,back,ngramSuffix);

        if (ngramSuffix == edgeSuffix) { //we've got the suffix of previous edge
          // Extend the incoming ngram with words from this edge, up to bleu_order.
          size_t edgeInNgramSize =  edgeIncomingNgram.GetSize();

          for (size_t i = 0; i < GetWordsSize() && i + edgeInNgramSize < bleu_order ; ++i) {
            Phrase newNgram(edgeIncomingNgram);
            for (size_t j = 0; j <= i ; ++j) {
              newNgram.AddWord(GetWords().GetWord(j));
            }
            VERBOSE(3, "Inserting New Phrase : " << newNgram << endl)

            for (PathCounts::const_iterator pathIt = edgeIncomingNgramPaths.begin(); pathIt !=  edgeIncomingNgramPaths.end(); ++pathIt) {
              Path newNgramPath = pathIt->first;
              newNgramPath.push_back(this);
              storeNgramHistory(newNgram, newNgramPath, pathIt->second);
            }
          }
        }
      }
    }
  }
  return m_ngrams;
}
/** Append the last `lastN` words of origPhrase to targetPhrase.
 *  If lastN exceeds the phrase length, the whole phrase is appended. */
void Edge::GetPhraseSuffix(const Phrase&  origPhrase, size_t lastN, Phrase& targetPhrase) const
{
  size_t origSize = origPhrase.GetSize();
  // Clamp to avoid size_t underflow when lastN > origSize (callers currently
  // pass min(...) <= origSize, but be defensive).
  size_t startIndex = (lastN >= origSize) ? 0 : origSize - lastN;
  for (size_t index = startIndex; index < origPhrase.GetSize(); ++index) {
    targetPhrase.AddWord(origPhrase.GetWord(index));
  }
}
/** Lexicographic ordering on (head id, tail id, score). */
bool Edge::operator< (const Edge & compare ) const
{
  // Compare by head node id first, then tail node id, then score.
  if (m_headNode->GetId() != compare.m_headNode->GetId())
    return m_headNode->GetId() < compare.m_headNode->GetId();
  if (m_tailNode->GetId() != compare.m_tailNode->GetId())
    return m_tailNode->GetId() < compare.m_tailNode->GetId();
  return GetScore() < compare.GetScore();
}
/** Debug dump of an edge; note it emits a trailing endl. */
ostream& operator<< (ostream& out, const Edge& edge)
{
  out << "Head: " << edge.m_headNode->GetId() << ", Tail: " << edge.m_tailNode->GetId() << ", Score: " << edge.m_score << ", Phrase: " << edge.m_targetPhrase << endl;
  return out;
}
/** Orders hypotheses by number of covered source words (topological proxy). */
bool ascendingCoverageCmp(const Hypothesis* a, const Hypothesis* b)
{
  return a->GetWordsBitmap().GetNumWordsCovered() < b->GetWordsBitmap().GetNumWordsCovered();
}
/** Lattice-MBR decoding: prune the search graph, compute n-gram posteriors,
 *  then rescore the n-best list with the linearised BLEU gain and keep
 *  the top `n` solutions (sorted, best first) in `solutions`. */
void getLatticeMBRNBest(Manager& manager, TrellisPathList& nBestList,
                        vector<LatticeMBRSolution>& solutions, size_t n)
{
  const StaticData& staticData = StaticData::Instance();
  std::map < int, bool > connected;
  std::vector< const Hypothesis *> connectedList;
  map<Phrase, float> ngramPosteriors;
  std::map < const Hypothesis*, set <const Hypothesis*> > outgoingHyps;
  map<const Hypothesis*, vector<Edge> > incomingEdges;
  vector< float> estimatedScores;
  manager.GetForwardBackwardSearchGraph(&connected, &connectedList, &outgoingHyps, &estimatedScores);
  pruneLatticeFB(connectedList, outgoingHyps, incomingEdges, estimatedScores, manager.GetBestHypothesis(), staticData.GetLatticeMBRPruningFactor(),staticData.GetMBRScale());
  calcNgramExpectations(connectedList, incomingEdges, ngramPosteriors,true);

  vector<float> mbrThetas = staticData.GetLatticeMBRThetas();
  float p = staticData.GetLatticeMBRPrecision();
  float r = staticData.GetLatticeMBRPRatio();
  float mapWeight = staticData.GetLatticeMBRMapWeight();
  if (mbrThetas.size() == 0) { //thetas not specified on the command line, use p and r instead
    // theta_0 = -1 (length penalty); theta_i decays geometrically by r.
    mbrThetas.push_back(-1); //Theta 0
    mbrThetas.push_back(1/(bleu_order*p));
    for (size_t i = 2; i <= bleu_order; ++i) {
      mbrThetas.push_back( mbrThetas[i-1] / r);
    }
  }
  IFVERBOSE(2) {
    VERBOSE(2,"Thetas: ");
    for (size_t i = 0; i < mbrThetas.size(); ++i) {
      VERBOSE(2,mbrThetas[i] << " ");
    }
    VERBOSE(2,endl);
  }
  TrellisPathList::const_iterator iter;
  size_t ctr = 0;
  LatticeMBRSolutionComparator comparator;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter, ++ctr) {
    const TrellisPath &path = **iter;
    // Only the first (MAP) path carries its model score into the solution.
    solutions.push_back(LatticeMBRSolution(path,iter==nBestList.begin()));
    solutions.back().CalcScore(ngramPosteriors,mbrThetas,mapWeight);
    // Incremental sort+prune keeps at most n solutions in memory.
    sort(solutions.begin(), solutions.end(), comparator);
    while (solutions.size() > n) {
      solutions.pop_back();
    }
  }
  VERBOSE(2,"LMBR Score: " << solutions[0].GetScore() << endl);
}
/** Run lattice MBR keeping only the single best solution; return its words. */
vector<Word> doLatticeMBR(Manager& manager, TrellisPathList& nBestList)
{
  vector<LatticeMBRSolution> best;
  getLatticeMBRNBest(manager, nBestList, best, 1);
  return best.at(0).GetWords();
}
/** Consensus decoding: compute n-gram expectations over the pruned lattice,
 *  then pick the n-best entry with the highest smoothed expected-BLEU score
 *  against those expectations. Returns the winning trellis path.
 *  Asserts if the n-best list is empty or no candidate has matching unigrams. */
const TrellisPath doConsensusDecoding(Manager& manager, TrellisPathList& nBestList)
{
  static const int BLEU_ORDER = 4;
  static const float SMOOTH = 1;

  //calculate the ngram expectations
  const StaticData& staticData = StaticData::Instance();
  std::map < int, bool > connected;
  std::vector< const Hypothesis *> connectedList;
  map<Phrase, float> ngramExpectations;
  std::map < const Hypothesis*, set <const Hypothesis*> > outgoingHyps;
  map<const Hypothesis*, vector<Edge> > incomingEdges;
  vector< float> estimatedScores;
  manager.GetForwardBackwardSearchGraph(&connected, &connectedList, &outgoingHyps, &estimatedScores);
  pruneLatticeFB(connectedList, outgoingHyps, incomingEdges, estimatedScores, manager.GetBestHypothesis(), staticData.GetLatticeMBRPruningFactor(),staticData.GetMBRScale());
  calcNgramExpectations(connectedList, incomingEdges, ngramExpectations,false);

  //expected length is sum of expected unigram counts
  //cerr << "Thread " << pthread_self() <<  " Ngram expectations size: " << ngramExpectations.size() << endl;
  float ref_length = 0.0f;
  for (map<Phrase,float>::const_iterator ref_iter = ngramExpectations.begin();
       ref_iter != ngramExpectations.end(); ++ref_iter) {
    //cerr << "Ngram: " << ref_iter->first << " score: " <<
    //    ref_iter->second << endl;
    if (ref_iter->first.GetSize() == 1) {
      ref_length += exp(ref_iter->second);
      //    cerr << "Expected for " << ref_iter->first << " is " << exp(ref_iter->second) << endl;
    }
  }
  VERBOSE(2,"REF Length: " << ref_length << endl);

  //use the ngram expectations to rescore the nbest list.
  TrellisPathList::const_iterator iter;
  TrellisPathList::const_iterator best = nBestList.end();
  float bestScore = -100000;
  //cerr << "nbest list size: " << nBestList.GetSize() << endl;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const TrellisPath &path = **iter;
    vector<Word> words;
    map<Phrase,int> ngrams;
    GetOutputWords(path,words);
    /*for (size_t i = 0; i < words.size(); ++i) {
      cerr << words[i].GetFactor(0)->GetString() << " ";
    }
    cerr << endl;
    */
    extract_ngrams(words,ngrams);

    // comps holds alternating (clipped match count, possible count) per
    // order, with the expected reference length in the final slot.
    vector<float> comps(2*BLEU_ORDER+1);
    float logbleu = 0.0;
    float brevity = 0.0;
    int hyp_length = words.size();
    for (int i = 0; i < BLEU_ORDER; ++i) {
      comps[2*i] = 0.0;
      comps[2*i+1] = max(hyp_length-i,0);
    }

    for (map<Phrase,int>::const_iterator hyp_iter = ngrams.begin();
         hyp_iter != ngrams.end(); ++hyp_iter) {
      map<Phrase,float>::const_iterator ref_iter = ngramExpectations.find(hyp_iter->first);
      if (ref_iter != ngramExpectations.end()) {
        // Clip the hypothesis count by the expected reference count.
        comps[2*(hyp_iter->first.GetSize()-1)] += min(exp(ref_iter->second), (float)(hyp_iter->second));
      }

    }
    comps[comps.size()-1] = ref_length;
    /*for (size_t i = 0; i < comps.size(); ++i) {
      cerr << comps[i] << " ";
    }
    cerr << endl;
    */

    float score = 0.0f;
    if (comps[0] != 0) {
      // Smoothed BLEU: unsmoothed unigram term, add-SMOOTH for higher orders.
      for (int i=0; i<BLEU_ORDER; i++) {
        if ( i > 0 ) {
          logbleu += log((float)comps[2*i]+SMOOTH)-log((float)comps[2*i+1]+SMOOTH);
        } else {
          logbleu += log((float)comps[2*i])-log((float)comps[2*i+1]);
        }
      }
      logbleu /= BLEU_ORDER;
      brevity = 1.0-(float) comps[comps.size()-1]/comps[1]; // comps[comps_n-1] is the ref length, comps[1] is the test length
      if (brevity < 0.0) {
        logbleu += brevity;
      }
      score =  exp(logbleu);
    }

    //cerr << "score: " << score << " bestScore: " << bestScore <<  endl;
    if (score > bestScore) {
      bestScore = score;
      best = iter;
      VERBOSE(2,"NEW BEST: " << score << endl);
      //for (size_t i = 0; i < comps.size(); ++i) {
      //  cerr << comps[i] << " ";
      //}
      //cerr << endl;
    }
  }

  assert (best != nBestList.end());
  return **best;
  //vector<Word> bestWords;
  //GetOutputWords(**best,bestWords);
  //return bestWords;
}
}

View File

@ -0,0 +1,153 @@
/*
* LatticeMBR.h
* moses-cmd
*
* Created by Abhishek Arun on 26/01/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#ifndef moses_cmd_LatticeMBR_h
#define moses_cmd_LatticeMBR_h
#include <map>
#include <vector>
#include <set>
#include "Hypothesis.h"
#include "Manager.h"
#include "TrellisPathList.h"
namespace MosesCmd
{
class Edge;

typedef std::vector< const Moses::Hypothesis *> Lattice;    // nodes of the (pruned) search lattice
typedef std::vector<const Edge*> Path;                      // a sequence of edges through the lattice
typedef std::map<Path, size_t> PathCounts;                  // occurrence count of an ngram per path
typedef std::map<Moses::Phrase, PathCounts > NgramHistory;  // ngram -> paths on which it occurs
/** A scored arc of the pruned search lattice, carrying the target phrase
 *  produced when moving from the tail hypothesis to the head hypothesis. */
class Edge
{
  const Moses::Hypothesis* m_tailNode;   // predecessor hypothesis
  const Moses::Hypothesis* m_headNode;   // successor hypothesis
  float m_score;                         // (scaled) score delta of this transition
  Moses::TargetPhrase m_targetPhrase;    // words emitted by this transition
  NgramHistory m_ngrams;                 // memoised ngrams ending on this edge

public:
  Edge(const Moses::Hypothesis* from, const Moses::Hypothesis* to, float score, const Moses::TargetPhrase& targetPhrase) : m_tailNode(from), m_headNode(to), m_score(score), m_targetPhrase(targetPhrase) {
    //cout << "Creating new edge from Node " << from->GetId() << ", to Node : " << to->GetId() << ", score: " << score << " phrase: " << targetPhrase << endl;
  }

  const Moses::Hypothesis* GetHeadNode() const {
    return m_headNode;
  }

  const Moses::Hypothesis* GetTailNode() const {
    return m_tailNode;
  }

  float GetScore() const {
    return m_score;
  }

  size_t GetWordsSize() const {
    return m_targetPhrase.GetSize();
  }

  const Moses::Phrase& GetWords() const {
    return m_targetPhrase;
  }

  friend std::ostream& operator<< (std::ostream& out, const Edge& edge);

  /// Lazily computed; memoised in m_ngrams on first call.
  const NgramHistory&  GetNgrams(  std::map<const Moses::Hypothesis*, std::vector<Edge> > & incomingEdges) ;

  bool operator < (const Edge & compare) const;

  /// Append the last lastN words of origPhrase to targetPhrase.
  void GetPhraseSuffix(const Moses::Phrase&  origPhrase, size_t lastN, Moses::Phrase& targetPhrase) const;

  /// Record that `phrase` occurs `count` more times on `path`.
  void storeNgramHistory(const Moses::Phrase& phrase, Path & path, size_t count = 1) {
    m_ngrams[phrase][path]+= count;
  }
};
/**
* Data structure to hold the ngram scores as we traverse the lattice. Maps (hypo,ngram) to score
*/
/**
 * Data structure to hold the ngram scores as we traverse the lattice.
 * Maps (hypothesis, ngram) to a log score.
 */
class NgramScores
{
public:
  NgramScores() {}

  /** logsum this score to the existing score for (node, ngram) */
  void addScore(const Moses::Hypothesis* node, const Moses::Phrase& ngram, float score);

  /** Iterate through ngrams for selected node */
  typedef std::map<const Moses::Phrase*, float>::const_iterator NodeScoreIterator;
  NodeScoreIterator nodeBegin(const Moses::Hypothesis* node);
  NodeScoreIterator nodeEnd(const Moses::Hypothesis* node);

private:
  // Set of all n-grams seen; presumably the Phrase* keys of the inner maps
  // in m_scores point into this set -- confirm in the .cpp.
  std::set<Moses::Phrase> m_ngrams;
  // Per-hypothesis score table: node -> (ngram -> score).
  std::map<const Moses::Hypothesis*, std::map<const Moses::Phrase*, float> > m_scores;
};
/** Holds a lattice mbr solution, and its scores */
class LatticeMBRSolution
{
public:
/** Read the words from the path */
LatticeMBRSolution(const Moses::TrellisPath& path, bool isMap);
const std::vector<float>& GetNgramScores() const {
return m_ngramScores;
}
const std::vector<Moses::Word>& GetWords() const {
return m_words;
}
float GetMapScore() const {
return m_mapScore;
}
float GetScore() const {
return m_score;
}
/** Initialise ngram scores */
void CalcScore(std::map<Moses::Phrase, float>& finalNgramScores, const std::vector<float>& thetas, float mapWeight);
private:
std::vector<Moses::Word> m_words;
float m_mapScore;
std::vector<float> m_ngramScores;
float m_score;
};
/** Strict-weak ordering that ranks LatticeMBRSolutions best-first
 *  (descending combined score). */
struct LatticeMBRSolutionComparator {
  bool operator()(const LatticeMBRSolution& a, const LatticeMBRSolution& b) {
    // a precedes b exactly when a's score is strictly larger.
    return b.GetScore() < a.GetScore();
  }
};
// Forward-backward pruning of the lattice; edgeDensity controls how many
// edges survive and scale rescales the model scores.
// NOTE(review): exact pruning criterion inferred from parameter names --
// confirm against the definition in LatticeMBR.cpp.
void pruneLatticeFB(Lattice & connectedHyp, std::map < const Moses::Hypothesis*, std::set <const Moses::Hypothesis* > > & outgoingHyps, std::map<const Moses::Hypothesis*, std::vector<Edge> >& incomingEdges,
                    const std::vector< float> & estimatedScores, const Moses::Hypothesis*, size_t edgeDensity,float scale);

// Use the ngram scores to rerank the nbest list, return at most n solutions.
void getLatticeMBRNBest(Moses::Manager& manager, Moses::TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);

// Calculate expected ngram counts, clipping at 1 (i.e. calculating posteriors)
// if posteriors==true.
void calcNgramExpectations(Lattice & connectedHyp, std::map<const Moses::Hypothesis*, std::vector<Edge> >& incomingEdges, std::map<Moses::Phrase,
                           float>& finalNgramScores, bool posteriors);

// Collect the output words along a trellis path into `translation`.
void GetOutputFactors(const Moses::TrellisPath &path, std::vector <Moses::Word> &translation);

// Count the n-grams of `sentence` into `allngrams`.
void extract_ngrams(const std::vector<Moses::Word >& sentence, std::map < Moses::Phrase, int > & allngrams);

// Comparator over hypotheses; presumably orders by source coverage -- confirm.
bool ascendingCoverageCmp(const Moses::Hypothesis* a, const Moses::Hypothesis* b);

// Decision rules: lattice MBR and consensus decoding over the n-best list.
std::vector<Moses::Word> doLatticeMBR(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
const Moses::TrellisPath doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
//std::vector<Moses::Word> doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
}
#endif

View File

@ -0,0 +1,213 @@
// $Id: LatticeMBRGrid.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (c) 2010 University of Edinburgh
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the University of Edinburgh nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
/**
* Lattice MBR grid search. Enables a grid search through the four parameters (p,r,scale and prune) used in lattice MBR.
See 'Lattice Minimum Bayes-Risk Decoding for Statistical Machine Translation by Tromble, Kumar, Och and Macherey,
EMNLP 2008 for details of the parameters.
The grid search is controlled by specifying comma separated lists for the lmbr parameters (-lmbr-p, -lmbr-r,
-lmbr-pruning-factor and -mbr-scale). All other parameters are passed through to moses. If any of the lattice mbr
parameters are missing, then they are set to their default values. Output is of the form:
sentence-id ||| p r prune scale ||| translation-hypothesis
**/
#include <cstdlib>
#include <iostream>
#include <map>
#include <stdexcept>
#include <set>
#include "IOWrapper.h"
#include "LatticeMBR.h"
#include "Manager.h"
#include "StaticData.h"
using namespace std;
using namespace Moses;
using namespace MosesCmd;
// Keys for the four grid-search dimensions: lattice-MBR precision (p),
// ratio (r), pruning factor, and MBR scale.
enum gridkey {lmbr_p,lmbr_r,lmbr_prune,lmbr_scale};
namespace MosesCmd
{
/** Holds the grid of parameter values to search over, and parses the
 *  comma-separated per-parameter value lists off the command line. */
class Grid
{
public:
  /** Add a parameter with key, command line argument, and default value */
  void addParam(gridkey key, const string& arg, float defaultValue) {
    m_args[arg] = key;
    CHECK(m_grid.find(key) == m_grid.end());
    m_grid[key].push_back(defaultValue);
  }

  /** Parse the arguments, removing those that define the grid and returning
   *  a copy of the rest for moses to consume. argc/argv are rewritten
   *  in place. */
  void parseArgs(int& argc, char**& argv) {
    char** newargv = new char*[argc+1]; //Space to add mbr parameter
    int newargc = 0;
    for (int i = 0; i < argc; ++i) {
      bool consumed = false;
      for (map<string,gridkey>::const_iterator argi = m_args.begin(); argi != m_args.end(); ++argi) {
        if (!strcmp(argv[i], argi->first.c_str())) {
          ++i;
          if (i >= argc) {
            cerr << "Error: missing parameter for " << argi->first << endl;
            throw runtime_error("Missing parameter");
          } else {
            string value = argv[i];
            gridkey key = argi->second;
            if (m_grid[key].size() != 1) {
              throw runtime_error("Duplicate grid argument");
            }
            // Replace the default with the user-supplied value list.
            m_grid[key].clear();
            char delim = ',';
            string::size_type lastpos = value.find_first_not_of(delim);
            string::size_type pos = value.find_first_of(delim,lastpos);
            while (string::npos != pos || string::npos != lastpos) {
              const string token = value.substr(lastpos, pos-lastpos);
              // BUGFIX: the original used atof() and rejected any value that
              // parsed to 0, so a legitimate "0" (e.g. -lmbr-p 0) was treated
              // as an error. Validate with strtod's end pointer instead.
              char* end = NULL;
              float param = static_cast<float>(strtod(token.c_str(), &end));
              if (end == token.c_str() || *end != '\0') {
                cerr << "Error: Illegal grid parameter for " << argi->first << endl;
                throw runtime_error("Illegal grid parameter");
              }
              m_grid[key].push_back(param);
              lastpos = value.find_first_not_of(delim,pos);
              pos = value.find_first_of(delim,lastpos);
            }
            consumed = true;
          }
          if (consumed) break;
        }
      }
      if (!consumed) {
        // Pass-through argument: deep-copy it into the new argv.
        newargv[newargc] = new char[strlen(argv[i]) + 1];
        strcpy(newargv[newargc],argv[i]);
        ++newargc;
      }
    }
    argc = newargc;
    argv = newargv;
  }

  /** Get the grid (list of values) for a particular key. */
  const vector<float>& getGrid(gridkey key) const {
    map<gridkey,vector<float> >::const_iterator iter = m_grid.find(key);
    assert (iter != m_grid.end());
    return iter->second;
  }

private:
  map<gridkey,vector<float> > m_grid;  // key -> values to search over
  map<string,gridkey> m_args;          // command-line flag -> key
};
} // namespace
/** Entry point of the lattice-MBR grid search: decodes each input sentence
 *  once, then re-scores its n-best list at every (p, r, prune, scale) grid
 *  point, printing one output line per grid point. */
int main(int argc, char* argv[])
{
  cerr << "Lattice MBR Grid search" << endl;

  // Register the four searchable parameters with their defaults.
  Grid grid;
  grid.addParam(lmbr_p, "-lmbr-p", 0.5);
  grid.addParam(lmbr_r, "-lmbr-r", 0.5);
  grid.addParam(lmbr_prune, "-lmbr-pruning-factor",30.0);
  grid.addParam(lmbr_scale, "-mbr-scale",1.0);
  grid.parseArgs(argc,argv);

  Parameter* params = new Parameter();
  if (!params->LoadParam(argc,argv)) {
    params->Explain();
    exit(1);
  }
  if (!StaticData::LoadDataStatic(params, argv[0])) {
    exit(1);
  }

  StaticData& staticData = const_cast<StaticData&>(StaticData::Instance());
  staticData.SetUseLatticeMBR(true);
  IOWrapper* ioWrapper = GetIOWrapper(staticData);

  if (!ioWrapper) {
    throw runtime_error("Failed to initialise IOWrapper");
  }

  size_t nBestSize = staticData.GetMBRSize();

  // BUGFIX: the original wrote "throw new runtime_error(...)", which throws
  // a raw pointer that no handler catches; throw by value instead. Also,
  // nBestSize is unsigned, so "<= 0" can only mean "== 0".
  if (nBestSize == 0) {
    throw runtime_error("Non-positive size specified for n-best list");
  }

  size_t lineCount = 0;
  InputType* source = NULL;

  const vector<float>& pgrid = grid.getGrid(lmbr_p);
  const vector<float>& rgrid = grid.getGrid(lmbr_r);
  const vector<float>& prune_grid = grid.getGrid(lmbr_prune);
  const vector<float>& scale_grid = grid.getGrid(lmbr_scale);

  while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) {
    ++lineCount;
    Sentence sentence;
    const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
    // Run the search once; the n-best list is reused for every grid point.
    Manager manager(*source,staticData.GetSearchAlgorithm(), &system);
    manager.ProcessSentence();
    TrellisPathList nBestList;
    manager.CalcNBest(nBestSize, nBestList,true);

    //grid search
    for (vector<float>::const_iterator pi = pgrid.begin(); pi != pgrid.end(); ++pi) {
      float p = *pi;
      staticData.SetLatticeMBRPrecision(p);
      for (vector<float>::const_iterator ri = rgrid.begin(); ri != rgrid.end(); ++ri) {
        float r = *ri;
        staticData.SetLatticeMBRPRatio(r);
        for (vector<float>::const_iterator prune_i = prune_grid.begin(); prune_i != prune_grid.end(); ++prune_i) {
          size_t prune = (size_t)(*prune_i);
          staticData.SetLatticeMBRPruningFactor(prune);
          for (vector<float>::const_iterator scale_i = scale_grid.begin(); scale_i != scale_grid.end(); ++scale_i) {
            float scale = *scale_i;
            staticData.SetMBRScale(scale);
            // Output format: sentence-id ||| p r prune scale ||| hypothesis
            cout << lineCount << " ||| " << p << " " << r << " " << prune << " " << scale << " ||| ";
            vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
            OutputBestHypo(mbrBestHypo, lineCount, staticData.GetReportSegmentation(),
                           staticData.GetReportAllFactors(),cout);
          }
        }
      }
    }
  }
}

View File

@ -0,0 +1,282 @@
/***********************************************************************
Relative Entropy-based Phrase table Pruning
Copyright (C) 2012 Wang Ling
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
/**
* Moses main, for single-threaded and multi-threaded.
**/
#include <exception>
#include <fstream>
#include <sstream>
#include <vector>
#ifdef WIN32
// Include Visual Leak Detector
//#include <vld.h>
#endif
#include "Hypothesis.h"
#include "Manager.h"
#include "IOWrapper.h"
#include "StaticData.h"
#include "Util.h"
#include "ThreadPool.h"
#include "TranslationAnalysis.h"
#include "OutputCollector.h"
#include "RelativeEntropyCalc.h"
#include "LexicalReordering.h"
#include "LexicalReorderingState.h"
#ifdef HAVE_PROTOBUF
#include "hypergraph.pb.h"
#endif
using namespace std;
using namespace Moses;
using namespace MosesCmd;
namespace MosesCmd
{
// output floats with three significant digits
static const size_t PRECISION = 3;
/** Enforce rounding */
/** Put `os` into fixed-point notation, printing `digits` digits after the
 *  decimal point, so all float output is rounded consistently. */
void fix(std::ostream& os, size_t digits)
{
  os.precision(digits);
  os.setf(std::ios::fixed);
}
/** Translates a sentence.
* - calls the search (Manager)
* - applies the decision rule
* - outputs best translation and additional reporting
**/
/** Translates a sentence.
 * - calls the search (Manager)
 * - computes the relative entropy of the resulting search graph
 * - writes the result through the OutputCollector
 **/
class TranslationTask : public Task
{
public:
  /** Takes ownership of `source` (deleted in the destructor);
   *  `searchGraphCollector` is borrowed and may be NULL. */
  TranslationTask(size_t lineNumber,
                  InputType* source, OutputCollector* searchGraphCollector) :
    m_source(source), m_lineNumber(lineNumber),
    m_searchGraphCollector(searchGraphCollector) {}

  /** Translate one sentence
   * gets called by main function implemented at end of this source file */
  void Run() {
    // report thread number
#if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS)
    TRACE_ERR("Translating line " << m_lineNumber << " in thread id " << pthread_self() << std::endl);
#endif

    // shorthand for "global data"
    const StaticData &staticData = StaticData::Instance();

    // BUGFIX: the original declared "Sentence sentence();" here -- the most
    // vexing parse, which declares a function rather than an object. The
    // variable was unused in any case, so the statement has been removed.

    // set translation system
    const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);

    // execute the translation
    // note: this executes the search, resulting in a search graph
    // we still need to apply the decision rule (MAP, MBR, ...)
    Manager manager(m_lineNumber, *m_source,staticData.GetSearchAlgorithm(), &system);
    manager.ProcessSentence();

    // output search graph
    if (m_searchGraphCollector) {
      ostringstream out;
      fix(out,PRECISION);
      vector<SearchGraphNode> searchGraph;
      manager.GetSearchGraph(searchGraph);
      out << RelativeEntropyCalc::CalcRelativeEntropy(m_lineNumber,searchGraph) << endl;
      m_searchGraphCollector->Write(m_lineNumber, out.str());
    }
    manager.CalcDecoderStatistics();
  }

  ~TranslationTask() {
    delete m_source;
  }

private:
  InputType* m_source;                      // owned; deleted in the destructor
  size_t m_lineNumber;                      // input line number, used for ordered output
  OutputCollector* m_searchGraphCollector;  // not owned; may be NULL
  std::ofstream *m_alignmentStream;         // unused in this file; never initialised
};
/** Print one line per weight of the given feature function, in the form
 *  "<description> <short-name> <weight>". */
static void PrintFeatureWeight(const FeatureFunction* ff)
{
  const size_t id = ff->GetScoreBookkeepingID();
  const size_t begin = StaticData::Instance().GetScoreIndexManager().GetBeginIndex(id);
  const size_t end = StaticData::Instance().GetScoreIndexManager().GetEndIndex(id);
  for (size_t idx = begin; idx < end; ++idx) {
    const size_t offset = idx - begin;
    cout << ff->GetScoreProducerDescription(offset) << " "
         << ff->GetScoreProducerWeightShortName(offset) << " "
         << StaticData::Instance().GetAllWeights()[idx] << endl;
  }
}
static void ShowWeights()
{
fix(cout,6);
const StaticData& staticData = StaticData::Instance();
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
const vector<const StatelessFeatureFunction*>& slf =system.GetStatelessFeatureFunctions();
const vector<const StatefulFeatureFunction*>& sff = system.GetStatefulFeatureFunctions();
const vector<PhraseDictionaryFeature*>& pds = system.GetPhraseDictionaries();
const vector<GenerationDictionary*>& gds = system.GetGenerationDictionaries();
for (size_t i = 0; i < sff.size(); ++i) {
PrintFeatureWeight(sff[i]);
}
for (size_t i = 0; i < slf.size(); ++i) {
PrintFeatureWeight(slf[i]);
}
for (size_t i = 0; i < pds.size(); ++i) {
PrintFeatureWeight(pds[i]);
}
for (size_t i = 0; i < gds.size(); ++i) {
PrintFeatureWeight(gds[i]);
}
}
} //namespace
/** main function of the command line version of the relent-filter decoder:
 *  reads sentences, force-decodes each one, and emits its relative-entropy
 *  score through the OutputCollector (single- or multi-threaded). **/
int main(int argc, char** argv)
{
  try {
    // echo command line, if verbose
    IFVERBOSE(1) {
      TRACE_ERR("command: ");
      for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
      TRACE_ERR(endl);
    }

    // set number of significant decimals in output
    fix(cout,PRECISION);
    fix(cerr,PRECISION);

    // load all the settings into the Parameter class
    // (stores them as strings, or array of strings)
    Parameter* params = new Parameter();
    if (!params->LoadParam(argc,argv)) {
      params->Explain();
      exit(1);
    }

    // initialize all "global" variables, which are stored in StaticData
    // note: this also loads models such as the language model, etc.
    if (!StaticData::LoadDataStatic(params, argv[0])) {
      exit(1);
    }

    // setting "-show-weights" -> just dump out weights and exit
    if (params->isParamSpecified("show-weights")) {
      ShowWeights();
      exit(0);
    }

    // shorthand for accessing information in StaticData
    const StaticData& staticData = StaticData::Instance();

    //initialise random numbers
    srand(time(NULL));

    // set up read/writing class
    IOWrapper* ioWrapper = GetIOWrapper(staticData);
    if (!ioWrapper) {
      cerr << "Error; Failed to create IO object" << endl;
      exit(1);
    }

    // check on weights
    vector<float> weights = staticData.GetAllWeights();
    IFVERBOSE(2) {
      TRACE_ERR("The score component vector looks like this:\n" << staticData.GetScoreIndexManager());
      TRACE_ERR("The global weight vector looks like this:");
      for (size_t j=0; j<weights.size(); j++) {
        TRACE_ERR(" " << weights[j]);
      }
      TRACE_ERR("\n");
    }

    // every score must have a weight! check that here:
    if(weights.size() != staticData.GetScoreIndexManager().GetTotalNumberOfScores()) {
      TRACE_ERR("ERROR: " << staticData.GetScoreIndexManager().GetTotalNumberOfScores() << " score components, but " << weights.size() << " weights defined" << std::endl);
      exit(1);
    }

    // setting lexicalized reordering setup
    // NOTE(review): globally disables the first backward reordering score;
    // presumably required by the relative-entropy computation -- confirm.
    PhraseBasedReorderingState::m_useFirstBackwardScore = false;

    // collector serialises per-sentence output so multi-threaded runs
    // print results in input order
    auto_ptr<OutputCollector> outputCollector;
    outputCollector.reset(new OutputCollector());

#ifdef WITH_THREADS
    ThreadPool pool(staticData.ThreadCount());
#endif

    // main loop over set of input sentences
    InputType* source = NULL;
    size_t lineCount = 0;
    while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) {
      IFVERBOSE(1) {
        ResetUserTime();
      }
      // set up task of translating one sentence;
      // the task takes ownership of `source`
      TranslationTask* task =
        new TranslationTask(lineCount,source, outputCollector.get());

      // execute task
#ifdef WITH_THREADS
      pool.Submit(task);
#else
      task->Run();
      delete task;
#endif
      source = NULL; //make sure it doesn't get deleted
      ++lineCount;
    }

    // we are done, finishing up
#ifdef WITH_THREADS
    pool.Stop(true); //flush remaining jobs
#endif

  } catch (const std::exception &e) {
    std::cerr << "Exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  }

#ifndef EXIT_RETURN
  //This avoids that destructors are called (it can take a long time)
  exit(EXIT_SUCCESS);
#else
  return EXIT_SUCCESS;
#endif
}

View File

@ -0,0 +1,39 @@
/*********************************************************************
Relative Entropy-based Phrase table Pruning
Copyright (C) 2012 Wang Ling
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the University of Edinburgh nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef moses_cmd_Main_h
#define moses_cmd_Main_h
#include "StaticData.h"
class IOWrapper;
int main(int argc, char* argv[]);
#endif

View File

@ -0,0 +1,83 @@
/***********************************************************************
Relative Entropy-based Phrase table Pruning
Copyright (C) 2012 Wang Ling
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <vector>
#include "Hypothesis.h"
#include "StaticData.h"
#include "RelativeEntropyCalc.h"
#include "Manager.h"
using namespace std;
using namespace Moses;
using namespace MosesCmd;
namespace MosesCmd
{
/** Relative-entropy pruning score for one input sentence. Scans the search
 *  graph for final hypotheses whose full output equals the forced-decoding
 *  constraint, and returns the score gap between the best such derivation
 *  overall (unpruned) and the best one built from more than one phrase
 *  application (pruned). The result is capped at 100. */
double RelativeEntropyCalc::CalcRelativeEntropy(int translationId, std::vector<SearchGraphNode>& searchGraph){
  const StaticData &staticData = StaticData::Instance();
  // Reference output this sentence was decoded against.
  // NOTE(review): despite the m_ prefix this is a local, not a member.
  const Phrase *m_constraint = staticData.GetConstrainingPhrase(translationId);

  // -max doubles act as "no matching derivation found yet" sentinels.
  double prunedScore = -numeric_limits<double>::max();
  double unprunedScore = -numeric_limits<double>::max();

  for (size_t i = 0; i < searchGraph.size(); ++i) {
    const SearchGraphNode& searchNode = searchGraph[i];
    int nodeId = searchNode.hypo->GetId();
    if(nodeId == 0) continue; // initial hypothesis

    int forwardId = searchNode.forward;
    if(forwardId == -1){ // is final hypothesis
      // Rebuild the complete output string of this derivation.
      Phrase catOutput(0);
      ConcatOutputPhraseRecursive(catOutput, searchNode.hypo);
      if(catOutput == *m_constraint){ // is the output actually the same as the constraint (forced decoding does not always force the output)
        const Hypothesis *prevHypo = searchNode.hypo->GetPrevHypo();
        int backId = prevHypo->GetId();
        double derivationScore = searchNode.hypo->GetScore();
        if(backId != 0){ // derivation using smaller units (more than one phrase application)
          if(prunedScore < derivationScore){
            prunedScore = derivationScore;
          }
        }
        if(unprunedScore < derivationScore){
          unprunedScore = derivationScore;
        }
      }
    }
  }

  double neg_log_div = 0;
  if( unprunedScore == -numeric_limits<double>::max()){
    // Sentinel untouched: no derivation matched the constraint, so the
    // divergence is undefined. Assign the maximum, which the cap below
    // turns into 100. (The original comment claimed this gives a "low
    // score so that it doesnt get pruned" -- confirm the threshold
    // direction against the filter that consumes this value.)
    neg_log_div = numeric_limits<double>::max();
  }
  else{
    neg_log_div = unprunedScore - prunedScore;
  }

  // Cap the divergence so downstream arithmetic stays well-behaved.
  if (neg_log_div > 100){
    return 100;
  }
  return neg_log_div;
}
/** Append to `phrase` the full target output of the derivation ending at
 *  `hypo`, earliest hypothesis first. The initial hypothesis (id 0)
 *  contributes nothing. (Implemented iteratively; the name is kept for
 *  source compatibility.) */
void RelativeEntropyCalc::ConcatOutputPhraseRecursive(Phrase& phrase, const Hypothesis *hypo){
  // Walk back to the start of the derivation, remembering each hypothesis.
  std::vector<const Hypothesis*> chain;
  for (const Hypothesis* h = hypo; h->GetId() != 0; h = h->GetPrevHypo()) {
    chain.push_back(h);
  }
  // Append the target phrases in chronological (left-to-right) order.
  for (std::vector<const Hypothesis*>::reverse_iterator it = chain.rbegin(); it != chain.rend(); ++it) {
    phrase.Append((*it)->GetCurrTargetPhrase());
  }
}
}

View File

@ -0,0 +1,51 @@
/*********************************************************************
Relative Entropy-based Phrase table Pruning
Copyright (C) 2012 Wang Ling
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the University of Edinburgh nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#include <vector>
#include "Hypothesis.h"
#include "StaticData.h"
#include "Manager.h"
using namespace std;
using namespace Moses;
namespace MosesCmd
{
/** Computes the relative-entropy score of a sentence's search graph,
 *  used by the relent-filter to decide which phrase pairs to prune. */
class RelativeEntropyCalc
{
public:
  /** Score the search graph of sentence `translationId`; see the .cpp for the definition. */
  static double CalcRelativeEntropy(int translationId, std::vector<SearchGraphNode>& searchGraph);

protected:
  /** Append the target words of the derivation ending at `hypo` to `phrase`. */
  static void ConcatOutputPhraseRecursive(Phrase& phrase, const Hypothesis *hypo);
};
}

View File

@ -0,0 +1,126 @@
// $Id$
#include <iostream>
#include <sstream>
#include <algorithm>
#include "StaticData.h"
#include "Hypothesis.h"
#include "TranslationAnalysis.h"
using namespace Moses;
namespace TranslationAnalysis
{
/** Print a detailed, human-readable trace of one translation hypothesis to
 *  `os`: phrase-by-phrase source/target segments and alignments, source/
 *  target span maps, dropped words, language-model n-gram statistics and
 *  the final weighted score breakdown. */
void PrintTranslationAnalysis(const TranslationSystem* system, std::ostream &os, const Hypothesis* hypo)
{
  os << std::endl << "TRANSLATION HYPOTHESIS DETAILS:" << std::endl;

  // Collect the back-pointer chain and reverse it into decoding order.
  std::vector<const Hypothesis*> translationPath;
  while (hypo) {
    translationPath.push_back(hypo);
    hypo = hypo->GetPrevHypo();
  }
  std::reverse(translationPath.begin(), translationPath.end());

  std::vector<std::string> droppedWords;
  std::vector<const Hypothesis*>::iterator tpi = translationPath.begin();
  if(tpi == translationPath.end())
    return;
  ++tpi; // skip initial translation state

  std::vector<std::string> sourceMap;
  std::vector<std::string> targetMap;
  std::vector<unsigned int> lmAcc(0);   // per-LM accumulated n-gram lengths
  size_t lmCalls = 0;                   // total number of LM queries seen
  bool doLMStats = ((*tpi)->GetLMStats() != 0);
  if (doLMStats)
    lmAcc.resize((*tpi)->GetLMStats()->size(), 0);

  for (; tpi != translationPath.end(); ++tpi) {
    std::ostringstream sms;
    std::ostringstream tms;
    std::string target = (*tpi)->GetTargetPhraseStringRep();
    std::string source = (*tpi)->GetSourcePhraseStringRep();
    WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
    WordsRange swr = (*tpi)->GetCurrSourceWordsRange();
    const AlignmentInfo &alignmentInfo = (*tpi)->GetCurrTargetPhrase().GetAlignmentInfo();

    // language model backoff stats: sum every reported n-gram length per LM
    if (doLMStats) {
      std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
      std::vector<std::vector<unsigned int> >::iterator i = lmstats.begin();
      std::vector<unsigned int>::iterator acc = lmAcc.begin();
      for (; i != lmstats.end(); ++i, ++acc) {
        std::vector<unsigned int>::iterator j = i->begin();
        lmCalls += i->size();
        for (; j != i->end(); ++j) {
          (*acc) += *j;
        }
      }
    }

    // an empty target phrase means the source words were dropped
    bool epsilon = false;
    if (target == "") {
      target="<EPSILON>";
      epsilon = true;
      droppedWords.push_back(source);
    }
    os << " SOURCE: " << swr << " " << source << std::endl
       << " TRANSLATED AS: " << target << std::endl
       << " WORD ALIGNED: " << alignmentInfo << std::endl;

    // Build span-map strings like "3-4-5", or "del(3)" for dropped words.
    size_t twr_i = twr.GetStartPos();
    size_t swr_i = swr.GetStartPos();
    if (!epsilon) {
      sms << twr_i;
    }
    if (epsilon) {
      tms << "del(" << swr_i << ")";
    } else {
      tms << swr_i;
    }
    swr_i++;
    twr_i++;
    for (; twr_i <= twr.GetEndPos() && twr.GetEndPos() != NOT_FOUND; twr_i++) {
      sms << '-' << twr_i;
    }
    for (; swr_i <= swr.GetEndPos() && swr.GetEndPos() != NOT_FOUND; swr_i++) {
      tms << '-' << swr_i;
    }
    if (!epsilon) targetMap.push_back(sms.str());
    sourceMap.push_back(tms.str());
  }

  // Print the collected source/target span maps.
  std::vector<std::string>::iterator si = sourceMap.begin();
  std::vector<std::string>::iterator ti = targetMap.begin();
  os << std::endl << "SOURCE/TARGET SPANS:";
  os << std::endl << " SOURCE:";
  for (; si != sourceMap.end(); ++si) {
    os << " " << *si;
  }
  os << std::endl << " TARGET:";
  for (; ti != targetMap.end(); ++ti) {
    os << " " << *ti;
  }
  os << std::endl << std::endl;

  // Average n-gram length per language model, if LM stats were collected.
  if (doLMStats && lmCalls > 0) {
    std::vector<unsigned int>::iterator acc = lmAcc.begin();
    const LMList& lmlist = system->GetLanguageModels();
    LMList::const_iterator i = lmlist.begin();
    for (; acc != lmAcc.end(); ++acc, ++i) {
      char buf[256];
      sprintf(buf, "%.4f", (float)(*acc)/(float)lmCalls);
      os << (*i)->GetScoreProducerDescription() <<", AVG N-GRAM LENGTH: " << buf << std::endl;
    }
  }

  if (droppedWords.size() > 0) {
    std::vector<std::string>::iterator dwi = droppedWords.begin();
    os << std::endl << "WORDS/PHRASES DROPPED:" << std::endl;
    for (; dwi != droppedWords.end(); ++dwi) {
      os << "\tdropped=" << *dwi << std::endl;
    }
  }

  os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED): ";
  StaticData::Instance().GetScoreIndexManager().PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance().GetAllWeights());
  os << std::endl;
}
}

View File

@ -0,0 +1,25 @@
// $Id$
/*
* also see moses/SentenceStats
*/
#ifndef moses_cmd_TranslationAnalysis_h
#define moses_cmd_TranslationAnalysis_h
#include <iostream>
#include "Hypothesis.h"
#include "TranslationSystem.h"
namespace TranslationAnalysis
{
/***
* print details about the translation represented in hypothesis to
* os. Included information: phrase alignment, words dropped, scores
*/
void PrintTranslationAnalysis(const Moses::TranslationSystem* system, std::ostream &os, const Moses::Hypothesis* hypo);
}
#endif

178
contrib/relent-filter/src/mbr.cpp Executable file
View File

@ -0,0 +1,178 @@
#include <iostream>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <vector>
#include <map>
#include <stdlib.h>
#include <math.h>
#include <algorithm>
#include <stdio.h>
#include "TrellisPathList.h"
#include "TrellisPath.h"
#include "StaticData.h"
#include "Util.h"
#include "mbr.h"
using namespace std ;
using namespace Moses;
/* Input :
1. a sorted n-best list, with duplicates filtered out in the following format
0 ||| amr moussa is currently on a visit to libya , tomorrow , sunday , to hold talks with regard to the in sudan . ||| 0 -4.94418 0 0 -2.16036 0 0 -81.4462 -106.593 -114.43 -105.55 -12.7873 -26.9057 -25.3715 -52.9336 7.99917 -24 ||| -4.58432
2. a weight vector
3. bleu order ( default = 4)
4. scaling factor to weigh the weight vector (default = 1.0)
Output :
translations that minimise the Bayes Risk of the n-best list
*/
// BLEU configuration: maximum n-gram order, and add-one smoothing applied
// to orders > 1 in calculate_score().
int BLEU_ORDER = 4;
int SMOOTH = 1;
// Convergence/step threshold; not referenced in this excerpt --
// TODO(review): confirm where min_interval is used.
float min_interval = 1e-4;
/** Count every n-gram of `sentence` with length 1..BLEU_ORDER, incrementing
 *  its entry in `allngrams`. */
void extract_ngrams(const vector<const Factor* >& sentence, map < vector < const Factor* >, int > & allngrams)
{
  const int len = (int)sentence.size();
  // order = n-gram length, start = index of its first token
  for (int order = 1; order <= BLEU_ORDER; ++order) {
    for (int start = 0; start + order <= len; ++start) {
      vector<const Factor*> ngram(sentence.begin() + start,
                                  sentence.begin() + start + order);
      ++allngrams[ngram];
    }
  }
}
/** Smoothed sentence-level BLEU of sents[hyp] against sents[ref], computed
 *  from the pre-extracted n-gram counts in ngram_stats. Returns 0 when the
 *  hypothesis has no unigram matches. */
float calculate_score(const vector< vector<const Factor*> > & sents, int ref, int hyp, vector < map < vector < const Factor *>, int > > & ngram_stats )
{
  const int comps_n = 2*BLEU_ORDER+1;
  vector<int> comps(comps_n);
  const int hyp_length = sents[hyp].size();

  // comps[2i] = clipped matches for (i+1)-grams; comps[2i+1] = hyp (i+1)-gram total
  for (int i = 0; i < BLEU_ORDER; i++) {
    comps[2*i] = 0;
    comps[2*i+1] = max(hyp_length-i,0);
  }

  // Clip each hypothesis n-gram count by the reference count.
  map< vector < const Factor * > ,int > & hyp_ngrams = ngram_stats[hyp] ;
  map< vector < const Factor * >, int > & ref_ngrams = ngram_stats[ref] ;
  for (map< vector< const Factor * >, int >::iterator it = hyp_ngrams.begin(); it != hyp_ngrams.end(); ++it) {
    map< vector< const Factor * >, int >::iterator ref_it = ref_ngrams.find(it->first);
    if (ref_it != ref_ngrams.end()) {
      comps[2*(it->first.size()-1)] += min(ref_it->second, it->second);
    }
  }
  comps[comps_n-1] = sents[ref].size();

  // No unigram matches -> BLEU is zero. (comps[0] does not change below, so
  // the check is hoisted out of the loop.)
  if (comps[0] == 0) {
    return 0.0;
  }

  float logbleu = 0.0;
  for (int i = 0; i < BLEU_ORDER; i++) {
    if (i > 0) {
      // add-SMOOTH smoothing for higher orders
      logbleu += log((float)comps[2*i]+SMOOTH)-log((float)comps[2*i+1]+SMOOTH);
    } else {
      logbleu += log((float)comps[2*i])-log((float)comps[2*i+1]);
    }
  }
  logbleu /= BLEU_ORDER;

  // Brevity penalty: comps[comps_n-1] is the ref length, comps[1] is the test length
  float brevity = 1.0-(float)comps[comps_n-1]/comps[1];
  if (brevity < 0.0) {
    logbleu += brevity;
  }
  return exp(logbleu);
}
/**
 * Minimum-Bayes-Risk selection under a (1 - BLEU) loss: returns the
 * n-best entry whose expected loss against the rest of the list,
 * weighted by the (scaled) posterior of each competitor, is smallest.
 */
const TrellisPath doMBR(const TrellisPathList& nBestList)
{
  TrellisPathList::const_iterator iter;

  // Pass 1: maximum scaled model score, subtracted later so that
  // UntransformScore does not underflow.
  float maxScore = -1e20;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const TrellisPath &path = **iter;
    float score = StaticData::Instance().GetMBRScale()
                  * path.GetScoreBreakdown().InnerProduct(StaticData::Instance().GetAllWeights());
    if (score > maxScore)
      maxScore = score;
  }

  // Pass 2: per-hypothesis posterior mass, surface words, and n-gram counts.
  float marginal = 0;
  vector<float> joint_prob_vec;
  vector< vector<const Factor*> > translations;
  vector< map < vector <const Factor *>, int > > ngram_stats;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const TrellisPath &path = **iter;
    float joint_prob = UntransformScore(StaticData::Instance().GetMBRScale() * path.GetScoreBreakdown().InnerProduct(StaticData::Instance().GetAllWeights()) - maxScore);
    marginal += joint_prob;
    joint_prob_vec.push_back(joint_prob);

    vector<const Factor*> translation;
    GetOutputFactors(path, translation);

    map < vector < const Factor *>, int > counts;
    extract_ngrams(translation,counts);
    ngram_stats.push_back(counts);
    translations.push_back(translation);
  }

  // Pass 3: expected-loss minimisation. The inner sum is abandoned as soon
  // as it exceeds the best total found so far.
  float minMBRLoss = 1000000;
  int minMBRLossIdx = -1;
  for (unsigned int i = 0; i < nBestList.GetSize(); i++) {
    float lossCumul = 0;
    for (unsigned int j = 0; j < nBestList.GetSize(); j++) {
      if (i == j)
        continue;
      float bleu = calculate_score(translations, j, i,ngram_stats );
      lossCumul += ( 1 - bleu) * ( joint_prob_vec[j]/marginal);
      if (lossCumul > minMBRLoss)
        break; // already worse than the current best
    }
    if (lossCumul < minMBRLoss) {
      minMBRLoss = lossCumul;
      minMBRLossIdx = i;
    }
  }

  // Hypothesis minimising the Bayes risk under the 1 - BLEU loss.
  return nBestList.at(minMBRLossIdx);
}
void GetOutputFactors(const TrellisPath &path, vector <const Factor*> &translation)
{
const std::vector<const Hypothesis *> &edges = path.GetEdges();
const std::vector<FactorType>& outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
assert (outputFactorOrder.size() == 1);
// print the surface factor of the translation
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
const Hypothesis &edge = *edges[currEdge];
const Phrase &phrase = edge.GetCurrTargetPhrase();
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
translation.push_back(factor);
}
}
}

Some files were not shown because too many files have changed in this diff Show More