From ba99159763658a33c10d9a909377639bda53330f Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Wed, 23 May 2012 14:41:49 +0100
Subject: [PATCH 01/38] Gzip consolidate on the fly

---
 scripts/training/train-model.perl.missing_bin_dir | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index d3748fdc9..a1f2dd2f8 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -1516,7 +1516,7 @@ sub score_phrase_phrase_extract {
     # merging the two halves
     print STDERR "(6.6) consolidating the two halves @ ".`date`;
     return if $___CONTINUE && -e "$ttable_file.gz";
-    my $cmd = "$PHRASE_CONSOLIDATE $ttable_file.half.f2e $ttable_file.half.e2f.sorted $ttable_file";
+    my $cmd = "$PHRASE_CONSOLIDATE $ttable_file.half.f2e $ttable_file.half.e2f.sorted $ttable_file.gz";
     $cmd .= " --Hierarchical" if $_HIERARCHICAL;
     $cmd .= " --LogProb" if $LOG_PROB;
     $cmd .= " --NegLogProb" if $NEG_LOG_PROB;
@@ -1527,9 +1527,6 @@ sub score_phrase_phrase_extract {
     $cmd .= " --KneserNey $ttable_file.coc" if $KNESER_NEY;
     safesystem($cmd) or die "ERROR: Consolidating the two phrase table halves failed";
     if (! $debug) { safesystem("rm -f $ttable_file.half.*") or die("ERROR"); }
-    if (! $___DONT_ZIP) {
-        safesystem("gzip $ttable_file") || die("ERROR: could not gzip $ttable_file");
-    }
 }
 
 sub score_phrase_memscore {

From 3d67e33b9e88b29216709590e6bead0eeba9b0c3 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Wed, 23 May 2012 15:10:21 +0100
Subject: [PATCH 02/38] GZip extract on the fly

---
 scripts/training/phrase-extract/Jamfile       |  2 +-
 scripts/training/phrase-extract/extract.cpp   | 34 +++++++++++--------
 .../training/train-model.perl.missing_bin_dir | 12 +++----
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/scripts/training/phrase-extract/Jamfile b/scripts/training/phrase-extract/Jamfile
index 0872130f9..9c077fb12 100644
--- a/scripts/training/phrase-extract/Jamfile
+++ b/scripts/training/phrase-extract/Jamfile
@@ -10,7 +10,7 @@ obj XmlTree.o : XmlTree.cpp : <include>. ;
 alias filestreams : InputFileStream.cpp OutputFileStream.cpp : : : <include>. ;
 alias trees : SyntaxTree.cpp tables-core.o XmlTree.o : : : <include>. ;
 
-exe extract : tables-core.o SentenceAlignment.o extract.cpp InputFileStream ../../..//boost_iostreams ;
+exe extract : tables-core.o SentenceAlignment.o extract.cpp OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ;
 
 exe extract-rules : tables-core.o SentenceAlignment.o SyntaxTree.o XmlTree.o SentenceAlignmentWithSyntax.cpp HoleCollection.cpp extract-rules.cpp ExtractedRule.cpp OutputFileStream.cpp InputFileStream ../../../moses/src//ThreadPool ../../..//boost_iostreams ;
 
diff --git a/scripts/training/phrase-extract/extract.cpp b/scripts/training/phrase-extract/extract.cpp
index f6d6cbb9b..16b413da9 100644
--- a/scripts/training/phrase-extract/extract.cpp
+++ b/scripts/training/phrase-extract/extract.cpp
@@ -22,6 +22,7 @@
 #include "SentenceAlignment.h"
 #include "tables-core.h"
 #include "InputFileStream.h"
+#include "OutputFileStream.h"
 
 using namespace std;
 
@@ -82,15 +83,16 @@ bool hierModel = false;
 REO_MODEL_TYPE hierType = REO_MSD;
 
 
-ofstream extractFile;
-ofstream extractFileInv;
-ofstream extractFileOrientation;
-ofstream extractFileSentenceId;
+Moses::OutputFileStream extractFile;
+Moses::OutputFileStream extractFileInv;
+Moses::OutputFileStream extractFileOrientation;
+Moses::OutputFileStream extractFileSentenceId;
 int maxPhraseLength;
 bool orientationFlag = false;
 bool translationFlag = true;
 bool sentenceIdFlag = false; //create extract file with sentence id
 bool onlyOutputSpanInfo = false;
+bool gzOutput = false;
 
 int main(int argc, char* argv[])
 {
@@ -116,6 +118,8 @@ int main(int argc, char* argv[])
       translationFlag = false;
     } else if (strcmp(argv[i], "--SentenceId") == 0) {
       sentenceIdFlag = true;  
+    } else if (strcmp(argv[i], "--GZOutput") == 0) {
+      gzOutput = true;  
     } else if(strcmp(argv[i],"--model") == 0) {
       if (i+1 >= argc) {
         cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
@@ -193,18 +197,18 @@ int main(int argc, char* argv[])
 
   // open output files
   if (translationFlag) {
-    string fileNameExtractInv = fileNameExtract + ".inv";
-    extractFile.open(fileNameExtract.c_str());
-    extractFileInv.open(fileNameExtractInv.c_str());
+    string fileNameExtractInv = fileNameExtract + ".inv" + (gzOutput?".gz":"");
+    extractFile.Open( (fileNameExtract + (gzOutput?".gz":"")).c_str());
+    extractFileInv.Open(fileNameExtractInv.c_str());
   }
   if (orientationFlag) {
-    string fileNameExtractOrientation = fileNameExtract + ".o";
-    extractFileOrientation.open(fileNameExtractOrientation.c_str());
+    string fileNameExtractOrientation = fileNameExtract + ".o" + (gzOutput?".gz":"");
+    extractFileOrientation.Open(fileNameExtractOrientation.c_str());
   }
 
   if (sentenceIdFlag) {
-    string fileNameExtractSentenceId = fileNameExtract + ".sid";
-    extractFileSentenceId.open(fileNameExtractSentenceId.c_str());
+    string fileNameExtractSentenceId = fileNameExtract + ".sid" + (gzOutput?".gz":"");
+    extractFileSentenceId.Open(fileNameExtractSentenceId.c_str());
   }
 
   int i=0;
@@ -239,12 +243,12 @@ int main(int argc, char* argv[])
   //az: only close if we actually opened it
   if (!onlyOutputSpanInfo) {
     if (translationFlag) {
-      extractFile.close();
-      extractFileInv.close();
+      extractFile.Close();
+      extractFileInv.Close();
     }
-    if (orientationFlag) extractFileOrientation.close();
+    if (orientationFlag) extractFileOrientation.Close();
     if (sentenceIdFlag) {
-      extractFileSentenceId.close();
+      extractFileSentenceId.Close();
     }
   }
 }
diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index a1f2dd2f8..82b4fbce8 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -1362,20 +1362,16 @@ sub extract_phrase {
         $cmd .= " ".$_EXTRACT_OPTIONS if defined($_EXTRACT_OPTIONS);
       }
     }
+    
+    $cmd .= " --GZOutput ";
+    
     map { die "File not found: $_" if ! -e $_ } ($alignment_file_e, $alignment_file_f, $alignment_file_a);
     print STDERR "$cmd\n";
     safesystem("$cmd") or die "ERROR: Phrase extraction failed (missing input files?)";
     foreach my $f (@tempfiles) {
       unlink $f;
     }
-    if (! $___DONT_ZIP) { 
-      safesystem("gzip $extract_file.o") if -e "$extract_file.o";
-      safesystem("gzip $extract_file.sid") if -e "$extract_file.sid";
-      if ($ttable_flag) {
-        safesystem("gzip $extract_file.inv") or die("ERROR");
-        safesystem("gzip $extract_file") or die("ERROR");
-      }
-    }
+    
 }
 
 ### (6) PHRASE SCORING

From 947bee50b3b8d2d9360de011760466aa06a18362 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Wed, 23 May 2012 15:36:34 +0100
Subject: [PATCH 03/38] GZip extract.sorted

---
 .../training/train-model.perl.missing_bin_dir    | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index 82b4fbce8..49d1d37ed 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -1453,16 +1453,16 @@ sub score_phrase_phrase_extract {
 	          $inverse = " --Inverse";
                   $extract_filename = $extract_file.".inv";
               }
-	      my $extract = "$extract_filename.sorted";
+	      my $extract = "$extract_filename.sorted.gz";
 
-	      if (!($___CONTINUE && -e "$extract_filename.sorted")) {
+	      if (!($___CONTINUE && -e "$extract_filename.sorted.gz")) {
 	          # sorting
 	          print STDERR "(6.".($substep++).")  sorting $direction @ ".`date`;
 	          if (-e "$extract_filename.gz") {
-		      safesystem("gunzip < $extract_filename.gz | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR > $extract_filename.sorted") or die("ERROR");
+		      safesystem("gunzip < $extract_filename.gz | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR | gzip -c > $extract_filename.sorted.gz") or die("ERROR");
 	          }
 	          else {
-		      safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $extract_filename > $extract_filename.sorted") or die("ERROR");
+		      safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $extract_filename | gzip -c > $extract_filename.sorted.gz") or die("ERROR");
 	          }
               }
 
@@ -1592,10 +1592,10 @@ sub get_reordering_factored {
 sub get_reordering {
     my ($extract_file,$reo_model_path) = @_;
     if (-e "$extract_file.o.gz") {
-	safesystem("gunzip < $extract_file.o.gz | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR > $extract_file.o.sorted") or die("ERROR");
+	safesystem("gunzip < $extract_file.o.gz | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR | gzip -c > $extract_file.o.sorted.gz") or die("ERROR");
     }
     else {
-        safesystem("LC_ALL=C sort -T $___TEMP_DIR $extract_file.o > $extract_file.o.sorted") or die("ERROR");
+        safesystem("LC_ALL=C sort -T $___TEMP_DIR $extract_file.o | gzip -c > $extract_file.o.sorted.gz") or die("ERROR");
     }
 
     my $smooth = $___REORDERING_SMOOTH;
@@ -1603,7 +1603,7 @@ sub get_reordering {
     print STDERR "(7.2) building tables @ ".`date`;
 
     #create cmd string for lexical reordering scoring
-    my $cmd = "$LEXICAL_REO_SCORER $extract_file.o.sorted $smooth $reo_model_path";
+    my $cmd = "$LEXICAL_REO_SCORER $extract_file.o.sorted.gz $smooth $reo_model_path";
     $cmd .= " --SmoothWithCounts" if ($smooth =~ /(.+)u$/);
     for my $mtype (keys %REORDERING_MODEL_TYPES) {
 	$cmd .= " --model \"$mtype $REORDERING_MODEL_TYPES{$mtype}";
@@ -1618,7 +1618,7 @@ sub get_reordering {
     #Call the lexical reordering scorer
     safesystem("$cmd") or die "ERROR: Lexical reordering scoring failed";
 
-    if (! $debug) { safesystem("rm $extract_file.o.sorted") or die("ERROR");}
+    if (! $debug) { safesystem("rm $extract_file.o.sorted.gz") or die("ERROR");}
 }
 
 

From 3de14f62795d2cc06f7488731865008db4368a0b Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Wed, 23 May 2012 15:55:03 +0100
Subject: [PATCH 04/38] Gzip phrase-table.half.e2f.sorted on the fly

---
 scripts/training/train-model.perl.missing_bin_dir | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index 49d1d37ed..19841b948 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -1484,7 +1484,7 @@ sub score_phrase_phrase_extract {
         # sorting inverse phrase-table-half to sync up with regular one
         if ($direction eq "e2f" && ! ($___CONTINUE && -e "$ttable_file.half.e2f.sorted")) {
           print STDERR "(6." . ($substep++) . ") sorting inverse e2f table@ ".`date`;
-          safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $ttable_file.half.e2f > $ttable_file.half.e2f.sorted") or die("ERROR");
+          safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $ttable_file.half.e2f | gzip -c > $ttable_file.half.e2f.sorted.gz") or die("ERROR");
           if (! $debug) { safesystem("rm -f $ttable_file.half.e2f") or die("ERROR"); }
         }
 
@@ -1512,7 +1512,7 @@ sub score_phrase_phrase_extract {
     # merging the two halves
     print STDERR "(6.6) consolidating the two halves @ ".`date`;
     return if $___CONTINUE && -e "$ttable_file.gz";
-    my $cmd = "$PHRASE_CONSOLIDATE $ttable_file.half.f2e $ttable_file.half.e2f.sorted $ttable_file.gz";
+    my $cmd = "$PHRASE_CONSOLIDATE $ttable_file.half.f2e $ttable_file.half.e2f.sorted.gz $ttable_file.gz";
     $cmd .= " --Hierarchical" if $_HIERARCHICAL;
     $cmd .= " --LogProb" if $LOG_PROB;
     $cmd .= " --NegLogProb" if $NEG_LOG_PROB;

From 349409d1c6f80af89d48229537a713a6a95a168a Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Wed, 23 May 2012 16:38:31 +0100
Subject: [PATCH 05/38] GZip scoring on the fly

---
 scripts/training/phrase-extract/Jamfile           | 2 +-
 scripts/training/phrase-extract/score.cpp         | 8 ++++----
 scripts/training/train-model.perl.missing_bin_dir | 6 +++---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/scripts/training/phrase-extract/Jamfile b/scripts/training/phrase-extract/Jamfile
index 9c077fb12..5ed3f20f1 100644
--- a/scripts/training/phrase-extract/Jamfile
+++ b/scripts/training/phrase-extract/Jamfile
@@ -16,7 +16,7 @@ exe extract-rules : tables-core.o SentenceAlignment.o SyntaxTree.o XmlTree.o Sen
 
 exe extract-lex : extract-lex.cpp InputFileStream ;
 
-exe score : tables-core.o AlignmentPhrase.o score.cpp PhraseAlignment.cpp InputFileStream ../../..//boost_iostreams ;
+exe score : tables-core.o AlignmentPhrase.o score.cpp PhraseAlignment.cpp OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ;
 
 exe consolidate : consolidate.cpp tables-core.o OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ;
 
diff --git a/scripts/training/phrase-extract/score.cpp b/scripts/training/phrase-extract/score.cpp
index af7401132..935bedaa5 100644
--- a/scripts/training/phrase-extract/score.cpp
+++ b/scripts/training/phrase-extract/score.cpp
@@ -32,6 +32,7 @@
 #include "PhraseAlignment.h"
 #include "score.h"
 #include "InputFileStream.h"
+#include "OutputFileStream.h"
 
 using namespace std;
 
@@ -188,9 +189,9 @@ int main(int argc, char* argv[])
 		phraseTableFile = &cout;
 	}
 	else {
-		ofstream *outputFile = new ofstream();
-		outputFile->open(fileNamePhraseTable);
-		if (outputFile->fail()) {
+		Moses::OutputFileStream *outputFile = new Moses::OutputFileStream();
+		bool success = outputFile->Open(fileNamePhraseTable);
+		if (!success) {
 			cerr << "ERROR: could not open file phrase table file "
 					 << fileNamePhraseTable << endl;
 			exit(1);
@@ -245,7 +246,6 @@ int main(int argc, char* argv[])
 	
 	phraseTableFile->flush();
 	if (phraseTableFile != &cout) {
-		(dynamic_cast<ofstream*>(phraseTableFile))->close();
 		delete phraseTableFile;
 	}
 
diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index 19841b948..a4548d872 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -1468,7 +1468,7 @@ sub score_phrase_phrase_extract {
 
 	      print STDERR "(6.".($substep++).")  creating table half $ttable_file.half.$direction @ ".`date`;
 
-        my $cmd = "$PHRASE_SCORE $extract $lexical_file.$direction $ttable_file.half.$direction $inverse";
+        my $cmd = "$PHRASE_SCORE $extract $lexical_file.$direction $ttable_file.half.$direction.gz $inverse";
         $cmd .= " --Hierarchical" if $_HIERARCHICAL;
         $cmd .= " --WordAlignment" if $_PHRASE_WORD_ALIGNMENT;
         $cmd .= " --KneserNey $ttable_file.coc" if $KNESER_NEY;
@@ -1484,7 +1484,7 @@ sub score_phrase_phrase_extract {
         # sorting inverse phrase-table-half to sync up with regular one
         if ($direction eq "e2f" && ! ($___CONTINUE && -e "$ttable_file.half.e2f.sorted")) {
           print STDERR "(6." . ($substep++) . ") sorting inverse e2f table@ ".`date`;
-          safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $ttable_file.half.e2f | gzip -c > $ttable_file.half.e2f.sorted.gz") or die("ERROR");
+          safesystem("zcat $ttable_file.half.e2f.gz | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR | gzip -c > $ttable_file.half.e2f.sorted.gz") or die("ERROR");
           if (! $debug) { safesystem("rm -f $ttable_file.half.e2f") or die("ERROR"); }
         }
 
@@ -1512,7 +1512,7 @@ sub score_phrase_phrase_extract {
     # merging the two halves
     print STDERR "(6.6) consolidating the two halves @ ".`date`;
     return if $___CONTINUE && -e "$ttable_file.gz";
-    my $cmd = "$PHRASE_CONSOLIDATE $ttable_file.half.f2e $ttable_file.half.e2f.sorted.gz $ttable_file.gz";
+    my $cmd = "$PHRASE_CONSOLIDATE $ttable_file.half.f2e.gz $ttable_file.half.e2f.sorted.gz $ttable_file.gz";
     $cmd .= " --Hierarchical" if $_HIERARCHICAL;
     $cmd .= " --LogProb" if $LOG_PROB;
     $cmd .= " --NegLogProb" if $NEG_LOG_PROB;

From ce3ad73ebfaab3e5a1ab72e88b1c8042661e30da Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Wed, 23 May 2012 17:20:40 +0100
Subject: [PATCH 06/38] coc filename determined from phrase-table.half name,
 rather than passed as argument

---
 scripts/training/phrase-extract/score.cpp     | 26 +++++++------------
 .../training/train-model.perl.missing_bin_dir |  8 +++---
 2 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/scripts/training/phrase-extract/score.cpp b/scripts/training/phrase-extract/score.cpp
index 935bedaa5..1d081a054 100644
--- a/scripts/training/phrase-extract/score.cpp
+++ b/scripts/training/phrase-extract/score.cpp
@@ -57,7 +57,7 @@ public:
 
 vector<string> tokenize( const char [] );
 
-void writeCountOfCounts( const char* fileNameCountOfCounts );
+void writeCountOfCounts( const string &fileNameCountOfCounts );
 void processPhrasePairs( vector< PhraseAlignment > & , ostream &phraseTableFile);
 PhraseAlignment* findBestAlignment(const PhraseAlignmentCollection &phrasePair );
 void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float, int, ostream &phraseTableFile );
@@ -92,13 +92,13 @@ int main(int argc, char* argv[])
        << "scoring methods for extracted rules\n";
 
   if (argc < 4) {
-    cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring coc-file] [--KneserNey coc-file] [--WordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--OutputNTLengths] \n";
+    cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--WordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--OutputNTLengths] \n";
     exit(1);
   }
   char* fileNameExtract = argv[1];
   char* fileNameLex = argv[2];
   char* fileNamePhraseTable = argv[3];
-  char* fileNameCountOfCounts;
+  string fileNameCountOfCounts;
   char* fileNameFunctionWords;
 
   for(int i=4; i<argc; i++) {
@@ -116,19 +116,11 @@ int main(int argc, char* argv[])
       cerr << "not computing lexical translation score\n";
     } else if (strcmp(argv[i],"--GoodTuring") == 0) {
       goodTuringFlag = true;
-      if (i+1==argc) { 
-        cerr << "ERROR: specify count of count files for Good Turing discounting!\n";
-        exit(1);
-      }
-      fileNameCountOfCounts = argv[++i];
+			fileNameCountOfCounts = string(fileNamePhraseTable) + ".coc";
       cerr << "adjusting phrase translation probabilities with Good Turing discounting\n";
     } else if (strcmp(argv[i],"--KneserNey") == 0) {
       kneserNeyFlag = true;
-      if (i+1==argc) { 
-        cerr << "ERROR: specify count of count files for Kneser Ney discounting!\n";
-        exit(1);
-      }
-      fileNameCountOfCounts = argv[++i];
+			fileNameCountOfCounts = string(fileNamePhraseTable) + ".coc";
       cerr << "adjusting phrase translation probabilities with Kneser Ney discounting\n";
     } else if (strcmp(argv[i],"--UnalignedPenalty") == 0) {
       unalignedFlag = true;
@@ -255,12 +247,12 @@ int main(int argc, char* argv[])
   }
 }
 
-void writeCountOfCounts( const char* fileNameCountOfCounts )
+void writeCountOfCounts( const string &fileNameCountOfCounts )
 {
   // open file
-	ofstream countOfCountsFile;
-	countOfCountsFile.open(fileNameCountOfCounts);
-	if (countOfCountsFile.fail()) {
+	Moses::OutputFileStream countOfCountsFile;
+	bool success = countOfCountsFile.Open(fileNameCountOfCounts.c_str());
+	if (!success) {
 		cerr << "ERROR: could not open count-of-counts file "
 				 << fileNameCountOfCounts << endl;
     return;
diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index a4548d872..c3efa4d30 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -1471,8 +1471,8 @@ sub score_phrase_phrase_extract {
         my $cmd = "$PHRASE_SCORE $extract $lexical_file.$direction $ttable_file.half.$direction.gz $inverse";
         $cmd .= " --Hierarchical" if $_HIERARCHICAL;
         $cmd .= " --WordAlignment" if $_PHRASE_WORD_ALIGNMENT;
-        $cmd .= " --KneserNey $ttable_file.coc" if $KNESER_NEY;
-        $cmd .= " --GoodTuring $ttable_file.coc" if $GOOD_TURING && $inverse eq "";
+        $cmd .= " --KneserNey" if $KNESER_NEY;
+        $cmd .= " --GoodTuring" if $GOOD_TURING && $inverse eq "";
         $cmd .= " --UnalignedPenalty" if $UNALIGNED_COUNT;
         $cmd .= " --UnalignedFunctionWordPenalty ".($inverse ? $UNALIGNED_FW_F : $UNALIGNED_FW_E) if $UNALIGNED_FW_COUNT;
         $cmd .= " --MinCountHierarchical $MIN_COUNT_HIERARCHICAL" if $MIN_COUNT_HIERARCHICAL;
@@ -1519,8 +1519,8 @@ sub score_phrase_phrase_extract {
     $cmd .= " --OnlyDirect" if $ONLY_DIRECT;
     $cmd .= " --NoPhraseCount" unless $PHRASE_COUNT;
     $cmd .= " --LowCountFeature" if $LOW_COUNT;
-    $cmd .= " --GoodTuring $ttable_file.coc" if $GOOD_TURING;
-    $cmd .= " --KneserNey $ttable_file.coc" if $KNESER_NEY;
+    $cmd .= " --GoodTuring $ttable_file.half.f2e.gz.coc" if $GOOD_TURING;
+    $cmd .= " --KneserNey $ttable_file.half.f2e.gz.coc" if $KNESER_NEY;
     safesystem($cmd) or die "ERROR: Consolidating the two phrase table halves failed";
     if (! $debug) { safesystem("rm -f $ttable_file.half.*") or die("ERROR"); }
 }

From 761ac4f0fcbf1371daaeae65cfca7ded440fab9a Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Wed, 23 May 2012 19:02:36 +0100
Subject: [PATCH 07/38] parallel extract. Works with reordering

---
 scripts/generic/extract-parallel.perl         | 86 +++++++++++------
 .../training/train-model.perl.missing_bin_dir | 92 +++++++++++--------
 2 files changed, 112 insertions(+), 66 deletions(-)

diff --git a/scripts/generic/extract-parallel.perl b/scripts/generic/extract-parallel.perl
index ec5366f2e..f60ed8470 100755
--- a/scripts/generic/extract-parallel.perl
+++ b/scripts/generic/extract-parallel.perl
@@ -6,11 +6,15 @@
 use strict;
 use File::Basename;
 
+sub RunFork($);
+sub systemCheck($);
 sub NumStr($);
 
 print "Started ".localtime() ."\n";
 
 my $numParallel= $ARGV[0];
+$numParallel = 1 if $numParallel < 1;
+
 my $splitCmd= $ARGV[1];
 my $sortCmd= $ARGV[2];
 my $extractCmd= $ARGV[3];
@@ -29,25 +33,34 @@ for (my $i = 8; $i < $#ARGV + 1; ++$i)
 my $TMPDIR=dirname($extract)  ."/tmp.$$";
 mkdir $TMPDIR;
 
-my $totalLines = int(`wc -l $align`);
+my $totalLines = int(`cat $align | wc -l`);
 my $linesPerSplit = int($totalLines / $numParallel) + 1;
 
 print "total=$totalLines line-per-split=$linesPerSplit \n";
 
+my @children;
+my $pid;
 my $cmd;
+
 if ($numParallel > 1)
 {
 	$cmd = "$splitCmd -d -l $linesPerSplit -a 5 $target $TMPDIR/target.";
-	print STDERR "Executing: $cmd \n";
-	`$cmd`;
+	$pid = RunFork($cmd);
+	push(@children, $pid);
 	
 	$cmd = "$splitCmd -d -l $linesPerSplit -a 5 $source $TMPDIR/source.";
-	print STDERR "Executing: $cmd \n";
-	`$cmd`;
+	$pid = RunFork($cmd);
+	push(@children, $pid);
 
 	$cmd = "$splitCmd -d -l $linesPerSplit -a 5 $align $TMPDIR/align.";
-	print STDERR "Executing: $cmd \n";
-	`$cmd`;
+	$pid = RunFork($cmd);
+	push(@children, $pid);
+	
+	# wait for everything is finished
+	foreach (@children) {
+		waitpid($_, 0);
+	}
+
 }
 else
 {
@@ -67,15 +80,13 @@ else
 }
 
 # run extract
-my $isParent = 1;
-my @childs;
+@children = ();
 for (my $i = 0; $i < $numParallel; ++$i)
 {
   my $pid = fork();
   
   if ($pid == 0)
   { # child
-    $isParent = 0;
     my $numStr = NumStr($i);
     my $cmd = "$extractCmd $TMPDIR/target.$numStr $TMPDIR/source.$numStr $TMPDIR/align.$numStr $TMPDIR/extract.$numStr $otherExtractArgs \n";
     print STDERR $cmd;
@@ -85,20 +96,13 @@ for (my $i = 0; $i < $numParallel; ++$i)
   }
   else
   { # parent
-  	push(@childs, $pid);
+  	push(@children, $pid);
   }
 }
 
 # wait for everything is finished
-if ($isParent)
-{
-  foreach (@childs) {
-  	waitpid($_, 0);
-  }
-}
-else
-{
-    die "shouldn't be here";
+foreach (@children) {
+	waitpid($_, 0);
 }
 
 # merge
@@ -116,29 +120,52 @@ for (my $i = 0; $i < $numParallel; ++$i)
 $catCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR | gzip -c > $extract.sorted.gz \n";
 $catInvCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR | gzip -c > $extract.inv.sorted.gz \n";
 $catOCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR | gzip -c > $extract.o.sorted.gz \n";
-print STDERR $catCmd;
-print STDERR $catInvCmd;
-print STDERR $catOCmd;
 
-systemCheck($catCmd);
-systemCheck($catInvCmd);
+
+@children = ();
+$pid = RunFork($catCmd);
+push(@children, $pid);
+
+$pid = RunFork($catInvCmd);
+push(@children, $pid);
 
 my $numStr = NumStr(0);
 if (-e "$TMPDIR/extract.$numStr.o.gz")
 {
-	systemCheck($catOCmd);
+	$pid = RunFork($catOCmd);
+	push(@children, $pid);
+}
+
+# wait for all sorting to finish; stop if a child could not be reaped or exited non-zero (assumes a failed command makes its child exit non-zero - confirm against systemCheck)
+foreach (@children) {
+	(waitpid($_, 0) == $_ && $? == 0) or die "ERROR: merge/sort child process $_ failed (wait status $?)\n";
+}
 
 
-$cmd = "rm -rf $TMPDIR \n";
-print STDERR $cmd;
-`$cmd`;
+#$cmd = "rm -rf $TMPDIR \n";
+#print STDERR $cmd;
+#`$cmd`;
 
 print STDERR "Finished ".localtime() ."\n";
 
 # -----------------------------------------
 # -----------------------------------------
 
+# Launch $cmd in a forked child process and return the child's pid to the caller, which is expected to waitpid() on it.
+sub RunFork($)
+{
+  my $cmd = shift;
+  my $pid = fork();
+  defined($pid) or die "ERROR: cannot fork ($!) for: $cmd\n"; # fork() returns undef on failure; undef == 0 would send the parent into the child branch below
+  if ($pid == 0)
+  { # child: echo the command, hand it to systemCheck, then exit so the child never continues into the caller's code
+    print STDERR $cmd;
+    systemCheck($cmd);
+    exit();
+  }
+  return $pid;
+}
+
 sub systemCheck($)
 {
   my $cmd = shift;
@@ -171,4 +198,3 @@ sub NumStr($)
     return $numStr;
 }
 
-
diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index c3efa4d30..ff5d7454e 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -19,7 +19,7 @@ if ($SCRIPTS_ROOTDIR eq '') {
 $SCRIPTS_ROOTDIR =~ s/\/training$//;
 $SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
 
-my($_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_SORT_BUFFER_SIZE, $_SORT_BATCH_SIZE, $_CORPUS,
+my($_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_SORT_BUFFER_SIZE, $_SORT_BATCH_SIZE, $_SORT_COMPRESS, $_CORPUS,
    $_CORPUS_COMPRESSION, $_FIRST_STEP, $_LAST_STEP, $_F, $_E, $_MAX_PHRASE_LENGTH,
    $_LEXICAL_FILE, $_NO_LEXICAL_WEIGHTING, $_VERBOSE, $_ALIGNMENT,
    $_ALIGNMENT_FILE, $_ALIGNMENT_STEM, @_LM, $_EXTRACT_FILE, $_GIZA_OPTION, $_HELP, $_PARTS,
@@ -36,6 +36,7 @@ my($_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_
    $_CONTINUE,$_MAX_LEXICAL_REORDERING,$_DO_STEPS,
    $_ADDITIONAL_INI,
    $_DICTIONARY, $_EPPEX);
+my $_CORES = 1;
 
 my $debug = 0; # debug this script, do not delete any files in debug mode
 
@@ -58,6 +59,7 @@ $_HELP = 1
 		       'temp-dir=s' => \$_TEMP_DIR,
            'sort-buffer-size=s' => \$_SORT_BUFFER_SIZE,
            'sort-batch-size=s' => \$_SORT_BATCH_SIZE,
+           'sort-compress=s' => \$_SORT_COMPRESS,
 		       'extract-file=s' => \$_EXTRACT_FILE,
 		       'alignment=s' => \$_ALIGNMENT,
 		       'alignment-file=s' => \$_ALIGNMENT_FILE,
@@ -114,7 +116,8 @@ $_HELP = 1
 		       'force-factored-filenames' => \$_FORCE_FACTORED_FILENAMES,
 		       'dictionary=s' => \$_DICTIONARY,
 		       'eppex:s' => \$_EPPEX,
-           'additional-ini=s' => \$_ADDITIONAL_INI
+           'additional-ini=s' => \$_ADDITIONAL_INI, 
+           'cores=i' => \$_CORES
                );
 
 if ($_HELP) {
@@ -206,8 +209,36 @@ if(!defined $_MGIZA ){
 
 my $MKCLS = "$BINDIR/mkcls";
 
+# parallel extract
+my $SPLIT_EXEC = `gsplit --help 2>/dev/null`; 
+if($SPLIT_EXEC) {
+  $SPLIT_EXEC = 'gsplit';
+}
+else {
+  $SPLIT_EXEC = 'split';
+}
+
+my $SORT_EXEC = `gsort --help 2>/dev/null`; 
+if($SORT_EXEC) {
+  $SORT_EXEC = 'gsort';
+}
+else {
+  $SORT_EXEC = 'sort';
+}
+
+my $__SORT_BUFFER_SIZE = "";
+$__SORT_BUFFER_SIZE = "-S $_SORT_BUFFER_SIZE" if $_SORT_BUFFER_SIZE;
+
+my $__SORT_BATCH_SIZE = "";
+$__SORT_BATCH_SIZE = "--batch-size $_SORT_BATCH_SIZE" if $_SORT_BATCH_SIZE;
+
+my $__SORT_COMPRESS = "";
+$__SORT_COMPRESS = "--compress-program=$_SORT_COMPRESS" if $_SORT_COMPRESS;
+
 # supporting scripts/binaries from this package
 my $PHRASE_EXTRACT = "$SCRIPTS_ROOTDIR/training/phrase-extract/extract";
+$PHRASE_EXTRACT = "$SCRIPTS_ROOTDIR/generic/extract-parallel.perl $_CORES $SPLIT_EXEC \"$SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE $__SORT_COMPRESS\" $PHRASE_EXTRACT";
+
 my $RULE_EXTRACT;
 if (defined($_GHKM)) {
   $RULE_EXTRACT = "$SCRIPTS_ROOTDIR/training/phrase-extract/extract-ghkm/tools/extract-ghkm";
@@ -215,6 +246,8 @@ if (defined($_GHKM)) {
 else {
   $RULE_EXTRACT = "$SCRIPTS_ROOTDIR/training/phrase-extract/extract-rules";
 }
+$RULE_EXTRACT = "$SCRIPTS_ROOTDIR/generic/extract-parallel.perl $_CORES $SPLIT_EXEC \"$SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE $__SORT_COMPRESS\" $RULE_EXTRACT";
+
 my $LEXICAL_REO_SCORER = "$SCRIPTS_ROOTDIR/training/lexical-reordering/score";
 my $MEMSCORE = "$SCRIPTS_ROOTDIR/training/memscore/memscore";
 my $EPPEX = "$SCRIPTS_ROOTDIR/training/eppex/eppex";
@@ -308,12 +341,6 @@ $_DONT_ZIP = $___DONT_ZIP unless $___DONT_ZIP;
 my $___TEMP_DIR = $___MODEL_DIR;
 $___TEMP_DIR = $_TEMP_DIR if $_TEMP_DIR;
 
-my $__SORT_BUFFER_SIZE = "";
-$__SORT_BUFFER_SIZE = "-S $_SORT_BUFFER_SIZE" if $_SORT_BUFFER_SIZE;
-
-my $__SORT_BATCH_SIZE = "";
-$__SORT_BATCH_SIZE = "--batch-size $_SORT_BATCH_SIZE" if $_SORT_BATCH_SIZE;
-
 my $___CONTINUE = 0; 
 $___CONTINUE = $_CONTINUE if $_CONTINUE;
 
@@ -1590,35 +1617,28 @@ sub get_reordering_factored {
 }
 
 sub get_reordering {
-    my ($extract_file,$reo_model_path) = @_;
-    if (-e "$extract_file.o.gz") {
-	safesystem("gunzip < $extract_file.o.gz | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR | gzip -c > $extract_file.o.sorted.gz") or die("ERROR");
-    }
-    else {
-        safesystem("LC_ALL=C sort -T $___TEMP_DIR $extract_file.o | gzip -c > $extract_file.o.sorted.gz") or die("ERROR");
-    }
-
-    my $smooth = $___REORDERING_SMOOTH;
-
-    print STDERR "(7.2) building tables @ ".`date`;
-
-    #create cmd string for lexical reordering scoring
-    my $cmd = "$LEXICAL_REO_SCORER $extract_file.o.sorted.gz $smooth $reo_model_path";
-    $cmd .= " --SmoothWithCounts" if ($smooth =~ /(.+)u$/);
-    for my $mtype (keys %REORDERING_MODEL_TYPES) {
-	$cmd .= " --model \"$mtype $REORDERING_MODEL_TYPES{$mtype}";
-	foreach my $model (@REORDERING_MODELS) {
-	    if ($model->{"type"} eq $mtype) {
-		$cmd .= " ".$model->{"filename"};
-	    }
+	my ($extract_file,$reo_model_path) = @_;
+	my $smooth = $___REORDERING_SMOOTH;
+	
+	print STDERR "(7.2) building tables @ ".`date`;
+	
+	#create cmd string for lexical reordering scoring
+	my $cmd = "$LEXICAL_REO_SCORER $extract_file.o.sorted.gz $smooth $reo_model_path";
+	$cmd .= " --SmoothWithCounts" if ($smooth =~ /(.+)u$/);
+	for my $mtype (keys %REORDERING_MODEL_TYPES) {
+		$cmd .= " --model \"$mtype $REORDERING_MODEL_TYPES{$mtype}";
+		foreach my $model (@REORDERING_MODELS) {
+			if ($model->{"type"} eq $mtype) {
+				$cmd .= " ".$model->{"filename"};
+			}
+		}
+		$cmd .= "\"";
 	}
-	$cmd .= "\"";
-    }
-    
-    #Call the lexical reordering scorer
-    safesystem("$cmd") or die "ERROR: Lexical reordering scoring failed";
-
-    if (! $debug) { safesystem("rm $extract_file.o.sorted.gz") or die("ERROR");}
+	
+	#Call the lexical reordering scorer
+	safesystem("$cmd") or die "ERROR: Lexical reordering scoring failed";
+	
+	if (! $debug) { safesystem("rm $extract_file.o.sorted.gz") or die("ERROR");}
 }
 
 

From f33538ea5546e402d92645c8fa95289cdd12c988 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Wed, 23 May 2012 19:12:23 +0100
Subject: [PATCH 08/38] Works with phrase-table scoring

---
 scripts/training/train-model.perl.missing_bin_dir | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index ff5d7454e..f599e00aa 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -1482,17 +1482,6 @@ sub score_phrase_phrase_extract {
               }
 	      my $extract = "$extract_filename.sorted.gz";
 
-	      if (!($___CONTINUE && -e "$extract_filename.sorted.gz")) {
-	          # sorting
-	          print STDERR "(6.".($substep++).")  sorting $direction @ ".`date`;
-	          if (-e "$extract_filename.gz") {
-		      safesystem("gunzip < $extract_filename.gz | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR | gzip -c > $extract_filename.sorted.gz") or die("ERROR");
-	          }
-	          else {
-		      safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $extract_filename | gzip -c > $extract_filename.sorted.gz") or die("ERROR");
-	          }
-              }
-
 	      print STDERR "(6.".($substep++).")  creating table half $ttable_file.half.$direction @ ".`date`;
 
         my $cmd = "$PHRASE_SCORE $extract $lexical_file.$direction $ttable_file.half.$direction.gz $inverse";
@@ -1506,7 +1495,6 @@ sub score_phrase_phrase_extract {
         $cmd .= " $CORE_SCORE_OPTIONS" if defined($_SCORE_OPTIONS);
         print $cmd."\n";
         safesystem($cmd) or die "ERROR: Scoring of phrases failed";	    
-        if (! $debug) { safesystem("rm -f $extract") or die("ERROR"); }
   
         # sorting inverse phrase-table-half to sync up with regular one
         if ($direction eq "e2f" && ! ($___CONTINUE && -e "$ttable_file.half.e2f.sorted")) {

From f91b19fa4126ef15f89dcc6c3e870bf6846836ff Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Wed, 23 May 2012 19:26:37 +0100
Subject: [PATCH 09/38] Works with phrase-table scoring

---
 scripts/generic/extract-parallel.perl             | 8 ++++----
 scripts/training/train-model.perl.missing_bin_dir | 1 -
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/scripts/generic/extract-parallel.perl b/scripts/generic/extract-parallel.perl
index f60ed8470..b810d9672 100755
--- a/scripts/generic/extract-parallel.perl
+++ b/scripts/generic/extract-parallel.perl
@@ -141,10 +141,10 @@ foreach (@children) {
 	waitpid($_, 0);
 }
 
-
-#$cmd = "rm -rf $TMPDIR \n";
-#print STDERR $cmd;
-#`$cmd`;
+# delete temporary files
+$cmd = "rm -rf $TMPDIR \n";
+print STDERR $cmd;
+`$cmd`;
 
 print STDERR "Finished ".localtime() ."\n";
 
diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index f599e00aa..0dda1017f 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -1626,7 +1626,6 @@ sub get_reordering {
 	#Call the lexical reordering scorer
 	safesystem("$cmd") or die "ERROR: Lexical reordering scoring failed";
 	
-	if (! $debug) { safesystem("rm $extract_file.o.sorted.gz") or die("ERROR");}
 }
 
 

From d979b4454d04e75440ce4e1607a31e8c679a0632 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Thu, 24 May 2012 15:05:23 +0100
Subject: [PATCH 10/38] parallel scoring

---
 scripts/generic/score-parallel.perl           | 287 ++++++++++++++++++
 .../training/train-model.perl.missing_bin_dir |  29 +-
 2 files changed, 304 insertions(+), 12 deletions(-)
 create mode 100755 scripts/generic/score-parallel.perl

diff --git a/scripts/generic/score-parallel.perl b/scripts/generic/score-parallel.perl
new file mode 100755
index 000000000..062d0df9c
--- /dev/null
+++ b/scripts/generic/score-parallel.perl
@@ -0,0 +1,287 @@
+#! /usr/bin/perl -w 
+
+# example
+# ./score-parallel.perl 8 "gsort --batch-size=253" ./score ./extract.2.sorted.gz ./lex.2.f2e ./phrase-table.2.half.f2e  --GoodTuring ./phrase-table.2.coc 0
+# ./score-parallel.perl 8 "gsort --batch-size=253" ./score ./extract.2.inv.sorted.gz ./lex.2.e2f ./phrase-table.2.half.e2f  --Inverse 1
+
+use strict;
+use File::Basename;
+
+sub RunFork($);
+sub systemCheck($);
+sub GetSourcePhrase($);
+sub NumStr($);
+
+#my $EXTRACT_SPLIT_LINES = 5000000;
+my $EXTRACT_SPLIT_LINES = 1000;
+
+print "Started ".localtime() ."\n";
+
+my $numParallel	= $ARGV[0];
+$numParallel = 1 if $numParallel < 1;
+
+my $sortCmd			= $ARGV[1];
+my $scoreCmd		= $ARGV[2];
+
+my $extractFile = do { require File::Spec; File::Spec->rel2abs($ARGV[3]) }; # 1st arg of extract argument; absolute because it is symlinked from inside $TMPDIR
+my $lexFile 		= $ARGV[4]; # passed unchanged to every score command
+my $ptHalf 			= $ARGV[5]; # output
+
+my $otherExtractArgs= "";
+for (my $i = 6; $i < $#ARGV; ++$i)
+{
+  $otherExtractArgs .= $ARGV[$i] ." ";
+}
+#$scoreCmd $extractFile $lexFile $ptHalf $otherExtractArgs
+
+my $doSort			= $ARGV[$#ARGV]; # last arg
+
+my $TMPDIR=dirname($ptHalf)  ."/tmp.$$"; # scratch directory beside the output, named after this process id
+mkdir($TMPDIR) or die "ERROR: cannot create temporary directory $TMPDIR: $!";
+
+my $cmd;
+
+my $fileCount = 0;
+if ($numParallel <= 1)
+{ # don't do parallel. Just link the extract file into place
+  $cmd = "ln -s $extractFile $TMPDIR/extract.0.gz";
+  print STDERR "$cmd \n";
+  systemCheck($cmd);
+  
+  $fileCount = 1;
+}
+else
+{	# cut up extract file into smaller mini-extract files.
+	if ($extractFile =~ /\.gz$/) {
+		open(IN, "gunzip -c $extractFile |") || die "can't open pipe to $extractFile";
+	}
+	else {
+		open(IN, $extractFile) || die "can't open $extractFile";
+	}
+	
+	my $filePath  = "$TMPDIR/extract.$fileCount.gz";
+	open (OUT, "| gzip -c > $filePath") or die "error starting gzip $!";
+	
+	my $lineCount = 0;
+	my $line;
+	my $prevSourcePhrase = "";
+	while ($line=<IN>) 
+	{
+		chomp($line);
+		++$lineCount;
+	
+		if ($lineCount > $EXTRACT_SPLIT_LINES)
+		{ # over line limit. Cut off at next source phrase change
+			my $sourcePhrase = GetSourcePhrase($line);
+			
+			if ($prevSourcePhrase eq "")
+			{ # start comparing
+				$prevSourcePhrase = $sourcePhrase;
+			}
+			elsif ($sourcePhrase eq $prevSourcePhrase)
+			{ # can't cut off yet. Do nothing      
+			}
+			else
+			{ # cut off, open next min-extract file & write to that instead
+				close OUT;
+	
+				$prevSourcePhrase = "";
+				$lineCount = 0;
+				++$fileCount;
+				my $filePath  = $fileCount;
+				$filePath     = "$TMPDIR/extract.$filePath.gz";
+				open (OUT, "| gzip -c > $filePath") or die "error starting gzip $!";
+			}
+		}
+		else
+		{ # keep on writing to current mini-extract file
+		}
+	
+		print OUT "$line\n";
+	
+	}
+	close OUT;
+	++$fileCount;
+}
+
+
+# create run scripts
+my @runFiles = (0..($numParallel-1));
+for (my $i = 0; $i < $numParallel; ++$i)
+{
+  my $path = "$TMPDIR/run.$i.sh";
+  open(my $fh, ">", $path) or die "cannot open $path: $!";
+  $runFiles[$i] = $fh;
+}
+
+# write scoring of mini-extracts to run scripts (mini-extract $i goes to run script $i % $numParallel)
+for (my $i = 0; $i < $fileCount; ++$i)
+{
+  my $numStr = NumStr($i);
+  # "|| exit 1": a script's status is that of its last command, so stop at the first failing score command
+  my $fileInd = $i % $numParallel;
+  my $fh = $runFiles[$fileInd];
+  my $cmd = "$scoreCmd $TMPDIR/extract.$i.gz $lexFile $TMPDIR/phrase-table.half.$numStr.gz $otherExtractArgs || exit 1\n";
+  print $fh $cmd;
+}
+
+# close run script files
+for (my $i = 0; $i < $numParallel; ++$i)
+{
+  close($runFiles[$i]);
+  my $path = "$TMPDIR/run.$i.sh";
+  systemCheck("chmod +x $path");
+}
+
+# run each score script in parallel
+my @children;
+for (my $i = 0; $i < $numParallel; ++$i)
+{
+  my $cmd = "$TMPDIR/run.$i.sh";
+	my $pid = RunFork($cmd);
+	push(@children, $pid);
+}
+
+# wait for every child; abort unless each one was reaped and exited with status 0
+foreach my $pid (@children) {
+	(waitpid($pid, 0) == $pid && $? == 0) or die "ERROR: scoring job (pid $pid) failed\n";
+}
+
+# merge & sort
+$cmd = "\n\nOH SHIT. This should have been filled in \n\n";
+if ($fileCount == 1 && !$doSort)
+{
+  my $numStr = NumStr(0);
+  $cmd = "mv $TMPDIR/phrase-table.half.$numStr.gz $ptHalf.gz \n";
+}
+else
+{
+  $cmd = "zcat $TMPDIR/phrase-table.half.*.gz";
+
+  if ($doSort) {
+    $cmd .= "| LC_ALL=C $sortCmd -T $TMPDIR ";
+  }
+
+  $cmd .= " | gzip -c >";
+
+  if ($doSort) {
+    $cmd .= " $ptHalf.sorted.gz \n";
+  }
+  else {
+    $cmd .= " $ptHalf.gz \n";
+  }
+}
+print STDERR $cmd;
+systemCheck($cmd);
+
+# merge coc
+my $numStr = NumStr(0);
+my $cocPath = "$TMPDIR/phrase-table.half.$numStr.gz.coc";
+
+if (-e $cocPath)
+{
+  my @arrayCOC;
+  my $line;
+
+  # 1st file
+  open(FHCOC, $cocPath) || die "can't open pipe to $cocPath";
+  while ($line = <FHCOC>)
+  {
+    my $coc = int($line);
+    push(@arrayCOC, $coc);
+  }
+  close(FHCOC);
+
+  # all other files
+  for (my $i = 1; $i < $fileCount; ++$i)
+  {
+  	$numStr = NumStr($i);
+    $cocPath = "$TMPDIR/phrase-table.half.$numStr.gz.coc";
+    open(FHCOC, $cocPath) || die "can't open pipe to $cocPath";
+    my $arrayInd = 0;
+    while ($line = <FHCOC>)
+    {
+      my $coc = int($line);
+      $arrayCOC[$arrayInd] += $coc;
+
+      ++$arrayInd;
+    }
+
+    close(FHCOC);
+  }
+
+  # output
+  $cocPath = "$ptHalf.coc";
+  open(FHCOC, ">", $cocPath) or die "cannot open $cocPath: $!";
+  for (my $i = 0; $i < @arrayCOC; ++$i)
+  {
+    print FHCOC $arrayCOC[$i]."\n";
+  }
+  close(FHCOC);
+}
+
+$cmd = "rm -rf $TMPDIR \n";
+print STDERR $cmd;
+systemCheck($cmd);
+
+print STDERR "Finished ".localtime() ."\n";
+
+# -----------------------------------------
+# -----------------------------------------
+
+sub RunFork($)
+{
+  my $cmd = shift;
+  # Runs $cmd in a forked child, which exits here and never returns; the parent gets the child's pid back.
+  my $pid = fork();
+  die "ERROR: cannot fork to run $cmd: $!\n" unless defined($pid); # fork() returns undef on failure
+  if ($pid == 0)
+  { # child
+    print STDERR $cmd;
+    systemCheck($cmd);
+    exit();
+  }
+  return $pid;
+}
+sub systemCheck($)
+{ # Runs $cmd via system(); on a non-zero result reports the command on STDERR and exits with status 1.
+  my $cmd = shift;
+  my $retVal = system($cmd);
+  if ($retVal != 0) {
+    print STDERR "ERROR: command failed (system() returned $retVal): $cmd\n";
+    exit(1);
+  }
+}
+
+sub GetSourcePhrase($)
+{ # Returns the part of an extract line that precedes its first "|||".
+  my ($extractLine) = @_;
+  # NOTE: without any "|||" index() yields -1, so substr() then drops only the last character.
+  my $delimPos = index($extractLine, "|||");
+  return substr($extractLine, 0, $delimPos);
+}
+
+
+sub NumStr($)
+{ # Left-pads a file index with zeros to five characters; larger values are returned as they are.
+    my $n = shift;
+    my $padded;
+    if ($n >= 10000) {
+	$padded = $n;
+    }
+    elsif ($n >= 1000) {
+	$padded = "0$n";
+    }
+    elsif ($n >= 100) {
+	$padded = "00$n";
+    }
+    elsif ($n >= 10) {
+	$padded = "000$n";
+    }
+    else {
+	$padded = "0000$n";
+    }
+    return $padded;
+}
+
+
diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index 0dda1017f..f168f38cb 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -253,7 +253,10 @@ my $MEMSCORE = "$SCRIPTS_ROOTDIR/training/memscore/memscore";
 my $EPPEX = "$SCRIPTS_ROOTDIR/training/eppex/eppex";
 my $SYMAL = "$SCRIPTS_ROOTDIR/training/symal/symal";
 my $GIZA2BAL = "$SCRIPTS_ROOTDIR/training/symal/giza2bal.pl";
+
 my $PHRASE_SCORE = "$SCRIPTS_ROOTDIR/training/phrase-extract/score";
+$PHRASE_SCORE = "$SCRIPTS_ROOTDIR/generic/score-parallel.perl $_CORES \"$SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE $__SORT_COMPRESS\" $PHRASE_SCORE";
+
 my $PHRASE_CONSOLIDATE = "$SCRIPTS_ROOTDIR/training/phrase-extract/consolidate";
 
 # utilities
@@ -1484,7 +1487,7 @@ sub score_phrase_phrase_extract {
 
 	      print STDERR "(6.".($substep++).")  creating table half $ttable_file.half.$direction @ ".`date`;
 
-        my $cmd = "$PHRASE_SCORE $extract $lexical_file.$direction $ttable_file.half.$direction.gz $inverse";
+        my $cmd = "$PHRASE_SCORE $extract $lexical_file.$direction $ttable_file.half.$direction $inverse";
         $cmd .= " --Hierarchical" if $_HIERARCHICAL;
         $cmd .= " --WordAlignment" if $_PHRASE_WORD_ALIGNMENT;
         $cmd .= " --KneserNey" if $KNESER_NEY;
@@ -1493,16 +1496,18 @@ sub score_phrase_phrase_extract {
         $cmd .= " --UnalignedFunctionWordPenalty ".($inverse ? $UNALIGNED_FW_F : $UNALIGNED_FW_E) if $UNALIGNED_FW_COUNT;
         $cmd .= " --MinCountHierarchical $MIN_COUNT_HIERARCHICAL" if $MIN_COUNT_HIERARCHICAL;
         $cmd .= " $CORE_SCORE_OPTIONS" if defined($_SCORE_OPTIONS);
-        print $cmd."\n";
+
+				# sorting
+				if ($direction eq "e2f") {
+					$cmd .= " 1 ";
+				}
+				else {
+					$cmd .= " 0 ";
+				}
+
+      print $cmd."\n";
         safesystem($cmd) or die "ERROR: Scoring of phrases failed";	    
   
-        # sorting inverse phrase-table-half to sync up with regular one
-        if ($direction eq "e2f" && ! ($___CONTINUE && -e "$ttable_file.half.e2f.sorted")) {
-          print STDERR "(6." . ($substep++) . ") sorting inverse e2f table@ ".`date`;
-          safesystem("zcat $ttable_file.half.e2f.gz | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR | gzip -c > $ttable_file.half.e2f.sorted.gz") or die("ERROR");
-          if (! $debug) { safesystem("rm -f $ttable_file.half.e2f") or die("ERROR"); }
-        }
-
         exit();
       }
       else
@@ -1534,10 +1539,10 @@ sub score_phrase_phrase_extract {
     $cmd .= " --OnlyDirect" if $ONLY_DIRECT;
     $cmd .= " --NoPhraseCount" unless $PHRASE_COUNT;
     $cmd .= " --LowCountFeature" if $LOW_COUNT;
-    $cmd .= " --GoodTuring $ttable_file.half.f2e.gz.coc" if $GOOD_TURING;
-    $cmd .= " --KneserNey $ttable_file.half.f2e.gz.coc" if $KNESER_NEY;
+    $cmd .= " --GoodTuring $ttable_file.half.f2e.coc" if $GOOD_TURING;
+    $cmd .= " --KneserNey $ttable_file.half.f2e.coc" if $KNESER_NEY;
     safesystem($cmd) or die "ERROR: Consolidating the two phrase table halves failed";
-    if (! $debug) { safesystem("rm -f $ttable_file.half.*") or die("ERROR"); }
+    #if (! $debug) { safesystem("rm -f $ttable_file.half.*") or die("ERROR"); }
 }
 
 sub score_phrase_memscore {

From e42f8d36aecdd484dfd5b6ff9393235af9036c38 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Thu, 24 May 2012 15:20:23 +0100
Subject: [PATCH 11/38] parallel scoring

---
 scripts/generic/score-parallel.perl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/generic/score-parallel.perl b/scripts/generic/score-parallel.perl
index 062d0df9c..fbb4d4d02 100755
--- a/scripts/generic/score-parallel.perl
+++ b/scripts/generic/score-parallel.perl
@@ -13,7 +13,7 @@ sub GetSourcePhrase($);
 sub NumStr($);
 
 #my $EXTRACT_SPLIT_LINES = 5000000;
-my $EXTRACT_SPLIT_LINES = 1000;
+my $EXTRACT_SPLIT_LINES = 1000000;
 
 print "Started ".localtime() ."\n";
 

From 2b8eeac75ee86cdd2f9259b73cf1f3aa43b3fdc9 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Thu, 24 May 2012 16:47:40 +0100
Subject: [PATCH 12/38] Minor error

---
 scripts/training/phrase-extract/score.cpp         | 2 +-
 scripts/training/train-model.perl.missing_bin_dir | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/training/phrase-extract/score.cpp b/scripts/training/phrase-extract/score.cpp
index 1d081a054..8bcc9be3b 100644
--- a/scripts/training/phrase-extract/score.cpp
+++ b/scripts/training/phrase-extract/score.cpp
@@ -265,7 +265,7 @@ void writeCountOfCounts( const string &fileNameCountOfCounts )
   for(int i=1; i<=COC_MAX; i++) {
     countOfCountsFile << countOfCounts[ i ] << endl;
   }
-	countOfCountsFile.close();
+	countOfCountsFile.Close();
 }
 
 void processPhrasePairs( vector< PhraseAlignment > &phrasePair, ostream &phraseTableFile )
diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index f168f38cb..b50b5f633 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -41,7 +41,7 @@ my $_CORES = 1;
 my $debug = 0; # debug this script, do not delete any files in debug mode
 
 # the following line is set installation time by 'make release'.  BEWARE!
-my $BINDIR="/Users/hieuhoang/workspace/bin/training-tools";
+my $BINDIR="/home/hieu/workspace/bin/training-tools/";
 
 $_HELP = 1
     unless &GetOptions('root-dir=s' => \$_ROOT_DIR,

From 042a9cca8d103d9d6daaef316c64ffac0f524efe Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Thu, 24 May 2012 17:10:55 +0100
Subject: [PATCH 13/38] delete pt.half after use

---
 scripts/training/train-model.perl.missing_bin_dir | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index b50b5f633..0a15a72db 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -1542,7 +1542,7 @@ sub score_phrase_phrase_extract {
     $cmd .= " --GoodTuring $ttable_file.half.f2e.coc" if $GOOD_TURING;
     $cmd .= " --KneserNey $ttable_file.half.f2e.coc" if $KNESER_NEY;
     safesystem($cmd) or die "ERROR: Consolidating the two phrase table halves failed";
-    #if (! $debug) { safesystem("rm -f $ttable_file.half.*") or die("ERROR"); }
+    if (! $debug) { safesystem("rm -f $ttable_file.half.*") or die("ERROR"); }
 }
 
 sub score_phrase_memscore {

From fcf817fda26a010d9f7563c7b14b28534816d7c3 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Fri, 25 May 2012 12:42:11 +0100
Subject: [PATCH 14/38] add --parallel to sorting options

---
 .../training/train-model.perl.missing_bin_dir    | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index f168f38cb..9d04cc1a1 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -19,7 +19,7 @@ if ($SCRIPTS_ROOTDIR eq '') {
 $SCRIPTS_ROOTDIR =~ s/\/training$//;
 $SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
 
-my($_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_SORT_BUFFER_SIZE, $_SORT_BATCH_SIZE, $_SORT_COMPRESS, $_CORPUS,
+my($_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_SORT_BUFFER_SIZE, $_SORT_BATCH_SIZE,  $_SORT_COMPRESS, $_SORT_PARALLEL, $_CORPUS,
    $_CORPUS_COMPRESSION, $_FIRST_STEP, $_LAST_STEP, $_F, $_E, $_MAX_PHRASE_LENGTH,
    $_LEXICAL_FILE, $_NO_LEXICAL_WEIGHTING, $_VERBOSE, $_ALIGNMENT,
    $_ALIGNMENT_FILE, $_ALIGNMENT_STEM, @_LM, $_EXTRACT_FILE, $_GIZA_OPTION, $_HELP, $_PARTS,
@@ -58,8 +58,9 @@ $_HELP = 1
 		       'model-dir=s' => \$_MODEL_DIR,
 		       'temp-dir=s' => \$_TEMP_DIR,
            'sort-buffer-size=s' => \$_SORT_BUFFER_SIZE,
-           'sort-batch-size=s' => \$_SORT_BATCH_SIZE,
+           'sort-batch-size=i' => \$_SORT_BATCH_SIZE,
            'sort-compress=s' => \$_SORT_COMPRESS,
+           'sort-parallel=i' => \$_SORT_PARALLEL,
 		       'extract-file=s' => \$_EXTRACT_FILE,
 		       'alignment=s' => \$_ALIGNMENT,
 		       'alignment-file=s' => \$_ALIGNMENT_FILE,
@@ -233,11 +234,14 @@ my $__SORT_BATCH_SIZE = "";
 $__SORT_BATCH_SIZE = "--batch-size $_SORT_BATCH_SIZE" if $_SORT_BATCH_SIZE;
 
 my $__SORT_COMPRESS = "";
-$__SORT_COMPRESS = "--compress-program=$_SORT_COMPRESS" if $_SORT_COMPRESS;
+$__SORT_COMPRESS = "--compress-program $_SORT_COMPRESS" if $_SORT_COMPRESS;
+
+my $__SORT_PARALLEL = "";
+$__SORT_PARALLEL = "--parallel $_SORT_PARALLEL" if $_SORT_PARALLEL;
 
 # supporting scripts/binaries from this package
 my $PHRASE_EXTRACT = "$SCRIPTS_ROOTDIR/training/phrase-extract/extract";
-$PHRASE_EXTRACT = "$SCRIPTS_ROOTDIR/generic/extract-parallel.perl $_CORES $SPLIT_EXEC \"$SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE $__SORT_COMPRESS\" $PHRASE_EXTRACT";
+$PHRASE_EXTRACT = "$SCRIPTS_ROOTDIR/generic/extract-parallel.perl $_CORES $SPLIT_EXEC \"$SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE $__SORT_COMPRESS $__SORT_PARALLEL\" $PHRASE_EXTRACT";
 
 my $RULE_EXTRACT;
 if (defined($_GHKM)) {
@@ -246,7 +250,7 @@ if (defined($_GHKM)) {
 else {
   $RULE_EXTRACT = "$SCRIPTS_ROOTDIR/training/phrase-extract/extract-rules";
 }
-$RULE_EXTRACT = "$SCRIPTS_ROOTDIR/generic/extract-parallel.perl $_CORES $SPLIT_EXEC \"$SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE $__SORT_COMPRESS\" $RULE_EXTRACT";
+$RULE_EXTRACT = "$SCRIPTS_ROOTDIR/generic/extract-parallel.perl $_CORES $SPLIT_EXEC \"$SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE $__SORT_COMPRESS $__SORT_PARALLEL\" $RULE_EXTRACT";
 
 my $LEXICAL_REO_SCORER = "$SCRIPTS_ROOTDIR/training/lexical-reordering/score";
 my $MEMSCORE = "$SCRIPTS_ROOTDIR/training/memscore/memscore";
@@ -255,7 +259,7 @@ my $SYMAL = "$SCRIPTS_ROOTDIR/training/symal/symal";
 my $GIZA2BAL = "$SCRIPTS_ROOTDIR/training/symal/giza2bal.pl";
 
 my $PHRASE_SCORE = "$SCRIPTS_ROOTDIR/training/phrase-extract/score";
-$PHRASE_SCORE = "$SCRIPTS_ROOTDIR/generic/score-parallel.perl $_CORES \"$SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE $__SORT_COMPRESS\" $PHRASE_SCORE";
+$PHRASE_SCORE = "$SCRIPTS_ROOTDIR/generic/score-parallel.perl $_CORES \"$SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE $__SORT_COMPRESS $__SORT_PARALLEL\" $PHRASE_SCORE";
 
 my $PHRASE_CONSOLIDATE = "$SCRIPTS_ROOTDIR/training/phrase-extract/consolidate";
 

From 8844be6f25bd87ae20ee19eda6e1aface0f27cc5 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Fri, 25 May 2012 13:41:06 +0100
Subject: [PATCH 15/38] Move queryOnDiskPt from contrib to OnDiskPt folder.
 Easier to amend Jam file

---
 Jamroot                                               | 2 +-
 OnDiskPt/Jamfile                                      | 3 +++
 {contrib/queryOnDiskPt => OnDiskPt}/queryOnDiskPt.cpp | 0
 3 files changed, 4 insertions(+), 1 deletion(-)
 rename {contrib/queryOnDiskPt => OnDiskPt}/queryOnDiskPt.cpp (100%)

diff --git a/Jamroot b/Jamroot
index 3c0862ab9..8ed134521 100644
--- a/Jamroot
+++ b/Jamroot
@@ -104,7 +104,7 @@ build-project scripts ;
 #Regression tests (only does anything if --with-regtest is passed)
 build-project regression-testing ;
 
-alias programs : lm//query lm//build_binary moses-chart-cmd/src//moses_chart moses-cmd/src//programs OnDiskPt//CreateOnDisk mert//programs contrib/server//mosesserver misc//programs ;
+alias programs : lm//query lm//build_binary moses-chart-cmd/src//moses_chart moses-cmd/src//programs OnDiskPt//CreateOnDisk OnDiskPt//queryOnDiskPt mert//programs contrib/server//mosesserver misc//programs ;
 
 install-bin-libs programs ;
 install-headers headers-base : [ glob-tree *.h *.hh : jam-files dist kenlm moses ] : . ;
diff --git a/OnDiskPt/Jamfile b/OnDiskPt/Jamfile
index f9811c05b..9aa00fcae 100644
--- a/OnDiskPt/Jamfile
+++ b/OnDiskPt/Jamfile
@@ -1,2 +1,5 @@
 lib OnDiskPt : OnDiskWrapper.cpp SourcePhrase.cpp TargetPhrase.cpp Word.cpp Phrase.cpp PhraseNode.cpp TargetPhraseCollection.cpp Vocab.cpp ../moses/src//headers ;
+
 exe CreateOnDisk : Main.cpp ../moses/src//moses OnDiskPt ;
+exe queryOnDiskPt : queryOnDiskPt.cpp ../moses/src//moses OnDiskPt ;
+
diff --git a/contrib/queryOnDiskPt/queryOnDiskPt.cpp b/OnDiskPt/queryOnDiskPt.cpp
similarity index 100%
rename from contrib/queryOnDiskPt/queryOnDiskPt.cpp
rename to OnDiskPt/queryOnDiskPt.cpp

From 7d602aff2fcf94e0d4bfdff0e99c162383731c76 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Fri, 25 May 2012 13:41:54 +0100
Subject: [PATCH 16/38] Move queryOnDiskPt from contrib to OnDiskPt folder.
 Easier to amend Jam file

---
 contrib/queryOnDiskPt/Jamfile    | 41 --------------------------------
 contrib/queryOnDiskPt/compile.sh |  6 -----
 2 files changed, 47 deletions(-)
 delete mode 100644 contrib/queryOnDiskPt/Jamfile
 delete mode 100755 contrib/queryOnDiskPt/compile.sh

diff --git a/contrib/queryOnDiskPt/Jamfile b/contrib/queryOnDiskPt/Jamfile
deleted file mode 100644
index 6b4895022..000000000
--- a/contrib/queryOnDiskPt/Jamfile
+++ /dev/null
@@ -1,41 +0,0 @@
-#If you get compilation errors here, make sure you have xmlrpc-c installed properly, including the abyss server option.  
-
-import option ;
-import path ;
-
-with-xmlrpc-c = [ option.get "with-xmlrpc-c" ] ;
-if $(with-xmlrpc-c) {
-  build-moses-server = true ;
-  xmlrpc-command = $(with-xmlrpc-c)/bin/xmlrpc-c-config ;
-  if ! [ path.exists $(xmlrpc-command) ] {
-    exit Could not find $(xmlrpc-command) : 1 ;
-  }
-} else {
-  xmlrpc-check = [ _shell "xmlrpc-c-config --features 2>/dev/null" : exit-status ] ;
-  if $(xmlrpc-check[2]) = 0 {
-    if [ MATCH "(abyss-server)" : $(xmlrpc-check[1]) ] {
-      build-moses-server = true ;
-    } else {
-      echo "Found xmlrpc-c but it does not have abyss-server.  Skipping mosesserver." ;
-    } 
-  }
-  xmlrpc-command = "xmlrpc-c-config" ;
-}
-
-rule shell_or_die ( cmd ) {
-  local ret = [ _shell $(cmd) : exit-status ] ;
-  if $(ret[2]) != 0 {
-    exit "Failed to run $(cmd)" : 1 ;
-  }
-  return $(ret[1]) ;
-}
-
-if $(build-moses-server) = true
-{
-  xmlrpc-linkflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --libs" ] ;
-  xmlrpc-cxxflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --cflags" ] ;
-
-  exe queryOnDiskPt : queryOnDiskPt.cpp ../../moses/src//moses ../../OnDiskPt//OnDiskPt : <linkflags>$(xmlrpc-linkflags) <cxxflags>$(xmlrpc-cxxflags) ;
-} else {
-  alias queryOnDiskPt ;
-}
diff --git a/contrib/queryOnDiskPt/compile.sh b/contrib/queryOnDiskPt/compile.sh
deleted file mode 100755
index 1643e27d1..000000000
--- a/contrib/queryOnDiskPt/compile.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-SRI=/Users/hieuhoang/workspace/srilm
-IRST=/Users/hieuhoang/workspace/irstlm/trunk
-
-g++ -o queryOnDiskPt queryOnDiskPt.cpp ../../moses/src/PhraseDictionary.cpp -I../../moses/src/ -I../../ -L../../dist/lib/ -I../../OnDiskPt -lmert_lib -ldynsa -lz -lmoses_internal -lOnDiskPt -lLM -lkenlm -lkenutil -lRuleTable -lCYKPlusParser -lScope3Parser -L$SRI/lib/macosx/ -ldstruct -lflm -llattice -lmisc -loolm -L/opt/local/lib -lboost_thread-mt -L$IRST/lib -lirstlm
-
-

From 721ce923ccaa209a4e73fbe9ddadebedfbd96892 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Fri, 25 May 2012 15:06:35 +0100
Subject: [PATCH 17/38] add -snt2cooc to train-model.perl. For giza's reduced
 memory snt2cooc.perl

---
 .../training/train-model.perl.missing_bin_dir | 26 +++++++++++--------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index d3748fdc9..3d820a1a7 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -29,7 +29,7 @@ my($_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_
    $_DECODING_GRAPH_BACKOFF,
    $_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE,
    @_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS,
-   $_DONT_ZIP,  $_MGIZA, $_MGIZA_CPUS,  $_HMM_ALIGN, $_CONFIG,
+   $_DONT_ZIP,  $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG,
    $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_EXTRACT_OPTIONS,$_SCORE_OPTIONS,
    $_PHRASE_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
    $_MEMSCORE, $_FINAL_ALIGNMENT_MODEL,
@@ -72,6 +72,7 @@ $_HELP = 1
 		       'help' => \$_HELP,
 		       'mgiza' => \$_MGIZA, # multi-thread 
 		       'mgiza-cpus=i' => \$_MGIZA_CPUS, # multi-thread 
+		       'snt2cooc=s' => \$_SNT2COOC, # override snt2cooc exe. For when you want to run reduced memory snt2cooc.perl from mgiza
 		       'hmm-align' => \$_HMM_ALIGN,
 		       'final-alignment-model=s' => \$_FINAL_ALIGNMENT_MODEL, # use word alignment model 1/2/hmm/3/4/5 as final (default is 4); value 'hmm' equivalent to the --hmm-align switch
 		       'debug' => \$debug,
@@ -185,25 +186,28 @@ my $SNT2COOC;
 if(!defined $_MGIZA ){
 	$GIZA = "$BINDIR/GIZA++";
 	if (-x "$BINDIR/snt2cooc.out") {
-   		$SNT2COOC = "$BINDIR/snt2cooc.out";
+  	$SNT2COOC = "$BINDIR/snt2cooc.out";
 	} elsif (-x "$BINDIR/snt2cooc") { # Since "snt2cooc.out" and "snt2cooc" work the same   
 		$SNT2COOC = "$BINDIR/snt2cooc";
 	}
 	print STDERR "Using single-thread GIZA\n";
 } else {
-    	$GIZA = "$BINDIR/mgiza";
+  $GIZA = "$BINDIR/mgiza";
 	if (-x "$BINDIR/snt2cooc") {
-                $SNT2COOC = "$BINDIR/snt2cooc";
-        } elsif (-x "$BINDIR/snt2cooc.out") { # Important for users that use MGIZA and copy only the "mgiza" file to $BINDIR 
-                $SNT2COOC = "$BINDIR/snt2cooc.out";
-        }
+  	$SNT2COOC = "$BINDIR/snt2cooc";
+  } elsif (-x "$BINDIR/snt2cooc.out") { # Important for users that use MGIZA and copy only the "mgiza" file to $BINDIR 
+    $SNT2COOC = "$BINDIR/snt2cooc.out";
+  }
 	print STDERR "Using multi-thread GIZA\n";	
-    	if (!defined($_MGIZA_CPUS)) {
-        	$_MGIZA_CPUS=4;
-    	}
-    die("ERROR: Cannot find $MGIZA_MERGE_ALIGN") unless (-x $MGIZA_MERGE_ALIGN);
+  if (!defined($_MGIZA_CPUS)) {
+  	$_MGIZA_CPUS=4;
+  }
+  die("ERROR: Cannot find $MGIZA_MERGE_ALIGN") unless (-x $MGIZA_MERGE_ALIGN);
 }
 
+# override
+$SNT2COOC = "$BINDIR/$_SNT2COOC" if defined($_SNT2COOC);
+
 my $MKCLS = "$BINDIR/mkcls";
 
 # supporting scripts/binaries from this package

From 2fab137aaeeda8077734e4c6e5627bfb44d27691 Mon Sep 17 00:00:00 2001
From: Phil Williams <philip.williams@mac.com>
Date: Fri, 25 May 2012 17:24:08 +0100
Subject: [PATCH 18/38] Fix compile error.

---
 OnDiskPt/queryOnDiskPt.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/OnDiskPt/queryOnDiskPt.cpp b/OnDiskPt/queryOnDiskPt.cpp
index f7435649a..9a2d97680 100644
--- a/OnDiskPt/queryOnDiskPt.cpp
+++ b/OnDiskPt/queryOnDiskPt.cpp
@@ -6,7 +6,7 @@
 #include <string>
 #include <vector>
 
-#include "util.h"
+#include "Util.h"
 #include "OnDiskWrapper.h"
 #include "SourcePhrase.h"
 

From 90c0bc9f5ceec4e7d33386ec597fd753e7d23d4a Mon Sep 17 00:00:00 2001
From: Phil Williams <philip.williams@mac.com>
Date: Fri, 25 May 2012 17:29:47 +0100
Subject: [PATCH 19/38] Add an optional PCFG scoring feature for target syntax
 models (similar to the p_cfg feature used in Marcu, Wang, Echihabi, and
 Knight (2006)).

---
 scripts/Jamfile                               |   2 +
 scripts/ems/experiment.meta                   |  15 +-
 scripts/ems/experiment.perl                   |   2 +
 .../training/phrase-extract/ExtractedRule.h   |   2 +
 scripts/training/phrase-extract/Jamfile       |   2 +
 .../phrase-extract/PhraseAlignment.cpp        |   7 +-
 .../training/phrase-extract/PhraseAlignment.h |   1 +
 .../phrase-extract/RuleExtractionOptions.h    |   2 +
 .../training/phrase-extract/SyntaxTree.cpp    |   3 +-
 scripts/training/phrase-extract/SyntaxTree.h  |  11 +-
 scripts/training/phrase-extract/XmlTree.cpp   |   9 +-
 .../extract-ghkm/AlignmentGraph.cpp           |   4 +
 .../extract-ghkm/ExtractGHKM.cpp              |   5 +
 .../phrase-extract/extract-ghkm/Node.h        |   6 +-
 .../phrase-extract/extract-ghkm/Options.h     |   2 +
 .../phrase-extract/extract-ghkm/ParseTree.h   |   7 +-
 .../phrase-extract/extract-ghkm/ScfgRule.cpp  |   1 +
 .../phrase-extract/extract-ghkm/ScfgRule.h    |   2 +
 .../extract-ghkm/ScfgRuleWriter.cpp           |  69 ++++----
 .../extract-ghkm/ScfgRuleWriter.h             |   4 +-
 .../phrase-extract/extract-ghkm/Subgraph.cpp  |  16 ++
 .../phrase-extract/extract-ghkm/Subgraph.h    |   8 +-
 .../extract-ghkm/XmlTreeParser.cpp            |   1 +
 .../training/phrase-extract/extract-rules.cpp |  32 +++-
 .../phrase-extract/pcfg-common/Jamfile        |   1 +
 .../phrase-extract/pcfg-common/exception.h    |  41 +++++
 .../phrase-extract/pcfg-common/numbered_set.h | 109 +++++++++++++
 .../phrase-extract/pcfg-common/pcfg.cc        | 106 ++++++++++++
 .../phrase-extract/pcfg-common/pcfg.h         |  61 +++++++
 .../phrase-extract/pcfg-common/pcfg_tree.h    |  77 +++++++++
 .../phrase-extract/pcfg-common/syntax_tree.h  |  91 +++++++++++
 .../phrase-extract/pcfg-common/tool.cc        |  80 +++++++++
 .../phrase-extract/pcfg-common/tool.h         |  91 +++++++++++
 .../phrase-extract/pcfg-common/typedef.h      |  37 +++++
 .../pcfg-common/xml_tree_parser.cc            |  85 ++++++++++
 .../pcfg-common/xml_tree_parser.h             |  56 +++++++
 .../pcfg-common/xml_tree_writer.h             | 127 +++++++++++++++
 .../phrase-extract/pcfg-extract/Jamfile       |   1 +
 .../phrase-extract/pcfg-extract/main.cc       |  25 +++
 .../phrase-extract/pcfg-extract/options.h     |  36 +++++
 .../pcfg-extract/pcfg_extract.cc              | 131 +++++++++++++++
 .../pcfg-extract/pcfg_extract.h               |  42 +++++
 .../pcfg-extract/rule_collection.cc           |  58 +++++++
 .../pcfg-extract/rule_collection.h            |  59 +++++++
 .../pcfg-extract/rule_extractor.cc            |  51 ++++++
 .../pcfg-extract/rule_extractor.h             |  45 ++++++
 .../phrase-extract/pcfg-score/Jamfile         |   1 +
 .../phrase-extract/pcfg-score/main.cc         |  25 +++
 .../phrase-extract/pcfg-score/options.h       |  36 +++++
 .../phrase-extract/pcfg-score/pcfg_score.cc   | 152 ++++++++++++++++++
 .../phrase-extract/pcfg-score/pcfg_score.h    |  42 +++++
 .../phrase-extract/pcfg-score/tree_scorer.cc  |  68 ++++++++
 .../phrase-extract/pcfg-score/tree_scorer.h   |  47 ++++++
 scripts/training/phrase-extract/score.cpp     |  23 +++
 .../training/train-model.perl.missing_bin_dir |   4 +
 55 files changed, 1970 insertions(+), 51 deletions(-)
 create mode 100644 scripts/training/phrase-extract/pcfg-common/Jamfile
 create mode 100644 scripts/training/phrase-extract/pcfg-common/exception.h
 create mode 100644 scripts/training/phrase-extract/pcfg-common/numbered_set.h
 create mode 100644 scripts/training/phrase-extract/pcfg-common/pcfg.cc
 create mode 100644 scripts/training/phrase-extract/pcfg-common/pcfg.h
 create mode 100644 scripts/training/phrase-extract/pcfg-common/pcfg_tree.h
 create mode 100644 scripts/training/phrase-extract/pcfg-common/syntax_tree.h
 create mode 100644 scripts/training/phrase-extract/pcfg-common/tool.cc
 create mode 100644 scripts/training/phrase-extract/pcfg-common/tool.h
 create mode 100644 scripts/training/phrase-extract/pcfg-common/typedef.h
 create mode 100644 scripts/training/phrase-extract/pcfg-common/xml_tree_parser.cc
 create mode 100644 scripts/training/phrase-extract/pcfg-common/xml_tree_parser.h
 create mode 100644 scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h
 create mode 100644 scripts/training/phrase-extract/pcfg-extract/Jamfile
 create mode 100644 scripts/training/phrase-extract/pcfg-extract/main.cc
 create mode 100644 scripts/training/phrase-extract/pcfg-extract/options.h
 create mode 100644 scripts/training/phrase-extract/pcfg-extract/pcfg_extract.cc
 create mode 100644 scripts/training/phrase-extract/pcfg-extract/pcfg_extract.h
 create mode 100644 scripts/training/phrase-extract/pcfg-extract/rule_collection.cc
 create mode 100644 scripts/training/phrase-extract/pcfg-extract/rule_collection.h
 create mode 100644 scripts/training/phrase-extract/pcfg-extract/rule_extractor.cc
 create mode 100644 scripts/training/phrase-extract/pcfg-extract/rule_extractor.h
 create mode 100644 scripts/training/phrase-extract/pcfg-score/Jamfile
 create mode 100644 scripts/training/phrase-extract/pcfg-score/main.cc
 create mode 100644 scripts/training/phrase-extract/pcfg-score/options.h
 create mode 100644 scripts/training/phrase-extract/pcfg-score/pcfg_score.cc
 create mode 100644 scripts/training/phrase-extract/pcfg-score/pcfg_score.h
 create mode 100644 scripts/training/phrase-extract/pcfg-score/tree_scorer.cc
 create mode 100644 scripts/training/phrase-extract/pcfg-score/tree_scorer.h

diff --git a/scripts/Jamfile b/scripts/Jamfile
index 6fb9bad39..b9eefcffe 100644
--- a/scripts/Jamfile
+++ b/scripts/Jamfile
@@ -42,6 +42,8 @@ if $(location) {
   install compactify : training/compact-rule-table//compactify : <location>$(location)/training/compact-rule-table/tools ;
 
   install phrase-extract : training/phrase-extract//programs : <location>$(location)/training/phrase-extract ;
+  install pcfg-extract : training/phrase-extract/pcfg-extract//pcfg-extract : <location>$(location)/training/phrase-extract/pcfg-extract ;
+  install pcfg-score : training/phrase-extract/pcfg-score//pcfg-score : <location>$(location)/training/phrase-extract/pcfg-score ;
   install lexical-reordering : training/lexical-reordering//score : <location>$(location)/training/lexical-reordering ;
   install symal : training/symal//symal : <location>$(location)/training/symal ;
 
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index 51ac0f67a..b33c589d2 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -344,8 +344,21 @@ parse-relax
 	pass-unless: input-parse-relaxer output-parse-relaxer
 	template-if: input-parse-relaxer IN.$input-extension OUT.$input-extension
 	template-if: output-parse-relaxer IN.$output-extension OUT.$output-extension
+pcfg-extract
+	in: parse-relaxed-corpus
+	out: pcfg
+	default-name: model/pcfg
+	ignore-unless: use-pcfg-feature
+	rerun-on-change: use-pcfg-feature
+	template: $moses-script-dir/training/phrase-extract/pcfg-extract/pcfg-extract < IN.$output-extension > OUT.$output-extension
+pcfg-score
+	in: parse-relaxed-corpus pcfg
+	out: scored-corpus
+	default-name: model/scored-corpus
+	pass-unless: use-pcfg-feature
+	template: ln -s IN.$input-extension OUT.$input-extension ; $moses-script-dir/training/phrase-extract/pcfg-score/pcfg-score IN1.$output-extension < IN.$output-extension > OUT.$output-extension
 extract-phrases
-	in: word-alignment parse-relaxed-corpus
+	in: word-alignment scored-corpus
 	out: extracted-phrases
 	rerun-on-change: max-phrase-length translation-factors reordering-factors hierarchical-rule-set extract-settings training-options script use-ghkm
 	default-name: model/extract
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index 59bd2788f..0c61a2a05 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -2007,6 +2007,7 @@ sub get_training_setting {
     my $target_syntax = &get("GENERAL:output-parser");
     my $score_settings = &get("TRAINING:score-settings");
     my $parallel = &get("TRAINING:parallel");
+    my $pcfg = &get("TRAINING:use-pcfg-feature");
 
     my $xml = $source_syntax || $target_syntax;
 
@@ -2029,6 +2030,7 @@ sub get_training_setting {
     $cmd .= "-glue-grammar " if $hierarchical;
     $cmd .= "-score-options '".$score_settings."' " if $score_settings;
     $cmd .= "-parallel " if $parallel;
+    $cmd .= "-pcfg " if $pcfg;
 
     # factored training
     if (&backoff_and_get("TRAINING:input-factors")) {
diff --git a/scripts/training/phrase-extract/ExtractedRule.h b/scripts/training/phrase-extract/ExtractedRule.h
index 170ccf892..be6e30836 100644
--- a/scripts/training/phrase-extract/ExtractedRule.h
+++ b/scripts/training/phrase-extract/ExtractedRule.h
@@ -43,6 +43,7 @@ public:
   int startS;
   int endS;
   float count;
+  double pcfgScore;
 
   std::map<size_t, std::pair<size_t, size_t> > m_ntLengths;
   
@@ -58,6 +59,7 @@ public:
     , startS(sS)
     , endS(eS)
     , count(0)
+    , pcfgScore(0.0)
   {}
   
   void SetSpanLength(size_t sourcePos, size_t sourceLength, size_t targetLength)
diff --git a/scripts/training/phrase-extract/Jamfile b/scripts/training/phrase-extract/Jamfile
index 5ed3f20f1..9be67e80a 100644
--- a/scripts/training/phrase-extract/Jamfile
+++ b/scripts/training/phrase-extract/Jamfile
@@ -33,3 +33,5 @@ alias programs : extract extract-rules extract-lex score consolidate consolidate
 install legacy : programs : <location>. <install-type>EXE ;
 
 build-project extract-ghkm ;
+build-project pcfg-extract ;
+build-project pcfg-score ;
diff --git a/scripts/training/phrase-extract/PhraseAlignment.cpp b/scripts/training/phrase-extract/PhraseAlignment.cpp
index c0bfbde3e..ceb74f04c 100644
--- a/scripts/training/phrase-extract/PhraseAlignment.cpp
+++ b/scripts/training/phrase-extract/PhraseAlignment.cpp
@@ -13,6 +13,8 @@
 #include "tables-core.h"
 #include "score.h"
 
+#include <cstdlib>
+
 using namespace std;
 
 extern Vocabulary vcbT;
@@ -111,6 +113,9 @@ void PhraseAlignment::create( char line[], int lineID )
     }
     else if (item == 5) { // non-term lengths
       addNTLength(token[j]);
+    } else if (item == 6) { // target syntax PCFG score
+      float pcfgScore = std::atof(token[j].c_str());
+      pcfgSum = pcfgScore * count;
     }
   }
 
@@ -119,7 +124,7 @@ void PhraseAlignment::create( char line[], int lineID )
   if (item == 3) {
     count = 1.0;
   }
-  if (item < 3 || item > 5) {
+  if (item < 3 || item > 6) {
     cerr << "ERROR: faulty line " << lineID << ": " << line << endl;
   }
 }
diff --git a/scripts/training/phrase-extract/PhraseAlignment.h b/scripts/training/phrase-extract/PhraseAlignment.h
index 8b8f5115c..8bd83503d 100644
--- a/scripts/training/phrase-extract/PhraseAlignment.h
+++ b/scripts/training/phrase-extract/PhraseAlignment.h
@@ -25,6 +25,7 @@ protected:
   void createAlignVec(size_t sourceSize, size_t targetSize);
   void addNTLength(const std::string &tok);
 public:
+  float pcfgSum;
   float count;
   std::vector< std::set<size_t> > alignedToT;
   std::vector< std::set<size_t> > alignedToS;
diff --git a/scripts/training/phrase-extract/RuleExtractionOptions.h b/scripts/training/phrase-extract/RuleExtractionOptions.h
index 70bb548c9..f9123de86 100644
--- a/scripts/training/phrase-extract/RuleExtractionOptions.h
+++ b/scripts/training/phrase-extract/RuleExtractionOptions.h
@@ -45,6 +45,7 @@ public:
   bool targetSyntax;
   bool duplicateRules;
   bool fractionalCounting;
+  bool pcfgScore;
   bool outputNTLengths;
   bool gzOutput;
   
@@ -74,6 +75,7 @@ public:
     , targetSyntax(false)
     , duplicateRules(true)
     , fractionalCounting(true)
+    , pcfgScore(false)
     , outputNTLengths(false)
     , gzOutput(false)
   {}
diff --git a/scripts/training/phrase-extract/SyntaxTree.cpp b/scripts/training/phrase-extract/SyntaxTree.cpp
index e181b1e8a..f2783ffd2 100644
--- a/scripts/training/phrase-extract/SyntaxTree.cpp
+++ b/scripts/training/phrase-extract/SyntaxTree.cpp
@@ -42,11 +42,12 @@ void SyntaxTree::Clear()
   m_index.clear();
 }
 
-void SyntaxTree::AddNode( int startPos, int endPos, std::string label )
+SyntaxNode *SyntaxTree::AddNode( int startPos, int endPos, std::string label )
 {
   SyntaxNode* newNode = new SyntaxNode( startPos, endPos, label );
   m_nodes.push_back( newNode );
   m_index[ startPos ][ endPos ].push_back( newNode );
+  return newNode;
 }
 
 ParentNodes SyntaxTree::Parse()
diff --git a/scripts/training/phrase-extract/SyntaxTree.h b/scripts/training/phrase-extract/SyntaxTree.h
index 0ca5ca472..17c106b49 100644
--- a/scripts/training/phrase-extract/SyntaxTree.h
+++ b/scripts/training/phrase-extract/SyntaxTree.h
@@ -34,12 +34,14 @@ protected:
   std::string m_label;
   std::vector< SyntaxNode* > m_children;
   SyntaxNode* m_parent;
+  float m_pcfgScore;
 public:
   SyntaxNode( int startPos, int endPos, std::string label )
     :m_start(startPos)
     ,m_end(endPos)
     ,m_label(label)
     ,m_parent(0)
+    ,m_pcfgScore(0.0f)
   {}
   int GetStart() const {
     return m_start;
@@ -50,6 +52,12 @@ public:
   std::string GetLabel() const {
     return m_label;
   }
+  float GetPcfgScore() const {  // 0.0f until SetPcfgScore() is called
+    return m_pcfgScore;
+  }
+  void SetPcfgScore(float score) {  // overwrites the stored PCFG score
+    m_pcfgScore = score;
+  }
   SyntaxNode *GetParent() {
     return m_parent;
   }
@@ -89,11 +97,12 @@ public:
   }
   ~SyntaxTree();
 
+  SyntaxNode *AddNode( int startPos, int endPos, std::string label );
+
   SyntaxNode *GetTop() {
     return m_top;
   }
 
-  void AddNode( int startPos, int endPos, std::string label );
   ParentNodes Parse();
   bool HasNode( int startPos, int endPos ) const;
   const std::vector< SyntaxNode* >& GetNodes( int startPos, int endPos ) const;
diff --git a/scripts/training/phrase-extract/XmlTree.cpp b/scripts/training/phrase-extract/XmlTree.cpp
index 716414f86..b22c159a1 100644
--- a/scripts/training/phrase-extract/XmlTree.cpp
+++ b/scripts/training/phrase-extract/XmlTree.cpp
@@ -25,7 +25,7 @@
 #include <string>
 #include <set>
 #include <iostream>
-#include <stdlib.h>
+#include <cstdlib>
 #include <sstream>
 #include "SyntaxTree.h"
 #include "XmlException.h"
@@ -345,13 +345,18 @@ bool ProcessAndStripXMLTags(string &line, SyntaxTree &tree, set< string > &label
         string label = ParseXmlTagAttribute(tagContent,"label");
         labelCollection.insert( label );
 
+        string pcfgString = ParseXmlTagAttribute(tagContent,"pcfg");
+        float pcfgScore = pcfgString == "" ? 0.0f
+                                           : std::atof(pcfgString.c_str());
+
         // report what we have processed so far
         if (0) {
           cerr << "XML TAG NAME IS: '" << tagName << "'" << endl;
           cerr << "XML TAG LABEL IS: '" << label << "'" << endl;
           cerr << "XML SPAN IS: " << startPos << "-" << (endPos-1) << endl;
         }
-        tree.AddNode( startPos, endPos-1, label );
+        SyntaxNode *node = tree.AddNode( startPos, endPos-1, label );
+        node->SetPcfgScore(pcfgScore);
       }
     }
   }
diff --git a/scripts/training/phrase-extract/extract-ghkm/AlignmentGraph.cpp b/scripts/training/phrase-extract/extract-ghkm/AlignmentGraph.cpp
index 0ecffae5c..6bd32a13b 100644
--- a/scripts/training/phrase-extract/extract-ghkm/AlignmentGraph.cpp
+++ b/scripts/training/phrase-extract/extract-ghkm/AlignmentGraph.cpp
@@ -212,6 +212,10 @@ Node *AlignmentGraph::CopyParseTree(const ParseTree *root)
 
   std::auto_ptr<Node> n(new Node(root->GetLabel(), nodeType));
 
+  if (nodeType == TREE) {
+    n->SetPcfgScore(root->GetPcfgScore());
+  }
+
   const std::vector<ParseTree *> &children = root->GetChildren();
   std::vector<Node *> childNodes;
   childNodes.reserve(children.size());
diff --git a/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp b/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp
index 008026e1a..397ce1e3c 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp
@@ -285,6 +285,8 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
         "set maximum allowed scope")
     ("Minimal",
         "extract minimal rules only")
+    ("PCFG",
+        "include score based on PCFG scores in target corpus")
     ("UnknownWordLabel",
         po::value(&options.unknownWordFile),
         "write unknown word labels to named file")
@@ -361,6 +363,9 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
   if (vm.count("Minimal")) {
     options.minimal = true;
   }
+  if (vm.count("PCFG")) {
+    options.pcfg = true;
+  }
   if (vm.count("UnpairedExtractFormat")) {
     options.unpairedExtractFormat = true;
   }
diff --git a/scripts/training/phrase-extract/extract-ghkm/Node.h b/scripts/training/phrase-extract/extract-ghkm/Node.h
index 228fdc812..775473362 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Node.h
+++ b/scripts/training/phrase-extract/extract-ghkm/Node.h
@@ -41,8 +41,7 @@ class Node
   Node(const std::string &label, NodeType type)
       : m_label(label)
       , m_type(type)
-      , m_children()
-      , m_parents() {}
+      , m_pcfgScore(0.0f) {}
 
   ~Node();
 
@@ -50,12 +49,14 @@ class Node
   NodeType GetType() const { return m_type; }
   const std::vector<Node*> &GetChildren() const { return m_children; }
   const std::vector<Node*> &GetParents() const { return m_parents; }
+  float GetPcfgScore() const { return m_pcfgScore; }
   const Span &GetSpan() const { return m_span; }
   const Span &GetComplementSpan() const { return m_complementSpan; }
   const std::vector<const Subgraph*> &GetRules() const { return m_rules; }
 
   void SetChildren(const std::vector<Node*> &c) { m_children = c; }
   void SetParents(const std::vector<Node*> &p) { m_parents = p; }
+  void SetPcfgScore(float s) { m_pcfgScore = s; }
   void SetSpan(const Span &s) { m_span = s; }
   void SetComplementSpan(const Span &cs) { m_complementSpan = cs; }
 
@@ -92,6 +93,7 @@ class Node
   NodeType m_type;
   std::vector<Node*> m_children;
   std::vector<Node*> m_parents;
+  float m_pcfgScore;
   Span m_span;
   Span m_complementSpan;
   std::vector<const Subgraph*> m_rules;
diff --git a/scripts/training/phrase-extract/extract-ghkm/Options.h b/scripts/training/phrase-extract/extract-ghkm/Options.h
index 108e19d66..c4b57f311 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Options.h
+++ b/scripts/training/phrase-extract/extract-ghkm/Options.h
@@ -36,6 +36,7 @@ struct Options {
       , maxRuleSize(3)
       , maxScope(3)
       , minimal(false)
+      , pcfg(false)
       , unpairedExtractFormat(false) {}
 
   // Positional options
@@ -53,6 +54,7 @@ struct Options {
   int maxRuleSize;
   int maxScope;
   bool minimal;
+  bool pcfg;
   bool unpairedExtractFormat;
   std::string unknownWordFile;
 };
diff --git a/scripts/training/phrase-extract/extract-ghkm/ParseTree.h b/scripts/training/phrase-extract/extract-ghkm/ParseTree.h
index ec6fc147a..273e2e04e 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ParseTree.h
+++ b/scripts/training/phrase-extract/extract-ghkm/ParseTree.h
@@ -32,17 +32,19 @@ class ParseTree
  public:
   ParseTree(const std::string &label)
       : m_label(label)
-      , m_children()
-      , m_parent() {}
+      , m_parent(0)
+      , m_pcfgScore(0.0) {}
 
   ~ParseTree();
 
   const std::string &GetLabel() const { return m_label; }
   const std::vector<ParseTree*> &GetChildren() const { return m_children; }
   const ParseTree *GetParent() const { return m_parent; }
+  float GetPcfgScore() const { return m_pcfgScore; }
 
   void SetParent(ParseTree *);
   void SetChildren(const std::vector<ParseTree*> &);
+  void SetPcfgScore(float score) { m_pcfgScore = score; }
 
   void AddChild(ParseTree *);
 
@@ -59,6 +61,7 @@ class ParseTree
   std::string m_label;
   std::vector<ParseTree*> m_children;
   ParseTree *m_parent;
+  float m_pcfgScore;  // log probability
 };
 
 template<typename OutputIterator>
diff --git a/scripts/training/phrase-extract/extract-ghkm/ScfgRule.cpp b/scripts/training/phrase-extract/extract-ghkm/ScfgRule.cpp
index 8473e4283..5dc70052c 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ScfgRule.cpp
+++ b/scripts/training/phrase-extract/extract-ghkm/ScfgRule.cpp
@@ -30,6 +30,7 @@ namespace GHKM {
 ScfgRule::ScfgRule(const Subgraph &fragment)
     : m_sourceLHS("X", NonTerminal)
     , m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal)
+    , m_pcfgScore(fragment.GetPcfgScore())
 {
   // Source RHS
 
diff --git a/scripts/training/phrase-extract/extract-ghkm/ScfgRule.h b/scripts/training/phrase-extract/extract-ghkm/ScfgRule.h
index 1ed534d9e..2405d8fa3 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ScfgRule.h
+++ b/scripts/training/phrase-extract/extract-ghkm/ScfgRule.h
@@ -57,6 +57,7 @@ class ScfgRule
   const std::vector<Symbol> &GetSourceRHS() const { return m_sourceRHS; }
   const std::vector<Symbol> &GetTargetRHS() const { return m_targetRHS; }
   const Alignment &GetAlignment() const { return m_alignment; }
+  float GetPcfgScore() const { return m_pcfgScore; }
 
   int Scope() const;
 
@@ -68,6 +69,7 @@ class ScfgRule
   std::vector<Symbol> m_sourceRHS;
   std::vector<Symbol> m_targetRHS;
   Alignment m_alignment;
+  float m_pcfgScore;
 };
 
 }  // namespace GHKM
diff --git a/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp b/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
index 4be3f048d..d5d16b790 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
+++ b/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
@@ -24,6 +24,7 @@
 #include "ScfgRule.h"
 
 #include <cassert>
+#include <cmath>
 #include <ostream>
 #include <map>
 #include <sstream>
@@ -34,14 +35,43 @@ namespace GHKM {
 
 void ScfgRuleWriter::Write(const ScfgRule &rule)
 {
+  std::ostringstream sourceSS;  // source side, filled by the format helper below
+  std::ostringstream targetSS;  // target side, filled by the format helper below
+
   if (m_options.unpairedExtractFormat) {
-    WriteUnpairedFormat(rule);
+    WriteUnpairedFormat(rule, sourceSS, targetSS);
   } else {
-    WriteStandardFormat(rule);
+    WriteStandardFormat(rule, sourceSS, targetSS);
   }
+
+  // Both files start with the rule's two sides; the inverse file has them swapped.
+  m_fwd << sourceSS.str() << " ||| " << targetSS.str() << " |||";
+  m_inv << targetSS.str() << " ||| " << sourceSS.str() << " |||";
+
+  const Alignment &alignment = rule.GetAlignment();
+  for (Alignment::const_iterator p = alignment.begin();
+       p != alignment.end(); ++p) {
+    m_fwd << " " << p->first << "-" << p->second;
+    m_inv << " " << p->second << "-" << p->first;
+  }
+
+  // Forward file only: a count of 1, then an empty NT length column so that
+  // the optional PCFG score below is written in the column after it.
+  // TODO Add option to write NT length?
+  m_fwd << " ||| 1 ||| |||";
+  if (m_options.pcfg) {
+    // exp(): the score is kept as a log probability (see ParseTree.h).
+    m_fwd << " " << std::exp(rule.GetPcfgScore());
+  }
+  m_fwd << std::endl;
+
+  // The inverse file gets only the count of 1.
+  m_inv << " ||| 1" << std::endl;
 }
 
-void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule)
+void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule,
+                                         std::ostream &sourceSS,
+                                         std::ostream &targetSS)
 {
   const std::vector<Symbol> &sourceRHS = rule.GetSourceRHS();
   const std::vector<Symbol> &targetRHS = rule.GetTargetRHS();
@@ -60,9 +90,6 @@ void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule)
     }
   }
 
-  std::ostringstream sourceSS;
-  std::ostringstream targetSS;
-
   // Write the source side of the rule to sourceSS.
   int i = 0;
   for (std::vector<Symbol>::const_iterator p(sourceRHS.begin());
@@ -88,27 +115,14 @@ void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule)
     targetSS << " ";
   }
   WriteSymbol(rule.GetTargetLHS(), targetSS);
-
-  // Write the rule to the forward and inverse extract files.
-  m_fwd << sourceSS.str() << " ||| " << targetSS.str() << " |||";
-  m_inv << targetSS.str() << " ||| " << sourceSS.str() << " |||";
-  for (Alignment::const_iterator p(alignment.begin());
-       p != alignment.end(); ++p) {
-    m_fwd << " " << p->first << "-" << p->second;
-    m_inv << " " << p->second << "-" << p->first;
-  }
-  m_fwd << " ||| 1" << std::endl;
-  m_inv << " ||| 1" << std::endl;
 }
 
-void ScfgRuleWriter::WriteUnpairedFormat(const ScfgRule &rule)
+void ScfgRuleWriter::WriteUnpairedFormat(const ScfgRule &rule,
+                                         std::ostream &sourceSS,
+                                         std::ostream &targetSS)
 {
   const std::vector<Symbol> &sourceRHS = rule.GetSourceRHS();
   const std::vector<Symbol> &targetRHS = rule.GetTargetRHS();
-  const Alignment &alignment = rule.GetAlignment();
-
-  std::ostringstream sourceSS;
-  std::ostringstream targetSS;
 
   // Write the source side of the rule to sourceSS.
   int i = 0;
@@ -127,17 +141,6 @@ void ScfgRuleWriter::WriteUnpairedFormat(const ScfgRule &rule)
     targetSS << " ";
   }
   WriteSymbol(rule.GetTargetLHS(), targetSS);
-
-  // Write the rule to the forward and inverse extract files.
-  m_fwd << sourceSS.str() << " ||| " << targetSS.str() << " |||";
-  m_inv << targetSS.str() << " ||| " << sourceSS.str() << " |||";
-  for (Alignment::const_iterator p(alignment.begin());
-       p != alignment.end(); ++p) {
-    m_fwd << " " << p->first << "-" << p->second;
-    m_inv << " " << p->second << "-" << p->first;
-  }
-  m_fwd << " ||| 1" << std::endl;
-  m_inv << " ||| 1" << std::endl;
 }
 
 void ScfgRuleWriter::WriteSymbol(const Symbol &symbol, std::ostream &out)
diff --git a/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.h b/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.h
index 738d09ce9..b92a432a1 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.h
+++ b/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.h
@@ -45,8 +45,8 @@ class ScfgRuleWriter
   ScfgRuleWriter(const ScfgRuleWriter &);
   ScfgRuleWriter &operator=(const ScfgRuleWriter &);
 
-  void WriteStandardFormat(const ScfgRule &);
-  void WriteUnpairedFormat(const ScfgRule &);
+  void WriteStandardFormat(const ScfgRule &, std::ostream &, std::ostream &);
+  void WriteUnpairedFormat(const ScfgRule &, std::ostream &, std::ostream &);
   void WriteSymbol(const Symbol &, std::ostream &);
 
   std::ostream &m_fwd;
diff --git a/scripts/training/phrase-extract/extract-ghkm/Subgraph.cpp b/scripts/training/phrase-extract/extract-ghkm/Subgraph.cpp
index e5aedbb16..e048f2c55 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Subgraph.cpp
+++ b/scripts/training/phrase-extract/extract-ghkm/Subgraph.cpp
@@ -101,5 +101,21 @@ int Subgraph::CalcDepth(const Node *n) const
   return maxChildDepth + 1;
 }
 
+// Root's PCFG score minus the scores of all TREE leaves; 0 for a non-TREE root or no leaves.
+float Subgraph::CalcPcfgScore() const
+{
+  typedef std::set<const Node *>::const_iterator LeafIter;
+  if (m_root->GetType() != TREE || m_leaves.empty()) {
+    return 0.0f;
+  }
+  float total = m_root->GetPcfgScore();
+  for (LeafIter it = m_leaves.begin(); it != m_leaves.end(); ++it) {
+    if ((*it)->GetType() == TREE) {
+      total -= (*it)->GetPcfgScore();
+    }
+  }
+  return total;
+}
+
 }  // namespace Moses
 }  // namespace GHKM
diff --git a/scripts/training/phrase-extract/extract-ghkm/Subgraph.h b/scripts/training/phrase-extract/extract-ghkm/Subgraph.h
index e84903502..ede1233e9 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Subgraph.h
+++ b/scripts/training/phrase-extract/extract-ghkm/Subgraph.h
@@ -38,7 +38,8 @@ class Subgraph
       : m_root(root)
       , m_depth(0)
       , m_size(root->GetType() == TREE ? 1 : 0)
-      , m_nodeCount(1) {}
+      , m_nodeCount(1)
+      , m_pcfgScore(0.0f) {}
 
   Subgraph(const Node *root, const std::set<const Node *> &leaves)
       : m_root(root)
@@ -46,10 +47,12 @@ class Subgraph
       , m_depth(-1)
       , m_size(-1)
       , m_nodeCount(-1)
+      , m_pcfgScore(0.0f)
   {
     m_depth = CalcDepth(m_root);
     m_size = CalcSize(m_root);
     m_nodeCount = CountNodes(m_root);
+    m_pcfgScore = CalcPcfgScore();
   }
 
   const Node *GetRoot() const { return m_root; }
@@ -57,6 +60,7 @@ class Subgraph
   int GetDepth() const { return m_depth; }
   int GetSize() const { return m_size; }
   int GetNodeCount() const { return m_nodeCount; }
+  float GetPcfgScore() const { return m_pcfgScore; }
 
   bool IsTrivial() const { return m_leaves.empty(); }
 
@@ -66,6 +70,7 @@ class Subgraph
   void GetTargetLeaves(const Node *, std::vector<const Node *> &) const;
   int CalcDepth(const Node *) const;
   int CalcSize(const Node *) const;
+  float CalcPcfgScore() const;
   int CountNodes(const Node *) const;
 
   const Node *m_root;
@@ -73,6 +78,7 @@ class Subgraph
   int m_depth;
   int m_size;
   int m_nodeCount;
+  float m_pcfgScore;
 };
 
 }  // namespace GHKM
diff --git a/scripts/training/phrase-extract/extract-ghkm/XmlTreeParser.cpp b/scripts/training/phrase-extract/extract-ghkm/XmlTreeParser.cpp
index 31c0e3843..cc961dc0c 100644
--- a/scripts/training/phrase-extract/extract-ghkm/XmlTreeParser.cpp
+++ b/scripts/training/phrase-extract/extract-ghkm/XmlTreeParser.cpp
@@ -61,6 +61,7 @@ std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
     const std::vector<std::string> &words)
 {
   std::auto_ptr<ParseTree> root(new ParseTree(tree.GetLabel()));
+  root->SetPcfgScore(tree.GetPcfgScore());
   const std::vector<SyntaxNode*> &children = tree.GetChildren();
   if (children.empty()) {
     if (tree.GetStart() != tree.GetEnd()) {
diff --git a/scripts/training/phrase-extract/extract-rules.cpp b/scripts/training/phrase-extract/extract-rules.cpp
index 2cc9dc54d..a00667b82 100644
--- a/scripts/training/phrase-extract/extract-rules.cpp
+++ b/scripts/training/phrase-extract/extract-rules.cpp
@@ -90,7 +90,7 @@ void addHieroRule( int startT, int endT, int startS, int endS
 void printHieroPhrase( int startT, int endT, int startS, int endS
                        , HoleCollection &holeColl, LabelIndex &labelIndex);
 string printTargetHieroPhrase(  int startT, int endT, int startS, int endS
-                              , WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex);
+                              , WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex, double &logPCFGScore);
 string printSourceHieroPhrase( int startT, int endT, int startS, int endS
                                , HoleCollection &holeColl, const LabelIndex &labelIndex);
 void preprocessSourceHieroPhrase( int startT, int endT, int startS, int endS
@@ -257,6 +257,8 @@ int main(int argc, char* argv[])
     // if an source phrase is paired with two target phrases, then count(t|s) = 0.5
     else if (strcmp(argv[i],"--NoFractionalCounting") == 0) {
       options.fractionalCounting = false;
+    } else if (strcmp(argv[i],"--PCFG") == 0) {
+      options.pcfgScore = true;
     } else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
       options.outputNTLengths = true;
 #ifdef WITH_THREADS
@@ -517,7 +519,7 @@ void ExtractTask::preprocessSourceHieroPhrase( int startT, int endT, int startS,
 }
 
 string ExtractTask::printTargetHieroPhrase( int startT, int endT, int startS, int endS
-                              , WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex)
+                              , WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex, double &logPCFGScore)
 {
   HoleList::iterator iterHoleList = holeColl.GetHoles().begin();
   assert(iterHoleList != holeColl.GetHoles().end());
@@ -545,6 +547,11 @@ string ExtractTask::printTargetHieroPhrase( int startT, int endT, int startS, in
 
       out += "[" + sourceLabel + "][" + targetLabel + "] ";
 
+      if (m_options.pcfgScore) {
+        double score = m_sentence->targetTree.GetNodes(currPos,hole.GetEnd(1))[labelI]->GetPcfgScore();
+        logPCFGScore -= score;
+      }
+
       currPos = hole.GetEnd(1);
       hole.SetPos(outPos, 1);
       ++iterHoleList;
@@ -658,8 +665,16 @@ void ExtractTask::printHieroPhrase( int startT, int endT, int startS, int endS
   preprocessSourceHieroPhrase(startT, endT, startS, endS, indexS, holeColl, labelIndex);
 
   // target
-  rule.target = printTargetHieroPhrase(startT, endT, startS, endS, indexT, holeColl, labelIndex)
+  if (m_options.pcfgScore) {
+    double logPCFGScore = m_sentence->targetTree.GetNodes(startT,endT)[labelIndex[0]]->GetPcfgScore();
+    rule.target = printTargetHieroPhrase(startT, endT, startS, endS, indexT, holeColl, labelIndex, logPCFGScore)
                 + " [" + targetLabel + "]";
+    rule.pcfgScore = std::exp(logPCFGScore);
+  } else {
+    double logPCFGScore = 0.0f;
+    rule.target = printTargetHieroPhrase(startT, endT, startS, endS, indexT, holeColl, labelIndex, logPCFGScore)
+                + " [" + targetLabel + "]";
+  }
 
   // source
   // holeColl.SortSourceHoles();
@@ -877,6 +892,11 @@ void ExtractTask::addRule( int startT, int endT, int startS, int endS, RuleExist
     rule.target += m_sentence->target[ti] + " ";
   rule.target += "[" + targetLabel + "]";
 
+  if (m_options.pcfgScore) {
+    double logPCFGScore = m_sentence->targetTree.GetNodes(startT,endT)[0]->GetPcfgScore();
+    rule.pcfgScore = std::exp(logPCFGScore);
+  }
+
   // alignment
   for(int ti=startT; ti<=endT; ti++) {
     for(unsigned int i=0; i<m_sentence->alignedToT[ti].size(); i++) {
@@ -957,11 +977,13 @@ void ExtractTask::writeRulesToFile()
     out << rule->source << " ||| "
         << rule->target << " ||| "
         << rule->alignment << " ||| "
-        << rule->count;
+        << rule->count << " ||| ";
     if (m_options.outputNTLengths) {
-      out << " ||| ";
       rule->OutputNTLengths(out); 
     }
+    if (m_options.pcfgScore) {
+      out << " ||| " << rule->pcfgScore;
+    }
     out << "\n";
 
     if (!m_options.onlyDirectFlag) {
diff --git a/scripts/training/phrase-extract/pcfg-common/Jamfile b/scripts/training/phrase-extract/pcfg-common/Jamfile
new file mode 100644
index 000000000..3dc272a56
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-common/Jamfile
@@ -0,0 +1 @@
+lib pcfg_common : [ glob *.cc ] ..//trees ;
diff --git a/scripts/training/phrase-extract/pcfg-common/exception.h b/scripts/training/phrase-extract/pcfg-common/exception.h
new file mode 100644
index 000000000..3dbd59d0e
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-common/exception.h
@@ -0,0 +1,41 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_EXCEPTION_H_
+#define PCFG_EXCEPTION_H_
+
+#include <string>
+
+namespace Moses {
+namespace PCFG {
+// Holds a message string; note that it does not derive from std::exception.
+class Exception {
+ public:
+  Exception(const char *msg) : msg_(msg) {}
+  Exception(const std::string &msg) : msg_(msg) {}
+  const std::string &msg() const { return msg_; }  // message given at construction
+ private:
+  std::string msg_;
+};
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/pcfg-common/numbered_set.h b/scripts/training/phrase-extract/pcfg-common/numbered_set.h
new file mode 100644
index 000000000..f88d710ed
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-common/numbered_set.h
@@ -0,0 +1,109 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_NUMBERED_SET_H_
+#define PCFG_NUMBERED_SET_H_
+
+#include "exception.h"
+
+#include <boost/unordered_map.hpp>
+
+#include <limits>
+#include <sstream>
+#include <vector>
+
+namespace Moses {
+namespace PCFG {
+
+// Stores a set of elements of type T, each of which is allocated an integral
+// ID of type I.  IDs are contiguous starting at 0.  Individual elements cannot
+// be removed once inserted (but the whole set can be cleared).
+template<typename T, typename I=size_t>
+class NumberedSet {
+ private:
+  typedef boost::unordered_map<T, I> ElementToIdMap;  // element -> ID
+  typedef std::vector<const T *> IdToElementMap;  // ID -> pointer to element
+
+ public:
+  typedef I IdType;
+  typedef typename IdToElementMap::const_iterator const_iterator;
+
+  NumberedSet() {}
+  // Iteration visits pointers to the stored elements in increasing ID order.
+  const_iterator begin() const { return id_to_element_.begin(); }
+  const_iterator end() const { return id_to_element_.end(); }
+
+  // Sentinel ID that Lookup(const T &) returns for elements not in the set.
+  static I NullId() { return std::numeric_limits<I>::max(); }
+
+  bool Empty() const { return id_to_element_.empty(); }
+  size_t Size() const { return id_to_element_.size(); }
+
+  // Insert the given object (if not already present) and return its ID.
+  I Insert(const T &);
+  // Return the ID of the given element, or NullId() if it is not in the set.
+  I Lookup(const T &) const;
+  const T &Lookup(I) const;  // element with the given ID; throws Exception
+  // Remove all elements.
+  void Clear();
+  // NOTE(review): a copied set would still point into the original's map.
+ private:
+  ElementToIdMap element_to_id_;  // holds the elements themselves (as keys)
+  IdToElementMap id_to_element_;  // pointers to the keys of element_to_id_
+};
+
+template<typename T, typename I>  // element -> ID lookup
+I NumberedSet<T, I>::Lookup(const T &s) const {
+  typename ElementToIdMap::const_iterator p = element_to_id_.find(s);
+  return (p == element_to_id_.end()) ? NullId() : p->second;  // absent: NullId
+}
+
+template<typename T, typename I>  // ID -> element lookup
+const T &NumberedSet<T, I>::Lookup(I id) const {
+  if (id < 0 || id >= id_to_element_.size()) {  // ID was never allocated
+    std::ostringstream msg;
+    msg << "Value not found: " << id;
+    throw Exception(msg.str());
+  }
+  return *(id_to_element_[id]);  // entry points at a key of element_to_id_
+}
+
+template<typename T, typename I>
+I NumberedSet<T, I>::Insert(const T &x) {
+  std::pair<T, I> value(x, id_to_element_.size());  // candidate: next free ID
+  std::pair<typename ElementToIdMap::iterator, bool> result =
+      element_to_id_.insert(value);  // leaves the map unchanged if x exists
+  if (result.second) {
+    // x is a new element: record the address of the key stored in the map.
+    id_to_element_.push_back(&result.first->first);
+  }
+  return result.first->second;  // ID of the new or pre-existing element
+}
+
+template<typename T, typename I>
+void NumberedSet<T, I>::Clear() {
+  element_to_id_.clear();  // destroys the stored elements ...
+  id_to_element_.clear();  // ... so the pointers to them are dropped as well
+}
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/pcfg-common/pcfg.cc b/scripts/training/phrase-extract/pcfg-common/pcfg.cc
new file mode 100644
index 000000000..d045b820b
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-common/pcfg.cc
@@ -0,0 +1,106 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "pcfg.h"
+
+#include "exception.h"
+
+#include <boost/algorithm/string.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include <cassert>
+
+namespace Moses {
+namespace PCFG {
+
+void Pcfg::Add(const Key &key, double score) {
+  rules_[key] = score;  // inserts the rule, or overwrites an existing score
+}
+
+bool Pcfg::Lookup(const Key &key, double &score) const {
+  Map::const_iterator p = rules_.find(key);
+  if (p == rules_.end()) {
+    return false;  // unknown rule; score is left untouched
+  }
+  score = p->second;  // report the stored score through the out-parameter
+  return true;
+}
+
+void Pcfg::Read(std::istream &input, Vocabulary &vocab) {
+  std::string line;  // each line has the form: LHS ||| RHS symbols ||| score
+  std::string lhs_string;
+  std::vector<std::string> rhs_strings;
+  std::string score_string;
+  Key key;
+  while (std::getline(input, line)) {
+    // Read LHS: the trimmed text before the first "|||".
+    size_t pos = line.find("|||");
+    if (pos == std::string::npos) {
+      throw Exception("missing first delimiter");
+    }
+    lhs_string = line.substr(0, pos);
+    boost::trim(lhs_string);
+
+    // Read RHS: whitespace-separated symbols between the two delimiters.
+    size_t begin = pos+3;
+    pos = line.find("|||", begin);
+    if (pos == std::string::npos) {
+      throw Exception("missing second delimiter");
+    }
+    std::string rhs_text = line.substr(begin, pos-begin);
+    boost::trim(rhs_text);
+    rhs_strings.clear();
+    boost::split(rhs_strings, rhs_text, boost::algorithm::is_space(),
+                 boost::algorithm::token_compress_on);
+
+    // Read score: the trimmed remainder of the line.
+    score_string = line.substr(pos+3);
+    boost::trim(score_string);
+
+    // Construct key: vocabulary IDs of the LHS followed by the RHS symbols.
+    key.clear();
+    key.reserve(rhs_strings.size()+1);
+    key.push_back(vocab.Insert(lhs_string));
+    for (std::vector<std::string>::const_iterator p = rhs_strings.begin();
+         p != rhs_strings.end(); ++p) {
+      key.push_back(vocab.Insert(*p));
+    }
+
+    // Add rule (lexical_cast throws boost::bad_lexical_cast on a bad score).
+    double score = boost::lexical_cast<double>(score_string);
+    Add(key, score);
+  }
+}
+
+void Pcfg::Write(const Vocabulary &vocab, std::ostream &output) const {
+  for (const_iterator p = begin(); p != end(); ++p) {  // one line per rule
+    const Key &key = p->first;
+    double score = p->second;
+    std::vector<size_t>::const_iterator q = key.begin();
+    std::vector<size_t>::const_iterator end = key.end();
+    output << vocab.Lookup(*q++) << " |||";  // LHS; assumes key is non-empty
+    while (q != end) {
+      output << " " << vocab.Lookup(*q++);  // remaining IDs are the RHS
+    }
+    output << " ||| " << score << std::endl;  // score, default formatting
+  }
+}
+
+}  // namespace PCFG
+}  // namespace Moses
diff --git a/scripts/training/phrase-extract/pcfg-common/pcfg.h b/scripts/training/phrase-extract/pcfg-common/pcfg.h
new file mode 100644
index 000000000..757eea449
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-common/pcfg.h
@@ -0,0 +1,61 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_PCFG_H_
+#define PCFG_PCFG_H_
+
+#include "typedef.h"
+
+#include <istream>
+#include <map>
+#include <ostream>
+#include <vector>
+
+namespace Moses {
+namespace PCFG {
+
+class Pcfg {  // Maps rules, stored as sequences of symbol IDs, to scores.
+ public:
+  typedef std::vector<size_t> Key;  // as built by Read(): LHS ID, then RHS IDs
+  typedef std::map<Key, double> Map;
+  typedef Map::iterator iterator;
+  typedef Map::const_iterator const_iterator;
+
+  Pcfg() {}
+  // Iteration visits (Key, score) pairs in the std::map's key order.
+  iterator begin() { return rules_.begin(); }
+  const_iterator begin() const { return rules_.begin(); }
+
+  iterator end() { return rules_.end(); }
+  const_iterator end() const { return rules_.end(); }
+
+  void Add(const Key &, double);  // sets the score of a rule
+  bool Lookup(const Key &, double &) const;  // false if the rule is absent
+  void Read(std::istream &, Vocabulary &);  // parses "LHS ||| RHS ||| score"
+  void Write(const Vocabulary &, std::ostream &) const;  // one rule per line
+
+ private:
+  Map rules_;  // rule -> score
+};
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/pcfg-common/pcfg_tree.h b/scripts/training/phrase-extract/pcfg-common/pcfg_tree.h
new file mode 100644
index 000000000..bdac64dfc
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-common/pcfg_tree.h
@@ -0,0 +1,77 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_PCFG_TREE_H_
+#define PCFG_PCFG_TREE_H_
+
+#include "syntax_tree.h"
+#include "xml_tree_writer.h"
+
+#include <string>
+
+namespace Moses {
+namespace PCFG {
+
+template<typename DerivedType>  // string-labelled tree node carrying a score
+class PcfgTreeBase : public SyntaxTreeBase<std::string, DerivedType> {
+ public:
+  typedef std::string LabelType;
+  typedef SyntaxTreeBase<LabelType, DerivedType> BaseType;
+  // Creates a childless node with the given label and a score of 0.0.
+  PcfgTreeBase(const LabelType &label) : BaseType(label), score_(0.0) {}
+
+  double score() const { return score_; }
+  void set_score(double s) { score_ = s; }
+
+ private:
+  double score_;  // 0.0 until set_score() is called
+};
+
+class PcfgTree : public PcfgTreeBase<PcfgTree> {  // concrete scored tree node
+ public:
+  typedef PcfgTreeBase<PcfgTree> BaseType;
+  PcfgTree(const BaseType::LabelType &label) : BaseType(label) {}  // score 0.0
+};
+
+// Specialise XmlOutputHandler for PcfgTree.
+template<>
+class XmlOutputHandler<PcfgTree> {
+ public:
+  typedef std::map<std::string, std::string> AttributeMap;
+  // Sets label to the node's label.
+  void GetLabel(const PcfgTree &tree, std::string &label) const {
+    label = tree.label();
+  }
+  // Yields a single "pcfg" attribute holding the score, unless it is 0.0.
+  void GetAttributes(const PcfgTree &tree, AttributeMap &attribute_map) const {
+    attribute_map.clear();  // the map is an output; earlier entries are dropped
+    double score = tree.score();
+    if (score != 0.0) {  // a score of exactly 0.0 produces no attribute
+      std::ostringstream out;
+      out << tree.score();
+      attribute_map["pcfg"] = out.str();
+    }
+  }
+};
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/pcfg-common/syntax_tree.h b/scripts/training/phrase-extract/pcfg-common/syntax_tree.h
new file mode 100644
index 000000000..37f72dd58
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-common/syntax_tree.h
@@ -0,0 +1,91 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_SYNTAX_TREE_H_
+#define PCFG_SYNTAX_TREE_H_
+
+#include <cassert>
+#include <vector>
+
+namespace Moses {
+namespace PCFG {
+
+// Base class for SyntaxTree, AgreementTree, and friends.
+template<typename T, typename DerivedType>
+class SyntaxTreeBase {
+ public:
+  // Constructors.  parent_ starts null; the children's parents are not set.
+  SyntaxTreeBase(const T &label)
+      : label_(label)
+      , children_()
+      , parent_(0) {}
+
+  SyntaxTreeBase(const T &label, const std::vector<DerivedType *> &children)
+      : label_(label)
+      , children_(children)
+      , parent_(0) {}
+  // Destructor: deletes every child, so a node owns its whole subtree.
+  // NOTE(review): the implicit copy operations would lead to double deletes.
+  virtual ~SyntaxTreeBase();
+
+  const T &label() const { return label_; }
+  const DerivedType *parent() const { return parent_; }
+  DerivedType *parent() { return parent_; }
+  const std::vector<DerivedType *> &children() const { return children_; }
+  std::vector<DerivedType *> &children() { return children_; }
+
+  void set_label(const T &label) { label_ = label; }
+  void set_parent(DerivedType *parent) { parent_ = parent; }
+  void set_children(const std::vector<DerivedType *> &c) { children_ = c; }
+  // A leaf has no children; a preterminal has exactly one child, a leaf.
+  bool IsLeaf() const { return children_.empty(); }
+
+  bool IsPreterminal() const {
+    return children_.size() == 1 && children_[0]->IsLeaf();
+  }
+  // Appends child; setting the child's parent pointer is left to the caller.
+  void AddChild(DerivedType *child) { children_.push_back(child); }
+
+ private:
+  T label_;
+  std::vector<DerivedType *> children_;  // deleted by the destructor
+  DerivedType *parent_;  // never deleted here; null unless set_parent() is used
+};
+
+template<typename T>  // concrete tree whose nodes are all SyntaxTree<T>
+class SyntaxTree : public SyntaxTreeBase<T, SyntaxTree<T> > {
+ public:
+  typedef SyntaxTreeBase<T, SyntaxTree<T> > BaseType;
+  SyntaxTree(const T &label) : BaseType(label) {}  // childless node
+  SyntaxTree(const T &label, const std::vector<SyntaxTree *> &children)
+      : BaseType(label, children) {}  // node with the given children
+};
+
+template<typename T, typename DerivedType>
+SyntaxTreeBase<T, DerivedType>::~SyntaxTreeBase() {
+  for (size_t i = 0; i < children_.size(); ++i) {
+    delete children_[i];  // runs the child's destructor, freeing its subtree
+  }
+}
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/pcfg-common/tool.cc b/scripts/training/phrase-extract/pcfg-common/tool.cc
new file mode 100644
index 000000000..bebd220e1
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-common/tool.cc
@@ -0,0 +1,80 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "tool.h"
+
+#include <sstream>
+
+namespace Moses {
+namespace PCFG {
+
+std::istream &Tool::OpenInputOrDie(const std::string &filename) {
+  // TODO Check that function is only called once?
+  if (filename.empty() || filename == "-") {
+    input_ptr_ = &(std::cin);  // empty name or "-" selects standard input
+  } else {
+    input_file_stream_.open(filename.c_str());
+    if (!input_file_stream_) {
+      std::ostringstream msg;
+      msg << "failed to open input file: " << filename;
+      Error(msg.str());  // prints the message and exits the program
+    }
+    input_ptr_ = &input_file_stream_;
+  }
+  return *input_ptr_;  // std::cin or the member file stream
+}
+
+std::ostream &Tool::OpenOutputOrDie(const std::string &filename) {
+  // TODO Check that function is only called once?
+  if (filename.empty() || filename == "-") {
+    output_ptr_ = &(std::cout);  // empty name or "-" selects standard output
+  } else {
+    output_file_stream_.open(filename.c_str());
+    if (!output_file_stream_) {
+      std::ostringstream msg;
+      msg << "failed to open output file: " << filename;
+      Error(msg.str());  // prints the message and exits the program
+    }
+    output_ptr_ = &output_file_stream_;
+  }
+  return *output_ptr_;  // std::cout or the member file stream
+}
+
+void Tool::OpenNamedInputOrDie(const std::string &filename,
+                               std::ifstream &stream) {
+  stream.open(filename.c_str());  // unlike OpenInputOrDie, "-" is a file name
+  if (!stream) {
+    std::ostringstream msg;
+    msg << "failed to open input file: " << filename;
+    Error(msg.str());  // prints the message and exits the program
+  }
+}
+
+void Tool::OpenNamedOutputOrDie(const std::string &filename,
+                                std::ofstream &stream) {
+  stream.open(filename.c_str());  // unlike OpenOutputOrDie, "-" is a file name
+  if (!stream) {
+    std::ostringstream msg;
+    msg << "failed to open output file: " << filename;
+    Error(msg.str());  // prints the message and exits the program
+  }
+}
+
+}  // namespace PCFG
+}  // namespace Moses
diff --git a/scripts/training/phrase-extract/pcfg-common/tool.h b/scripts/training/phrase-extract/pcfg-common/tool.h
new file mode 100644
index 000000000..0af342569
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-common/tool.h
@@ -0,0 +1,91 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_TOOL_H_
+#define PCFG_TOOL_H_
+
+#include <boost/program_options/cmdline.hpp>
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+namespace Moses {
+namespace PCFG {
+
+class Tool {  // Common base class for the PCFG tools.
+ public:
+  virtual ~Tool() {}
+
+  const std::string &name() const { return name_; }
+  // Entry point implemented by each tool; takes main()-style arguments.
+  virtual int Main(int argc, char *argv[]) = 0;
+
+ protected:
+  Tool(const std::string &name) : name_(name), input_ptr_(0), output_ptr_(0) {}
+
+  // Returns the boost::program_options style that should be used by all tools.
+  static int CommonOptionStyle() {
+    namespace cls = boost::program_options::command_line_style;
+    return cls::default_style & (~cls::allow_guessing);
+  }
+  // Writes a warning, prefixed with the tool's name, to standard error.
+  void Warn(const std::string &msg) const {
+    std::cerr << name_ << ": warning: " << msg << std::endl;
+  }
+  // Like Warn(), but labelled as an error and followed by std::exit(1).
+  void Error(const std::string &msg) const {
+    std::cerr << name_ << ": error: " << msg << std::endl;
+    std::exit(1);
+  }
+
+  // Initialises the tool's main input stream and returns a reference that is
+  // valid for the remainder of the tool's lifetime.  If filename is empty or
+  // "-" then input is standard input; otherwise it is the named file.  Calls
+  // Error() if the file cannot be opened for reading.
+  std::istream &OpenInputOrDie(const std::string &filename);
+
+  // Initialises the tool's main output stream and returns a reference that is
+  // valid for the remainder of the tool's lifetime.  If filename is empty or
+  // "-" then output is standard output; otherwise it is the named file.  Calls
+  // Error() if the file cannot be opened for writing.
+  std::ostream &OpenOutputOrDie(const std::string &filename);
+
+  // Opens the named input file using the supplied ifstream.  Calls Error() if
+  // the file cannot be opened for reading.
+  void OpenNamedInputOrDie(const std::string &, std::ifstream &);
+
+  // Opens the named output file using the supplied ofstream.  Calls Error() if
+  // the file cannot be opened for writing.
+  void OpenNamedOutputOrDie(const std::string &, std::ofstream &);
+
+ private:
+  std::string name_;  // used as the prefix of Warn()/Error() messages
+  std::istream *input_ptr_;  // null until OpenInputOrDie() has been called
+  std::ifstream input_file_stream_;
+  std::ostream *output_ptr_;  // null until OpenOutputOrDie() has been called
+  std::ofstream output_file_stream_;
+};
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/pcfg-common/typedef.h b/scripts/training/phrase-extract/pcfg-common/typedef.h
new file mode 100644
index 000000000..49a12d681
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-common/typedef.h
@@ -0,0 +1,37 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_TYPEDEF_H_
+#define PCFG_TYPEDEF_H_
+
+#include "numbered_set.h"
+#include "syntax_tree.h"
+
+#include <string>
+
+namespace Moses {
+namespace PCFG {
+
+typedef NumberedSet<std::string> Vocabulary;  // symbol string <-> size_t ID
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.cc b/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.cc
new file mode 100644
index 000000000..5c596a0fb
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.cc
@@ -0,0 +1,85 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "xml_tree_parser.h"
+
+#include "exception.h"
+#include "tables-core.h"
+#include "XmlException.h"
+#include "XmlTree.h"
+
+#include <cassert>
+#include <vector>
+
+namespace Moses {
+namespace PCFG {
+
+XmlTreeParser::XmlTreeParser()  // all members are default-constructed
+{
+}
+
+std::auto_ptr<PcfgTree> XmlTreeParser::Parse(const std::string &line)
+{
+  m_line = line;  // working copy handed to ProcessAndStripXMLTags below
+  m_tree.Clear();  // the member tree is reused from one call to the next
+  try {
+    if (!ProcessAndStripXMLTags(m_line, m_tree, m_labelSet, m_topLabelSet)) {
+      throw Exception("");  // NOTE(review): failure carries an empty message
+    }
+  } catch (const XmlException &e) {
+    throw Exception(e.getMsg());  // rethrow as this library's exception type
+  }
+  m_tree.ConnectNodes();
+  SyntaxNode *root = m_tree.GetTop();
+  assert(root);
+  m_words = tokenize(m_line.c_str());  // presumably the tag-free words; confirm
+  return ConvertTree(*root, m_words);
+}
+
+// Recursively converts a SyntaxNode tree to a Moses::PCFG::PcfgTree.
+std::auto_ptr<PcfgTree> XmlTreeParser::ConvertTree(
+    const SyntaxNode &tree,
+    const std::vector<std::string> &words)
+{
+  std::auto_ptr<PcfgTree> root(new PcfgTree(tree.GetLabel()));
+  const std::vector<SyntaxNode*> &children = tree.GetChildren();
+  if (children.empty()) {  // childless node: attach its word as a leaf child
+    if (tree.GetStart() != tree.GetEnd()) {
+      std::ostringstream msg;
+      msg << "leaf node covers multiple words (" << tree.GetStart()
+          << "-" << tree.GetEnd() << "): this is currently unsupported";
+      throw Exception(msg.str());
+    }  // NOTE(review): the words[] index below is not range-checked
+    std::auto_ptr<PcfgTree> leaf(new PcfgTree(words[tree.GetStart()]));
+    leaf->set_parent(root.get());
+    root->AddChild(leaf.release());  // root now owns the leaf
+  } else {
+    for (std::vector<SyntaxNode*>::const_iterator p = children.begin();
+         p != children.end(); ++p) {
+      assert(*p);
+      std::auto_ptr<PcfgTree> child = ConvertTree(**p, words);
+      child->set_parent(root.get());
+      root->AddChild(child.release());  // root now owns the converted subtree
+    }
+  }
+  return root;  // ownership of the whole converted tree passes to the caller
+}
+
+}  // namespace PCFG
+}  // namespace Moses
diff --git a/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.h b/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.h
new file mode 100644
index 000000000..6b418c44e
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.h
@@ -0,0 +1,56 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_XML_TREE_PARSER_H_
+#define PCFG_XML_TREE_PARSER_H_
+
+#include "pcfg_tree.h"
+#include "SyntaxTree.h"
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace Moses {
+namespace PCFG {
+
+// Parses a string in Moses' XML parse tree format and returns a PcfgTree
+// object.
+class XmlTreeParser {
+ public:
+  XmlTreeParser();
+  std::auto_ptr<PcfgTree> Parse(const std::string &);  // throws Exception
+ private:
+  std::auto_ptr<PcfgTree> ConvertTree(const SyntaxNode &,
+                                      const std::vector<std::string> &);
+  // Working state, overwritten by each call to Parse():
+  std::set<std::string> m_labelSet;  // passed to ProcessAndStripXMLTags()
+  std::map<std::string, int> m_topLabelSet;  // likewise
+  std::string m_line;  // copy of the line being parsed
+  ::SyntaxTree m_tree;  // the global-namespace SyntaxTree class
+  std::vector<std::string> m_words;  // result of tokenize() on m_line
+};
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h b/scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h
new file mode 100644
index 000000000..347c352bb
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h
@@ -0,0 +1,127 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_XML_TREE_WRITER_H_
+#define PCFG_XML_TREE_WRITER_H_
+
+#include "syntax_tree.h"
+
+#include "XmlTree.h"
+
+#include <cassert>
+#include <map>
+#include <memory>
+#include <ostream>
+#include <vector>
+#include <string>
+
+namespace Moses {
+namespace PCFG {
+
+template<typename InputTree>  // supplies the label and attributes of a node
+class XmlOutputHandler {
+ public:
+  typedef std::map<std::string, std::string> AttributeMap;
+  // Not defined generically in this header; specialise per tree type.
+  void GetLabel(const InputTree &, std::string &) const;
+  void GetAttributes(const InputTree &, AttributeMap &) const;
+};
+
+template<typename InputTree>  // writes InputTree objects as <tree> elements
+class XmlTreeWriter : public XmlOutputHandler<InputTree> {
+ public:
+  typedef XmlOutputHandler<InputTree> Base;
+  void Write(const InputTree &, std::ostream &) const;  // tree must be non-leaf
+ private:
+  std::string Escape(const std::string &) const;  // XML-escapes a string
+};
+
+template<typename InputTree>
+void XmlTreeWriter<InputTree>::Write(const InputTree &tree,
+                                     std::ostream &out) const {
+  assert(!tree.IsLeaf());  // leaves are written by their parent, as text
+
+  // Opening tag: <tree label="..."> plus any attributes from the handler.
+
+  std::string label;
+  Base::GetLabel(tree, label);
+  out << "<tree label=\"" << Escape(label) << "\"";
+
+  typename Base::AttributeMap attribute_map;
+  Base::GetAttributes(tree, attribute_map);
+  // Attribute values are escaped like the label so the tag stays well-formed.
+  for (typename Base::AttributeMap::const_iterator p = attribute_map.begin();
+       p != attribute_map.end(); ++p) {
+    out << " " << p->first << "=\"" << Escape(p->second) << "\"";
+  }
+
+  out << ">";
+
+  // Children: leaves are written as escaped text, subtrees recursively.
+
+  const std::vector<InputTree *> &children = tree.children();
+  for (typename std::vector<InputTree *>::const_iterator p = children.begin();
+       p != children.end(); ++p) {
+    InputTree &child = **p;
+    if (child.IsLeaf()) {
+      Base::GetLabel(child, label);
+      out << " " << Escape(label);
+    } else {
+      out << " ";
+      Write(**p, out);
+    }
+  }
+
+  // Closing tag
+  out << " </tree>";
+
+  if (tree.parent() == 0) {  // only the root (no parent) ends the line
+    out << std::endl;
+  }
+}
+
+// Returns a copy of s with < > & ' " replaced by XML entity references.
+template<typename InputTree>
+std::string XmlTreeWriter<InputTree>::Escape(const std::string &s) const {
+  std::string t;
+  size_t len = s.size();
+  t.reserve(len);  // lower bound only: each escaped character grows the result
+  for (size_t i = 0; i < len; ++i) {
+    if (s[i] == '<') {
+      t += "&lt;";
+    } else if (s[i] == '>') {
+      t += "&gt;";
+    } else if (s[i] == '&') {
+      t += "&amp;";
+    } else if (s[i] == '\'') {
+      t += "&apos;";
+    } else if (s[i] == '"') {
+      t += "&quot;";
+    } else {
+      t += s[i];  // every other character is copied unchanged
+    }
+  }
+  return t;
+}
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/pcfg-extract/Jamfile b/scripts/training/phrase-extract/pcfg-extract/Jamfile
new file mode 100644
index 000000000..be91d6d2f
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-extract/Jamfile
@@ -0,0 +1 @@
+exe pcfg-extract : [ glob *.cc ] ..//pcfg-common ../../../..//boost_program_options ;
diff --git a/scripts/training/phrase-extract/pcfg-extract/main.cc b/scripts/training/phrase-extract/pcfg-extract/main.cc
new file mode 100644
index 000000000..47b45afc3
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-extract/main.cc
@@ -0,0 +1,25 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "pcfg_extract.h"
+
+// Program entry point: runs the pcfg-extract tool and returns its status.
+int main(int argc, char *argv[]) {
+  return Moses::PCFG::PcfgExtract().Main(argc, argv);
+}
diff --git a/scripts/training/phrase-extract/pcfg-extract/options.h b/scripts/training/phrase-extract/pcfg-extract/options.h
new file mode 100644
index 000000000..3acb31b58
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-extract/options.h
@@ -0,0 +1,36 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_EXTRACT_OPTIONS_H_
+#define PCFG_EXTRACT_OPTIONS_H_
+
+#include <string>
+
+namespace Moses {
+namespace PCFG {
+
+struct Options {  // command-line settings of the pcfg-extract tool
+  std::string corpus_file;  // not set by PcfgExtract::ProcessOptions
+};
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.cc b/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.cc
new file mode 100644
index 000000000..151c9959c
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.cc
@@ -0,0 +1,131 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "pcfg_extract.h"
+
+#include "options.h"
+#include "rule_collection.h"
+#include "rule_extractor.h"
+
+#include "pcfg-common/exception.h"
+#include "pcfg-common/pcfg.h"
+#include "pcfg-common/pcfg_tree.h"
+#include "pcfg-common/syntax_tree.h"
+#include "pcfg-common/typedef.h"
+#include "pcfg-common/xml_tree_parser.h"
+
+#include <boost/program_options.hpp>
+
+#include <cassert>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace Moses {
+namespace PCFG {
+
+int PcfgExtract::Main(int argc, char *argv[]) {
+  // Handle the command line first.
+  Options options;
+  ProcessOptions(argc, argv, options);
+
+  // Parse every line of standard input into a tree and collect the rule
+  // counts of all trees in a single RuleCollection.
+  Vocabulary vocab;
+  RuleExtractor extractor(vocab);
+  RuleCollection rules;
+  XmlTreeParser parser;
+  std::string line;
+  std::auto_ptr<PcfgTree> tree;
+  for (size_t line_num = 1; std::getline(std::cin, line); ++line_num) {
+    try {
+      tree = parser.Parse(line);
+    } catch (Exception &e) {
+      std::ostringstream msg;
+      msg << "line " << line_num << ": " << e.msg();
+      Error(msg.str());
+    }
+    if (tree.get()) {
+      extractor.Extract(*tree, rules);
+    } else {
+      // The parser returned no tree for this line: warn and carry on.
+      std::ostringstream msg;
+      msg << "no tree at line " << line_num;
+      Warn(msg.str());
+    }
+  }
+
+  // Turn the counts into a scored PCFG and print it on standard output.
+  Pcfg pcfg;
+  rules.CreatePcfg(pcfg);
+  pcfg.Write(vocab, std::cout);
+
+  return 0;
+}
+
+// Parses the command line.  --help prints the usage text and exits; a parse
+// error is reported through Error().  Nothing is written to the Options.
+void PcfgExtract::ProcessOptions(int argc, char *argv[],
+                                 Options &options) const {
+  namespace po = boost::program_options;
+
+  std::ostringstream caption;
+  caption << "Usage: " << name() << "\n\n" << "Options";
+
+  // Options listed in the usage text.
+  po::options_description visible_opts(caption.str());
+  visible_opts.add_options()("help", "print help message and exit");
+
+  // Hidden options would back positional arguments; this tool has none.
+  po::options_description hidden_opts("Hidden options");
+  hidden_opts.add_options();
+
+  po::options_description all_opts;
+  all_opts.add(visible_opts).add(hidden_opts);
+  // No positional arguments are registered.
+  po::positional_options_description positional;
+
+  // Parse, then store and validate the values.
+  po::variables_map values;
+  try {
+    po::command_line_parser cmd_parser(argc, argv);
+    cmd_parser.style(CommonOptionStyle());
+    cmd_parser.options(all_opts);
+    cmd_parser.positional(positional);
+    po::store(cmd_parser.run(), values);
+    po::notify(values);
+  } catch (const std::exception &e) {
+    std::ostringstream msg;
+    msg << e.what() << "\n\n" << visible_opts;
+    Error(msg.str());
+  }
+
+  if (values.count("help")) {
+    std::cout << visible_opts << std::endl;
+    std::exit(0);
+  }
+}
+
+}  // namespace PCFG
+}  // namespace Moses
diff --git a/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.h b/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.h
new file mode 100644
index 000000000..1af6cb4fe
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.h
@@ -0,0 +1,42 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_EXTRACT_PCFG_EXTRACT_H_
+#define PCFG_EXTRACT_PCFG_EXTRACT_H_
+
+#include "pcfg-common/tool.h"
+
+namespace Moses {
+namespace PCFG {
+
+struct Options;  // options.h defines it with the `struct` key
+// Command-line tool registered under the name "pcfg-extract".
+class PcfgExtract : public Tool {
+ public:
+  PcfgExtract() : Tool("pcfg-extract") {}
+  virtual int Main(int, char *[]);  // runs the tool on (argc, argv)
+ private:
+  void ProcessOptions(int, char *[], Options &) const;  // parses (argc, argv)
+};
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/pcfg-extract/rule_collection.cc b/scripts/training/phrase-extract/pcfg-extract/rule_collection.cc
new file mode 100644
index 000000000..503b1a9e6
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-extract/rule_collection.cc
@@ -0,0 +1,58 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "rule_collection.h"
+
+#include "pcfg-common/pcfg.h"
+
+#include <cmath>
+
+namespace Moses {
+namespace PCFG {
+
+void RuleCollection::Add(size_t lhs, const std::vector<size_t> &rhs) {
+  collection_[lhs][rhs] += 1;  // operator[] starts a new rule at count zero
+}
+
+// Adds every collected rule to pcfg under the key [lhs, rhs...], scored with
+// log(count of the rule / total count of all rules sharing its lhs).
+void RuleCollection::CreatePcfg(Pcfg &pcfg) {
+  std::vector<size_t> key;
+  for (const_iterator lhs_it = begin(); lhs_it != end(); ++lhs_it) {
+    const RhsCountMap &rhs_counts = lhs_it->second;
+    // First pass: total number of observations of this left-hand side.
+    size_t lhs_total = 0;
+    for (RhsCountMap::const_iterator r = rhs_counts.begin();
+         r != rhs_counts.end(); ++r) {
+      lhs_total += r->second;
+    }
+    const double denominator = static_cast<double>(lhs_total);
+    // Second pass: one scored entry per distinct right-hand side.
+    for (RhsCountMap::const_iterator r = rhs_counts.begin();
+         r != rhs_counts.end(); ++r) {
+      key.clear();
+      key.push_back(lhs_it->first);
+      key.insert(key.end(), r->first.begin(), r->first.end());
+      pcfg.Add(key, std::log(static_cast<double>(r->second) / denominator));
+    }
+  }
+}
+
+}  // namespace PCFG
+}  // namespace Moses
diff --git a/scripts/training/phrase-extract/pcfg-extract/rule_collection.h b/scripts/training/phrase-extract/pcfg-extract/rule_collection.h
new file mode 100644
index 000000000..1b768dd21
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-extract/rule_collection.h
@@ -0,0 +1,59 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_EXTRACT_RULE_COLLECTION_H_
+#define PCFG_EXTRACT_RULE_COLLECTION_H_
+
+#include "pcfg-common/pcfg.h"
+
+#include <boost/unordered_map.hpp>
+
+#include <vector>
+
+namespace Moses {
+namespace PCFG {
+
+// Contains PCFG rules and their counts.
+class RuleCollection {
+ public:
+  typedef boost::unordered_map<std::vector<size_t>, size_t> RhsCountMap;
+  typedef boost::unordered_map<size_t, RhsCountMap> Map;
+  typedef Map::iterator iterator;
+  typedef Map::const_iterator const_iterator;
+  // Map is keyed by left-hand side, RhsCountMap by right-hand side sequence.
+  RuleCollection() {}
+  // Iteration over the (left-hand side, RhsCountMap) entries.
+  iterator begin() { return collection_.begin(); }
+  const_iterator begin() const { return collection_.begin(); }
+
+  iterator end() { return collection_.end(); }
+  const_iterator end() const { return collection_.end(); }
+
+  void Add(size_t, const std::vector<size_t> &);  // counts one (lhs, rhs) rule
+  void CreatePcfg(Pcfg &);  // adds every rule, with its score, to the Pcfg
+
+ private:
+  Map collection_;
+};
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/pcfg-extract/rule_extractor.cc b/scripts/training/phrase-extract/pcfg-extract/rule_extractor.cc
new file mode 100644
index 000000000..48a82a6d0
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-extract/rule_extractor.cc
@@ -0,0 +1,51 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "rule_extractor.h"
+
+#include "pcfg-common/pcfg_tree.h"
+
+namespace Moses {
+namespace PCFG {
+
+RuleExtractor::RuleExtractor(Vocabulary &non_term_vocab)
+    : non_term_vocab_(non_term_vocab) {  // stored as a reference, not copied
+}
+
+// Adds the rule "label -> child labels" to rc for tree and, recursively, for
+// every descendant that is neither a leaf nor a preterminal.
+void RuleExtractor::Extract(const PcfgTree &tree, RuleCollection &rc) const {
+  if (tree.IsPreterminal() || tree.IsLeaf()) {
+    return;  // no rule is recorded at this node
+  }
+
+  const size_t lhs_id = non_term_vocab_.Insert(tree.label());
+  const std::vector<PcfgTree *> &kids = tree.children();
+  std::vector<size_t> rhs_ids;
+  rhs_ids.reserve(kids.size());
+  for (size_t i = 0; i < kids.size(); ++i) {
+    const PcfgTree &kid = *kids[i];
+    rhs_ids.push_back(non_term_vocab_.Insert(kid.label()));
+    Extract(kid, rc);  // descend before this node's own rule is added
+  }
+  rc.Add(lhs_id, rhs_ids);
+}
+
+}  // namespace PCFG
+}  // namespace Moses
diff --git a/scripts/training/phrase-extract/pcfg-extract/rule_extractor.h b/scripts/training/phrase-extract/pcfg-extract/rule_extractor.h
new file mode 100644
index 000000000..6bcffbc61
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-extract/rule_extractor.h
@@ -0,0 +1,45 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_EXTRACT_RULE_EXTRACTOR_H_
+#define PCFG_EXTRACT_RULE_EXTRACTOR_H_
+
+#include "rule_collection.h"
+
+#include "pcfg-common/typedef.h"
+
+namespace Moses {
+namespace PCFG {
+
+class PcfgTree;
+
+// Extracts PCFG rules from syntax trees and adds them to a RuleCollection.
+class RuleExtractor {
+ public:
+  explicit RuleExtractor(Vocabulary &);  // the vocabulary is held by reference
+  void Extract(const PcfgTree &, RuleCollection &) const;
+ private:
+  Vocabulary &non_term_vocab_;  // Extract() inserts node labels here
+};
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/pcfg-score/Jamfile b/scripts/training/phrase-extract/pcfg-score/Jamfile
new file mode 100644
index 000000000..7225381c0
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-score/Jamfile
@@ -0,0 +1 @@
+exe pcfg-score : [ glob *.cc ] ..//pcfg-common ../../../..//boost_program_options ;
diff --git a/scripts/training/phrase-extract/pcfg-score/main.cc b/scripts/training/phrase-extract/pcfg-score/main.cc
new file mode 100644
index 000000000..da5392add
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-score/main.cc
@@ -0,0 +1,25 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "pcfg_score.h"
+
+// Program entry point: runs the pcfg-score tool and returns its status.
+int main(int argc, char *argv[]) {
+  return Moses::PCFG::PcfgScore().Main(argc, argv);
+}
diff --git a/scripts/training/phrase-extract/pcfg-score/options.h b/scripts/training/phrase-extract/pcfg-score/options.h
new file mode 100644
index 000000000..e54b2a0b9
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-score/options.h
@@ -0,0 +1,36 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_SCORE_OPTIONS_H_
+#define PCFG_SCORE_OPTIONS_H_
+
+#include <string>
+
+namespace Moses {
+namespace PCFG {
+
+struct Options {  // command-line settings of the pcfg-score tool
+  std::string pcfg_file;  // bound to the "pcfg-file" option by ProcessOptions
+};
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/pcfg-score/pcfg_score.cc b/scripts/training/phrase-extract/pcfg-score/pcfg_score.cc
new file mode 100644
index 000000000..d780200ad
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-score/pcfg_score.cc
@@ -0,0 +1,152 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "pcfg_score.h"
+
+#include "options.h"
+#include "tree_scorer.h"
+
+#include "pcfg-common/exception.h"
+#include "pcfg-common/pcfg.h"
+#include "pcfg-common/pcfg_tree.h"
+#include "pcfg-common/syntax_tree.h"
+#include "pcfg-common/typedef.h"
+#include "pcfg-common/xml_tree_parser.h"
+
+#include <boost/program_options.hpp>
+
+#include <cassert>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace Moses {
+namespace PCFG {
+
+int PcfgScore::Main(int argc, char *argv[]) {
+  // Process command-line options.
+  Options options;
+  ProcessOptions(argc, argv, options);
+
+  // Read the PCFG from the file named on the command line.
+  std::ifstream pcfg_stream;
+  OpenNamedInputOrDie(options.pcfg_file, pcfg_stream);
+  Pcfg pcfg;
+  Vocabulary non_term_vocab;
+  pcfg.Read(pcfg_stream, non_term_vocab);
+
+  // Score the trees arriving on standard input, one per line, and write
+  // them to standard output.  A line whose tree is missing or cannot be
+  // scored produces a warning and an empty output line instead.
+  TreeScorer scorer(pcfg, non_term_vocab);
+  XmlTreeParser parser;
+  XmlTreeWriter<PcfgTree> writer;
+  std::string line;
+  std::auto_ptr<PcfgTree> tree;
+  for (size_t line_num = 1; std::getline(std::cin, line); ++line_num) {
+    try {
+      tree = parser.Parse(line);
+    } catch (Exception &e) {
+      std::ostringstream msg;
+      msg << "line " << line_num << ": " << e.msg();
+      Error(msg.str());
+    }
+
+    // Work out whether (and why) this line has to be skipped.
+    const char *problem = 0;
+    if (!tree.get()) {
+      problem = "no tree at line ";
+    } else if (!scorer.Score(*tree)) {
+      problem = "failed to score tree at line ";
+    }
+
+    if (problem) {
+      std::ostringstream msg;
+      msg << problem << line_num;
+      Warn(msg.str());
+      std::cout << std::endl;
+    } else {
+      writer.Write(*tree, std::cout);
+    }
+  }
+
+  return 0;
+}
+
+// Parses the command line: one required positional argument (the PCFG file,
+// stored in options.pcfg_file) and --help, which prints the usage text and
+// exits.  Parse errors and a missing argument are reported through Error().
+void PcfgScore::ProcessOptions(int argc, char *argv[], Options &options) const {
+  namespace po = boost::program_options;
+
+  std::ostringstream caption;
+  caption << "Usage: " << name() << " PCFG\n\n"
+          << "Options";
+
+  // Options listed in the usage text.
+  po::options_description visible_opts(caption.str());
+  visible_opts.add_options()("help", "print help message and exit");
+
+  // Hidden option that receives the positional PCFG argument.
+  po::options_description hidden_opts("Hidden options");
+  hidden_opts.add_options()
+    ("pcfg-file", po::value(&options.pcfg_file), "pcfg file");
+
+  po::options_description all_opts;
+  all_opts.add(visible_opts).add(hidden_opts);
+
+  // The first positional argument is mapped onto "pcfg-file".
+  po::positional_options_description positional;
+  positional.add("pcfg-file", 1);
+
+  // Parse, then store and validate the values.
+  po::variables_map values;
+  try {
+    po::command_line_parser cmd_parser(argc, argv);
+    cmd_parser.style(CommonOptionStyle());
+    cmd_parser.options(all_opts);
+    cmd_parser.positional(positional);
+    po::store(cmd_parser.run(), values);
+    po::notify(values);
+  } catch (const std::exception &e) {
+    std::ostringstream msg;
+    msg << e.what() << "\n\n" << visible_opts;
+    Error(msg.str());
+  }
+
+  if (values.count("help")) {
+    std::cout << visible_opts << std::endl;
+    std::exit(0);
+  }
+
+  // The PCFG file is mandatory.
+  if (values.count("pcfg-file") == 0) {
+    std::ostringstream msg;
+    msg << "missing required argument\n\n" << visible_opts << std::endl;
+    Error(msg.str());
+  }
+}
+
+}  // namespace PCFG
+}  // namespace Moses
diff --git a/scripts/training/phrase-extract/pcfg-score/pcfg_score.h b/scripts/training/phrase-extract/pcfg-score/pcfg_score.h
new file mode 100644
index 000000000..5e506c39d
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-score/pcfg_score.h
@@ -0,0 +1,42 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_SCORE_PCFG_SCORE_H_
+#define PCFG_SCORE_PCFG_SCORE_H_
+
+#include "pcfg-common/tool.h"
+
+namespace Moses {
+namespace PCFG {
+
+struct Options;  // options.h defines it with the `struct` key
+// Command-line tool registered under the name "pcfg-score".
+class PcfgScore : public Tool {
+ public:
+  PcfgScore() : Tool("pcfg-score") {}
+  virtual int Main(int, char *[]);  // runs the tool on (argc, argv)
+ private:
+  void ProcessOptions(int, char *[], Options &) const;  // parses (argc, argv)
+};
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/pcfg-score/tree_scorer.cc b/scripts/training/phrase-extract/pcfg-score/tree_scorer.cc
new file mode 100644
index 000000000..5f695e4fc
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-score/tree_scorer.cc
@@ -0,0 +1,68 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "tree_scorer.h"
+
+#include <cassert>
+
+namespace Moses {
+namespace PCFG {
+
+TreeScorer::TreeScorer(const Pcfg &pcfg, const Vocabulary &non_term_vocab)
+    : pcfg_(pcfg)  // both arguments are stored as references, not copied
+    , non_term_vocab_(non_term_vocab) {
+}
+
+// Sets root's score to the score of the rule applied at root plus the scores
+// of its non-preterminal children.  Leaves and preterminals are accepted
+// unchanged.  Returns false as soon as a rule is not found in the PCFG.
+bool TreeScorer::Score(PcfgTree &root) const {
+  if (root.IsPreterminal() || root.IsLeaf()) {
+    return true;
+  }
+
+  const std::vector<PcfgTree *> &kids = root.children();
+  // Lookup key: id of the root label followed by the ids of the child labels.
+  std::vector<size_t> rule;
+  rule.reserve(kids.size() + 1);
+  rule.push_back(non_term_vocab_.Lookup(root.label()));
+
+  double subtree_score = 0.0;
+  for (size_t i = 0; i < kids.size(); ++i) {
+    PcfgTree &kid = *kids[i];
+    assert(!kid.IsLeaf());
+    rule.push_back(non_term_vocab_.Lookup(kid.label()));
+    if (!Score(kid)) {
+      return false;
+    }
+    if (!kid.IsPreterminal()) {
+      subtree_score += kid.score();
+    }
+  }
+
+  double rule_score;
+  if (!pcfg_.Lookup(rule, rule_score)) {
+    return false;
+  }
+  root.set_score(subtree_score + rule_score);
+  return true;
+}
+
+}  // namespace PCFG
+}  // namespace Moses
diff --git a/scripts/training/phrase-extract/pcfg-score/tree_scorer.h b/scripts/training/phrase-extract/pcfg-score/tree_scorer.h
new file mode 100644
index 000000000..36f4e1e99
--- /dev/null
+++ b/scripts/training/phrase-extract/pcfg-score/tree_scorer.h
@@ -0,0 +1,47 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+#ifndef PCFG_SCORE_TREE_SCORER_H_
+#define PCFG_SCORE_TREE_SCORER_H_
+
+#include "pcfg-common/pcfg.h"
+#include "pcfg-common/pcfg_tree.h"
+#include "pcfg-common/typedef.h"
+
+namespace Moses {
+namespace PCFG {
+
+class TreeScorer {  // scores PcfgTrees against a fixed Pcfg
+ public:
+  TreeScorer(const Pcfg &, const Vocabulary &);  // both kept by reference
+
+  // Score tree according to PCFG.  Returns false if unsuccessful (due to
+  // missing rule).
+  bool Score(PcfgTree &) const;
+
+ private:
+  const Pcfg &pcfg_;  // rule scores, queried with Lookup()
+  const Vocabulary &non_term_vocab_;  // maps node labels to the ids in a key
+};
+
+}  // namespace PCFG
+}  // namespace Moses
+
+#endif
diff --git a/scripts/training/phrase-extract/score.cpp b/scripts/training/phrase-extract/score.cpp
index 8bcc9be3b..c5fb0b99f 100644
--- a/scripts/training/phrase-extract/score.cpp
+++ b/scripts/training/phrase-extract/score.cpp
@@ -72,6 +72,7 @@ void calcNTLengthProb(const vector< PhraseAlignment* > &phrasePairs
 LexicalTable lexTable;
 bool inverseFlag = false;
 bool hierarchicalFlag = false;
+bool pcfgFlag = false;
 bool wordAlignmentFlag = false;
 bool goodTuringFlag = false;
 bool kneserNeyFlag = false;
@@ -108,6 +109,9 @@ int main(int argc, char* argv[])
     } else if (strcmp(argv[i],"--Hierarchical") == 0) {
       hierarchicalFlag = true;
       cerr << "processing hierarchical rules\n";
+    } else if (strcmp(argv[i],"--PCFG") == 0) {
+      pcfgFlag = true;
+      cerr << "including PCFG scores\n";
     } else if (strcmp(argv[i],"--WordAlignment") == 0) {
       wordAlignmentFlag = true;
       cerr << "outputing word alignment" << endl;
@@ -193,6 +197,7 @@ int main(int argc, char* argv[])
 	
   // loop through all extracted phrase translations
   float lastCount = 0.0f;
+  float lastPcfgSum = 0.0f;
   vector< PhraseAlignment > phrasePairsWithSameF;
   int i=0;
   char line[LINE_MAX_LENGTH],lastLine[LINE_MAX_LENGTH];
@@ -207,6 +212,7 @@ int main(int argc, char* argv[])
     // identical to last line? just add count
     if (strcmp(line,lastLine) == 0) {
       lastPhrasePair->count += lastCount;
+      lastPhrasePair->pcfgSum += lastPcfgSum;
       continue;
     }
     strcpy( lastLine, line );
@@ -215,10 +221,12 @@ int main(int argc, char* argv[])
     PhraseAlignment phrasePair;
     phrasePair.create( line, i );
     lastCount = phrasePair.count;
+    lastPcfgSum = phrasePair.pcfgSum;
 
     // only differs in count? just add count
     if (lastPhrasePair != NULL && lastPhrasePair->equals( phrasePair )) {
       lastPhrasePair->count += phrasePair.count;
+      lastPhrasePair->pcfgSum += phrasePair.pcfgSum;
       continue;
     }
 
@@ -438,6 +446,16 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
       countOfCounts[ countInt ]++;
   }
 
+  // compute PCFG score: summed pcfgSum of the grouped phrase pairs, divided by count
+  float pcfgScore = 0.0f;  // initialised so it is never indeterminate when the branch below is skipped
+  if (pcfgFlag && !inverseFlag) {
+    float pcfgSum = 0;
+    for(size_t i=0; i<phrasePair.size(); ++i) {
+        pcfgSum += phrasePair[i]->pcfgSum;
+    }
+    pcfgScore = pcfgSum / count;
+  }
+
   // output phrases
   const PHRASE &phraseS = phrasePair[0]->GetSource();
   const PHRASE &phraseT = phrasePair[0]->GetTarget();
@@ -493,6 +511,11 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
     phraseTableFile << " " << ( logProbFlag ? negLogProb*log(penalty) : penalty );
   }
 
+  // target-side PCFG score
+  if (pcfgFlag && !inverseFlag) {
+    phraseTableFile << " " << pcfgScore;
+  }
+
   phraseTableFile << " ||| ";
 
   // alignment info for non-terminals
diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index 1a7cb3a39..41ea2d682 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -105,6 +105,7 @@ $_HELP = 1
 		       'glue-grammar-file=s' => \$_GLUE_GRAMMAR_FILE,
 		       'unknown-word-label-file=s' => \$_UNKNOWN_WORD_LABEL_FILE,
 		       'ghkm' => \$_GHKM,
+		       'pcfg' => \$_PCFG,
 		       'extract-options=s' => \$_EXTRACT_OPTIONS,
 		       'score-options=s' => \$_SCORE_OPTIONS,
 		       'source-syntax' => \$_SOURCE_SYNTAX,
@@ -1373,6 +1374,7 @@ sub extract_phrase {
         $cmd = "$RULE_EXTRACT $alignment_file_e $alignment_file_f $alignment_file_a $extract_file";
         $cmd .= " --GlueGrammar $___GLUE_GRAMMAR_FILE" if $_GLUE_GRAMMAR;
         $cmd .= " --UnknownWordLabel $_UNKNOWN_WORD_LABEL_FILE" if $_TARGET_SYNTAX && defined($_UNKNOWN_WORD_LABEL_FILE);
+        $cmd .= " --PCFG" if $_PCFG;
         if (!defined($_GHKM)) {
           $cmd .= " --SourceSyntax" if $_SOURCE_SYNTAX;
           $cmd .= " --TargetSyntax" if $_TARGET_SYNTAX;
@@ -1503,6 +1505,7 @@ sub score_phrase_phrase_extract {
         $cmd .= " --UnalignedPenalty" if $UNALIGNED_COUNT;
         $cmd .= " --UnalignedFunctionWordPenalty ".($inverse ? $UNALIGNED_FW_F : $UNALIGNED_FW_E) if $UNALIGNED_FW_COUNT;
         $cmd .= " --MinCountHierarchical $MIN_COUNT_HIERARCHICAL" if $MIN_COUNT_HIERARCHICAL;
+        $cmd .= " --PCFG" if $_PCFG;
         $cmd .= " $CORE_SCORE_OPTIONS" if defined($_SCORE_OPTIONS);
 
 				# sorting
@@ -1801,6 +1804,7 @@ sub create_ini {
    $basic_weight_count /= 2 if defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /OnlyDirect/;
    $basic_weight_count++ unless defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /NoPhraseCount/; # phrase count feature
    $basic_weight_count++ if defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /LowCountFeature/; # low count feature
+   $basic_weight_count++ if $_PCFG;
    foreach my $f (split(/\+/,$___TRANSLATION_FACTORS)) {
      $num_of_ttables++;
      my $ff = $f;

From a72744c49b7821bf0355e7fe4638c392a74b0d60 Mon Sep 17 00:00:00 2001
From: Phil Williams <philip.williams@mac.com>
Date: Fri, 25 May 2012 17:39:21 +0100
Subject: [PATCH 20/38] Fix bug in previous commit.

---
 scripts/training/train-model.perl.missing_bin_dir | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index 41ea2d682..869f979fc 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -30,7 +30,7 @@ my($_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_
    $_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE,
    @_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS,
    $_DONT_ZIP,  $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG,
-   $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_EXTRACT_OPTIONS,$_SCORE_OPTIONS,
+   $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_PCFG,$_EXTRACT_OPTIONS,$_SCORE_OPTIONS,
    $_PHRASE_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
    $_MEMSCORE, $_FINAL_ALIGNMENT_MODEL,
    $_CONTINUE,$_MAX_LEXICAL_REORDERING,$_DO_STEPS,

From 180dd773f6507829c551c5512aaad7128f958385 Mon Sep 17 00:00:00 2001
From: phikoehn <pkoehn@inf.ed.ac.uk>
Date: Sat, 26 May 2012 00:06:34 +0100
Subject: [PATCH 21/38] bolt specific settings

---
 scripts/ems/experiment.meta | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index 51ac0f67a..aed6049ea 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -738,6 +738,20 @@ ibm-bleu-c
         ignore-unless: ibm-bleu-c
         rerun-on-change: ibm-bleu-c
         template: $ibm-bleu-c -s $input-sgm -r IN1 -t IN > OUT
+bolt-bleu
+	in: detokenized-output
+	out: bolt-bleu-score
+	default-name: evaluation/bolt-bleu
+	ignore-unless: bolt-bleu
+	rerun-on-change: bolt-bleu
+	template: $bolt-bleu IN > OUT
+bolt-bleu-c
+	in: detokenized-output
+	out: bolt-bleu-c-score
+	default-name: evaluation/bolt-bleu-c
+	ignore-unless: bolt-bleu-c
+	rerun-on-change: bolt-bleu-c
+	template: $bolt-bleu-c IN > OUT
 multi-bleu
 	in: cleaned-output reference
 	out: multi-bleu-score
@@ -793,6 +807,6 @@ analysis-precision
 
 [REPORTING] single
 report
-	in: EVALUATION:nist-bleu-score EVALUATION:nist-bleu-c-score EVALUATION:multi-bleu-score EVALUATION:multi-bleu-c-score EVALUATION:meteor-score EVALUATION:ter-score EVALUATION:wer-score EVALUATION:ibm-bleu-score EVALUATION:ibm-bleu-c-score EVALUATION:analysis EVALUATION:analysis-coverage EVALUATION:analysis-prec TRAINING:biconcor-model
+	in: EVALUATION:nist-bleu-score EVALUATION:nist-bleu-c-score EVALUATION:bolt-bleu-score EVALUATION:bolt-bleu-c-score EVALUATION:multi-bleu-score EVALUATION:multi-bleu-c-score EVALUATION:meteor-score EVALUATION:ter-score EVALUATION:wer-score EVALUATION:ibm-bleu-score EVALUATION:ibm-bleu-c-score EVALUATION:analysis EVALUATION:analysis-coverage EVALUATION:analysis-prec TRAINING:biconcor-model
 	out: report
 	default-name: evaluation/report

From 561b9ac9567d3e5b0bbc56fdae3b29961b8bc728 Mon Sep 17 00:00:00 2001
From: phikoehn <pkoehn@inf.ed.ac.uk>
Date: Sat, 26 May 2012 00:09:50 +0100
Subject: [PATCH 22/38] minor fixes

---
 scripts/ems/experiment.machines               |  3 ++-
 scripts/ems/experiment.perl                   |  2 +-
 .../generic-multicore-parallelizer.perl       | 15 +++++++++++---
 .../ems/support/report-experiment-scores.perl | 20 +++++++++++++++++++
 scripts/tokenizer/deescape-special-chars.perl |  2 ++
 scripts/tokenizer/detokenizer.perl            |  7 +++++--
 scripts/tokenizer/escape-special-chars.perl   |  8 +++++---
 scripts/tokenizer/tokenizer.perl              |  4 ++--
 scripts/training/mert-moses.pl                |  4 ++--
 scripts/training/phrase-extract/XmlTree.cpp   | 10 ++++++++++
 10 files changed, 61 insertions(+), 14 deletions(-)

diff --git a/scripts/ems/experiment.machines b/scripts/ems/experiment.machines
index 9e0294d60..7fdecd9cd 100644
--- a/scripts/ems/experiment.machines
+++ b/scripts/ems/experiment.machines
@@ -1,3 +1,4 @@
 cluster: townhill seville hermes lion seville sannox lutzow frontend
-multicore-8: tyr thor odin crom saxnot vali vili freyja bragi hoenir
+multicore-8: tyr thor odin crom
+multicore-16: saxnot vali vili freyja bragi hoenir
 multicore-24: syn hel skaol saga
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index 59bd2788f..45537681a 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -1020,7 +1020,7 @@ sub execute_steps {
     }
 	}
 
-	print "number of steps doable or running: ".(scalar keys %DO)."\n";
+	print "number of steps doable or running: ".(scalar keys %DO)." at ".`date`;
   foreach my $step (keys %DO) { print "\t".($DO{$step}==2?"running: ":"doable: ").$DO_STEP[$step]."\n"; }
 	return unless scalar keys %DO;
 	
diff --git a/scripts/ems/support/generic-multicore-parallelizer.perl b/scripts/ems/support/generic-multicore-parallelizer.perl
index 862536137..d7e030ad2 100755
--- a/scripts/ems/support/generic-multicore-parallelizer.perl
+++ b/scripts/ems/support/generic-multicore-parallelizer.perl
@@ -3,6 +3,7 @@
 use strict;
 
 my $cores = 8;
+my $serial = 1;
 my ($infile,$outfile,$cmd,$tmpdir);
 my $parent = $$; 
 
@@ -12,6 +13,7 @@ GetOptions('cores=i' => \$cores,
 	   'in=s' => \$infile,
 	   'out=s' => \$outfile,
 	   'cmd=s' => \$cmd,
+     'serial=i' => \$serial
     ) or exit(1);
 
 die("ERROR: specify command with -cmd") unless $cmd;
@@ -24,8 +26,9 @@ die("ERROR: you need to specify a tempdir with -tmpdir") unless $tmpdir;
 
 # create split input files
 my $sentenceN = `cat $infile | wc -l`;
-my $splitN = int(($sentenceN+$cores-0.5) / $cores); 
-`split -a 2 -l $splitN $infile $tmpdir/in-$parent-`;
+my $splitN = int(($sentenceN+($cores*$serial)-0.5) / ($cores*$serial)); # lines per chunk: sentence count / (cores*serial), rounded up
+print STDERR "split -a 4 -l $splitN $infile $tmpdir/in-$parent-\n";
+`split -a 4 -l $splitN $infile $tmpdir/in-$parent-`;
 
 # find out the names of the processes
 my @CORE=`ls $tmpdir/in-$parent-*`;
@@ -33,17 +36,23 @@ chomp(@CORE);
 grep(s/.+in\-\d+\-([a-z]+)$/$1/e,@CORE);
 
 # create core scripts
-foreach my $core (@CORE){
+for(my $i=0;$i<scalar(@CORE);$i++) { # one bash script per group of up to $serial consecutive chunks
+    my $core = $CORE[$i];
     open(BASH,">$tmpdir/core-$parent-$core.bash") or die "Cannot open: $!";
     print  BASH "#bash\n\n";
 #    print  BASH "export PATH=$ENV{PATH}\n\n";
     printf BASH $cmd."\n", "$tmpdir/in-$parent-$core", "$tmpdir/out-$parent-$core";
+    for(my $j=2;$j<=$serial && $i<$#CORE;$j++) { # stop at the last chunk: the final group may hold fewer than $serial
+      $core = $CORE[++$i];
+      printf BASH $cmd."\n", "$tmpdir/in-$parent-$core", "$tmpdir/out-$parent-$core";
+    }
     close(BASH);
 }
 
 # fork processes
 my (@CHILDREN);
 foreach my $core (@CORE){
+    next unless -e "$tmpdir/core-$parent-$core.bash";
     my $child = fork();
     if (! $child) { # I am child
 	print STDERR "running child $core\n";
diff --git a/scripts/ems/support/report-experiment-scores.perl b/scripts/ems/support/report-experiment-scores.perl
index 2efd86517..e881ec17a 100755
--- a/scripts/ems/support/report-experiment-scores.perl
+++ b/scripts/ems/support/report-experiment-scores.perl
@@ -14,6 +14,10 @@ $TYPE{"multi-bleu-c"}= "BLEU-c";
 $TYPE{"ibm-bleu"}    = "IBM";
 $TYPE{"ibm-bleu-c"}  = "IBM-c";
 $TYPE{"meteor"} = "METEOR";
+$TYPE{"bolt-bleu"}     = "BLEU";
+$TYPE{"bolt-bleu-c"}   = "BLEU-c";
+$TYPE{"bolt-ter"}      = "TER";
+$TYPE{"bolt-ter-c"}    = "TER-c";
 
 my %SCORE;
 my %AVERAGE;
@@ -60,6 +64,9 @@ sub process {
     elsif ($type eq 'meteor') {
 	$SCORE{$set} .= &extract_meteor($file,$type)." ";
     }
+    elsif ($type =~ /^bolt-(.+)$/) {
+      $SCORE{$set} .= &extract_bolt($file,$1)." ";
+    }
 }
 
 sub extract_nist_bleu {
@@ -115,6 +122,19 @@ sub extract_multi_bleu {
     return $output.$TYPE{$type};
 }
 
+sub extract_bolt { # returns "<score*100> <label>" for one BOLT metric; $type is bleu, bleu-c, ter or ter-c
+  my ($file,$type) = @_;
+  my %HEADING = ('bleu' => 'Lowercase BLEU', 'bleu-c' => 'Cased BLEU', 'ter' => 'Lowercase TER', 'ter-c' => 'Cased TER'); # text preceding each score in $file
+  my ($heading,$score) = ($HEADING{$type});
+  foreach (`cat $file`) {
+    $score = $1 if defined($heading) && /\Q$heading\E\s+([\d\.]+)/; # the last matching line wins
+  }
+  # a missing score is still reported as 0.00, but no longer silently
+  unless (defined($score)) { print STDERR "WARNING: no bolt-$type score found in $file\n"; $score = 0; }
+  my $output = sprintf("%.02f ",$score*100);
+  $AVERAGE{"bolt-".$type} += $score*100;
+  return $output.$TYPE{"bolt-".$type};
+}
 sub extract_meteor {
     my ($file,$type) = @_;
     my ($meteor, $precision);
diff --git a/scripts/tokenizer/deescape-special-chars.perl b/scripts/tokenizer/deescape-special-chars.perl
index c98e01ccc..55035ae6d 100755
--- a/scripts/tokenizer/deescape-special-chars.perl
+++ b/scripts/tokenizer/deescape-special-chars.perl
@@ -8,6 +8,8 @@ while(<STDIN>) {
   s/\&gt;/\>/g;
   s/\&bra;/\[/g;
   s/\&ket;/\]/g;
+  s/\&#91;/\[/g;
+  s/\&#93;/\]/g;
   s/\&amp;/\&/g;
   print $_;
 }
diff --git a/scripts/tokenizer/detokenizer.perl b/scripts/tokenizer/detokenizer.perl
index e2d7ea0bb..e55a1a26e 100755
--- a/scripts/tokenizer/detokenizer.perl
+++ b/scripts/tokenizer/detokenizer.perl
@@ -33,8 +33,9 @@ if ($HELP) {
 	exit;
 }
 
-die "No built-in rules for language $language, claim en for default behaviour."
-	if $language !~ /^(cs|en|fr|it)$/;
+if ($language !~ /^(cs|en|fr|it)$/) {
+  print STDERR "Warning: No built-in rules for language $language.\n"
+}
 
 if (!$QUIET) {
 	print STDERR "Detokenizer Version ".'$Revision: 4134 $'."\n";
@@ -70,6 +71,8 @@ sub detokenize {
   $text =~ s/\&gt;/\>/g;
   $text =~ s/\&bra;/\[/g;
   $text =~ s/\&ket;/\]/g;
+  $text =~ s/\&#91;/\[/g;
+  $text =~ s/\&#93;/\]/g;
   $text =~ s/\&amp;/\&/g;
 
 	my $word;
diff --git a/scripts/tokenizer/escape-special-chars.perl b/scripts/tokenizer/escape-special-chars.perl
index 5c4dc9bb3..f4c1b4dd5 100755
--- a/scripts/tokenizer/escape-special-chars.perl
+++ b/scripts/tokenizer/escape-special-chars.perl
@@ -6,18 +6,20 @@ while(<STDIN>) {
   chop;
 
   # avoid general madness
+  s/[\000-\010\013\016-\037]//g; # drop control chars, but keep \t \n \f \r so the \s+ rule below turns them into one space instead of fusing words
   s/\s+/ /g;
 	s/^ //g;
 	s/ $//g;
-  s/[\000-\037]//g;
 
   # special characters in moses
   s/\&/\&amp;/g;
   s/\|/\&bar;/g;
   s/\</\&lt;/g;
   s/\>/\&gt;/g;
-  s/\[/\&bra;/g;
-  s/\]/\&ket;/g;
+  s/\[/\&#91;/g;
+  s/\]/\&#93;/g;
   
+  # restore xml instructions
+  s/\&lt;(\S+) translation="([^\"]+)"&gt; (.+?) &lt;\/(\S+)&gt;/\<$1 translation=\"$2\"> $3 <\/$4>/g;
   print $_."\n";
 }
diff --git a/scripts/tokenizer/tokenizer.perl b/scripts/tokenizer/tokenizer.perl
index a97d5e160..70bb318f7 100755
--- a/scripts/tokenizer/tokenizer.perl
+++ b/scripts/tokenizer/tokenizer.perl
@@ -153,8 +153,8 @@ sub tokenize {
   $text =~ s/\|/\&bar;/g;
   $text =~ s/\</\&lt;/g;
   $text =~ s/\>/\&gt;/g;
-  $text =~ s/\[/\&bra;/g;
-  $text =~ s/\]/\&ket;/g;
+  $text =~ s/\[/\&#91;/g;
+  $text =~ s/\]/\&#93;/g;
 
 	#ensure final line break
 	$text .= "\n" unless $text =~ /\n$/;
diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
index 6ce8341c0..2abd5ef7c 100755
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@@ -1089,7 +1089,7 @@ sub get_order_of_scores_from_nbestlist {
   # return the score labels in order
   my $fname_or_source = shift;
   # print STDERR "Peeking at the beginning of nbestlist to get order of scores: $fname_or_source\n";
-  open my $fh, '<', $fname_or_source or die "Failed to get order of scores from nbestlist '$fname_or_source': $!";
+  open my $fh, $fname_or_source or die "Failed to get order of scores from nbestlist '$fname_or_source': $!";
   my $line = <$fh>;
   close $fh;
   die "Line empty in nbestlist '$fname_or_source'" if !defined $line;
@@ -1169,7 +1169,7 @@ sub create_config {
   }
 
   if (defined($sparse_weights_file)) {
-    push @{$P{"weights-file"}}, File::Spec->catfile($___WORKING_DIR, $sparse_weights_file);
+    push @{$P{"weight-file"}}, File::Spec->catfile($___WORKING_DIR, $sparse_weights_file);
   }
 
   # create new moses.ini decoder config file by cloning and overriding the original one
diff --git a/scripts/training/phrase-extract/XmlTree.cpp b/scripts/training/phrase-extract/XmlTree.cpp
index 716414f86..19825c02c 100644
--- a/scripts/training/phrase-extract/XmlTree.cpp
+++ b/scripts/training/phrase-extract/XmlTree.cpp
@@ -128,6 +128,16 @@ string unescape(const string& str)
       s += string("<");
     } else if (name == "gt") {
       s += string(">");
+    } else if (name == "#91") {
+      s += string("[");
+    } else if (name == "#93") {
+      s += string("]");
+    } else if (name == "bra") {
+      s += string("[");
+    } else if (name == "ket") {
+      s += string("]");
+    } else if (name == "bar") {
+      s += string("|");
     } else if (name == "amp") {
       s += string("&");
     } else if (name == "apos") {

From 145df588c2e03624156a38276b8bb339d8067c35 Mon Sep 17 00:00:00 2001
From: Phil Williams <philip.williams@mac.com>
Date: Sat, 26 May 2012 12:15:34 +0100
Subject: [PATCH 23/38] Fix some input handling bugs in pcfg-extract and
 pcfg-score.

---
 .../training/phrase-extract/pcfg-common/xml_tree_parser.cc  | 5 ++++-
 .../training/phrase-extract/pcfg-common/xml_tree_writer.h   | 6 ++++++
 scripts/training/phrase-extract/pcfg-score/pcfg_score.cc    | 4 ++--
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.cc b/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.cc
index 5c596a0fb..fd9d11334 100644
--- a/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.cc
+++ b/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.cc
@@ -47,7 +47,10 @@ std::auto_ptr<PcfgTree> XmlTreeParser::Parse(const std::string &line)
   }
   m_tree.ConnectNodes();
   SyntaxNode *root = m_tree.GetTop();
-  assert(root);
+  if (!root) {
+    // There is no XML tree.
+    return std::auto_ptr<PcfgTree>();
+  }
   m_words = tokenize(m_line.c_str());
   return ConvertTree(*root, m_words);
 }
diff --git a/scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h b/scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h
index 347c352bb..c5171a905 100644
--- a/scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h
+++ b/scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h
@@ -108,6 +108,12 @@ std::string XmlTreeWriter<InputTree>::Escape(const std::string &s) const {
       t += "&lt;";
     } else if (s[i] == '>') {
       t += "&gt;";
+    } else if (s[i] == '[') {
+      t += "&#91;";
+    } else if (s[i] == ']') {
+      t += "&#93;";
+    } else if (s[i] == '|') {
+      t += "&bar;";
     } else if (s[i] == '&') {
       t += "&amp;";
     } else if (s[i] == '\'') {
diff --git a/scripts/training/phrase-extract/pcfg-score/pcfg_score.cc b/scripts/training/phrase-extract/pcfg-score/pcfg_score.cc
index d780200ad..16691707b 100644
--- a/scripts/training/phrase-extract/pcfg-score/pcfg_score.cc
+++ b/scripts/training/phrase-extract/pcfg-score/pcfg_score.cc
@@ -78,14 +78,14 @@ int PcfgScore::Main(int argc, char *argv[]) {
       std::ostringstream msg;
       msg << "no tree at line " << line_num;
       Warn(msg.str());
-      std::cout << std::endl;
+      std::cout << line << std::endl;
       continue;
     }
     if (!scorer.Score(*tree)) {
       std::ostringstream msg;
       msg << "failed to score tree at line " << line_num;
       Warn(msg.str());
-      std::cout << std::endl;
+      std::cout << line << std::endl;
       continue;
     }
     writer.Write(*tree, std::cout);

From 4c90c88733ed2bfa5a131394f1e75eb2adf0a863 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Sat, 26 May 2012 12:21:32 +0100
Subject: [PATCH 24/38] make phrase-table.half naming consistent. Requested by
 Phil Williams

---
 scripts/generic/score-parallel.perl               | 11 ++---------
 scripts/training/train-model.perl.missing_bin_dir | 10 +++++-----
 2 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/scripts/generic/score-parallel.perl b/scripts/generic/score-parallel.perl
index fbb4d4d02..b399a83ba 100755
--- a/scripts/generic/score-parallel.perl
+++ b/scripts/generic/score-parallel.perl
@@ -152,7 +152,7 @@ $cmd = "\n\nOH SHIT. This should have been filled in \n\n";
 if ($fileCount == 1 && !$doSort)
 {
   my $numStr = NumStr(0);
-  $cmd = "mv $TMPDIR/phrase-table.half.$numStr.gz $ptHalf.gz \n";
+  $cmd = "mv $TMPDIR/phrase-table.half.$numStr.gz $ptHalf";
 }
 else
 {
@@ -162,14 +162,7 @@ else
     $cmd .= "| LC_ALL=C $sortCmd -T $TMPDIR ";
   }
 
-  $cmd .= " | gzip -c >";
-
-  if ($doSort) {
-    $cmd .= " $ptHalf.sorted.gz \n";
-  }
-  else {
-    $cmd .= " $ptHalf.gz \n";
-  }
+  $cmd .= " | gzip -c > $ptHalf";
 }
 print STDERR $cmd;
 systemCheck($cmd);
diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index 869f979fc..61e49970f 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -1486,7 +1486,7 @@ sub score_phrase_phrase_extract {
       if ($pid == 0)
       {
 	      next if $___CONTINUE && -e "$ttable_file.half.$direction";
-	      next if $___CONTINUE && $direction eq "e2f" && -e "$ttable_file.half.e2f.sorted";
+	      next if $___CONTINUE && $direction eq "e2f" && -e "$ttable_file.half.e2f.gz";
 	      my $inverse = "";
               my $extract_filename = $extract_file;
 	      if ($direction eq "e2f") {
@@ -1497,7 +1497,7 @@ sub score_phrase_phrase_extract {
 
 	      print STDERR "(6.".($substep++).")  creating table half $ttable_file.half.$direction @ ".`date`;
 
-        my $cmd = "$PHRASE_SCORE $extract $lexical_file.$direction $ttable_file.half.$direction $inverse";
+        my $cmd = "$PHRASE_SCORE $extract $lexical_file.$direction $ttable_file.half.$direction.gz $inverse";
         $cmd .= " --Hierarchical" if $_HIERARCHICAL;
         $cmd .= " --WordAlignment" if $_PHRASE_WORD_ALIGNMENT;
         $cmd .= " --KneserNey" if $KNESER_NEY;
@@ -1543,15 +1543,15 @@ sub score_phrase_phrase_extract {
     # merging the two halves
     print STDERR "(6.6) consolidating the two halves @ ".`date`;
     return if $___CONTINUE && -e "$ttable_file.gz";
-    my $cmd = "$PHRASE_CONSOLIDATE $ttable_file.half.f2e.gz $ttable_file.half.e2f.sorted.gz $ttable_file.gz";
+    my $cmd = "$PHRASE_CONSOLIDATE $ttable_file.half.f2e.gz $ttable_file.half.e2f.gz $ttable_file.gz";
     $cmd .= " --Hierarchical" if $_HIERARCHICAL;
     $cmd .= " --LogProb" if $LOG_PROB;
     $cmd .= " --NegLogProb" if $NEG_LOG_PROB;
     $cmd .= " --OnlyDirect" if $ONLY_DIRECT;
     $cmd .= " --NoPhraseCount" unless $PHRASE_COUNT;
     $cmd .= " --LowCountFeature" if $LOW_COUNT;
-    $cmd .= " --GoodTuring $ttable_file.half.f2e.coc" if $GOOD_TURING;
-    $cmd .= " --KneserNey $ttable_file.half.f2e.coc" if $KNESER_NEY;
+    $cmd .= " --GoodTuring $ttable_file.half.f2e.gz.coc" if $GOOD_TURING;
+    $cmd .= " --KneserNey $ttable_file.half.f2e.gz.coc" if $KNESER_NEY;
     safesystem($cmd) or die "ERROR: Consolidating the two phrase table halves failed";
     if (! $debug) { safesystem("rm -f $ttable_file.half.*") or die("ERROR"); }
 }

From cae4f803c3186d3f0c0143bb2f93be1929f7119a Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Sat, 26 May 2012 12:27:50 +0100
Subject: [PATCH 25/38] faster consolidating - redirect to stdout then zip

---
 scripts/training/train-model.perl.missing_bin_dir | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index 61e49970f..aac6cef96 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -1543,7 +1543,7 @@ sub score_phrase_phrase_extract {
     # merging the two halves
     print STDERR "(6.6) consolidating the two halves @ ".`date`;
     return if $___CONTINUE && -e "$ttable_file.gz";
-    my $cmd = "$PHRASE_CONSOLIDATE $ttable_file.half.f2e.gz $ttable_file.half.e2f.gz $ttable_file.gz";
+    my $cmd = "$PHRASE_CONSOLIDATE $ttable_file.half.f2e.gz $ttable_file.half.e2f.gz /dev/stdout";
     $cmd .= " --Hierarchical" if $_HIERARCHICAL;
     $cmd .= " --LogProb" if $LOG_PROB;
     $cmd .= " --NegLogProb" if $NEG_LOG_PROB;
@@ -1552,6 +1552,9 @@ sub score_phrase_phrase_extract {
     $cmd .= " --LowCountFeature" if $LOW_COUNT;
     $cmd .= " --GoodTuring $ttable_file.half.f2e.gz.coc" if $GOOD_TURING;
     $cmd .= " --KneserNey $ttable_file.half.f2e.gz.coc" if $KNESER_NEY;
+    
+    $cmd .= " | gzip -c > $ttable_file.gz";
+    
     safesystem($cmd) or die "ERROR: Consolidating the two phrase table halves failed";
     if (! $debug) { safesystem("rm -f $ttable_file.half.*") or die("ERROR"); }
 }

From 82580280bc0b30607b00a55ffe0f22d5665269a3 Mon Sep 17 00:00:00 2001
From: Phil Williams <philip.williams@mac.com>
Date: Sat, 26 May 2012 13:13:23 +0100
Subject: [PATCH 26/38] Fix compile error by using std::size_t instead of
 size_t. Thanks to Tomas Hudik for reporting that.

---
 .../phrase-extract/pcfg-common/numbered_set.h        |  4 ++--
 scripts/training/phrase-extract/pcfg-common/pcfg.cc  |  8 ++++----
 scripts/training/phrase-extract/pcfg-common/pcfg.h   |  2 +-
 .../phrase-extract/pcfg-common/syntax_tree.h         |  2 +-
 .../phrase-extract/pcfg-common/xml_tree_writer.h     |  4 ++--
 .../phrase-extract/pcfg-extract/pcfg_extract.cc      |  2 +-
 .../phrase-extract/pcfg-extract/rule_collection.cc   | 12 ++++++------
 .../phrase-extract/pcfg-extract/rule_collection.h    |  6 +++---
 .../phrase-extract/pcfg-extract/rule_extractor.cc    |  4 ++--
 .../training/phrase-extract/pcfg-score/pcfg_score.cc |  2 +-
 .../phrase-extract/pcfg-score/tree_scorer.cc         |  2 +-
 11 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/scripts/training/phrase-extract/pcfg-common/numbered_set.h b/scripts/training/phrase-extract/pcfg-common/numbered_set.h
index f88d710ed..15e768b4c 100644
--- a/scripts/training/phrase-extract/pcfg-common/numbered_set.h
+++ b/scripts/training/phrase-extract/pcfg-common/numbered_set.h
@@ -35,7 +35,7 @@ namespace PCFG {
 // Stores a set of elements of type T, each of which is allocated an integral
 // ID of type I.  IDs are contiguous starting at 0.  Individual elements cannot
 // be removed once inserted (but the whole set can be cleared).
-template<typename T, typename I=size_t>
+template<typename T, typename I=std::size_t>
 class NumberedSet {
  private:
   typedef boost::unordered_map<T, I> ElementToIdMap;
@@ -54,7 +54,7 @@ class NumberedSet {
   static I NullId() { return std::numeric_limits<I>::max(); }
 
   bool Empty() const { return id_to_element_.empty(); }
-  size_t Size() const { return id_to_element_.size(); }
+  std::size_t Size() const { return id_to_element_.size(); }
 
   // Insert the given object and return its ID.
   I Insert(const T &);
diff --git a/scripts/training/phrase-extract/pcfg-common/pcfg.cc b/scripts/training/phrase-extract/pcfg-common/pcfg.cc
index d045b820b..054e20a48 100644
--- a/scripts/training/phrase-extract/pcfg-common/pcfg.cc
+++ b/scripts/training/phrase-extract/pcfg-common/pcfg.cc
@@ -50,7 +50,7 @@ void Pcfg::Read(std::istream &input, Vocabulary &vocab) {
   Key key;
   while (std::getline(input, line)) {
     // Read LHS.
-    size_t pos = line.find("|||");
+    std::size_t pos = line.find("|||");
     if (pos == std::string::npos) {
       throw Exception("missing first delimiter");
     }
@@ -58,7 +58,7 @@ void Pcfg::Read(std::istream &input, Vocabulary &vocab) {
     boost::trim(lhs_string);
 
     // Read RHS.
-    size_t begin = pos+3;
+    std::size_t begin = pos+3;
     pos = line.find("|||", begin);
     if (pos == std::string::npos) {
       throw Exception("missing second delimiter");
@@ -92,8 +92,8 @@ void Pcfg::Write(const Vocabulary &vocab, std::ostream &output) const {
   for (const_iterator p = begin(); p != end(); ++p) {
     const Key &key = p->first;
     double score = p->second;
-    std::vector<size_t>::const_iterator q = key.begin();
-    std::vector<size_t>::const_iterator end = key.end();
+    std::vector<std::size_t>::const_iterator q = key.begin();
+    std::vector<std::size_t>::const_iterator end = key.end();
     output << vocab.Lookup(*q++) << " |||";
     while (q != end) {
       output << " " << vocab.Lookup(*q++);
diff --git a/scripts/training/phrase-extract/pcfg-common/pcfg.h b/scripts/training/phrase-extract/pcfg-common/pcfg.h
index 757eea449..b87336584 100644
--- a/scripts/training/phrase-extract/pcfg-common/pcfg.h
+++ b/scripts/training/phrase-extract/pcfg-common/pcfg.h
@@ -33,7 +33,7 @@ namespace PCFG {
 
 class Pcfg {
  public:
-  typedef std::vector<size_t> Key;
+  typedef std::vector<std::size_t> Key;
   typedef std::map<Key, double> Map;
   typedef Map::iterator iterator;
   typedef Map::const_iterator const_iterator;
diff --git a/scripts/training/phrase-extract/pcfg-common/syntax_tree.h b/scripts/training/phrase-extract/pcfg-common/syntax_tree.h
index 37f72dd58..89c6ec0c3 100644
--- a/scripts/training/phrase-extract/pcfg-common/syntax_tree.h
+++ b/scripts/training/phrase-extract/pcfg-common/syntax_tree.h
@@ -80,7 +80,7 @@ class SyntaxTree : public SyntaxTreeBase<T, SyntaxTree<T> > {
 
 template<typename T, typename DerivedType>
 SyntaxTreeBase<T, DerivedType>::~SyntaxTreeBase() {
-  for (size_t i = 0; i < children_.size(); ++i) {
+  for (std::size_t i = 0; i < children_.size(); ++i) {
     delete children_[i];
   }
 }
diff --git a/scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h b/scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h
index c5171a905..6a9a3de05 100644
--- a/scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h
+++ b/scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h
@@ -101,9 +101,9 @@ void XmlTreeWriter<InputTree>::Write(const InputTree &tree,
 template<typename InputTree>
 std::string XmlTreeWriter<InputTree>::Escape(const std::string &s) const {
   std::string t;
-  size_t len = s.size();
+  std::size_t len = s.size();
   t.reserve(len);
-  for (size_t i = 0; i < len; ++i) {
+  for (std::size_t i = 0; i < len; ++i) {
     if (s[i] == '<') {
       t += "&lt;";
     } else if (s[i] == '>') {
diff --git a/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.cc b/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.cc
index 151c9959c..71c2e31c3 100644
--- a/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.cc
+++ b/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.cc
@@ -56,7 +56,7 @@ int PcfgExtract::Main(int argc, char *argv[]) {
   RuleCollection rule_collection;
   XmlTreeParser parser;
   std::string line;
-  size_t line_num = 0;
+  std::size_t line_num = 0;
   std::auto_ptr<PcfgTree> tree;
   while (std::getline(std::cin, line)) {
     ++line_num;
diff --git a/scripts/training/phrase-extract/pcfg-extract/rule_collection.cc b/scripts/training/phrase-extract/pcfg-extract/rule_collection.cc
index 503b1a9e6..32b63e0ef 100644
--- a/scripts/training/phrase-extract/pcfg-extract/rule_collection.cc
+++ b/scripts/training/phrase-extract/pcfg-extract/rule_collection.cc
@@ -26,24 +26,24 @@
 namespace Moses {
 namespace PCFG {
 
-void RuleCollection::Add(size_t lhs, const std::vector<size_t> &rhs) {
+void RuleCollection::Add(std::size_t lhs, const std::vector<std::size_t> &rhs) {
   ++collection_[lhs][rhs];
 }
 
 void RuleCollection::CreatePcfg(Pcfg &pcfg) {
-  std::vector<size_t> key;
+  std::vector<std::size_t> key;
   for (const_iterator p = begin(); p != end(); ++p) {
-    size_t lhs = p->first;
+    std::size_t lhs = p->first;
     const RhsCountMap &rhs_counts = p->second;
-    size_t total = 0;
+    std::size_t total = 0;
     for (RhsCountMap::const_iterator q = rhs_counts.begin();
          q != rhs_counts.end(); ++q) {
       total += q->second;
     }
     for (RhsCountMap::const_iterator q = rhs_counts.begin();
          q != rhs_counts.end(); ++q) {
-      const std::vector<size_t> &rhs = q->first;
-      size_t count = q->second;
+      const std::vector<std::size_t> &rhs = q->first;
+      std::size_t count = q->second;
       double score = std::log(static_cast<double>(count) /
                               static_cast<double>(total));
       key.clear();
diff --git a/scripts/training/phrase-extract/pcfg-extract/rule_collection.h b/scripts/training/phrase-extract/pcfg-extract/rule_collection.h
index 1b768dd21..452fa0e97 100644
--- a/scripts/training/phrase-extract/pcfg-extract/rule_collection.h
+++ b/scripts/training/phrase-extract/pcfg-extract/rule_collection.h
@@ -33,8 +33,8 @@ namespace PCFG {
 // Contains PCFG rules and their counts.
 class RuleCollection {
  public:
-  typedef boost::unordered_map<std::vector<size_t>, size_t> RhsCountMap;
-  typedef boost::unordered_map<size_t, RhsCountMap> Map;
+  typedef boost::unordered_map<std::vector<std::size_t>, std::size_t> RhsCountMap;
+  typedef boost::unordered_map<std::size_t, RhsCountMap> Map;
   typedef Map::iterator iterator;
   typedef Map::const_iterator const_iterator;
 
@@ -46,7 +46,7 @@ class RuleCollection {
   iterator end() { return collection_.end(); }
   const_iterator end() const { return collection_.end(); }
 
-  void Add(size_t, const std::vector<size_t> &);
+  void Add(std::size_t, const std::vector<std::size_t> &);
   void CreatePcfg(Pcfg &);
 
  private:
diff --git a/scripts/training/phrase-extract/pcfg-extract/rule_extractor.cc b/scripts/training/phrase-extract/pcfg-extract/rule_extractor.cc
index 48a82a6d0..217574e7d 100644
--- a/scripts/training/phrase-extract/pcfg-extract/rule_extractor.cc
+++ b/scripts/training/phrase-extract/pcfg-extract/rule_extractor.cc
@@ -33,8 +33,8 @@ void RuleExtractor::Extract(const PcfgTree &tree, RuleCollection &rc) const {
     return;
   }
 
-  size_t lhs = non_term_vocab_.Insert(tree.label());
-  std::vector<size_t> rhs;
+  std::size_t lhs = non_term_vocab_.Insert(tree.label());
+  std::vector<std::size_t> rhs;
 
   const std::vector<PcfgTree *> &children = tree.children();
   rhs.reserve(children.size());
diff --git a/scripts/training/phrase-extract/pcfg-score/pcfg_score.cc b/scripts/training/phrase-extract/pcfg-score/pcfg_score.cc
index 16691707b..345d7fc60 100644
--- a/scripts/training/phrase-extract/pcfg-score/pcfg_score.cc
+++ b/scripts/training/phrase-extract/pcfg-score/pcfg_score.cc
@@ -63,7 +63,7 @@ int PcfgScore::Main(int argc, char *argv[]) {
   XmlTreeParser parser;
   XmlTreeWriter<PcfgTree> writer;
   std::string line;
-  size_t line_num = 0;
+  std::size_t line_num = 0;
   std::auto_ptr<PcfgTree> tree;
   while (std::getline(std::cin, line)) {
     ++line_num;
diff --git a/scripts/training/phrase-extract/pcfg-score/tree_scorer.cc b/scripts/training/phrase-extract/pcfg-score/tree_scorer.cc
index 5f695e4fc..f9ce97ae0 100644
--- a/scripts/training/phrase-extract/pcfg-score/tree_scorer.cc
+++ b/scripts/training/phrase-extract/pcfg-score/tree_scorer.cc
@@ -38,7 +38,7 @@ bool TreeScorer::Score(PcfgTree &root) const {
 
   double log_prob = 0.0;
 
-  std::vector<size_t> key;
+  std::vector<std::size_t> key;
   key.reserve(children.size()+1);
   key.push_back(non_term_vocab_.Lookup(root.label()));
 

From e3e62846bfe84d9a7edd78affd23f020d8ae2468 Mon Sep 17 00:00:00 2001
From: Phil Williams <philip.williams@mac.com>
Date: Sun, 27 May 2012 12:43:16 +0100
Subject: [PATCH 27/38] train-model.perl: add -alt-direct-rule-score-1 and
 -alt-direct-rule-score-2 options, which use either p(RHS_t|RHS_s,LHS) or
 p(LHS,RHS_t|RHS_s), respectively, as a grammar rule's direct translation
 score.

---
 .../phrase-extract/RuleExtractionOptions.h    |  4 +
 .../extract-ghkm/ExtractGHKM.cpp              |  3 +
 .../phrase-extract/extract-ghkm/Options.h     |  2 +
 .../extract-ghkm/ScfgRuleWriter.cpp           | 12 ++-
 .../training/phrase-extract/extract-rules.cpp | 43 ++++++---
 scripts/training/phrase-extract/score.cpp     | 94 +++++++++++++++----
 scripts/training/phrase-extract/score.h       |  8 +-
 .../training/train-model.perl.missing_bin_dir |  9 +-
 8 files changed, 139 insertions(+), 36 deletions(-)

diff --git a/scripts/training/phrase-extract/RuleExtractionOptions.h b/scripts/training/phrase-extract/RuleExtractionOptions.h
index f9123de86..272af2c76 100644
--- a/scripts/training/phrase-extract/RuleExtractionOptions.h
+++ b/scripts/training/phrase-extract/RuleExtractionOptions.h
@@ -48,6 +48,8 @@ public:
   bool pcfgScore;
   bool outputNTLengths;
   bool gzOutput;
+  bool unpairedExtractFormat;
+  bool conditionOnTargetLhs;
   
   RuleExtractionOptions()
     : maxSpan(10)
@@ -78,6 +80,8 @@ public:
     , pcfgScore(false)
     , outputNTLengths(false)
     , gzOutput(false)
+    , unpairedExtractFormat(false)
+    , conditionOnTargetLhs(false)
   {}
 };
 
diff --git a/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp b/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp
index 397ce1e3c..6b6fbb7eb 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp
@@ -357,6 +357,9 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
   if (vm.count("AllowUnary")) {
     options.allowUnary = true;
   }
+  if (vm.count("ConditionOnTargetLHS")) {
+    options.conditionOnTargetLhs = true;
+  }
   if (vm.count("GZOutput")) {
     options.gzOutput = true;
   }
diff --git a/scripts/training/phrase-extract/extract-ghkm/Options.h b/scripts/training/phrase-extract/extract-ghkm/Options.h
index c4b57f311..362fc95d2 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Options.h
+++ b/scripts/training/phrase-extract/extract-ghkm/Options.h
@@ -30,6 +30,7 @@ struct Options {
  public:
   Options()
       : allowUnary(false)
+      , conditionOnTargetLhs(false)
       , gzOutput(false)
       , maxNodes(15)
       , maxRuleDepth(3)
@@ -47,6 +48,7 @@ struct Options {
 
   // All other options
   bool allowUnary;
+  bool conditionOnTargetLhs;
   std::string glueGrammarFile;
   bool gzOutput;
   int maxNodes;
diff --git a/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp b/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
index d5d16b790..cd993d6e8 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
+++ b/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
@@ -101,7 +101,11 @@ void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule,
     }
     sourceSS << " ";
   }
-  WriteSymbol(rule.GetSourceLHS(), sourceSS);
+  if (m_options.conditionOnTargetLhs) {
+    WriteSymbol(rule.GetTargetLHS(), sourceSS);
+  } else {
+    WriteSymbol(rule.GetSourceLHS(), sourceSS);
+  }
 
   // Write the target side of the rule to targetSS.
   i = 0;
@@ -131,7 +135,11 @@ void ScfgRuleWriter::WriteUnpairedFormat(const ScfgRule &rule,
     WriteSymbol(*p, sourceSS);
     sourceSS << " ";
   }
-  WriteSymbol(rule.GetSourceLHS(), sourceSS);
+  if (m_options.conditionOnTargetLhs) {
+    WriteSymbol(rule.GetTargetLHS(), sourceSS);
+  } else {
+    WriteSymbol(rule.GetSourceLHS(), sourceSS);
+  }
 
   // Write the target side of the rule to targetSS.
   i = 0;
diff --git a/scripts/training/phrase-extract/extract-rules.cpp b/scripts/training/phrase-extract/extract-rules.cpp
index a00667b82..997038224 100644
--- a/scripts/training/phrase-extract/extract-rules.cpp
+++ b/scripts/training/phrase-extract/extract-rules.cpp
@@ -140,7 +140,9 @@ int main(int argc, char* argv[])
          << " | --MaxNonTerm[" << options.maxNonTerm << "]"
          << " | --MaxScope[" << options.maxScope << "]"
          << " | --SourceSyntax | --TargetSyntax"
-         << " | --AllowOnlyUnalignedWords | --DisallowNonTermConsecTarget |--NonTermConsecSource |  --NoNonTermFirstWord | --NoFractionalCounting ]\n";
+         << " | --AllowOnlyUnalignedWords | --DisallowNonTermConsecTarget |--NonTermConsecSource |  --NoNonTermFirstWord | --NoFractionalCounting"
+         << " | --UnpairedExtractFormat"
+         << " | --ConditionOnTargetLHS ]\n";
     exit(1);
   }
   char* &fileNameT = argv[1];
@@ -261,6 +263,10 @@ int main(int argc, char* argv[])
       options.pcfgScore = true;
     } else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
       options.outputNTLengths = true;
+    } else if (strcmp(argv[i],"--UnpairedExtractFormat") == 0) {
+      options.unpairedExtractFormat = true;
+    } else if (strcmp(argv[i],"--ConditionOnTargetLHS") == 0) {
+      options.conditionOnTargetLhs = true;
 #ifdef WITH_THREADS
     } else if (strcmp(argv[i],"-threads") == 0 || 
                strcmp(argv[i],"--threads") == 0 ||
@@ -545,7 +551,11 @@ string ExtractTask::printTargetHieroPhrase( int startT, int endT, int startS, in
                            m_sentence->targetTree.GetNodes(currPos,hole.GetEnd(1))[ labelI ]->GetLabel() : "X";
       hole.SetLabel(targetLabel, 1);
 
-      out += "[" + sourceLabel + "][" + targetLabel + "] ";
+      if (m_options.unpairedExtractFormat) {
+        out += "[" + targetLabel + "] ";
+      } else {
+        out += "[" + sourceLabel + "][" + targetLabel + "] ";
+      }
 
       if (m_options.pcfgScore) {
         double score = m_sentence->targetTree.GetNodes(currPos,hole.GetEnd(1))[labelI]->GetPcfgScore();
@@ -591,7 +601,11 @@ string ExtractTask::printSourceHieroPhrase( int startT, int endT, int startS, in
       assert(targetLabel != "");
 
       const string &sourceLabel =  hole.GetLabel(0);
-      out += "[" + sourceLabel + "][" + targetLabel + "] ";
+      if (m_options.unpairedExtractFormat) {
+        out += "[" + sourceLabel + "] ";
+      } else {
+        out += "[" + sourceLabel + "][" + targetLabel + "] ";
+      }
 
       currPos = hole.GetEnd(0);
       hole.SetPos(outPos, 0);
@@ -659,7 +673,6 @@ void ExtractTask::printHieroPhrase( int startT, int endT, int startS, int endS
                        m_sentence->targetTree.GetNodes(startT,endT)[ labelIndex[0] ]->GetLabel() : "X";
   string sourceLabel = m_options.sourceSyntax ?
                        m_sentence->sourceTree.GetNodes(startS,endS)[ labelIndex[1] ]->GetLabel() : "X";
-  //string sourceLabel = "X";
 
   // create non-terms on the source side
   preprocessSourceHieroPhrase(startT, endT, startS, endS, indexS, holeColl, labelIndex);
@@ -677,9 +690,12 @@ void ExtractTask::printHieroPhrase( int startT, int endT, int startS, int endS
   }
 
   // source
-  // holeColl.SortSourceHoles();
-  rule.source = printSourceHieroPhrase(startT, endT, startS, endS, holeColl, labelIndex)
-                + " [" + sourceLabel + "]";
+  rule.source = printSourceHieroPhrase(startT, endT, startS, endS, holeColl, labelIndex);
+  if (m_options.conditionOnTargetLhs) {
+    rule.source += " [" + targetLabel + "]";
+  } else {
+    rule.source += " [" + sourceLabel + "]";
+  }
 
   // alignment
   printHieroAlignment(startT, endT, startS, endS, indexS, indexT, holeColl, rule);
@@ -875,10 +891,15 @@ void ExtractTask::addRule( int startT, int endT, int startS, int endS, RuleExist
 
   // phrase labels
   string targetLabel,sourceLabel;
-  sourceLabel = m_options.sourceSyntax ?
-                m_sentence->sourceTree.GetNodes(startS,endS)[0]->GetLabel() : "X";
-  targetLabel = m_options.targetSyntax ?
-                m_sentence->targetTree.GetNodes(startT,endT)[0]->GetLabel() : "X";
+  if (m_options.targetSyntax && m_options.conditionOnTargetLhs) {
+    sourceLabel = targetLabel = m_sentence->targetTree.GetNodes(startT,endT)[0]->GetLabel();
+  }
+  else {
+    sourceLabel = m_options.sourceSyntax ?
+                  m_sentence->sourceTree.GetNodes(startS,endS)[0]->GetLabel() : "X";
+    targetLabel = m_options.targetSyntax ?
+                  m_sentence->targetTree.GetNodes(startT,endT)[0]->GetLabel() : "X";
+  }
 
   // source
   rule.source = "";
diff --git a/scripts/training/phrase-extract/score.cpp b/scripts/training/phrase-extract/score.cpp
index c5fb0b99f..5e0ade627 100644
--- a/scripts/training/phrase-extract/score.cpp
+++ b/scripts/training/phrase-extract/score.cpp
@@ -69,10 +69,15 @@ double computeUnalignedFWPenalty( const PHRASE &, const PHRASE &, PhraseAlignmen
 void calcNTLengthProb(const vector< PhraseAlignment* > &phrasePairs
                       , map<size_t, map<size_t, float> > &sourceProb
                       , map<size_t, map<size_t, float> > &targetProb);
+void printSourcePhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &);
+void printTargetPhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &);
+
 LexicalTable lexTable;
 bool inverseFlag = false;
 bool hierarchicalFlag = false;
 bool pcfgFlag = false;
+bool unpairedExtractFormatFlag = false;
+bool conditionOnTargetLhsFlag = false;
 bool wordAlignmentFlag = false;
 bool goodTuringFlag = false;
 bool kneserNeyFlag = false;
@@ -93,7 +98,7 @@ int main(int argc, char* argv[])
        << "scoring methods for extracted rules\n";
 
   if (argc < 4) {
-    cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--WordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--OutputNTLengths] \n";
+    cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--WordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--OutputNTLengths] [--PCFG] [--UnpairedExtractFormat] [--ConditionOnTargetLHS]\n";
     exit(1);
   }
   char* fileNameExtract = argv[1];
@@ -112,6 +117,12 @@ int main(int argc, char* argv[])
     } else if (strcmp(argv[i],"--PCFG") == 0) {
       pcfgFlag = true;
       cerr << "including PCFG scores\n";
+    } else if (strcmp(argv[i],"--UnpairedExtractFormat") == 0) {
+      unpairedExtractFormatFlag = true;  // non-terminals arrive unpaired in the extract file
+      cerr << "processing unpaired extract format\n";
+    } else if (strcmp(argv[i],"--ConditionOnTargetLHS") == 0) {
+      conditionOnTargetLhsFlag = true;  // changes which root symbols are written to the table
+      cerr << "conditioning on target LHS\n";
     } else if (strcmp(argv[i],"--WordAlignment") == 0) {
       wordAlignmentFlag = true;
       cerr << "outputing word alignment" << endl;
@@ -470,27 +481,18 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
 
   // source phrase (unless inverse)
   if (! inverseFlag) {
-    for(size_t j=0; j<phraseS.size(); j++) {
-      phraseTableFile << vcbS.getWord( phraseS[j] );
-      phraseTableFile << " ";
-    }
-    phraseTableFile << "||| ";
+    printSourcePhrase(phraseS, phraseT, *bestAlignment, phraseTableFile);
+    phraseTableFile << " ||| ";
   }
 
   // target phrase
-  for(size_t j=0; j<phraseT.size(); j++) {
-    phraseTableFile << vcbT.getWord( phraseT[j] );
-    phraseTableFile << " ";
-  }
-  phraseTableFile << "||| ";
+  printTargetPhrase(phraseS, phraseT, *bestAlignment, phraseTableFile);
+  phraseTableFile << " ||| ";
 
   // source phrase (if inverse)
   if (inverseFlag) {
-    for(size_t j=0; j<phraseS.size(); j++) {
-      phraseTableFile << vcbS.getWord( phraseS[j] );
-      phraseTableFile << " ";
-    }
-    phraseTableFile << "||| ";
+    printSourcePhrase(phraseS, phraseT, *bestAlignment, phraseTableFile);
+    phraseTableFile << " ||| ";
   }
 
   // lexical translation probability
@@ -683,6 +685,66 @@ void LexicalTable::load( char *fileName )
   cerr << endl;
 }
 
+void printSourcePhrase(const PHRASE &phraseS, const PHRASE &phraseT,
+                       const PhraseAlignment &bestAlignment, ostream &out)
+{
+  // Writes the source symbols (space-separated) followed by the root symbol.
+  if (phraseS.empty()) return;  // else size()-1 wraps and back() is undefined
+  for (std::size_t i = 0; i + 1 < phraseS.size(); ++i) {
+    const std::string &word = vcbS.getWord(phraseS[i]);
+    if (!unpairedExtractFormatFlag || !isNonTerminal(word)) {
+      out << word << " ";
+      continue;
+    }
+    // unpaired input: pair the non-terminal with its single aligned target symbol
+    const std::set<std::size_t> &alignmentPoints = bestAlignment.alignedToS[i];
+    assert(alignmentPoints.size() == 1);
+    const std::size_t j = *(alignmentPoints.begin());
+    if (inverseFlag) {
+      out << vcbT.getWord(phraseT[j]) << word << " ";
+    } else {
+      out << word << vcbT.getWord(phraseT[j]) << " ";
+    }
+  }
+  if (conditionOnTargetLhsFlag && !inverseFlag) {  // root symbol
+    out << "[X]";
+  } else {
+    out << vcbS.getWord(phraseS.back());
+  }
+}
+
+void printTargetPhrase(const PHRASE &phraseS, const PHRASE &phraseT,
+                       const PhraseAlignment &bestAlignment, ostream &out)
+{
+  // Writes the target symbols (space-separated) followed by the root symbol.
+  if (phraseT.empty()) return;  // else size()-1 wraps and back() is undefined
+  for (std::size_t i = 0; i + 1 < phraseT.size(); ++i) {
+    const std::string &word = vcbT.getWord(phraseT[i]);
+    if (!unpairedExtractFormatFlag || !isNonTerminal(word)) {
+      out << word << " ";
+      continue;
+    }
+    // unpaired input: pair the non-terminal with its single aligned source symbol
+    const std::set<std::size_t> &alignmentPoints = bestAlignment.alignedToT[i];
+    assert(alignmentPoints.size() == 1);
+    const std::size_t j = *(alignmentPoints.begin());
+    if (inverseFlag) {
+      out << word << vcbS.getWord(phraseS[j]) << " ";
+    } else {
+      out << vcbS.getWord(phraseS[j]) << word << " ";
+    }
+  }
+  if (conditionOnTargetLhsFlag) {  // root symbol: taken from the source side or "[X]"
+    if (inverseFlag) {
+      out << "[X]";
+    } else {
+      out << vcbS.getWord(phraseS.back());
+    }
+  } else {
+    out << vcbT.getWord(phraseT.back());
+  }
+}
+
 std::pair<PhrasePairGroup::Coll::iterator,bool> PhrasePairGroup::insert ( const PhraseAlignmentCollection& obj )
 {
   std::pair<iterator,bool> ret = m_coll.insert(obj);
diff --git a/scripts/training/phrase-extract/score.h b/scripts/training/phrase-extract/score.h
index dc94ecfde..9faa144c5 100644
--- a/scripts/training/phrase-extract/score.h
+++ b/scripts/training/phrase-extract/score.h
@@ -59,11 +59,7 @@ private:
 };
 
 // other functions *********************************************
-inline bool isNonTerminal( std::string &word )
+inline bool isNonTerminal( const std::string &word )
 {
-  return (word.length()>=3 &&
-          word.substr(0,1).compare("[") == 0 &&
-          word.substr(word.length()-1,1).compare("]") == 0);
+  return (word.length()>=3 && word[0] == '[' && word[word.length()-1] == ']');
 }
-
-
diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index aac6cef96..0db2ee437 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -31,6 +31,7 @@ my($_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_
    @_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS,
    $_DONT_ZIP,  $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG,
    $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_PCFG,$_EXTRACT_OPTIONS,$_SCORE_OPTIONS,
+   $_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2,
    $_PHRASE_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
    $_MEMSCORE, $_FINAL_ALIGNMENT_MODEL,
    $_CONTINUE,$_MAX_LEXICAL_REORDERING,$_DO_STEPS,
@@ -106,6 +107,8 @@ $_HELP = 1
 		       'unknown-word-label-file=s' => \$_UNKNOWN_WORD_LABEL_FILE,
 		       'ghkm' => \$_GHKM,
 		       'pcfg' => \$_PCFG,
+		       'alt-direct-rule-score-1' => \$_ALT_DIRECT_RULE_SCORE_1,
+		       'alt-direct-rule-score-2' => \$_ALT_DIRECT_RULE_SCORE_2,
 		       'extract-options=s' => \$_EXTRACT_OPTIONS,
 		       'score-options=s' => \$_SCORE_OPTIONS,
 		       'source-syntax' => \$_SOURCE_SYNTAX,
@@ -1375,6 +1378,8 @@ sub extract_phrase {
         $cmd .= " --GlueGrammar $___GLUE_GRAMMAR_FILE" if $_GLUE_GRAMMAR;
         $cmd .= " --UnknownWordLabel $_UNKNOWN_WORD_LABEL_FILE" if $_TARGET_SYNTAX && defined($_UNKNOWN_WORD_LABEL_FILE);
         $cmd .= " --PCFG" if $_PCFG;
+        $cmd .= " --UnpairedExtractFormat" if $_ALT_DIRECT_RULE_SCORE_1 || $_ALT_DIRECT_RULE_SCORE_2;
+        $cmd .= " --ConditionOnTargetLHS" if $_ALT_DIRECT_RULE_SCORE_1;
         if (!defined($_GHKM)) {
           $cmd .= " --SourceSyntax" if $_SOURCE_SYNTAX;
           $cmd .= " --TargetSyntax" if $_TARGET_SYNTAX;
@@ -1506,10 +1511,12 @@ sub score_phrase_phrase_extract {
         $cmd .= " --UnalignedFunctionWordPenalty ".($inverse ? $UNALIGNED_FW_F : $UNALIGNED_FW_E) if $UNALIGNED_FW_COUNT;
         $cmd .= " --MinCountHierarchical $MIN_COUNT_HIERARCHICAL" if $MIN_COUNT_HIERARCHICAL;
         $cmd .= " --PCFG" if $_PCFG;
+        $cmd .= " --UnpairedExtractFormat" if $_ALT_DIRECT_RULE_SCORE_1 || $_ALT_DIRECT_RULE_SCORE_2;
+        $cmd .= " --ConditionOnTargetLHS" if $_ALT_DIRECT_RULE_SCORE_1;
         $cmd .= " $CORE_SCORE_OPTIONS" if defined($_SCORE_OPTIONS);
 
 				# sorting
-				if ($direction eq "e2f") {
+				if ($direction eq "e2f" || $_ALT_DIRECT_RULE_SCORE_1 || $_ALT_DIRECT_RULE_SCORE_2) {
 					$cmd .= " 1 ";
 				}
 				else {

From db1e6040b241c74ed01b9da0e4a8bd2f4c15f176 Mon Sep 17 00:00:00 2001
From: Phil Williams <philip.williams@mac.com>
Date: Sun, 27 May 2012 17:58:13 +0100
Subject: [PATCH 28/38] Fix bug in previous commit.

---
 scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp b/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp
index 6b6fbb7eb..dae876116 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp
@@ -266,6 +266,8 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
     //("help", "print this help message and exit")
     ("AllowUnary",
         "allow fully non-lexical unary rules")
+    ("ConditionOnTargetLHS",
+        "write target LHS instead of \"X\" as source LHS")
     ("GlueGrammar",
         po::value(&options.glueGrammarFile),
         "write glue grammar to named file")

From ef26388aff03e95882091c96eb3764c872f6c81f Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Mon, 28 May 2012 17:29:46 +0100
Subject: [PATCH 29/38] eclipse project

---
 contrib/other-builds/OnDiskPt/.cproject       |  131 +
 contrib/other-builds/OnDiskPt/.project        |  185 +
 contrib/other-builds/lm/.cproject             |  125 +
 contrib/other-builds/lm/.project              |  360 ++
 contrib/other-builds/moses-cmd/.cproject      |  140 +
 contrib/other-builds/moses-cmd/.project       |  199 ++
 contrib/other-builds/moses/.cproject          |  164 +
 contrib/other-builds/moses/.project           | 3055 +++++++++++++++++
 contrib/other-builds/util/.cproject           |  133 +
 contrib/other-builds/util/.project            |   90 +
 lm/bhiksha.cc                                 |    1 +
 moses-cmd/src/IOWrapper.h                     |    9 +-
 moses/src/LM/ORLM.h                           |    2 +-
 .../training/train-model.perl.missing_bin_dir |    2 +-
 util/bit_packing.cc                           |    2 +-
 15 files changed, 4591 insertions(+), 7 deletions(-)
 create mode 100644 contrib/other-builds/OnDiskPt/.cproject
 create mode 100644 contrib/other-builds/OnDiskPt/.project
 create mode 100644 contrib/other-builds/lm/.cproject
 create mode 100644 contrib/other-builds/lm/.project
 create mode 100644 contrib/other-builds/moses-cmd/.cproject
 create mode 100644 contrib/other-builds/moses-cmd/.project
 create mode 100644 contrib/other-builds/moses/.cproject
 create mode 100644 contrib/other-builds/moses/.project
 create mode 100644 contrib/other-builds/util/.cproject
 create mode 100644 contrib/other-builds/util/.project

diff --git a/contrib/other-builds/OnDiskPt/.cproject b/contrib/other-builds/OnDiskPt/.cproject
new file mode 100644
index 000000000..41f2a5141
--- /dev/null
+++ b/contrib/other-builds/OnDiskPt/.cproject
@@ -0,0 +1,131 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?>
+
+<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+	<storageModule moduleId="org.eclipse.cdt.core.settings">
+		<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+				<externalSettings>
+					<externalSetting>
+						<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/OnDiskPt"/>
+						<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/OnDiskPt/Debug"/>
+						<entry flags="RESOLVED" kind="libraryFile" name="OnDiskPt"/>
+					</externalSetting>
+				</externalSettings>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
+					<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.725420545" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
+							<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.1586272140" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
+							<builder buildPath="${workspace_loc:/OnDiskPt/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.1909553559" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
+							<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.30521110" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
+							<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.478334849" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
+								<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1328561226" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.108239817" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1825070846" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.901309550" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.2001028511" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
+								<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.676959181" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.1484480101" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.include.paths.1556683035" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
+									<listOptionValue builtIn="false" value="/opt/local/include"/>
+								</option>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1930757481" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1161943634" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
+								<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.576529322" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.426851981" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1925590121" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+					<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978.726316251" name="Main.h" rcbsApplicability="disable" resourcePath="Main.h" toolsToInvoke=""/>
+					<sourceEntries>
+						<entry excluding="Main.h|Main.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+					</sourceEntries>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+		<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.701931933">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.701931933" moduleId="org.eclipse.cdt.core.settings" name="Release">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.701931933" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
+					<folderInfo id="cdt.managedbuild.config.macosx.exe.release.701931933." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.5036266" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
+							<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.396818757" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
+							<builder buildPath="${workspace_loc:/OnDiskPt/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.1081186575" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
+							<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.894082374" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
+							<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.640159085" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
+								<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1673993744" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.596082362" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.851420859" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.385722535" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.983488413" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
+								<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.21058138" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1704184753" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1034344194" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1029035384" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
+								<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.171488636" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.843129626" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1014721928" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+	</storageModule>
+	<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+		<project id="OnDiskPt.cdt.managedbuild.target.macosx.exe.542902806" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
+	</storageModule>
+	<storageModule moduleId="scannerConfiguration">
+		<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.701931933;cdt.managedbuild.config.macosx.exe.release.701931933.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1029035384;cdt.managedbuild.tool.gnu.c.compiler.input.1014721928">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978;cdt.managedbuild.config.gnu.macosx.exe.debug.846397978.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1161943634;cdt.managedbuild.tool.gnu.c.compiler.input.1925590121">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978;cdt.managedbuild.config.gnu.macosx.exe.debug.846397978.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.2001028511;cdt.managedbuild.tool.gnu.cpp.compiler.input.1930757481">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.701931933;cdt.managedbuild.config.macosx.exe.release.701931933.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.983488413;cdt.managedbuild.tool.gnu.cpp.compiler.input.1034344194">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+	</storageModule>
+	<storageModule moduleId="refreshScope" versionNumber="1">
+		<resource resourceType="PROJECT" workspacePath="/OnDiskPt"/>
+	</storageModule>
+</cproject>
diff --git a/contrib/other-builds/OnDiskPt/.project b/contrib/other-builds/OnDiskPt/.project
new file mode 100644
index 000000000..73a7ac0a7
--- /dev/null
+++ b/contrib/other-builds/OnDiskPt/.project
@@ -0,0 +1,185 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>OnDiskPt</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+			<triggers>clean,full,incremental,</triggers>
+			<arguments>
+				<dictionary>
+					<key>?name?</key>
+					<value></value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.append_environment</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
+					<value>all</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.buildArguments</key>
+					<value></value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.buildCommand</key>
+					<value>make</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.buildLocation</key>
+					<value>${workspace_loc:/OnDiskPt/Debug}</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
+					<value>clean</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.contents</key>
+					<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
+					<value>false</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.enableFullBuild</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
+					<value>all</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.stopOnError</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
+					<value>true</value>
+				</dictionary>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+			<triggers>full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.cdt.core.cnature</nature>
+		<nature>org.eclipse.cdt.core.ccnature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
+	</natures>
+	<linkedResources>
+		<link>
+			<name>Jamfile</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Jamfile</locationURI>
+		</link>
+		<link>
+			<name>Main.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Main.cpp</locationURI>
+		</link>
+		<link>
+			<name>Main.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Main.h</locationURI>
+		</link>
+		<link>
+			<name>OnDiskWrapper.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/OnDiskWrapper.cpp</locationURI>
+		</link>
+		<link>
+			<name>OnDiskWrapper.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/OnDiskWrapper.h</locationURI>
+		</link>
+		<link>
+			<name>Phrase.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Phrase.cpp</locationURI>
+		</link>
+		<link>
+			<name>Phrase.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Phrase.h</locationURI>
+		</link>
+		<link>
+			<name>PhraseNode.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/PhraseNode.cpp</locationURI>
+		</link>
+		<link>
+			<name>PhraseNode.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/PhraseNode.h</locationURI>
+		</link>
+		<link>
+			<name>SourcePhrase.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/SourcePhrase.cpp</locationURI>
+		</link>
+		<link>
+			<name>SourcePhrase.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/SourcePhrase.h</locationURI>
+		</link>
+		<link>
+			<name>TargetPhrase.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/TargetPhrase.cpp</locationURI>
+		</link>
+		<link>
+			<name>TargetPhrase.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/TargetPhrase.h</locationURI>
+		</link>
+		<link>
+			<name>TargetPhraseCollection.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/TargetPhraseCollection.cpp</locationURI>
+		</link>
+		<link>
+			<name>TargetPhraseCollection.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/TargetPhraseCollection.h</locationURI>
+		</link>
+		<link>
+			<name>Vocab.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Vocab.cpp</locationURI>
+		</link>
+		<link>
+			<name>Vocab.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Vocab.h</locationURI>
+		</link>
+		<link>
+			<name>Word.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Word.cpp</locationURI>
+		</link>
+		<link>
+			<name>Word.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Word.h</locationURI>
+		</link>
+		<link>
+			<name>queryOnDiskPt.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/queryOnDiskPt.cpp</locationURI>
+		</link>
+	</linkedResources>
+</projectDescription>
diff --git a/contrib/other-builds/lm/.cproject b/contrib/other-builds/lm/.cproject
new file mode 100644
index 000000000..f89e80f49
--- /dev/null
+++ b/contrib/other-builds/lm/.cproject
@@ -0,0 +1,125 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?>
+
+<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+	<storageModule moduleId="org.eclipse.cdt.core.settings">
+		<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+				<externalSettings>
+					<externalSetting>
+						<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/lm"/>
+						<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/lm/Debug"/>
+						<entry flags="RESOLVED" kind="libraryFile" name="lm"/>
+					</externalSetting>
+				</externalSettings>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
+					<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.640882096" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
+							<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.793478365" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
+							<builder buildPath="${workspace_loc:/lm/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.36011795" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
+							<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.1252826468" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
+							<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.1024598065" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
+								<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.139111896" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.62265891" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.588438623" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.775866405" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1024092140" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
+								<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.586969644" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.7139692" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.include.paths.1988092227" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+									<listOptionValue builtIn="false" value="/opt/local/include"/>
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
+								</option>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.20502600" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.34201722" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
+								<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.934764060" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.2078705375" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1028526865" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+		<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.203229648">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.203229648" moduleId="org.eclipse.cdt.core.settings" name="Release">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.203229648" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
+					<folderInfo id="cdt.managedbuild.config.macosx.exe.release.203229648." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.1942852701" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
+							<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.2107180060" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
+							<builder buildPath="${workspace_loc:/lm/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.127652112" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
+							<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.1668850519" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
+							<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.934899611" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
+								<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.794276660" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.362272521" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.370659018" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.2103660404" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.2026817795" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
+								<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1671568858" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.230723898" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1058671602" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.990116990" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
+								<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1934130159" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1848737807" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1294441742" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+	</storageModule>
+	<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+		<project id="lm.cdt.managedbuild.target.macosx.exe.1399596076" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
+	</storageModule>
+	<storageModule moduleId="scannerConfiguration">
+		<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750;cdt.managedbuild.config.gnu.macosx.exe.debug.351042750.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1024092140;cdt.managedbuild.tool.gnu.cpp.compiler.input.20502600">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.203229648;cdt.managedbuild.config.macosx.exe.release.203229648.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.990116990;cdt.managedbuild.tool.gnu.c.compiler.input.1294441742">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750;cdt.managedbuild.config.gnu.macosx.exe.debug.351042750.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.34201722;cdt.managedbuild.tool.gnu.c.compiler.input.1028526865">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.203229648;cdt.managedbuild.config.macosx.exe.release.203229648.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.2026817795;cdt.managedbuild.tool.gnu.cpp.compiler.input.1058671602">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+	</storageModule>
+	<storageModule moduleId="refreshScope"/>
+</cproject>
diff --git a/contrib/other-builds/lm/.project b/contrib/other-builds/lm/.project
new file mode 100644
index 000000000..0d30e24cb
--- /dev/null
+++ b/contrib/other-builds/lm/.project
@@ -0,0 +1,360 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>lm</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+			<triggers>clean,full,incremental,</triggers>
+			<arguments>
+				<dictionary>
+					<key>?name?</key>
+					<value></value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.append_environment</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
+					<value>all</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.buildArguments</key>
+					<value></value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.buildCommand</key>
+					<value>make</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.buildLocation</key>
+					<value>${workspace_loc:/lm/Debug}</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
+					<value>clean</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.contents</key>
+					<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
+					<value>false</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.enableFullBuild</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
+					<value>all</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.stopOnError</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
+					<value>true</value>
+				</dictionary>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+			<triggers>full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.cdt.core.cnature</nature>
+		<nature>org.eclipse.cdt.core.ccnature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
+	</natures>
+	<linkedResources>
+		<link>
+			<name>.DS_Store</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/.DS_Store</locationURI>
+		</link>
+		<link>
+			<name>COPYING</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/COPYING</locationURI>
+		</link>
+		<link>
+			<name>COPYING.LESSER</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/COPYING.LESSER</locationURI>
+		</link>
+		<link>
+			<name>Jamfile</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/Jamfile</locationURI>
+		</link>
+		<link>
+			<name>LICENSE</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/LICENSE</locationURI>
+		</link>
+		<link>
+			<name>README</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/README</locationURI>
+		</link>
+		<link>
+			<name>bhiksha.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/bhiksha.cc</locationURI>
+		</link>
+		<link>
+			<name>bhiksha.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/bhiksha.hh</locationURI>
+		</link>
+		<link>
+			<name>binary_format.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/binary_format.cc</locationURI>
+		</link>
+		<link>
+			<name>binary_format.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/binary_format.hh</locationURI>
+		</link>
+		<link>
+			<name>blank.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/blank.hh</locationURI>
+		</link>
+		<link>
+			<name>build_binary</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/build_binary</locationURI>
+		</link>
+		<link>
+			<name>build_binary.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/build_binary.cc</locationURI>
+		</link>
+		<link>
+			<name>clean.sh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/clean.sh</locationURI>
+		</link>
+		<link>
+			<name>compile.sh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/compile.sh</locationURI>
+		</link>
+		<link>
+			<name>config.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/config.cc</locationURI>
+		</link>
+		<link>
+			<name>config.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/config.hh</locationURI>
+		</link>
+		<link>
+			<name>enumerate_vocab.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/enumerate_vocab.hh</locationURI>
+		</link>
+		<link>
+			<name>facade.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/facade.hh</locationURI>
+		</link>
+		<link>
+			<name>left.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/left.hh</locationURI>
+		</link>
+		<link>
+			<name>left_test.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/left_test.cc</locationURI>
+		</link>
+		<link>
+			<name>libkenlm.dylib</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/libkenlm.dylib</locationURI>
+		</link>
+		<link>
+			<name>libkenutil.dylib</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/libkenutil.dylib</locationURI>
+		</link>
+		<link>
+			<name>lm_exception.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/lm_exception.cc</locationURI>
+		</link>
+		<link>
+			<name>lm_exception.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/lm_exception.hh</locationURI>
+		</link>
+		<link>
+			<name>max_order.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/max_order.hh</locationURI>
+		</link>
+		<link>
+			<name>model.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/model.cc</locationURI>
+		</link>
+		<link>
+			<name>model.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/model.hh</locationURI>
+		</link>
+		<link>
+			<name>model_test.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/model_test.cc</locationURI>
+		</link>
+		<link>
+			<name>model_type.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/model_type.hh</locationURI>
+		</link>
+		<link>
+			<name>ngram_query.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/ngram_query.cc</locationURI>
+		</link>
+		<link>
+			<name>ngram_query.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/ngram_query.hh</locationURI>
+		</link>
+		<link>
+			<name>quantize.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/quantize.cc</locationURI>
+		</link>
+		<link>
+			<name>quantize.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/quantize.hh</locationURI>
+		</link>
+		<link>
+			<name>query</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/query</locationURI>
+		</link>
+		<link>
+			<name>read_arpa.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/read_arpa.cc</locationURI>
+		</link>
+		<link>
+			<name>read_arpa.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/read_arpa.hh</locationURI>
+		</link>
+		<link>
+			<name>return.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/return.hh</locationURI>
+		</link>
+		<link>
+			<name>search_hashed.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/search_hashed.cc</locationURI>
+		</link>
+		<link>
+			<name>search_hashed.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/search_hashed.hh</locationURI>
+		</link>
+		<link>
+			<name>search_trie.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/search_trie.cc</locationURI>
+		</link>
+		<link>
+			<name>search_trie.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/search_trie.hh</locationURI>
+		</link>
+		<link>
+			<name>test.arpa</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/test.arpa</locationURI>
+		</link>
+		<link>
+			<name>test.sh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/test.sh</locationURI>
+		</link>
+		<link>
+			<name>test_nounk.arpa</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/test_nounk.arpa</locationURI>
+		</link>
+		<link>
+			<name>trie.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/trie.cc</locationURI>
+		</link>
+		<link>
+			<name>trie.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/trie.hh</locationURI>
+		</link>
+		<link>
+			<name>trie_sort.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/trie_sort.cc</locationURI>
+		</link>
+		<link>
+			<name>trie_sort.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/trie_sort.hh</locationURI>
+		</link>
+		<link>
+			<name>virtual_interface.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/virtual_interface.cc</locationURI>
+		</link>
+		<link>
+			<name>virtual_interface.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/virtual_interface.hh</locationURI>
+		</link>
+		<link>
+			<name>vocab.cc</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/vocab.cc</locationURI>
+		</link>
+		<link>
+			<name>vocab.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/vocab.hh</locationURI>
+		</link>
+		<link>
+			<name>weights.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/weights.hh</locationURI>
+		</link>
+		<link>
+			<name>word_index.hh</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/lm/word_index.hh</locationURI>
+		</link>
+	</linkedResources>
+</projectDescription>
diff --git a/contrib/other-builds/moses-cmd/.cproject b/contrib/other-builds/moses-cmd/.cproject
new file mode 100644
index 000000000..53c112cb8
--- /dev/null
+++ b/contrib/other-builds/moses-cmd/.cproject
@@ -0,0 +1,140 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?>
+
+<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+	<storageModule moduleId="org.eclipse.cdt.core.settings">
+		<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
+					<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.1679946908" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
+							<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.451172468" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
+							<builder buildPath="${workspace_loc:/moses-cmd/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.1382407954" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
+							<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.2118670613" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
+							<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.84059290" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
+								<option id="macosx.cpp.link.option.libs.1641794848" name="Libraries (-l)" superClass="macosx.cpp.link.option.libs" valueType="libs">
+									<listOptionValue builtIn="false" value="moses"/>
+									<listOptionValue builtIn="false" value="OnDiskPt"/>
+									<listOptionValue builtIn="false" value="lm"/>
+									<listOptionValue builtIn="false" value="util"/>
+									<listOptionValue builtIn="false" value="irstlm"/>
+								</option>
+								<option id="macosx.cpp.link.option.paths.1615268628" name="Library search path (-L)" superClass="macosx.cpp.link.option.paths" valueType="libPaths">
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/moses/Debug"/>
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/OnDiskPt/Debug"/>
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/lm/Debug"/>
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/util/Debug"/>
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/irstlm/lib"/>
+								</option>
+								<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.412058804" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.896987906" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.187427846" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.2033983602" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1808603697" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
+								<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.2018824611" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.1176009559" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.include.paths.1024398579" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+									<listOptionValue builtIn="false" value="/opt/local/include"/>
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/moses/src"/>
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
+								</option>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.240921565" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1201400609" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
+								<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.748558048" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.1014626120" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2031799877" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+					<sourceEntries>
+						<entry excluding="LatticeMBRGrid.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+					</sourceEntries>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+		<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.1916112479">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.1916112479" moduleId="org.eclipse.cdt.core.settings" name="Release">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.1916112479" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
+					<folderInfo id="cdt.managedbuild.config.macosx.exe.release.1916112479." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.1528572752" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
+							<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.1976002706" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
+							<builder buildPath="${workspace_loc:/moses-cmd/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.1470455063" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
+							<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.335066624" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
+							<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.1173017253" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
+								<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.675070011" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.174060449" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1018665338" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.440711813" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1219375865" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
+								<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1940339824" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1648308879" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.604224475" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.759110223" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
+								<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.2105388501" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1692046412" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1452105399" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+	</storageModule>
+	<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+		<project id="moses-cmd.cdt.managedbuild.target.macosx.exe.1016275955" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
+	</storageModule>
+	<storageModule moduleId="refreshScope" versionNumber="1">
+		<resource resourceType="PROJECT" workspacePath="/moses-cmd"/>
+	</storageModule>
+	<storageModule moduleId="scannerConfiguration">
+		<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150;cdt.managedbuild.config.gnu.macosx.exe.debug.341255150.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1201400609;cdt.managedbuild.tool.gnu.c.compiler.input.2031799877">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.1916112479;cdt.managedbuild.config.macosx.exe.release.1916112479.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.759110223;cdt.managedbuild.tool.gnu.c.compiler.input.1452105399">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.1916112479;cdt.managedbuild.config.macosx.exe.release.1916112479.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1219375865;cdt.managedbuild.tool.gnu.cpp.compiler.input.604224475">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150;cdt.managedbuild.config.gnu.macosx.exe.debug.341255150.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1808603697;cdt.managedbuild.tool.gnu.cpp.compiler.input.240921565">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+	</storageModule>
+</cproject>
diff --git a/contrib/other-builds/moses-cmd/.project b/contrib/other-builds/moses-cmd/.project
new file mode 100644
index 000000000..c71651563
--- /dev/null
+++ b/contrib/other-builds/moses-cmd/.project
@@ -0,0 +1,199 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>moses-cmd</name>
+	<comment></comment>
+	<projects>
+		<project>lm</project>
+		<project>moses</project>
+		<project>OnDiskPt</project>
+		<project>util</project>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+			<triggers>clean,full,incremental,</triggers>
+			<arguments>
+				<dictionary>
+					<key>?name?</key>
+					<value></value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.append_environment</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
+					<value>all</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.buildArguments</key>
+					<value></value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.buildCommand</key>
+					<value>make</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.buildLocation</key>
+					<value>${workspace_loc:/moses-cmd/Debug}</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
+					<value>clean</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.contents</key>
+					<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
+					<value>false</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.enableFullBuild</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
+					<value>all</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.stopOnError</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
+					<value>true</value>
+				</dictionary>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+			<triggers>full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.cdt.core.cnature</nature>
+		<nature>org.eclipse.cdt.core.ccnature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
+	</natures>
+	<linkedResources>
+		<link>
+			<name>IOWrapper.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/IOWrapper.cpp</locationURI>
+		</link>
+		<link>
+			<name>IOWrapper.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/IOWrapper.h</locationURI>
+		</link>
+		<link>
+			<name>IOWrapper.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/IOWrapper.o</locationURI>
+		</link>
+		<link>
+			<name>Jamfile</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/Jamfile</locationURI>
+		</link>
+		<link>
+			<name>LatticeMBR.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBR.cpp</locationURI>
+		</link>
+		<link>
+			<name>LatticeMBR.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBR.h</locationURI>
+		</link>
+		<link>
+			<name>LatticeMBR.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBR.o</locationURI>
+		</link>
+		<link>
+			<name>LatticeMBRGrid.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBRGrid.cpp</locationURI>
+		</link>
+		<link>
+			<name>LatticeMBRGrid.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBRGrid.o</locationURI>
+		</link>
+		<link>
+			<name>Main.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/Main.cpp</locationURI>
+		</link>
+		<link>
+			<name>Main.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/Main.h</locationURI>
+		</link>
+		<link>
+			<name>Main.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/Main.o</locationURI>
+		</link>
+		<link>
+			<name>TranslationAnalysis.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/TranslationAnalysis.cpp</locationURI>
+		</link>
+		<link>
+			<name>TranslationAnalysis.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/TranslationAnalysis.h</locationURI>
+		</link>
+		<link>
+			<name>TranslationAnalysis.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/TranslationAnalysis.o</locationURI>
+		</link>
+		<link>
+			<name>libkenlm.dylib</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/libkenlm.dylib</locationURI>
+		</link>
+		<link>
+			<name>libkenutil.dylib</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/libkenutil.dylib</locationURI>
+		</link>
+		<link>
+			<name>lmbrgrid</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/lmbrgrid</locationURI>
+		</link>
+		<link>
+			<name>mbr.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/mbr.cpp</locationURI>
+		</link>
+		<link>
+			<name>mbr.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/mbr.h</locationURI>
+		</link>
+		<link>
+			<name>mbr.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/mbr.o</locationURI>
+		</link>
+		<link>
+			<name>moses</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/moses</locationURI>
+		</link>
+	</linkedResources>
+</projectDescription>
diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject
new file mode 100644
index 000000000..2995d5eae
--- /dev/null
+++ b/contrib/other-builds/moses/.cproject
@@ -0,0 +1,164 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?>
+
+<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+	<storageModule moduleId="org.eclipse.cdt.core.settings">
+		<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+				<externalSettings>
+					<externalSetting>
+						<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/moses"/>
+						<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/moses/Debug"/>
+						<entry flags="RESOLVED" kind="libraryFile" name="moses"/>
+					</externalSetting>
+				</externalSettings>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
+					<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.497902212" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
+							<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.1820609450" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
+							<builder buildPath="${workspace_loc:/moses/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.1998579330" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
+							<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.1330311562" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
+							<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.1226580551" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
+								<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.102127808" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool command="as" commandLinePattern="${COMMAND} ${FLAGS} ${OUTPUT_FLAG} ${OUTPUT_PREFIX}${OUTPUT} ${INPUTS}" id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.1556759720" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.897776351" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1820797229" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1867588805" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
+								<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.1898625650" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.806998992" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.include.paths.1819917957" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+									<listOptionValue builtIn="false" value="/opt/local/include"/>
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/moses/src"/>
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/srilm/include"/>
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/irstlm/include"/>
+								</option>
+								<option id="gnu.cpp.compiler.option.preprocessor.def.1569452418" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+									<listOptionValue builtIn="false" value="LM_SRI"/>
+									<listOptionValue builtIn="false" value="LM_IRST"/>
+									<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
+								</option>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1110302565" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.401409202" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
+								<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.753046525" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.1396911098" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1919272901" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+					<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1722029461" name="SyntacticLanguageModelState.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelState.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1432960145" name="SyntacticLanguageModelFiles.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelFiles.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1906856645" name="SyntacticLanguageModel.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModel.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.460380900" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1692203139" name="ORLM.h" rcbsApplicability="disable" resourcePath="LM/ORLM.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.538301588" name="Remote.h" rcbsApplicability="disable" resourcePath="LM/Remote.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.854427429" name="LDHT.h" rcbsApplicability="disable" resourcePath="LM/LDHT.h" toolsToInvoke=""/>
+					<sourceEntries>
+						<entry excluding="SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.cpp|LM/LDHT.h|LM/Remote.h|LM/Remote.cpp|LM/Rand.h|LM/Rand.cpp|LM/ORLM.h|LM/ORLM.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+					</sourceEntries>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+		<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.722580523">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.722580523" moduleId="org.eclipse.cdt.core.settings" name="Release">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.722580523" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
+					<folderInfo id="cdt.managedbuild.config.macosx.exe.release.722580523." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.2070671582" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
+							<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.503591386" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
+							<builder buildPath="${workspace_loc:/moses/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.108117223" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
+							<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.1203406445" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
+							<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.1539915639" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
+								<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1333560300" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.1693865756" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.2000339940" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.505919286" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1662892925" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
+								<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1036481202" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.484015287" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.preprocessor.def.1089615214" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+									<listOptionValue builtIn="false" value="LM_SRI"/>
+									<listOptionValue builtIn="false" value="LM_IRST"/>
+									<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
+								</option>
+								<option id="gnu.cpp.compiler.option.include.paths.1722702487" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+									<listOptionValue builtIn="false" value="/opt/local/include"/>
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/moses/src"/>
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/srilm/include"/>
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/irstlm/include"/>
+								</option>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.936283391" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1404156839" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
+								<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1487222992" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1171203697" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1172147378" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+					<fileInfo id="cdt.managedbuild.config.macosx.exe.release.722580523.1831545277" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.macosx.exe.release.722580523.1743378025" name="ORLM.h" rcbsApplicability="disable" resourcePath="LM/ORLM.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.macosx.exe.release.722580523.1490362543" name="Remote.h" rcbsApplicability="disable" resourcePath="LM/Remote.h" toolsToInvoke=""/>
+					<sourceEntries>
+						<entry excluding="LM/LDHT.cpp|LM/Rand.h|LM/Rand.cpp|LM/ORLM.h|LM/ORLM.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+					</sourceEntries>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+	</storageModule>
+	<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+		<project id="moses.cdt.managedbuild.target.macosx.exe.1209017164" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
+	</storageModule>
+	<storageModule moduleId="scannerConfiguration">
+		<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426;cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.401409202;cdt.managedbuild.tool.gnu.c.compiler.input.1919272901">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.722580523;cdt.managedbuild.config.macosx.exe.release.722580523.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1404156839;cdt.managedbuild.tool.gnu.c.compiler.input.1172147378">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426;cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1867588805;cdt.managedbuild.tool.gnu.cpp.compiler.input.1110302565">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.722580523;cdt.managedbuild.config.macosx.exe.release.722580523.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1662892925;cdt.managedbuild.tool.gnu.cpp.compiler.input.936283391">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+	</storageModule>
+	<storageModule moduleId="refreshScope" versionNumber="1">
+		<resource resourceType="PROJECT" workspacePath="/moses"/>
+	</storageModule>
+	<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
+</cproject>
diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
new file mode 100644
index 000000000..8d534dbd4
--- /dev/null
+++ b/contrib/other-builds/moses/.project
@@ -0,0 +1,3055 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>moses</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+			<triggers>clean,full,incremental,</triggers>
+			<arguments>
+				<dictionary>
+					<key>?name?</key>
+					<value></value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.append_environment</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
+					<value>all</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.buildArguments</key>
+					<value></value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.buildCommand</key>
+					<value>make</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.buildLocation</key>
+					<value>${workspace_loc:/moses/Debug}</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
+					<value>clean</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.contents</key>
+					<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
+					<value>false</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.enableFullBuild</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
+					<value>all</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.stopOnError</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
+					<value>true</value>
+				</dictionary>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+			<triggers>full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.cdt.core.cnature</nature>
+		<nature>org.eclipse.cdt.core.ccnature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
+	</natures>
+	<linkedResources>
+		<link>
+			<name>AlignmentInfo.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/AlignmentInfo.cpp</locationURI>
+		</link>
+		<link>
+			<name>AlignmentInfo.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/AlignmentInfo.h</locationURI>
+		</link>
+		<link>
+			<name>AlignmentInfoCollection.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/AlignmentInfoCollection.cpp</locationURI>
+		</link>
+		<link>
+			<name>AlignmentInfoCollection.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/AlignmentInfoCollection.h</locationURI>
+		</link>
+		<link>
+			<name>BilingualDynSuffixArray.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/BilingualDynSuffixArray.cpp</locationURI>
+		</link>
+		<link>
+			<name>BilingualDynSuffixArray.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/BilingualDynSuffixArray.h</locationURI>
+		</link>
+		<link>
+			<name>BitmapContainer.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/BitmapContainer.cpp</locationURI>
+		</link>
+		<link>
+			<name>BitmapContainer.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/BitmapContainer.h</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>CellCollection.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CellCollection.h</locationURI>
+		</link>
+		<link>
+			<name>ChartCell.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartCell.cpp</locationURI>
+		</link>
+		<link>
+			<name>ChartCell.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartCell.h</locationURI>
+		</link>
+		<link>
+			<name>ChartCellCollection.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartCellCollection.cpp</locationURI>
+		</link>
+		<link>
+			<name>ChartCellCollection.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartCellCollection.h</locationURI>
+		</link>
+		<link>
+			<name>ChartCellLabel.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartCellLabel.h</locationURI>
+		</link>
+		<link>
+			<name>ChartCellLabelSet.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartCellLabelSet.h</locationURI>
+		</link>
+		<link>
+			<name>ChartHypothesis.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartHypothesis.cpp</locationURI>
+		</link>
+		<link>
+			<name>ChartHypothesis.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartHypothesis.h</locationURI>
+		</link>
+		<link>
+			<name>ChartHypothesisCollection.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartHypothesisCollection.cpp</locationURI>
+		</link>
+		<link>
+			<name>ChartHypothesisCollection.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartHypothesisCollection.h</locationURI>
+		</link>
+		<link>
+			<name>ChartManager.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartManager.cpp</locationURI>
+		</link>
+		<link>
+			<name>ChartManager.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartManager.h</locationURI>
+		</link>
+		<link>
+			<name>ChartRuleLookupManager.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartRuleLookupManager.h</locationURI>
+		</link>
+		<link>
+			<name>ChartTranslationOption.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTranslationOption.cpp</locationURI>
+		</link>
+		<link>
+			<name>ChartTranslationOption.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTranslationOption.h</locationURI>
+		</link>
+		<link>
+			<name>ChartTranslationOptionCollection.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTranslationOptionCollection.cpp</locationURI>
+		</link>
+		<link>
+			<name>ChartTranslationOptionCollection.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTranslationOptionCollection.h</locationURI>
+		</link>
+		<link>
+			<name>ChartTranslationOptionList.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTranslationOptionList.cpp</locationURI>
+		</link>
+		<link>
+			<name>ChartTranslationOptionList.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTranslationOptionList.h</locationURI>
+		</link>
+		<link>
+			<name>ChartTrellisDetour.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTrellisDetour.cpp</locationURI>
+		</link>
+		<link>
+			<name>ChartTrellisDetour.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTrellisDetour.h</locationURI>
+		</link>
+		<link>
+			<name>ChartTrellisDetourQueue.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTrellisDetourQueue.cpp</locationURI>
+		</link>
+		<link>
+			<name>ChartTrellisDetourQueue.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTrellisDetourQueue.h</locationURI>
+		</link>
+		<link>
+			<name>ChartTrellisNode.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTrellisNode.cpp</locationURI>
+		</link>
+		<link>
+			<name>ChartTrellisNode.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTrellisNode.h</locationURI>
+		</link>
+		<link>
+			<name>ChartTrellisPath.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTrellisPath.cpp</locationURI>
+		</link>
+		<link>
+			<name>ChartTrellisPath.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTrellisPath.h</locationURI>
+		</link>
+		<link>
+			<name>ChartTrellisPathList.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTrellisPathList.h</locationURI>
+		</link>
+		<link>
+			<name>ConfusionNet.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ConfusionNet.cpp</locationURI>
+		</link>
+		<link>
+			<name>ConfusionNet.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ConfusionNet.h</locationURI>
+		</link>
+		<link>
+			<name>DecodeFeature.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DecodeFeature.cpp</locationURI>
+		</link>
+		<link>
+			<name>DecodeFeature.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DecodeFeature.h</locationURI>
+		</link>
+		<link>
+			<name>DecodeGraph.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DecodeGraph.cpp</locationURI>
+		</link>
+		<link>
+			<name>DecodeGraph.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DecodeGraph.h</locationURI>
+		</link>
+		<link>
+			<name>DecodeStep.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DecodeStep.cpp</locationURI>
+		</link>
+		<link>
+			<name>DecodeStep.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DecodeStep.h</locationURI>
+		</link>
+		<link>
+			<name>DecodeStepGeneration.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DecodeStepGeneration.cpp</locationURI>
+		</link>
+		<link>
+			<name>DecodeStepGeneration.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DecodeStepGeneration.h</locationURI>
+		</link>
+		<link>
+			<name>DecodeStepTranslation.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DecodeStepTranslation.cpp</locationURI>
+		</link>
+		<link>
+			<name>DecodeStepTranslation.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DecodeStepTranslation.h</locationURI>
+		</link>
+		<link>
+			<name>Dictionary.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Dictionary.cpp</locationURI>
+		</link>
+		<link>
+			<name>Dictionary.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Dictionary.h</locationURI>
+		</link>
+		<link>
+			<name>DummyScoreProducers.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DummyScoreProducers.cpp</locationURI>
+		</link>
+		<link>
+			<name>DummyScoreProducers.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DummyScoreProducers.h</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>DynSuffixArray.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSuffixArray.cpp</locationURI>
+		</link>
+		<link>
+			<name>DynSuffixArray.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSuffixArray.h</locationURI>
+		</link>
+		<link>
+			<name>FFState.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/FFState.cpp</locationURI>
+		</link>
+		<link>
+			<name>FFState.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/FFState.h</locationURI>
+		</link>
+		<link>
+			<name>Factor.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Factor.cpp</locationURI>
+		</link>
+		<link>
+			<name>Factor.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Factor.h</locationURI>
+		</link>
+		<link>
+			<name>FactorCollection.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/FactorCollection.cpp</locationURI>
+		</link>
+		<link>
+			<name>FactorCollection.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/FactorCollection.h</locationURI>
+		</link>
+		<link>
+			<name>FactorTypeSet.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/FactorTypeSet.cpp</locationURI>
+		</link>
+		<link>
+			<name>FactorTypeSet.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/FactorTypeSet.h</locationURI>
+		</link>
+		<link>
+			<name>FeatureFunction.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/FeatureFunction.cpp</locationURI>
+		</link>
+		<link>
+			<name>FeatureFunction.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/FeatureFunction.h</locationURI>
+		</link>
+		<link>
+			<name>File.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/File.cpp</locationURI>
+		</link>
+		<link>
+			<name>File.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/File.h</locationURI>
+		</link>
+		<link>
+			<name>FilePtr.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/FilePtr.h</locationURI>
+		</link>
+		<link>
+			<name>FloydWarshall.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/FloydWarshall.cpp</locationURI>
+		</link>
+		<link>
+			<name>FloydWarshall.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/FloydWarshall.h</locationURI>
+		</link>
+		<link>
+			<name>GenerationDictionary.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/GenerationDictionary.cpp</locationURI>
+		</link>
+		<link>
+			<name>GenerationDictionary.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/GenerationDictionary.h</locationURI>
+		</link>
+		<link>
+			<name>GlobalLexicalModel.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/GlobalLexicalModel.cpp</locationURI>
+		</link>
+		<link>
+			<name>GlobalLexicalModel.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/GlobalLexicalModel.h</locationURI>
+		</link>
+		<link>
+			<name>HypoList.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/HypoList.h</locationURI>
+		</link>
+		<link>
+			<name>Hypothesis.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Hypothesis.cpp</locationURI>
+		</link>
+		<link>
+			<name>Hypothesis.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Hypothesis.h</locationURI>
+		</link>
+		<link>
+			<name>HypothesisStack.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/HypothesisStack.cpp</locationURI>
+		</link>
+		<link>
+			<name>HypothesisStack.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/HypothesisStack.h</locationURI>
+		</link>
+		<link>
+			<name>HypothesisStackCubePruning.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/HypothesisStackCubePruning.cpp</locationURI>
+		</link>
+		<link>
+			<name>HypothesisStackCubePruning.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/HypothesisStackCubePruning.h</locationURI>
+		</link>
+		<link>
+			<name>HypothesisStackNormal.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/HypothesisStackNormal.cpp</locationURI>
+		</link>
+		<link>
+			<name>HypothesisStackNormal.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/HypothesisStackNormal.h</locationURI>
+		</link>
+		<link>
+			<name>IRST.lo</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/IRST.lo</locationURI>
+		</link>
+		<link>
+			<name>IRST.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/IRST.o</locationURI>
+		</link>
+		<link>
+			<name>InputFileStream.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/InputFileStream.cpp</locationURI>
+		</link>
+		<link>
+			<name>InputFileStream.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/InputFileStream.h</locationURI>
+		</link>
+		<link>
+			<name>InputType.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/InputType.cpp</locationURI>
+		</link>
+		<link>
+			<name>InputType.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/InputType.h</locationURI>
+		</link>
+		<link>
+			<name>Jamfile</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Jamfile</locationURI>
+		</link>
+		<link>
+			<name>LM</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>LMList.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LMList.cpp</locationURI>
+		</link>
+		<link>
+			<name>LMList.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LMList.h</locationURI>
+		</link>
+		<link>
+			<name>LVoc.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LVoc.cpp</locationURI>
+		</link>
+		<link>
+			<name>LVoc.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LVoc.h</locationURI>
+		</link>
+		<link>
+			<name>LexicalReordering.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LexicalReordering.cpp</locationURI>
+		</link>
+		<link>
+			<name>LexicalReordering.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LexicalReordering.h</locationURI>
+		</link>
+		<link>
+			<name>LexicalReorderingState.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LexicalReorderingState.cpp</locationURI>
+		</link>
+		<link>
+			<name>LexicalReorderingState.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LexicalReorderingState.h</locationURI>
+		</link>
+		<link>
+			<name>LexicalReorderingTable.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LexicalReorderingTable.cpp</locationURI>
+		</link>
+		<link>
+			<name>LexicalReorderingTable.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LexicalReorderingTable.h</locationURI>
+		</link>
+		<link>
+			<name>Manager.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Manager.cpp</locationURI>
+		</link>
+		<link>
+			<name>Manager.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Manager.h</locationURI>
+		</link>
+		<link>
+			<name>NonTerminal.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/NonTerminal.cpp</locationURI>
+		</link>
+		<link>
+			<name>NonTerminal.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/NonTerminal.h</locationURI>
+		</link>
+		<link>
+			<name>ObjectPool.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ObjectPool.h</locationURI>
+		</link>
+		<link>
+			<name>OutputCollector.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/OutputCollector.h</locationURI>
+		</link>
+		<link>
+			<name>PCNTools.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PCNTools.cpp</locationURI>
+		</link>
+		<link>
+			<name>PCNTools.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PCNTools.h</locationURI>
+		</link>
+		<link>
+			<name>PDTAimp.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PDTAimp.h</locationURI>
+		</link>
+		<link>
+			<name>ParallelBackoff.lo</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ParallelBackoff.lo</locationURI>
+		</link>
+		<link>
+			<name>ParallelBackoff.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ParallelBackoff.o</locationURI>
+		</link>
+		<link>
+			<name>Parameter.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Parameter.cpp</locationURI>
+		</link>
+		<link>
+			<name>Parameter.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Parameter.h</locationURI>
+		</link>
+		<link>
+			<name>PartialTranslOptColl.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PartialTranslOptColl.cpp</locationURI>
+		</link>
+		<link>
+			<name>PartialTranslOptColl.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PartialTranslOptColl.h</locationURI>
+		</link>
+		<link>
+			<name>Phrase.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Phrase.cpp</locationURI>
+		</link>
+		<link>
+			<name>Phrase.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Phrase.h</locationURI>
+		</link>
+		<link>
+			<name>PhraseDictionary.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PhraseDictionary.cpp</locationURI>
+		</link>
+		<link>
+			<name>PhraseDictionary.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PhraseDictionary.h</locationURI>
+		</link>
+		<link>
+			<name>PhraseDictionaryDynSuffixArray.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PhraseDictionaryDynSuffixArray.cpp</locationURI>
+		</link>
+		<link>
+			<name>PhraseDictionaryDynSuffixArray.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PhraseDictionaryDynSuffixArray.h</locationURI>
+		</link>
+		<link>
+			<name>PhraseDictionaryMemory.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PhraseDictionaryMemory.cpp</locationURI>
+		</link>
+		<link>
+			<name>PhraseDictionaryMemory.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PhraseDictionaryMemory.h</locationURI>
+		</link>
+		<link>
+			<name>PhraseDictionaryNode.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PhraseDictionaryNode.cpp</locationURI>
+		</link>
+		<link>
+			<name>PhraseDictionaryNode.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PhraseDictionaryNode.h</locationURI>
+		</link>
+		<link>
+			<name>PhraseDictionaryTree.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PhraseDictionaryTree.cpp</locationURI>
+		</link>
+		<link>
+			<name>PhraseDictionaryTree.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PhraseDictionaryTree.h</locationURI>
+		</link>
+		<link>
+			<name>PhraseDictionaryTreeAdaptor.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PhraseDictionaryTreeAdaptor.cpp</locationURI>
+		</link>
+		<link>
+			<name>PhraseDictionaryTreeAdaptor.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PhraseDictionaryTreeAdaptor.h</locationURI>
+		</link>
+		<link>
+			<name>PrefixTree.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PrefixTree.h</locationURI>
+		</link>
+		<link>
+			<name>PrefixTreeMap.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PrefixTreeMap.cpp</locationURI>
+		</link>
+		<link>
+			<name>PrefixTreeMap.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/PrefixTreeMap.h</locationURI>
+		</link>
+		<link>
+			<name>ReorderingConstraint.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ReorderingConstraint.cpp</locationURI>
+		</link>
+		<link>
+			<name>ReorderingConstraint.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ReorderingConstraint.h</locationURI>
+		</link>
+		<link>
+			<name>ReorderingStack.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ReorderingStack.cpp</locationURI>
+		</link>
+		<link>
+			<name>ReorderingStack.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ReorderingStack.h</locationURI>
+		</link>
+		<link>
+			<name>RuleCube.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleCube.cpp</locationURI>
+		</link>
+		<link>
+			<name>RuleCube.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleCube.h</locationURI>
+		</link>
+		<link>
+			<name>RuleCubeItem.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleCubeItem.cpp</locationURI>
+		</link>
+		<link>
+			<name>RuleCubeItem.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleCubeItem.h</locationURI>
+		</link>
+		<link>
+			<name>RuleCubeQueue.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleCubeQueue.cpp</locationURI>
+		</link>
+		<link>
+			<name>RuleCubeQueue.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleCubeQueue.h</locationURI>
+		</link>
+		<link>
+			<name>RuleTable</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>SRI.lo</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/SRI.lo</locationURI>
+		</link>
+		<link>
+			<name>SRI.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/SRI.o</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>ScoreComponentCollection.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ScoreComponentCollection.cpp</locationURI>
+		</link>
+		<link>
+			<name>ScoreComponentCollection.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ScoreComponentCollection.h</locationURI>
+		</link>
+		<link>
+			<name>ScoreIndexManager.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ScoreIndexManager.cpp</locationURI>
+		</link>
+		<link>
+			<name>ScoreIndexManager.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ScoreIndexManager.h</locationURI>
+		</link>
+		<link>
+			<name>ScoreProducer.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ScoreProducer.cpp</locationURI>
+		</link>
+		<link>
+			<name>ScoreProducer.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ScoreProducer.h</locationURI>
+		</link>
+		<link>
+			<name>Search.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Search.cpp</locationURI>
+		</link>
+		<link>
+			<name>Search.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Search.h</locationURI>
+		</link>
+		<link>
+			<name>SearchCubePruning.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/SearchCubePruning.cpp</locationURI>
+		</link>
+		<link>
+			<name>SearchCubePruning.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/SearchCubePruning.h</locationURI>
+		</link>
+		<link>
+			<name>SearchNormal.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/SearchNormal.cpp</locationURI>
+		</link>
+		<link>
+			<name>SearchNormal.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/SearchNormal.h</locationURI>
+		</link>
+		<link>
+			<name>Sentence.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Sentence.cpp</locationURI>
+		</link>
+		<link>
+			<name>Sentence.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Sentence.h</locationURI>
+		</link>
+		<link>
+			<name>SentenceStats.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/SentenceStats.cpp</locationURI>
+		</link>
+		<link>
+			<name>SentenceStats.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/SentenceStats.h</locationURI>
+		</link>
+		<link>
+			<name>SquareMatrix.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/SquareMatrix.cpp</locationURI>
+		</link>
+		<link>
+			<name>SquareMatrix.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/SquareMatrix.h</locationURI>
+		</link>
+		<link>
+			<name>StackVec.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/StackVec.h</locationURI>
+		</link>
+		<link>
+			<name>StaticData.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/StaticData.cpp</locationURI>
+		</link>
+		<link>
+			<name>StaticData.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/StaticData.h</locationURI>
+		</link>
+		<link>
+			<name>StaticData.lo</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/StaticData.lo</locationURI>
+		</link>
+		<link>
+			<name>StaticData.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/StaticData.o</locationURI>
+		</link>
+		<link>
+			<name>SyntacticLanguageModel.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/SyntacticLanguageModel.cpp</locationURI>
+		</link>
+		<link>
+			<name>SyntacticLanguageModel.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/SyntacticLanguageModel.h</locationURI>
+		</link>
+		<link>
+			<name>SyntacticLanguageModelFiles.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/SyntacticLanguageModelFiles.h</locationURI>
+		</link>
+		<link>
+			<name>SyntacticLanguageModelState.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/SyntacticLanguageModelState.h</locationURI>
+		</link>
+		<link>
+			<name>TargetPhrase.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TargetPhrase.cpp</locationURI>
+		</link>
+		<link>
+			<name>TargetPhrase.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TargetPhrase.h</locationURI>
+		</link>
+		<link>
+			<name>TargetPhraseCollection.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TargetPhraseCollection.cpp</locationURI>
+		</link>
+		<link>
+			<name>TargetPhraseCollection.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TargetPhraseCollection.h</locationURI>
+		</link>
+		<link>
+			<name>Terminal.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Terminal.h</locationURI>
+		</link>
+		<link>
+			<name>ThreadPool.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ThreadPool.cpp</locationURI>
+		</link>
+		<link>
+			<name>ThreadPool.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/ThreadPool.h</locationURI>
+		</link>
+		<link>
+			<name>Timer.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Timer.cpp</locationURI>
+		</link>
+		<link>
+			<name>Timer.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Timer.h</locationURI>
+		</link>
+		<link>
+			<name>TranslationOption.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TranslationOption.cpp</locationURI>
+		</link>
+		<link>
+			<name>TranslationOption.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TranslationOption.h</locationURI>
+		</link>
+		<link>
+			<name>TranslationOptionCollection.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TranslationOptionCollection.cpp</locationURI>
+		</link>
+		<link>
+			<name>TranslationOptionCollection.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TranslationOptionCollection.h</locationURI>
+		</link>
+		<link>
+			<name>TranslationOptionCollectionConfusionNet.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TranslationOptionCollectionConfusionNet.cpp</locationURI>
+		</link>
+		<link>
+			<name>TranslationOptionCollectionConfusionNet.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TranslationOptionCollectionConfusionNet.h</locationURI>
+		</link>
+		<link>
+			<name>TranslationOptionCollectionText.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TranslationOptionCollectionText.cpp</locationURI>
+		</link>
+		<link>
+			<name>TranslationOptionCollectionText.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TranslationOptionCollectionText.h</locationURI>
+		</link>
+		<link>
+			<name>TranslationOptionList.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TranslationOptionList.cpp</locationURI>
+		</link>
+		<link>
+			<name>TranslationOptionList.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TranslationOptionList.h</locationURI>
+		</link>
+		<link>
+			<name>TranslationSystem.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TranslationSystem.cpp</locationURI>
+		</link>
+		<link>
+			<name>TranslationSystem.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TranslationSystem.h</locationURI>
+		</link>
+		<link>
+			<name>TreeInput.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TreeInput.cpp</locationURI>
+		</link>
+		<link>
+			<name>TreeInput.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TreeInput.h</locationURI>
+		</link>
+		<link>
+			<name>TreeInput.lo</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TreeInput.lo</locationURI>
+		</link>
+		<link>
+			<name>TreeInput.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TreeInput.o</locationURI>
+		</link>
+		<link>
+			<name>TrellisPath.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TrellisPath.cpp</locationURI>
+		</link>
+		<link>
+			<name>TrellisPath.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TrellisPath.h</locationURI>
+		</link>
+		<link>
+			<name>TrellisPath.lo</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TrellisPath.lo</locationURI>
+		</link>
+		<link>
+			<name>TrellisPath.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TrellisPath.o</locationURI>
+		</link>
+		<link>
+			<name>TrellisPathCollection.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TrellisPathCollection.cpp</locationURI>
+		</link>
+		<link>
+			<name>TrellisPathCollection.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TrellisPathCollection.h</locationURI>
+		</link>
+		<link>
+			<name>TrellisPathCollection.lo</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TrellisPathCollection.lo</locationURI>
+		</link>
+		<link>
+			<name>TrellisPathCollection.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TrellisPathCollection.o</locationURI>
+		</link>
+		<link>
+			<name>TrellisPathList.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TrellisPathList.h</locationURI>
+		</link>
+		<link>
+			<name>TypeDef.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/TypeDef.h</locationURI>
+		</link>
+		<link>
+			<name>UniqueObject.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/UniqueObject.h</locationURI>
+		</link>
+		<link>
+			<name>UserMessage.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/UserMessage.cpp</locationURI>
+		</link>
+		<link>
+			<name>UserMessage.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/UserMessage.h</locationURI>
+		</link>
+		<link>
+			<name>UserMessage.lo</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/UserMessage.lo</locationURI>
+		</link>
+		<link>
+			<name>UserMessage.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/UserMessage.o</locationURI>
+		</link>
+		<link>
+			<name>Util.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Util.cpp</locationURI>
+		</link>
+		<link>
+			<name>Util.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Util.h</locationURI>
+		</link>
+		<link>
+			<name>Util.lo</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Util.lo</locationURI>
+		</link>
+		<link>
+			<name>Util.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Util.o</locationURI>
+		</link>
+		<link>
+			<name>Word.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Word.cpp</locationURI>
+		</link>
+		<link>
+			<name>Word.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Word.h</locationURI>
+		</link>
+		<link>
+			<name>Word.lo</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Word.lo</locationURI>
+		</link>
+		<link>
+			<name>Word.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Word.o</locationURI>
+		</link>
+		<link>
+			<name>WordLattice.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/WordLattice.cpp</locationURI>
+		</link>
+		<link>
+			<name>WordLattice.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/WordLattice.h</locationURI>
+		</link>
+		<link>
+			<name>WordLattice.lo</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/WordLattice.lo</locationURI>
+		</link>
+		<link>
+			<name>WordLattice.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/WordLattice.o</locationURI>
+		</link>
+		<link>
+			<name>WordsBitmap.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/WordsBitmap.cpp</locationURI>
+		</link>
+		<link>
+			<name>WordsBitmap.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/WordsBitmap.h</locationURI>
+		</link>
+		<link>
+			<name>WordsBitmap.lo</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/WordsBitmap.lo</locationURI>
+		</link>
+		<link>
+			<name>WordsBitmap.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/WordsBitmap.o</locationURI>
+		</link>
+		<link>
+			<name>WordsRange.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/WordsRange.cpp</locationURI>
+		</link>
+		<link>
+			<name>WordsRange.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/WordsRange.h</locationURI>
+		</link>
+		<link>
+			<name>WordsRange.lo</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/WordsRange.lo</locationURI>
+		</link>
+		<link>
+			<name>WordsRange.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/WordsRange.o</locationURI>
+		</link>
+		<link>
+			<name>XmlOption.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/XmlOption.cpp</locationURI>
+		</link>
+		<link>
+			<name>XmlOption.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/XmlOption.h</locationURI>
+		</link>
+		<link>
+			<name>XmlOption.lo</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/XmlOption.lo</locationURI>
+		</link>
+		<link>
+			<name>XmlOption.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/XmlOption.o</locationURI>
+		</link>
+		<link>
+			<name>bin</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>gzfilebuf.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/gzfilebuf.h</locationURI>
+		</link>
+		<link>
+			<name>hash.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/hash.cpp</locationURI>
+		</link>
+		<link>
+			<name>hash.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/hash.h</locationURI>
+		</link>
+		<link>
+			<name>hypergraph.proto</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/hypergraph.proto</locationURI>
+		</link>
+		<link>
+			<name>libmoses.la</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/libmoses.la</locationURI>
+		</link>
+		<link>
+			<name>rule.proto</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/rule.proto</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/ChartRuleLookupManagerCYKPlus.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/ChartRuleLookupManagerCYKPlus.cpp</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/ChartRuleLookupManagerCYKPlus.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/ChartRuleLookupManagerMemory.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/ChartRuleLookupManagerMemory.cpp</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/ChartRuleLookupManagerMemory.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/ChartRuleLookupManagerMemory.h</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/ChartRuleLookupManagerOnDisk.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/ChartRuleLookupManagerOnDisk.h</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/DotChart.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/DotChart.h</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/DotChartInMemory.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/DotChartInMemory.cpp</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/DotChartInMemory.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/DotChartInMemory.h</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/DotChartOnDisk.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/DotChartOnDisk.cpp</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/DotChartOnDisk.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/DotChartOnDisk.h</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/Jamfile</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/Jamfile</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/Jamfile</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/Jamfile</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/RandLMCache.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/RandLMCache.h</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/RandLMFilter.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/RandLMFilter.h</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/fdstream.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/fdstream.h</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/file.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/file.cpp</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/file.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/file.h</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/hash.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/hash.h</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/onlineRLM.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/onlineRLM.h</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/params.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/params.cpp</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/params.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/params.h</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/perfectHash.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/perfectHash.h</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/quantizer.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/quantizer.h</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/types.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/types.h</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/utils.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/utils.h</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/vocab.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/vocab.cpp</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/vocab.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/vocab.h</locationURI>
+		</link>
+		<link>
+			<name>LM/Base.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/Base.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/Base.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/Base.h</locationURI>
+		</link>
+		<link>
+			<name>LM/Factory.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/Factory.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/Factory.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/Factory.h</locationURI>
+		</link>
+		<link>
+			<name>LM/IRST.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/IRST.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/IRST.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/IRST.h</locationURI>
+		</link>
+		<link>
+			<name>LM/Implementation.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/Implementation.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/Implementation.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/Implementation.h</locationURI>
+		</link>
+		<link>
+			<name>LM/Jamfile</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/Jamfile</locationURI>
+		</link>
+		<link>
+			<name>LM/Joint.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/Joint.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/Joint.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/Joint.h</locationURI>
+		</link>
+		<link>
+			<name>LM/Ken.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/Ken.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/Ken.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/Ken.h</locationURI>
+		</link>
+		<link>
+			<name>LM/LDHT.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/LDHT.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/LDHT.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/LDHT.h</locationURI>
+		</link>
+		<link>
+			<name>LM/MultiFactor.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/MultiFactor.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/MultiFactor.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/MultiFactor.h</locationURI>
+		</link>
+		<link>
+			<name>LM/ORLM.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/ORLM.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/ORLM.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/ORLM.h</locationURI>
+		</link>
+		<link>
+			<name>LM/ParallelBackoff.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/ParallelBackoff.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/ParallelBackoff.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/ParallelBackoff.h</locationURI>
+		</link>
+		<link>
+			<name>LM/Rand.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/Rand.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/Rand.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/Rand.h</locationURI>
+		</link>
+		<link>
+			<name>LM/Remote.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/Remote.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/Remote.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/Remote.h</locationURI>
+		</link>
+		<link>
+			<name>LM/SRI.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/SRI.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/SRI.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/SRI.h</locationURI>
+		</link>
+		<link>
+			<name>LM/SingleFactor.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/SingleFactor.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/SingleFactor.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/SingleFactor.h</locationURI>
+		</link>
+		<link>
+			<name>LM/bin</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/Jamfile</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/Jamfile</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/Loader.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/Loader.h</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/LoaderCompact.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderCompact.cpp</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/LoaderCompact.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderCompact.h</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/LoaderFactory.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderFactory.cpp</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/LoaderFactory.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderFactory.h</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/LoaderHiero.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderHiero.cpp</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/LoaderHiero.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderHiero.h</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/LoaderStandard.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderStandard.cpp</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/LoaderStandard.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderStandard.h</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/PhraseDictionaryALSuffixArray.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryALSuffixArray.cpp</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/PhraseDictionaryALSuffixArray.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryALSuffixArray.h</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/PhraseDictionaryNodeSCFG.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryNodeSCFG.cpp</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/PhraseDictionaryNodeSCFG.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryNodeSCFG.h</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/PhraseDictionaryOnDisk.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryOnDisk.cpp</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/PhraseDictionaryOnDisk.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryOnDisk.h</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/PhraseDictionarySCFG.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionarySCFG.cpp</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/PhraseDictionarySCFG.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionarySCFG.h</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/Trie.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/Trie.cpp</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/Trie.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/Trie.h</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/UTrie.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/UTrie.cpp</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/UTrie.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/UTrie.h</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/UTrieNode.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/UTrieNode.cpp</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/UTrieNode.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/UTrieNode.h</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/ApplicableRuleTrie.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/ApplicableRuleTrie.cpp</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/ApplicableRuleTrie.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/ApplicableRuleTrie.h</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/IntermediateVarSpanNode.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/IntermediateVarSpanNode.h</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/Jamfile</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/Jamfile</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/Parser.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/Parser.cpp</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/Parser.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/Parser.h</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/SentenceMap.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/SentenceMap.h</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/StackLattice.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLattice.h</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/StackLatticeBuilder.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLatticeBuilder.cpp</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/StackLatticeBuilder.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLatticeBuilder.h</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/StackLatticeSearcher.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLatticeSearcher.h</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/VarSpanNode.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/VarSpanNode.h</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/VarSpanTrieBuilder.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/VarSpanTrieBuilder.cpp</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/VarSpanTrieBuilder.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/VarSpanTrieBuilder.h</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/clang-darwin-4.2.1</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/clang-darwin-4.2.1</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/darwin-4.2.1</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/gcc-4.2.1</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/lm.log</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/lm.log</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin/darwin-4.2.1</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/clang-darwin-4.2.1/release</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/clang-darwin-4.2.1/release</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/darwin-4.2.1/release</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/gcc-4.2.1/release</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin/darwin-4.2.1/release</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/clang-darwin-4.2.1/release/debug-symbols-on</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/link-static</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/gcc-4.2.1/release/debug-symbols-on</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/link-static/threading-multi</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/ChartRuleLookupManagerCYKPlus.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/ChartRuleLookupManagerCYKPlus.o</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/ChartRuleLookupManagerMemory.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/ChartRuleLookupManagerMemory.o</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/ChartRuleLookupManagerOnDisk.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/ChartRuleLookupManagerOnDisk.o</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/DotChartInMemory.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/DotChartInMemory.o</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/DotChartOnDisk.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/DotChartOnDisk.o</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Base.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Base.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Factory.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Factory.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/IRST.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/IRST.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Implementation.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Implementation.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Joint.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Joint.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Ken.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Ken.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/MultiFactor.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/MultiFactor.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/ParallelBackoff.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/ParallelBackoff.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Remote.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Remote.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/SRI.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/SRI.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/SingleFactor.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/SingleFactor.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/link-static/threading-multi/Base.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/link-static/threading-multi/Base.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/link-static/threading-multi/Factory.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/link-static/threading-multi/Factory.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/link-static/threading-multi/Implementation.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/link-static/threading-multi/Implementation.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/link-static/threading-multi/Joint.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/link-static/threading-multi/Joint.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/link-static/threading-multi/Ken.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/link-static/threading-multi/Ken.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/link-static/threading-multi/MultiFactor.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/link-static/threading-multi/MultiFactor.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/link-static/threading-multi/Remote.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/link-static/threading-multi/Remote.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/link-static/threading-multi/SingleFactor.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/link-static/threading-multi/SingleFactor.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/link-static/threading-multi/libLM.a</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/link-static/threading-multi/libLM.a</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderCompact.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderCompact.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderFactory.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderFactory.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderHiero.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderHiero.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderStandard.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderStandard.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryALSuffixArray.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryALSuffixArray.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryNodeSCFG.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryNodeSCFG.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryOnDisk.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryOnDisk.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionarySCFG.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionarySCFG.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Trie.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Trie.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/UTrie.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/UTrie.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/UTrieNode.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/UTrieNode.o</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/ApplicableRuleTrie.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/ApplicableRuleTrie.o</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Parser.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Parser.o</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/StackLatticeBuilder.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/StackLatticeBuilder.o</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/VarSpanTrieBuilder.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/VarSpanTrieBuilder.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/AlignmentInfo.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/AlignmentInfo.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/AlignmentInfoCollection.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/AlignmentInfoCollection.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/BilingualDynSuffixArray.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/BilingualDynSuffixArray.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/BitmapContainer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/BitmapContainer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartCell.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartCell.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartCellCollection.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartCellCollection.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartHypothesis.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartHypothesis.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartHypothesisCollection.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartHypothesisCollection.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartManager.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartManager.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartTranslationOption.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartTranslationOption.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartTranslationOptionCollection.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartTranslationOptionCollection.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartTranslationOptionList.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartTranslationOptionList.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartTrellisDetour.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartTrellisDetour.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartTrellisDetourQueue.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartTrellisDetourQueue.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartTrellisNode.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartTrellisNode.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartTrellisPath.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartTrellisPath.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ConfusionNet.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ConfusionNet.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DecodeFeature.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DecodeFeature.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DecodeGraph.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DecodeGraph.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DecodeStep.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DecodeStep.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DecodeStepGeneration.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DecodeStepGeneration.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DecodeStepTranslation.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DecodeStepTranslation.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Dictionary.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Dictionary.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DummyScoreProducers.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DummyScoreProducers.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSuffixArray.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSuffixArray.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/FFState.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/FFState.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Factor.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Factor.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/FactorCollection.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/FactorCollection.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/FactorTypeSet.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/FactorTypeSet.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/FeatureFunction.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/FeatureFunction.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/File.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/File.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/FloydWarshall.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/FloydWarshall.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/GenerationDictionary.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/GenerationDictionary.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/GlobalLexicalModel.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/GlobalLexicalModel.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Hypothesis.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Hypothesis.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/HypothesisStack.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/HypothesisStack.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/HypothesisStackCubePruning.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/HypothesisStackCubePruning.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/HypothesisStackNormal.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/HypothesisStackNormal.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/InputFileStream.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/InputFileStream.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/InputType.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/InputType.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LMList.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LMList.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LVoc.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LVoc.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LexicalReordering.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LexicalReordering.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingState.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingState.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTable.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LexicalReorderingTable.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Manager.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Manager.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/NonTerminal.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/NonTerminal.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PCNTools.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PCNTools.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Parameter.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Parameter.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PartialTranslOptColl.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PartialTranslOptColl.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Phrase.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Phrase.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryDynSuffixArray.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryDynSuffixArray.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryMemory.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryMemory.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryNode.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryNode.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryTree.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryTree.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryTreeAdaptor.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryTreeAdaptor.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PrefixTreeMap.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PrefixTreeMap.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ReorderingConstraint.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ReorderingConstraint.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ReorderingStack.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ReorderingStack.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/RuleCube.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/RuleCube.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/RuleCubeItem.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/RuleCubeItem.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/RuleCubeQueue.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/RuleCubeQueue.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ScoreComponentCollection.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ScoreComponentCollection.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ScoreIndexManager.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ScoreIndexManager.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ScoreProducer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ScoreProducer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Search.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Search.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/SearchCubePruning.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/SearchCubePruning.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/SearchNormal.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/SearchNormal.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Sentence.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Sentence.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/SentenceStats.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/SentenceStats.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/SquareMatrix.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/SquareMatrix.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/StaticData.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/StaticData.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TargetPhrase.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TargetPhrase.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TargetPhraseCollection.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TargetPhraseCollection.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ThreadPool.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ThreadPool.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Timer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Timer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TranslationOption.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TranslationOption.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TranslationOptionCollection.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TranslationOptionCollection.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TranslationOptionCollectionConfusionNet.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TranslationOptionCollectionConfusionNet.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TranslationOptionCollectionText.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TranslationOptionCollectionText.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TranslationOptionList.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TranslationOptionList.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TranslationSystem.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TranslationSystem.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TreeInput.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TreeInput.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TrellisPath.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TrellisPath.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TrellisPathCollection.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/TrellisPathCollection.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/UserMessage.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/UserMessage.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Util.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Util.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Word.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Word.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/WordLattice.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/WordLattice.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/WordsBitmap.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/WordsBitmap.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/WordsRange.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/WordsRange.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/XmlOption.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/XmlOption.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/hash.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/hash.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libmoses_internal.a</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libmoses_internal.a</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DotChartOnDisk.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DotChartOnDisk.o</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerCYKPlus.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerCYKPlus.o</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerMemory.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerMemory.o</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerOnDisk.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerOnDisk.o</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DotChartInMemory.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DotChartInMemory.o</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DotChartOnDisk.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DotChartOnDisk.o</locationURI>
+		</link>
+		<link>
+			<name>CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libCYKPlusParser.a</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libCYKPlusParser.a</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libdynsa.a</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libdynsa.a</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libdynsa.a</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libdynsa.a</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Base.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Base.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Factory.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Factory.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/IRST.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/IRST.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Implementation.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Implementation.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Joint.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Joint.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Ken.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Ken.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/MultiFactor.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/MultiFactor.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ORLM.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ORLM.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ParallelBackoff.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ParallelBackoff.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Rand.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Rand.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Remote.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Remote.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/SRI.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/SRI.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/SingleFactor.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/SingleFactor.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libLM.a</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libLM.a</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/Base.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/Base.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/Factory.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/Factory.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/Implementation.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/Implementation.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/Joint.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/Joint.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/Ken.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/Ken.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/MultiFactor.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/MultiFactor.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/Remote.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/Remote.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/SingleFactor.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/SingleFactor.o</locationURI>
+		</link>
+		<link>
+			<name>LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/libLM.a</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/libLM.a</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderCompact.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderCompact.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderFactory.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderFactory.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderHiero.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderHiero.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderStandard.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderStandard.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryALSuffixArray.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryALSuffixArray.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryNodeSCFG.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryNodeSCFG.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryOnDisk.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryOnDisk.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionarySCFG.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionarySCFG.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Trie.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Trie.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/UTrie.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/UTrie.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/UTrieNode.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/UTrieNode.o</locationURI>
+		</link>
+		<link>
+			<name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libRuleTable.a</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libRuleTable.a</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ApplicableRuleTrie.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ApplicableRuleTrie.o</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Parser.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Parser.o</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/StackLatticeBuilder.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/StackLatticeBuilder.o</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/VarSpanTrieBuilder.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/VarSpanTrieBuilder.o</locationURI>
+		</link>
+		<link>
+			<name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libScope3Parser.a</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libScope3Parser.a</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/params.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/params.o</locationURI>
+		</link>
+		<link>
+			<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/params.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/params.o</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/params.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/params.o</locationURI>
+		</link>
+		<link>
+			<name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</locationURI>
+		</link>
+	</linkedResources>
+</projectDescription>
diff --git a/contrib/other-builds/util/.cproject b/contrib/other-builds/util/.cproject
new file mode 100644
index 000000000..46e9a02b6
--- /dev/null
+++ b/contrib/other-builds/util/.cproject
@@ -0,0 +1,133 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?>
+
+<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+	<storageModule moduleId="org.eclipse.cdt.core.settings">
+		<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+				<externalSettings>
+					<externalSetting>
+						<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/util"/>
+						<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/util/Debug"/>
+						<entry flags="RESOLVED" kind="libraryFile" name="util"/>
+					</externalSetting>
+				</externalSettings>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
+					<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.1388624938" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
+							<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.1873607607" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
+							<builder buildPath="${workspace_loc:/util/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.2045214944" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
+							<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.589471640" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
+							<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.1543780089" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
+								<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.635667684" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.726000130" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.592875056" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1252745601" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1018784824" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
+								<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.623959371" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.892917290" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.include.paths.1401298824" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
+									<listOptionValue builtIn="false" value="/opt/local/include"/>
+								</option>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1420621104" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1724141901" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
+								<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.36067607" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.460849578" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.289923594" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+					<sourceEntries>
+						<entry excluding="util/bit_packing_test.cc" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+					</sourceEntries>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+		<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.172239955">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.172239955" moduleId="org.eclipse.cdt.core.settings" name="Release">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.172239955" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
+					<folderInfo id="cdt.managedbuild.config.macosx.exe.release.172239955." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.822279811" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
+							<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.533470822" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
+							<builder buildPath="${workspace_loc:/util/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.1705559832" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
+							<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.476073423" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
+							<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.384294309" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
+								<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1583097070" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.1872669585" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.453642480" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1010248526" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.549134109" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
+								<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1741196615" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1171704152" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.include.paths.883129829" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+									<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
+								</option>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.685540722" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.279247859" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
+								<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1371842588" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1581172024" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1632081663" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+	</storageModule>
+	<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+		<project id="util.cdt.managedbuild.target.macosx.exe.2006203724" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
+	</storageModule>
+	<storageModule moduleId="scannerConfiguration">
+		<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.172239955;cdt.managedbuild.config.macosx.exe.release.172239955.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.279247859;cdt.managedbuild.tool.gnu.c.compiler.input.1632081663">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.172239955;cdt.managedbuild.config.macosx.exe.release.172239955.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.549134109;cdt.managedbuild.tool.gnu.cpp.compiler.input.685540722">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447;cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1018784824;cdt.managedbuild.tool.gnu.cpp.compiler.input.1420621104">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447;cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1724141901;cdt.managedbuild.tool.gnu.c.compiler.input.289923594">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+	</storageModule>
+	<storageModule moduleId="refreshScope" versionNumber="1">
+		<resource resourceType="PROJECT" workspacePath="/util"/>
+	</storageModule>
+</cproject>
diff --git a/contrib/other-builds/util/.project b/contrib/other-builds/util/.project
new file mode 100644
index 000000000..537def437
--- /dev/null
+++ b/contrib/other-builds/util/.project
@@ -0,0 +1,90 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>util</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+			<triggers>clean,full,incremental,</triggers>
+			<arguments>
+				<dictionary>
+					<key>?name?</key>
+					<value></value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.append_environment</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
+					<value>all</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.buildArguments</key>
+					<value></value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.buildCommand</key>
+					<value>make</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.buildLocation</key>
+					<value>${workspace_loc:/util/Debug}</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
+					<value>clean</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.contents</key>
+					<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
+					<value>false</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.enableFullBuild</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
+					<value>all</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.stopOnError</key>
+					<value>true</value>
+				</dictionary>
+				<dictionary>
+					<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
+					<value>true</value>
+				</dictionary>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+			<triggers>full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.cdt.core.cnature</nature>
+		<nature>org.eclipse.cdt.core.ccnature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
+	</natures>
+	<linkedResources>
+		<link>
+			<name>util</name>
+			<type>2</type>
+			<locationURI>PARENT-3-PROJECT_LOC/util</locationURI>
+		</link>
+	</linkedResources>
+</projectDescription>
diff --git a/lm/bhiksha.cc b/lm/bhiksha.cc
index cdeafb478..870a4eee5 100644
--- a/lm/bhiksha.cc
+++ b/lm/bhiksha.cc
@@ -1,6 +1,7 @@
 #include "lm/bhiksha.hh"
 #include "lm/config.hh"
 #include "util/file.hh"
+#include "util/exception.hh"
 
 #include <limits>
 
diff --git a/moses-cmd/src/IOWrapper.h b/moses-cmd/src/IOWrapper.h
index e7936f33c..83c428d47 100644
--- a/moses-cmd/src/IOWrapper.h
+++ b/moses-cmd/src/IOWrapper.h
@@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef moses_cmd_IOWrapper_h
 #define moses_cmd_IOWrapper_h
 
+#include <cassert>
 #include <fstream>
 #include <ostream>
 #include <vector>
@@ -121,13 +122,13 @@ IOWrapper *GetIODevice(const Moses::StaticData &staticData);
 bool ReadInput(IOWrapper &ioWrapper, Moses::InputTypeEnum inputType, Moses::InputType*& source);
 void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder, bool reportSegmentation, bool reportAllFactors);
 void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>&,
-                 const TranslationSystem* system, long translationId, bool reportSegmentation);
+                 const Moses::TranslationSystem* system, long translationId, bool reportSegmentation);
 void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId);
 void OutputBestHypo(const std::vector<Moses::Word>&  mbrBestHypo, long /*translationId*/,
                     bool reportSegmentation, bool reportAllFactors, std::ostream& out);
 void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,bool reportSegmentation, bool reportAllFactors, std::ostream &out);
-void OutputInput(std::ostream& os, const Hypothesis* hypo);
-void OutputAlignment(OutputCollector* collector, size_t lineNo, const Hypothesis *hypo);
-void OutputAlignment(OutputCollector* collector, size_t lineNo,  const TrellisPath &path);
+void OutputInput(std::ostream& os, const Moses::Hypothesis* hypo);
+void OutputAlignment(Moses::OutputCollector* collector, size_t lineNo, const Moses::Hypothesis *hypo);
+void OutputAlignment(Moses::OutputCollector* collector, size_t lineNo,  const Moses::TrellisPath &path);
 
 #endif
diff --git a/moses/src/LM/ORLM.h b/moses/src/LM/ORLM.h
index c1ccb4387..55adb9d82 100644
--- a/moses/src/LM/ORLM.h
+++ b/moses/src/LM/ORLM.h
@@ -5,7 +5,7 @@
 #include "Factor.h"
 #include "Util.h"
 #include "LM/SingleFactor.h"
-#include "onlineRLM.h"
+#include "DynSAInclude/onlineRLM.h"
 //#include "multiOnlineRLM.h"
 #include "DynSAInclude/file.h"
 #include "DynSAInclude/vocab.h"
diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir
index 0db2ee437..f33067dc0 100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@@ -42,7 +42,7 @@ my $_CORES = 1;
 my $debug = 0; # debug this script, do not delete any files in debug mode
 
 # the following line is set installation time by 'make release'.  BEWARE!
-my $BINDIR="/home/hieu/workspace/bin/training-tools/";
+my $BINDIR="/Users/hieuhoang/workspace/bin/training-tools/";
 
 $_HELP = 1
     unless &GetOptions('root-dir=s' => \$_ROOT_DIR,
diff --git a/util/bit_packing.cc b/util/bit_packing.cc
index 41999b726..b5a14008b 100644
--- a/util/bit_packing.cc
+++ b/util/bit_packing.cc
@@ -10,7 +10,7 @@ template <bool> struct StaticCheck {};
 template <> struct StaticCheck<true> { typedef bool StaticAssertionPassed; };
 
 // If your float isn't 4 bytes, we're hosed.  
-typedef StaticCheck<sizeof(float) == 4>::StaticAssertionPassed FloatSize;
+//typedef StaticCheck<sizeof(float) == 4>::StaticAssertionPassed FloatSize;
 
 } // namespace
 

From 6d1165654caf8edc995a41a4c6c9666e65ebce96 Mon Sep 17 00:00:00 2001
From: phikoehn <pkoehn@inf.ed.ac.uk>
Date: Mon, 28 May 2012 20:15:58 +0100
Subject: [PATCH 30/38] script updates and added ems config help

---
 scripts/ems/example/config.basic        |   3 +-
 scripts/ems/example/config.factored     |   3 +-
 scripts/ems/example/config.hierarchical |   3 +-
 scripts/ems/example/config.syntax       |   3 +-
 scripts/ems/example/config.toy          |   3 +-
 scripts/generic/compound-splitter.perl  | 174 ++++++++++++++++++++----
 6 files changed, 161 insertions(+), 28 deletions(-)

diff --git a/scripts/ems/example/config.basic b/scripts/ems/example/config.basic
index c08f51764..939e13aad 100644
--- a/scripts/ems/example/config.basic
+++ b/scripts/ems/example/config.basic
@@ -260,7 +260,8 @@ script = $moses-script-dir/training/train-model.perl
 ### general options
 # these are options that are passed on to train-model.perl, for instance
 # * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
-# * "-sort-buffer-size 8G" to reduce on-disk sorting
+# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
+# * "-sort-parallel 8 -cores 8" to speed up phrase table building
 #
 #training-options = ""
 
diff --git a/scripts/ems/example/config.factored b/scripts/ems/example/config.factored
index 4bc198a6b..df9f28f33 100644
--- a/scripts/ems/example/config.factored
+++ b/scripts/ems/example/config.factored
@@ -280,7 +280,8 @@ script = $moses-script-dir/training/train-model.perl
 ### general options
 # these are options that are passed on to train-model.perl, for instance
 # * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
-# * "-sort-buffer-size 8G" to reduce on-disk sorting
+# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
+# * "-sort-parallel 8 -cores 8" to speed up phrase table building
 #
 #training-options = ""
 
diff --git a/scripts/ems/example/config.hierarchical b/scripts/ems/example/config.hierarchical
index b9858f393..6161f6ac4 100644
--- a/scripts/ems/example/config.hierarchical
+++ b/scripts/ems/example/config.hierarchical
@@ -260,7 +260,8 @@ script = $moses-script-dir/training/train-model.perl
 ### general options
 # these are options that are passed on to train-model.perl, for instance
 # * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
-# * "-sort-buffer-size 8G" to reduce on-disk sorting
+# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
+# * "-sort-parallel 8 -cores 8" to speed up phrase table building
 #
 #training-options = ""
 
diff --git a/scripts/ems/example/config.syntax b/scripts/ems/example/config.syntax
index 7c97b9ac4..635585844 100644
--- a/scripts/ems/example/config.syntax
+++ b/scripts/ems/example/config.syntax
@@ -264,7 +264,8 @@ script = $moses-script-dir/training/train-model.perl
 ### general options
 # these are options that are passed on to train-model.perl, for instance
 # * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
-# * "-sort-buffer-size 8G" to reduce on-disk sorting
+# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
+# * "-sort-parallel 8 -cores 8" to speed up phrase table building
 #
 #training-options = ""
 
diff --git a/scripts/ems/example/config.toy b/scripts/ems/example/config.toy
index 140a45229..7b8c95faa 100644
--- a/scripts/ems/example/config.toy
+++ b/scripts/ems/example/config.toy
@@ -244,7 +244,8 @@ script = $moses-script-dir/training/train-model.perl
 ### general options
 # these are options that are passed on to train-model.perl, for instance
 # * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
-# * "-sort-buffer-size 8G" to reduce on-disk sorting
+# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
+# * "-sort-parallel 8 -cores 8" to speed up phrase table building
 #
 #training-options = ""
 
diff --git a/scripts/generic/compound-splitter.perl b/scripts/generic/compound-splitter.perl
index ced661e3f..9948c648e 100755
--- a/scripts/generic/compound-splitter.perl
+++ b/scripts/generic/compound-splitter.perl
@@ -8,15 +8,23 @@ my $FILLER = ":s:es";
 my $MIN_SIZE = 3;
 my $MIN_COUNT = 5;
 my $MAX_COUNT = 5;
+my $FACTORED = 0;
+my $SYNTAX = 0;
+my $MARK_SPLIT = 0;
+my $BINARIZE = 0;
 $HELP = 1
     unless &GetOptions('corpus=s' => \$CORPUS,
 		       'model=s' => \$MODEL,
 		       'filler=s' => \$FILLER,
+           'factored' => \$FACTORED,
 		       'min-size=i' => \$MIN_SIZE,
 		       'min-count=i' => \$MIN_COUNT,
 		       'max-count=i' => \$MAX_COUNT,
 		       'help' => \$HELP,
 		       'verbose' => \$VERBOSE,
+           'syntax' => \$SYNTAX,
+           'binarize' => \$BINARIZE,
+           'mark-split' => \$MARK_SPLIT,
 		       'train' => \$TRAIN);
 
 if ($HELP ||
@@ -29,59 +37,152 @@ if ($HELP ||
     print "options: -min-size: minimum word size (default $MIN_SIZE)\n";
     print "         -min-count: minimum word count (default $MIN_COUNT)\n";
     print "         -filler: filler letters between words (default $FILLER)\n";
+    print "         -factor: factored data, assuming factor 0 as surface (default $FACTORED)\n";
+    print "         -syntax: syntactically parsed data (default $SYNTAX)\n";
+    print "         -mark-split: mark non-terminal label of split words (default $MARK_SPLIT)\n";
+    print "         -binarize: binarize subtree for split word (default $BINARIZE)\n";
     exit;
 }
 
 if ($TRAIN) {
-    &train;
+    if ($SYNTAX)      { &train_syntax(); }
+    elsif ($FACTORED) { &train_factored(); }
+    else              { &train(); }
 }
 else {
-    &apply;
+    &apply();
 }
 
 sub train {
-    my %WORD;
+    my %COUNT;
     open(CORPUS,$CORPUS) || die("ERROR: could not open corpus '$CORPUS'");
     while(<CORPUS>) {
 	chop; s/\s+/ /g; s/^ //; s/ $//;
 	foreach (split) {
-	    $WORD{$_}++;
+	    $COUNT{$_}++;
 	}
     }
-    close($CORPUS);
+    close(CORPUS);
+    &save_trained_model(\%COUNT);
+}
+
+sub save_trained_model {
+    my ($COUNT) = @_;
     my $id = 0;
     open(MODEL,">".$MODEL);
-    foreach my $word (keys %WORD) {
-	print MODEL "".(++$id)."\t".$word."\t".$WORD{$word}."\n";
+    foreach my $word (keys %$COUNT) {
+	print MODEL "".(++$id)."\t".$word."\t".$$COUNT{$word}."\n";
     }
     close(MODEL);
-    print STDERR "written model file with ".(scalar keys %WORD)." words.\n";
+    print STDERR "written model file with ".(scalar keys %$COUNT)." words.\n";
+}
+
+sub train_factored {
+  my (%COUNT,%FACTORED_COUNT);
+  # collect counts for interpretations for each surface word
+  open(CORPUS,$CORPUS) || die("ERROR: could not open corpus '$CORPUS'");
+  while(<CORPUS>) {
+    chop; s/\s+/ /g; s/^ //; s/ $//;
+    foreach my $factored_word (split) {
+      my $word = $factored_word;
+      $word =~ s/\|.+//g; # just first factor
+      $FACTORED_COUNT{$word}{$factored_word}++;
+	  }
+  }
+  close(CORPUS);
+  # only preserve most frequent interpretation, assign sum of counts
+  foreach my $word (keys %FACTORED_COUNT) {
+    my ($max,$best,$total) = (0,"",0);
+    foreach my $factored_word (keys %{$FACTORED_COUNT{$word}}) {
+      my $count = $FACTORED_COUNT{$word}{$factored_word};
+      $total += $count;
+      if ($count > $max) {
+        $max = $count; 
+        $best = $factored_word; 
+      }
+    }
+    $COUNT{$best} = $total;
+  }
+  &save_trained_model(\%COUNT);
+}
+
+sub train_syntax {
+  my (%COUNT,%LABELED_COUNT);
+  # collect counts for interpretations for each surface word
+  open(CORPUS,$CORPUS) || die("ERROR: could not open corpus '$CORPUS'");
+  while(<CORPUS>) {
+    chop; s/\s+/ /g; s/^ //; s/ $//;
+    my $label;
+    foreach (split) {
+      if (/^label="([^\"]+)"/) {
+        $label = $1;
+      }
+      elsif (! /^</) {
+        $LABELED_COUNT{$_}{$label}++;
+      }
+	  }
+  }
+  close(CORPUS);
+
+  # only preserve most frequent label, assign sum of counts
+  foreach my $word (keys %LABELED_COUNT) {
+    my ($max,$best,$total) = (0,"",0);
+    foreach my $label (keys %{$LABELED_COUNT{$word}}) {
+      my $count = $LABELED_COUNT{$word}{$label};
+      $total += $count;
+      if ($count > $max) {
+        $max = $count; 
+        $best = "$word $label"; 
+      }
+    }
+    $COUNT{$best} = $total;
+  }
+  &save_trained_model(\%COUNT);
 }
 
 sub apply {
-    my (%WORD,%TRUECASE);
+    my (%COUNT,%TRUECASE,%LABEL);
     open(MODEL,$MODEL) || die("ERROR: could not open model '$MODEL'");
     while(<MODEL>) {
 	chomp;
-	my ($id,$word,$count) = split(/\t/);
+	my ($id,$factored_word,$count) = split(/\t/);
+        my $label;
+        ($factored_word,$label) = split(/ /,$factored_word);
+        my $word = $factored_word;
+        $word =~ s/\|.+//g; # just first factor
         my $lc = lc($word);
 	# if word exists with multipe casings, only record most frequent
-        next if defined($WORD{$lc}) && $WORD{$lc} > $count;
-	$WORD{$lc} = $count;
-	$TRUECASE{$lc} = $word;
+        next if defined($COUNT{$lc}) && $COUNT{$lc} > $count;
+	$COUNT{$lc} = $count;
+	$TRUECASE{$lc} = $factored_word;
+  $LABEL{$lc} = $label if $SYNTAX;
     }
     close(MODEL);
 
     while(<STDIN>) {
 	my $first = 1;
 	chop; s/\s+/ /g; s/^ //; s/ $//;
-	foreach my $word (split) {
+  my @BUFFER; # for xml tags
+	foreach my $factored_word (split) {
 	    print " " unless $first;	    
 	    $first = 0;
 
+      # syntax: don't split xml
+      if ($SYNTAX && ($factored_word =~ /^</ || $factored_word =~ />$/)) {
+        push @BUFFER,$factored_word;
+        $first = 1;
+        next;
+      }
+
+      # get case class
+      my $word = $factored_word;
+      $word =~ s/\|.+//g; # just first factor
+      my $lc = lc($word);
+
 	    # don't split frequent words
-	    if (defined($WORD{$word}) && $WORD{$word}>=$MAX_COUNT) {
-		print $word;
+	    if (defined($COUNT{$lc}) && $COUNT{$lc}>=$MAX_COUNT) {
+    print join(" ",@BUFFER)." " if scalar(@BUFFER); @BUFFER = (); # clear buffer
+		print $factored_word;
 		next;
 	    }
 
@@ -100,17 +201,18 @@ sub apply {
 			my $subword = lc(substr($word,
 					        $start+length($filler),
 					        $end-$start+1-length($filler)));
-			next unless defined($WORD{$subword});			
-			next unless $WORD{$subword} >= $MIN_COUNT;
-			print STDERR "\tmatching word $start .. $end ($filler)$subword $WORD{$subword}\n" if $VERBOSE;
-			push @{$REACHABLE{$end}},"$start $TRUECASE{$subword} $WORD{$subword}";	
+			next unless defined($COUNT{$subword});			
+			next unless $COUNT{$subword} >= $MIN_COUNT;
+			print STDERR "\tmatching word $start .. $end ($filler)$subword $COUNT{$subword}\n" if $VERBOSE;
+			push @{$REACHABLE{$end}},"$start $TRUECASE{$subword} $COUNT{$subword}";	
 		    }
 		}
 	    }
 
 	    # no matches at all?
 	    if (!defined($REACHABLE{$final})) {
-		print $word;
+    print join(" ",@BUFFER)." " if scalar(@BUFFER); @BUFFER = (); # clear buffer
+		print $factored_word;
 		next;
 	    }
 
@@ -152,9 +254,35 @@ sub apply {
 		last unless scalar @{$REACHABLE{$final}} > $ITERATOR{$final};
 		for(my $i=0;$i<$increase;$i++) { $ITERATOR{$i}=0; }		    
 	    }
-	    $best_split = $word unless $best_split =~ / /; # do not change case for unsplit words
-	    print $best_split;
+      if ($best_split !~ / /) {
+        print join(" ",@BUFFER)." " if scalar(@BUFFER); @BUFFER = (); # clear buffer
+        print $factored_word; # unsplit word: emit the input token unchanged (original case, all factors)
+        next;
+      }
+      if (!$SYNTAX) {
+        print $best_split;
+      }
+      else {
+        $BUFFER[$#BUFFER] =~ s/label=\"/label=\"SPLIT-/ if $MARK_SPLIT;
+        $BUFFER[$#BUFFER] =~ /label=\"([^\"]+)\"/ || die("ERROR: $BUFFER[$#BUFFER]\n");
+        my $pos = $1;
+        print join(" ",@BUFFER)." " if scalar(@BUFFER); @BUFFER = (); # clear buffer
+
+        my @SPLIT = split(/ /,$best_split);
+        my @OUT = ();
+        if ($BINARIZE) {
+          for(my $w=0;$w<scalar(@SPLIT)-2;$w++) {
+            push @OUT,"<tree label=\"\@$pos\">";
+          }
+        }
+        for(my $w=0;$w<scalar(@SPLIT);$w++) {
+          if ($BINARIZE && $w>=2) { push @OUT, "</tree>"; }
+          push @OUT,"<tree label=\"".$LABEL{lc($SPLIT[$w])}."\"> $SPLIT[$w] </tree>";
+        }
+        print join(" ",@OUT);
+      }
 	}
+  print " ".join(" ",@BUFFER) if scalar(@BUFFER); @BUFFER = (); # clear buffer
 	print "\n";
     }
 }

From fd577d7a65cab923b9102d61873a032654d573a1 Mon Sep 17 00:00:00 2001
From: Colin Cherry <colin.a.cherry@gmail.com>
Date: Tue, 29 May 2012 13:38:57 -0400
Subject: [PATCH 31/38] Batch k-best MIRA is written and integrated into
 mert-moses.pl Regression tests all check out, and kbmira seems to work fine
 on a Hansard French->English task.

HypPackEnumerator class may be of interest to pro.cpp and future
optimizers, as it abstracts a lot of the boilerplate involved in
enumerating multiple k-best lists.

MiraWeightVector is not really mira-specific - just a weight vector
that enables efficient averaging. Could be useful to a perceptron
as well. Same goes for MiraFeatureVector.

Interaction with sparse features is written, but untested.
---
 .gitignore                     |   1 +
 mert/BleuScorer.cpp            |  41 +++++
 mert/BleuScorer.h              |  10 ++
 mert/BleuScorerTest.cpp        |   6 +-
 mert/DataTest.cpp              |   4 +-
 mert/FeatureDataIterator.cpp   |  11 ++
 mert/FeatureDataIterator.h     |   3 +
 mert/FeatureDataTest.cpp       |   4 +-
 mert/FeatureStats.cpp          |  39 +++++
 mert/FeatureStats.h            |  10 +-
 mert/HypPackEnumerator.cpp     | 187 +++++++++++++++++++++
 mert/HypPackEnumerator.h       | 101 +++++++++++
 mert/Jamfile                   |   7 +-
 mert/MiraFeatureVector.cpp     | 144 ++++++++++++++++
 mert/MiraFeatureVector.h       |  51 ++++++
 mert/MiraWeightVector.cpp      | 143 ++++++++++++++++
 mert/MiraWeightVector.h        | 106 ++++++++++++
 mert/kbmira.cpp                | 298 +++++++++++++++++++++++++++++++++
 scripts/training/mert-moses.pl |  25 ++-
 19 files changed, 1181 insertions(+), 10 deletions(-)
 create mode 100644 mert/HypPackEnumerator.cpp
 create mode 100644 mert/HypPackEnumerator.h
 create mode 100644 mert/MiraFeatureVector.cpp
 create mode 100644 mert/MiraFeatureVector.h
 create mode 100644 mert/MiraWeightVector.cpp
 create mode 100644 mert/MiraWeightVector.h
 create mode 100644 mert/kbmira.cpp

diff --git a/.gitignore b/.gitignore
index d4493bce1..0d6997e8d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,6 +17,7 @@ mert/extractor
 mert/mert
 mert/megam_i686.opt
 mert/pro
+mert/kbmira
 misc/processLexicalTable
 misc/processPhraseTable
 misc/queryLexicalTable
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index 22ce81798..a8a0256f2 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -232,3 +232,44 @@ float sentenceLevelBleuPlusOne(const vector<float>& stats) {
   }
   return exp(logbleu);
 }
+
+float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg)
+{
+  // Sum sent and background
+  std::vector<float> stats;
+  CHECK(sent.size()==bg.size());
+  CHECK(sent.size()==kBleuNgramOrder*2+1);
+  for(size_t i=0;i<sent.size();i++) 
+    stats.push_back(sent[i]+bg[i]);
+
+  // Calculate BLEU
+  float logbleu = 0.0;
+  for (int j = 0; j < kBleuNgramOrder; j++) {
+    logbleu += log(stats[2 * j]) - log(stats[2 * j + 1]);
+  }
+  logbleu /= kBleuNgramOrder;
+  const float brevity = 1.0 - stats[(kBleuNgramOrder * 2)] / stats[1];
+  
+  if (brevity < 0.0) {
+    logbleu += brevity;
+  }
+
+  // Exponentiate and scale by reference length (as per Chiang et al 08)
+  return exp(logbleu) * stats[kBleuNgramOrder*2];
+}
+
+float unsmoothedBleu(const std::vector<float>& stats) {
+  CHECK(stats.size() == kBleuNgramOrder * 2 + 1);
+
+  float logbleu = 0.0;
+  for (int j = 0; j < kBleuNgramOrder; j++) {
+    logbleu += log(stats[2 * j]) - log(stats[2 * j + 1]);
+  }
+  logbleu /= kBleuNgramOrder;
+  const float brevity = 1.0 - stats[(kBleuNgramOrder * 2)] / stats[1];
+
+  if (brevity < 0.0) {
+    logbleu += brevity;
+  }
+  return exp(logbleu);
+}
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index 1f568f744..8f1384f5a 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -70,4 +70,14 @@ private:
  */
 float sentenceLevelBleuPlusOne(const std::vector<float>& stats);
 
+/** Computes sentence-level BLEU score given a background corpus.
+ * This function is used in batch MIRA.
+ */
+float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg);
+
+/**
+ * Computes plain old BLEU from a vector of stats
+ */
+float unsmoothedBleu(const std::vector<float>& stats);
+
 #endif  // MERT_BLEU_SCORER_H_
diff --git a/mert/BleuScorerTest.cpp b/mert/BleuScorerTest.cpp
index 5a7de9654..5960507e8 100644
--- a/mert/BleuScorerTest.cpp
+++ b/mert/BleuScorerTest.cpp
@@ -152,10 +152,10 @@ BOOST_AUTO_TEST_CASE(bleu_count_ngrams) {
   //          "girl with a telescope", "with a telescope ."
   NgramCounts counts;
   BOOST_REQUIRE(scorer.CountNgrams(line, counts, kBleuNgramOrder) == 8);
-  BOOST_CHECK_EQUAL(25, counts.size());
+  BOOST_CHECK_EQUAL((std::size_t)25, counts.size());
 
   mert::Vocabulary* vocab = scorer.GetVocab();
-  BOOST_CHECK_EQUAL(7, vocab->size());
+  BOOST_CHECK_EQUAL((std::size_t)7, vocab->size());
 
   std::vector<std::string> res;
   Tokenize(line.c_str(), ' ', &res);
@@ -203,7 +203,7 @@ BOOST_AUTO_TEST_CASE(bleu_clipped_counts) {
   ScoreStats entry;
   scorer.prepareStats(0, line, entry);
 
-  BOOST_CHECK_EQUAL(entry.size(), 2 * kBleuNgramOrder + 1);
+  BOOST_CHECK_EQUAL(entry.size(), (std::size_t)(2 * kBleuNgramOrder + 1));
 
   // Test hypothesis ngram counts
   BOOST_CHECK_EQUAL(entry.get(0), 5);  // unigram
diff --git a/mert/DataTest.cpp b/mert/DataTest.cpp
index b538c99cb..e94d4ffe9 100644
--- a/mert/DataTest.cpp
+++ b/mert/DataTest.cpp
@@ -33,8 +33,8 @@ BOOST_AUTO_TEST_CASE(shard_basic) {
   std::vector<Data> shards;
   data.createShards(2,0,"",shards);
 
-  BOOST_CHECK_EQUAL(shards.size(),2);
-  BOOST_CHECK_EQUAL(shards[1].getFeatureData()->size(),2);
+  BOOST_CHECK_EQUAL(shards.size(),(std::size_t)2);
+  BOOST_CHECK_EQUAL(shards[1].getFeatureData()->size(),(std::size_t)2);
 }
 
 BOOST_AUTO_TEST_CASE(init_feature_map_test) {
diff --git a/mert/FeatureDataIterator.cpp b/mert/FeatureDataIterator.cpp
index 00b59bc38..c0ace87e6 100644
--- a/mert/FeatureDataIterator.cpp
+++ b/mert/FeatureDataIterator.cpp
@@ -18,6 +18,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/
 #include <iostream>
 #include <sstream>
+#include <boost/functional/hash.hpp>
 
 #include "util/tokenize_piece.hh"
 
@@ -47,6 +48,16 @@ float ParseFloat(const StringPiece& str) {
   return value;
 }
 
+bool operator==(FeatureDataItem const& item1, FeatureDataItem const& item2) {  // equal iff both the dense and the sparse parts match
+  return item1.dense==item2.dense && item1.sparse==item2.sparse;
+}
+
+size_t hash_value(FeatureDataItem const& item) {
+  size_t seed = 0;
+  boost::hash_combine(seed,item.dense);
+  boost::hash_combine(seed,item.sparse);
+  return seed;
+}
 
 
 FeatureDataIterator::FeatureDataIterator() {}
diff --git a/mert/FeatureDataIterator.h b/mert/FeatureDataIterator.h
index 58345829c..9bc5f03f7 100644
--- a/mert/FeatureDataIterator.h
+++ b/mert/FeatureDataIterator.h
@@ -61,6 +61,9 @@ class FeatureDataItem
     SparseVector sparse;
 };
 
+bool operator==(FeatureDataItem const& item1, FeatureDataItem const& item2);
+std::size_t hash_value(FeatureDataItem const& item);
+
 class FeatureDataIterator : 
   public boost::iterator_facade<FeatureDataIterator,
                                 const std::vector<FeatureDataItem>,
diff --git a/mert/FeatureDataTest.cpp b/mert/FeatureDataTest.cpp
index 49c9d0fd5..ed70f7971 100644
--- a/mert/FeatureDataTest.cpp
+++ b/mert/FeatureDataTest.cpp
@@ -13,7 +13,7 @@ void CheckFeatureMap(const FeatureData* feature_data,
     std::stringstream ss;
     ss << str << "_" << i;
     const std::string& s = ss.str();
-    BOOST_CHECK_EQUAL(feature_data->getFeatureIndex(s), *cnt);
+    BOOST_CHECK_EQUAL(feature_data->getFeatureIndex(s), (std::size_t)(*cnt));
     BOOST_CHECK_EQUAL(feature_data->getFeatureName(*cnt).c_str(), s);
     ++(*cnt);
   }
@@ -35,6 +35,6 @@ BOOST_AUTO_TEST_CASE(set_feature_map) {
   CheckFeatureMap(&feature_data, "lm", 2, &cnt);
   CheckFeatureMap(&feature_data, "tm", 5, &cnt);
 
-  BOOST_CHECK_EQUAL(feature_data.getFeatureIndex("w_0"), cnt);
+  BOOST_CHECK_EQUAL(feature_data.getFeatureIndex("w_0"), (std::size_t)cnt);
   BOOST_CHECK_EQUAL(feature_data.getFeatureName(cnt).c_str(), "w_0");
 }
diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp
index 5d7c5c7b4..2c6cdb88f 100644
--- a/mert/FeatureStats.cpp
+++ b/mert/FeatureStats.cpp
@@ -10,6 +10,8 @@
 
 #include <fstream>
 #include <cmath>
+#include <boost/functional/hash.hpp>
+
 #include "Util.h"
 
 using namespace std;
@@ -81,6 +83,43 @@ SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs) {
   return res;
 }
 
+std::vector<std::size_t> SparseVector::feats() const {
+  std::vector<std::size_t> toRet;
+  for(fvector_t::const_iterator iter = m_fvector.begin();
+      iter!=m_fvector.end();
+      iter++) {
+    toRet.push_back(iter->first);
+  }
+  return toRet;
+}
+
+std::size_t SparseVector::encode(const std::string& name) {
+  name2id_t::const_iterator name2id_iter = m_name_to_id.find(name);
+  size_t id = 0;
+  if (name2id_iter == m_name_to_id.end()) {
+    id = m_id_to_name.size();
+    m_id_to_name.push_back(name);
+    m_name_to_id[name] = id;
+  } else {
+    id = name2id_iter->second;
+  }
+  return id;
+}
+
+std::string SparseVector::decode(std::size_t id) {
+  return m_id_to_name[id];
+}
+
+bool operator==(SparseVector const& item1, SparseVector const& item2) {
+  return item1.m_fvector==item2.m_fvector;
+}
+
+std::size_t hash_value(SparseVector const& item) {
+  boost::hash<SparseVector::fvector_t> hasher;
+  return hasher(item.m_fvector);
+}
+
+
 FeatureStats::FeatureStats()
     : m_available_size(kAvailableSize), m_entries(0),
       m_array(new FeatureStatsType[m_available_size]) {}
diff --git a/mert/FeatureStats.h b/mert/FeatureStats.h
index 69591001b..2a4e9882c 100644
--- a/mert/FeatureStats.h
+++ b/mert/FeatureStats.h
@@ -28,11 +28,19 @@ public:
   void set(const std::string& name, FeatureStatsType value);
   void clear();
   std::size_t size() const { return m_fvector.size(); }
-
+   
   void write(std::ostream& out, const std::string& sep = " ") const;
 
   SparseVector& operator-=(const SparseVector& rhs);
 
+  // Added by cherryc
+  std::vector<std::size_t> feats() const;
+  friend bool operator==(SparseVector const& item1, SparseVector const& item2);
+  friend std::size_t hash_value(SparseVector const& item);
+  static std::size_t encode(const std::string& feat);
+  static std::string decode(std::size_t feat);
+  // End added by cherryc
+
 private:
   static name2id_t m_name_to_id;
   static id2name_t m_id_to_name;
diff --git a/mert/HypPackEnumerator.cpp b/mert/HypPackEnumerator.cpp
new file mode 100644
index 000000000..9da627212
--- /dev/null
+++ b/mert/HypPackEnumerator.cpp
@@ -0,0 +1,187 @@
+#include "HypPackEnumerator.h"
+
+#include <cassert>
+#include <algorithm>
+#include <boost/unordered_set.hpp>
+
+StreamingHypPackEnumerator::StreamingHypPackEnumerator
+(
+ vector<std::string> const& featureFiles,
+ vector<std::string> const& scoreFiles
+ )
+  : m_featureFiles(featureFiles),
+    m_scoreFiles(scoreFiles)
+{
+  if (scoreFiles.size() == 0 || featureFiles.size() == 0) {
+    cerr << "No data to process" << endl;
+    exit(0);
+  }
+  
+  if (featureFiles.size() != scoreFiles.size()) {
+    cerr << "Error: Number of feature files (" << featureFiles.size() <<
+      ") does not match number of score files (" << scoreFiles.size() << ")" << endl;
+    exit(1);
+  }
+  
+  m_num_lists = scoreFiles.size();
+  m_primed = false;
+  m_iNumDense = -1;
+}
+
+size_t StreamingHypPackEnumerator::num_dense() const {
+  if(m_iNumDense<0) {
+    cerr << "Error: Requested num_dense() for an unprimed StreamingHypPackEnumerator" << endl;
+    exit(1);
+  }
+  return (size_t) m_iNumDense;
+}
+
+// Rebuilds m_current_indexes with the (list, hypothesis) positions of the current sentence, deduped across lists.
+void StreamingHypPackEnumerator::prime(){
+  m_current_indexes.clear();
+  boost::unordered_set<FeatureDataItem> seen;
+  m_primed = true;
+  
+  for (size_t i = 0; i < m_num_lists; ++i) {
+    if (m_featureDataIters[i] == FeatureDataIterator::end()) {
+      cerr << "Error: Feature file " << i << " ended prematurely" << endl;
+      exit(1);
+    }
+    if (m_scoreDataIters[i] == ScoreDataIterator::end()) {
+      cerr << "Error: Score file " << i << " ended prematurely" << endl;
+      exit(1);
+    }
+    if (m_featureDataIters[i]->size() != m_scoreDataIters[i]->size()) {
+      cerr << "Error: For sentence " << m_sentenceId << " features and scores have different size" << endl;
+      exit(1);
+    }
+    for (size_t j = 0; j < m_featureDataIters[i]->size(); ++j) {
+      const FeatureDataItem& item = m_featureDataIters[i]->operator[](j);  // borrow instead of deep-copying
+      // Dedup: insert() reports whether the item was new, so a single hash lookup is enough
+      if(seen.insert(item).second) {
+        // Confirm dense features are always the same
+        int iDense = item.dense.size();
+        if(m_iNumDense != iDense) {
+          if(m_iNumDense==-1) m_iNumDense = iDense;
+          else {
+            cerr << "Error: expecting constant number of dense features: "
+                 << m_iNumDense << " != " << iDense << endl;
+            exit(1);
+          }
+        }
+        // Store item for retrieval
+        m_current_indexes.push_back(pair<size_t,size_t>(i,j));
+      }
+    }
+  }
+}
+
+void StreamingHypPackEnumerator::reset(){
+  m_featureDataIters.clear();
+  m_scoreDataIters.clear();
+  for (size_t i = 0; i < m_num_lists; ++i) {
+    m_featureDataIters.push_back(FeatureDataIterator(m_featureFiles[i]));
+    m_scoreDataIters.push_back(ScoreDataIterator(m_scoreFiles[i]));
+  }
+  m_sentenceId=0;
+  prime();
+}
+
+bool StreamingHypPackEnumerator::finished(){
+  return m_featureDataIters[0]==FeatureDataIterator::end();
+}
+
+void StreamingHypPackEnumerator::next(){
+  if(!m_primed) {
+    cerr << "Enumerating an unprimed HypPackEnumerator" << endl;
+    exit(1);
+  }
+  for (size_t i = 0; i < m_num_lists; ++i) {
+    ++m_featureDataIters[i];
+    ++m_scoreDataIters[i];
+  }
+  m_sentenceId++;
+  if(!finished()) prime();
+}
+
+size_t StreamingHypPackEnumerator::cur_size(){
+  if(!m_primed) {
+    cerr << "Querying size from an unprimed HypPackEnumerator" << endl;
+    exit(1);
+  }
+  return m_current_indexes.size();
+}
+
+const FeatureDataItem& StreamingHypPackEnumerator::featuresAt(size_t index){
+  if(!m_primed) {
+    cerr << "Querying features from an unprimed HypPackEnumerator" << endl;
+    exit(1);
+  }
+  const pair<size_t,size_t>& pij = m_current_indexes[index];
+  return m_featureDataIters[pij.first]->operator[](pij.second);
+}
+
+const ScoreDataItem& StreamingHypPackEnumerator::scoresAt(size_t index) {
+  if(!m_primed) {
+    cerr << "Querying scores from an unprimed HypPackEnumerator" << endl;
+    exit(1);
+  }
+  const pair<size_t,size_t>& pij = m_current_indexes[index];
+  return m_scoreDataIters[pij.first]->operator[](pij.second);
+}
+
+/* --------- RandomAccessHypPackEnumerator ------------- */
+
+RandomAccessHypPackEnumerator::RandomAccessHypPackEnumerator(vector<string> const& featureFiles,
+                                                             vector<string> const& scoreFiles,
+                                                             bool no_shuffle)
+{
+  StreamingHypPackEnumerator train(featureFiles,scoreFiles);
+  size_t index=0;
+  for(train.reset(); !train.finished(); train.next()) {
+    m_features.push_back(vector<FeatureDataItem>());
+    m_scores.push_back(vector<ScoreDataItem>());
+    for(size_t j=0;j<train.cur_size();j++) {
+      m_features.back().push_back(train.featuresAt(j));
+      m_scores.back().push_back(train.scoresAt(j));
+    }
+    m_indexes.push_back(index++);
+  }
+
+  m_cur_index = 0;
+  m_no_shuffle = no_shuffle;
+  m_num_dense = train.num_dense();
+}
+
+size_t RandomAccessHypPackEnumerator::num_dense() const {
+  return m_num_dense;
+}
+  
+void RandomAccessHypPackEnumerator::reset() {
+  m_cur_index = 0;
+  if(!m_no_shuffle) random_shuffle(m_indexes.begin(),m_indexes.end());
+}
+bool RandomAccessHypPackEnumerator::finished() {
+  return m_cur_index >= m_indexes.size();
+}
+void RandomAccessHypPackEnumerator::next() {
+  m_cur_index++;
+}
+
+size_t RandomAccessHypPackEnumerator::cur_size() {
+  assert(m_features[m_indexes[m_cur_index]].size()==m_scores[m_indexes[m_cur_index]].size());
+  return m_features[m_indexes[m_cur_index]].size();
+}
+const FeatureDataItem& RandomAccessHypPackEnumerator::featuresAt(size_t i) {
+  return m_features[m_indexes[m_cur_index]][i];
+}
+const ScoreDataItem& RandomAccessHypPackEnumerator::scoresAt(size_t i) {
+  return m_scores[m_indexes[m_cur_index]][i];
+}
+
+  
+// --Emacs trickery--
+// Local Variables:
+// mode:c++
+// c-basic-offset:2
+// End:
diff --git a/mert/HypPackEnumerator.h b/mert/HypPackEnumerator.h
new file mode 100644
index 000000000..9f44c3372
--- /dev/null
+++ b/mert/HypPackEnumerator.h
@@ -0,0 +1,101 @@
+/*
+ * HypPackCollection.h
+ * kbmira - k-best Batch MIRA
+ *
+ * Abstracts away the mess of iterating through multiple
+ * collections of k-best lists, as well as deduping
+ */
+
+#ifndef MERT_HYP_PACK_COLLECTION_H
+#define MERT_HYP_PACK_COLLECTION_H
+
+#include <string>
+#include <vector>
+#include <utility>
+
+#include "FeatureDataIterator.h"
+#include "ScoreDataIterator.h"
+
+using namespace std;
+
+// Start with these abstract classes
+
+class HypPackEnumerator {  // abstract cursor over per-sentence packs of k-best hypotheses
+public:
+  virtual ~HypPackEnumerator() {}  // polymorphic base: makes delete through a base pointer well defined
+  virtual void reset() = 0;     // (re)start the enumeration
+  virtual bool finished() = 0;  // true once there is no current pack left
+  virtual void next() = 0;      // advance to the following pack
+  virtual size_t cur_size() = 0;         // number of hypotheses in the current pack
+  virtual size_t num_dense() const = 0;  // number of dense features per hypothesis
+  virtual const FeatureDataItem& featuresAt(size_t i) = 0;  // features of hypothesis i in the current pack
+  virtual const ScoreDataItem& scoresAt(size_t i) = 0;      // score stats of hypothesis i in the current pack
+};
+
+// Instantiation that streams from disk
+// Low-memory, low-speed, sequential access
+class StreamingHypPackEnumerator : public HypPackEnumerator {
+public:
+  StreamingHypPackEnumerator(vector<string> const& featureFiles,
+                             vector<string> const& scoreFiles
+                             );
+
+  virtual size_t num_dense() const;
+  
+  virtual void reset();
+  virtual bool finished();
+  virtual void next();
+
+  virtual size_t cur_size();
+  virtual const FeatureDataItem& featuresAt(size_t i);
+  virtual const ScoreDataItem& scoresAt(size_t i);
+  
+private:
+  void prime();
+  size_t m_num_lists;
+  size_t m_sentenceId;
+  vector<string> m_featureFiles;
+  vector<string> m_scoreFiles;
+
+  bool m_primed;
+  int m_iNumDense;
+  vector<FeatureDataIterator>  m_featureDataIters;
+  vector<ScoreDataIterator>    m_scoreDataIters;
+  vector<pair<size_t,size_t> > m_current_indexes;
+};
+
+// Instantiation that reads into memory
+// High-memory, high-speed, random access
+// (Actually randomizes with each call to reset)
+class RandomAccessHypPackEnumerator : public HypPackEnumerator {
+public:
+  RandomAccessHypPackEnumerator(vector<string> const& featureFiles,
+                                vector<string> const& scoreFiles,
+                                bool no_shuffle);
+
+  virtual size_t num_dense() const;
+  
+  virtual void reset();
+  virtual bool finished();
+  virtual void next();
+
+  virtual size_t cur_size();
+  virtual const FeatureDataItem& featuresAt(size_t i);
+  virtual const ScoreDataItem& scoresAt(size_t i);
+
+private:
+  bool m_no_shuffle;
+  size_t m_cur_index;
+  size_t m_num_dense;
+  vector<size_t> m_indexes;
+  vector<vector<FeatureDataItem> > m_features;
+  vector<vector<ScoreDataItem> > m_scores;
+};
+
+#endif // MERT_HYP_PACK_COLLECTION_H
+
+// --Emacs trickery--
+// Local Variables:
+// mode:c++
+// c-basic-offset:2
+// End:
diff --git a/mert/Jamfile b/mert/Jamfile
index 2eaa7143c..00219f878 100644
--- a/mert/Jamfile
+++ b/mert/Jamfile
@@ -15,6 +15,9 @@ FeatureStats.cpp
 FeatureArray.cpp
 FeatureData.cpp
 FeatureDataIterator.cpp
+MiraFeatureVector.cpp
+MiraWeightVector.cpp
+HypPackEnumerator.cpp
 Data.cpp
 BleuScorer.cpp
 SemposScorer.cpp
@@ -52,7 +55,9 @@ exe evaluator : evaluator.cpp mert_lib ;
 
 exe pro : pro.cpp mert_lib ..//boost_program_options ;
 
-alias programs : mert extractor evaluator pro ;
+exe kbmira : kbmira.cpp mert_lib ..//boost_program_options ;
+
+alias programs : mert extractor evaluator pro kbmira ;
 
 unit-test bleu_scorer_test : BleuScorerTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_framework ;
diff --git a/mert/MiraFeatureVector.cpp b/mert/MiraFeatureVector.cpp
new file mode 100644
index 000000000..9636b2fcd
--- /dev/null
+++ b/mert/MiraFeatureVector.cpp
@@ -0,0 +1,144 @@
+#include <cmath>
+
+#include "MiraFeatureVector.h"
+
+MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
+  : m_dense(vec.dense)
+{
+  // Copy the sparse entries, shifting every id past the dense block so that
+  // dense and sparse features share a single index space. Ids and values
+  // are kept in two parallel arrays.
+  const size_t denseCount = m_dense.size();
+  vector<size_t> rawIds = vec.sparse.feats();
+  size_t prevId = 0;
+
+  for(size_t k=0; k<rawIds.size(); ++k)
+  {
+    const size_t shiftedId = denseCount + rawIds[k];
+    m_sparseFeats.push_back(shiftedId);
+    m_sparseVals.push_back(vec.sparse.get(rawIds[k]));
+
+    // From the second entry on, every id must exceed its predecessor;
+    // operator- walks the sparse ids as sorted lists and relies on this.
+    if(k>0 && prevId>=shiftedId) {
+      cerr << "Error: Feature indeces must be strictly ascending coming out of SparseVector" << endl;
+      exit(1);
+    }
+    prevId = shiftedId;
+  }
+}
+
+MiraFeatureVector::MiraFeatureVector(const MiraFeatureVector& other)
+  : m_dense(other.m_dense),
+    m_sparseFeats(other.m_sparseFeats),
+    m_sparseVals(other.m_sparseVals)
+{
+  // Sparse ids and values are parallel arrays: a length mismatch is fatal.
+  if(m_sparseFeats.size()==m_sparseVals.size()) return;
+  cerr << "Error: mismatching sparse feat and val sizes" << endl;
+  exit(1);
+}
+
+MiraFeatureVector::MiraFeatureVector(const vector<ValType>& dense,
+                                     const vector<size_t>& sparseFeats,
+                                     const vector<ValType>& sparseVals)
+  : m_dense(dense),
+    m_sparseFeats(sparseFeats),
+    m_sparseVals(sparseVals)
+{
+  // Each sparse id needs exactly one value; anything else aborts the program.
+  if(m_sparseFeats.size()==m_sparseVals.size()) return;
+  cerr << "Error: mismatching sparse feat and val sizes" << endl;
+  exit(1);
+}
+
+ValType MiraFeatureVector::val(size_t index) const { // value of the index-th entry, index in [0,size())
+  if(index < m_dense.size())
+    return m_dense[index];
+  else // sparse values sit in their own array: rebase the index past the dense block
+    return m_sparseVals[index - m_dense.size()];
+}
+
+size_t MiraFeatureVector::feat(size_t index) const { // feature id of the index-th entry, index in [0,size())
+  if(index < m_dense.size())
+    return index; // a dense feature's id is its position
+  else // sparse ids sit in their own array: rebase the index past the dense block
+    return m_sparseFeats[index - m_dense.size()];
+}
+
+size_t MiraFeatureVector::size() const { // number of stored entries: dense plus sparse
+  return m_dense.size() + m_sparseVals.size();
+}
+
+ValType MiraFeatureVector::sqrNorm() const {
+  // Sum of squared values: first the dense entries, then the sparse ones.
+  ValType total = 0.0;
+  typedef vector<ValType>::const_iterator Iter;
+  for(Iter d=m_dense.begin(); d!=m_dense.end(); ++d) total += (*d) * (*d);
+  for(Iter s=m_sparseVals.begin(); s!=m_sparseVals.end(); ++s) total += (*s) * (*s);
+  return total;
+}
+
+MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector& b)
+{
+  // Dense part: element-wise difference, which requires equal lengths.
+  const size_t denseCount = a.m_dense.size();
+  if(denseCount!=b.m_dense.size()) {
+    cerr << "Mismatching dense vectors passed to MiraFeatureVector subtraction" << endl;
+    exit(1);
+  }
+  vector<ValType> dense;
+  for(size_t d=0; d<denseCount; ++d) {
+    dense.push_back(a.m_dense[d] - b.m_dense[d]);
+  }
+
+  // Sparse part: walk both id lists in lock-step, always consuming the
+  // smaller id first (assumes both lists are in ascending order).
+  const vector<size_t>& idsA = a.m_sparseFeats;
+  const vector<size_t>& idsB = b.m_sparseFeats;
+  vector<size_t> sparseFeats;
+  vector<ValType> sparseVals;
+  size_t ia=0, ib=0;
+  while(ia < idsA.size() && ib < idsB.size()) {
+    if(idsA[ia] == idsB[ib]) {
+      // Present in both: keep the difference unless it is (almost) zero.
+      ValType newVal = a.m_sparseVals[ia] - b.m_sparseVals[ib];
+      if(abs(newVal)>1e-6) {
+        sparseFeats.push_back(idsA[ia]);
+        sparseVals.push_back(newVal);
+      }
+      ++ia;
+      ++ib;
+    }
+    else if(idsA[ia] < idsB[ib]) {
+      // Only in a: carried over unchanged.
+      sparseFeats.push_back(idsA[ia]);
+      sparseVals.push_back(a.m_sparseVals[ia]);
+      ++ia;
+    }
+    else {
+      // Only in b: carried over negated.
+      sparseFeats.push_back(idsB[ib]);
+      sparseVals.push_back(-b.m_sparseVals[ib]);
+      ++ib;
+    }
+  }
+
+  // At most one of the two lists still has entries left.
+  for(; ia<idsA.size(); ++ia) {
+    sparseFeats.push_back(idsA[ia]);
+    sparseVals.push_back(a.m_sparseVals[ia]);
+  }
+  for(; ib<idsB.size(); ++ib) {
+    sparseFeats.push_back(idsB[ib]);
+    sparseVals.push_back(-b.m_sparseVals[ib]);
+  }
+
+  return MiraFeatureVector(dense,sparseFeats,sparseVals);
+}
+  
+// --Emacs trickery--
+// Local Variables:
+// mode:c++
+// c-basic-offset:2
+// End:
diff --git a/mert/MiraFeatureVector.h b/mert/MiraFeatureVector.h
new file mode 100644
index 000000000..14336c56f
--- /dev/null
+++ b/mert/MiraFeatureVector.h
@@ -0,0 +1,51 @@
+/*
+ * MiraFeatureVector.h
+ * kbmira - k-best Batch MIRA
+ *
+ * An alternative to the existing SparseVector
+ * and FeatureDataItem combo. Should be as memory
+ * efficient, and a little more time efficient,
+ * and should save me from constantly hacking
+ * SparseVector
+ */
+
+#ifndef MERT_MIRA_FEATURE_VECTOR_H
+#define MERT_MIRA_FEATURE_VECTOR_H
+
+#include <vector>
+
+#include "FeatureDataIterator.h"
+
+using namespace std;
+
+typedef FeatureStatsType ValType;
+
+class MiraFeatureVector {
+public:
+  MiraFeatureVector(const FeatureDataItem& vec); // copies dense values; sparse ids are shifted by the dense count
+  MiraFeatureVector(const MiraFeatureVector& other);
+  MiraFeatureVector(const vector<ValType>& dense,
+                    const vector<size_t>& sparseFeats,
+                    const vector<ValType>& sparseVals); // exits if the two sparse arrays differ in length
+  // Entry accessors: dense entries come first, sparse entries after them
+  ValType val(size_t index) const;
+  size_t feat(size_t index) const; // for a dense entry the feature id is the index itself
+  size_t size() const;             // dense count + sparse count
+  ValType sqrNorm() const;         // sum of squared values over all entries
+  // Difference a - b; exits if the dense parts differ in length
+  friend MiraFeatureVector operator-(const MiraFeatureVector& a,
+                                     const MiraFeatureVector& b);
+  
+private:
+  vector<ValType> m_dense;       // dense feature values
+  vector<size_t>  m_sparseFeats; // sparse feature ids
+  vector<ValType> m_sparseVals;  // sparse feature values, parallel to m_sparseFeats
+};
+
+#endif // MERT_MIRA_FEATURE_VECTOR_H
+
+// --Emacs trickery--
+// Local Variables:
+// mode:c++
+// c-basic-offset:2
+// End:
diff --git a/mert/MiraWeightVector.cpp b/mert/MiraWeightVector.cpp
new file mode 100644
index 000000000..8b46044fa
--- /dev/null
+++ b/mert/MiraWeightVector.cpp
@@ -0,0 +1,143 @@
+#include "MiraWeightVector.h"
+
+/**
+ * Constructor, initializes to the zero vector
+ */
+MiraWeightVector::MiraWeightVector()
+  : m_weights(),
+    m_totals(),
+    m_lastUpdated(),
+    m_numUpdates(0)
+{
+}
+
+/**
+ * Constructor with provided initial vector
+ * \param init Initial feature values
+ */
+MiraWeightVector::MiraWeightVector(const vector<ValType>& init)
+  : m_weights(init),
+    m_totals(init),
+    m_lastUpdated(init.size(), 0),
+    m_numUpdates(0)
+{
+}
+
+/**
+ * Update the model
+ * \param fv  Feature vector to be added to the weights
+ * \param tau FV will be scaled by this value before update
+ */
+void MiraWeightVector::update(const MiraFeatureVector& fv, float tau) {
+  ++m_numUpdates; // count this update before touching any weight
+  const size_t entryCount = fv.size();
+  for(size_t k=0; k<entryCount; ++k)
+    update(fv.feat(k), fv.val(k)*tau);
+}
+
+/**
+ * Perform an empty update (affects averaging)
+ */
+void MiraWeightVector::tick() {
+  m_numUpdates++; // only the update counter moves; no weight is changed
+}
+
+/**
+ * Score a feature vector according to the model
+ * \param fv Feature vector to be scored
+ */
+ValType MiraWeightVector::score(const MiraFeatureVector& fv) const {
+  // Dot product; weight() yields 0 for ids beyond the stored weights.
+  ValType dot = 0.0;
+  for(size_t k=0, n=fv.size(); k<n; ++k)
+    dot += weight(fv.feat(k)) * fv.val(k);
+  return dot;
+}
+
+/**
+ * Return an averaged view of this weight vector
+ */
+AvgWeightVector MiraWeightVector::avg() {
+  this->fixTotals(); // the view reads m_totals, so bring the lazily-kept totals up to date first
+  return AvgWeightVector(*this); // the view only references *this, it does not copy it
+}
+
+/**
+ * Updates a weight and lazily updates its total
+ */
+void MiraWeightVector::update(size_t index, ValType delta) {
+  // Grow the three parallel arrays with zeros until `index` is addressable.
+  if(index>=m_weights.size()) {
+    m_weights.resize(index+1, 0.0);
+    m_totals.resize(index+1, 0.0);
+    m_lastUpdated.resize(index+1, 0);
+  }
+  // The old weight was in force for every update since this entry was last
+  // touched: credit those steps to the running total, then apply delta.
+  const size_t missedSteps = m_numUpdates - m_lastUpdated[index];
+  m_totals[index] += missedSteps * m_weights[index] + delta;
+  m_weights[index] += delta;
+  m_lastUpdated[index] = m_numUpdates;
+}
+
+/**
+ * Make sure everyone's total is up-to-date
+ */
+void MiraWeightVector::fixTotals() {
+  for(size_t k=0; k<m_weights.size(); ++k) update(k,0); // zero delta: only the total is caught up
+}
+
+/**
+ * Helper to handle out of range weights
+ */
+ValType MiraWeightVector::weight(size_t index) const {
+  // Ids beyond the stored range have no weight yet; they are reported
+  // as zero instead of being read out of bounds.
+  if(index >= m_weights.size()) {
+    return 0;
+  }
+  return m_weights[index];
+}
+
+ValType MiraWeightVector::sqrNorm() const {
+  // Sum of squares of the current (non-averaged) weights.
+  ValType total = 0;
+  for(vector<ValType>::const_iterator w=m_weights.begin(); w!=m_weights.end(); ++w)
+    total += (*w) * (*w);
+  return total;
+}
+
+AvgWeightVector::AvgWeightVector(const MiraWeightVector& wv)
+  :m_wv(wv) // stored as a reference, not a copy: wv must outlive this view
+{}
+
+ValType AvgWeightVector::weight(size_t index) const
+{
+  // Before any update there is nothing to average: report the raw weight.
+  if(m_wv.m_numUpdates==0) {
+    return m_wv.weight(index);
+  }
+  // Ids beyond the stored totals average to zero.
+  if(index >= m_wv.m_totals.size()) {
+    return 0;
+  }
+  return m_wv.m_totals[index] / m_wv.m_numUpdates;
+}
+
+ValType AvgWeightVector::score(const MiraFeatureVector& fv) const {
+  // Dot product of fv with the averaged weights.
+  ValType dot = 0.0;
+  for(size_t k=0, n=fv.size(); k<n; ++k)
+    dot += weight(fv.feat(k)) * fv.val(k);
+  return dot;
+}
+
+size_t AvgWeightVector::size() const {
+  return m_wv.m_weights.size(); // number of weights currently stored by the underlying vector
+}
+
+// --Emacs trickery--
+// Local Variables:
+// mode:c++
+// c-basic-offset:2
+// End:
diff --git a/mert/MiraWeightVector.h b/mert/MiraWeightVector.h
new file mode 100644
index 000000000..375858634
--- /dev/null
+++ b/mert/MiraWeightVector.h
@@ -0,0 +1,106 @@
+/*
+ * MiraWeightVector.h
+ * kbmira - k-best Batch MIRA
+ *
+ * A self-averaging weight-vector. Good for
+ * perceptron learning as well.
+ * 
+ */
+
+#ifndef MERT_MIRA_WEIGHT_VECTOR_H
+#define MERT_MIRA_WEIGHT_VECTOR_H
+
+#include <vector>
+
+#include "MiraFeatureVector.h"
+
+using namespace std;
+
+class AvgWeightVector;
+
+class MiraWeightVector {
+public:
+  /**
+   * Constructor, initializes to the zero vector
+   */
+  MiraWeightVector();
+
+  /**
+   * Constructor with provided initial vector
+   * \param init Initial feature values
+   */
+  MiraWeightVector(const vector<ValType>& init); 
+
+  /**
+   * Update the model
+   * \param fv  Feature vector to be added to the weights
+   * \param tau FV will be scaled by this value before update
+   */
+  void update(const MiraFeatureVector& fv, float tau);
+
+  /**
+   * Perform an empty update (affects averaging)
+   */
+  void tick();
+
+  /**
+   * Score a feature vector according to the model
+   * \param fv Feature vector to be scored
+   */
+  ValType score(const MiraFeatureVector& fv) const;
+
+  /**
+   * Squared norm of the weight vector
+   */
+  ValType sqrNorm() const;
+
+  /**
+   * Return an averaged view of this weight vector
+   */
+  AvgWeightVector avg();
+
+  friend class AvgWeightVector;
+  
+private:
+  /**
+   * Updates a weight and lazily updates its total
+   */
+  void update(size_t index, ValType delta);
+
+  /**
+   * Make sure everyone's total is up-to-date
+   */
+  void fixTotals();
+
+  /**
+   * Helper to handle out-of-range weights
+   */
+  ValType weight(size_t index) const;
+  
+  vector<ValType> m_weights;     // current weight per feature id
+  vector<ValType> m_totals;      // running sum of each weight over updates, divided by m_numUpdates when averaging
+  vector<size_t>  m_lastUpdated; // value of m_numUpdates when the entry's total was last brought up to date
+  size_t          m_numUpdates;  // number of update()/tick() calls so far
+};
+
+/**
+ * Averaged view of a weight vector
+ */
+class AvgWeightVector {
+public:
+  AvgWeightVector(const MiraWeightVector& wv);       // keeps a reference to wv, not a copy
+  ValType score(const MiraFeatureVector& fv) const;  // dot product of fv with the averaged weights
+  ValType weight(size_t index) const;                // total / update count; the raw weight while no update has happened
+  size_t size() const;                               // number of weights stored in the underlying vector
+private:
+  const MiraWeightVector& m_wv; // the underlying vector must outlive this view
+};
+
+
+#endif // MERT_MIRA_WEIGHT_VECTOR_H
+
+// --Emacs trickery--
+// Local Variables:
+// mode:c++
+// c-basic-offset:2
+// End:
diff --git a/mert/kbmira.cpp b/mert/kbmira.cpp
new file mode 100644
index 000000000..fa01b41a2
--- /dev/null
+++ b/mert/kbmira.cpp
@@ -0,0 +1,298 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+
+***********************************************************************/
+
+/**
+  * k-best Batch Mira, as described in:
+  *
+  * Colin Cherry and George Foster
+  * Batch Tuning Strategies for Statistical Machine Translation
+  * NAACL 2012
+  *
+  * Implemented by colin.cherry@nrc-cnrc.gc.ca
+  *
+  * To license implementations of any of the other tuners in that paper,
+  * please get in touch with any member of NRC Canada's Portage project
+  *
+  * Input is a set of n-best lists, encoded as feature and score files.
+  *
+  * Output is a weight file that results from running MIRA on these
+  * n-best lists for J iterations. Will return the set that maximizes
+  * training BLEU.
+ **/
+
+#include <cmath>
+#include <cstddef>
+#include <cstdlib>
+#include <ctime>
+#include <cassert>
+#include <iostream>
+#include <string>
+#include <vector>
+#include <utility>
+#include <algorithm>
+
+#include <boost/program_options.hpp>
+#include <boost/scoped_ptr.hpp>
+
+#include "BleuScorer.h"
+#include "HypPackEnumerator.h"
+#include "MiraFeatureVector.h"
+#include "MiraWeightVector.h"
+
+using namespace std;
+
+namespace po = boost::program_options;
+
+ValType evaluate(HypPackEnumerator* train, const AvgWeightVector& wv) {
+  // BLEU statistics summed over the top-scoring hypothesis of every pack.
+  vector<ValType> corpusStats(kBleuNgramOrder*2+1,0);
+  train->reset();
+  while(!train->finished()) {
+    // Pick the hypothesis the weights score highest (the first one wins ties).
+    size_t bestHyp=0;
+    ValType bestScore=0;
+    for(size_t h=0; h<train->cur_size(); ++h) {
+      MiraFeatureVector vec(train->featuresAt(h));
+      const ValType score = wv.score(vec);
+      if(h!=0 && !(score > bestScore)) continue;
+      bestHyp = h;
+      bestScore = score;
+    }
+    // Add that hypothesis' statistics to the corpus totals.
+    const vector<float>& sent = train->scoresAt(bestHyp);
+    for(size_t s=0; s<sent.size(); ++s) corpusStats[s]+=sent[s];
+    train->next();
+  }
+  return unsmoothedBleu(corpusStats);
+}
+
+int main(int argc, char** argv) // kbmira driver: parse options, load initial weights, run MIRA epochs
+{
+  bool help;
+  string denseInitFile;
+  string sparseInitFile;
+  vector<string> scoreFiles;
+  vector<string> featureFiles;
+  int seed;
+  string outputFile;
+  float c = 0.01;      // Step-size cap C
+  float decay = 0.999; // Pseudo-corpus decay \gamma
+  int n_iters = 60;    // Max epochs J
+  bool streaming = false; // Stream all k-best lists?
+  bool no_shuffle = false; // Don't shuffle, even for in memory version
+  bool model_bg = false; // Use model for background corpus
+  
+  // Command-line processing follows pro.cpp
+  po::options_description desc("Allowed options");
+  desc.add_options()
+      ("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
+      ("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
+      ("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
+      ("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
+      ("output-file,o", po::value<string>(&outputFile), "Output file")
+      ("cparam,C", po::value<float>(&c), "MIRA C-parameter, lower for more regularization (default 0.01)")
+      ("decay,D", po::value<float>(&decay), "BLEU background corpus decay rate (default 0.999)")
+      ("iters,J", po::value<int>(&n_iters), "Number of MIRA iterations to run (default 60)")
+      ("dense-init,d", po::value<string>(&denseInitFile), "Weight file for dense features")
+      ("sparse-init,s", po::value<string>(&sparseInitFile), "Weight file for sparse features")
+      ("streaming", po::value(&streaming)->zero_tokens()->default_value(false), "Stream n-best lists to save memory, implies --no-shuffle")
+      ("no-shuffle", po::value(&no_shuffle)->zero_tokens()->default_value(false), "Don't shuffle hypotheses before each epoch")
+      ("model-bg", po::value(&model_bg)->zero_tokens()->default_value(false), "Use model instead of hope for BLEU background")
+      ;
+
+  po::options_description cmdline_options;
+  cmdline_options.add(desc);
+  po::variables_map vm;
+  po::store(po::command_line_parser(argc,argv).
+            options(cmdline_options).run(), vm);
+  po::notify(vm);
+  if (help) {
+      cout << "Usage: " + string(argv[0]) +  " [options]" << endl;
+      cout << desc << endl;
+      exit(0);
+  }
+
+  if (vm.count("random-seed")) {
+    cerr << "Initialising random seed to " << seed << endl;
+    srand(seed);
+  } else {
+    cerr << "Initialising random seed from system clock" << endl;
+    srand(time(NULL));
+  }
+
+  // Initialize weights
+  // Dense: whitespace-separated values taken from the first line of the file
+  vector<parameter_t> initParams;
+  if(!denseInitFile.empty()) {
+    ifstream opt(denseInitFile.c_str());
+    if (opt.fail()) {
+      cerr << "could not open dense initfile: " << denseInitFile << endl;
+      exit(3);
+    }
+    string buffer;
+    getline(opt,buffer);
+    istringstream strstrm(buffer); // built only after the read: the stream copies the string it is given
+    parameter_t val;
+    while(strstrm >> val) initParams.push_back(val);
+    opt.close();
+  }
+  size_t initDenseSize = initParams.size();
+  // Sparse
+  if(!sparseInitFile.empty()) {
+    if(initDenseSize==0) {
+      cerr << "sparse initialization requires dense initialization" << endl;
+      exit(3);
+    }
+    ifstream opt(sparseInitFile.c_str());
+    if(opt.fail()) {
+      cerr << "could not open sparse initfile: " << sparseInitFile << endl;
+      exit(3);
+    }
+    int sparseCount=0;
+    parameter_t val; std::string name;
+    while(opt >> name >> val) {
+      size_t id = SparseVector::encode(name) + initDenseSize; // sparse ids are placed after the dense block
+      while(initParams.size()<=id) initParams.push_back(0.0);
+      initParams[id] = val;
+      sparseCount++;
+    }
+    cerr << "Found " << sparseCount << " initial sparse features" << endl;
+    opt.close();
+  }
+  
+  MiraWeightVector wv(initParams);
+
+  // Initialize background corpus
+  vector<ValType> bg;
+  for(int j=0;j<kBleuNgramOrder;j++){
+    bg.push_back(kBleuNgramOrder-j);
+    bg.push_back(kBleuNgramOrder-j);
+  }
+  bg.push_back(kBleuNgramOrder);
+  
+  // Training loop
+  boost::scoped_ptr<HypPackEnumerator> train;
+  if(streaming)
+    train.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
+  else
+    train.reset(new RandomAccessHypPackEnumerator(featureFiles, scoreFiles, no_shuffle));
+  cerr << "Initial BLEU = " << evaluate(train.get(), wv.avg()) << endl;
+  ValType bestBleu = 0;
+  for(int j=0;j<n_iters;j++)
+  {
+    // MIRA train for one epoch
+    int iNumHyps = 0;
+    int iNumExamples = 0;
+    int iNumUpdates = 0;
+    ValType totalLoss = 0.0;
+    for(train->reset(); !train->finished(); train->next()) {
+      
+      // Hope / fear decode
+      size_t hope_index=0, fear_index=0, model_index=0;
+      ValType hope_score=0, fear_score=0, model_score=0;
+      for(size_t i=0; i< train->cur_size(); i++) {
+        MiraFeatureVector vec(train->featuresAt(i));
+        ValType score = wv.score(vec);
+        ValType bleu = sentenceLevelBackgroundBleu(train->scoresAt(i),bg);
+        // Hope
+        if(i==0 || (score + bleu) > hope_score) {
+          hope_score = score + bleu;
+          hope_index = i;
+        }
+        // Fear
+        if(i==0 || (score - bleu) > fear_score) {
+          fear_score = score - bleu;
+          fear_index = i;
+        }
+        // Model
+        if(i==0 || score > model_score) {
+          model_score = score;
+          model_index = i;
+        }
+        iNumHyps++;
+      }
+      // Update weights
+      if(hope_index!=fear_index) {
+        // Vector difference
+        MiraFeatureVector hope(train->featuresAt(hope_index));
+        MiraFeatureVector fear(train->featuresAt(fear_index));
+        MiraFeatureVector diff = hope - fear;
+        // Bleu difference
+        const vector<float>& hope_stats = train->scoresAt(hope_index);
+        ValType hopeBleu = sentenceLevelBackgroundBleu(hope_stats, bg);
+        const vector<float>& fear_stats = train->scoresAt(fear_index);
+        ValType fearBleu = sentenceLevelBackgroundBleu(fear_stats, bg);
+        assert(hopeBleu + 1e-8 >= fearBleu); // selection only guarantees >=, and only up to float rounding
+        ValType delta = hopeBleu - fearBleu;
+        // Loss and update
+        ValType diff_score = wv.score(diff);
+        ValType loss = delta - diff_score;
+        if(loss > 0) {
+          ValType eta = min(c, loss / diff.sqrNorm()); // step size, capped at C
+          wv.update(diff,eta);
+          totalLoss+=loss;
+          iNumUpdates++;
+        }
+        // Update BLEU statistics
+        const vector<float>& model_stats = train->scoresAt(model_index);
+        for(size_t k=0;k<bg.size();k++) {
+          bg[k]*=decay;
+          if(model_bg)
+            bg[k]+=model_stats[k];
+          else
+            bg[k]+=hope_stats[k];
+        }
+      }
+      iNumExamples++;
+    }
+    // Training Epoch summary
+    cerr << iNumUpdates << "/" << iNumExamples << " updates"
+         << ", avg loss = " << (totalLoss / iNumExamples);
+         
+
+    // Evaluate current average weights
+    AvgWeightVector avg = wv.avg();
+    ValType bleu = evaluate(train.get(), avg);
+    cerr << ", BLEU = " << bleu << endl;
+    if(bleu > bestBleu) { // best epoch so far: (re)write the weights
+      size_t num_dense = train->num_dense();
+      if(initDenseSize>0 && initDenseSize!=num_dense) {
+        cerr << "Error: Initial dense feature count and dense feature count from n-best do not match: "
+             << initDenseSize << "!=" << num_dense << endl;
+        exit(1);
+      }
+      // Write to a file
+      ostream* out;
+      ofstream outFile;
+      if (!outputFile.empty() ) {
+        outFile.open(outputFile.c_str());
+        if (!(outFile)) {
+          cerr << "Error: Failed to open " << outputFile << endl;
+          exit(1);
+        }
+        out = &outFile;
+      } else {
+        out = &cout;
+      }
+      for(size_t i=0;i<avg.size();i++) {
+        if(i<num_dense)
+          *out << "F" << i << " " << avg.weight(i) << endl;
+        else {
+          if(abs(avg.weight(i))>1e-8) 
+            *out << SparseVector::decode(i-num_dense) << " " << avg.weight(i) << endl;
+        }
+      }
+      outFile.close();
+      bestBleu = bleu;
+    }
+  }
+  cerr << "Best BLEU = " << bestBleu << endl;
+}
+// --Emacs trickery--
+// Local Variables:
+// mode:c++
+// c-basic-offset:2
+// End:
diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
index 2abd5ef7c..a430aa520 100755
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@@ -117,6 +117,9 @@ my $___HISTORIC_INTERPOLATION = 0; # interpolate optimize weights with previous
 # TODO: Should we also add these values to options of this script?
 my $megam_default_options = "-fvals -maxi 30 -nobias binary";
 
+# Flags related to Batch MIRA (Cherry & Foster, 2012)
+my $___BATCH_MIRA = 0; # flg to enable batch MIRA
+
 my $__THREADS = 0;
 
 # Parameter for effective reference length when computing BLEU score
@@ -206,6 +209,7 @@ GetOptions(
   "pairwise-ranked" => \$___PAIRWISE_RANKED_OPTIMIZER,
   "pro-starting-point" => \$___PRO_STARTING_POINT,
   "historic-interpolation=f" => \$___HISTORIC_INTERPOLATION,
+  "batch-mira" => \$___BATCH_MIRA,
   "threads=i" => \$__THREADS
 ) or exit(1);
 
@@ -324,10 +328,12 @@ if (!defined $mertdir) {
 my $mert_extract_cmd = File::Spec->catfile($mertdir, "extractor");
 my $mert_mert_cmd    = File::Spec->catfile($mertdir, "mert");
 my $mert_pro_cmd     = File::Spec->catfile($mertdir, "pro");
+my $mert_mira_cmd    = File::Spec->catfile($mertdir, "kbmira");
 
 die "Not executable: $mert_extract_cmd" if ! -x $mert_extract_cmd;
 die "Not executable: $mert_mert_cmd"    if ! -x $mert_mert_cmd;
 die "Not executable: $mert_pro_cmd"     if ! -x $mert_pro_cmd;
+die "Not executable: $mert_mira_cmd"    if ! -x $mert_mira_cmd;
 
 my $pro_optimizer = File::Spec->catfile($mertdir, "megam_i686.opt");  # or set to your installation
 
@@ -727,6 +733,11 @@ while (1) {
     $scfiles = "$score_file";
   }
 
+  my $mira_settings = "";
+  $mira_settings .= " --dense-init run$run.$weights_in_file";
+  if (-e "run$run.sparse-weights") {
+    $mira_settings .= " --sparse-init run$run.sparse-weights";
+  }
   my $file_settings = " --ffile $ffiles --scfile $scfiles";
   my $pro_file_settings = "--ffile " . join(" --ffile ", split(/,/, $ffiles)) .
                           " --scfile " .  join(" --scfile ", split(/,/, $scfiles));
@@ -759,6 +770,10 @@ while (1) {
     # ... and run mert
     $cmd =~ s/(--ifile \S+)/$1,run$run.init.pro/;
     &submit_or_exec($cmd . $mert_settings, $mert_outfile, $mert_logfile);
+  } elsif ($___BATCH_MIRA) { # batch MIRA optimization
+    safesystem("echo 'not used' > $weights_out_file") or die;
+    $cmd = "$mert_mira_cmd $mira_settings $seed_settings $pro_file_settings -o $mert_outfile";
+    &submit_or_exec($cmd, "run$run.mira.out", $mert_logfile);
   } else {  # just mert
     &submit_or_exec($cmd . $mert_settings, $mert_outfile, $mert_logfile);
   }
@@ -906,7 +921,7 @@ chdir($cwd);
 sub get_weights_from_mert {
   my ($outfile, $logfile, $weight_count, $sparse_weights) = @_;
   my ($bestpoint, $devbleu);
-  if ($___PAIRWISE_RANKED_OPTIMIZER || ($___PRO_STARTING_POINT && $logfile =~ /pro/)) {
+  if ($___PAIRWISE_RANKED_OPTIMIZER || ($___PRO_STARTING_POINT && $logfile =~ /pro/) || $___BATCH_MIRA) {
     open my $fh, '<', $outfile or die "Can't open $outfile: $!";
     my (@WEIGHT, $sum);
     for (my $i = 0; $i < $weight_count; $i++) { push @WEIGHT, 0; }
@@ -923,6 +938,14 @@ sub get_weights_from_mert {
     foreach (keys %{$sparse_weights}) { $$sparse_weights{$_} /= $sum; }
     $bestpoint = join(" ", @WEIGHT);
     close $fh;
+    if($___BATCH_MIRA) {
+      open my $fh2, '<', $logfile or die "Can't open $logfile: $!";
+      while(<$fh2>) {
+        if(/Best BLEU = ([\-\d\.]+)/) {
+          $devbleu = $1;
+        }
+      }
+    }
   } else {
     open my $fh, '<', $logfile or die "Can't open $logfile: $!";
     while (<$fh>) {

From 2e370ed11b0cd8989118891dc4385619837dd39f Mon Sep 17 00:00:00 2001
From: phikoehn <pkoehn@inf.ed.ac.uk>
Date: Wed, 30 May 2012 00:58:18 +0100
Subject: [PATCH 32/38] more escaping in tokenizer; wrapper for berkeley parser
 (german)

---
 scripts/tokenizer/deescape-special-chars.perl | 18 +++----
 scripts/tokenizer/detokenizer.perl            | 18 +++----
 scripts/tokenizer/escape-special-chars.perl   | 14 +++---
 scripts/tokenizer/tokenizer.perl              | 14 +++---
 .../wrappers/berkeleyparsed2mosesxml.perl     | 36 ++++++++++++++
 .../wrappers/mosesxml2berkeleyparsed.perl     | 44 +++++++++++++++++
 .../training/wrappers/parse-de-berkeley.perl  | 48 +++++++++++++++++++
 .../wrappers/syntax-hyphen-splitting.perl     | 43 +++++++++++++++++
 8 files changed, 207 insertions(+), 28 deletions(-)
 create mode 100755 scripts/training/wrappers/berkeleyparsed2mosesxml.perl
 create mode 100755 scripts/training/wrappers/mosesxml2berkeleyparsed.perl
 create mode 100755 scripts/training/wrappers/parse-de-berkeley.perl
 create mode 100755 scripts/training/wrappers/syntax-hyphen-splitting.perl

diff --git a/scripts/tokenizer/deescape-special-chars.perl b/scripts/tokenizer/deescape-special-chars.perl
index 55035ae6d..345555990 100755
--- a/scripts/tokenizer/deescape-special-chars.perl
+++ b/scripts/tokenizer/deescape-special-chars.perl
@@ -3,13 +3,15 @@
 use strict;
 
 while(<STDIN>) {
-  s/\&bar;/\|/g;
-  s/\&lt;/\</g;
-  s/\&gt;/\>/g;
-  s/\&bra;/\[/g;
-  s/\&ket;/\]/g;
-  s/\&#91;/\[/g;
-  s/\&#93;/\]/g;
-  s/\&amp;/\&/g;
+  s/\&bar;/\|/g;   # factor separator
+  s/\&lt;/\</g;    # xml
+  s/\&gt;/\>/g;    # xml
+  s/\&bra;/\[/g;   # syntax non-terminal (legacy)
+  s/\&ket;/\]/g;   # syntax non-terminal (legacy)
+  s/\&quot;/\"/g;  # xml
+  s/\&apos;/\'/g;  # xml
+  s/\&#91;/\[/g;   # syntax non-terminal
+  s/\&#93;/\]/g;   # syntax non-terminal
+  s/\&amp;/\&/g;   # escape escape
   print $_;
 }
diff --git a/scripts/tokenizer/detokenizer.perl b/scripts/tokenizer/detokenizer.perl
index e55a1a26e..8233b419c 100755
--- a/scripts/tokenizer/detokenizer.perl
+++ b/scripts/tokenizer/detokenizer.perl
@@ -66,14 +66,16 @@ sub detokenize {
 	$text = " $text ";
   $text =~ s/ \@\-\@ /-/g;
   # de-escape special chars
-  $text =~ s/\&bar;/\|/g;
-  $text =~ s/\&lt;/\</g;
-  $text =~ s/\&gt;/\>/g;
-  $text =~ s/\&bra;/\[/g;
-  $text =~ s/\&ket;/\]/g;
-  $text =~ s/\&#91;/\[/g;
-  $text =~ s/\&#93;/\]/g;
-  $text =~ s/\&amp;/\&/g;
+  $text =~ s/\&bar;/\|/g;   # factor separator
+  $text =~ s/\&lt;/\</g;    # xml
+  $text =~ s/\&gt;/\>/g;    # xml
+  $text =~ s/\&bra;/\[/g;   # syntax non-terminal (legacy)
+  $text =~ s/\&ket;/\]/g;   # syntax non-terminal (legacy)
+  $text =~ s/\&quot;/\"/g;  # xml
+  $text =~ s/\&apos;/\'/g;  # xml
+  $text =~ s/\&#91;/\[/g;   # syntax non-terminal
+  $text =~ s/\&#93;/\]/g;   # syntax non-terminal
+  $text =~ s/\&amp;/\&/g;   # escape escape
 
 	my $word;
 	my $i;
diff --git a/scripts/tokenizer/escape-special-chars.perl b/scripts/tokenizer/escape-special-chars.perl
index f4c1b4dd5..5d9690c04 100755
--- a/scripts/tokenizer/escape-special-chars.perl
+++ b/scripts/tokenizer/escape-special-chars.perl
@@ -12,12 +12,14 @@ while(<STDIN>) {
 	s/ $//g;
 
   # special characters in moses
-  s/\&/\&amp;/g;
-  s/\|/\&bar;/g;
-  s/\</\&lt;/g;
-  s/\>/\&gt;/g;
-  s/\[/\&#91;/g;
-  s/\]/\&#93;/g;
+  s/\&/\&amp;/g;   # escape escape
+  s/\|/\&bar;/g;   # factor separator
+  s/\</\&lt;/g;    # xml
+  s/\>/\&gt;/g;    # xml
+  s/\'/\&apos;/g;  # xml
+  s/\"/\&quot;/g;  # xml
+  s/\[/\&#91;/g;   # syntax non-terminal
+  s/\]/\&#93;/g;   # syntax non-terminal
   
   # restore xml instructions
   s/\&lt;(\S+) translation="([^\"]+)"&gt; (.+?) &lt;\/(\S+)&gt;/\<$1 translation=\"$2\"> $3 <\/$4>/g;
diff --git a/scripts/tokenizer/tokenizer.perl b/scripts/tokenizer/tokenizer.perl
index 70bb318f7..0cb713740 100755
--- a/scripts/tokenizer/tokenizer.perl
+++ b/scripts/tokenizer/tokenizer.perl
@@ -149,12 +149,14 @@ sub tokenize {
 	$text =~ s/DOTMULTI/./g;
 
   #escape special chars
-  $text =~ s/\&/\&amp;/g;
-  $text =~ s/\|/\&bar;/g;
-  $text =~ s/\</\&lt;/g;
-  $text =~ s/\>/\&gt;/g;
-  $text =~ s/\[/\&#91;/g;
-  $text =~ s/\]/\&#93;/g;
+  $text =~ s/\&/\&amp;/g;   # escape escape
+  $text =~ s/\|/\&bar;/g;   # factor separator
+  $text =~ s/\</\&lt;/g;    # xml
+  $text =~ s/\>/\&gt;/g;    # xml
+  $text =~ s/\'/\&apos;/g;  # xml
+  $text =~ s/\"/\&quot;/g;  # xml
+  $text =~ s/\[/\&#91;/g;   # syntax non-terminal
+  $text =~ s/\]/\&#93;/g;   # syntax non-terminal
 
 	#ensure final line break
 	$text .= "\n" unless $text =~ /\n$/;
diff --git a/scripts/training/wrappers/berkeleyparsed2mosesxml.perl b/scripts/training/wrappers/berkeleyparsed2mosesxml.perl
new file mode 100755
index 000000000..6a4ed731e
--- /dev/null
+++ b/scripts/training/wrappers/berkeleyparsed2mosesxml.perl
@@ -0,0 +1,36 @@
+#!/usr/bin/perl -w
+# Rewrites bracketed parse trees read from STDIN as <tree label="..."> markup, one line per input line.
+use strict;
+
+while(<STDIN>) {
+  if (/^\(\(\)\)/) {
+    print "\n"; # parse failures
+    next;
+  }
+
+  # prep: give the unlabeled outermost bracket the label TOP
+  s/^\( /\(TOP /;
+
+  # escape words
+  s/\&/\&amp;/g;   # escape escape
+  s/\|/\&bar;/g;   # factor separator
+  s/\</\&lt;/g;    # xml
+  s/\>/\&gt;/g;    # xml
+  s/\'/\&apos;/g;  # xml
+  s/\"/\&quot;/g;  # xml
+  s/\[/\&#91;/g;   # syntax non-terminal
+  s/\]/\&#93;/g;   # syntax non-terminal
+  
+  # convert into tree
+  s/\((\S+) /<tree label=\"$1\"> /g;
+  s/\)/ <\/tree> /g;
+  s/\"\-LRB\-\"/\"LRB\"/g; # labels
+  s/\"\-RRB\-\"/\"RRB\"/g;
+  s/\-LRB\-/\(/g; # tokens
+  s/\-RRB\-/\)/g;
+  s/ +/ /g;
+  s/ $//g;
+
+  # output
+  print $_;
+}
diff --git a/scripts/training/wrappers/mosesxml2berkeleyparsed.perl b/scripts/training/wrappers/mosesxml2berkeleyparsed.perl
new file mode 100755
index 000000000..ef6e66024
--- /dev/null
+++ b/scripts/training/wrappers/mosesxml2berkeleyparsed.perl
@@ -0,0 +1,44 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+#( (NP (NP (NN resumption)) (PP (IN of) (NP (DT the) (NN session)))) )
+#( (S (@S (@S (@S (S (NP (PRP I)) (VP (VB declare) (VP (@VP (VBD resumed) (NP (@NP (NP (DT the) (NN session)) (PP (IN of) (NP (@NP (DT the) (NNP European)) (NNP Parliament)))) (VP (VBN adjourned) (PP (IN on) (NP (NNP Friday) (CD 17)))))) (NP (NNP December) (CD 1999))))) (, ,)) (CC and)) (S (NP (PRP I)) (VP (MD would) (VP (VB like) (S (ADVP (RB once) (RB again)) (VP (TO to) (VP (@VP (VB wish) (NP (PRP you))) (NP (NP (@NP (@NP (DT a) (JJ happy)) (JJ new)) (NN year)) (PP (IN in) (NP (@NP (DT the) (NN hope)) (SBAR (IN that) (S (NP (PRP you)) (VP (VBD enjoyed) (NP (@NP (@NP (DT a) (JJ pleasant)) (JJ festive)) (NN period))))))))))))))) (. .)) )
+
+while(<STDIN>) {   # one Moses XML tree per input line
+  if (/^$/) {   # empty input line
+    print "\n"; # parse failure: pass the empty line through
+    next;
+  }
+
+  # parentheses: must be rewritten before the tree markup below introduces structural brackets
+  s/\(/\-LRB\-/g; # tokens
+  s/\)/\-RRB\-/g;
+  s/\"LRB\"/\"\-LRB\-\"/g; # labels
+  s/\"RRB\"/\"\-RRB\-\"/g;
+
+  # main: turn <tree label="X"> ... </tree> markup back into "(X ... )" brackets
+  s/<tree label=\"([^\"]+)\">/\($1/g;
+  s/ *<\/tree>/\)/g;
+  s/^\(TOP/\(/;   # the outermost TOP node becomes an unlabeled bracket
+
+  # de-escape; '&amp;' is handled last so that e.g. "&amp;lt;" ends up as "&lt;" rather than "<"
+  s/\&bar;/\|/g;   # factor separator
+  s/\&lt;/\</g;    # xml
+  s/\&gt;/\>/g;    # xml
+  s/\&bra;/\[/g;   # syntax non-terminal (legacy)
+  s/\&ket;/\]/g;   # syntax non-terminal (legacy)
+  s/\&quot;/\"/g;  # xml
+  s/\&apos;/\'/g;  # xml
+  s/\&#91;/\[/g;   # syntax non-terminal
+  s/\&#93;/\]/g;   # syntax non-terminal
+  s/\&amp;/\&/g;   # escape escape
+
+  # cleanup
+  s/ +/ /g;      # collapse runs of spaces
+  s/ $//g;       # drop a space left at the end of the line
+  s/\)$/ \)/g;   # put a space before the line-final closing bracket
+
+  # output
+  print $_;
+}
diff --git a/scripts/training/wrappers/parse-de-berkeley.perl b/scripts/training/wrappers/parse-de-berkeley.perl
new file mode 100755
index 000000000..6482d11f3
--- /dev/null
+++ b/scripts/training/wrappers/parse-de-berkeley.perl
@@ -0,0 +1,48 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Getopt::Long "GetOptions";
+use FindBin qw($Bin);
+
+my ($JAR,$GRAMMAR,$SPLIT_HYPHEN,$MARK_SPLIT,$BINARIZE);   # command-line option values
+
+die("ERROR: syntax is: parse-de-berkeley.perl [-split-hyphen] [-mark-split] [-binarize] -jar jar-file -gr grammar < in > out\n") 
+  unless &GetOptions
+  ('jar=s' => \$JAR,
+   'gr=s' => \$GRAMMAR,
+   'split-hyphen' => \$SPLIT_HYPHEN,
+   'mark-split' => \$MARK_SPLIT,
+   'binarize' => \$BINARIZE)
+  && defined($JAR) && defined($GRAMMAR);   # -jar and -gr are mandatory
+
+die("ERROR: could not find jar file '$JAR'\n") unless -e $JAR;
+die("ERROR: could not find grammar file '$GRAMMAR'\n") unless -e $GRAMMAR;
+
+$BINARIZE = $BINARIZE ? "-binarize" : "";   # from here on: the flag text to splice into command lines, or ""
+$SPLIT_HYPHEN = $SPLIT_HYPHEN ? "| $Bin/syntax-hyphen-splitting.perl $BINARIZE" : "";   # from here on: an extra shell pipeline stage, or ""
+$SPLIT_HYPHEN .= " -mark-split" if $SPLIT_HYPHEN && $MARK_SPLIT;   # -mark-split is only forwarded when hyphen splitting is on
+
+my $tmp = "/tmp/parse-de-berkeley.$$";   # NOTE(review): predictable name in a shared directory; File::Temp would be safer
+
+open(TMP,"| $Bin/../../tokenizer/deescape-special-chars.perl > $tmp") or die("ERROR: could not start deescape-special-chars.perl: $!\n");
+while(<STDIN>) {
+  # unsplit hyphens (still true here: $SPLIT_HYPHEN is a non-empty command string when the option was given)
+  s/ \@-\@ /-/g if $SPLIT_HYPHEN;
+
+  # handle parentheses: replace them with *LRB* / *RRB* placeholders before parsing
+  s/\(/*LRB*/g;
+  s/\)/*RRB*/g;
+
+  print TMP $_;
+}
+close(TMP) or warn("WARNING: deescape-special-chars.perl did not finish cleanly (status $?)\n");   # close on a pipe reports the child's failure
+
+my $cmd = "cat $tmp | java -Xmx10000m -Xms10000m -Dfile.encoding=UTF8 -jar $JAR -gr $GRAMMAR -maxLength 1000 $BINARIZE | $Bin/berkeleyparsed2mosesxml.perl $SPLIT_HYPHEN";
+print STDERR $cmd."\n";   # echo the pipeline for debugging
+
+open(PARSE,"$cmd|") or do { unlink($tmp); die("ERROR: could not start parser pipeline: $!\n") };
+while(<PARSE>) {   # copy the pipeline output to STDOUT unchanged
+  print $_;
+}
+close(PARSE) or warn("WARNING: parser pipeline did not finish cleanly (status $?)\n");
+unlink($tmp) or warn("WARNING: could not remove temporary file '$tmp': $!\n");   # no shell needed to delete a file
diff --git a/scripts/training/wrappers/syntax-hyphen-splitting.perl b/scripts/training/wrappers/syntax-hyphen-splitting.perl
new file mode 100755
index 000000000..69290e51d
--- /dev/null
+++ b/scripts/training/wrappers/syntax-hyphen-splitting.perl
@@ -0,0 +1,43 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Getopt::Long "GetOptions";
+
+my $MARK_HYP = 0;   # -mark-split: prefix the label of a split word's node with "HYP-"
+my $BINARIZE = 0;   # -binarize: wrap the parts of a split word in extra "@label" nodes
+
+die unless &GetOptions('binarize' => \$BINARIZE,'mark-split' => \$MARK_HYP);
+# Filter STDIN -> STDOUT: split hyphenated words inside Moses XML trees into separate leaf nodes.
+while(<STDIN>) {
+  chomp;   # strip only a line terminator; chop would eat a real character on an unterminated last line
+  my @OUT = ();   # output tokens for this line
+  foreach (split) {
+    if (/^</ || />$/) {   # markup token (starts with "<" or ends with ">"): copy through
+      push @OUT, $_;
+    }
+    elsif(/([\p{IsAlnum}])\-([\p{IsAlnum}])/) {   # word with a hyphen between two alphanumerics
+      s/(?<=[\p{IsAlnum}])\-(?=[\p{IsAlnum}])/ \@-\@ /g;   # lookarounds leave the neighbours unconsumed, so "a-b-c" splits at both hyphens
+      my @WORD = split;   # word parts interleaved with "@-@" tokens
+      $OUT[$#OUT] =~ /label=\"([^\"]+)\"/;   # assumes the previous token is the label="..." part of the enclosing opening tag
+      my $pos = $1;
+      if ($MARK_HYP) {
+        $OUT[$#OUT] =~ s/label=\"/label=\"HYP-/;   # mark the enclosing node as containing a split word
+      }
+      if ($BINARIZE) {
+        for(my $i=0;$i<scalar(@WORD)-2;$i++) {   # open one "@label" node per part beyond the first two
+          push @OUT,"<tree label=\"\@".($MARK_HYP ? "HYP-" : "")."$pos\">";
+        }
+      }
+      for(my $i=0;$i<scalar(@WORD);$i++) {
+        if ($BINARIZE && $i>=2) {   # close one "@label" node before each part from the third onward
+          push @OUT, "</tree>";
+        }
+        push @OUT,"<tree label=\"".(($WORD[$i] eq "\@-\@") ? "HYP" : $pos)."\"> $WORD[$i] </tree>";   # hyphen tokens get label HYP, word parts keep the original label
+      }
+    }
+    else {   # ordinary word: copy through
+      push @OUT, $_;
+    }
+  }
+  print join(" ",@OUT)."\n";
+}

From 2b20de8ea944f122e3e367db69f88d95cc83e393 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Wed, 30 May 2012 09:49:43 +0100
Subject: [PATCH 33/38] xcode build supports threads. Abort when using Adam's
 suffix arrays with threads

---
 .../moses-chart-cmd.xcodeproj/project.pbxproj      | 14 ++++++++++++--
 .../other-builds/moses.xcodeproj/project.pbxproj   |  2 ++
 .../RuleTable/PhraseDictionaryALSuffixArray.cpp    |  9 +++++++++
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/contrib/other-builds/moses-chart-cmd.xcodeproj/project.pbxproj b/contrib/other-builds/moses-chart-cmd.xcodeproj/project.pbxproj
index 897a4881b..82fe6607c 100644
--- a/contrib/other-builds/moses-chart-cmd.xcodeproj/project.pbxproj
+++ b/contrib/other-builds/moses-chart-cmd.xcodeproj/project.pbxproj
@@ -307,6 +307,7 @@
 				LIBRARY_SEARCH_PATHS = (
 					../../irstlm/lib,
 					../../srilm/lib/macosx,
+					/opt/local/lib,
 				);
 				OTHER_LDFLAGS = (
 					"-lz",
@@ -316,6 +317,7 @@
 					"-loolm",
 					"-lflm",
 					"-llattice",
+					"-lboost_thread-mt",
 				);
 				PRODUCT_NAME = "moses-chart-cmd";
 				USER_HEADER_SEARCH_PATHS = "../../ ../../moses/src";
@@ -338,6 +340,7 @@
 				LIBRARY_SEARCH_PATHS = (
 					../../irstlm/lib,
 					../../srilm/lib/macosx,
+					/opt/local/lib,
 				);
 				OTHER_LDFLAGS = (
 					"-lz",
@@ -347,6 +350,7 @@
 					"-loolm",
 					"-lflm",
 					"-llattice",
+					"-lboost_thread-mt",
 				);
 				PRODUCT_NAME = "moses-chart-cmd";
 				USER_HEADER_SEARCH_PATHS = "../../ ../../moses/src";
@@ -359,7 +363,10 @@
 				ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
 				GCC_C_LANGUAGE_STANDARD = gnu99;
 				GCC_OPTIMIZATION_LEVEL = 0;
-				GCC_PREPROCESSOR_DEFINITIONS = TRACE_ENABLE;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					TRACE_ENABLE,
+					WITH_THREADS,
+				);
 				GCC_WARN_ABOUT_RETURN_TYPE = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
 				HEADER_SEARCH_PATHS = (
@@ -378,7 +385,10 @@
 			buildSettings = {
 				ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
 				GCC_C_LANGUAGE_STANDARD = gnu99;
-				GCC_PREPROCESSOR_DEFINITIONS = TRACE_ENABLE;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					TRACE_ENABLE,
+					WITH_THREADS,
+				);
 				GCC_WARN_ABOUT_RETURN_TYPE = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
 				HEADER_SEARCH_PATHS = (
diff --git a/contrib/other-builds/moses.xcodeproj/project.pbxproj b/contrib/other-builds/moses.xcodeproj/project.pbxproj
index b12427138..b870a74c9 100644
--- a/contrib/other-builds/moses.xcodeproj/project.pbxproj
+++ b/contrib/other-builds/moses.xcodeproj/project.pbxproj
@@ -1357,6 +1357,7 @@
 					LM_IRST,
 					"_FILE_OFFSET_BITS=64",
 					_LARGE_FILES,
+					WITH_THREADS,
 				);
 				HEADER_SEARCH_PATHS = (
 					../..,
@@ -1399,6 +1400,7 @@
 					LM_IRST,
 					"_FILE_OFFSET_BITS=64",
 					_LARGE_FILES,
+					WITH_THREADS,
 				);
 				HEADER_SEARCH_PATHS = (
 					../..,
diff --git a/moses/src/RuleTable/PhraseDictionaryALSuffixArray.cpp b/moses/src/RuleTable/PhraseDictionaryALSuffixArray.cpp
index 93fc083e5..5a886d32d 100644
--- a/moses/src/RuleTable/PhraseDictionaryALSuffixArray.cpp
+++ b/moses/src/RuleTable/PhraseDictionaryALSuffixArray.cpp
@@ -13,6 +13,8 @@
 #include "RuleTable/Loader.h"
 #include "RuleTable/LoaderFactory.h"
 #include "TypeDef.h"
+#include "StaticData.h"
+#include "UserMessage.h"
 
 using namespace std;
 
@@ -27,6 +29,13 @@ bool PhraseDictionaryALSuffixArray::Load(const std::vector<FactorType> &input
                                  , const LMList &languageModels
                                  , const WordPenaltyProducer* wpProducer)
 {
+  const StaticData &staticData = StaticData::Instance();
+  if (staticData.ThreadCount() > 1)
+  {
+    UserMessage::Add("Suffix array implementation is not threadsafe");
+    return false;
+  }
+  
   // file path is the directory of the rules for eacg, NOT the file of all the rules
   SetFilePath(filePath);
   m_tableLimit = tableLimit;

From 9f03125418abb4749b5a46154f4d1c3e19276600 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Wed, 30 May 2012 11:50:01 +0100
Subject: [PATCH 34/38] Minor rollback

---
 util/bit_packing.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/bit_packing.cc b/util/bit_packing.cc
index b5a14008b..41999b726 100644
--- a/util/bit_packing.cc
+++ b/util/bit_packing.cc
@@ -10,7 +10,7 @@ template <bool> struct StaticCheck {};
 template <> struct StaticCheck<true> { typedef bool StaticAssertionPassed; };
 
 // If your float isn't 4 bytes, we're hosed.  
-//typedef StaticCheck<sizeof(float) == 4>::StaticAssertionPassed FloatSize;
+typedef StaticCheck<sizeof(float) == 4>::StaticAssertionPassed FloatSize;
 
 } // namespace
 

From 45870348ff4f8860b9a9b35a4d20952023ead4d7 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Wed, 30 May 2012 12:47:20 +0100
Subject: [PATCH 35/38] xcode build supports threads. move 'using namespace'
 out from .h file to stop namespace pollution

---
 .../moses-cmd.xcodeproj/project.pbxproj       | 16 ++++-
 mert/HypPackEnumerator.h                      | 27 ++++----
 mert/MiraFeatureVector.h                      | 12 ++--
 moses-cmd/src/LatticeMBR.cpp                  |  1 +
 moses-cmd/src/LatticeMBR.h                    | 66 +++++++++----------
 5 files changed, 67 insertions(+), 55 deletions(-)

diff --git a/contrib/other-builds/moses-cmd.xcodeproj/project.pbxproj b/contrib/other-builds/moses-cmd.xcodeproj/project.pbxproj
index 927961b2f..619ecf76c 100644
--- a/contrib/other-builds/moses-cmd.xcodeproj/project.pbxproj
+++ b/contrib/other-builds/moses-cmd.xcodeproj/project.pbxproj
@@ -311,6 +311,7 @@
 					LM_SRI,
 					LM_IRST,
 					TRACE_ENABLE,
+					WITH_THREADS,
 				);
 				GCC_WARN_ABOUT_RETURN_TYPE = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
@@ -324,6 +325,7 @@
 				LIBRARY_SEARCH_PATHS = (
 					../../irstlm/lib,
 					../../srilm/lib/macosx,
+					/opt/local/lib,
 				);
 				OTHER_LDFLAGS = (
 					"-lflm",
@@ -332,6 +334,7 @@
 					"-ldstruct",
 					"-lz",
 					"-lirstlm",
+					"-lboost_thread-mt",
 				);
 				PREBINDING = NO;
 				PRODUCT_NAME = "moses-cmd";
@@ -348,9 +351,10 @@
 				GCC_MODEL_TUNING = G5;
 				GCC_OPTIMIZATION_LEVEL = 3;
 				GCC_PREPROCESSOR_DEFINITIONS = (
-					LM_IRST,
 					LM_SRI,
+					LM_IRST,
 					TRACE_ENABLE,
+					WITH_THREADS,
 				);
 				GCC_WARN_ABOUT_RETURN_TYPE = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
@@ -364,6 +368,7 @@
 				LIBRARY_SEARCH_PATHS = (
 					../../irstlm/lib,
 					../../srilm/lib/macosx,
+					/opt/local/lib,
 				);
 				OTHER_LDFLAGS = (
 					"-lflm",
@@ -372,6 +377,7 @@
 					"-ldstruct",
 					"-lz",
 					"-lirstlm",
+					"-lboost_thread-mt",
 				);
 				PREBINDING = NO;
 				PRODUCT_NAME = "moses-cmd";
@@ -384,6 +390,12 @@
 			buildSettings = {
 				GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
 				GCC_MODEL_TUNING = G5;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					LM_SRI,
+					LM_IRST,
+					TRACE_ENABLE,
+					WITH_THREADS,
+				);
 				GCC_WARN_ABOUT_RETURN_TYPE = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
 				HEADER_SEARCH_PATHS = (
@@ -396,6 +408,7 @@
 				LIBRARY_SEARCH_PATHS = (
 					../../irstlm/lib,
 					../../srilm/lib/macosx,
+					/opt/local/lib,
 				);
 				OTHER_LDFLAGS = (
 					"-lflm",
@@ -404,6 +417,7 @@
 					"-ldstruct",
 					"-lz",
 					"-lirstlm",
+					"-lboost_thread-mt",
 				);
 				PREBINDING = NO;
 				PRODUCT_NAME = "moses-cmd";
diff --git a/mert/HypPackEnumerator.h b/mert/HypPackEnumerator.h
index 9f44c3372..07f12e91b 100644
--- a/mert/HypPackEnumerator.h
+++ b/mert/HypPackEnumerator.h
@@ -12,12 +12,11 @@
 #include <string>
 #include <vector>
 #include <utility>
+#include <stddef.h>
 
 #include "FeatureDataIterator.h"
 #include "ScoreDataIterator.h"
 
-using namespace std;
-
 // Start with these abstract classes
 
 class HypPackEnumerator {
@@ -36,8 +35,8 @@ public:
 // Low-memory, low-speed, sequential access
 class StreamingHypPackEnumerator : public HypPackEnumerator {
 public:
-  StreamingHypPackEnumerator(vector<string> const& featureFiles,
-                             vector<string> const& scoreFiles
+  StreamingHypPackEnumerator(std::vector<std::string> const& featureFiles,
+                             std::vector<std::string> const& scoreFiles
                              );
 
   virtual size_t num_dense() const;
@@ -54,14 +53,14 @@ private:
   void prime();
   size_t m_num_lists;
   size_t m_sentenceId;
-  vector<string> m_featureFiles;
-  vector<string> m_scoreFiles;
+  std::vector<std::string> m_featureFiles;
+  std::vector<std::string> m_scoreFiles;
 
   bool m_primed;
   int m_iNumDense;
-  vector<FeatureDataIterator>  m_featureDataIters;
-  vector<ScoreDataIterator>    m_scoreDataIters;
-  vector<pair<size_t,size_t> > m_current_indexes;
+  std::vector<FeatureDataIterator>  m_featureDataIters;
+  std::vector<ScoreDataIterator>    m_scoreDataIters;
+  std::vector<std::pair<size_t,size_t> > m_current_indexes;
 };
 
 // Instantiation that reads into memory
@@ -69,8 +68,8 @@ private:
 // (Actually randomizes with each call to reset)
 class RandomAccessHypPackEnumerator : public HypPackEnumerator {
 public:
-  RandomAccessHypPackEnumerator(vector<string> const& featureFiles,
-                                vector<string> const& scoreFiles,
+  RandomAccessHypPackEnumerator(std::vector<std::string> const& featureFiles,
+                                std::vector<std::string> const& scoreFiles,
                                 bool no_shuffle);
 
   virtual size_t num_dense() const;
@@ -87,9 +86,9 @@ private:
   bool m_no_shuffle;
   size_t m_cur_index;
   size_t m_num_dense;
-  vector<size_t> m_indexes;
-  vector<vector<FeatureDataItem> > m_features;
-  vector<vector<ScoreDataItem> > m_scores;
+  std::vector<size_t> m_indexes;
+  std::vector<std::vector<FeatureDataItem> > m_features;
+  std::vector<std::vector<ScoreDataItem> > m_scores;
 };
 
 #endif // MERT_HYP_PACK_COLLECTION_H
diff --git a/mert/MiraFeatureVector.h b/mert/MiraFeatureVector.h
index 14336c56f..27a4510ad 100644
--- a/mert/MiraFeatureVector.h
+++ b/mert/MiraFeatureVector.h
@@ -24,9 +24,9 @@ class MiraFeatureVector {
 public:
   MiraFeatureVector(const FeatureDataItem& vec);
   MiraFeatureVector(const MiraFeatureVector& other);
-  MiraFeatureVector(const vector<ValType>& dense,
-                    const vector<size_t>& sparseFeats,
-                    const vector<ValType>& sparseVals);
+  MiraFeatureVector(const std::vector<ValType>& dense,
+                    const std::vector<size_t>& sparseFeats,
+                    const std::vector<ValType>& sparseVals);
   
   ValType val(size_t index) const;
   size_t feat(size_t index) const;
@@ -37,9 +37,9 @@ public:
                                      const MiraFeatureVector& b);
   
 private:
-  vector<ValType> m_dense;
-  vector<size_t>  m_sparseFeats;
-  vector<ValType> m_sparseVals;
+  std::vector<ValType> m_dense;
+  std::vector<size_t>  m_sparseFeats;
+  std::vector<ValType> m_sparseVals;
 };
 
 #endif // MERT_FEATURE_VECTOR_H
diff --git a/moses-cmd/src/LatticeMBR.cpp b/moses-cmd/src/LatticeMBR.cpp
index b579fb592..1b1ec8284 100644
--- a/moses-cmd/src/LatticeMBR.cpp
+++ b/moses-cmd/src/LatticeMBR.cpp
@@ -13,6 +13,7 @@
 #include <set>
 
 using namespace std;
+using namespace Moses;
 
 size_t bleu_order = 4;
 float UNKNGRAMLOGPROB = -20;
diff --git a/moses-cmd/src/LatticeMBR.h b/moses-cmd/src/LatticeMBR.h
index 8b54e6c51..fa0379aee 100644
--- a/moses-cmd/src/LatticeMBR.h
+++ b/moses-cmd/src/LatticeMBR.h
@@ -17,35 +17,33 @@
 #include "Manager.h"
 #include "TrellisPathList.h"
 
-using namespace Moses;
-
 
 
 class Edge;
 
-typedef std::vector< const Hypothesis *> Lattice;
+typedef std::vector< const Moses::Hypothesis *> Lattice;
 typedef std::vector<const Edge*> Path;
 typedef std::map<Path, size_t> PathCounts;
-typedef std::map<Phrase, PathCounts > NgramHistory;
+typedef std::map<Moses::Phrase, PathCounts > NgramHistory;
 
 class Edge
 {
-  const Hypothesis* m_tailNode;
-  const Hypothesis* m_headNode;
+  const Moses::Hypothesis* m_tailNode;
+  const Moses::Hypothesis* m_headNode;
   float m_score;
-  TargetPhrase m_targetPhrase;
+  Moses::TargetPhrase m_targetPhrase;
   NgramHistory m_ngrams;
 
 public:
-  Edge(const Hypothesis* from, const Hypothesis* to, float score, const TargetPhrase& targetPhrase) : m_tailNode(from), m_headNode(to), m_score(score), m_targetPhrase(targetPhrase) {
+  Edge(const Moses::Hypothesis* from, const Moses::Hypothesis* to, float score, const Moses::TargetPhrase& targetPhrase) : m_tailNode(from), m_headNode(to), m_score(score), m_targetPhrase(targetPhrase) {
     //cout << "Creating new edge from Node " << from->GetId() << ", to Node : " << to->GetId() << ", score: " << score << " phrase: " << targetPhrase << endl;
   }
 
-  const Hypothesis* GetHeadNode() const {
+  const Moses::Hypothesis* GetHeadNode() const {
     return m_headNode;
   }
 
-  const Hypothesis* GetTailNode() const {
+  const Moses::Hypothesis* GetTailNode() const {
     return m_tailNode;
   }
 
@@ -57,19 +55,19 @@ public:
     return m_targetPhrase.GetSize();
   }
 
-  const Phrase& GetWords() const {
+  const Moses::Phrase& GetWords() const {
     return m_targetPhrase;
   }
 
   friend std::ostream& operator<< (std::ostream& out, const Edge& edge);
 
-  const NgramHistory&  GetNgrams(  std::map<const Hypothesis*, std::vector<Edge> > & incomingEdges) ;
+  const NgramHistory&  GetNgrams(  std::map<const Moses::Hypothesis*, std::vector<Edge> > & incomingEdges) ;
 
   bool operator < (const Edge & compare) const;
 
-  void GetPhraseSuffix(const Phrase& origPhrase, size_t lastN, Phrase& targetPhrase) const;
+  void GetPhraseSuffix(const Moses::Phrase& origPhrase, size_t lastN, Moses::Phrase& targetPhrase) const;
 
-  void storeNgramHistory(const Phrase& phrase, Path & path, size_t count = 1) {
+  void storeNgramHistory(const Moses::Phrase& phrase, Path & path, size_t count = 1) {
     m_ngrams[phrase][path]+= count;
   }
 
@@ -84,16 +82,16 @@ public:
   NgramScores() {}
 
   /** logsum this score to the existing score */
-  void addScore(const Hypothesis* node, const Phrase& ngram, float score);
+  void addScore(const Moses::Hypothesis* node, const Moses::Phrase& ngram, float score);
 
   /** Iterate through ngrams for selected node */
-  typedef std::map<const Phrase*, float>::const_iterator NodeScoreIterator;
-  NodeScoreIterator nodeBegin(const Hypothesis* node);
-  NodeScoreIterator nodeEnd(const Hypothesis* node);
+  typedef std::map<const Moses::Phrase*, float>::const_iterator NodeScoreIterator;
+  NodeScoreIterator nodeBegin(const Moses::Hypothesis* node);
+  NodeScoreIterator nodeEnd(const Moses::Hypothesis* node);
 
 private:
-  std::set<Phrase> m_ngrams;
-  std::map<const Hypothesis*, std::map<const Phrase*, float> > m_scores;
+  std::set<Moses::Phrase> m_ngrams;
+  std::map<const Moses::Hypothesis*, std::map<const Moses::Phrase*, float> > m_scores;
 };
 
 
@@ -102,11 +100,11 @@ class LatticeMBRSolution
 {
 public:
   /** Read the words from the path */
-  LatticeMBRSolution(const TrellisPath& path, bool isMap);
+  LatticeMBRSolution(const Moses::TrellisPath& path, bool isMap);
   const std::vector<float>& GetNgramScores() const {
     return m_ngramScores;
   }
-  const std::vector<Word>& GetWords() const {
+  const std::vector<Moses::Word>& GetWords() const {
     return m_words;
   }
   float GetMapScore() const {
@@ -117,10 +115,10 @@ public:
   }
 
   /** Initialise ngram scores */
-  void CalcScore(std::map<Phrase, float>& finalNgramScores, const std::vector<float>& thetas, float mapWeight);
+  void CalcScore(std::map<Moses::Phrase, float>& finalNgramScores, const std::vector<float>& thetas, float mapWeight);
 
 private:
-  std::vector<Word> m_words;
+  std::vector<Moses::Word> m_words;
   float m_mapScore;
   std::vector<float> m_ngramScores;
   float m_score;
@@ -132,18 +130,18 @@ struct LatticeMBRSolutionComparator {
   }
 };
 
-void pruneLatticeFB(Lattice & connectedHyp, std::map < const Hypothesis*, std::set <const Hypothesis* > > & outgoingHyps, std::map<const Hypothesis*, std::vector<Edge> >& incomingEdges,
-                    const std::vector< float> & estimatedScores, const Hypothesis*, size_t edgeDensity,float scale);
+void pruneLatticeFB(Lattice & connectedHyp, std::map < const Moses::Hypothesis*, std::set <const Moses::Hypothesis* > > & outgoingHyps, std::map<const Moses::Hypothesis*, std::vector<Edge> >& incomingEdges,
+                    const std::vector< float> & estimatedScores, const Moses::Hypothesis*, size_t edgeDensity,float scale);
 
 //Use the ngram scores to rerank the nbest list, return at most n solutions
-void getLatticeMBRNBest(Manager& manager, TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
+void getLatticeMBRNBest(Moses::Manager& manager, Moses::TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
 //calculate expectated ngram counts, clipping at 1 (ie calculating posteriors) if posteriors==true.
-void calcNgramExpectations(Lattice & connectedHyp, std::map<const Hypothesis*, std::vector<Edge> >& incomingEdges, std::map<Phrase,
+void calcNgramExpectations(Lattice & connectedHyp, std::map<const Moses::Hypothesis*, std::vector<Edge> >& incomingEdges, std::map<Moses::Phrase,
                            float>& finalNgramScores, bool posteriors);
-void GetOutputFactors(const TrellisPath &path, std::vector <Word> &translation);
-void extract_ngrams(const std::vector<Word >& sentence, std::map < Phrase, int >  & allngrams);
-bool ascendingCoverageCmp(const Hypothesis* a, const Hypothesis* b);
-std::vector<Word> doLatticeMBR(Manager& manager, TrellisPathList& nBestList);
-const TrellisPath doConsensusDecoding(Manager& manager, TrellisPathList& nBestList);
-//std::vector<Word> doConsensusDecoding(Manager& manager, TrellisPathList& nBestList);
+void GetOutputFactors(const Moses::TrellisPath &path, std::vector <Moses::Word> &translation);
+void extract_ngrams(const std::vector<Moses::Word >& sentence, std::map < Moses::Phrase, int >  & allngrams);
+bool ascendingCoverageCmp(const Moses::Hypothesis* a, const Moses::Hypothesis* b);
+std::vector<Moses::Word> doLatticeMBR(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
+const Moses::TrellisPath doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
+//std::vector<Moses::Word> doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
 #endif

From d25805858df34eb944f2c2db6b47c21d960136d8 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <fishandfrolick@gmail.com>
Date: Wed, 30 May 2012 13:04:02 +0100
Subject: [PATCH 36/38] xcode build supports threads. move 'using namespace'
 out from .h file to stop namespace pollution

---
 mert/HypPackEnumerator.cpp                    |  2 ++
 .../SentenceAlignmentWithSyntax.cpp           |  2 ++
 .../training/phrase-extract/hierarchical.h    | 14 +++++-----
 scripts/training/phrase-extract/relax-parse.h |  4 +--
 .../training/phrase-extract/tables-core.cpp   |  2 ++
 scripts/training/phrase-extract/tables-core.h | 26 +++++++++----------
 6 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/mert/HypPackEnumerator.cpp b/mert/HypPackEnumerator.cpp
index 9da627212..ffbf3cfb5 100644
--- a/mert/HypPackEnumerator.cpp
+++ b/mert/HypPackEnumerator.cpp
@@ -4,6 +4,8 @@
 #include <algorithm>
 #include <boost/unordered_set.hpp>
 
+using namespace std;
+
 StreamingHypPackEnumerator::StreamingHypPackEnumerator
 (
  vector<std::string> const& featureFiles,
diff --git a/scripts/training/phrase-extract/SentenceAlignmentWithSyntax.cpp b/scripts/training/phrase-extract/SentenceAlignmentWithSyntax.cpp
index 39c95c221..06dc3919f 100644
--- a/scripts/training/phrase-extract/SentenceAlignmentWithSyntax.cpp
+++ b/scripts/training/phrase-extract/SentenceAlignmentWithSyntax.cpp
@@ -27,6 +27,8 @@
 #include "XmlException.h"
 #include "XmlTree.h"
 
+using namespace std;
+
 bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetString, int sentenceID)
 {
   if (!m_options.targetSyntax) {
diff --git a/scripts/training/phrase-extract/hierarchical.h b/scripts/training/phrase-extract/hierarchical.h
index 40f6744ff..61c899013 100644
--- a/scripts/training/phrase-extract/hierarchical.h
+++ b/scripts/training/phrase-extract/hierarchical.h
@@ -14,22 +14,20 @@
 #include <set>
 #include <vector>
 
-using namespace std;
-
 // HPhraseVertex represents a point in the alignment matrix
-typedef pair <int, int> HPhraseVertex;
+typedef std::pair <int, int> HPhraseVertex;
 
 // Phrase represents a bi-phrase; each bi-phrase is defined by two points in the alignment matrix:
 // bottom-left and top-right
-typedef pair<HPhraseVertex, HPhraseVertex> HPhrase;
+typedef std::pair<HPhraseVertex, HPhraseVertex> HPhrase;
 
-// HPhraseVector is a vector of phrases
+// HPhraseVector is a std::vector of phrases
 // the bool value indicates if the associated phrase is within the length limit or not
-typedef vector < HPhrase > HPhraseVector;
+typedef std::vector < HPhrase > HPhraseVector;
 
 // SentenceVertices represents all vertices that have the same positioning of all extracted phrases
-// The key of the map is the English index and the value is a set of the foreign ones
-typedef map <int, set<int> > HSenteceVertices;
+// The key of the std::map is the English index and the value is a std::set of the foreign ones
+typedef std::map <int, std::set<int> > HSenteceVertices;
 
 
 #endif /* HIERARCHICAL_H_ */
diff --git a/scripts/training/phrase-extract/relax-parse.h b/scripts/training/phrase-extract/relax-parse.h
index cdde3f16c..ae5994641 100644
--- a/scripts/training/phrase-extract/relax-parse.h
+++ b/scripts/training/phrase-extract/relax-parse.h
@@ -31,8 +31,6 @@
 #include "SyntaxTree.h"
 #include "XmlTree.h"
 
-using namespace std;
-
 #define LINE_MAX_LENGTH 1000000
 
 bool leftBinarizeFlag = false;
@@ -41,7 +39,7 @@ char SAMTLevel = 0;
 
 // functions
 void init(int argc, char* argv[]);
-void store( SyntaxTree &tree, vector<string> &words );
+void store( SyntaxTree &tree, std::vector<std::string> &words );
 void LeftBinarize( SyntaxTree &tree, ParentNodes &parents );
 void RightBinarize( SyntaxTree &tree, ParentNodes &parents );
 void SAMT( SyntaxTree &tree, ParentNodes &parents );
diff --git a/scripts/training/phrase-extract/tables-core.cpp b/scripts/training/phrase-extract/tables-core.cpp
index de50f5024..93ad8b6a1 100644
--- a/scripts/training/phrase-extract/tables-core.cpp
+++ b/scripts/training/phrase-extract/tables-core.cpp
@@ -5,6 +5,8 @@
 #define TABLE_LINE_MAX_LENGTH 1000
 #define UNKNOWNSTR	"UNK"
 
+using namespace std;
+
 // as in beamdecoder/tables.cpp
 vector<string> tokenize( const char* input )
 {
diff --git a/scripts/training/phrase-extract/tables-core.h b/scripts/training/phrase-extract/tables-core.h
index 2db8086e5..1899b4d77 100644
--- a/scripts/training/phrase-extract/tables-core.h
+++ b/scripts/training/phrase-extract/tables-core.h
@@ -12,18 +12,16 @@
 #include <map>
 #include <cmath>
 
-using namespace std;
+extern std::vector<std::string> tokenize( const char*);
 
-extern vector<string> tokenize( const char*);
-
-typedef string WORD;
+typedef std::string WORD;
 typedef unsigned int WORD_ID;
 
 class Vocabulary
 {
 public:
-  map<WORD, WORD_ID>  lookup;
-  vector< WORD > vocab;
+  std::map<WORD, WORD_ID>  lookup;
+  std::vector< WORD > vocab;
   WORD_ID storeIfNew( const WORD& );
   WORD_ID getWordID( const WORD& );
   inline WORD &getWord( WORD_ID id ) {
@@ -31,14 +29,14 @@ public:
   }
 };
 
-typedef vector< WORD_ID > PHRASE;
+typedef std::vector< WORD_ID > PHRASE;
 typedef unsigned int PHRASE_ID;
 
 class PhraseTable
 {
 public:
-  map< PHRASE, PHRASE_ID > lookup;
-  vector< PHRASE > phraseTable;
+  std::map< PHRASE, PHRASE_ID > lookup;
+  std::vector< PHRASE > phraseTable;
   PHRASE_ID storeIfNew( const PHRASE& );
   PHRASE_ID getPhraseID( const PHRASE& );
   void clear();
@@ -47,21 +45,21 @@ public:
   }
 };
 
-typedef vector< pair< PHRASE_ID, double > > PHRASEPROBVEC;
+typedef std::vector< std::pair< PHRASE_ID, double > > PHRASEPROBVEC;
 
 class TTable
 {
 public:
-  map< PHRASE_ID, vector< pair< PHRASE_ID, double > > > ttable;
-  map< PHRASE_ID, vector< pair< PHRASE_ID, vector< double > > > > ttableMulti;
+  std::map< PHRASE_ID, std::vector< std::pair< PHRASE_ID, double > > > ttable;
+  std::map< PHRASE_ID, std::vector< std::pair< PHRASE_ID, std::vector< double > > > > ttableMulti;
 };
 
 class DTable
 {
 public:
-  map< int, double > dtable;
+  std::map< int, double > dtable;
   void init();
-  void load( const string& );
+  void load( const std::string& );
   double get( int );
 };
 

From 01eb60f35031157b5a780e539473dfd88a7714d1 Mon Sep 17 00:00:00 2001
From: Tetsuo Kiso <tetsuo-s@is.naist.jp>
Date: Wed, 30 May 2012 22:59:23 +0900
Subject: [PATCH 37/38] Add "virtual" destructor to the HypPackEnumerator
 class.

---
 mert/HypPackEnumerator.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mert/HypPackEnumerator.h b/mert/HypPackEnumerator.h
index 07f12e91b..5d2a230a5 100644
--- a/mert/HypPackEnumerator.h
+++ b/mert/HypPackEnumerator.h
@@ -21,6 +21,8 @@
 
 class HypPackEnumerator {
 public:
+  virtual ~HypPackEnumerator() {}
+
   virtual void reset() = 0;
   virtual bool finished() = 0;
   virtual void next() = 0;

From beb2256dbaf420ed525cc8354617ead0db315060 Mon Sep 17 00:00:00 2001
From: Tetsuo Kiso <tetsuo-s@is.naist.jp>
Date: Wed, 30 May 2012 23:11:09 +0900
Subject: [PATCH 38/38] Move 'using namespace std' out from .h.

Add "std" to size_t, too.
---
 mert/HypPackEnumerator.h   | 45 +++++++++++++++++++-------------------
 mert/MiraFeatureVector.cpp |  6 +++--
 mert/MiraFeatureVector.h   | 18 +++++++--------
 mert/MiraWeightVector.cpp  |  2 ++
 mert/MiraWeightVector.h    | 26 ++++++++++------------
 5 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/mert/HypPackEnumerator.h b/mert/HypPackEnumerator.h
index 5d2a230a5..d878c2625 100644
--- a/mert/HypPackEnumerator.h
+++ b/mert/HypPackEnumerator.h
@@ -27,10 +27,10 @@ public:
   virtual bool finished() = 0;
   virtual void next() = 0;
 
-  virtual size_t cur_size() = 0;
-  virtual size_t num_dense() const = 0;
-  virtual const FeatureDataItem& featuresAt(size_t i) = 0;
-  virtual const ScoreDataItem& scoresAt(size_t i) = 0;
+  virtual std::size_t cur_size() = 0;
+  virtual std::size_t num_dense() const = 0;
+  virtual const FeatureDataItem& featuresAt(std::size_t i) = 0;
+  virtual const ScoreDataItem& scoresAt(std::size_t i) = 0;
 };
 
 // Instantiation that streams from disk
@@ -38,23 +38,22 @@ public:
 class StreamingHypPackEnumerator : public HypPackEnumerator {
 public:
   StreamingHypPackEnumerator(std::vector<std::string> const& featureFiles,
-                             std::vector<std::string> const& scoreFiles
-                             );
+                             std::vector<std::string> const& scoreFiles);
+
+  virtual std::size_t num_dense() const;
 
-  virtual size_t num_dense() const;
-  
   virtual void reset();
   virtual bool finished();
   virtual void next();
 
-  virtual size_t cur_size();
-  virtual const FeatureDataItem& featuresAt(size_t i);
-  virtual const ScoreDataItem& scoresAt(size_t i);
-  
+  virtual std::size_t cur_size();
+  virtual const FeatureDataItem& featuresAt(std::size_t i);
+  virtual const ScoreDataItem& scoresAt(std::size_t i);
+
 private:
   void prime();
-  size_t m_num_lists;
-  size_t m_sentenceId;
+  std::size_t m_num_lists;
+  std::size_t m_sentenceId;
   std::vector<std::string> m_featureFiles;
   std::vector<std::string> m_scoreFiles;
 
@@ -62,7 +61,7 @@ private:
   int m_iNumDense;
   std::vector<FeatureDataIterator>  m_featureDataIters;
   std::vector<ScoreDataIterator>    m_scoreDataIters;
-  std::vector<std::pair<size_t,size_t> > m_current_indexes;
+  std::vector<std::pair<std::size_t,std::size_t> > m_current_indexes;
 };
 
 // Instantiation that reads into memory
@@ -74,21 +73,21 @@ public:
                                 std::vector<std::string> const& scoreFiles,
                                 bool no_shuffle);
 
-  virtual size_t num_dense() const;
-  
+  virtual std::size_t num_dense() const;
+
   virtual void reset();
   virtual bool finished();
   virtual void next();
 
-  virtual size_t cur_size();
-  virtual const FeatureDataItem& featuresAt(size_t i);
-  virtual const ScoreDataItem& scoresAt(size_t i);
+  virtual std::size_t cur_size();
+  virtual const FeatureDataItem& featuresAt(std::size_t i);
+  virtual const ScoreDataItem& scoresAt(std::size_t i);
 
 private:
   bool m_no_shuffle;
-  size_t m_cur_index;
-  size_t m_num_dense;
-  std::vector<size_t> m_indexes;
+  std::size_t m_cur_index;
+  std::size_t m_num_dense;
+  std::vector<std::size_t> m_indexes;
   std::vector<std::vector<FeatureDataItem> > m_features;
   std::vector<std::vector<ScoreDataItem> > m_scores;
 };
diff --git a/mert/MiraFeatureVector.cpp b/mert/MiraFeatureVector.cpp
index 9636b2fcd..b72d29595 100644
--- a/mert/MiraFeatureVector.cpp
+++ b/mert/MiraFeatureVector.cpp
@@ -2,6 +2,8 @@
 
 #include "MiraFeatureVector.h"
 
+using namespace std;
+
 MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
   : m_dense(vec.dense)
 {
@@ -97,7 +99,7 @@ MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector&
   vector<ValType> sparseVals;
   vector<size_t> sparseFeats;
   while(i < a.m_sparseFeats.size() && j < b.m_sparseFeats.size()) {
-    
+
     if(a.m_sparseFeats[i] < b.m_sparseFeats[j]) {
       sparseFeats.push_back(a.m_sparseFeats[i]);
       sparseVals.push_back(a.m_sparseVals[i]);
@@ -136,7 +138,7 @@ MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector&
   // Create and return vector
   return MiraFeatureVector(dense,sparseFeats,sparseVals);
 }
-  
+
 // --Emacs trickery--
 // Local Variables:
 // mode:c++
diff --git a/mert/MiraFeatureVector.h b/mert/MiraFeatureVector.h
index 27a4510ad..31dd025c3 100644
--- a/mert/MiraFeatureVector.h
+++ b/mert/MiraFeatureVector.h
@@ -16,8 +16,6 @@
 
 #include "FeatureDataIterator.h"
 
-using namespace std;
-
 typedef FeatureStatsType ValType;
 
 class MiraFeatureVector {
@@ -25,20 +23,20 @@ public:
   MiraFeatureVector(const FeatureDataItem& vec);
   MiraFeatureVector(const MiraFeatureVector& other);
   MiraFeatureVector(const std::vector<ValType>& dense,
-                    const std::vector<size_t>& sparseFeats,
+                    const std::vector<std::size_t>& sparseFeats,
                     const std::vector<ValType>& sparseVals);
-  
-  ValType val(size_t index) const;
-  size_t feat(size_t index) const;
-  size_t size() const;
+
+  ValType val(std::size_t index) const;
+  std::size_t feat(std::size_t index) const;
+  std::size_t size() const;
   ValType sqrNorm() const;
-  
+
   friend MiraFeatureVector operator-(const MiraFeatureVector& a,
                                      const MiraFeatureVector& b);
-  
+
 private:
   std::vector<ValType> m_dense;
-  std::vector<size_t>  m_sparseFeats;
+  std::vector<std::size_t> m_sparseFeats;
   std::vector<ValType> m_sparseVals;
 };
 
diff --git a/mert/MiraWeightVector.cpp b/mert/MiraWeightVector.cpp
index 8b46044fa..7e17a2714 100644
--- a/mert/MiraWeightVector.cpp
+++ b/mert/MiraWeightVector.cpp
@@ -1,5 +1,7 @@
 #include "MiraWeightVector.h"
 
+using namespace std;
+
 /**
  * Constructor, initializes to the zero vector
  */
diff --git a/mert/MiraWeightVector.h b/mert/MiraWeightVector.h
index 375858634..65b374625 100644
--- a/mert/MiraWeightVector.h
+++ b/mert/MiraWeightVector.h
@@ -4,7 +4,7 @@
  *
  * A self-averaging weight-vector. Good for
  * perceptron learning as well.
- * 
+ *
  */
 
 #ifndef MERT_MIRA_WEIGHT_VECTOR_H
@@ -14,8 +14,6 @@
 
 #include "MiraFeatureVector.h"
 
-using namespace std;
-
 class AvgWeightVector;
 
 class MiraWeightVector {
@@ -29,7 +27,7 @@ public:
    * Constructor with provided initial vector
    * \param init Initial feature values
    */
-  MiraWeightVector(const vector<ValType>& init); 
+  MiraWeightVector(const std::vector<ValType>& init);
 
   /**
    * Update a the model
@@ -60,12 +58,12 @@ public:
   AvgWeightVector avg();
 
   friend class AvgWeightVector;
-  
+
 private:
   /**
    * Updates a weight and lazily updates its total
    */
-  void update(size_t index, ValType delta);
+  void update(std::size_t index, ValType delta);
 
   /**
    * Make sure everyone's total is up-to-date
@@ -75,12 +73,12 @@ private:
   /**
    * Helper to handle out-of-range weights
    */
-  ValType weight(size_t index) const;
-  
-  vector<ValType> m_weights;
-  vector<ValType> m_totals;
-  vector<size_t>  m_lastUpdated;
-  size_t          m_numUpdates;
+  ValType weight(std::size_t index) const;
+
+  std::vector<ValType> m_weights;
+  std::vector<ValType> m_totals;
+  std::vector<std::size_t> m_lastUpdated;
+  std::size_t m_numUpdates;
 };
 
 /**
@@ -90,8 +88,8 @@ class AvgWeightVector {
 public:
   AvgWeightVector(const MiraWeightVector& wv);
   ValType score(const MiraFeatureVector& fv) const;
-  ValType weight(size_t index) const;
-  size_t size() const;
+  ValType weight(std::size_t index) const;
+  std::size_t size() const;
 private:
   const MiraWeightVector& m_wv;
 };