merged master into dynamic-models and solved conflicts

2024-09-11 11:25:40 +03:00 · 2014-04-28 19:18:38 +02:00 · 2014-04-28 19:18:38 +02:00 · 20381cbf89
commit 20381cbf89
parent 03825e3ffd 6182750b70
339 changed files with 19727 additions and 3681 deletions
--- a/.gitignore
+++ b/.gitignore
@ -76,3 +76,6 @@ mert/sentence-bleu
 build/
 nbproject/

+mingw/MosesGUI/MosesGUI.e4p
+mingw/MosesGUI/_eric4project/
+
--- a/30
+++ b/30
@ -64,6 +64,8 @@
 #
 # --max-factors                  maximum number of factors (default 4)
 #
+# --unlabelled-source            ignore source labels (redundant in hiero or string-to-tree system)
+#                                for better performance
 #CONTROLLING THE BUILD
 #-a to build from scratch
 #-j$NCPUS to compile in parallel
@ -89,7 +91,7 @@ if ! [ option.get "without-tcmalloc" : : "yes" ] && [ test_library "tcmalloc_min
    requirements += <library>tcmalloc_and_profiler <library>unwind <cflags>-fno-omit-frame-pointer <cxxflags>-fno-omit-frame-pointer ;
  } else {
    external-lib tcmalloc_minimal ;
-    requirements += <threading>multi:<library>$(tcmalloc_minimal) ;
+    requirements += <threading>multi:<library>tcmalloc_minimal ;
  }
 } else {
  echo "Tip: install tcmalloc for faster threading.  See BUILD-INSTRUCTIONS.txt for more information." ;
@ -108,6 +110,9 @@ if [ option.get "enable-mpi" : : "yes" ] {

 requirements += [ option.get "notrace" : <define>TRACE_ENABLE=1 ] ;
 requirements += [ option.get "enable-boost-pool" : : <define>USE_BOOST_POOL ] ;
+requirements += [ option.get "with-mm" : : <define>PT_UG ] ;
+requirements += [ option.get "with-mm" : : <define>MAX_NUM_FACTORS=4 ] ;
+requirements += [ option.get "unlabelled-source" : : <define>UNLABELLED_SOURCE ] ;

 if [ option.get "with-cmph" ] {
  requirements += <define>HAVE_CMPH ;
@ -137,6 +142,23 @@ project : requirements
 #Add directories here if you want their incidental targets too (i.e. tests).
 build-projects lm util phrase-extract search moses moses/LM mert moses-cmd moses-chart-cmd mira scripts regression-testing  ;

+if [ option.get "with-mm" : : "yes" ]
+{
+ alias mm :  
+  moses/TranslationModel/UG/mm//mtt-build 
+  moses/TranslationModel/UG/mm//mtt-dump 
+  moses/TranslationModel/UG/mm//symal2mam 
+  moses/TranslationModel/UG/mm//custom-pt 
+  moses/TranslationModel/UG/mm//mmlex-build 
+  moses/TranslationModel/UG/mm//mtt-count-words 
+  moses/TranslationModel/UG//try-align 
+  ;
+}
+else
+{
+ alias mm ; 
+}
+
 alias programs : 
 lm//programs 
 moses-chart-cmd//moses_chart 
@ -154,12 +176,10 @@ phrase-extract//pcfg-score
 biconcor 
 mira//mira 
 contrib/server//mosesserver 
-#moses/mm//mtt-build 
-#moses/mm//mtt-dump 
-#moses/mm//symal2mam 
-#moses/mm//custom-pt 
+mm
 ;

+
 install-bin-libs programs ;
 install-headers headers-base : [ path.glob-tree biconcor contrib lm mert misc moses-chart-cmd moses-cmd OnDiskPt phrase-extract symal util : *.hh *.h ] : . ;
 install-headers headers-moses : moses//headers-to-install : moses ;
--- a/OnDiskPt/Main.cpp
+++ b/OnDiskPt/Main.cpp
@ -127,14 +127,14 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
    } else {
      switch (stage) {
      case 0: {
-        WordPtr w = Tokenize(sourcePhrase, tok, true, true, onDiskWrapper);
+        WordPtr w = Tokenize(sourcePhrase, tok, true, true, onDiskWrapper, 1);
        if (w != NULL)
          out->AddWord(w);

        break;
      }
      case 1: {
-        Tokenize(targetPhrase, tok, false, true, onDiskWrapper);
+        Tokenize(targetPhrase, tok, false, true, onDiskWrapper, 0);
        break;
      }
      case 2: {
@ -189,8 +189,9 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr

 OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase
                           , const std::string &token, bool addSourceNonTerm, bool addTargetNonTerm
-                           , OnDiskPt::OnDiskWrapper &onDiskWrapper)
+                           , OnDiskPt::OnDiskWrapper &onDiskWrapper, int retSourceTarget)
 {
+  // retSourceTarget: 0 = don't return anything. 1 = source, 2 = target

  bool nonTerm = false;
  size_t tokSize = token.size();
@ -218,6 +219,10 @@ OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase
        WordPtr word(new Word());
        word->CreateFromString(wordStr, onDiskWrapper.GetVocab());
        phrase.AddWord(word);
+
+        if (retSourceTarget == 1) {
+            out = word;
+        }
      }

      wordStr = token.substr(splitPos, tokSize - splitPos);
@ -225,7 +230,10 @@ OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase
        WordPtr word(new Word());
        word->CreateFromString(wordStr, onDiskWrapper.GetVocab());
        phrase.AddWord(word);
-        out = word;
+
+        if (retSourceTarget == 2) {
+            out = word;
+        }
      }

    }
--- a/OnDiskPt/Main.h
+++ b/OnDiskPt/Main.h
@ -27,7 +27,7 @@ typedef std::vector<AlignPair> AlignType;

 OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase
                           , const std::string &token, bool addSourceNonTerm, bool addTargetNonTerm
-                           , OnDiskPt::OnDiskWrapper &onDiskWrapper);
+                           , OnDiskPt::OnDiskWrapper &onDiskWrapper, int retSourceTarget);
 OnDiskPt::PhrasePtr Tokenize(OnDiskPt::SourcePhrase &sourcePhrase, OnDiskPt::TargetPhrase &targetPhrase
                             , char *line, OnDiskPt::OnDiskWrapper &onDiskWrapper
                             , int numScores
--- a/OnDiskPt/Word.cpp
+++ b/OnDiskPt/Word.cpp
@ -109,7 +109,7 @@ void Word::ConvertToMoses(

  for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
    UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
-    overwrite.SetFactor(*t, factorColl.AddFactor(*tok));
+    overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
  }
  UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
 }
--- a/contrib/other-builds/OnDiskPt/.cproject
+++ b/contrib/other-builds/OnDiskPt/.cproject
@ -11,12 +11,12 @@
 					</externalSetting>
 				</externalSettings>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -72,13 +72,13 @@
 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.701931933" moduleId="org.eclipse.cdt.core.settings" name="Release">
 				<externalSettings/>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
--- a/contrib/other-builds/extract-mixed-syntax/.cproject
+++ b/contrib/other-builds/extract-mixed-syntax/.cproject
@ -0,0 +1,133 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+	<storageModule moduleId="org.eclipse.cdt.core.settings">
+		<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
+					<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.456080129" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
+							<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.582801917" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
+							<builder buildPath="${workspace_loc:/extract-mixed-syntax/Debug}" id="cdt.managedbuild.builder.gnu.cross.1220166455" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1245611568" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
+								<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.2055012191" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.option.debugging.level.1768196213" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2007889843" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1194558915" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
+								<option id="gnu.cpp.compiler.option.optimization.level.855436310" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.debugging.level.506549229" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.include.paths.1497326561" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
+								</option>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2118510064" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.606353571" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.740521305" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
+								<option id="gnu.cpp.link.option.libs.1946120010" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
+									<listOptionValue builtIn="false" value="z"/>
+									<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
+								</option>
+								<option id="gnu.cpp.link.option.paths.1563475751" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
+								</option>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.106010037" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.archiver.136661991" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.assembler.2112208574" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.172930211" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+		<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.715007893">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.715007893" moduleId="org.eclipse.cdt.core.settings" name="Release">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.715007893" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
+					<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.715007893." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.99436307" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
+							<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.801178939" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
+							<builder buildPath="${workspace_loc:/extract-mixed-syntax/Release}" id="cdt.managedbuild.builder.gnu.cross.1999547547" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.2138817906" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
+								<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1481537766" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.option.debugging.level.1967527847" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.442342681" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1604862038" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
+								<option id="gnu.cpp.compiler.option.optimization.level.1847950300" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.debugging.level.1130138972" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.870650754" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.158429528" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.2020667840" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1372779734" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.archiver.371006952" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1770045040" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.707592414" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+	</storageModule>
+	<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+		<project id="extract-mixed-syntax.cdt.managedbuild.target.gnu.cross.exe.1868010260" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
+	</storageModule>
+	<storageModule moduleId="scannerConfiguration">
+		<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.715007893;cdt.managedbuild.config.gnu.cross.exe.release.715007893.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1604862038;cdt.managedbuild.tool.gnu.cpp.compiler.input.870650754">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.715007893;cdt.managedbuild.config.gnu.cross.exe.release.715007893.;cdt.managedbuild.tool.gnu.cross.c.compiler.2138817906;cdt.managedbuild.tool.gnu.c.compiler.input.442342681">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982;cdt.managedbuild.config.gnu.cross.exe.debug.1919499982.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1194558915;cdt.managedbuild.tool.gnu.cpp.compiler.input.2118510064">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982;cdt.managedbuild.config.gnu.cross.exe.debug.1919499982.;cdt.managedbuild.tool.gnu.cross.c.compiler.1245611568;cdt.managedbuild.tool.gnu.c.compiler.input.2007889843">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+	</storageModule>
+	<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
+	<storageModule moduleId="refreshScope" versionNumber="2">
+		<configuration configurationName="Release">
+			<resource resourceType="PROJECT" workspacePath="/extract-mixed-syntax"/>
+		</configuration>
+		<configuration configurationName="Debug">
+			<resource resourceType="PROJECT" workspacePath="/extract-mixed-syntax"/>
+		</configuration>
+	</storageModule>
+	<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
+</cproject>
--- a/contrib/other-builds/extract-mixed-syntax/.project
+++ b/contrib/other-builds/extract-mixed-syntax/.project
@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>extract-mixed-syntax</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+			<triggers>clean,full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+			<triggers>full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.cdt.core.cnature</nature>
+		<nature>org.eclipse.cdt.core.ccnature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
+	</natures>
+</projectDescription>
--- a/contrib/other-builds/extract-mixed-syntax/Global.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/Global.cpp
@ -0,0 +1,37 @@
+/*
+ *  Global.cpp
+ *  extract
+ *
+ *  Created by Hieu Hoang on 01/02/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+
+#include "Global.h"
+
+bool g_debug = false;
+
+Global::Global() // Default constructor: sets every extraction option to its built-in default.
+: minHoleSpanSourceDefault(2)
+, maxHoleSpanSourceDefault(7)
+, minHoleSpanSourceSyntax(1)
+, maxHoleSpanSourceSyntax(1000)
+, maxUnaligned(5)
+// NOTE: C++ initialises members in the order Global.h declares them, not the order written here; harmless because every initialiser is an independent constant.
+, maxSymbols(5)
+, maxNonTerm(3)
+, maxNonTermDefault(2)
+
+// int minHoleSize(1)
+// int minSubPhraseSize(1) // minimum size of a remaining lexical phrase 
+, glueGrammarFlag(false)
+, unknownWordLabelFlag(false)
+//bool zipFiles(false)
+, sourceSyntax(true)
+, targetSyntax(false)
+, mixed(true)
+, uppermostOnly(true)
+, allowDefaultNonTermEdge(true)
+, gzOutput(false)
+
+{}
--- a/contrib/other-builds/extract-mixed-syntax/Global.h
+++ b/contrib/other-builds/extract-mixed-syntax/Global.h
@ -0,0 +1,45 @@
+#pragma once
+/*
+ *  Global.h
+ *  extract
+ *
+ *  Created by Hieu Hoang on 01/02/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include <set>
+#include <map>
+#include <string>
+
+class Global // Plain bundle of extraction options; the defaults quoted below are the ones set in Global::Global().
+{
+public:
+	int minHoleSpanSourceDefault; // default 2
+	int maxHoleSpanSourceDefault; // default 7; span limit Lattice::CreateArcsUsing1Hole applies when neither node reports IsSyntax()
+	int minHoleSpanSourceSyntax; // default 1
+	int maxHoleSpanSourceSyntax; // default 1000; span limit applied when either node reports IsSyntax()
+
+	int maxSymbols; // default 5
+	bool glueGrammarFlag; // default false
+	bool unknownWordLabelFlag; // default false
+	int maxNonTerm; // default 3
+	int maxNonTermDefault; // default 2
+	bool sourceSyntax; // default true
+	bool targetSyntax; // default false
+	bool mixed; // default true
+	int maxUnaligned; // default 5
+	bool uppermostOnly; // default true
+	bool allowDefaultNonTermEdge; // default true
+  bool gzOutput; // default false
+
+	Global(); // fills in the defaults listed above
+
+	Global(const Global&); // declared only; Global.cpp provides no definition (presumably to forbid copying - confirm)
+
+};
+
+extern bool g_debug;
+
+#define DEBUG_OUTPUT()	 void DebugOutput() const;
+
+
--- a/contrib/other-builds/extract-mixed-syntax/InputFileStream.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/InputFileStream.cpp
@ -0,0 +1,62 @@
+// $Id: InputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#include "InputFileStream.h"
+#include "gzfilebuf.h"
+#include <iostream>
+
+using namespace std;
+
+namespace Moses
+{
+	InputFileStream::InputFileStream(const std::string &filePath) // Opens filePath for reading; a path ending in ".gz" is read through gzfilebuf, anything else through std::filebuf.
+	: std::istream(NULL) // no buffer yet; the real one is attached by init() at the end
+	, m_streambuf(NULL)
+	{
+		if (filePath.size() > 3 && // a name must be longer than the 3-character suffix to count as gzipped
+				filePath.substr(filePath.size() - 3, 3) == ".gz")
+		{
+			m_streambuf = new gzfilebuf(filePath.c_str());
+		} else {
+			std::filebuf* fb = new std::filebuf();
+			fb = fb->open(filePath.c_str(), std::ios::in); // open() returns a null pointer on failure
+			if (! fb) {
+				cerr << "Can't read " << filePath.c_str() << endl;
+				exit(1); // an unreadable input file terminates the whole process
+			}
+			m_streambuf = fb;
+		}
+		this->init(m_streambuf); // attach the chosen buffer to the istream base
+	}
+	
+	InputFileStream::~InputFileStream() // Releases the stream buffer allocated by the constructor.
+	{
+		delete m_streambuf;
+		m_streambuf = NULL; // cleared so no dangling pointer remains in the member
+	}
+	
+	void InputFileStream::Close() // No-op: the body is empty; the stream buffer is only released in the destructor.
+	{
+	}
+	
+	
+}
+
--- a/contrib/other-builds/extract-mixed-syntax/InputFileStream.h
+++ b/contrib/other-builds/extract-mixed-syntax/InputFileStream.h
@ -0,0 +1,48 @@
+// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#ifndef moses_InputFileStream_h
+#define moses_InputFileStream_h
+
+#include <cstdlib>
+#include <fstream>
+#include <string>
+
+namespace Moses
+{
+	
+	/** Input stream used in place of std::istream; reads zipped input when the file path ends in ".gz".
+	 */
+	class InputFileStream : public std::istream
+	{
+	protected:
+		std::streambuf *m_streambuf; // buffer created in the constructor and deleted in the destructor
+	public:
+		
+		InputFileStream(const std::string &filePath); // opens filePath; the process exits if a plain file cannot be opened
+		~InputFileStream();
+		
+		void Close(); // currently has an empty body
+	};
+	
+}
+
+#endif
--- a/contrib/other-builds/extract-mixed-syntax/Lattice.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/Lattice.cpp
@ -0,0 +1,180 @@
+/*
+ *  Lattice.cpp
+ *  extract
+ *
+ *  Created by Hieu Hoang on 18/07/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+
+#include <cassert>
+#include "Lattice.h"
+#include "LatticeNode.h"
+#include "Tunnel.h"
+#include "TunnelCollection.h"
+#include "SyntaxTree.h"
+#include "SentenceAlignment.h"
+#include "tables-core.h"
+#include "Rule.h"
+#include "RuleCollection.h"
+
+using namespace std;
+
+Lattice::Lattice(size_t sourceSize) // Creates sourceSize + 1 empty stacks, indexed by source start position.
+:m_stacks(sourceSize + 1)
+{
+}
+
+Lattice::~Lattice() // Cleans up every stack; the nodes in them were allocated with new in CreateArcs / CreateArcsUsing1Hole.
+{
+	std::vector<Stack>::iterator iterStack;
+	for (iterStack = m_stacks.begin(); iterStack != m_stacks.end(); ++iterStack)
+	{
+		Stack &stack = *iterStack;
+		RemoveAllInColl(stack); // NOTE(review): defined outside this file; presumably deletes each LatticeNode* in the stack - confirm
+	}	
+}
+
+void Lattice::CreateArcs(size_t startPos, const TunnelCollection &tunnelColl, const SentenceAlignment &sentence, const Global &global) // Adds the terminal node for startPos, then non-terminal nodes for the tunnels found from startPos onwards.
+{	
+	// terminal: one node for the single source position startPos
+	Stack &startStack = GetStack(startPos);
+	
+	LatticeNode *node = new LatticeNode(startPos, &sentence);
+	startStack.push_back(node);
+	
+	// non-terminals: try every end position from startPos up to the last source word
+	for (size_t endPos = startPos + 1; endPos <= sentence.source.size(); ++endPos)
+	{
+		const TunnelList &tunnels = tunnelColl.GetTunnels(startPos, endPos - 1); // endPos is one past the end here, so the queried span is [startPos, endPos - 1]
+		
+		TunnelList::const_iterator iterHole;
+		for (iterHole = tunnels.begin(); iterHole != tunnels.end(); ++iterHole)
+		{
+			const Tunnel &tunnel = *iterHole;
+			CreateArcsUsing1Hole(tunnel, sentence, global); // pushes its nodes onto the stack at the tunnel's own source start position
+		}
+	}
+}
+
+void Lattice::CreateArcsUsing1Hole(const Tunnel &tunnel, const SentenceAlignment &sentence, const Global &global) // Adds one non-terminal node per (source label, target label) pair of the tunnel, subject to the span limit.
+{
+	size_t startPos	= tunnel.GetRange(0).GetStartPos() // range 0 is looked up in the source tree below
+				, endPos	= tunnel.GetRange(0).GetEndPos();
+	size_t numSymbols = tunnel.GetRange(0).GetWidth(); // only read by the assert on the next line
+	assert(numSymbols > 0);
+	
+	Stack &startStack = GetStack(startPos);
+
+		
+	// non-terms. cartesian product of source & target labels
+	assert(startPos == tunnel.GetRange(0).GetStartPos() && endPos == tunnel.GetRange(0).GetEndPos()); // re-checks the two values assigned at the top of the function
+	size_t startT	= tunnel.GetRange(1).GetStartPos() // range 1 is looked up in the target tree below
+				,endT		= tunnel.GetRange(1).GetEndPos();
+	
+	const SyntaxNodes &nodesS = sentence.sourceTree.GetNodes(startPos, endPos);
+	const SyntaxNodes &nodesT = sentence.targetTree.GetNodes(startT, endT );
+
+	SyntaxNodes::const_iterator iterS, iterT;
+	for (iterS = nodesS.begin(); iterS != nodesS.end(); ++iterS)
+	{
+		const SyntaxNode *syntaxNodeS = *iterS;
+		
+		for (iterT = nodesT.begin(); iterT != nodesT.end(); ++iterT)
+		{
+			const SyntaxNode *syntaxNodeT = *iterT;
+			
+			bool isSyntax = syntaxNodeS->IsSyntax() || syntaxNodeT->IsSyntax(); // the syntax limit applies as soon as either side reports IsSyntax()
+			size_t maxSourceNonTermSpan = isSyntax ? global.maxHoleSpanSourceSyntax : global.maxHoleSpanSourceDefault;
+			
+			if (maxSourceNonTermSpan >= endPos - startPos) // the limit is compared against endPos - startPos
+			{				
+				LatticeNode *node = new LatticeNode(tunnel, syntaxNodeS, syntaxNodeT);
+				startStack.push_back(node); // the stack now holds the raw pointer
+			}
+		}
+	}
+}
+
+Stack &Lattice::GetStack(size_t startPos) // Mutable access to the stack for source position startPos.
+{
+	assert(startPos < m_stacks.size()); // bounds are only checked in builds where assert is active
+	return m_stacks[startPos];
+}
+
+const Stack &Lattice::GetStack(size_t startPos) const // Read-only access to the stack for source position startPos.
+{
+	assert(startPos < m_stacks.size()); // bounds are only checked in builds where assert is active
+	return m_stacks[startPos];
+}
+
+void Lattice::CreateRules(size_t startPos, const SentenceAlignment &sentence, const Global &global) // Starts a one-node Rule from every node in the stack at startPos, extends it where possible and stores the valid ones in m_rules.
+{
+	const Stack &startStack = GetStack(startPos);
+	
+	Stack::const_iterator iterStack;
+	for (iterStack = startStack.begin(); iterStack != startStack.end(); ++iterStack)
+	{
+		const LatticeNode *node = *iterStack;
+		Rule *initRule = new Rule(node); // heap-allocated; either passed to m_rules.Add or deleted below
+		
+		if (initRule->CanRecurse(global, sentence.GetTunnelCollection()))
+		{ // may or may not be valid itself, but can continue to build on this rule
+			initRule->CreateRules(m_rules, *this, sentence, global);
+		}
+
+		if (initRule->IsValid(global, sentence.GetTunnelCollection()))
+		{ // add to rule collection
+			m_rules.Add(global, initRule, sentence); // NOTE(review): m_rules presumably takes ownership of initRule here - confirm in RuleCollection::Add
+		}
+		else
+		{
+			delete initRule; // a rule that is not valid is freed immediately
+		}
+
+		
+	}
+}
+
+Stack Lattice::GetNonTermNode(const Range &sourceRange) const // Returns the non-terminal nodes whose source range equals sourceRange.
+{
+	Stack ret; // returned by value: a fresh vector of pointers, the nodes themselves stay in m_stacks
+	size_t sourcePos = sourceRange.GetStartPos();
+	
+	const Stack &origStack = GetStack(sourcePos);
+	Stack::const_iterator iter;
+	for (iter = origStack.begin(); iter != origStack.end(); ++iter)
+	{
+		LatticeNode *node = *iter;
+		const Range &nodeRangeS = node->GetSourceRange();
+		
+		assert(nodeRangeS.GetStartPos() == sourceRange.GetStartPos()); // every node in this stack is expected to start at sourcePos
+		
+		if (! node->IsTerminal() && nodeRangeS.GetEndPos() == sourceRange.GetEndPos()) // start already matches, so only the end position is compared
+		{
+			ret.push_back(node);
+		}
+	}
+	
+	return ret;
+}
+
+std::ostream& operator<<(std::ostream &out, const Lattice &obj) // Writes every node of every stack, in stack order, each followed by one space.
+{
+	std::vector<Stack>::const_iterator iter;
+	for (iter = obj.m_stacks.begin(); iter != obj.m_stacks.end(); ++iter)
+	{
+		const Stack &stack = *iter;
+
+		Stack::const_iterator iterStack;
+		for (iterStack = stack.begin(); iterStack != stack.end(); ++iterStack)
+		{
+			const LatticeNode &node = **iterStack; // double dereference: iterator -> LatticeNode* -> LatticeNode
+			out << node << " ";
+		}
+	}
+
+	return out;
+}
+
+
--- a/contrib/other-builds/extract-mixed-syntax/Lattice.h
+++ b/contrib/other-builds/extract-mixed-syntax/Lattice.h
@ -0,0 +1,47 @@
+#pragma once
+/*
+ *  Lattice.h
+ *  extract
+ *
+ *  Created by Hieu Hoang on 18/07/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include <iostream>
+#include <vector>
+#include "RuleCollection.h"
+
+class Global;
+class LatticeNode;
+class Tunnel;
+class TunnelCollection;
+class SentenceAlignment;
+
+typedef std::vector<LatticeNode*> Stack;
+
+// Holds the lattice nodes of one sentence, grouped into stacks, together with
+// the rules extracted from them.
+class Lattice
+{
+	friend std::ostream& operator<<(std::ostream&, const Lattice&);
+
+	// one stack of nodes per position; the const GetStack() and GetNonTermNode()
+	// index it by source start position
+	std::vector<Stack> m_stacks;
+	// rules collected by CreateRules()
+	RuleCollection m_rules;
+	
+	// mutable access to a stack
+	// NOTE(review): the parameter is called endPos here but startPos in the const overload - confirm which is meant
+	Stack &GetStack(size_t endPos);			
+	
+	void CreateArcsUsing1Hole(const Tunnel &tunnel, const SentenceAlignment &sentence, const Global &global);
+
+public:
+	Lattice(size_t sourceSize);
+	~Lattice();
+	
+	void CreateArcs(size_t startPos, const TunnelCollection &tunnelColl, const SentenceAlignment &sentence, const Global &global);
+	// builds rules starting from every node in the stack at startPos and stores the valid ones in m_rules
+	void CreateRules(size_t startPos, const SentenceAlignment &sentence, const Global &global);
+
+	// read-only access to the stack at startPos; asserts that startPos is in range
+	const Stack &GetStack(size_t startPos) const;			
+	const RuleCollection &GetRules() const
+	{ return m_rules; }
+	
+	// non-terminal nodes whose source span equals sourceRange (returned by value)
+	Stack GetNonTermNode(const Range &sourceRange) const;			
+
+};
+
--- a/contrib/other-builds/extract-mixed-syntax/LatticeNode.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/LatticeNode.cpp
@ -0,0 +1,149 @@
+/*
+ *  LatticeNode.cpp
+ *  extract
+ *
+ *  Created by Hieu Hoang on 18/07/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include <sstream>
+#include "LatticeNode.h"
+#include "SyntaxTree.h"
+#include "Tunnel.h"
+#include "SentenceAlignment.h"
+#include "SymbolSequence.h"
+
+size_t LatticeNode::s_count = 0;
+
+using namespace std;
+
+// terminal node: covers exactly the one source word at position pos
+LatticeNode::LatticeNode(size_t pos, const SentenceAlignment *sentence)
+:m_isTerminal(true)
+,m_sourceRange(pos, pos)
+,m_tunnel(NULL)
+,m_sourceTreeNode(NULL)
+,m_targetTreeNode(NULL)
+,m_sentence(sentence)
+{
+	// members are listed in declaration order, which is the order they are initialised in
+	++s_count;
+}
+
+// non-terminal node: spans the source range of the given tunnel and carries
+// one syntax node per language side
+LatticeNode::LatticeNode(const Tunnel &tunnel, const SyntaxNode *sourceTreeNode, const SyntaxNode *targetTreeNode)
+:m_isTerminal(false)
+,m_sourceRange(tunnel.GetRange(0))
+,m_tunnel(&tunnel)
+,m_sourceTreeNode(sourceTreeNode)
+,m_targetTreeNode(targetTreeNode)
+,m_sentence(NULL)
+{
+	// members are listed in declaration order, which is the order they are initialised in
+	++s_count;
+}
+
+// True when the source or the target tree node reports IsSyntax().
+// Must only be called on non-terminals.
+bool LatticeNode::IsSyntax() const
+{
+	assert(!m_isTerminal);
+	if (m_sourceTreeNode->IsSyntax())
+		return true;
+	return m_targetTreeNode->IsSyntax();
+}
+
+// A lattice node always counts as one symbol; direction is not consulted.
+size_t LatticeNode::GetNumSymbols(size_t direction) const
+{
+	const size_t symbolsPerNode = 1;
+	return symbolsPerNode;
+}
+
+// Three-way comparison (-1, 0, +1).
+//  - a terminal sorts before a non-terminal
+//  - two terminals are ordered by source start position
+//  - two non-terminals are ordered by tunnel span and label, first on the
+//    source side and then on the target side; a side is only consulted when
+//    this node's tree node on that side is syntactic
+int LatticeNode::Compare(const LatticeNode &otherNode) const
+{
+	if (m_isTerminal != otherNode.m_isTerminal)
+		return m_isTerminal ? -1 : 1;
+
+	if (m_isTerminal)
+	{ // both terminals
+		const size_t thisStart = m_sourceRange.GetStartPos();
+		const size_t otherStart = otherNode.m_sourceRange.GetStartPos();
+		if (thisStart == otherStart)
+			return 0;
+		return (thisStart < otherStart) ? -1 : +1;
+	}
+
+	// both non-terminals
+	assert(!m_isTerminal);
+	assert(!otherNode.m_isTerminal);
+
+	if (m_sourceTreeNode->IsSyntax())
+	{
+		const int spanOrder = m_tunnel->Compare(*otherNode.m_tunnel, 0);
+		if (spanOrder != 0)
+			return spanOrder;
+		if (m_sourceTreeNode->GetLabel() != otherNode.m_sourceTreeNode->GetLabel())
+			return (m_sourceTreeNode->GetLabel() < otherNode.m_sourceTreeNode->GetLabel()) ? -1 : +1;
+	}
+
+	if (m_targetTreeNode->IsSyntax())
+	{
+		const int spanOrder = m_tunnel->Compare(*otherNode.m_tunnel, 1);
+		if (spanOrder != 0)
+			return spanOrder;
+		if (m_targetTreeNode->GetLabel() != otherNode.m_targetTreeNode->GetLabel())
+			return (m_targetTreeNode->GetLabel() < otherNode.m_targetTreeNode->GetLabel()) ? -1 : +1;
+	}
+
+	return 0;
+}
+
+// Appends the symbol for this node to 'symbols'.
+// Terminals add nothing (the word-by-word expansion is disabled), and
+// direction is not consulted: a non-terminal always emits one symbol that
+// carries both labels and both spans.
+void LatticeNode::CreateSymbols(size_t direction, SymbolSequence &symbols) const
+{
+	if (m_isTerminal)
+		return;
+
+	const Range &sourceSpan = m_tunnel->GetRange(0);
+	const Range &targetSpan = m_tunnel->GetRange(1);
+
+	Symbol symbol(m_sourceTreeNode->GetLabel(), m_targetTreeNode->GetLabel()
+								, sourceSpan.GetStartPos(), sourceSpan.GetEndPos()
+								, targetSpan.GetStartPos(), targetSpan.GetEndPos()
+								, m_sourceTreeNode->IsSyntax(), m_targetTreeNode->IsSyntax());
+	symbols.Add(symbol);
+}
+
+// Terminals print as "<source range>=<source word>";
+// non-terminals print as "<tunnel>=<source label><target label> ".
+std::ostream& operator<<(std::ostream &out, const LatticeNode &obj)
+{
+	if (!obj.m_isTerminal)
+	{
+		assert(obj.m_tunnel);
+		out << obj.GetTunnel() << "=";
+		out << obj.m_sourceTreeNode->GetLabel() << obj.m_targetTreeNode->GetLabel();
+		out << " ";
+		return out;
+	}
+
+	assert(obj.m_sourceRange.GetWidth() == 1);
+	const size_t wordPos = obj.m_sourceRange.GetStartPos();
+	out << obj.m_sourceRange << "=" << obj.m_sentence->source[wordPos];
+
+	return out;
+}
+
+
--- a/contrib/other-builds/extract-mixed-syntax/LatticeNode.h
+++ b/contrib/other-builds/extract-mixed-syntax/LatticeNode.h
@ -0,0 +1,77 @@
+#pragma once
+/*
+ *  LatticeNode.h
+ *  extract
+ *
+ *  Created by Hieu Hoang on 18/07/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include <vector>
+#include <iostream>
+#include <cassert>
+#include "Range.h"
+
+class Tunnel;
+class SyntaxNode;
+class SentenceAlignment;
+class SymbolSequence;
+
+// One node of the extraction lattice: either a terminal (a single source
+// word) or a non-terminal (a tunnel plus a source and a target syntax node).
+class LatticeNode
+{
+	friend std::ostream& operator<<(std::ostream&, const LatticeNode&);
+
+	// true for the word constructor, false for the tunnel constructor
+	bool m_isTerminal;
+
+	// for terms & non-term
+	Range m_sourceRange;
+
+	// non-terms. source range should be same as m_sourceRange
+	// (NULL for terminals)
+	const Tunnel *m_tunnel;
+
+public:
+	// incremented by every constructor call
+	static size_t s_count;
+	
+	
+	
+	// syntax nodes of a non-terminal; both NULL for terminals
+	const SyntaxNode *m_sourceTreeNode, *m_targetTreeNode;
+	// sentence the word belongs to; set for terminals, NULL for non-terminals
+	const SentenceAlignment *m_sentence;
+	
+	// for terms
+	LatticeNode(size_t pos, const SentenceAlignment *sentence);
+
+	// for non-terms
+	LatticeNode(const Tunnel &tunnel, const SyntaxNode *sourceTreeNode, const SyntaxNode *targetTreeNode);
+	
+	bool IsTerminal() const
+	{ return m_isTerminal; }
+
+	// non-terminals only (asserts): true if either syntax node reports IsSyntax()
+	bool IsSyntax() const;
+	
+	// always 1 in the current implementation
+	size_t GetNumSymbols(size_t direction) const;
+	
+	// NOTE(review): no definition of ToString() appears in LatticeNode.cpp - confirm it is defined elsewhere or unused
+	std::string ToString() const;
+	
+	// three-way comparison returning -1, 0 or +1
+	int Compare(const LatticeNode &otherNode) const;
+	
+	// appends this node's symbol to 'symbols'; terminals add nothing
+	void CreateSymbols(size_t direction, SymbolSequence &symbols) const;
+
+	// non-terminals only: asserts that a tunnel is present
+	const Tunnel &GetTunnel() const
+	{
+		assert(m_tunnel);
+		return *m_tunnel;
+	}
+	
+	const Range &GetSourceRange() const
+	{
+		return m_sourceRange;
+	}
+	// direction 0 selects the source syntax node, any other value the target one;
+	// asserts that the selected node is set
+	const SyntaxNode &GetSyntaxNode(size_t direction) const
+	{
+		const SyntaxNode *node = direction == 0 ? m_sourceTreeNode : m_targetTreeNode;
+		assert(node);
+		return *node;
+	}
+	
+};
+
--- a/contrib/other-builds/extract-mixed-syntax/Makefile
+++ b/contrib/other-builds/extract-mixed-syntax/Makefile
@ -0,0 +1,13 @@
+# None of these targets creates a file of the same name (the link step writes
+# extract-mixed-syntax), so declare them phony: a stray file called "all",
+# "clean" or "extract" must never make the target look up to date.
+.PHONY: all clean extract
+
+all: extract 
+
+# remove object files and the linked binary
+clean: 
+	rm -f *.o extract-mixed-syntax
+
+# compile each .cpp into its .o
+.cpp.o:
+	g++ -O6 -g -c $<
+
+# link all objects into the extract-mixed-syntax binary
+extract: tables-core.o extract.o SyntaxTree.o XmlTree.o Tunnel.o Lattice.o LatticeNode.o SentenceAlignment.o Global.o InputFileStream.o TunnelCollection.o RuleCollection.o Rule.o Symbol.o SymbolSequence.o Range.o OutputFileStream.o
+
+	g++ tables-core.o extract.o SyntaxTree.o XmlTree.o Tunnel.o Lattice.o LatticeNode.o SentenceAlignment.o Global.o InputFileStream.o TunnelCollection.o RuleCollection.o Rule.o Symbol.o SymbolSequence.o Range.o OutputFileStream.o -lz -lboost_iostreams-mt -o extract-mixed-syntax
+
+
--- a/contrib/other-builds/extract-mixed-syntax/OutputFileStream.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/OutputFileStream.cpp
@ -0,0 +1,79 @@
+// $Id: OutputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#include <boost/iostreams/filter/gzip.hpp>
+#include "OutputFileStream.h"
+#include "gzfilebuf.h"
+
+using namespace std;
+
+namespace Moses
+{
+/** Creates a stream with no file attached. */
+OutputFileStream::OutputFileStream()
+  :boost::iostreams::filtering_ostream()
+  ,m_outFile(NULL)
+{
+}
+
+/** Creates the stream and tries to open filePath; the result of Open() is not reported. */
+OutputFileStream::OutputFileStream(const std::string &filePath)
+  : m_outFile(NULL)
+{
+  Open(filePath);
+}
+
+/** Releases the underlying file, if any, through Close(). */
+OutputFileStream::~OutputFileStream()
+{
+  Close();
+}
+
+/**
+ * Opens filePath for binary output and attaches it as the sink of this
+ * filtering stream. A gzip compressor is pushed in front of the file when the
+ * path ends in ".gz".
+ *
+ * @param filePath path of the file to write
+ * @return true on success, false if the file could not be opened; in that case
+ *         no file is left attached, so a later Close() is a no-op
+ */
+bool OutputFileStream::Open(const std::string &filePath)
+{
+  m_outFile = new ofstream(filePath.c_str(), ios_base::out | ios_base::binary);
+  if (m_outFile->fail()) {
+    // Nothing has been pushed onto the chain yet. Release the stream and reset
+    // the pointer so that Close() (also run by the destructor) returns early
+    // instead of popping an empty chain.
+    delete m_outFile;
+    m_outFile = NULL;
+    return false;
+  }
+
+  if (filePath.size() > 3 && filePath.substr(filePath.size() - 3, 3) == ".gz") {
+    this->push(boost::iostreams::gzip_compressor());
+  }
+  this->push(*m_outFile);
+
+  return true;
+}
+
+/**
+ * Flushes pending output, detaches the file from the chain, then closes and
+ * deletes it. Does nothing when no file is attached.
+ */
+void OutputFileStream::Close()
+{
+  if (m_outFile == NULL) {
+    return;
+  }
+
+  this->flush();
+  this->pop(); // file
+
+  m_outFile->close();
+  delete m_outFile;
+  m_outFile = NULL;
+  return;
+}
+
+
+}
+
--- a/contrib/other-builds/extract-mixed-syntax/OutputFileStream.h
+++ b/contrib/other-builds/extract-mixed-syntax/OutputFileStream.h
@ -0,0 +1,50 @@
+// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#pragma once
+
+#include <cstdlib>
+#include <fstream>
+#include <string>
+#include <iostream>
+#include <boost/iostreams/filtering_stream.hpp>
+
+namespace Moses
+{
+
+/** Output stream that writes to a file; the output is gzip-compressed when
+ *  the file path ends in ".gz".
+ */
+class OutputFileStream : public boost::iostreams::filtering_ostream
+{
+protected:
+  // file opened by Open() and deleted by Close(); NULL when no file is attached
+  std::ofstream *m_outFile;
+public:
+  OutputFileStream();
+
+  // opens filePath immediately; the success of the open is not reported
+  OutputFileStream(const std::string &filePath);
+  virtual ~OutputFileStream();
+
+  // returns false if the file could not be opened
+  bool Open(const std::string &filePath);
+  // no-op when no file is attached
+  void Close();
+};
+
+}
+
--- a/contrib/other-builds/extract-mixed-syntax/Range.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/Range.cpp
@ -0,0 +1,74 @@
+/*
+ *  Range.cpp
+ *  extract
+ *
+ *  Created by Hieu Hoang on 22/02/2011.
+ *  Copyright 2011 __MyCompanyName__. All rights reserved.
+ *
+ */
+
+#include "Range.h"
+
+using namespace std;
+
+// Sets this range to the union of a and b: the smaller start and the larger
+// end. A position equal to NOT_FOUND on one side is replaced by the other
+// side's value (which may itself be NOT_FOUND).
+void Range::Merge(const Range &a, const Range &b)
+{
+	if (a.m_startPos != NOT_FOUND && b.m_startPos != NOT_FOUND)
+		m_startPos = min(a.m_startPos, b.m_startPos);
+	else
+		m_startPos = (a.m_startPos == NOT_FOUND) ? b.m_startPos : a.m_startPos;
+
+	if (a.m_endPos != NOT_FOUND && b.m_endPos != NOT_FOUND)
+		m_endPos = max(a.m_endPos, b.m_endPos);
+	else
+		m_endPos = (a.m_endPos == NOT_FOUND) ? b.m_endPos : a.m_endPos;
+}
+
+// Three-way comparison: by start position first, then by end position.
+int Range::Compare(const Range &other) const
+{
+	if (m_startPos != other.m_startPos)
+		return (m_startPos < other.m_startPos) ? -1 : +1;
+
+	if (m_endPos != other.m_endPos)
+		return (m_endPos < other.m_endPos) ? -1 : +1;
+
+	return 0;
+}
+
+// True when the two closed ranges share at least one position.
+bool Range::Overlap(const Range &other) const
+{
+	const bool otherEndsBeforeThis = other.m_endPos < m_startPos;
+	const bool otherStartsAfterThis = other.m_startPos > m_endPos;
+	return !(otherEndsBeforeThis || otherStartsAfterThis);
+}
+
+// Prints the range as "[start-end]".
+std::ostream& operator<<(std::ostream &out, const Range &range)
+{
+	out << "[" << range.m_startPos;
+	out << "-" << range.m_endPos;
+	out << "]";
+	return out;
+}
+
+
--- a/contrib/other-builds/extract-mixed-syntax/Range.h
+++ b/contrib/other-builds/extract-mixed-syntax/Range.h
@ -0,0 +1,57 @@
+/*
+ *  Range.h
+ *  extract
+ *
+ *  Created by Hieu Hoang on 22/02/2011.
+ *  Copyright 2011 __MyCompanyName__. All rights reserved.
+ *
+ */
+#pragma once
+#include <string>
+#include <iostream>
+#include <limits>
+
+// sentinel for "no position"
+#define NOT_FOUND 			std::numeric_limits<size_t>::max()
+
+// Closed interval [startPos, endPos] of positions.
+// Copying and assignment use the compiler-generated member-wise versions; a
+// hand-written copy constructor would only duplicate them.
+class Range
+{
+	friend std::ostream& operator<<(std::ostream&, const Range&);
+
+	size_t m_startPos, m_endPos;
+public:
+
+	// empty range: both ends are NOT_FOUND
+	Range()
+	:m_startPos(NOT_FOUND)
+	,m_endPos(NOT_FOUND)
+	{}
+
+	Range(size_t startPos, size_t endPos)
+	:m_startPos(startPos)
+	,m_endPos(endPos)
+	{}
+	
+	size_t GetStartPos() const
+	{ return m_startPos; }
+	size_t GetEndPos() const
+	{ return m_endPos; }
+	// number of positions covered, both ends included
+	size_t GetWidth() const
+	{ return m_endPos - m_startPos + 1; }
+
+	void SetStartPos(size_t startPos)
+	{ m_startPos = startPos; }
+	void SetEndPos(size_t endPos)
+	{ m_endPos = endPos; }
+	
+	// sets this range to the union of a and b
+	void Merge(const Range &a, const Range &b);
+	
+	// three-way comparison (-1, 0, +1): start position first, then end position
+	int Compare(const Range &other) const;
+
+	// true if the two ranges share at least one position
+	bool Overlap(const Range &other) const;
+	
+	
+};
--- a/contrib/other-builds/extract-mixed-syntax/Rule.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/Rule.cpp
@ -0,0 +1,594 @@
+/*
+ *  Rule.cpp
+ *  extract
+ *
+ *  Created by Hieu Hoang on 19/07/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include <algorithm>
+#include <sstream>
+#include "Rule.h"
+#include "Global.h"
+#include "LatticeNode.h"
+#include "Lattice.h"
+#include "SentenceAlignment.h"
+#include "Tunnel.h"
+#include "TunnelCollection.h"
+#include "RuleCollection.h"
+
+using namespace std;
+
+// member-wise copy: same lattice node, same alignment positions
+RuleElement::RuleElement(const RuleElement &copy)
+	: m_latticeNode(copy.m_latticeNode)
+	, m_alignmentPos(copy.m_alignmentPos)
+{}
+
+
+// initial rule: a single element wrapping latticeNode, no lhs yet
+Rule::Rule(const LatticeNode *latticeNode)
+:m_lhs(NULL)
+{
+	m_coll.push_back(RuleElement(*latticeNode));
+}
+
+// extension: the elements of prevRule followed by one element for latticeNode, no lhs yet
+Rule::Rule(const Rule &prevRule, const LatticeNode *latticeNode)
+:m_coll(prevRule.m_coll)
+,m_lhs(NULL)
+{
+	m_coll.push_back(RuleElement(*latticeNode));
+}
+
+// copy of an existing rule with a left-hand side attached; the symbol
+// sequences are then built and isValid reports whether that succeeded
+Rule::Rule(const Global &global, bool &isValid, const Rule &copy, const LatticeNode *lhs, const SentenceAlignment &sentence)
+:m_coll(copy.m_coll)
+,m_lhs(lhs)
+,m_source(copy.m_source)
+,m_target(copy.m_target)
+{
+	// members are listed in declaration order, which is the order they are initialised in
+	CreateSymbols(global, isValid, sentence);
+}
+
+// nothing to release here: the destructor body is empty
+Rule::~Rule() {}
+
+// sort helper: orders non-terminal rule elements by the end position of their target span
+struct CompareLatticeNodeTarget
+{
+	bool operator() (const RuleElement *a, const RuleElement *b)
+	{
+		const size_t endA = a->GetLatticeNode().GetTunnel().GetRange(1).GetEndPos();
+		const size_t endB = b->GetLatticeNode().GetTunnel().GetRange(1).GetEndPos();
+		return endA < endB;
+	}
+};
+
+// Fills m_source and m_target with the symbols of this rule and records, for
+// every non-terminal element, its position on both sides (m_alignmentPos).
+// isValid is set to false when a non-terminal's target span falls outside the
+// lhs target span, or when the target side has more than global.maxSymbols symbols.
+// Requires m_lhs to be set.
+void Rule::CreateSymbols(const Global &global, bool &isValid, const SentenceAlignment &sentence)
+{
+	// non-terminal elements, collected in source order and later sorted by target span
+	vector<RuleElement*> nonTerms;
+		
+	// source
+	for (size_t ind = 0; ind < m_coll.size(); ++ind)
+	{
+		RuleElement &element = m_coll[ind];
+		const LatticeNode &node = element.GetLatticeNode();
+		if (node.IsTerminal())
+		{
+			// a terminal contributes the source word at its position
+			size_t sourcePos = node.GetSourceRange().GetStartPos();
+			const string &word = sentence.source[sourcePos];
+			Symbol symbol(word, sourcePos);
+			m_source.Add(symbol);			
+		}
+		else 
+		{	// non-term
+			const string &sourceWord = node.GetSyntaxNode(0).GetLabel();
+			const string &targetWord = node.GetSyntaxNode(1).GetLabel();
+			Symbol symbol(sourceWord, targetWord
+										, node.GetTunnel().GetRange(0).GetStartPos(), node.GetTunnel().GetRange(0).GetEndPos()
+										, node.GetTunnel().GetRange(1).GetStartPos(), node.GetTunnel().GetRange(1).GetEndPos()
+										, node.GetSyntaxNode(0).IsSyntax(), node.GetSyntaxNode(1).IsSyntax());
+			m_source.Add(symbol);		
+
+			// store current pos within phrase
+			// (the index into m_coll is used as the source-side position)
+			element.m_alignmentPos.first = ind;
+
+			// for target symbols
+			nonTerms.push_back(&element);			
+		}
+		
+	}
+	
+	// target
+	isValid = true;
+	
+	const Range &lhsTargetRange = m_lhs->GetTunnel().GetRange(1);
+
+	// check spans of target non-terms
+	if (nonTerms.size())
+	{
+		// sort non-term rules elements by target range
+		std::sort(nonTerms.begin(), nonTerms.end(), CompareLatticeNodeTarget());
+
+		const Range &first = nonTerms.front()->GetLatticeNode().GetTunnel().GetRange(1);
+		const Range &last = nonTerms.back()->GetLatticeNode().GetTunnel().GetRange(1);
+
+		// the sorted non-terminals must lie inside the target span of the lhs
+		if (first.GetStartPos() < lhsTargetRange.GetStartPos()
+				|| last.GetEndPos() > lhsTargetRange.GetEndPos())
+		{			
+			isValid = false;
+		}
+	}
+	
+	if (isValid)
+	{
+		// walk the lhs target span left to right, emitting a word for each
+		// position, or a single symbol for a whole non-terminal span
+		size_t indNonTerm = 0;
+		RuleElement *currNonTermElement = indNonTerm < nonTerms.size() ? nonTerms[indNonTerm] : NULL;
+		for (size_t targetPos = lhsTargetRange.GetStartPos(); targetPos <= lhsTargetRange.GetEndPos(); ++targetPos)
+		{		
+			if (currNonTermElement && targetPos == currNonTermElement->GetLatticeNode().GetTunnel().GetRange(1).GetStartPos())
+			{ // start of a non-term. print out non-terms & skip to the end
+				
+				const LatticeNode &node = currNonTermElement->GetLatticeNode();
+
+				const string &sourceWord = node.GetSyntaxNode(0).GetLabel();
+				const string &targetWord = node.GetSyntaxNode(1).GetLabel();
+				Symbol symbol(sourceWord, targetWord
+											, node.GetTunnel().GetRange(0).GetStartPos(), node.GetTunnel().GetRange(0).GetEndPos()
+											, node.GetTunnel().GetRange(1).GetStartPos(), node.GetTunnel().GetRange(1).GetEndPos()
+											, node.GetSyntaxNode(0).IsSyntax(), node.GetSyntaxNode(1).IsSyntax());
+				m_target.Add(symbol);			
+				
+				// store current pos within phrase
+				currNonTermElement->m_alignmentPos.second = m_target.GetSize() - 1;
+				
+				assert(currNonTermElement->m_alignmentPos.first != NOT_FOUND);
+
+				// jump to the last target position of the non-terminal; the loop's ++ moves past it
+				targetPos = node.GetTunnel().GetRange(1).GetEndPos();
+				indNonTerm++;
+				currNonTermElement = indNonTerm < nonTerms.size() ? nonTerms[indNonTerm] : NULL;			
+			}
+			else 
+			{ // term
+				const string &word = sentence.target[targetPos];
+
+				Symbol symbol(word, targetPos);
+				m_target.Add(symbol);
+
+			}
+		}
+				
+		// every non-terminal must have been emitted during the walk
+		assert(indNonTerm == nonTerms.size());
+
+		// only the target side is checked against maxSymbols here
+		if (m_target.GetSize() > global.maxSymbols) {
+		  isValid = false;
+	    //cerr << "m_source=" << m_source.GetSize() << ":" << m_source << endl;
+	    //cerr << "m_target=" << m_target.GetSize() << ":" << m_target << endl;
+		}
+	}	
+}
+
+// True when the rule holds more non-syntax ("default") non-terminals than terminals.
+bool Rule::MoreDefaultNonTermThanTerm() const
+{
+	size_t terms = 0;
+	size_t defaultNonTerms = 0;
+
+	for (size_t ind = 0; ind < m_coll.size(); ++ind)
+	{
+		const LatticeNode &node = m_coll[ind].GetLatticeNode();
+		if (node.IsTerminal())
+			++terms;
+		else if (!node.IsSyntax())
+			++defaultNonTerms;
+	}
+
+	return defaultNonTerms > terms;
+}
+
+// True when the first or the last element of the rule is a non-syntax non-terminal.
+bool Rule::SourceHasEdgeDefaultNonTerm() const
+{
+	assert(m_coll.size());
+
+	const LatticeNode &leftEdge = m_coll.front().GetLatticeNode();
+	const LatticeNode &rightEdge = m_coll.back().GetLatticeNode();
+
+	const bool leftIsDefault = !leftEdge.IsTerminal() && !leftEdge.IsSyntax();
+	if (leftIsDefault)
+		return true;
+
+	return !rightEdge.IsTerminal() && !rightEdge.IsSyntax();
+}
+
+// Decides whether this rule may be added to the rule collection.
+// The checks run in the order listed; the first failing one rejects the rule.
+// Limits on unaligned words are not enforced here.
+bool Rule::IsValid(const Global &global, const TunnelCollection &tunnelColl) const
+{
+	// a rule consisting of one lone non-terminal is rejected
+	const bool loneNonTerm = m_coll.size() == 1 && !m_coll[0].GetLatticeNode().IsTerminal();
+	if (loneNonTerm)
+		return false;
+
+	// must have at least as many terminals as non-syntax non-terminals
+	if (MoreDefaultNonTermThanTerm())
+		return false;
+
+	// non-syntax non-terminals at either edge only when explicitly allowed
+	if (!global.allowDefaultNonTermEdge && SourceHasEdgeDefaultNonTerm())
+		return false;
+
+	if (GetNumSymbols() > global.maxSymbols)
+		return false;
+
+	if (AdjacentDefaultNonTerms())
+		return false;
+
+	// the source span of the rule must correspond to at least one tunnel
+	if (!IsHole(tunnelColl))
+		return false;
+
+	// target spans of the non-terminals must not overlap
+	if (NonTermOverlap())
+		return false;
+
+	return true;
+}
+
+// True when the target spans of any two non-terminals in the rule overlap.
+bool Rule::NonTermOverlap() const
+{
+	// gather the target span of every non-terminal
+	vector<Range> targetSpans;
+	for (size_t ind = 0; ind < m_coll.size(); ++ind)
+	{
+		const LatticeNode &node = m_coll[ind].GetLatticeNode();
+		if (node.IsTerminal())
+			continue;
+		targetSpans.push_back(node.GetTunnel().GetRange(1));
+	}
+
+	// pairwise test
+	for (size_t outer = 0; outer < targetSpans.size(); ++outer)
+	{
+		for (size_t inner = outer + 1; inner < targetSpans.size(); ++inner)
+		{
+			if (targetSpans[outer].Overlap(targetSpans[inner]))
+				return true;
+		}
+	}
+
+	return false;
+}
+
+// Source span of the whole rule: from the start of the first element to the end of the last.
+Range Rule::GetSourceRange() const
+{
+	assert(m_coll.size());
+	const Range &firstSpan = m_coll.front().GetLatticeNode().GetSourceRange();
+	const Range &lastSpan = m_coll.back().GetLatticeNode().GetSourceRange();
+
+	return Range(firstSpan.GetStartPos(), lastSpan.GetEndPos());
+}
+
+
+// True when tunnelColl holds at least one tunnel for the rule's source span.
+bool Rule::IsHole(const TunnelCollection &tunnelColl) const
+{
+	const Range sourceSpan = GetSourceRange();
+	return tunnelColl.GetTunnels(sourceSpan.GetStartPos(), sourceSpan.GetEndPos()).size() > 0;
+}
+
+
+// Decides whether more lattice nodes may be appended to this rule.
+// Only unaligned source words are counted; the target side is not checked.
+bool Rule::CanRecurse(const Global &global, const TunnelCollection &tunnelColl) const
+{
+	const bool blocked = GetNumSymbols() >= global.maxSymbols
+			|| AdjacentDefaultNonTerms()
+			|| MaxNonTerm(global)
+			|| NonTermOverlap();
+	if (blocked)
+		return false;
+
+	const Range sourceSpan = GetSourceRange();
+	const bool tooManyUnaligned =
+			tunnelColl.NumUnalignedWord(0, sourceSpan.GetStartPos(), sourceSpan.GetEndPos()) >= global.maxUnaligned;
+
+	return !tooManyUnaligned;
+}
+
+// True as soon as the number of non-terminals reaches global.maxNonTerm, or
+// the number of non-syntax non-terminals reaches global.maxNonTermDefault.
+bool Rule::MaxNonTerm(const Global &global) const
+{
+	size_t nonTerms = 0;
+	size_t defaultNonTerms = 0;
+
+	for (size_t ind = 0; ind < m_coll.size(); ++ind)
+	{
+		const LatticeNode &node = m_coll[ind].GetLatticeNode();
+		if (node.IsTerminal())
+			continue;
+
+		++nonTerms;
+		if (!node.IsSyntax())
+			++defaultNonTerms;
+
+		if (nonTerms >= global.maxNonTerm || defaultNonTerms >= global.maxNonTermDefault)
+			return true;
+	}
+
+	return false;
+}
+
+
+// True when two neighbouring elements are both non-syntax non-terminals.
+bool Rule::AdjacentDefaultNonTerms() const
+{
+	assert(m_coll.size() > 0);
+
+	for (size_t ind = 1; ind < m_coll.size(); ++ind)
+	{
+		const LatticeNode &left = m_coll[ind - 1].GetLatticeNode();
+		const LatticeNode &right = m_coll[ind].GetLatticeNode();
+
+		// IsSyntax() may only be asked of non-terminals, so rule out terminals first
+		if (left.IsTerminal() || right.IsTerminal())
+			continue;
+		if (!left.IsSyntax() && !right.IsSyntax())
+			return true;
+	}
+
+	return false;
+}
+
+
+
+// Number of elements (lattice nodes) in the rule.
+size_t Rule::GetNumSymbols() const
+{
+	return m_coll.size();
+}
+
+// Extends this rule by one lattice node in every possible way.
+// The candidates are the nodes in the stack just after the source end of the
+// last element. Each extension is expanded recursively while CanRecurse()
+// allows, and is then either handed to 'rules' or deleted.
+// NOTE(review): Lattice::GetStack asserts the index is inside its stack vector -
+// confirm a stack exists for the position just past the last source word.
+void Rule::CreateRules(RuleCollection &rules
+											 , const Lattice &lattice
+											 , const SentenceAlignment &sentence
+											 , const Global &global)
+{
+	assert(m_coll.size() > 0);
+	const LatticeNode &lastNode = m_coll.back().GetLatticeNode();
+	const size_t nextPos = lastNode.GetSourceRange().GetEndPos() + 1;
+
+	const Stack &candidates = lattice.GetStack(nextPos);
+
+	for (size_t ind = 0; ind < candidates.size(); ++ind)
+	{
+		Rule *extended = new Rule(*this, candidates[ind]);
+
+		// the extension may or may not be valid itself, but longer rules can still be built on it
+		if (extended->CanRecurse(global, sentence.GetTunnelCollection()))
+			extended->CreateRules(rules, lattice, sentence, global);
+
+		if (!extended->IsValid(global, sentence.GetTunnelCollection()))
+		{
+			delete extended;
+			continue;
+		}
+
+		// hand the extension over to the rule collection
+		rules.Add(global, extended, sentence);
+	}
+}
+
+// Strict ordering derived from Compare().
+bool Rule::operator<(const Rule &compare) const
+{
+	return Compare(compare) < 0;
+}
+
+// Three-way comparison of two rules: source symbols first, then target
+// symbols, then the lhs label on the source side and on the target side.
+int Rule::Compare(const Rule &compare) const
+{
+	assert(m_coll.size() > 0);
+	assert(m_source.GetSize() > 0);
+	assert(m_target.GetSize() > 0);
+
+	const int sourceOrder = m_source.Compare(compare.m_source);
+	if (sourceOrder != 0)
+		return sourceOrder;
+
+	const int targetOrder = m_target.Compare(compare.m_target);
+	if (targetOrder != 0)
+		return targetOrder;
+
+	// lhs labels: direction 0 is the source side, 1 the target side
+	for (size_t direction = 0; direction < 2; ++direction)
+	{
+		const string &thisLabel = m_lhs->GetSyntaxNode(direction).GetLabel();
+		const string &otherLabel = compare.m_lhs->GetSyntaxNode(direction).GetLabel();
+		if (thisLabel != otherLabel)
+			return (thisLabel < otherLabel) ? -1 : +1;
+	}
+
+	return 0;
+}
+
+
+// Lattice node of the element at index ind; asserts that ind is in range.
+const LatticeNode &Rule::GetLatticeNode(size_t ind) const
+{
+	assert(m_coll.size() > ind);
+	const RuleElement &element = m_coll[ind];
+	return element.GetLatticeNode();
+}
+
+// Writes the rule to standard error in the same format as Output().
+void Rule::DebugOutput() const
+{
+	std::ostream &errStream = cerr;
+	Output(errStream);
+}
+
+// Writes the rule as
+//   <source symbols> <source lhs> ||| <target symbols> <target lhs> ||| <alignments>||| 1
+// where each alignment is "sourcePos-targetPos " for one non-terminal.
+void Rule::Output(std::ostream &out) const
+{
+  stringstream sourceSide, targetSide;
+
+  for (std::vector<Symbol>::const_iterator sym = m_source.begin(); sym != m_source.end(); ++sym)
+    sourceSide << *sym << " ";
+
+  for (std::vector<Symbol>::const_iterator sym = m_target.begin(); sym != m_target.end(); ++sym)
+    targetSide << *sym << " ";
+
+  // the lhs labels close each side
+  if (m_lhs != NULL)
+  {
+    sourceSide << m_lhs->GetSyntaxNode(0).GetLabel();
+    targetSide << m_lhs->GetSyntaxNode(1).GetLabel();
+  }
+
+  out << sourceSide.str() << " ||| " << targetSide.str() << " ||| ";
+
+  // alignment: only non-terminals are listed
+  for (Rule::CollType::const_iterator elem = m_coll.begin(); elem != m_coll.end(); ++elem)
+  {
+    if (elem->GetLatticeNode().IsTerminal())
+      continue;
+    out << elem->m_alignmentPos.first << "-" << elem->m_alignmentPos.second << " ";
+  }
+
+  out << "||| 1";
+}
+
+// Writes the rule with the two sides swapped:
+//   <target symbols> <target lhs> ||| <source symbols> <source lhs> ||| <alignments>||| 1
+// where each alignment is "targetPos-sourcePos " for one non-terminal.
+void Rule::OutputInv(std::ostream &out) const
+{
+  stringstream sourceSide, targetSide;
+
+  for (std::vector<Symbol>::const_iterator sym = m_source.begin(); sym != m_source.end(); ++sym)
+    sourceSide << *sym << " ";
+
+  for (std::vector<Symbol>::const_iterator sym = m_target.begin(); sym != m_target.end(); ++sym)
+    targetSide << *sym << " ";
+
+  // the lhs labels close each side
+  if (m_lhs != NULL)
+  {
+    sourceSide << m_lhs->GetSyntaxNode(0).GetLabel();
+    targetSide << m_lhs->GetSyntaxNode(1).GetLabel();
+  }
+
+  out << targetSide.str() << " ||| " << sourceSide.str() << " ||| ";
+
+  // alignment: only non-terminals are listed, target position first
+  for (Rule::CollType::const_iterator elem = m_coll.begin(); elem != m_coll.end(); ++elem)
+  {
+    if (elem->GetLatticeNode().IsTerminal())
+      continue;
+    out << elem->m_alignmentPos.second << "-" << elem->m_alignmentPos.first << " ";
+  }
+
+  out << "||| 1";
+}
+
+
--- a/contrib/other-builds/extract-mixed-syntax/Rule.h
+++ b/contrib/other-builds/extract-mixed-syntax/Rule.h
@ -0,0 +1,96 @@
+#pragma once
+/*
+ *  Rule.h
+ *  extract
+ *
+ *  Created by Hieu Hoang on 19/07/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include <vector>
+#include <iostream>
+#include "LatticeNode.h"
+#include "SymbolSequence.h"
+#include "Global.h"
+
+class Lattice;
+class SentenceAlignment;
+class Global;
+class RuleCollection;
+class SyntaxNode;
+class TunnelCollection;
+class Range;
+
+// One position of a rule: a lattice node (not owned) plus, for non-terminals,
+// its position on the source and target side once Rule::CreateSymbols has run.
+class RuleElement
+{
+protected:
+	const LatticeNode *m_latticeNode;
+public:
+	// (source position, target position); both NOT_FOUND until set
+	std::pair<size_t, size_t> m_alignmentPos;
+	
+	RuleElement(const RuleElement &copy);
+	RuleElement(const LatticeNode &latticeNode)
+	:m_latticeNode(&latticeNode)
+	,m_alignmentPos(NOT_FOUND, NOT_FOUND)
+	{}
+
+	const LatticeNode &GetLatticeNode() const
+	{ return *m_latticeNode; }
+
+};
+
+// A translation rule under construction: a sequence of lattice nodes, an
+// optional left-hand side and the symbol sequences built from them.
+class Rule
+{
+protected:
+	typedef std::vector<RuleElement> CollType;
+	// the elements of the rule, in source order
+	CollType m_coll;
+
+	// left-hand side; NULL until the rule is copied with an lhs
+	const LatticeNode *m_lhs;
+	// symbol sequences filled by CreateSymbols()
+	SymbolSequence m_source, m_target;
+	
+	// true if a tunnel exists for the rule's source span
+	bool IsHole(const TunnelCollection &tunnelColl) const;
+	// true if the target spans of two non-terminals overlap
+	bool NonTermOverlap() const;
+
+	const LatticeNode &GetLatticeNode(size_t ind) const;
+	void CreateSymbols(const Global &global, bool &isValid, const SentenceAlignment &sentence);
+
+public:
+	// init
+	Rule(const LatticeNode *latticeNode);
+
+	// create new rule by appending node to prev rule
+	Rule(const Rule &prevRule, const LatticeNode *latticeNode);
+
+	// create copy with lhs
+	// isValid receives the outcome of building the symbol sequences
+	Rule(const Global &global, bool &isValid, const Rule &copy, const LatticeNode *lhs, const SentenceAlignment &sentence);
+
+	// can continue to add to this rule
+	bool CanRecurse(const Global &global, const TunnelCollection &tunnelColl) const;
+
+	virtual ~Rule();
+
+	// can add this to the set of rules
+	bool IsValid(const Global &global, const TunnelCollection &tunnelColl) const;
+
+	// number of elements in m_coll
+	size_t GetNumSymbols() const;
+	// two neighbouring non-syntax non-terminals
+	bool AdjacentDefaultNonTerms() const;
+	// non-terminal count has reached one of the limits in global
+	bool MaxNonTerm(const Global &global) const;
+	// more non-syntax non-terminals than terminals
+	bool MoreDefaultNonTermThanTerm() const;
+	// first or last element is a non-syntax non-terminal
+	bool SourceHasEdgeDefaultNonTerm() const;
+
+	// extend this rule with every node that follows it in the lattice and
+	// add the valid results to 'rules'
+	void CreateRules(RuleCollection &rules
+									 , const Lattice &lattice
+									 , const SentenceAlignment &sentence
+									 , const Global &global);
+	
+	// three-way comparison on symbols and lhs labels; needs the symbol sequences and the lhs
+	int Compare(const Rule &compare) const;
+	bool operator<(const Rule &compare) const;
+			
+	// from the start of the first element to the end of the last
+	Range GetSourceRange() const;
+	
+	// NOTE(review): macro defined outside this header; presumably declares DebugOutput() - confirm
+	DEBUG_OUTPUT();
+
+  // write the rule source-side first (Output) or target-side first (OutputInv)
+  void Output(std::ostream &out) const;
+  void OutputInv(std::ostream &out) const;
+
+};
--- a/contrib/other-builds/extract-mixed-syntax/RuleCollection.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/RuleCollection.cpp
@ -0,0 +1,102 @@
+/*
+ *  RuleCollection.cpp
+ *  extract
+ *
+ *  Created by Hieu Hoang on 19/07/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include "RuleCollection.h"
+#include "Rule.h"
+#include "SentenceAlignment.h"
+#include "tables-core.h"
+#include "Lattice.h"
+#include "SyntaxTree.h"
+
+using namespace std;
+
+// Releases the rules held in m_coll via the RemoveAllInColl helper.
+// NOTE(review): RemoveAllInColl is defined elsewhere; presumably it deletes
+// every pointer in the container and clears it -- confirm.
+RuleCollection::~RuleCollection()
+{
+	RemoveAllInColl(m_coll);
+}
+
+/**
+ * Expands `rule` into one concrete rule per candidate left-hand side and keeps
+ * the valid, previously unseen ones.
+ *
+ * The candidates are the non-terminal lattice nodes covering the rule's source
+ * range. For each of them a copy of `rule` with that LHS is built; copies that
+ * the Rule constructor flags as invalid, or that compare equal to a rule
+ * already in m_coll, are deleted again.
+ *
+ * @param global   extraction options, forwarded to the Rule constructor
+ * @param rule     template rule; this function takes ownership and deletes it
+ * @param sentence sentence pair providing the lattice
+ */
+void RuleCollection::Add(const Global &global, Rule *rule, const SentenceAlignment &sentence)
+{	
+	Range spanS	= rule->GetSourceRange();
+		
+	// cartesian product of lhs
+	Stack nontermNodes = sentence.GetLattice().GetNonTermNode(spanS);
+	Stack::const_iterator iterStack;
+	for (iterStack = nontermNodes.begin(); iterStack != nontermNodes.end(); ++iterStack)
+	{
+		const LatticeNode &node = **iterStack;
+		assert(!node.IsTerminal());
+
+		// Initialised so that a constructor path which forgets to set the
+		// out-parameter is treated as "invalid" instead of reading garbage.
+		bool isValid = false;
+		// create rules with LHS
+		//cerr << "old:" << *rule << endl;
+		Rule *newRule = new Rule(global, isValid, *rule, &node, sentence);
+		
+		if (!isValid)
+		{ // lhs doesn't match non-term spans
+			delete newRule;
+			continue;
+		}
+
+		/*
+		stringstream s;
+		s << *newRule;
+		if (s.str().find("Wiederaufnahme der [X] ||| resumption of the [X] ||| ||| 1") == 0)
+		{
+			cerr << "READY:" << *newRule << endl;
+			g_debug = true;
+		}
+		else {
+			g_debug = false;
+		}
+		*/
+		
+		// insert() reports through .second whether the set accepted the rule;
+		// a rejected duplicate is still owned by us and must be freed.
+		const bool accepted = m_coll.insert(newRule).second;
+		if (!accepted)
+		{
+			//cerr << "REJECTED:" << *newRule << endl;
+			delete newRule;
+		}
+		//else cerr << "ACCEPTED:" << *newRule << endl;
+	}
+	
+	// the template rule itself is never stored
+	delete rule;
+
+}
+
+// Writes every stored rule to `out` in the set's iteration order, one rule
+// per line (each line is terminated with endl, i.e. flushed).
+void RuleCollection::Output(std::ostream &out) const
+{
+  for (CollType::const_iterator cur = m_coll.begin(), last = m_coll.end(); cur != last; ++cur)
+  {
+    (*cur)->Output(out);
+    out << endl;
+  }
+}
+
+// Same traversal as Output(), but each rule is written through
+// Rule::OutputInv; one rule per line, each line terminated with endl.
+void RuleCollection::OutputInv(std::ostream &out) const
+{
+  for (CollType::const_iterator cur = m_coll.begin(), last = m_coll.end(); cur != last; ++cur)
+  {
+    (*cur)->OutputInv(out);
+    out << endl;
+  }
+}
+
+
+
--- a/contrib/other-builds/extract-mixed-syntax/RuleCollection.h
+++ b/contrib/other-builds/extract-mixed-syntax/RuleCollection.h
@ -0,0 +1,55 @@
+#pragma once
+/*
+ *  RuleCollection.h
+ *  extract
+ *
+ *  Created by Hieu Hoang on 19/07/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include <set>
+#include <iostream>
+#include "Rule.h"
+
+class SentenceAlignment;
+
+// Ordering predicate for the std::set of Rule pointers in RuleCollection:
+// compares the pointed-to rules with Rule::operator<, not the addresses.
+// (Original note: helper for sort. Don't compare default non-terminals.)
+struct CompareRule
+{
+	// Must be const: associative containers invoke their comparator through a
+	// const object (LWG 2542), and libstdc++ enforces this in C++17 mode.
+	bool operator() (const Rule *a, const Rule *b) const
+	{
+		/*
+		if (g_debug)
+		{
+			std::cerr << std::endl << (*a) << std::endl << (*b) << " ";
+		}
+		 */
+		bool ret = (*a) < (*b);
+		/*
+		if (g_debug)
+		{
+			std::cerr << ret << std::endl;
+		}
+		 */
+		return ret;
+	}
+};
+
+
+// Set of unique rules, ordered by CompareRule (i.e. by Rule::operator<).
+// NOTE(review): the destructor releases m_coll, but no copy constructor or
+// assignment operator is declared; copying an instance would share the same
+// Rule pointers between two owners -- confirm it is never copied.
+class RuleCollection
+{
+protected:
+	typedef std::set<const Rule*, CompareRule> CollType;
+	CollType m_coll;
+	
+public:
+	~RuleCollection();
+	// Takes ownership of `rule` (it is deleted inside Add) and stores the
+	// valid, non-duplicate expansions of it.
+	void Add(const Global &global, Rule *rule, const SentenceAlignment &sentence);
+	// number of rules currently stored
+	size_t GetSize() const
+	{ return m_coll.size(); }
+
+  // write all rules, one per line, via Rule::Output / Rule::OutputInv
+  void Output(std::ostream &out) const;
+  void OutputInv(std::ostream &out) const;
+
+};
+
--- a/contrib/other-builds/extract-mixed-syntax/SentenceAlignment.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/SentenceAlignment.cpp
@ -0,0 +1,331 @@
+/*
+ *  SentenceAlignment.cpp
+ *  extract
+ *
+ *  Created by Hieu Hoang on 19/01/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include <set>
+#include <map>
+#include <sstream>
+#include "SentenceAlignment.h"
+#include "XmlTree.h"
+#include "tables-core.h"
+#include "TunnelCollection.h"
+#include "Lattice.h"
+#include "LatticeNode.h"
+
+using namespace std;
+
+extern std::set< std::string > targetLabelCollection, sourceLabelCollection;
+extern std::map< std::string, int > targetTopLabelCollection, sourceTopLabelCollection;
+
+// Starts with no tunnel collection and no lattice; they are allocated later
+// by FindTunnels() and CreateLattice() respectively.
+SentenceAlignment::SentenceAlignment()
+:m_tunnelCollection(NULL)
+,m_lattice(NULL)
+{}
+
+// Frees the tunnel collection and the lattice owned by this object
+// (deleting a NULL pointer is a no-op, so unbuilt members are fine).
+SentenceAlignment::~SentenceAlignment()
+{
+	delete m_tunnelCollection;
+	delete m_lattice;
+}
+
+/**
+ * Fills this object from one sentence pair and its word alignment.
+ *
+ * Tokenizes both sides (stripping XML syntax annotation into targetTree /
+ * sourceTree when the corresponding syntax option is set), reads the
+ * "s-t" alignment points into alignedToT / alignedCountS, and finally adds
+ * the default non-terminals to both trees.
+ *
+ * @param targetString    target sentence, possibly XML-annotated
+ * @param sourceString    source sentence, possibly XML-annotated
+ * @param alignmentString whitespace-separated "source-target" index pairs
+ * @param sentenceID      only used in diagnostics
+ * @param global          extraction options (targetSyntax, sourceSyntax, mixed)
+ * @return 1 on success; 0 (after a message on stderr) when either side is
+ *         empty, an alignment token is malformed, or a point is out of range
+ */
+int SentenceAlignment::Create( const std::string &targetString, const std::string &sourceString, const std::string &alignmentString, int sentenceID, const Global &global )
+{
+
+  // tokenizing English (and potentially extract syntax spans)
+  if (global.targetSyntax) {
+		string targetStringCPP = string(targetString);
+		ProcessAndStripXMLTags( targetStringCPP, targetTree, targetLabelCollection , targetTopLabelCollection );
+		target = tokenize( targetStringCPP.c_str() );
+		// cerr << "E: " << targetStringCPP << endl;
+  }
+  else {
+		target = tokenize( targetString.c_str() );
+  }
+	
+  // tokenizing source (and potentially extract syntax spans)
+  if (global.sourceSyntax) {
+		string sourceStringCPP = string(sourceString);
+		ProcessAndStripXMLTags( sourceStringCPP, sourceTree, sourceLabelCollection , sourceTopLabelCollection );
+		source = tokenize( sourceStringCPP.c_str() );
+		// cerr << "F: " << sourceStringCPP << endl;
+  }
+  else {
+		source = tokenize( sourceString.c_str() );
+  }
+	
+  // check if sentences are empty
+  if (target.empty() || source.empty()) {
+    cerr << "no target (" << target.size() << ") or source (" << source.size() << ") words in sentence " << sentenceID << endl;
+    cerr << "T: " << targetString << endl << "S: " << sourceString << endl;
+    return 0;
+  }
+	
+  // prepare data structures for alignments: one counter per source word,
+  // one (initially empty) list of aligned source positions per target word
+  for(size_t i=0; i<source.size(); i++) {
+    alignedCountS.push_back( 0 );
+  }
+  for(size_t i=0; i<target.size(); i++) {
+    alignedToT.push_back( vector< int >() );
+  }
+	
+	//InitTightest(m_s2tTightest, source.size());
+	//InitTightest(m_t2sTightest, target.size());
+
+	
+  // reading in alignments
+  vector<string> alignmentSequence = tokenize( alignmentString.c_str() );
+  for(size_t i=0; i<alignmentSequence.size(); i++) {
+    int s,t;
+    // cout << "scaning " << alignmentSequence[i].c_str() << endl;
+    // sscanf returns the number of converted fields (or EOF); anything other
+    // than 2 means at least one of s/t was not assigned.
+    if (sscanf(alignmentSequence[i].c_str(), "%d-%d", &s, &t) != 2) {
+      cerr << "WARNING: " << alignmentSequence[i] << " is a bad alignment point in sentence " << sentenceID << endl; 
+      cerr << "T: " << targetString << endl << "S: " << sourceString << endl;
+      return 0;
+    }
+		// cout << "alignmentSequence[i] " << alignmentSequence[i] << " is " << s << ", " << t << endl;
+    // negative indices are rejected explicitly rather than through an
+    // implicit signed-to-unsigned conversion
+    if (s < 0 || t < 0
+        || static_cast<size_t>(t) >= target.size()
+        || static_cast<size_t>(s) >= source.size()) { 
+      cerr << "WARNING: sentence " << sentenceID << " has alignment point (" << s << ", " << t << ") out of bounds (" << source.size() << ", " << target.size() << ")\n";
+      cerr << "T: " << targetString << endl << "S: " << sourceString << endl;
+      return 0;
+    }
+    alignedToT[t].push_back( s );
+    alignedCountS[s]++;
+		
+		//SetAlignment(s, t);
+  }
+	
+	bool mixed = global.mixed;
+	sourceTree.AddDefaultNonTerms(global.sourceSyntax, mixed, source.size());
+	targetTree.AddDefaultNonTerms(global.targetSyntax, mixed, target.size());
+
+	//CalcTightestSpan(m_s2tTightest);
+	//CalcTightestSpan(m_t2sTightest);
+	
+  return 1;
+}
+
+/*
+void SentenceAlignment::InitTightest(Outer &tightest, size_t len)
+{
+	tightest.resize(len);
+	
+	for (size_t posOuter = 0; posOuter < len; ++posOuter)
+	{
+		Inner &inner = tightest[posOuter];
+		size_t innerSize = len - posOuter;
+		inner.resize(innerSize);
+		
+	}
+}
+
+void SentenceAlignment::CalcTightestSpan(Outer &tightest)
+{
+	size_t len = tightest.size();
+	
+	for (size_t startPos = 0; startPos < len; ++startPos)
+	{
+		for (size_t endPos = startPos + 1; endPos < len; ++endPos)
+		{
+			const Range &prevRange = GetTightest(tightest, startPos, endPos - 1);
+			const Range &smallRange = GetTightest(tightest, endPos, endPos); 
+			Range &newRange = GetTightest(tightest, startPos, endPos);
+			
+			newRange.Merge(prevRange, smallRange);
+			//cerr << "[" << startPos << "-" << endPos << "] --> [" << newRange.GetStartPos() << "-" << newRange.GetEndPos() << "]";
+		}
+	}
+}
+
+Range &SentenceAlignment::GetTightest(Outer &tightest, size_t startPos, size_t endPos)
+{
+	assert(endPos < tightest.size());
+	assert(endPos >= startPos);
+	
+	Inner &inner = tightest[startPos];
+	
+	size_t ind = endPos - startPos;
+	Range &ret = inner[ind];
+	return ret;
+}
+
+void SentenceAlignment::SetAlignment(size_t source, size_t target)
+{
+	SetAlignment(m_s2tTightest, source, target);
+	SetAlignment(m_t2sTightest, target, source);
+}
+
+void SentenceAlignment::SetAlignment(Outer &tightest, size_t thisPos, size_t thatPos)
+{
+
+	Range &range = GetTightest(tightest, thisPos, thisPos);
+	if (range.GetStartPos() == NOT_FOUND)
+	{ // not yet set, do them both
+		assert(range.GetEndPos() == NOT_FOUND);
+		range.SetStartPos(thatPos);
+		range.SetEndPos(thatPos);
+	}
+	else
+	{
+		assert(range.GetEndPos() != NOT_FOUND);
+		range.SetStartPos( (range.GetStartPos() > thatPos) ? thatPos : range.GetStartPos() );
+		range.SetEndPos( (range.GetEndPos() < thatPos) ? thatPos : range.GetEndPos() );
+	}
+}
+ */
+
+
+// Enumerates all alignment-consistent (source span, target span) pairs of the
+// sentence and records them in a freshly allocated m_tunnelCollection.
+// Spans are inclusive [start, end]. A pair is recorded only if the target span
+// has at least one alignment point, no source word inside the source span is
+// aligned outside the target span, and both sides pass the length and
+// unaligned-word limits taken from `global`.
+// NOTE(review): a second call overwrites m_tunnelCollection without freeing
+// the previous one.
+void SentenceAlignment::FindTunnels(const Global &global ) 
+{
+	int countT = target.size();
+	int countS = source.size();
+	// one limit is applied to both sides: the larger of the two hole-span options
+	int maxSpan = max(global.maxHoleSpanSourceDefault, global.maxHoleSpanSourceSyntax);
+
+	m_tunnelCollection = new TunnelCollection(countS);
+	
+	// hand the per-word alignment counts to the collection (used by NumUnalignedWord)
+	m_tunnelCollection->alignedCountS = alignedCountS;
+	m_tunnelCollection->alignedCountT.resize(alignedToT.size());
+	for (size_t ind = 0; ind < alignedToT.size(); ind++)
+	{
+		m_tunnelCollection->alignedCountT[ind] = alignedToT[ind].size();
+	}
+	
+	// phrase repository for creating hiero phrases
+	
+	// check alignments for target phrase startT...endT
+	for(int lengthT=1;
+			lengthT <= maxSpan && lengthT <= countT;
+			lengthT++) {
+		for(int startT=0; startT < countT-(lengthT-1); startT++) {
+			
+			// that's nice to have
+			int endT = startT + lengthT - 1;
+			
+			// if there is target side syntax, there has to be a node
+			if (global.targetSyntax && !targetTree.HasNode(startT,endT))
+				continue;
+			
+			// find aligned source words
+			// first: find minimum and maximum source word
+			// NOTE(review): 9999 is a sentinel for "no source word seen yet"; it
+			// assumes source positions stay below 9999.
+			int minS = 9999;
+			int maxS = -1;
+			// usedS starts as the total alignment count of each source word and is
+			// decremented for every point that falls inside the target span, so a
+			// positive remainder means "also aligned outside the span"
+			vector< int > usedS = alignedCountS;
+			for(int ti=startT;ti<=endT;ti++) {
+				for(int i=0;i<alignedToT[ti].size();i++) {
+					int si = alignedToT[ti][i];
+					// cerr << "point (" << si << ", " << ti << ")\n";
+					if (si<minS) { minS = si; }
+					if (si>maxS) { maxS = si; }
+					usedS[ si ]--;
+				}
+			}
+			
+			// unaligned phrases are not allowed
+			if( maxS == -1 )
+				continue;
+			
+			// source phrase has to be within limits
+			if( maxS-minS >= maxSpan )
+			{
+				continue;
+			}
+			
+			// check if source words are aligned to out of bound target words
+			bool out_of_bounds = false;
+			for(int si=minS;si<=maxS && !out_of_bounds;si++)
+			{
+				if (usedS[si]>0) {
+					out_of_bounds = true;
+				}
+			}
+			
+			// if out of bound, you gotta go
+			if (out_of_bounds)
+				continue;
+			
+			// direction 1 = target side: skip spans with maxUnaligned or more unaligned words
+			if (m_tunnelCollection->NumUnalignedWord(1, startT, endT) >= global.maxUnaligned)
+				continue;
+			
+			// done with all the checks, lets go over all consistent phrase pairs
+			// start point of source phrase may retreat over unaligned
+			for(int startS=minS;
+					(startS>=0 &&
+					 startS>maxS - maxSpan && // within length limit
+					 (startS==minS || alignedCountS[startS]==0)); // unaligned
+					startS--)
+			{
+				// end point of source phrase may advance over unaligned
+				for(int endS=maxS;
+						(endS<countS && endS<startS + maxSpan && // within length limit
+						 (endS==maxS || alignedCountS[endS]==0)); // unaligned
+						endS++) 
+				{
+					// direction 0 = source side
+					if (m_tunnelCollection->NumUnalignedWord(0, startS, endS) >= global.maxUnaligned)
+						continue;
+					
+					// take note that this is a valid phrase alignment
+					m_tunnelCollection->Add(startS, endS, startT, endT);
+				}
+			}
+		}
+	}
+	
+	//cerr << *tunnelCollection << endl;
+
+}
+
+/**
+ * Allocates the lattice for this sentence and creates the arcs leaving every
+ * source position.
+ *
+ * Precondition: FindTunnels() has been called, because the tunnel collection
+ * is dereferenced below. This is asserted, matching GetTunnelCollection().
+ * NOTE(review): a second call overwrites m_lattice without freeing the
+ * previous lattice.
+ */
+void SentenceAlignment::CreateLattice(const Global &global)
+{
+	assert(m_tunnelCollection);
+
+	size_t countS = source.size();
+	m_lattice = new Lattice(countS);
+	
+	for (size_t startPos = 0; startPos < countS; ++startPos)
+	{
+		//cerr << "creating arcs for " << startPos << "=";
+		m_lattice->CreateArcs(startPos, *m_tunnelCollection, *this, global);
+		
+		//cerr << LatticeNode::s_count << endl;
+	}
+}
+
+/**
+ * Asks the lattice to create the rules starting at every source position.
+ *
+ * Precondition: CreateLattice() has been called. This is asserted, matching
+ * GetLattice(), instead of dereferencing a NULL lattice.
+ */
+void SentenceAlignment::CreateRules(const Global &global)
+{
+	assert(m_lattice);
+
+	size_t countS = source.size();
+	
+	for (size_t startPos = 0; startPos < countS; ++startPos)
+	{
+		//cerr << "creating rules for " << startPos << "\n";
+		m_lattice->CreateRules(startPos, *this, global);
+	}
+}
+
+// Writes the tokens of `vec` to `out` in order, each one followed by a single
+// space (so any non-empty output ends with a trailing space).
+void OutputSentenceStr(std::ostream &out, const std::vector<std::string> &vec)
+{
+	typedef std::vector<std::string>::const_iterator TokenIter;
+	for (TokenIter token = vec.begin(); token != vec.end(); ++token)
+	{
+		out << *token << " ";
+	}
+}
+
+/**
+ * Debug dump: "<target words> ==> <source words>" on one line, followed by
+ * the tunnel collection and the lattice when they have been built.
+ *
+ * Both pointers stay NULL until FindTunnels() / CreateLattice() run, so each
+ * is checked before being dereferenced.
+ */
+std::ostream& operator<<(std::ostream &out, const SentenceAlignment &obj)
+{	
+	OutputSentenceStr(out, obj.target);
+	out << " ==> ";
+	OutputSentenceStr(out, obj.source);
+	out << endl;
+	
+	if (obj.m_tunnelCollection)
+		out << *obj.m_tunnelCollection;	
+
+	if (obj.m_lattice)
+		out << endl << *obj.m_lattice;
+	
+	return out;
+}
+
+
+
+
--- a/contrib/other-builds/extract-mixed-syntax/SentenceAlignment.h
+++ b/contrib/other-builds/extract-mixed-syntax/SentenceAlignment.h
@ -0,0 +1,69 @@
+#pragma once
+/*
+ *  SentenceAlignment.h
+ *  extract
+ *
+ *  Created by Hieu Hoang on 19/01/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include <vector>
+#include <cassert>
+#include <iostream>
+#include "SyntaxTree.h"
+#include "Global.h"
+#include "Range.h"
+
+class TunnelCollection;
+class Lattice;
+
+// One aligned sentence pair plus the structures derived from it.
+// Call order used by the visible code: Create(), then FindTunnels(), then
+// CreateLattice(), then CreateRules().
+// NOTE(review): owns m_tunnelCollection and m_lattice (deleted in the
+// destructor) but declares no copy constructor/assignment; copying an
+// instance would lead to a double delete -- confirm it is never copied.
+class SentenceAlignment 
+{
+	friend std::ostream& operator<<(std::ostream&, const SentenceAlignment&);
+
+public:
+  std::vector<std::string> target; // target-side tokens
+  std::vector<std::string> source; // source-side tokens
+  std::vector<int> alignedCountS; // per source word: number of alignment points
+  std::vector< std::vector<int> > alignedToT; // per target word: aligned source positions
+  SyntaxTree sourceTree, targetTree;
+	
+	//typedef std::vector<Range> Inner;
+	//typedef std::vector<Inner> Outer;
+	
+	//Outer m_s2tTightest, m_t2sTightest;
+	
+	SentenceAlignment();
+	~SentenceAlignment();
+  // returns 1 on success, 0 if the sentence pair or its alignment is unusable
+  int Create(const std::string &targetString, const std::string &sourceString, const std::string &alignmentString, int sentenceID, const Global &global);
+  //  void clear() { delete(alignment); };
+	// allocates m_tunnelCollection
+	void FindTunnels( const Global &global ) ;
+
+	// allocates m_lattice; reads m_tunnelCollection
+	void CreateLattice(const Global &global);
+	// reads m_lattice
+	void CreateRules(const Global &global);
+		
+	// asserts that FindTunnels() has run
+	const TunnelCollection &GetTunnelCollection() const
+	{ 
+		assert(m_tunnelCollection);
+		return *m_tunnelCollection;
+	}
+
+	// asserts that CreateLattice() has run
+	const Lattice &GetLattice() const
+	{ 
+		assert(m_lattice);
+		return *m_lattice;
+	}
+	
+protected:
+	TunnelCollection *m_tunnelCollection; // NULL until FindTunnels()
+	Lattice *m_lattice; // NULL until CreateLattice()
+	
+	/*
+	void CalcTightestSpan(Outer &tightest);
+	void InitTightest(Outer &tightest, size_t len);
+	Range &GetTightest(Outer &tightest, size_t startPos, size_t endPos);
+	void SetAlignment(size_t source, size_t target);
+	void SetAlignment(Outer &tightest, size_t thisPos, size_t thatPos);
+	*/
+};
+
--- a/contrib/other-builds/extract-mixed-syntax/Symbol.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/Symbol.cpp
@ -0,0 +1,101 @@
+/*
+ *  Symbol.cpp
+ *  extract
+ *
+ *  Created by Hieu Hoang on 21/07/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include <cassert>
+#include "Symbol.h"
+
+using namespace std;
+
+/**
+ * Terminal symbol: `label` is the word and `pos` its position, stored in
+ * m_span[0].first (the remaining span fields stay value-initialised to 0).
+ *
+ * The two syntax flags are meaningless for terminals but are still set, so
+ * that copying a Symbol (e.g. into SymbolSequence's vector) never reads an
+ * indeterminate bool. Initialisers are listed in declaration order.
+ */
+Symbol::Symbol(const std::string &label, size_t pos)
+:m_label(label)
+,m_span(2)
+,m_isTerminal(true)
+,m_isSourceSyntax(false)
+,m_isTargetSyntax(false)
+{
+	m_span[0].first = pos;
+}
+
+// Non-terminal symbol carrying a source and a target label, the inclusive
+// source span (m_span[0]) and target span (m_span[1]), and a flag per side
+// telling whether that side's label comes from syntax.
+Symbol::Symbol(const std::string &labelS, const std::string &labelT
+							 , size_t startS, size_t endS
+							 , size_t startT, size_t endT
+							 , bool isSourceSyntax, bool isTargetSyntax)
+:m_label(labelS)
+,m_labelT(labelT)
+,m_span(2)
+,m_isTerminal(false)
+,m_isSourceSyntax(isSourceSyntax)
+,m_isTargetSyntax(isTargetSyntax)
+{
+	std::pair<size_t, size_t> &sourceSpan = m_span[0];
+	sourceSpan.first = startS;
+	sourceSpan.second = endS;
+
+	std::pair<size_t, size_t> &targetSpan = m_span[1];
+	targetSpan.first = startT;
+	targetSpan.second = endT;
+}
+
+// Three-way comparison of one side (source or target) of two non-terminals.
+// Returns -1 / 0 / +1.
+//  - exactly one side is syntax: the syntax one orders first
+//  - neither is syntax: equal (spans and labels are ignored)
+//  - both are syntax: order by span, then by label
+int CompareNonTerm(bool thisIsSyntax, bool otherIsSyntax
+									 , const std::pair<size_t, size_t> &thisSpan, const std::pair<size_t, size_t> &otherSpan
+									 , std::string thisLabel, std::string otherLabel)
+{
+	if (thisIsSyntax && !otherIsSyntax)
+		return -1;
+	if (otherIsSyntax && !thisIsSyntax)
+		return +1;
+
+	// from here on both flags agree
+	if (!thisIsSyntax)
+		return 0;
+
+	if (thisSpan < otherSpan)
+		return -1;
+	if (otherSpan < thisSpan)
+		return +1;
+
+	const int labelOrder = thisLabel.compare(otherLabel);
+	if (labelOrder < 0)
+		return -1;
+	if (labelOrder > 0)
+		return +1;
+
+	return 0;
+}
+
+/**
+ * Three-way comparison used to order and de-duplicate symbols.
+ *
+ * Terminals order before non-terminals. Two terminals are compared by
+ * position, then label. Two non-terminals are compared on the source side
+ * first (m_span[0], m_label) and then on the target side (m_span[1],
+ * m_labelT); see CompareNonTerm for the per-side rules.
+ *
+ * @return -1, 0 or +1
+ */
+int Symbol::Compare(const Symbol &other) const
+{
+	if (m_isTerminal != other.m_isTerminal)
+		return m_isTerminal ? -1 : +1;
+	
+	assert(m_isTerminal == other.m_isTerminal);
+	if (m_isTerminal)
+	{ // compare labels & pos
+		if (m_span[0].first != other.m_span[0].first)
+			return (m_span[0].first < other.m_span[0].first) ? -1 : +1;
+		
+		if (m_label != other.m_label)
+			return (m_label < other.m_label) ? -1 : +1;
+		
+	}
+	else 
+	{ // non terms
+		int ret = CompareNonTerm(m_isSourceSyntax, other.m_isSourceSyntax
+														,m_span[0], other.m_span[0]
+														 ,m_label, other.m_label);
+		if (ret != 0)
+			return ret;
+			
+		// target side: must use the target labels; comparing the source labels
+		// again would treat non-terminals that differ only in their target
+		// label as equal
+		ret = CompareNonTerm(m_isTargetSyntax, other.m_isTargetSyntax
+												 ,m_span[1], other.m_span[1]
+												 ,m_labelT, other.m_labelT);
+		if (ret != 0)
+			return ret;
+	}
+	
+	return 0;
+}
+
+
+// Prints a terminal as its label, and a non-terminal as its source label
+// immediately followed by its target label.
+std::ostream& operator<<(std::ostream &out, const Symbol &obj)
+{
+	if (!obj.m_isTerminal)
+	{
+		out << obj.m_label + obj.m_labelT;
+		return out;
+	}
+
+	out << obj.m_label;
+	return out;
+}
+
--- a/contrib/other-builds/extract-mixed-syntax/Symbol.h
+++ b/contrib/other-builds/extract-mixed-syntax/Symbol.h
@ -0,0 +1,36 @@
+#pragma once
+
+/*
+ *  Symbol.h
+ *  extract
+ *
+ *  Created by Hieu Hoang on 21/07/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include <string>
+#include <iostream>
+#include <vector>
+
+// One symbol of a rule side: either a terminal (a word at a position) or a
+// non-terminal with a source label, a target label and a span on each side.
+class Symbol
+{
+	friend std::ostream& operator<<(std::ostream &out, const Symbol &obj);
+
+protected:
+	std::string m_label, m_labelT; // m_labelT only for non-term
+	// always 2 entries: [0] = source span, [1] = target span.
+	// Terminals only set m_span[0].first (their position).
+	std::vector<std::pair<size_t, size_t> > m_span;
+	
+	// the two syntax flags are only assigned by the non-terminal constructor
+	bool m_isTerminal, m_isSourceSyntax, m_isTargetSyntax;
+public:
+	// for terminals
+	Symbol(const std::string &label, size_t pos);
+
+	// for non-terminals
+	Symbol(const std::string &labelS, const std::string &labelT
+				 , size_t startS, size_t endS
+				 , size_t startT, size_t endT
+				 , bool isSourceSyntax, bool isTargetSyntax);
+
+	// three-way comparison: returns -1, 0 or +1; terminals order before non-terminals
+	int Compare(const Symbol &other) const;
+
+};
--- a/contrib/other-builds/extract-mixed-syntax/SymbolSequence.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/SymbolSequence.cpp
@ -0,0 +1,56 @@
+/*
+ *  SymbolSequence.cpp
+ *  extract
+ *
+ *  Created by Hieu Hoang on 21/07/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include <cassert>
+#include <sstream>
+#include "SymbolSequence.h"
+
+using namespace std;
+
+/**
+ * Three-way comparison of two symbol sequences.
+ *
+ * A shorter sequence orders before a longer one; sequences of equal length
+ * are compared symbol by symbol with Symbol::Compare, and the first
+ * difference decides.
+ *
+ * @return -1, 0 or +1 (0 also for two empty sequences)
+ */
+int SymbolSequence::Compare(const SymbolSequence &other) const
+{	
+	const size_t thisSize = GetSize();
+	const size_t otherSize = other.GetSize();
+	if (thisSize != otherSize)
+	{
+		return (thisSize < otherSize) ? -1 : +1;
+	}
+
+	// Starts at 0 so that two empty sequences, for which the loop body never
+	// runs, compare equal instead of returning an uninitialised value.
+	int ret = 0;
+	for (size_t ind = 0; ind < thisSize; ++ind)
+	{
+		const Symbol &thisSymbol = GetSymbol(ind);
+		const Symbol &otherSymbol = other.GetSymbol(ind);
+		ret = thisSymbol.Compare(otherSymbol);
+		if (ret != 0)
+		{
+			return ret;
+		}
+	}
+	
+	assert(ret == 0);
+	return ret;
+}
+
+// Prints every symbol of the sequence in order, each followed by one space.
+std::ostream& operator<<(std::ostream &out, const SymbolSequence &obj)
+{	
+	const size_t numSymbols = obj.GetSize();
+	for (size_t ind = 0; ind < numSymbols; ++ind)
+	{
+		out << obj.GetSymbol(ind) << " ";
+	}
+	
+	return out;
+}
+	
+
--- a/contrib/other-builds/extract-mixed-syntax/SymbolSequence.h
+++ b/contrib/other-builds/extract-mixed-syntax/SymbolSequence.h
@ -0,0 +1,42 @@
+#pragma once
+/*
+ *  SymbolSequence.h
+ *  extract
+ *
+ *  Created by Hieu Hoang on 21/07/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include <iostream>
+#include <vector>
+#include "Symbol.h"
+
+// Ordered list of Symbols making up one side of a rule.
+class SymbolSequence
+{
+	friend std::ostream& operator<<(std::ostream &out, const SymbolSequence &obj);
+
+protected:
+	typedef std::vector<Symbol> CollType;
+	CollType m_coll;
+	
+public:
+	typedef CollType::iterator iterator;
+	typedef CollType::const_iterator const_iterator;
+	// read-only iteration over the symbols
+	const_iterator begin() const { return m_coll.begin(); }
+	const_iterator end() const { return m_coll.end(); }
+	
+	// appends a copy of symbol
+	void Add(const Symbol &symbol)
+	{
+		m_coll.push_back(symbol);
+	}
+	size_t GetSize() const
+	{ return m_coll.size(); }
+	// no bounds check: ind must be < GetSize()
+	const Symbol &GetSymbol(size_t ind) const
+	{ return m_coll[ind]; }
+
+	void Clear()
+	{ m_coll.clear(); }
+	
+	// three-way comparison: shorter sequences first, then symbol by symbol
+	int Compare(const SymbolSequence &other) const;
+
+};
--- a/contrib/other-builds/extract-mixed-syntax/SyntaxTree.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/SyntaxTree.cpp
@ -0,0 +1,245 @@
+// $Id: SyntaxTree.cpp 1960 2008-12-15 12:52:38Z phkoehn $
+// vim:tabstop=2
+
+/***********************************************************************
+  Moses - factored phrase-based language decoder
+  Copyright (C) 2009 University of Edinburgh
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 2.1 of the License, or (at your option) any later version.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+
+#include <iostream>
+#include <cassert>
+#include "SyntaxTree.h"
+//#include "extract.h"
+#include "Global.h"
+
+//extern const Global g_debug;
+extern const Global *g_global;
+
+using namespace std;
+
+// A node counts as syntax unless it carries the default label "[X]".
+bool SyntaxNode::IsSyntax() const
+{
+	return GetLabel() != "[X]";
+}
+
+// Creates an empty tree. m_defaultLHS is the "[X]" node that GetNodesForLHS
+// falls back to. m_top is set to null so it never holds an indeterminate
+// pointer. m_emptyNode is default-constructed empty, so no explicit clear()
+// is needed.
+SyntaxTree::SyntaxTree() 
+:m_top(0)
+,m_defaultLHS(0,0, "[X]")
+{
+}
+
+// Frees every node held in m_nodes (each one was allocated by AddNode).
+SyntaxTree::~SyntaxTree()
+{
+	for (SyntaxNodes::const_iterator node = m_nodes.begin(); node != m_nodes.end(); ++node)
+	{
+		delete *node;
+	}
+}
+
+/**
+ * Returns true if at least two nodes in `nodes` carry the same label.
+ *
+ * The earlier version compared every label against a `prevLabel` that was
+ * never updated, so it only reported nodes with an empty label. Cell
+ * contents are short, so a pairwise scan is sufficient.
+ */
+bool HasDuplicates(const SyntaxNodes &nodes)
+{
+	for (SyntaxNodes::const_iterator first = nodes.begin(); first != nodes.end(); ++first)
+	{
+		const string &label = (*first)->GetLabel();
+		for (SyntaxNodes::const_iterator second = first + 1; second != nodes.end(); ++second)
+		{
+			if ((*second)->GetLabel() == label)
+				return true;
+		}
+	}
+	return false;
+}
+
+// Creates a node labelled "[label]" for the span [startPos, endPos] and
+// registers it in the chart cell m_index[startPos][endPos].
+// Every node is also appended to m_nodes, which is what the destructor frees.
+// With g_global->uppermostOnly set, a cell keeps just the most recently added
+// node: the previous entry is dropped from the cell (but stays in m_nodes).
+void SyntaxTree::AddNode( int startPos, int endPos, std::string label ) 
+{	
+	SyntaxNode* const created = new SyntaxNode( startPos, endPos, "[" + label + "]");
+	m_nodes.push_back( created );
+	
+	SyntaxNodes &cell = m_index[ startPos ][ endPos ];
+	
+	if (g_global->uppermostOnly && !cell.empty())
+	{
+		assert(cell.size() == 1);
+		cell.clear();
+	}
+
+	cell.push_back( created );
+}
+
+// For every span of length >= 2 that has a node, computes the split points
+// that divide it into child spans, greedily taking the longest child that
+// starts at the first position not yet covered.
+// Returns one SplitPoints vector per such parent span; each vector starts
+// with the parent's start position and then lists the position following
+// each child.
+ParentNodes SyntaxTree::Parse() {
+	ParentNodes parents;
+
+	// number of distinct start positions in the index
+	int size = m_index.size();
+
+	// looping through all spans of size >= 2
+	for( int length=2; length<=size; length++ )
+	{
+		for( int startPos = 0; startPos <= size-length; startPos++ )
+		{
+			if (HasNode( startPos, startPos+length-1 ))
+			{
+				// processing one (parent) span
+
+				//std::cerr << "# " << startPos << "-" << (startPos+length-1) << ":";
+				SplitPoints splitPoints;
+				splitPoints.push_back( startPos );
+				//std::cerr << " " << startPos;
+
+				// `first` is 1 until the first child has been found; it makes the
+				// initial search start at length-1 so the parent span itself is
+				// not picked as its own child
+				int first = 1;
+				// number of words of the parent already covered by children
+				int covered = 0;
+				// NOTE(review): if no node starts at startPos+covered within the
+				// parent, `covered` never advances and this loop does not end.
+				while( covered < length )
+				{
+					// find largest covering subspan (child)
+					// starting at last covered position
+					for( int midPos=length-first; midPos>covered; midPos-- )
+					{
+						if( HasNode( startPos+covered, startPos+midPos-1 ) )
+						{							
+							// updating `covered` also ends the for loop: after
+							// midPos-- the condition midPos>covered is false
+							covered = midPos;							
+							splitPoints.push_back( startPos+covered );
+							// std::cerr << " " << ( startPos+covered );
+							first = 0;
+						}
+					}
+				}
+				// std::cerr << std::endl;
+				parents.push_back( splitPoints );
+			}
+		}
+	}
+	return parents;
+}
+
+// True when at least one node is registered for the span [startPos, endPos].
+bool SyntaxTree::HasNode( int startPos, int endPos ) const 
+{
+	const SyntaxNodes &cell = GetNodes( startPos, endPos );
+	return !cell.empty();
+}
+
+// Looks up the nodes registered for the span [startPos, endPos].
+// Never inserts into the index: an unknown start or end position yields a
+// reference to the shared empty list m_emptyNode.
+const SyntaxNodes &SyntaxTree::GetNodes( int startPos, int endPos ) const 
+{
+	const SyntaxTreeIndexIterator byStart = m_index.find( startPos );
+	if (byStart != m_index.end())
+	{
+		const SyntaxTreeIndex2 &byEnd = byStart->second;
+		const SyntaxTreeIndexIterator2 cell = byEnd.find( endPos );
+		if (cell != byEnd.end())
+			return cell->second;
+	}
+	
+	return m_emptyNode;
+}
+
+// Renders the tree with operator<< and returns the text as a string.
+std::string SyntaxTree::ToString() const
+{
+	std::ostringstream rendered;
+	rendered << *this;
+	return rendered.str();
+}
+
+// Adds an "X" node (stored as "[X]") for every inclusive span
+// [startPos, endPos] with 0 <= startPos <= endPos < phraseSize.
+void SyntaxTree::AddDefaultNonTerms(size_t phraseSize)
+{
+	// startPos == phraseSize would give an empty inner loop, so stopping
+	// one earlier adds exactly the same nodes
+	for (size_t startPos = 0; startPos < phraseSize; ++startPos)
+	{
+		size_t endPos = startPos;
+		while (endPos < phraseSize)
+		{
+			AddNode(startPos, endPos, "X");
+			++endPos;
+		}
+	}
+}
+
+// Entry point for adding default non-terminals to one side of a sentence.
+//  - side without syntax: X is added on every span
+//  - side with syntax: delegates to the (bool, size_t) overload, adding X
+//    everywhere only when `mixed` is false
+void SyntaxTree::AddDefaultNonTerms(bool isSyntax, bool mixed, size_t phraseSize)
+{
+	if (!isSyntax)
+	{ // add X everywhere
+		AddDefaultNonTerms(phraseSize);
+		return;
+	}
+
+	const bool addEverywhere = !mixed;
+	AddDefaultNonTerms(addEverywhere, phraseSize);
+}
+
+/**
+ * Adds default "X" non-terminals to a tree that may already hold syntax nodes.
+ *
+ * @param addEverywhere true: add X on every span; false: add X only on spans
+ *                      that have no node yet
+ * @param phraseSize    number of words; spans are inclusive, so valid
+ *                      positions are 0 .. phraseSize-1
+ *
+ * Both loops stop before phraseSize, like AddDefaultNonTerms(size_t). The
+ * former `<=` bounds also created nodes whose start or end equals phraseSize,
+ * i.e. one position past the last word.
+ */
+void SyntaxTree::AddDefaultNonTerms(bool addEverywhere, size_t phraseSize)
+{
+  //cerr << "GetNumWords()=" << GetNumWords() << endl;
+	//assert(phraseSize == GetNumWords() || GetNumWords() == 1); // 1 if syntax sentence doesn't have any xml. TODO fix syntax tree obj
+
+	for (size_t startPos = 0; startPos < phraseSize; ++startPos)
+	{
+		for (size_t endPos = startPos; endPos < phraseSize; ++endPos)
+		{
+			const SyntaxNodes &nodes = GetNodes(startPos, endPos);
+			if (!addEverywhere && nodes.size() > 0)
+			{ // only add if no label
+				continue;
+			}
+			AddNode(startPos, endPos, "X");
+		}
+	}
+}
+
+// Returns a copy of the nodes registered for [startPos, endPos]; when the
+// span has none, the result holds just the default "[X]" node m_defaultLHS.
+const SyntaxNodes SyntaxTree::GetNodesForLHS( int startPos, int endPos ) const
+{
+	SyntaxNodes lhsNodes(GetNodes(startPos, endPos));
+	
+	if (lhsNodes.empty())
+	{
+		lhsNodes.push_back(&m_defaultLHS);
+	}
+	
+	return lhsNodes;
+}
+
+// Debug rendering of the chart: one output line per span length (1 .. size),
+// each indented by four spaces per unit of length. Every cell shows the label
+// of the first node of that span, padded with '#' and cut to 7 characters, or
+// "-------" when the span has no node.
+std::ostream& operator<<(std::ostream& os, const SyntaxTree& t)
+{
+	// number of distinct start positions in the index
+	int size = t.m_index.size();
+	for(size_t length=1; length<=size; length++)
+	{
+		for(size_t space=0; space<length; space++)
+		{
+			os << "    ";
+		}
+		// size-length cannot go negative here because length <= size
+		for(size_t start=0; start<=size-length; start++)
+		{
+			
+			if (t.HasNode( start, start+(length-1) ))
+			{
+				// only the first node of the cell is shown
+				std::string label = t.GetNodes( start, start+(length-1) )[0]->GetLabel() + "#######";
+				
+				os << label.substr(0,7) << " ";
+			}
+			else
+			{
+				os << "------- ";
+			}		
+		}
+		os << std::endl;
+	}
+  return os;
+}
+
+
--- a/contrib/other-builds/extract-mixed-syntax/SyntaxTree.h
+++ b/contrib/other-builds/extract-mixed-syntax/SyntaxTree.h
@ -0,0 +1,96 @@
+#pragma once 
+
+// $Id: SyntaxTree.h 1960 2008-12-15 12:52:38Z phkoehn $
+// vim:tabstop=2
+
+/***********************************************************************
+  Moses - factored phrase-based language decoder
+  Copyright (C) 2009 University of Edinburgh
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 2.1 of the License, or (at your option) any later version.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#include <string>
+#include <vector>
+#include <map>
+#include <sstream>
+
+class SyntaxNode;
+
+typedef std::vector<const SyntaxNode*> SyntaxNodes;
+
+// A labelled span [start, end] (inclusive word positions) in a syntax tree.
+// m_children and m_parent are not populated by any code in this class;
+// m_parent is set to null so it never holds an indeterminate pointer.
+class SyntaxNode {
+protected:
+	int m_start, m_end;
+	std::string m_label;
+	SyntaxNodes m_children;
+	SyntaxNode* m_parent;
+public:
+SyntaxNode( int startPos, int endPos, const std::string &label)
+	:m_start(startPos)
+		,m_end(endPos)
+		,m_label(label)
+		,m_parent(0)
+	{}
+	int GetStart() const
+	{ return m_start; }
+	int GetEnd() const
+	{ return m_end; }
+	const std::string &GetLabel() const
+	{ return m_label; }
+	// true unless the label is the default "[X]" (defined in SyntaxTree.cpp)
+	bool IsSyntax() const;
+};
+
+
+typedef std::vector< int > SplitPoints;
+typedef std::vector< SplitPoints > ParentNodes;
+
+// Chart of syntax nodes for one sentence side, indexed by inclusive
+// [start][end] word positions.
+// NOTE(review): the destructor deletes every pointer in m_nodes, but no copy
+// constructor/assignment is declared; copying a tree would lead to a double
+// delete -- confirm instances are never copied.
+class SyntaxTree {
+protected:
+	SyntaxNodes m_nodes;      // every node created by AddNode; freed in ~SyntaxTree
+	SyntaxNode* m_top;        // not used by the member functions in SyntaxTree.cpp
+	SyntaxNode m_defaultLHS;  // "[X]" node returned by GetNodesForLHS for empty spans
+	
+	typedef std::map< int, SyntaxNodes > SyntaxTreeIndex2;
+	typedef SyntaxTreeIndex2::const_iterator SyntaxTreeIndexIterator2;
+	typedef std::map< int, SyntaxTreeIndex2 > SyntaxTreeIndex;
+	typedef SyntaxTreeIndex::const_iterator SyntaxTreeIndexIterator;
+	SyntaxTreeIndex m_index;  // m_index[start][end] -> nodes of that span
+	SyntaxNodes m_emptyNode;  // empty list returned by GetNodes for unknown spans
+
+	friend std::ostream& operator<<(std::ostream&, const SyntaxTree&);
+
+public:
+	SyntaxTree();
+	~SyntaxTree();
+	
+	// adds a node labelled "[label]" for the span [startPos, endPos]
+	void AddNode( int startPos, int endPos, std::string label );
+	ParentNodes Parse();
+	bool HasNode( int startPos, int endPos ) const;
+	const SyntaxNodes &GetNodes( int startPos, int endPos ) const;
+	const SyntaxNodes &GetAllNodes() const { return m_nodes; }
+	// number of distinct start positions in the index
+	size_t GetNumWords() const { return m_index.size(); }
+	std::string ToString() const;
+	
+	// Parameter names match the definitions in SyntaxTree.cpp (they were
+	// swapped here before). The three-argument overload passes !mixed as
+	// addEverywhere to the two-argument one.
+	void AddDefaultNonTerms(bool isSyntax, bool mixed, size_t phraseSize);
+	void AddDefaultNonTerms(bool addEverywhere, size_t phraseSize);
+
+	// adds X on every span
+	void AddDefaultNonTerms(size_t phraseSize);
+	
+	// nodes of the span, or just the default "[X]" node when there are none
+	const SyntaxNodes GetNodesForLHS( int startPos, int endPos ) const;
+	
+};
+
+std::ostream& operator<<(std::ostream&, const SyntaxTree&);
+
--- a/contrib/other-builds/extract-mixed-syntax/Tunnel.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/Tunnel.cpp
@ -0,0 +1,38 @@
+/*
+ *  Tunnel.cpp
+ *  extract
+ *
+ *  Created by Hieu Hoang on 19/01/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+
+#include "Tunnel.h"
+
+
+// Orders tunnels by source range first; the target range breaks ties.
+// The result is whatever Range::Compare returns for the deciding range.
+int Tunnel::Compare(const Tunnel &other) const
+{
+	const int sourceOrder = m_sourceRange.Compare(other.m_sourceRange);
+	if (sourceOrder == 0)
+	{
+		const int targetOrder = m_targetRange.Compare(other.m_targetRange);
+		return targetOrder;
+	}
+
+	return sourceOrder;
+}
+
+// Compares a single side of two tunnels: the source ranges when direction is
+// 0, the target ranges for any other value.
+int Tunnel::Compare(const Tunnel &other, size_t direction) const
+{
+	const Range &mine = GetRange(direction);
+	const Range &theirs = other.GetRange(direction);
+	return mine.Compare(theirs);
+}
+
+// Prints a tunnel as "<source range>==><target range>".
+std::ostream& operator<<(std::ostream &out, const Tunnel &tunnel)
+{
+	out << tunnel.m_sourceRange;
+	out << "==>";
+	out << tunnel.m_targetRange;
+	return out;
+}
--- a/contrib/other-builds/extract-mixed-syntax/Tunnel.h
+++ b/contrib/other-builds/extract-mixed-syntax/Tunnel.h
@ -0,0 +1,49 @@
+#pragma once
+
+/*
+ *  Tunnel.h
+ *  extract
+ *
+ *  Created by Hieu Hoang on 19/01/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include <vector>
+#include <cassert>
+#include <string>
+#include <iostream>
+#include "Range.h"
+
+	// for unaligned source terminal
+
+// A pair of ranges: one on the source side and one on the target side.
+class Tunnel
+{
+	friend std::ostream& operator<<(std::ostream&, const Tunnel&);
+
+protected:
+	Range m_sourceRange;
+	Range m_targetRange;
+
+public:
+	// Leaves both ranges default-constructed.
+	Tunnel() {}
+
+	// Member-wise copy of both ranges.
+	Tunnel(const Tunnel &copy)
+	: m_sourceRange(copy.m_sourceRange), m_targetRange(copy.m_targetRange)
+	{}
+
+	Tunnel(const Range &sourceRange, const Range &targetRange)
+	: m_sourceRange(sourceRange), m_targetRange(targetRange)
+	{}
+
+	// direction 0 selects the source range; any other value selects the target range.
+	const Range &GetRange(size_t direction) const
+	{
+		if (direction == 0)
+			return m_sourceRange;
+		return m_targetRange;
+	}
+
+	// Source range first, target range as tie-breaker.
+	int Compare(const Tunnel &other) const;
+	// Compares only the side selected by direction (see GetRange).
+	int Compare(const Tunnel &other, size_t direction) const;
+};
+
+// Sequence of tunnels, in insertion order.
+typedef std::vector<Tunnel> TunnelList;
+
--- a/contrib/other-builds/extract-mixed-syntax/TunnelCollection.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/TunnelCollection.cpp
@ -0,0 +1,70 @@
+/*
+ *  TunnelCollection.cpp
+ *  extract
+ *
+ *  Created by Hieu Hoang on 19/01/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+
+#include "TunnelCollection.h"
+#include "Range.h"
+
+using namespace std;
+
+// Counts the positions in [startPos, endPos] (both inclusive) whose aligned
+// count is 0. direction 0 reads alignedCountS, direction 1 reads
+// alignedCountT; any other direction counts nothing.
+size_t TunnelCollection::NumUnalignedWord(size_t direction, size_t startPos, size_t endPos) const
+{
+	assert(startPos <= endPos);
+
+	// Pick the per-position counts once instead of re-testing direction per word.
+	const std::vector<int> &counts = (direction == 0) ? alignedCountS : alignedCountT;
+	assert(endPos < counts.size());
+
+	// Only directions 0 and 1 ever contribute to the count.
+	if (direction > 1)
+		return 0;
+
+	size_t numUnaligned = 0;
+	for (size_t pos = startPos; pos <= endPos; ++pos)
+	{
+		if (counts[pos] == 0)
+			++numUnaligned;
+	}
+	return numUnaligned;
+}
+
+// Appends the tunnel [startS,endS] ==> [startT,endT] to the list stored for
+// source start position startS and source offset endS - startS.
+void TunnelCollection::Add(int startS, int endS, int startT, int endT)
+{
+	const Range sourceRange(startS, endS);
+	const Range targetRange(startT, endT);
+
+	TunnelList &bucket = m_coll[startS][endS - startS];
+	bucket.push_back(Tunnel(sourceRange, targetRange));
+}
+
+
+// Prints every stored tunnel followed by a single space, visiting the source
+// spans by increasing start position and, within a start, by increasing end.
+std::ostream& operator<<(std::ostream &out, const TunnelCollection &TunnelCollection)
+{
+	const size_t numPositions = TunnelCollection.GetSize();
+
+	for (size_t from = 0; from < numPositions; ++from)
+	{
+		for (size_t to = from; to < numPositions; ++to)
+		{
+			const TunnelList &spanTunnels = TunnelCollection.GetTunnels(from, to);
+			for (size_t i = 0; i < spanTunnels.size(); ++i)
+				out << spanTunnels[i] << " ";
+		}
+	}
+
+	return out;
+}
+
+
--- a/contrib/other-builds/extract-mixed-syntax/TunnelCollection.h
+++ b/contrib/other-builds/extract-mixed-syntax/TunnelCollection.h
@ -0,0 +1,61 @@
+#pragma once
+/*
+ *  TunnelCollection.h
+ *  extract
+ *
+ *  Created by Hieu Hoang on 19/01/2010.
+ *  Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+#include <vector>
+#include "Tunnel.h"
+
+// Repository of extracted phrase pairs,
+// which are potential tunnels in larger phrase pairs.
+class TunnelCollection
+	{
+		friend std::ostream& operator<<(std::ostream&, const TunnelCollection&);
+
+	protected:
+		// Indexed by [source start position][source end - source start];
+		// each cell lists the tunnels added for that source span.
+		std::vector< std::vector<TunnelList> > m_coll;
+
+	public:
+		// Per-position counts read by NumUnalignedWord (an entry equal to 0
+		// counts as unaligned). Nothing in this class writes them.
+		std::vector<int> alignedCountS, alignedCountT;
+
+		// Declared only; no definition is visible next to this class.
+		TunnelCollection(const TunnelCollection &);
+
+		// size is the length of the source sentence. Start position pos gets
+		// size - pos empty tunnel lists, one per possible end position.
+		TunnelCollection(size_t size)
+		:m_coll(size)
+		{
+			for (size_t pos = 0; pos < size; ++pos)
+			{
+				m_coll[pos].resize(size - pos);
+			}
+		}
+		
+		// Stores the tunnel [startS,endS] ==> [startT,endT] under its source span.
+		void Add(int startS, int endS, int startT, int endT);
+
+		// Tunnels stored for the source span [startS, endS]. No bounds checking:
+		// requires 0 <= startS <= endS < GetSize().
+		const TunnelList &GetTunnels(int startS, int endS) const
+		{
+			return m_coll[startS][endS - startS];
+		}
+		
+		// Number of source positions the collection was built for.
+		// (Returned as plain size_t: a const qualifier on a by-value return is ignored.)
+		size_t GetSize() const
+		{ return m_coll.size(); }
+		
+		// Number of positions in [startPos, endPos] whose aligned count is 0;
+		// direction 0 reads alignedCountS, direction 1 reads alignedCountT.
+		size_t NumUnalignedWord(size_t direction, size_t startPos, size_t endPos) const;
+
+
+	};
+
--- a/contrib/other-builds/extract-mixed-syntax/XmlTree.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/XmlTree.cpp
@ -0,0 +1,344 @@
+// $Id: XmlOption.cpp 1960 2008-12-15 12:52:38Z phkoehn $
+// vim:tabstop=2
+
+/***********************************************************************
+  Moses - factored phrase-based language decoder
+  Copyright (C) 2006 University of Edinburgh
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 2.1 of the License, or (at your option) any later version.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#include <vector>
+#include <string>
+#include <set>
+#include <iostream>
+#include <stdlib.h>
+#include "SyntaxTree.h"
+
+using namespace std;
+
+
+// Splits str at any character contained in delimiters. Runs of delimiters
+// count as one separator, so no empty tokens are produced.
+inline std::vector<std::string> Tokenize(const std::string& str,
+                                         const std::string& delimiters = " \t")
+{
+	std::vector<std::string> tokens;
+
+	// Start of the current token, or npos once only delimiters remain.
+	std::string::size_type tokenStart = str.find_first_not_of(delimiters);
+	while (tokenStart != std::string::npos)
+	{
+		const std::string::size_type tokenEnd = str.find_first_of(delimiters, tokenStart);
+		if (tokenEnd == std::string::npos)
+		{
+			// The last token runs to the end of the string.
+			tokens.push_back(str.substr(tokenStart));
+			break;
+		}
+		tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
+		tokenStart = str.find_first_not_of(delimiters, tokenEnd);
+	}
+
+	return tokens;
+}
+
+// Returns str without any leading or trailing characters found in dropChars.
+// A string made up solely of such characters yields the empty string.
+const std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r")
+{
+	const std::string::size_type first = str.find_first_not_of(dropChars);
+	if (first == std::string::npos)
+		return std::string();
+
+	// first exists, so a last kept character exists as well.
+	const std::string::size_type last = str.find_last_not_of(dropChars);
+	return str.substr(first, last - first + 1);
+}
+
+/**
+ * Extract the value of an attribute written as name="value" inside a tag.
+ *
+ * A double quote preceded by a backslash does not end the value; the
+ * backslash itself is kept in the returned text (no unescaping is done).
+ *
+ * \param tag text to search (tag contents without the angle brackets)
+ * \param attributeName name of the attribute to look up
+ * \return the raw attribute value; the empty string when the attribute is
+ *         absent, has an empty value, or has no closing quote (the latter
+ *         is also reported on cerr)
+ */
+std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attributeName){
+	/*TODO deal with unescaping \"*/
+	const std::string tagOpen = attributeName + "=\"";
+	std::string::size_type contentsStart = tag.find(tagOpen);
+	if (contentsStart == std::string::npos) return "";
+	contentsStart += tagOpen.size();
+
+	// Search from the first value character itself: starting one character
+	// later would step over the closing quote of an empty value (name="")
+	// and return text belonging to whatever follows it.
+	std::string::size_type contentsEnd = tag.find('"', contentsStart);
+	if (contentsEnd == std::string::npos) {
+		std::cerr << "Malformed XML attribute: "<< tag;
+		return "";
+	}
+
+	// A quote preceded by a backslash is escaped: extend the value to the
+	// next quote, if there is one.
+	while (contentsEnd > contentsStart && tag[contentsEnd-1] == '\\') {
+		const std::string::size_type possibleEnd = tag.find('"', contentsEnd+1);
+		if (possibleEnd == std::string::npos) break;
+		contentsEnd = possibleEnd;
+	}
+	return tag.substr(contentsStart,contentsEnd-contentsStart);
+}
+
+/**
+ * Strip the enclosing "<" and ">" from an XML token.
+ *
+ * \param str xml token to be stripped
+ * \return the text between the brackets; str unchanged when it is shorter
+ *         than two characters or is not enclosed in "<" ... ">"
+ */
+std::string TrimXml(const std::string& str)
+{
+	const bool bracketed = str.size() >= 2
+	                    && str[0] == '<'
+	                    && str[str.size() - 1] == '>';
+	return bracketed ? str.substr(1, str.size() - 2) : str;
+}
+
+/**
+ * Tell whether a token is an XML tag, i.e. whether its first character is "<".
+ *
+ * \param tag token to be checked
+ * \return true only for a non-empty token starting with "<"
+ */
+bool isXmlTag(const std::string& tag)
+{
+	if (tag.empty())
+		return false;
+	return tag[0] == '<';
+}
+
+/**
+ * Split the input string into tokens that are either complete XML tags
+ * ("<" up to and including the next ">") or the text between tags.
+ * example: this <b> is a </b> test .
+ *       => (this ), (<b>), ( is a ), (</b>), ( test .)
+ *
+ * When a "<" has no matching ">", an error is written to cerr and only the
+ * tokens collected before that point are returned.
+ *
+ * \param str input string
+ */
+inline std::vector<std::string> TokenizeXml(const std::string& str)
+{
+	std::vector<std::string> tokens; // tokens to be returned
+	std::string::size_type cursor = 0; // first character not yet tokenized
+
+	while (cursor != str.size())
+	{
+		// find the next opening "<" of an xml tag
+		const std::string::size_type open = str.find('<', cursor);
+		if (open == std::string::npos)
+		{
+			// no more tags: the rest of the string is one text token
+			tokens.push_back(str.substr(cursor));
+			break;
+		}
+
+		// sanity check: there has to be a closing ">"
+		const std::string::size_type close = str.find('>', open);
+		if (close == std::string::npos)
+		{
+			std::cerr << "ERROR: malformed XML: " << str << std::endl;
+			return tokens;
+		}
+
+		// text in front of the tag, if there is any
+		if (open != cursor)
+			tokens.push_back(str.substr(cursor, open - cursor));
+
+		// the tag itself, brackets included
+		tokens.push_back(str.substr(open, close - open + 1));
+		cursor = close + 1;
+	}
+	return tokens;
+}
+
+/**
+ * Strip the XML markup from a sentence and record the annotated spans
+ * in a syntax tree.
+ *
+ * Every completed tag (a closing tag that matches the most recently opened
+ * tag, or a unary tag) is added to the tree as a node labelled with the
+ * value of the opening tag's "label" attribute. The node covers the words
+ * seen between the opening and the closing tag, unless the opening tag
+ * carries a span="i-j" or span="i" attribute, which overrides the position.
+ *
+ * \param line in: sentence with xml markup, out: sentence without the xml
+ *             (left untouched when the function returns false or when the
+ *             line contains no '<')
+ * \param tree receives one node per completed tag via AddNode
+ * \param labelCollection receives the label of every completed tag
+ * \param topLabelCollection per-label count of the nodes that
+ *             tree.GetNodes(0, wordPos-1) returns once the line is parsed
+ * \return false after writing an error to cerr when the markup is malformed
+ *         (empty tag, mismatched or unclosed tags, bad span attribute,
+ *         tag spanning no word); true otherwise
+ */
+/*NOTE(review): the TODO below was carried over with this code and talks about
+	XML translation options, which this function does not produce - confirm
+	whether it can be dropped.
+	TODO: we'd only have to return a vector of XML options if we dropped linking. 2-d vector
+	is so we can link things up afterwards. We can't create TranslationOptions as we
+	parse because we don't have the completed source parsed until after this function
+	removes all the markup from it (CreateFromString in Sentence::Read).
+*/
+bool ProcessAndStripXMLTags(string &line, SyntaxTree &tree, set< string > &labelCollection, map< string, int > &topLabelCollection ) {
+	//parse XML markup in translation line
+	
+	// no xml tag? we're done.
+	if (line.find_first_of('<') == string::npos) { return true; }
+	
+	// break up input into a vector of xml tags and text
+  // example: (this), (<b>), (is a), (</b>), (test .)
+	vector<string> xmlTokens = TokenizeXml(line);
+	
+	// we need to store opened tags, until they are closed
+	// tags are stored as triples (tagname, startpos, contents)
+	typedef pair< string, pair< size_t, string > > OpenedTag;
+	vector< OpenedTag > tagStack; // stack that contains active opened tags
+	
+	string cleanLine; // return string (text without xml)
+	size_t wordPos = 0; // position in sentence (in terms of number of words)
+	bool isLinked = false; // never read in this function
+	
+  // loop through the tokens
+	for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++)
+	{
+    // not a xml tag, but regular text (may contain many words)
+		if(!isXmlTag(xmlTokens[xmlTokenPos]))
+		{
+			// add a space at boundary, if necessary
+			if (cleanLine.size()>0 &&
+			    cleanLine[cleanLine.size() - 1] != ' ' &&
+			    xmlTokens[xmlTokenPos][0] != ' ')
+			{
+				cleanLine += " ";
+			}
+			cleanLine += xmlTokens[xmlTokenPos]; // add to output
+			wordPos = Tokenize(cleanLine).size(); // count all the words (re-tokenizes the whole cleaned line so far)
+		}
+		
+		// process xml tag
+		else
+		{
+			// *** get essential information about tag ***
+			
+      // strip extra boundary spaces and "<" and ">"
+			string tag =  Trim(TrimXml(xmlTokens[xmlTokenPos]));
+			// cerr << "XML TAG IS: " << tag << std::endl;
+			
+			if (tag.size() == 0)
+			{
+				cerr << "ERROR: empty tag name: " << line << endl;
+				return false;
+			}
+			
+      // check if unary (e.g., "<wall/>")
+			bool isUnary = ( tag[tag.size() - 1] == '/' );
+			
+			// check if closing tag (e.g. "</a>"); everything else is treated as opening
+			bool isClosed = ( tag[0] == '/' );
+			bool isOpen = !isClosed;
+			
+			if (isClosed && isUnary)
+			{
+				cerr << "ERROR: can't have both closed and unary tag <" << tag << ">: " << line << endl;
+				return false;
+			}
+			
+			if (isClosed)
+				tag = tag.substr(1); // remove "/" at the beginning
+			if (isUnary)
+				tag = tag.substr(0,tag.size()-1); // remove "/" at the end
+			
+      // find the tag name and contents (contents = everything after the first space)
+			string::size_type endOfName = tag.find_first_of(' ');
+			string tagName = tag;
+			string tagContent = "";
+			if (endOfName != string::npos) {
+				tagName = tag.substr(0,endOfName);
+				tagContent = tag.substr(endOfName+1);
+			}
+			
+			// *** process new tag ***
+
+			if (isOpen || isUnary)
+			{
+				// put the tag on the tag stack
+				OpenedTag openedTag = make_pair( tagName, make_pair( wordPos, tagContent ) );
+				tagStack.push_back( openedTag );
+				// cerr << "XML TAG " << tagName << " (" << tagContent << ") added to stack, now size " << tagStack.size() << endl;
+			}
+
+			// *** process completed tag ***
+
+			// a unary tag was pushed just above and is popped again right here
+			if (isClosed || isUnary)
+			{
+				// pop last opened tag from stack;
+				if (tagStack.size() == 0)
+				{
+				    cerr << "ERROR: tag " << tagName << " closed, but not opened" << ":" << line << endl;
+					return false;
+				}
+				OpenedTag openedTag = tagStack.back();
+				tagStack.pop_back();
+				
+				// tag names have to match
+				if (openedTag.first != tagName)
+				{
+				    cerr << "ERROR: tag " << openedTag.first << " closed by tag " << tagName << ": " << line << endl;
+					return false;
+				}
+				 
+				// assemble remaining information about tag
+				// (this tagContent shadows the outer one: it is the content of the OPENING tag)
+				size_t startPos = openedTag.second.first;
+				string tagContent = openedTag.second.second;
+				size_t endPos = wordPos;
+
+				// span attribute overwrites position
+				string span = ParseXmlTagAttribute(tagContent,"span");
+				if (! span.empty()) 
+				{
+					vector<string> ij = Tokenize(span, "-");
+					if (ij.size() != 1 && ij.size() != 2) {
+					    cerr << "ERROR: span attribute must be of the form \"i-j\" or \"i\": " << line << endl;
+						return false;
+					}
+					// span values are inclusive; endPos is kept exclusive, hence the +1
+					startPos = atoi(ij[0].c_str());
+					if (ij.size() == 1) endPos = startPos + 1;
+					else endPos = atoi(ij[1].c_str()) + 1;
+				}
+
+				// cerr << "XML TAG " << tagName << " (" << tagContent << ") spanning " << startPos << " to " << (endPos-1) << " complete, commence processing" << endl;
+
+				if (startPos >= endPos)
+				{
+				    cerr << "ERROR: tag " << tagName << " must span at least one word (" << startPos << "-" << endPos << "): " << line << endl;
+					return false;
+				}
+
+				string label = ParseXmlTagAttribute(tagContent,"label");
+				labelCollection.insert( label );
+
+				// report what we have processed so far (disabled debug output)
+				if (0) {
+				  cerr << "XML TAG NAME IS: '" << tagName << "'" << endl;
+				  cerr << "XML TAG LABEL IS: '" << label << "'" << endl;
+				  cerr << "XML SPAN IS: " << startPos << "-" << (endPos-1) << endl;
+				}
+				// the tree takes an inclusive end position
+				tree.AddNode( startPos, endPos-1, label );
+			}
+		}
+	}
+	// we are done. check if there are tags that are still open
+	if (tagStack.size() > 0)
+	{
+		cerr << "ERROR: some opened tags were never closed: " << line << endl;
+		return false;
+	}
+
+	// collect top labels
+	// NOTE(review): wordPos-1 wraps around when the line contained no text at all - confirm GetNodes copes
+	const SyntaxNodes &topNodes = tree.GetNodes( 0, wordPos-1 );
+	for( SyntaxNodes::const_iterator node = topNodes.begin(); node != topNodes.end(); node++ )
+	{
+		const SyntaxNode *n = *node;
+		const string &label = n->GetLabel();
+		if (topLabelCollection.find( label ) == topLabelCollection.end())
+			topLabelCollection[ label ] = 0;
+		topLabelCollection[ label ]++;
+	}
+
+	// return de-xml'ed sentence in line
+	line = cleanLine;
+	return true;
+}
--- a/contrib/other-builds/extract-mixed-syntax/XmlTree.h
+++ b/contrib/other-builds/extract-mixed-syntax/XmlTree.h
@ -0,0 +1,35 @@
+#pragma once 
+
+// $Id: XmlOption.cpp 1960 2008-12-15 12:52:38Z phkoehn $
+// vim:tabstop=2
+
+/***********************************************************************
+  Moses - factored phrase-based language decoder
+  Copyright (C) 2006 University of Edinburgh
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 2.1 of the License, or (at your option) any later version.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#include <string>
+#include <vector>
+#include <set>
+#include <map>
+#include "SyntaxTree.h"
+
+// Value of attributeName="..." inside tag, or "" when it cannot be found.
+std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attributeName);
+// str without its enclosing "<" and ">" (unchanged when it has none).
+std::string TrimXml(const std::string& str);
+// True when the token starts with "<".
+bool isXmlTag(const std::string& tag);
+// Splits str into xml tags and the text between them.
+// NOTE(review): declared inline here but no definition is visible in this header;
+// an inline function needs a definition in every translation unit that uses it - confirm.
+inline std::vector<std::string> TokenizeXml(const std::string& str);
+// Removes the xml markup from line and adds the annotated spans to tree; returns false on malformed markup.
+bool ProcessAndStripXMLTags(std::string &line, SyntaxTree &tree, std::set< std::string > &labelCollection, std::map< std::string, int > &topLabelCollection );
--- a/contrib/other-builds/extract-mixed-syntax/extract.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/extract.cpp
@ -0,0 +1,310 @@
+// $Id: extract.cpp 2828 2010-02-01 16:07:58Z hieuhoang1972 $
+// vim:tabstop=2
+
+/***********************************************************************
+  Moses - factored phrase-based language decoder
+  Copyright (C) 2009 University of Edinburgh
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 2.1 of the License, or (at your option) any later version.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#include <cstdio>
+#include <stdlib.h>
+#include <assert.h>
+#include <time.h>
+#include <cstring>
+#include <sstream>
+#include <iostream>
+#include "extract.h"
+#include "InputFileStream.h"
+#include "OutputFileStream.h"
+#include "Lattice.h"
+
+#ifdef WIN32
+// Include Visual Leak Detector
+#include <vld.h>
+#endif
+
+using namespace std;
+
+void writeGlueGrammar(const string &, Global &options, set< string > &targetLabelCollection, map< string, int > &targetTopLabelCollection);
+
+// Parses the integer that follows option argv[i] and advances i onto it.
+// Terminates with an error when the option is the last argument, so that
+// argv is never read past its end.
+static int parseIntOption(int argc, char* argv[], int &i)
+{
+	if (i + 1 >= argc) {
+		cerr << "extract error: option " << argv[i] << " requires a value" << endl;
+		exit(1);
+	}
+	return atoi(argv[++i]);
+}
+
+// Entry point of the rule extractor.
+//
+// Positional arguments: target corpus, source corpus, alignment file and the
+// name of the extract output file; options follow from argv[5] on. For every
+// line of the target corpus one line is read from each of the other two
+// files; when the resulting sentence pair can be created, its rules are
+// written to the extract file and to the ".inv" file (both get a ".gz"
+// suffix with --GZOutput).
+int main(int argc, char* argv[]) 
+{
+  cerr << "Extract v2.0, written by Philipp Koehn\n"
+       << "rule extraction from an aligned parallel corpus\n";
+  //time_t starttime = time(NULL);
+	
+	Global *global = new Global();
+	g_global = global;
+	int sentenceOffset = 0;
+		
+	if (argc < 5) {
+		cerr << "syntax: extract-mixed-syntax corpus.target corpus.source corpus.align extract "
+		     << " [ --Hierarchical | --Orientation"
+				 << " | --GlueGrammar FILE | --UnknownWordLabel FILE"
+				 << " | --OnlyDirect"
+					
+					<< " | --MinHoleSpanSourceDefault[" << global->minHoleSpanSourceDefault << "]"
+					<< " | --MaxHoleSpanSourceDefault[" << global->maxHoleSpanSourceDefault << "]"
+					<< " | --MinHoleSpanSourceSyntax[" << global->minHoleSpanSourceSyntax << "]"
+					<< " | --MaxHoleSpanSourceSyntax[" << global->maxHoleSpanSourceSyntax << "]"
+
+				<< " | --MaxSymbols[" << global->maxSymbols<< "]"
+				 << " | --MaxNonTerm[" << global->maxNonTerm << "]"
+		     << " | --SourceSyntax | --TargetSyntax" 
+				<<	" | --UppermostOnly[" << g_global->uppermostOnly << "]"
+				<< endl;
+		exit(1);
+	}
+  char* &fileNameT = argv[1];
+  char* &fileNameS = argv[2];
+  char* &fileNameA = argv[3];
+	string fileNameGlueGrammar;
+ 	string fileNameUnknownWordLabel;
+	string fileNameExtract = string(argv[4]);
+
+	int optionInd = 5;
+
+  for(int i=optionInd;i<argc;i++) 
+	{
+		if (strcmp(argv[i],"--MinHoleSpanSourceDefault") == 0) {
+			global->minHoleSpanSourceDefault = parseIntOption(argc, argv, i);
+			if (global->minHoleSpanSourceDefault < 1) {
+				cerr << "extract error: --minHoleSourceDefault should be at least 1" << endl;
+				exit(1);
+			}
+		}
+		else if (strcmp(argv[i],"--MaxHoleSpanSourceDefault") == 0) {
+			global->maxHoleSpanSourceDefault = parseIntOption(argc, argv, i);
+			if (global->maxHoleSpanSourceDefault < 1) {
+				cerr << "extract error: --maxHoleSourceDefault should be at least 1" << endl;
+				exit(1);
+			}
+		}
+		else  if (strcmp(argv[i],"--MinHoleSpanSourceSyntax") == 0) {
+			global->minHoleSpanSourceSyntax = parseIntOption(argc, argv, i);
+			if (global->minHoleSpanSourceSyntax < 1) {
+				cerr << "extract error: --minHoleSourceSyntax should be at least 1" << endl;
+				exit(1);
+			}
+		}
+		else if (strcmp(argv[i],"--UppermostOnly") == 0) {
+			global->uppermostOnly = parseIntOption(argc, argv, i);
+		}
+		else if (strcmp(argv[i],"--MaxHoleSpanSourceSyntax") == 0) {
+			global->maxHoleSpanSourceSyntax = parseIntOption(argc, argv, i);
+			if (global->maxHoleSpanSourceSyntax < 1) {
+				cerr << "extract error: --maxHoleSourceSyntax should be at least 1" << endl;
+				exit(1);
+			}
+		}
+		
+		// maximum number of words in hierarchical phrase
+		// (the usage text advertises --MaxSymbols; the historical lower-case
+		// spelling stays accepted for existing scripts)
+		else if (strcmp(argv[i],"--MaxSymbols") == 0 || strcmp(argv[i],"--maxSymbols") == 0) {
+			global->maxSymbols = parseIntOption(argc, argv, i);
+			if (global->maxSymbols < 1) {
+				cerr << "extract error: --maxSymbols should be at least 1" << endl;
+				exit(1);
+			}
+		}
+		// maximum number of non-terminals
+		else if (strcmp(argv[i],"--MaxNonTerm") == 0) {
+			global->maxNonTerm = parseIntOption(argc, argv, i);
+			if (global->maxNonTerm < 1) {
+				cerr << "extract error: --MaxNonTerm should be at least 1" << endl;
+				exit(1);
+			}
+		}		
+    else if (strcmp(argv[i],"--TargetSyntax") == 0) {
+      global->targetSyntax = true;
+    }
+    else if (strcmp(argv[i],"--SourceSyntax") == 0) {
+      global->sourceSyntax = true;
+    }
+		// do not create many part00xx files!
+    else if (strcmp(argv[i],"--NoFileLimit") == 0) {
+      // now default
+    }
+		else if (strcmp(argv[i],"--GlueGrammar") == 0) {
+			global->glueGrammarFlag = true;
+			if (++i >= argc)
+			{
+				cerr << "ERROR: Option --GlueGrammar requires a file name" << endl;
+				exit(1); // a usage error must not report success
+			}
+			fileNameGlueGrammar = string(argv[i]);
+			cerr << "creating glue grammar in '" << fileNameGlueGrammar << "'" << endl;
+    }
+		else if (strcmp(argv[i],"--UnknownWordLabel") == 0) {
+			global->unknownWordLabelFlag = true;
+			if (++i >= argc)
+			{
+				cerr << "ERROR: Option --UnknownWordLabel requires a file name" << endl;
+				exit(1); // a usage error must not report success
+			}
+			fileNameUnknownWordLabel = string(argv[i]);
+			cerr << "creating unknown word labels in '" << fileNameUnknownWordLabel << "'" << endl;
+		}
+		// TODO: this should be a useful option
+    //else if (strcmp(argv[i],"--ZipFiles") == 0) {
+    //  zipFiles = true;
+    //}
+    else if (strcmp(argv[i],"--Mixed") == 0) {
+			global->mixed = true;
+    }
+		else if (strcmp(argv[i],"--AllowDefaultNonTermEdge") == 0) {
+			global->allowDefaultNonTermEdge = parseIntOption(argc, argv, i);
+    }
+		else if (strcmp(argv[i], "--GZOutput") == 0) {
+      global->gzOutput = true;
+    }
+		else if (strcmp(argv[i],"--MaxSpan") == 0) {
+		  // accepted for compatibility: the option and its value are ignored
+      ++i;
+		}
+    else if (strcmp(argv[i],"--SentenceOffset") == 0) {
+      if (i+1 >= argc || argv[i+1][0] < '0' || argv[i+1][0] > '9') {
+        cerr << "extract: syntax error, used switch --SentenceOffset without a number" << endl;
+        exit(1);
+      }
+      sentenceOffset = atoi(argv[++i]);
+    }
+    else {
+      cerr << "extract: syntax error, unknown option '" << string(argv[i]) << "'\n";
+      exit(1);
+    }
+  }
+
+
+	// open input files
+	Moses::InputFileStream tFile(fileNameT);
+	Moses::InputFileStream sFile(fileNameS);
+	Moses::InputFileStream aFile(fileNameA);
+
+	// open output files
+  string fileNameExtractInv = fileNameExtract + ".inv";
+  if (global->gzOutput) {
+    fileNameExtract += ".gz";
+    fileNameExtractInv += ".gz";
+  }
+
+  Moses::OutputFileStream extractFile;
+  Moses::OutputFileStream extractFileInv;
+  extractFile.Open(fileNameExtract.c_str());
+  extractFileInv.Open(fileNameExtractInv.c_str());
+  
+  
+	// loop through all sentence pairs
+  int i = sentenceOffset;
+  while(true) {
+    i++;
+
+    // progress indicator on stderr
+    if (i % 1000 == 0) {
+      cerr << i << " " << flush;
+    }
+
+    string targetString;
+    string sourceString;
+    string alignmentString;
+		
+		// the target corpus drives the loop: stop at its end.
+		// (tested directly: a stream does not convert implicitly to bool)
+		if (!getline(tFile, targetString))
+			break;
+		getline(sFile, sourceString);
+		getline(aFile, alignmentString);
+    
+		//cerr << endl << targetString << endl << sourceString << endl << alignmentString << endl;
+
+		//time_t currTime = time(NULL);
+		//cerr << "A " << (currTime - starttime) << endl;
+
+    SentenceAlignment sentencePair;
+    if (sentencePair.Create( targetString, sourceString, alignmentString, i, *global )) 
+		{			
+			//cerr << sentence.sourceTree << endl;
+			//cerr << sentence.targetTree << endl;
+
+			sentencePair.FindTunnels(*g_global);
+			//cerr << "C " << (time(NULL) - starttime) << endl;
+			//cerr << sentencePair << endl;
+			
+			sentencePair.CreateLattice(*g_global);
+			//cerr << "D " << (time(NULL) - starttime) << endl;
+			//cerr << sentencePair << endl;
+
+			sentencePair.CreateRules(*g_global);
+			//cerr << "E " << (time(NULL) - starttime) << endl;
+
+			//cerr << sentence.lattice->GetRules().GetSize() << endl;
+			sentencePair.GetLattice().GetRules().Output(extractFile);
+      sentencePair.GetLattice().GetRules().OutputInv(extractFileInv);
+    }
+  }
+	
+  tFile.Close();
+  sFile.Close();
+  aFile.Close();
+
+  extractFile.Close();
+  extractFileInv.Close();
+
+  if (global->glueGrammarFlag) {
+    writeGlueGrammar(fileNameGlueGrammar, *global, targetLabelCollection, targetTopLabelCollection);
+  }
+
+  delete global;
+}
+ 
+
+// Writes the glue grammar to fileName.
+//
+// Without target syntax (options.targetSyntax false) three fixed rules over
+// the labels X and S are written. With target syntax a top label is chosen
+// (the shortest prefix of "QQQQQQ" that is not in targetLabelCollection) and
+// the grammar gets the two basic rules, one top rule per entry of
+// targetTopLabelCollection, one glue rule per entry of targetLabelCollection
+// and a final glue rule for [X].
+//
+// A file that cannot be opened is reported on cerr and nothing is written.
+void writeGlueGrammar( const string & fileName, Global &options, set< string > &targetLabelCollection, map< string, int > &targetTopLabelCollection )
+{
+  ofstream grammarFile;
+  grammarFile.open(fileName.c_str());
+  if (!grammarFile) {
+    // previously every write below failed silently
+    cerr << "ERROR: could not open glue grammar file '" << fileName << "'" << endl;
+    return;
+  }
+
+  if (!options.targetSyntax) {
+    grammarFile << "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0" << endl
+                << "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0" << endl
+                << "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" << endl;
+  } else {
+    // choose a top label that is not already a label:
+    // "Q", then "QQ", ... up to the full "QQQQQQ"
+    // (if every prefix is taken, the full string is used regardless)
+    string topLabel = "QQQQQQ";
+    for( unsigned int i=1; i<=topLabel.length(); i++) {
+      if(targetLabelCollection.find( topLabel.substr(0,i) ) == targetLabelCollection.end() ) {
+        topLabel = topLabel.substr(0,i);
+        break;
+      }
+    }
+    // basic rules
+    grammarFile << "<s> [X] ||| <s> [" << topLabel << "] ||| 1  ||| " << endl
+                << "[X][" << topLabel << "] </s> [X] ||| [X][" << topLabel << "] </s> [" << topLabel << "] ||| 1 ||| 0-0 " << endl;
+
+    // top rules
+    for( map<string,int>::const_iterator i =  targetTopLabelCollection.begin();
+         i !=  targetTopLabelCollection.end(); i++ ) {
+      grammarFile << "<s> [X][" << i->first << "] </s> [X] ||| <s> [X][" << i->first << "] </s> [" << topLabel << "] ||| 1 ||| 1-1" << endl;
+    }
+
+    // glue rules
+    for( set<string>::const_iterator i =  targetLabelCollection.begin();
+         i !=  targetLabelCollection.end(); i++ ) {
+      grammarFile << "[X][" << topLabel << "] [X][" << *i << "] [X] ||| [X][" << topLabel << "] [X][" << *i << "] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1" << endl;
+    }
+    grammarFile << "[X][" << topLabel << "] [X][X] [X] ||| [X][" << topLabel << "] [X][X] [" << topLabel << "] ||| 2.718 |||  0-0 1-1 " << endl; // glue rule for unknown word...
+  }
+  grammarFile.close();
+}
+
--- a/contrib/other-builds/extract-mixed-syntax/extract.h
+++ b/contrib/other-builds/extract-mixed-syntax/extract.h
@ -0,0 +1,34 @@
+#pragma once
+
+#include <vector>
+#include <list>
+#include <map>
+#include <set>
+#include <string>
+#include <fstream>
+#include <algorithm>
+#include "SyntaxTree.h"
+#include "XmlTree.h"
+#include "Tunnel.h"
+#include "TunnelCollection.h"
+#include "SentenceAlignment.h"
+#include "Global.h"
+
+// Splits a C string into its space/tab separated words.
+std::vector<std::string> tokenize( const char [] );
+
+// Reads one _DELIM-terminated line of at most _SIZE-1 characters from _IS into
+// the buffer _LINE. A set failbit is cleared unless the stream is bad or at
+// end of file. When the buffer was filled completely the line is considered
+// too long: an error is printed and the program exits with status 1.
+// Expects cerr and endl to be usable unqualified at the point of use.
+#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) { \
+                _IS.getline(_LINE, _SIZE, _DELIM); \
+                if(_IS.fail() && !_IS.bad() && !_IS.eof()) _IS.clear(); \
+                if (_IS.gcount() == _SIZE-1) { \
+                  cerr << "Line too long! Buffer overflow. Delete lines >=" \
+                    << _SIZE << " chars or raise LINE_MAX_LENGTH in phrase-extract/extract.cpp" \
+                    << endl; \
+                    exit(1); \
+                } \
+              }
+#define LINE_MAX_LENGTH 1000000
+
+// NOTE(review): the variables below are DEFINED (not just declared) in this
+// header; including it from more than one translation unit would produce
+// duplicate definitions at link time - confirm it is only included once.
+
+// Program-wide options.
+const Global *g_global;
+
+// Label collections for the target and the source side.
+std::set< std::string > targetLabelCollection, sourceLabelCollection;
+// Per-label counters for the target and the source side.
+std::map< std::string, int > targetTopLabelCollection, sourceTopLabelCollection;
--- a/contrib/other-builds/extract-mixed-syntax/gzfilebuf.h
+++ b/contrib/other-builds/extract-mixed-syntax/gzfilebuf.h
@ -0,0 +1,81 @@
+#ifndef moses_gzfile_buf_h
+#define moses_gzfile_buf_h
+
+#include <streambuf>
+#include <zlib.h>
+#include <cstring>
+
+// Read-only stream buffer that decompresses a gzip file through zlib.
+//
+// The first sizeof(int) bytes of the internal buffer are reserved as putback
+// area; the remainder is refilled with gzread whenever the get area runs
+// empty. Writing and seeking are not supported and report failure.
+class gzfilebuf : public std::streambuf {
+public:
+  // Opens filename for reading. A file that cannot be opened leaves the
+  // handle null; every read then reports end of file.
+  gzfilebuf(const char *filename)
+  { _gzf = gzopen(filename, "rb"); 
+    // start with an empty get area so that the first read calls underflow()
+    setg (_buff+sizeof(int),     // beginning of putback area
+          _buff+sizeof(int),     // read position
+          _buff+sizeof(int));    // end position
+  }
+  ~gzfilebuf() { if (_gzf) gzclose(_gzf); }
+protected:
+  // Writing is not supported: report failure to the stream.
+  // (The former bare "throw;" had no exception to rethrow and therefore
+  // terminated the program.)
+  virtual int_type overflow (int_type c) {
+    (void)c;
+    return traits_type::eof();
+  }
+	
+  // write multiple characters: not supported, nothing is written
+  virtual
+  std::streamsize xsputn (const char* s,
+                          std::streamsize num) {
+    (void)s;
+    (void)num;
+    return 0;
+  }
+	
+  // Seeking is not supported: return the invalid position.
+  virtual std::streampos seekpos ( std::streampos sp, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out ){
+    (void)sp;
+    (void)which;
+    return std::streampos(std::streamoff(-1));
+  }
+	
+  //read one character
+  virtual int_type underflow () {
+    // is read position before end of _buff?
+		if (gptr() < egptr()) {
+			return traits_type::to_int_type(*gptr());
+		}
+		
+		/* process size of putback area
+		 * - use number of characters read
+		 * - but at most sizeof(int)
+		 */
+		unsigned int numPutback = gptr() - eback();
+		if (numPutback > sizeof(int)) {
+			numPutback = sizeof(int);
+		}
+		
+		/* copy the characters previously read into
+		 * the putback area at the front of _buff
+		 */
+		std::memmove (_buff+(sizeof(int)-numPutback), gptr()-numPutback,
+									numPutback);
+		
+		// read new characters
+		int num = gzread(_gzf, _buff+sizeof(int), _buffsize-sizeof(int));
+		if (num <= 0) {
+			// ERROR or EOF
+			return traits_type::eof();
+		}
+		
+		// reset _buff pointers
+		setg (_buff+(sizeof(int)-numPutback),   // beginning of putback area
+					_buff+sizeof(int),                // read position
+					_buff+sizeof(int)+num);           // end of buffer
+		
+		// return next character
+		return traits_type::to_int_type(*gptr());
+  }
+	
+  // Bulk read. Goes through the get area (and underflow) like every other
+  // read; reading from the file directly would skip characters that are
+  // already buffered and could return a negative count on error.
+  std::streamsize xsgetn (char* s,
+                          std::streamsize num) {
+    return std::streambuf::xsgetn(s, num);
+  }
+	
+private:
+  // Not copyable: a copy would close the same gzFile a second time.
+  gzfilebuf(const gzfilebuf &);
+  gzfilebuf &operator=(const gzfilebuf &);
+
+  gzFile _gzf;
+  static const unsigned int _buffsize = 1024;
+  char _buff[_buffsize];
+};
+
+#endif
--- a/contrib/other-builds/extract-mixed-syntax/tables-core.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/tables-core.cpp
@ -0,0 +1,110 @@
+// $Id: tables-core.cpp 3131 2010-04-13 16:29:55Z pjwilliams $
+//#include "beammain.h"
+//#include "SafeGetLine.h"
+#include "tables-core.h"
+
+#define TABLE_LINE_MAX_LENGTH 1000
+#define UNKNOWNSTR	"UNK"
+
+// as in beamdecoder/tables.cpp
+vector<string> tokenize( const char* input ) {
+  // Splits a NUL-terminated string on spaces and tabs. Runs of separators
+  // (including leading and trailing ones) never produce empty tokens.
+  vector< string > words;
+  const char* cursor = input;
+  while (*cursor != '\0') {
+    // skip the separators in front of the next word
+    while (*cursor == ' ' || *cursor == '\t')
+      ++cursor;
+    if (*cursor == '\0')
+      break;
+
+    // consume the word itself
+    const char* wordBegin = cursor;
+    while (*cursor != '\0' && *cursor != ' ' && *cursor != '\t')
+      ++cursor;
+    words.push_back( string( wordBegin, cursor - wordBegin ) );
+  }
+  return words;
+}
+
+// Returns the id of `word`, registering it first if it is not known yet.
+// A new word receives the next free index of `vocab` as its id.
+WORD_ID Vocabulary::storeIfNew( const WORD& word ) {
+  map<WORD, WORD_ID>::iterator known = lookup.find( word );
+  if( known == lookup.end() ) {
+    // unseen word: its id is its position in the vocab vector
+    const WORD_ID freshId = vocab.size();
+    vocab.push_back( word );
+    lookup[ word ] = freshId;
+    return freshId;
+  }
+  return known->second;
+}
+
+// Looks up the id of `word` without modifying the vocabulary.
+// Unknown words yield 0, which is also the id given to the first stored
+// word, so the result alone does not tell the two cases apart.
+WORD_ID Vocabulary::getWordID( const WORD& word ) {
+  map<WORD, WORD_ID>::iterator hit = lookup.find( word );
+  if( hit != lookup.end() )
+    return hit->second;
+  return 0;
+}
+
+// Returns the id of `phrase`, adding it to the table if it is not stored yet.
+// A new phrase receives the next free index of `phraseTable` as its id.
+PHRASE_ID PhraseTable::storeIfNew( const PHRASE& phrase ) {
+  map< PHRASE, PHRASE_ID >::iterator known = lookup.find( phrase );
+  if( known == lookup.end() ) {
+    // unseen phrase: its id is its position in the phraseTable vector
+    const PHRASE_ID freshId = phraseTable.size();
+    phraseTable.push_back( phrase );
+    lookup[ phrase ] = freshId;
+    return freshId;
+  }
+  return known->second;
+}
+
+// Looks up the id of `phrase` without modifying the table.
+// Unknown phrases yield 0, which is also the id of the first stored phrase.
+PHRASE_ID PhraseTable::getPhraseID( const PHRASE& phrase ) {
+  map< PHRASE, PHRASE_ID >::iterator hit = lookup.find( phrase );
+  if( hit != lookup.end() )
+    return hit->second;
+  return 0;
+}
+
+// Empties the table: drops both the stored phrases and the phrase -> id index.
+void PhraseTable::clear() {
+  phraseTable.clear();
+  lookup.clear();
+}
+
+// Fills the table with a default cost of -|d| for every distortion d in
+// the half-open range [-10, 10), i.e. -10 .. 9.
+void DTable::init() {
+  int distance = -10;
+  while (distance < 10) {
+    dtable[distance] = -abs( distance );
+    ++distance;
+  }
+}
+
+/*
+void DTable::load( const string& fileName ) {
+  ifstream inFile;
+  inFile.open(fileName.c_str());
+  istream *inFileP = &inFile;
+	
+  char line[TABLE_LINE_MAX_LENGTH];
+  int i=0;
+  while(true) {
+    i++;
+    SAFE_GETLINE((*inFileP), line, TABLE_LINE_MAX_LENGTH, '\n', __FILE__);
+    if (inFileP->eof()) break;
+		
+    vector<string> token = tokenize( line );
+    if (token.size() < 2) {
+      cerr << "line " << i << " in " << fileName << " too short, skipping\n";
+      continue;
+    }
+		
+    int d = atoi( token[0].c_str() );
+    double prob = log( atof( token[1].c_str() ) );
+    dtable[ d ] = prob;
+  }  
+}
+*/
+
+// Returns the stored value for `distortion`, or log(0.00001) when the table
+// has no entry for it. The table itself is never modified.
+double DTable::get( int distortion ) {
+  map< int, double >::iterator entry = dtable.find( distortion );
+  if (entry != dtable.end())
+    return entry->second;
+  return log( 0.00001 );
+}
+
--- a/contrib/other-builds/extract-mixed-syntax/tables-core.h
+++ b/contrib/other-builds/extract-mixed-syntax/tables-core.h
@ -0,0 +1,72 @@
+#pragma once
+// $Id: tables-core.h 2416 2009-07-30 11:07:38Z hieuhoang1972 $
+
+#include <iostream>
+#include <fstream>
+#include <assert.h>
+#include <stdlib.h>
+#include <string>
+#include <queue>
+#include <map>
+#include <cmath>
+
+using namespace std;
+
+#define TABLE_LINE_MAX_LENGTH 1000
+#define UNKNOWNSTR	"UNK"
+
+vector<string> tokenize( const char[] );
+
+//! delete and remove every element of a collection object such as map, set, list etc
+template<class COLL>
+void RemoveAllInColl(COLL &coll)
+{
+	// First free what every element points to, then drop the (now dangling)
+	// pointers from the container itself.
+	typename COLL::const_iterator cur = coll.begin();
+	while (cur != coll.end())
+	{
+		delete (*cur);
+		++cur;
+	}
+	coll.clear();
+}
+
+typedef string WORD;
+typedef unsigned int WORD_ID;
+
+// Two-way mapping between words and integer ids.
+class Vocabulary {
+ public:
+  map<WORD, WORD_ID>  lookup;  // word -> id
+  vector< WORD > vocab;        // id -> word
+  WORD_ID storeIfNew( const WORD& );
+  WORD_ID getWordID( const WORD& );
+  // Returns the word stored under `id`. No bounds check is performed, and
+  // constness is cast away: the reference is mutable even though the method
+  // is const.
+  WORD &getWord( WORD_ID id ) const {
+    return const_cast<WORD&>( vocab[ id ] );
+  }
+};
+
+typedef vector< WORD_ID > PHRASE;
+typedef unsigned int PHRASE_ID;
+
+// Two-way mapping between phrases (sequences of word ids) and integer ids.
+class PhraseTable {
+ public:
+  map< PHRASE, PHRASE_ID > lookup;  // phrase -> id
+  vector< PHRASE > phraseTable;     // id -> phrase
+  PHRASE_ID storeIfNew( const PHRASE& );
+  PHRASE_ID getPhraseID( const PHRASE& );
+  void clear();
+  // Returns the phrase stored under `id`; no bounds check is performed.
+  PHRASE &getPhrase( const PHRASE_ID id ) {
+    return phraseTable[ id ];
+  }
+};
+
+typedef vector< pair< PHRASE_ID, double > > PHRASEPROBVEC;
+
+// Plain container for tables keyed by phrase id.
+class TTable {
+ public:
+  // phrase id -> list of (phrase id, single value)
+  map< PHRASE_ID, PHRASEPROBVEC > ttable;
+  // phrase id -> list of (phrase id, vector of values)
+  map< PHRASE_ID, vector< pair< PHRASE_ID, vector< double > > > > ttableMulti;
+};
+
+// Table of values indexed by an integer distortion.
+class DTable {
+ public:
+  std::map< int, double > dtable;  // distortion -> value
+  void init();
+  // NOTE(review): in the accompanying tables-core.cpp the body of load() is
+  // commented out, so calling it would not link -- confirm before use.
+  void load( const std::string& );
+  double get( int );
+};
+
+
--- a/contrib/other-builds/extract-ordering/.cproject
+++ b/contrib/other-builds/extract-ordering/.cproject
@ -0,0 +1,126 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+	<storageModule moduleId="org.eclipse.cdt.core.settings">
+		<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
+					<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.499747849" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
+							<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.798364121" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
+							<builder buildPath="${workspace_loc:/extract-ordering}/Debug" id="cdt.managedbuild.builder.gnu.cross.1976289814" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1699460827" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
+								<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1324749613" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+								<option id="gnu.c.compiler.option.debugging.level.1750299246" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.719498215" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1317297964" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
+								<option id="gnu.cpp.compiler.option.optimization.level.251118848" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.debugging.level.99297656" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.include.paths.106920816" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
+								</option>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1327002489" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1844372739" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1178164658" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
+								<option id="gnu.cpp.link.option.libs.1434184833" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
+									<listOptionValue builtIn="false" value="z"/>
+									<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
+									<listOptionValue builtIn="false" value="boost_system-mt"/>
+									<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
+								</option>
+								<option id="gnu.cpp.link.option.paths.974811544" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
+								</option>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.904916320" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1005231499" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1318928675" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.604255673" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+		<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.818331963">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.818331963" moduleId="org.eclipse.cdt.core.settings" name="Release">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.818331963" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
+					<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.818331963." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1489025499" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
+							<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1052477856" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
+							<builder buildPath="${workspace_loc:/extract-ordering}/Release" id="cdt.managedbuild.builder.gnu.cross.33925527" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1505710417" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
+								<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1884790737" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+								<option id="gnu.c.compiler.option.debugging.level.197048136" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.106898878" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.157115446" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
+								<option id="gnu.cpp.compiler.option.optimization.level.1920378037" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.debugging.level.37950410" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.683027595" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1197641703" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1356351201" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2053623412" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1988048517" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1494470963" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1553727957" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+	</storageModule>
+	<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+		<project id="extract-ordering.cdt.managedbuild.target.gnu.cross.exe.1840421491" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
+	</storageModule>
+	<storageModule moduleId="scannerConfiguration">
+		<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.818331963;cdt.managedbuild.config.gnu.cross.exe.release.818331963.;cdt.managedbuild.tool.gnu.cross.c.compiler.1505710417;cdt.managedbuild.tool.gnu.c.compiler.input.106898878">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.818331963;cdt.managedbuild.config.gnu.cross.exe.release.818331963.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.157115446;cdt.managedbuild.tool.gnu.cpp.compiler.input.683027595">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127;cdt.managedbuild.config.gnu.cross.exe.debug.1624346127.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1317297964;cdt.managedbuild.tool.gnu.cpp.compiler.input.1327002489">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127;cdt.managedbuild.config.gnu.cross.exe.debug.1624346127.;cdt.managedbuild.tool.gnu.cross.c.compiler.1699460827;cdt.managedbuild.tool.gnu.c.compiler.input.719498215">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+	</storageModule>
+	<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
+</cproject>
--- a/contrib/other-builds/extract-ordering/.project
+++ b/contrib/other-builds/extract-ordering/.project
@ -0,0 +1,74 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>extract-ordering</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+			<triggers>clean,full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+			<triggers>full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.cdt.core.cnature</nature>
+		<nature>org.eclipse.cdt.core.ccnature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
+	</natures>
+	<linkedResources>
+		<link>
+			<name>InputFileStream.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp</locationURI>
+		</link>
+		<link>
+			<name>InputFileStream.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h</locationURI>
+		</link>
+		<link>
+			<name>OutputFileStream.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp</locationURI>
+		</link>
+		<link>
+			<name>OutputFileStream.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
+		</link>
+		<link>
+			<name>SentenceAlignment.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp</locationURI>
+		</link>
+		<link>
+			<name>SentenceAlignment.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h</locationURI>
+		</link>
+		<link>
+			<name>extract-ordering-main.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ordering-main.cpp</locationURI>
+		</link>
+		<link>
+			<name>tables-core.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp</locationURI>
+		</link>
+		<link>
+			<name>tables-core.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h</locationURI>
+		</link>
+	</linkedResources>
+</projectDescription>
--- a/contrib/other-builds/extract-rules/.project
+++ b/contrib/other-builds/extract-rules/.project
@ -65,6 +65,11 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
 		</link>
+		<link>
+			<name>RuleExtractionOptions.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/RuleExtractionOptions.h</locationURI>
+		</link>
 		<link>
 			<name>SentenceAlignment.cpp</name>
 			<type>1</type>
--- a/contrib/other-builds/extractor/.cproject
+++ b/contrib/other-builds/extractor/.cproject
@ -5,12 +5,12 @@
 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.1133345948" moduleId="org.eclipse.cdt.core.settings" name="Debug">
 				<externalSettings/>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -23,7 +23,7 @@
 							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1512268277" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
 								<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.2143789149" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
 								<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.285958391" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
-								<option id="gnu.cpp.compiler.option.include.paths.966722418" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+								<option id="gnu.cpp.compiler.option.include.paths.966722418" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
 								</option>
 								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1839105433" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
@ -36,11 +36,13 @@
 							<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1048685119" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
 							<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1295498016" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
 								<option id="gnu.cpp.link.option.paths.338150127" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/mert_lib/Debug&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
 								</option>
 								<option id="gnu.cpp.link.option.libs.585257079" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
 									<listOptionValue builtIn="false" value="mert_lib"/>
+									<listOptionValue builtIn="false" value="boost_system-mt"/>
 									<listOptionValue builtIn="false" value="util"/>
 									<listOptionValue builtIn="false" value="z"/>
 								</option>
@ -62,12 +64,12 @@
 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1385955159" moduleId="org.eclipse.cdt.core.settings" name="Release">
 				<externalSettings/>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -131,4 +133,5 @@
 		</configuration>
 	</storageModule>
 	<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
+	<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
 </cproject>
--- a/contrib/other-builds/extractor/.project
+++ b/contrib/other-builds/extractor/.project
@ -82,10 +82,515 @@
 		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
 	</natures>
 	<linkedResources>
+		<link>
+			<name>bin</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
 		<link>
 			<name>extractor.cpp</name>
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/mert/extractor.cpp</locationURI>
 		</link>
+		<link>
+			<name>bin/gcc-4.8</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/BleuDocScorer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/BleuDocScorer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/BleuScorer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/BleuScorer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/BleuScorerTest.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/BleuScorerTest.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/CderScorer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/CderScorer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Data.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Data.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/DataTest.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/DataTest.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/FeatureArray.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/FeatureArray.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/FeatureData.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/FeatureData.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/FeatureDataIterator.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/FeatureDataIterator.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/FeatureDataTest.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/FeatureDataTest.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/FeatureStats.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/FeatureStats.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/FileStream.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/FileStream.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/GzFileBuf.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/GzFileBuf.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/HypPackEnumerator.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/HypPackEnumerator.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/InterpolatedScorer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/InterpolatedScorer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/MeteorScorer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/MeteorScorer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/MiraFeatureVector.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/MiraFeatureVector.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/MiraWeightVector.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/MiraWeightVector.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/NgramTest.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/NgramTest.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Optimizer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Optimizer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/OptimizerFactory.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/OptimizerFactory.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/OptimizerFactoryTest.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/OptimizerFactoryTest.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PerScorer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PerScorer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Permutation.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Permutation.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PermutationScorer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PermutationScorer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Point.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Point.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PointTest.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PointTest.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PreProcessFilter.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/PreProcessFilter.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ReferenceTest.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ReferenceTest.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ScoreArray.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ScoreArray.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ScoreData.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ScoreData.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ScoreDataIterator.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ScoreDataIterator.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ScoreStats.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ScoreStats.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Scorer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Scorer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ScorerFactory.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ScorerFactory.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/SemposOverlapping.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/SemposOverlapping.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/SemposScorer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/SemposScorer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/SentenceLevelScorer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/SentenceLevelScorer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/SingletonTest.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/SingletonTest.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/StatisticsBasedScorer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/StatisticsBasedScorer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TerScorer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TerScorer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ThreadPool.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ThreadPool.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Timer.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Timer.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TimerTest.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TimerTest.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Util.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Util.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/UtilTest.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/UtilTest.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Vocabulary.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/Vocabulary.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/VocabularyTest.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/VocabularyTest.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/bleu_scorer_test</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/bleu_scorer_test</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/bleu_scorer_test.passed</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/bleu_scorer_test.passed</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/data_test</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/data_test</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/data_test.passed</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/data_test.passed</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/evaluator</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/evaluator</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/evaluator.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/evaluator.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/extractor</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/extractor</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/extractor.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/extractor.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/feature_data_test</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/feature_data_test</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/feature_data_test.passed</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/feature_data_test.passed</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/kbmira</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/kbmira</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/kbmira.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/kbmira.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/libmert_lib.a</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/libmert_lib.a</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mert</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mert</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mert.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mert.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ngram_test</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ngram_test</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ngram_test.passed</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ngram_test.passed</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/optimizer_factory_test</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/optimizer_factory_test</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/optimizer_factory_test.passed</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/optimizer_factory_test.passed</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/point_test</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/point_test</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/point_test.passed</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/point_test.passed</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/pro</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/pro</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/pro.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/pro.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/reference_test</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/reference_test</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/reference_test.passed</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/reference_test.passed</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/sentence-bleu</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/sentence-bleu</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/sentence-bleu.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/sentence-bleu.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/singleton_test</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/singleton_test</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/singleton_test.passed</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/singleton_test.passed</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/timer_test</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/timer_test</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/timer_test.passed</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/timer_test.passed</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/util_test</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/util_test</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/util_test.passed</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/util_test.passed</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/vocabulary_test</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/vocabulary_test</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/vocabulary_test.passed</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/vocabulary_test.passed</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/alignmentStruct.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/alignmentStruct.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/hashMap.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/hashMap.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/hashMapInfos.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/hashMapInfos.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/hashMapStringInfos.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/hashMapStringInfos.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/infosHasher.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/infosHasher.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/stringHasher.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/stringHasher.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/stringInfosHasher.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/stringInfosHasher.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/terAlignment.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/terAlignment.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/terShift.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/terShift.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/tercalc.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/tercalc.o</locationURI>
+		</link>
+		<link>
+			<name>bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/tools.o</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/mert/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/TER/tools.o</locationURI>
+		</link>
 	</linkedResources>
 </projectDescription>
--- a/contrib/other-builds/manual-label/.cproject
+++ b/contrib/other-builds/manual-label/.cproject
@ -0,0 +1,124 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+	<storageModule moduleId="org.eclipse.cdt.core.settings">
+		<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
+					<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.1899954923" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
+							<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1645930772" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
+							<builder buildPath="${workspace_loc:/manual-label/Debug}" id="cdt.managedbuild.builder.gnu.cross.1703642277" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1938374607" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
+								<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1888648788" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.option.debugging.level.1838052643" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.798368516" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.950686503" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
+								<option id="gnu.cpp.compiler.option.optimization.level.153015988" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.debugging.level.418888584" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.include.paths.406065865" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
+								</option>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.596589558" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1741441821" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1626431978" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
+								<option id="gnu.cpp.link.option.libs.1886912770" superClass="gnu.cpp.link.option.libs" valueType="libs">
+									<listOptionValue builtIn="false" value="boost_program_options-mt"/>
+								</option>
+								<option id="gnu.cpp.link.option.paths.1541583695" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/lib64&quot;"/>
+								</option>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1367999206" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.archiver.31522559" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.assembler.826957235" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.350181339" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+		<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1335379815">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1335379815" moduleId="org.eclipse.cdt.core.settings" name="Release">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1335379815" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
+					<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1335379815." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.97427761" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
+							<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.564169339" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
+							<builder buildPath="${workspace_loc:/manual-label/Release}" id="cdt.managedbuild.builder.gnu.cross.663164336" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.2104943437" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
+								<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.2135645103" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.option.debugging.level.764935013" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1841809129" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1180544943" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
+								<option id="gnu.cpp.compiler.option.optimization.level.1877584345" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.debugging.level.935490779" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1084298301" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.355530813" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.940299092" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.17718999" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1527322008" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
+							<tool id="cdt.managedbuild.tool.gnu.cross.assembler.480337803" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1788533940" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+	</storageModule>
+	<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+		<project id="manual-label.cdt.managedbuild.target.gnu.cross.exe.2117548180" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
+	</storageModule>
+	<storageModule moduleId="scannerConfiguration">
+		<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1335379815;cdt.managedbuild.config.gnu.cross.exe.release.1335379815.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1180544943;cdt.managedbuild.tool.gnu.cpp.compiler.input.1084298301">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639;cdt.managedbuild.config.gnu.cross.exe.debug.1096604639.;cdt.managedbuild.tool.gnu.cross.c.compiler.1938374607;cdt.managedbuild.tool.gnu.c.compiler.input.798368516">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1335379815;cdt.managedbuild.config.gnu.cross.exe.release.1335379815.;cdt.managedbuild.tool.gnu.cross.c.compiler.2104943437;cdt.managedbuild.tool.gnu.c.compiler.input.1841809129">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639;cdt.managedbuild.config.gnu.cross.exe.debug.1096604639.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.950686503;cdt.managedbuild.tool.gnu.cpp.compiler.input.596589558">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+		</scannerConfigBuildInfo>
+	</storageModule>
+	<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
+</cproject>
--- a/contrib/other-builds/manual-label/.project
+++ b/contrib/other-builds/manual-label/.project
@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>manual-label</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+			<triggers>clean,full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+			<triggers>full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.cdt.core.cnature</nature>
+		<nature>org.eclipse.cdt.core.ccnature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
+	</natures>
+</projectDescription>
--- a/contrib/other-builds/manual-label/DeEn.cpp
+++ b/contrib/other-builds/manual-label/DeEn.cpp
@ -0,0 +1,86 @@
+#include <list>
+#include "DeEn.h"
+#include "moses/Util.h"
+
+using namespace std;
+
+extern bool g_debug;
+
+/**
+ * Test whether one factor of the word at position pos + offset equals any of
+ * a set of candidate strings.
+ *
+ * @param source  sentence to inspect; each word is a vector of factor strings
+ * @param pos     base word position
+ * @param offset  signed displacement added to pos before the lookup
+ * @param factor  index of the factor to compare within the word
+ * @param str     space-separated list of accepted values
+ * @return true if the addressed factor equals one of the values in str;
+ *         false if nothing matches, or if the position or the factor index
+ *         lies outside the sentence / the word
+ */
+bool IsA(const Phrase &source, int pos, int offset, int factor, const string &str)
+{
+  pos += offset;
+  // test the sign first so the comparison against the unsigned size is explicit
+  if (pos < 0 || static_cast<size_t>(pos) >= source.size()) {
+    return false;
+  }
+
+  // a word may carry fewer factors than requested (e.g. a token that has no
+  // second factor); report "no match" instead of indexing past its end
+  const Word &factors = source[pos];
+  if (factor < 0 || static_cast<size_t>(factor) >= factors.size()) {
+    return false;
+  }
+
+  const string &word = factors[factor];
+  const vector<string> soughts = Moses::Tokenize(str, " ");
+  for (size_t i = 0; i < soughts.size(); ++i) {
+    if (word == soughts[i]) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns true when at least one word in the inclusive range [start, end] has
+// the given factor equal to one of the space-separated values in str, as
+// judged by IsA; returns false otherwise (including when start > end).
+bool Contains(const Phrase &source, int start, int end, int factor, const string &str)
+{
+  int cur = start;
+  while (cur <= end) {
+    if (IsA(source, cur, 0, factor, str)) {
+      return true;
+    }
+    ++cur;
+  }
+  return false;
+}
+
+/**
+ * Write the sentence to out, one line, wrapping every span that matches one
+ * of two hand-written patterns in <tree label="reorder-label"> ... </tree>.
+ *
+ * A span [start, end] is labelled when either
+ *  - the word before it has factor 1 "VAFIN", the word after it has factor 1
+ *    "VVINF" or "VVPP", and no word inside the span has factor 1 equal to
+ *    VAFIN, VVINF, VVPP or VVFIN; or
+ *  - it begins the sentence or follows a word with factor 1 "$,", is followed
+ *    by a word with factor 0 "zu" and then one with factor 1 "VVINF", and no
+ *    word inside the span has factor 1 "$,".
+ *
+ * Only factor 0 of each word is printed, followed by a space.
+ *
+ * @param source  sentence to label; each word is a vector of factor strings
+ * @param out     stream that receives the labelled sentence and a newline
+ */
+void LabelDeEn(const Phrase &source, ostream &out)
+{
+  typedef pair<int,int> Range;
+  typedef list<Range> Ranges;
+  Ranges ranges;
+
+  // positions are handled as int because IsA() takes signed positions/offsets
+  const int numWords = static_cast<int>(source.size());
+
+  // find ranges to label
+  for (int start = 0; start < numWords; ++start) {
+    for (int end = start; end < numWords; ++end) {
+      if (IsA(source, start, -1, 1, "VAFIN")
+          && IsA(source, end, +1, 1, "VVINF VVPP")
+          && !Contains(source, start, end, 1, "VAFIN VVINF VVPP VVFIN")) {
+        ranges.push_back(Range(start, end));
+      }
+      else if ((start == 0 || IsA(source, start, -1, 1, "$,"))
+          && IsA(source, end, +1, 0, "zu")
+          && IsA(source, end, +2, 1, "VVINF")
+          && !Contains(source, start, end, 1, "$,")) {
+        ranges.push_back(Range(start, end));
+      }
+    }
+  }
+
+  // output sentence, with labels
+  for (int pos = 0; pos < numWords; ++pos) {
+    // output beginning of label: one opening tag per range starting here
+    for (Ranges::const_iterator iter = ranges.begin(); iter != ranges.end(); ++iter) {
+      if (iter->first == pos) {
+        out << "<tree label=\"reorder-label\"> ";
+      }
+    }
+
+    // a word without any factor has no surface form; skip it rather than
+    // reading element 0 of an empty vector
+    const Word &word = source[pos];
+    if (!word.empty()) {
+      out << word[0] << " ";
+    }
+
+    // output end of label: one closing tag per range ending here
+    for (Ranges::const_iterator iter = ranges.begin(); iter != ranges.end(); ++iter) {
+      if (iter->second == pos) {
+        out << "</tree> ";
+      }
+    }
+  }
+  out << endl;
+
+}
--- a/contrib/other-builds/manual-label/DeEn.h
+++ b/contrib/other-builds/manual-label/DeEn.h
@ -0,0 +1,10 @@
+#pragma once
+
+#include <iostream>
+#include <vector>
+#include <string>
+
+// A word is the list of its factor strings (manual-label.cpp builds one by
+// splitting a whitespace-separated token on "|").
+typedef std::vector<std::string> Word;
+// A sentence: its words in order.
+typedef std::vector<Word> Phrase;
+
+// Writes source to out on one line, wrapping the spans that match the
+// hand-coded patterns in DeEn.cpp in <tree label="reorder-label"> ... </tree>.
+void LabelDeEn(const Phrase &source, std::ostream &out);
--- a/contrib/other-builds/manual-label/Makefile
+++ b/contrib/other-builds/manual-label/Makefile
@ -0,0 +1,13 @@
+# Builds the manual-label tool from DeEn.cpp and manual-label.cpp.
+
+# all and clean name actions, not files; keep make from being confused by a
+# file that happens to carry one of these names.
+.PHONY: all clean
+
+all: manual-label
+
+clean: 
+	rm -f *.o manual-label
+
+# -I../../../ is the repository root, so that "moses/Util.h" resolves.
+# -O3 is the highest optimisation level g++ implements; larger numbers are
+# treated as -O3.
+.cpp.o:
+	g++ -I../../../ -O3 -g -c $<
+
+manual-label: DeEn.o manual-label.o
+	g++ DeEn.o manual-label.o -lz -lboost_program_options-mt -o manual-label
+
+
--- a/contrib/other-builds/manual-label/manual-label.cpp
+++ b/contrib/other-builds/manual-label/manual-label.cpp
@ -0,0 +1,88 @@
+#include <iostream>
+#include <cstdlib>
+#include <boost/program_options.hpp>
+#include "moses/Util.h"
+#include "DeEn.h"
+
+using namespace std;
+
+bool g_debug = false;
+
+Phrase Tokenize(const string &line);
+
+// Entry point. Parses the command line, then reads sentences from stdin one
+// per line, tokenizes each and passes it to LabelDeEn(), which writes to
+// stdout. Progress and diagnostics go to stderr. The two language options are
+// required and echoed to stderr; this function does not use them otherwise.
+// Returns EXIT_FAILURE on a command-line error, EXIT_SUCCESS otherwise.
+int main(int argc, char** argv)
+{
+  namespace po = boost::program_options;
+
+  cerr << "Starting" << endl;
+
+  po::options_description desc("Options");
+  desc.add_options()
+    ("help", "Print help messages")
+    ("add", "additional options")
+    ("source-language,s", po::value<string>()->required(), "Source Language")
+    ("target-language,t", po::value<string>()->required(), "Target Language");
+
+  po::variables_map vm;
+  try {
+    po::store(po::parse_command_line(argc, argv, desc), vm);  // can throw
+
+    // --help is handled before notify() so that it still works when the
+    // required options are missing
+    if (vm.count("help") > 0) {
+      cout << "Basic Command Line Parameter App" << endl
+           << desc << endl;
+      return EXIT_SUCCESS;
+    }
+
+    po::notify(vm);  // throws on error
+  } catch (const po::error &e) {
+    cerr << "ERROR: " << e.what() << endl << endl;
+    cerr << desc << endl;
+    return EXIT_FAILURE;
+  }
+
+  const string &sourceLang = vm["source-language"].as<string>();
+  const string &targetLang = vm["target-language"].as<string>();
+  cerr << sourceLang << " " << targetLang << endl;
+
+  string line;
+  for (size_t lineNum = 1; getline(cin, line); ++lineNum) {
+    // progress marker on every 1000th input line
+    if (lineNum % 1000 == 0) {
+      cerr << lineNum << " ";
+    }
+
+    const Phrase source = Tokenize(line);
+    LabelDeEn(source, cout);
+  }
+
+  cerr << "Finished" << endl;
+  return EXIT_SUCCESS;
+}
+
+// Converts one input line into a Phrase: the line is split into tokens with
+// Moses::Tokenize's default delimiters, and each token is split on "|" into
+// the factor strings that make up a Word.
+Phrase Tokenize(const string &line)
+{
+  const vector<string> tokens = Moses::Tokenize(line);
+
+  Phrase phrase;
+  for (vector<string>::const_iterator tok = tokens.begin(); tok != tokens.end(); ++tok) {
+    const Word factors = Moses::Tokenize(*tok, "|");
+    phrase.push_back(factors);
+  }
+
+  return phrase;
+}
+
+
--- a/contrib/other-builds/mert_lib/.cproject
+++ b/contrib/other-builds/mert_lib/.cproject
@ -11,11 +11,11 @@
 					</externalSetting>
 				</externalSettings>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -46,7 +46,9 @@
 							</tool>
 						</toolChain>
 					</folderInfo>
-					<fileInfo id="cdt.managedbuild.config.gnu.lib.debug.1721952013.933309045" name="PreProcessFilter.h" rcbsApplicability="disable" resourcePath="mert/PreProcessFilter.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.gnu.lib.debug.1721952013.195400614" name="MeteorScorer.cpp" rcbsApplicability="disable" resourcePath="MeteorScorer.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537.307282660">
+						<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537.307282660" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537"/>
+					</fileInfo>
 					<sourceEntries>
 						<entry excluding="mert/PreProcessFilter.h|mert/PreProcessFilter.cpp|mert/UtilTest.cpp|mert/TimerTest.cpp|mert/SingletonTest.cpp|mert/PointTest.cpp|mert/OptimizerFactoryTest.cpp|mert/NgramTest.cpp|mert/FeatureDataTest.cpp|mert/DataTest.cpp|mert/ReferenceTest.cpp|mert/VocabularyTest.cpp|mert/extractor.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
 					</sourceEntries>
@ -64,11 +66,11 @@
 					</externalSetting>
 				</externalSettings>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -127,4 +129,5 @@
 		</configuration>
 	</storageModule>
 	<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
+	<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
 </cproject>
--- a/contrib/other-builds/mert_lib/.project
+++ b/contrib/other-builds/mert_lib/.project
--- a/contrib/other-builds/mira/.cproject
+++ b/contrib/other-builds/mira/.cproject
@ -5,12 +5,12 @@
 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092" moduleId="org.eclipse.cdt.core.settings" name="Debug">
 				<externalSettings/>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -50,7 +50,6 @@
 									<listOptionValue builtIn="false" value="lattice"/>
 									<listOptionValue builtIn="false" value="misc"/>
 									<listOptionValue builtIn="false" value="dalm"/>
-									<listOptionValue builtIn="false" value="MurmurHash3"/>
 									<listOptionValue builtIn="false" value="search"/>
 									<listOptionValue builtIn="false" value="RandLM"/>
 									<listOptionValue builtIn="false" value="OnDiskPt"/>
@ -103,12 +102,12 @@
 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.2038764866" moduleId="org.eclipse.cdt.core.settings" name="Release">
 				<externalSettings/>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -173,4 +172,5 @@
 		</configuration>
 	</storageModule>
 	<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
+	<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
 </cproject>
--- a/contrib/other-builds/moses-chart-cmd/.cproject
+++ b/contrib/other-builds/moses-chart-cmd/.cproject
@ -5,13 +5,13 @@
 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.162355801" moduleId="org.eclipse.cdt.core.settings" name="Debug">
 				<externalSettings/>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
-					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -70,7 +70,6 @@
 									<listOptionValue builtIn="false" value="irstlm"/>
 									<listOptionValue builtIn="false" value="dstruct"/>
 									<listOptionValue builtIn="false" value="dalm"/>
-									<listOptionValue builtIn="false" value="MurmurHash3"/>
 									<listOptionValue builtIn="false" value="flm"/>
 									<listOptionValue builtIn="false" value="oolm"/>
 									<listOptionValue builtIn="false" value="lattice"/>
@ -108,13 +107,13 @@
 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.516628324" moduleId="org.eclipse.cdt.core.settings" name="Release">
 				<externalSettings/>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
-					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
--- a/contrib/other-builds/moses-cmd/.cproject
+++ b/contrib/other-builds/moses-cmd/.cproject
@ -5,13 +5,13 @@
 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.461114338" moduleId="org.eclipse.cdt.core.settings" name="Debug">
 				<externalSettings/>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
-					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -71,7 +71,6 @@
 									<listOptionValue builtIn="false" value="lattice"/>
 									<listOptionValue builtIn="false" value="misc"/>
 									<listOptionValue builtIn="false" value="dalm"/>
-									<listOptionValue builtIn="false" value="MurmurHash3"/>
 									<listOptionValue builtIn="false" value="search"/>
 									<listOptionValue builtIn="false" value="RandLM"/>
 									<listOptionValue builtIn="false" value="OnDiskPt"/>
@ -109,13 +108,13 @@
 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.2121690436" moduleId="org.eclipse.cdt.core.settings" name="Release">
 				<externalSettings/>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
-					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
--- a/contrib/other-builds/moses/.cproject
+++ b/contrib/other-builds/moses/.cproject
@ -11,12 +11,12 @@
 					</externalSetting>
 				</externalSettings>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
-					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -88,13 +88,13 @@
 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.401150096" moduleId="org.eclipse.cdt.core.settings" name="Release">
 				<externalSettings/>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
-					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@ -166,6 +166,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/ChartHypothesisCollection.h</locationURI>
 		</link>
+		<link>
+			<name>ChartKBestExtractor.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/ChartKBestExtractor.cpp</locationURI>
+		</link>
+		<link>
+			<name>ChartKBestExtractor.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/ChartKBestExtractor.h</locationURI>
+		</link>
 		<link>
 			<name>ChartManager.cpp</name>
 			<type>1</type>
@ -1066,6 +1076,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ControlRecombination.h</locationURI>
 		</link>
+		<link>
+			<name>FF/CountNonTerms.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/CountNonTerms.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/CountNonTerms.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/CountNonTerms.h</locationURI>
+		</link>
 		<link>
 			<name>FF/CoveredReferenceFeature.cpp</name>
 			<type>1</type>
@ -1156,6 +1176,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/GlobalLexicalModelUnlimited.h</locationURI>
 		</link>
+		<link>
+			<name>FF/HyperParameterAsWeight.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/HyperParameterAsWeight.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/HyperParameterAsWeight.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/HyperParameterAsWeight.h</locationURI>
+		</link>
 		<link>
 			<name>FF/InputFeature.cpp</name>
 			<type>1</type>
@ -1231,6 +1261,36 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/PhrasePenalty.h</locationURI>
 		</link>
+		<link>
+			<name>FF/ReferenceComparison.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ReferenceComparison.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/ReferenceComparison.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ReferenceComparison.h</locationURI>
+		</link>
+		<link>
+			<name>FF/RuleAmbiguity.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/RuleAmbiguity.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/RuleAmbiguity.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/RuleAmbiguity.h</locationURI>
+		</link>
+		<link>
+			<name>FF/SetSourcePhrase.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SetSourcePhrase.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/SetSourcePhrase.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SetSourcePhrase.h</locationURI>
+		</link>
 		<link>
 			<name>FF/SkeletonStatefulFF.cpp</name>
 			<type>1</type>
@ -1251,6 +1311,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonStatelessFF.h</locationURI>
 		</link>
+		<link>
+			<name>FF/SoftMatchingFeature.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SoftMatchingFeature.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/SoftMatchingFeature.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SoftMatchingFeature.h</locationURI>
+		</link>
 		<link>
 			<name>FF/SourceWordDeletionFeature.cpp</name>
 			<type>1</type>
@ -1311,6 +1381,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetWordInsertionFeature.h</locationURI>
 		</link>
+		<link>
+			<name>FF/TreeStructureFeature.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/TreeStructureFeature.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/TreeStructureFeature.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/TreeStructureFeature.h</locationURI>
+		</link>
 		<link>
 			<name>FF/UnknownWordPenaltyProducer.cpp</name>
 			<type>1</type>
@ -1836,6 +1916,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h</locationURI>
 		</link>
+		<link>
+			<name>TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.cpp</locationURI>
+		</link>
+		<link>
+			<name>TranslationModel/CYKPlusParser/CompletedRuleCollection.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.h</locationURI>
+		</link>
 		<link>
 			<name>TranslationModel/CYKPlusParser/DotChart.h</name>
 			<type>1</type>
--- a/contrib/other-builds/score/.cproject
+++ b/contrib/other-builds/score/.cproject
@ -5,12 +5,12 @@
 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.634831890" moduleId="org.eclipse.cdt.core.settings" name="Debug">
 				<externalSettings/>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -20,14 +20,14 @@
 							<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.2040884960" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
 							<builder buildPath="${workspace_loc:/score/Debug}" id="cdt.managedbuild.builder.gnu.cross.1709170788" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
 							<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.786339685" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
-								<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1516054114" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
-								<option id="gnu.c.compiler.option.debugging.level.1061705384" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+								<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1516054114" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+								<option id="gnu.c.compiler.option.debugging.level.1061705384" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
 								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2108019237" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
 							</tool>
 							<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1013232238" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
-								<option id="gnu.cpp.compiler.option.optimization.level.1874109813" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
-								<option id="gnu.cpp.compiler.option.debugging.level.2032778777" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
-								<option id="gnu.cpp.compiler.option.include.paths.1713606194" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+								<option id="gnu.cpp.compiler.option.optimization.level.1874109813" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.debugging.level.2032778777" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.include.paths.1713606194" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
 								</option>
@ -37,9 +37,13 @@
 							<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1563503789" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
 								<option id="gnu.cpp.link.option.paths.1704292838" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
 								</option>
 								<option id="gnu.cpp.link.option.libs.936233947" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
 									<listOptionValue builtIn="false" value="z"/>
+									<listOptionValue builtIn="false" value="util"/>
+									<listOptionValue builtIn="false" value="moses"/>
 									<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
 									<listOptionValue builtIn="false" value="boost_system-mt"/>
 									<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
@ -63,12 +67,12 @@
 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1994357180" moduleId="org.eclipse.cdt.core.settings" name="Release">
 				<externalSettings/>
 				<extensions>
-					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -78,13 +82,13 @@
 							<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1353054437" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
 							<builder buildPath="${workspace_loc:/score/Release}" id="cdt.managedbuild.builder.gnu.cross.1851758128" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
 							<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.323743241" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
-								<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.534423111" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
-								<option id="gnu.c.compiler.option.debugging.level.518786530" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+								<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.534423111" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
+								<option id="gnu.c.compiler.option.debugging.level.518786530" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
 								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.392640311" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
 							</tool>
 							<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.307472312" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
-								<option id="gnu.cpp.compiler.option.optimization.level.407718562" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
-								<option id="gnu.cpp.compiler.option.debugging.level.1687450255" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.optimization.level.407718562" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.debugging.level.1687450255" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
 								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.593478428" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
 							</tool>
 							<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.165176764" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
--- a/contrib/other-builds/score/.project
+++ b/contrib/other-builds/score/.project
@ -25,6 +25,26 @@
 		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
 	</natures>
 	<linkedResources>
+		<link>
+			<name>DomainFeature.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/DomainFeature.cpp</locationURI>
+		</link>
+		<link>
+			<name>DomainFeature.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/DomainFeature.h</locationURI>
+		</link>
+		<link>
+			<name>ExtractionPhrasePair.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/ExtractionPhrasePair.cpp</locationURI>
+		</link>
+		<link>
+			<name>ExtractionPhrasePair.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/ExtractionPhrasePair.h</locationURI>
+		</link>
 		<link>
 			<name>InputFileStream.cpp</name>
 			<type>1</type>
@ -55,11 +75,6 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
 		</link>
-		<link>
-			<name>PhraseAlignment.cpp</name>
-			<type>1</type>
-			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/PhraseAlignment.cpp</locationURI>
-		</link>
 		<link>
 			<name>ScoreFeature.cpp</name>
 			<type>1</type>
@ -70,16 +85,6 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/ScoreFeature.h</locationURI>
 		</link>
-		<link>
-			<name>domain.cpp</name>
-			<type>1</type>
-			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/domain.cpp</locationURI>
-		</link>
-		<link>
-			<name>domain.h</name>
-			<type>1</type>
-			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/domain.h</locationURI>
-		</link>
 		<link>
 			<name>exception.cc</name>
 			<type>1</type>
--- a/contrib/rpm/README
+++ b/contrib/rpm/README
@ -12,12 +12,13 @@ Building the RPM SPEC file
 The first phase is to construct the RPM SPEC file in $HOME/rpmbuild. The build_source.sh script builds all the artefacts needed to build. This script needs the following information:

 - The Git repository from which an installer will be built,
- - The branch in the Git repository to build, and
+ - The branch in the Git repository to build,
+ - The location of Boost on the build machine, and
 - The version of the installed Moses distribution.

-For example, to build the RELEASE-1.0 branch in the mosesdecode repository (git://github.com/moses-smt/mosesdecoder.git):
+For example, to build the RELEASE-1.0 branch in the mosesdecoder repository (git://github.com/moses-smt/mosesdecoder.git):

-$ build_source.sh -r git://github.com/moses-smt/mosesdecoder.git -b RELASE-1.0 -v 1.0
+$ build_source.sh -r git://github.com/moses-smt/mosesdecoder.git -b RELEASE-1.0 -v 1.0 -t /usr

 This builds the source tarballs in the $HOME/rpmbuild/SOURCES directory and the moses.spec file in $HOME/rpmbuild/SPECS.

--- a/contrib/rpm/build_source.sh
+++ b/contrib/rpm/build_source.sh
@ -1,11 +1,15 @@
 #!/bin/bash

 BRANCH="master"
+BOOST="/usr"
 declare -i NO_RPM_BUILD=0
+declare -i RELEASE=1
 declare -r RPM_VERSION_TAG="___RPM_VERSION__"
+declare -r RPM_RELEASE_TAG="___RPM_RELEASE__"
+declare -r BOOST_TAG="___BOOST_LOCATION__"

 function usage() {
-  echo "`basename $0` -r [Moses Git repo] -b [Moses Git branch: default ${BRANCH}] -v [RPM version]"
+  echo "`basename $0` -r [Moses Git repo] -b [Moses Git branch: default ${BRANCH}] -v [RPM version] -l [RPM release: default ${RELEASE}] -t [Boost install: default ${BOOST}]"
  exit 1
 }

@ -13,12 +17,14 @@ if [ $# -lt 4 ]; then
  usage
 fi

-while getopts r:b:v:nh OPTION
+while getopts r:b:t:v:l:nh OPTION
 do
  case "$OPTION" in
      r) REPO="${OPTARG}";;
      b) BRANCH="${OPTARG}";;
+      t) BOOST="${OPTARG}";;
      v) VERSION="${OPTARG}";;
+      l) RELEASE="${OPTARG}";;
      n) NO_RPM_BUILD=1;;
      [h\?]) usage;;
  esac
@ -53,7 +59,8 @@ if [ ${NO_RPM_BUILD} -eq 0 ]; then
  if [ ! -d ${HOME}/rpmbuild/SPECS ]; then
    mkdir -p ${HOME}/rpmbuild/SPECS
  fi
-  eval sed s/${RPM_VERSION_TAG}/${VERSION}/ ./rpmbuild/SPECS/moses.spec > ${HOME}/rpmbuild/SPECS/moses.spec
+  ESC_BOOST=`echo ${BOOST} | gawk '{gsub(/\//, "\\\\/"); print}'`
+  eval sed -e \"s/${RPM_VERSION_TAG}/${VERSION}/\" -e \"s/${RPM_RELEASE_TAG}/${RELEASE}/\" -e \"s/${BOOST_TAG}/${ESC_BOOST}/\" ./rpmbuild/SPECS/moses.spec > ${HOME}/rpmbuild/SPECS/moses.spec
  if [ ! -d ${HOME}/rpmbuild/SOURCES ]; then
    mkdir -p ${HOME}/rpmbuild/SOURCES
  fi
--- a/contrib/rpm/rpmbuild/SPECS/moses.spec
+++ b/contrib/rpm/rpmbuild/SPECS/moses.spec
@ -1,21 +1,26 @@
-Name: moses
+%define name moses
+%define version ___RPM_VERSION__
+%define release ___RPM_RELEASE__
+
+Name: %{name}
 Summary: Moses is a statistical machine translation system that allows you to automatically train translation models for any language pair.
-Version: ___RPM_VERSION__
-Release: 1
-URL: http://www.statmt.org/moses/
+Version: %{version}
+Release: %{release}
+URL: http://www.statmt.org/%{name}-%{version}/
 Source0: %{name}-%{version}.tar.gz
 License: LGPL
 Group: Development/Tools
 Vendor: Capita Translation and Interpreting
 Packager: Ian Johnson <ian.johnson@capita-ti.com>
-Requires: boost >= 1.48, python >= 2.6, perl >= 5
-BuildRoot: /home/ian/rpmbuild/builds/%{name}-%{version}-%{release}
+Requires: python >= 2.6, perl >= 5
+Prefix: /opt
+BuildRoot: %{_builddir}/%{name}-%{version}-%{release}
 %description
 Moses is a statistical machine translation system that allows you to automatically train translation models for any language pair. All you need is a collection of translated texts (parallel corpus). An efficient search algorithm finds quickly the highest probability translation among the exponential number of choices.
 %prep
 %setup -q

-mkdir -p $RPM_BUILD_ROOT/opt/moses/giza++-v1.0.7
+mkdir -p $RPM_BUILD_ROOT/opt/%{name}-%{version}/giza++-v1.0.7

 wget -O $RPM_BUILD_DIR/irstlm-5.70.04.tgz http://moses-suite.googlecode.com/files/irstlm-5.70.04.tgz 
 wget -O $RPM_BUILD_DIR/giza-pp-v1.0.7.tgz http://moses-suite.googlecode.com/files/giza-pp-v1.0.7.tar.gz
@ -27,39 +32,51 @@ tar -zxf giza-pp-v1.0.7.tgz

 cd irstlm-5.70.04
 bash regenerate-makefiles.sh --force
-./configure --prefix $RPM_BUILD_ROOT/opt/moses/irstlm-5.70.04
+./configure --prefix $RPM_BUILD_ROOT/opt/%{name}-%{version}/irstlm-5.70.04
 make
 make install

 cd ../giza-pp
 make
-cp $RPM_BUILD_DIR/giza-pp/GIZA++-v2/GIZA++ $RPM_BUILD_DIR/giza-pp/GIZA++-v2/snt2cooc.out $RPM_BUILD_DIR/giza-pp/mkcls-v2/mkcls $RPM_BUILD_ROOT/opt/moses/giza++-v1.0.7
+cp $RPM_BUILD_DIR/giza-pp/GIZA++-v2/GIZA++ $RPM_BUILD_DIR/giza-pp/GIZA++-v2/snt2cooc.out $RPM_BUILD_DIR/giza-pp/mkcls-v2/mkcls $RPM_BUILD_ROOT/opt/%{name}-%{version}/giza++-v1.0.7
 %build
-./bjam --with-irstlm=$RPM_BUILD_ROOT/opt/moses/irstlm-5.70.04 --with-giza=$RPM_BUILD_ROOT/opt/moses/giza++-v1.0.7 -j2
+./bjam --with-boost=___BOOST_LOCATION__ --with-irstlm=$RPM_BUILD_ROOT/opt/%{name}-%{version}/irstlm-5.70.04 --with-giza=$RPM_BUILD_ROOT/opt/%{name}-%{version}/giza++-v1.0.7 -j2
 %install
-mkdir -p $RPM_BUILD_ROOT/opt/moses/scripts
-cp -R bin $RPM_BUILD_ROOT/opt/moses
-cp -R scripts/analysis $RPM_BUILD_ROOT/opt/moses/scripts
-cp -R scripts/ems $RPM_BUILD_ROOT/opt/moses/scripts
-cp -R scripts/generic $RPM_BUILD_ROOT/opt/moses/scripts
-cp -R scripts/other $RPM_BUILD_ROOT/opt/moses/scripts
-cp -R scripts/recaser $RPM_BUILD_ROOT/opt/moses/scripts
-cp -R scripts/regression-testing $RPM_BUILD_ROOT/opt/moses/scripts
-cp -R scripts/share $RPM_BUILD_ROOT/opt/moses/scripts
-cp -R scripts/tokenizer $RPM_BUILD_ROOT/opt/moses/scripts
-cp -R scripts/training $RPM_BUILD_ROOT/opt/moses/scripts
+mkdir -p $RPM_BUILD_ROOT/opt/%{name}-%{version}/scripts
+cp -R bin $RPM_BUILD_ROOT/opt/%{name}-%{version}
+cp -R scripts/OSM $RPM_BUILD_ROOT/opt/%{name}-%{version}/scripts
+cp -R scripts/Transliteration $RPM_BUILD_ROOT/opt/%{name}-%{version}/scripts
+cp -R scripts/analysis $RPM_BUILD_ROOT/opt/%{name}-%{version}/scripts
+cp -R scripts/ems $RPM_BUILD_ROOT/opt/%{name}-%{version}/scripts
+cp -R scripts/generic $RPM_BUILD_ROOT/opt/%{name}-%{version}/scripts
+cp -R scripts/other $RPM_BUILD_ROOT/opt/%{name}-%{version}/scripts
+cp -R scripts/recaser $RPM_BUILD_ROOT/opt/%{name}-%{version}/scripts
+cp -R scripts/share $RPM_BUILD_ROOT/opt/%{name}-%{version}/scripts
+cp -R scripts/tokenizer $RPM_BUILD_ROOT/opt/%{name}-%{version}/scripts
+cp -R scripts/training $RPM_BUILD_ROOT/opt/%{name}-%{version}/scripts
 %clean
 %files
 %defattr(-,root,root)
-/opt/moses/bin/*
-/opt/moses/scripts/analysis/*
-/opt/moses/scripts/ems/*
-/opt/moses/scripts/generic/*
-/opt/moses/scripts/other/*
-/opt/moses/scripts/recaser/*
-/opt/moses/scripts/regression-testing/*
-/opt/moses/scripts/share/*
-/opt/moses/scripts/tokenizer/*
-/opt/moses/scripts/training/*
-/opt/moses/irstlm-5.70.04/*
-/opt/moses/giza++-v1.0.7/*
+/opt/%{name}-%{version}/bin/*
+/opt/%{name}-%{version}/scripts/OSM/*
+/opt/%{name}-%{version}/scripts/Transliteration/*
+/opt/%{name}-%{version}/scripts/analysis/*
+/opt/%{name}-%{version}/scripts/ems/*
+/opt/%{name}-%{version}/scripts/generic/*
+/opt/%{name}-%{version}/scripts/other/*
+/opt/%{name}-%{version}/scripts/recaser/*
+/opt/%{name}-%{version}/scripts/share/*
+/opt/%{name}-%{version}/scripts/tokenizer/*
+/opt/%{name}-%{version}/scripts/training/*
+/opt/%{name}-%{version}/irstlm-5.70.04/*
+/opt/%{name}-%{version}/giza++-v1.0.7/*
+%pre
+# Upgrade only ($1 = 2; a first install, $1 = 1, needs no work): best-effort removal of the old unversioned symlink; -f keeps a missing link from failing the scriptlet.
+if [ "$1" = "2" ]; then
+  rm -f $RPM_INSTALL_PREFIX/%{name} 2>/dev/null
+fi
+%post
+ln -s $RPM_INSTALL_PREFIX/%{name}-%{version} $RPM_INSTALL_PREFIX/%{name}
+%postun
+rm -Rf $RPM_INSTALL_PREFIX/%{name}-%{version} 2>/dev/null
+rm $RPM_INSTALL_PREFIX/%{name} 2>/dev/null
--- a/contrib/server/Jamfile
+++ b/contrib/server/Jamfile
@ -35,7 +35,7 @@ if $(build-moses-server) = true
  xmlrpc-linkflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --libs" ] ;
  xmlrpc-cxxflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --cflags" ] ;

-  exe mosesserver : mosesserver.cpp ../../moses//moses ../../OnDiskPt//OnDiskPt : <linkflags>$(xmlrpc-linkflags) <cxxflags>$(xmlrpc-cxxflags) ;
+  exe mosesserver : mosesserver.cpp ../../moses//moses ../../moses-cmd/IOWrapper.cpp ../../OnDiskPt//OnDiskPt : <linkflags>$(xmlrpc-linkflags) <cxxflags>$(xmlrpc-cxxflags) ;
 } else {
  alias mosesserver ;
 }
--- a/contrib/server/mosesserver.cpp
+++ b/contrib/server/mosesserver.cpp
@ -12,6 +12,7 @@
 #include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
 #include "moses/TreeInput.h"
 #include "moses/LM/ORLM.h"
+#include "moses-cmd/IOWrapper.h"

 #ifdef WITH_THREADS
 #include <boost/thread.hpp>
@ -22,6 +23,7 @@
 #include <xmlrpc-c/server_abyss.hpp>

 using namespace Moses;
+using namespace MosesCmd;
 using namespace std;

 typedef std::map<std::string, xmlrpc_c::value> params_t;
@ -215,6 +217,8 @@ public:
    cerr << "Input: " << source << endl;
    si = params.find("align");
    bool addAlignInfo = (si != params.end());
+    si = params.find("word-align");
+    bool addWordAlignInfo = (si != params.end());
    si = params.find("sg");
    bool addGraphInfo = (si != params.end());
    si = params.find("topt");
@ -278,6 +282,20 @@ public:
        if (addAlignInfo) {
          retData.insert(pair<string, xmlrpc_c::value>("align", xmlrpc_c::value_array(alignInfo)));
        }
+        if (addWordAlignInfo) {
+          stringstream wordAlignment;
+          OutputAlignment(wordAlignment, hypo);
+          vector<xmlrpc_c::value> alignments;
+          string alignmentPair;
+          while (wordAlignment >> alignmentPair) {
+          	int pos = alignmentPair.find('-');
+          	map<string, xmlrpc_c::value> wordAlignInfo;
+          	wordAlignInfo["source-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(0, pos).c_str()));
+          	wordAlignInfo["target-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(pos + 1).c_str()));
+          	alignments.push_back(xmlrpc_c::value_struct(wordAlignInfo));
+          }
+          retData.insert(pair<string, xmlrpc_c::value_array>("word-align", alignments));
+        }

        if(addGraphInfo) {
          insertGraphInfo(manager,retData);
@ -415,9 +433,25 @@ public:
      }
      nBestXMLItem["hyp"] = xmlrpc_c::value_string(out.str());

-      if (addAlignmentInfo)
+      if (addAlignmentInfo) {
        nBestXMLItem["align"] = xmlrpc_c::value_array(alignInfo);

+        if ((int)edges.size() > 0) {
+          stringstream wordAlignment;
+          OutputAlignment(wordAlignment, edges[0]);
+          vector<xmlrpc_c::value> alignments;
+          string alignmentPair;
+          while (wordAlignment >> alignmentPair) {
+          	int pos = alignmentPair.find('-');
+          	map<string, xmlrpc_c::value> wordAlignInfo;
+          	wordAlignInfo["source-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(0, pos).c_str()));
+          	wordAlignInfo["target-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(pos + 1).c_str()));
+          	alignments.push_back(xmlrpc_c::value_struct(wordAlignInfo));
+          }
+          nBestXMLItem["word-align"] = xmlrpc_c::value_array(alignments);
+        }
+      }
+
      // weighted score
      nBestXMLItem["totalScore"] = xmlrpc_c::value_double(path.GetTotalScore());
      nBestXml.push_back(xmlrpc_c::value_struct(nBestXMLItem));
@ -512,7 +546,7 @@ int main(int argc, char** argv)
  xmlrpc_limit_set(XMLRPC_XML_SIZE_LIMIT_ID, 512*1024*1024);

  xmlrpc_c::registry myRegistry;
-
+  
  xmlrpc_c::methodPtr const translator(new Translator);
  xmlrpc_c::methodPtr const updater(new Updater);
  xmlrpc_c::methodPtr const optimizer(new Optimizer);
@ -521,11 +555,20 @@ int main(int argc, char** argv)
  myRegistry.addMethod("updater", updater);
  myRegistry.addMethod("optimize", optimizer);

+   xmlrpc_c::serverAbyss myAbyssServer(
+					myRegistry,
+					port,              // TCP port on which to listen
+					logfile
+					);
+  /* doesn't work with xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04
  xmlrpc_c::serverAbyss myAbyssServer(
-    myRegistry,
-    port,              // TCP port on which to listen
-    logfile
+    xmlrpc_c::serverAbyss::constrOpt()
+    .registryPtr(&myRegistry)
+    .portNumber(port)              // TCP port on which to listen
+    .logFileName(logfile)
+    .allowOrigin("*")
  );
+  */

  cerr << "Listening on port " << port << endl;
  if (isSerial) {
--- a/contrib/tmcombine/test/model3/model/phrase-table
+++ b/contrib/tmcombine/test/model3/model/phrase-table
@ -1,5 +1,5 @@
-ad ||| af ||| 0.3 0.3 0.3 0.3 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 1000 1000
-bd ||| bf ||| 0.3 0.3 0.3 0.3 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 10 10
+ad ||| af ||| 0.3 0.3 0.3 0.3 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 1000 1000 ||| sparse_feature 1
+bd ||| bf ||| 0.3 0.3 0.3 0.3 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 10 10 |||
 der gipfel ||| sommet ||| 0.3 0.3 0.3 0.3 0.00327135 0.00872768 0.0366795 0.611403 2.718 ||| 1-0 ||| 5808 518
 der pass ||| le col ||| 0.3 0.3 0.3 0.3 0.0173565 0.0284616 0.288889 0.121619 2.718 ||| 0-0 1-1 ||| 749 45
 pass ||| col ||| 0.3 0.3 0.3 0.3 0.1952 0.143937 0.628866 0.681301 2.718 ||| 0-0 ||| 1875 582
--- a/contrib/tmcombine/test/model4/model/phrase-table
+++ b/contrib/tmcombine/test/model4/model/phrase-table
@ -1,4 +1,4 @@
-ad ||| af ||| 0.6 0.6 0.6 0.6 0.1 0.1 0.1 0.1 2.718 ||| 0-0 ||| 1000 1000
+ad ||| af ||| 0.6 0.6 0.6 0.6 0.1 0.1 0.1 0.1 2.718 ||| 0-0 ||| 1000 1000 ||| sparse_feature 2
 bd ||| bf ||| 0.6 0.6 0.6 0.6 0.1 0.1 0.1 0.1 2.718 ||| 0-0 ||| 10 10
 der pass ||| le passeport ||| 0.6 0.6 0.6 0.6 0.16 0.03063 0.4 0.0748551 2.718 ||| 0-0 1-1 ||| 25 10
 pass ||| passeport ||| 0.6 0.6 0.6 0.6 0.28022 0.192612 0.607143 0.675926 2.718 ||| 0-0 ||| 182 84
--- a/contrib/tmcombine/test/phrase-table_test1
+++ b/contrib/tmcombine/test/phrase-table_test1
@ -1,8 +1,8 @@
-ad ||| af ||| 0.3 0.3 0.3 0.3 2.718 ||| 0-0 ||| 1000 1000
-bd ||| bf ||| 0.3 0.3 0.3 0.3 2.718 ||| 0-0 ||| 10 10
-der gipfel ||| sommet ||| 0.00163568 0.00436384 0.0183397 0.305702 2.718 ||| 1-0 ||| 5808 518
-der pass ||| le col ||| 0.00867825 0.0142308 0.144445 0.0608095 2.718 ||| 0-0 1-1 ||| 749 45
-pass ||| col ||| 0.0976 0.0719685 0.314433 0.340651 2.718 ||| 0-0 ||| 1875 582
-pass ||| passeport retrouvé ||| 0.25 0.125 0.000859105 1.9065e-07 2.718 ||| 0-0 ||| 2 582
-pass ||| passeport ||| 0.273444 0.221306 0.307008 0.343654 2.718 ||| 0-0 ||| 182 84
-sitzung ||| séance ||| 0.528624 0.417705 0.434797 0.492241 2.718 ||| 0-0 ||| 4251 6455
+ad ||| af ||| 0.3 0.3 0.3 0.3 ||| 0-0 ||| 1000 1000
+bd ||| bf ||| 0.3 0.3 0.3 0.3 ||| 0-0 ||| 10 10
+der gipfel ||| sommet ||| 0.00163568 0.00436384 0.0183397 0.305702 ||| 1-0 ||| 5808 518
+der pass ||| le col ||| 0.00867825 0.0142308 0.144445 0.0608095 ||| 0-0 1-1 ||| 749 45
+pass ||| col ||| 0.0976 0.0719685 0.314433 0.340651 ||| 0-0 ||| 1875 582
+pass ||| passeport retrouvé ||| 0.25 0.125 0.000859105 1.9065e-07 ||| 0-0 ||| 2 582
+pass ||| passeport ||| 0.273444 0.221306 0.307008 0.343654 ||| 0-0 ||| 15 582
+sitzung ||| séance ||| 0.528624 0.417705 0.434797 0.492241 ||| 0-0 ||| 22 17
--- a/contrib/tmcombine/test/phrase-table_test10
+++ b/contrib/tmcombine/test/phrase-table_test10
@ -1,9 +1,9 @@
-ad ||| af ||| 0.3 0.3 0.3 0.3 0.11579 0.35574 0.472359 0.469238 2.718 ||| 0-0 ||| 25332.4712297 1074.23173673
-bd ||| bf ||| 0.3 0.3 0.3 0.3 0.11579 0.35574 0.472359 0.469238 2.718 ||| 0-0 ||| 253.324712297 10.7423173673
-der gipfel ||| sommet ||| 0.3 0.3 0.3 0.3 0.00327135 0.00686984 0.0366795 0.617135 2.718 ||| 1-0 ||| 5808.0 518.0
-der pass ||| le col ||| 0.3 0.3 0.3 0.3 0.0173565 0.023534 0.284201 0.0972183 2.718 ||| 0-0 1-1 ||| 749.0 45.7423173673
-der pass ||| le passeport ||| 6e-10 6e-10 6e-10 6e-10 0.16 0.0329324 0.0064913 0.00303408 2.718 ||| 0-0 1-1 ||| 608.311780741 45.7423173673
-pass ||| col ||| 0.3 0.3 0.3 0.3 0.1952 0.142393 0.6222 0.671744 2.718 ||| 0-0 ||| 1875.0 588.235465885
-pass ||| passeport retrouvé ||| 0.3 0.3 0.3 0.3 0.5 0.199258 0.0017 5.11945e-07 2.718 ||| 0-0 ||| 2.0 588.235465885
-pass ||| passeport ||| 0.3 0.3 0.3 0.3 0.280174 0.199258 0.0132359 0.0209644 2.718 ||| 0-0 ||| 4443.5097638 588.235465885
-sitzung ||| séance ||| 0.3 0.3 0.3 0.3 0.784412 0.59168 0.511045 0.552002 2.718 ||| 0-0 ||| 103459.335197 496.165860589
+ad ||| af ||| 0.3 0.3 0.3 0.3 0.115771 0.35574 0.472359 0.469238 ||| 0-0 ||| 25362.6029089 1074.23173673 ||| sparse_feature 1
+bd ||| bf ||| 0.3 0.3 0.3 0.3 0.115771 0.35574 0.472359 0.469238 ||| 0-0 ||| 253.626029089 10.7423173673 ||| 
+der gipfel ||| sommet ||| 0.3 0.3 0.3 0.3 0.00327135 0.00686984 0.0366795 0.617135 ||| 1-0 ||| 5808.0 518.0
+der pass ||| le col ||| 0.3 0.3 0.3 0.3 0.0173565 0.023534 0.284201 0.0972183 ||| 0-0 1-1 ||| 749.0 45.7423173673
+der pass ||| le passeport ||| 6e-10 6e-10 6e-10 6e-10 0.16 0.0329324 0.0064913 0.00303408 ||| 0-0 1-1 ||| 609.065072723 45.7423173673
+pass ||| col ||| 0.3 0.3 0.3 0.3 0.1952 0.142393 0.6222 0.671744 ||| 0-0 ||| 1875.0 588.235465885
+pass ||| passeport retrouvé ||| 0.3 0.3 0.3 0.3 0.5 0.199258 0.0017 5.11945e-07 ||| 0-0 ||| 2.0 588.235465885
+pass ||| passeport ||| 0.3 0.3 0.3 0.3 0.280174 0.199258 0.0132359 0.0209644 ||| 0-0 ||| 4448.99372942 588.235465885
+sitzung ||| séance ||| 0.3 0.3 0.3 0.3 0.784412 0.59168 0.511045 0.552002 ||| 0-0 ||| 103587.424966 496.165860589
--- a/contrib/tmcombine/test/phrase-table_test2
+++ b/contrib/tmcombine/test/phrase-table_test2
@ -1,9 +1,9 @@
-ad ||| af ||| 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 1000 1000
-bd ||| bf ||| 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 10 10
-der gipfel ||| sommet ||| 0.000327135 0.000793425 0.0073359 0.305702 2.718 ||| 1-0 ||| 5808 518
-der pass ||| le col ||| 0.00173565 0.00258742 0.0577778 0.0608095 2.718 ||| 0-0 1-1 ||| 749 45
-der pass ||| le passeport ||| 0.144 0.0278455 0.32 0.0374275 2.718 ||| 0-0 1-1 ||| 25 10
-pass ||| col ||| 0.01952 0.0130852 0.125773 0.340651 2.718 ||| 0-0 ||| 1875 582
-pass ||| passeport retrouvé ||| 0.05 0.0227273 0.000343642 1.9065e-07 2.718 ||| 0-0 ||| 2 582
-pass ||| passeport ||| 0.278865 0.197829 0.487089 0.343654 2.718 ||| 0-0 ||| 182 84
-sitzung ||| séance ||| 0.733342 0.56532 0.483911 0.492241 2.718 ||| 0-0 ||| 4251 6455
+ad ||| af ||| 0.14 0.136364 0.18 0.3 ||| 0-0 ||| 1000 1000
+bd ||| bf ||| 0.14 0.136364 0.18 0.3 ||| 0-0 ||| 10 10
+der gipfel ||| sommet ||| 0.000327135 0.000793425 0.0073359 0.305702 ||| 1-0 ||| 5808 518
+der pass ||| le col ||| 0.00173565 0.00258742 0.0577778 0.0608095 ||| 0-0 1-1 ||| 749 45
+der pass ||| le passeport ||| 0.144 0.0278455 0.32 0.0374275 ||| 0-0 1-1 ||| 25 10
+pass ||| col ||| 0.01952 0.0130852 0.125773 0.340651 ||| 0-0 ||| 1875 582
+pass ||| passeport retrouvé ||| 0.05 0.0227273 0.000343642 1.9065e-07 ||| 0-0 ||| 2 582
+pass ||| passeport ||| 0.278865 0.197829 0.487089 0.343654 ||| 0-0 ||| 15 582
+sitzung ||| séance ||| 0.733342 0.56532 0.483911 0.492241 ||| 0-0 ||| 22 17
--- a/contrib/tmcombine/test/phrase-table_test3
+++ b/contrib/tmcombine/test/phrase-table_test3
@ -1,9 +1,9 @@
-ad ||| af ||| 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 10000.0 5000.0
-bd ||| bf ||| 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 100.0 50.0
-der gipfel ||| sommet ||| 0.00327135 0.00569336 0.0366795 0.651018 2.718 ||| 1-0 ||| 5808.0 518.0
-der pass ||| le col ||| 0.0173565 0.0193836 0.152941 0.0675369 2.718 ||| 0-0 1-1 ||| 749.0 85.0
-der pass ||| le passeport ||| 0.16 0.0307772 0.188235 0.0128336 2.718 ||| 0-0 1-1 ||| 225.0 85.0
-pass ||| col ||| 0.1952 0.121573 0.398693 0.582296 2.718 ||| 0-0 ||| 1875.0 918.0
-pass ||| passeport retrouvé ||| 0.5 0.193033 0.00108932 1.16835e-06 2.718 ||| 0-0 ||| 2.0 918.0
-pass ||| passeport ||| 0.280097 0.193033 0.22658 0.11065 2.718 ||| 0-0 ||| 1653.0 918.0
-sitzung ||| séance ||| 0.784227 0.597753 0.516546 0.559514 2.718 ||| 0-0 ||| 38281.0 25837.0
+ad ||| af ||| 0.14 0.136364 0.18 0.3 ||| 0-0 ||| 10000.0 5000.0
+bd ||| bf ||| 0.14 0.136364 0.18 0.3 ||| 0-0 ||| 100.0 50.0
+der gipfel ||| sommet ||| 0.00327135 0.00569336 0.0366795 0.651018 ||| 1-0 ||| 5808.0 518.0
+der pass ||| le col ||| 0.0173565 0.0193836 0.152941 0.0675369 ||| 0-0 1-1 ||| 749.0 85.0
+der pass ||| le passeport ||| 0.16 0.0307772 0.188235 0.0128336 ||| 0-0 1-1 ||| 225.0 85.0
+pass ||| col ||| 0.1952 0.121573 0.398693 0.582296 ||| 0-0 ||| 1875.0 918.0
+pass ||| passeport retrouvé ||| 0.5 0.193033 0.00108932 1.16835e-06 ||| 0-0 ||| 2.0 918.0
+pass ||| passeport ||| 0.280097 0.193033 0.22658 0.11065 ||| 0-0 ||| 1653.0 918.0
+sitzung ||| séance ||| 0.784227 0.597753 0.516546 0.559514 ||| 0-0 ||| 38281.0 25837.0
--- a/contrib/tmcombine/test/phrase-table_test4
+++ b/contrib/tmcombine/test/phrase-table_test4
@ -1,8 +1,8 @@
-ad ||| af ||| 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 1000.0 1000.0
-bd ||| bf ||| 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 10.0 10.0
-der gipfel ||| sommet ||| 0.00327135 0.00872769 0.0366795 0.611404 2.718 ||| 1-0 ||| 5808.0 518.0
-der pass ||| le col ||| 0.0173565 0.0284616 0.288889 0.121619 2.718 ||| 0-0 1-1 ||| 749.0 45.0
-pass ||| col ||| 0.1952 0.143937 0.628866 0.681301 2.718 ||| 0-0 ||| 1875.0 582.0
-pass ||| passeport retrouvé ||| 0.5 0.25 0.00171821 3.80847e-07 2.718 ||| 0-0 ||| 2.0 582.0
-pass ||| passeport ||| 0.266667 0.25 0.00687285 0.0113821 2.718 ||| 0-0 ||| 15.0 582.0
-sitzung ||| séance ||| 0.272727 0.237288 0.352941 0.424242 2.718 ||| 0-0 ||| 22.0 17.0
+ad ||| af ||| 0.5 0.5 0.5 0.5 ||| 0-0 ||| 1000.0 1000.0
+bd ||| bf ||| 0.5 0.5 0.5 0.5 ||| 0-0 ||| 10.0 10.0
+der gipfel ||| sommet ||| 0.00327135 0.00872769 0.0366795 0.611404 ||| 1-0 ||| 5808.0 518.0
+der pass ||| le col ||| 0.0173565 0.0284616 0.288889 0.121619 ||| 0-0 1-1 ||| 749.0 45.0
+pass ||| col ||| 0.1952 0.143937 0.628866 0.681301 ||| 0-0 ||| 1875.0 582.0
+pass ||| passeport retrouvé ||| 0.5 0.25 0.00171821 3.80847e-07 ||| 0-0 ||| 2.0 582.0
+pass ||| passeport ||| 0.266667 0.25 0.00687285 0.0113821 ||| 0-0 ||| 15.0 582.0
+sitzung ||| séance ||| 0.272727 0.237288 0.352941 0.424242 ||| 0-0 ||| 22.0 17.0
--- a/contrib/tmcombine/test/phrase-table_test5
+++ b/contrib/tmcombine/test/phrase-table_test5
@ -1,9 +1,9 @@
-ad ||| af ||| 0.11579 0.35574 0.472359 0.469238 2.718 ||| 0-0 ||| 25332.4712297 1074.23173673
-bd ||| bf ||| 0.11579 0.35574 0.472359 0.469238 2.718 ||| 0-0 ||| 253.324712297 10.7423173673
-der gipfel ||| sommet ||| 0.00327135 0.00686984 0.0366795 0.617135 2.718 ||| 1-0 ||| 5808.0 518.0
-der pass ||| le col ||| 0.0173565 0.023534 0.284201 0.0972183 2.718 ||| 0-0 1-1 ||| 749.0 45.7423173673
-der pass ||| le passeport ||| 0.16 0.0329324 0.0064913 0.00303408 2.718 ||| 0-0 1-1 ||| 608.311780741 45.7423173673
-pass ||| col ||| 0.1952 0.142393 0.6222 0.671744 2.718 ||| 0-0 ||| 1875.0 588.235465885
-pass ||| passeport retrouvé ||| 0.5 0.199258 0.0017 5.11945e-07 2.718 ||| 0-0 ||| 2.0 588.235465885
-pass ||| passeport ||| 0.280174 0.199258 0.0132359 0.0209644 2.718 ||| 0-0 ||| 4443.5097638 588.235465885
-sitzung ||| séance ||| 0.784412 0.59168 0.511045 0.552002 2.718 ||| 0-0 ||| 103459.335197 496.165860589
+ad ||| af ||| 0.115771 0.35574 0.472359 0.469238 ||| 0-0 ||| 25362.6029089 1074.23173673
+bd ||| bf ||| 0.115771 0.35574 0.472359 0.469238 ||| 0-0 ||| 253.626029089 10.7423173673
+der gipfel ||| sommet ||| 0.00327135 0.00686984 0.0366795 0.617135 ||| 1-0 ||| 5808.0 518.0
+der pass ||| le col ||| 0.0173565 0.023534 0.284201 0.0972183 ||| 0-0 1-1 ||| 749.0 45.7423173673
+der pass ||| le passeport ||| 0.16 0.0329324 0.0064913 0.00303408 ||| 0-0 1-1 ||| 609.065072723 45.7423173673
+pass ||| col ||| 0.1952 0.142393 0.6222 0.671744 ||| 0-0 ||| 1875.0 588.235465885
+pass ||| passeport retrouvé ||| 0.5 0.199258 0.0017 5.11945e-07 ||| 0-0 ||| 2.0 588.235465885
+pass ||| passeport ||| 0.280174 0.199258 0.0132359 0.0209644 ||| 0-0 ||| 4448.99372942 588.235465885
+sitzung ||| séance ||| 0.784412 0.59168 0.511045 0.552002 ||| 0-0 ||| 103587.424966 496.165860589
--- a/contrib/tmcombine/test/phrase-table_test6
+++ b/contrib/tmcombine/test/phrase-table_test6
@ -1,4 +1,4 @@
-ad ||| af ||| 0.117462 0.117462 0.117462 0.117462 2.718 ||| 0-0 ||| 1000 1000
-bd ||| bf ||| 0.117462 0.117462 0.117462 0.117462 2.718 ||| 0-0 ||| 10 10
-pass ||| passeport ||| 0.278834 0.197701 0.387861 0.449295 2.718 ||| 0-0 ||| 182 84
-sitzung ||| séance ||| 0.705857 0.545304 0.497336 0.544877 2.718 ||| 0-0 ||| 4251 6455
+ad ||| af ||| 0.117462 0.117462 0.117462 0.117462 ||| 0-0 ||| 1000 1000
+bd ||| bf ||| 0.117462 0.117462 0.117462 0.117462 ||| 0-0 ||| 10 10
+pass ||| passeport ||| 0.278834 0.197701 0.387861 0.449295 ||| 0-0 ||| 15 582
+sitzung ||| séance ||| 0.705857 0.545304 0.497336 0.544877 ||| 0-0 ||| 22 17
--- a/contrib/tmcombine/test/phrase-table_test7
+++ b/contrib/tmcombine/test/phrase-table_test7
@ -1 +1 @@
-([(1.8744705606119034, 2.0752881273042374, 1.5025010618768841, 1.2370391973008494, 0, 0, 1, 1, 22), (0.35011602922315899, 0.74148657814725749, 0.95272965495298623, 0.83588062023889353, 1, 0, 0, 1, 22)], (1, 22, 20))
+([(1.8744705606119034, 2.0752881273042374, 1.5025010618768841, 1.2370391973008494, 0, 0, 1, 1, 22), (0.350116029223159, 0.7414865781472575, 0.9527296549529862, 0.8358806202388935, 1, 0, 0, 1, 22)], (1, 22, 20))
--- a/contrib/tmcombine/test/phrase-table_test8
+++ b/contrib/tmcombine/test/phrase-table_test8
@ -1,9 +1,9 @@
-ad ||| af ||| 0.242966 0.398085 0.483231 0.482814 2.718 ||| 0-0 ||| 2797.86490081 1043.7557397
-bd ||| bf ||| 0.102213 0.111367 0.174411 0.172867 2.718 ||| 0-0 ||| 1807.86490081 53.7557396976
-der gipfel ||| sommet ||| 0.00327135 0.00863717 0.0366795 0.612073 2.718 ||| 1-0 ||| 5808.0 518.0
-der pass ||| le col ||| 0.0173565 0.0260469 0.146469 0.113553 2.718 ||| 0-0 1-1 ||| 749.0 88.7557396976
-der pass ||| le passeport ||| 0.16 0.0389201 0.197196 0.0101009 2.718 ||| 0-0 1-1 ||| 1797.86490081 88.7557396976
-pass ||| col ||| 0.1952 0.131811 0.584893 0.63621 2.718 ||| 0-0 ||| 1875.0 625.755739698
-pass ||| passeport retrouvé ||| 0.5 0.196956 0.00159806 1.89355e-06 2.718 ||| 0-0 ||| 2.0 625.755739698
-pass ||| passeport ||| 0.280108 0.196956 0.0488465 0.0565932 2.718 ||| 0-0 ||| 1812.86490081 625.755739698
-sitzung ||| séance ||| 0.778334 0.545019 0.470846 0.502625 2.718 ||| 0-0 ||| 1819.86490081 60.7557396976
+ad ||| af ||| 0.242882 0.39808 0.483231 0.482813 ||| 0-0 ||| 2799.50876845 1043.75589858
+bd ||| bf ||| 0.102211 0.111366 0.17441 0.172864 ||| 0-0 ||| 1809.50876845 53.7558985771
+der gipfel ||| sommet ||| 0.00327135 0.00863716 0.0366795 0.612073 ||| 1-0 ||| 5808.0 518.0
+der pass ||| le col ||| 0.0173565 0.0260468 0.146469 0.113553 ||| 0-0 1-1 ||| 749.0 88.7558985771
+der pass ||| le passeport ||| 0.16 0.03892 0.197197 0.0101013 ||| 0-0 1-1 ||| 1799.50876845 88.7558985771
+pass ||| col ||| 0.1952 0.13181 0.584893 0.636208 ||| 0-0 ||| 1875.0 625.755898577
+pass ||| passeport retrouvé ||| 0.5 0.196956 0.00159806 1.89361e-06 ||| 0-0 ||| 2.0 625.755898577
+pass ||| passeport ||| 0.280108 0.196956 0.0488467 0.056595 ||| 0-0 ||| 1814.50876845 625.755898577
+sitzung ||| séance ||| 0.77834 0.545022 0.470846 0.502627 ||| 0-0 ||| 1821.50876845 60.7558985771
--- a/contrib/tmcombine/test/phrase-table_test9
+++ b/contrib/tmcombine/test/phrase-table_test9
@ -1,9 +1,9 @@
-ad ||| af ||| 0.45 0.45 0.45 0.45 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 10000.0 5000.0
-bd ||| bf ||| 0.45 0.45 0.45 0.45 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 100.0 50.0
-der gipfel ||| sommet ||| 0.15 0.15 0.15 0.15 0.00327135 0.00569336 0.0366795 0.651018 2.718 ||| 1-0 ||| 5808.0 518.0
-der pass ||| le col ||| 0.15 0.15 0.15 0.15 0.0173565 0.0193836 0.152941 0.0675369 2.718 ||| 0-0 1-1 ||| 749.0 85.0
-der pass ||| le passeport ||| 0.3 0.3 0.3 0.3 0.16 0.0307772 0.188235 0.0128336 2.718 ||| 0-0 1-1 ||| 225.0 85.0
-pass ||| col ||| 0.15 0.15 0.15 0.15 0.1952 0.121573 0.398693 0.582296 2.718 ||| 0-0 ||| 1875.0 918.0
-pass ||| passeport retrouvé ||| 0.15 0.15 0.15 0.15 0.5 0.193033 0.00108932 1.16835e-06 2.718 ||| 0-0 ||| 2.0 918.0
-pass ||| passeport ||| 0.45 0.45 0.45 0.45 0.280097 0.193033 0.22658 0.11065 2.718 ||| 0-0 ||| 1653.0 918.0
-sitzung ||| séance ||| 0.45 0.45 0.45 0.45 0.784227 0.597753 0.516546 0.559514 2.718 ||| 0-0 ||| 38281.0 25837.0
+ad ||| af ||| 0.45 0.45 0.45 0.45 0.14 0.136364 0.18 0.3 ||| 0-0 ||| 10000.0 5000.0 ||| sparse_feature 1
+bd ||| bf ||| 0.45 0.45 0.45 0.45 0.14 0.136364 0.18 0.3 ||| 0-0 ||| 100.0 50.0 ||| 
+der gipfel ||| sommet ||| 0.15 0.15 0.15 0.15 0.00327135 0.00569336 0.0366795 0.651018 ||| 1-0 ||| 5808.0 518.0
+der pass ||| le col ||| 0.15 0.15 0.15 0.15 0.0173565 0.0193836 0.152941 0.0675369 ||| 0-0 1-1 ||| 749.0 85.0
+der pass ||| le passeport ||| 0.3 0.3 0.3 0.3 0.16 0.0307772 0.188235 0.0128336 ||| 0-0 1-1 ||| 225.0 85.0
+pass ||| col ||| 0.15 0.15 0.15 0.15 0.1952 0.121573 0.398693 0.582296 ||| 0-0 ||| 1875.0 918.0
+pass ||| passeport retrouvé ||| 0.15 0.15 0.15 0.15 0.5 0.193033 0.00108932 1.16835e-06 ||| 0-0 ||| 2.0 918.0
+pass ||| passeport ||| 0.45 0.45 0.45 0.45 0.280097 0.193033 0.22658 0.11065 ||| 0-0 ||| 1653.0 918.0
+sitzung ||| séance ||| 0.45 0.45 0.45 0.45 0.784227 0.597753 0.516546 0.559514 ||| 0-0 ||| 38281.0 25837.0
--- a/contrib/tmcombine/tmcombine.py
+++ b/contrib/tmcombine/tmcombine.py
@ -106,7 +106,7 @@ class Moses():
                scores = line[2].split()
                if len(scores) <self.number_of_features:
                    sys.stderr.write('Error: model only has {0} features. Expected {1}.\n'.format(len(scores),self.number_of_features))
-                    exit()
+                    exit(1)
                    
                scores = scores[:self.number_of_features]
                model_probabilities = map(float,scores)
@ -114,7 +114,7 @@ class Moses():
                
                if mode == 'counts' and not priority == 2: #priority 2 is MAP
                    try:
-                        counts = map(float,line[-1].split())
+                        counts = map(float,line[4].split())
                        try:
                            target_count,src_count,joint_count = counts
                            joint_count_e2f = joint_count
@ -145,7 +145,7 @@ class Moses():
        if (store == 'all' or store == 'source') and not (filter_by_src and not src in filter_by_src):
            if mode == 'counts' and not priority == 2: #priority 2 is MAP
                try:
-                    self.phrase_source[src][i] = float(line[-1].split()[1])
+                    self.phrase_source[src][i] = float(line[4].split()[1])
                except:
                    sys.stderr.write(str(line)+'\n')
                    sys.stderr.write('ERROR: Counts are missing or misformatted. Maybe your phrase table is from an older Moses version that doesn\'t store counts or word alignment?\n')
@ -156,7 +156,7 @@ class Moses():
        if (store == 'all' or store == 'target') and not (filter_by_target and not target in filter_by_target):
            if mode == 'counts' and not priority == 2: #priority 2 is MAP
                try:
-                    self.phrase_target[target][i] = float(line[-1].split()[0])
+                    self.phrase_target[target][i] = float(line[4].split()[0])
                except:
                    sys.stderr.write(str(line)+'\n')
                    sys.stderr.write('ERROR: Counts are missing or misformatted. Maybe your phrase table is from an older Moses version that doesn\'t store counts or word alignment?\n')
@ -179,7 +179,7 @@ class Moses():
                reordering_probabilities[j][i] = p
        except IndexError:
            sys.stderr.write('\nIndexError: Did you correctly specify the number of reordering features? (--number_of_features N in command line)\n')
-            exit()
+            exit(1)

    def traverse_incrementally(self,table,models,load_lines,store_flag,mode='interpolate',inverted=False,lowmem=False,flags=None):
        """hack-ish way to find common phrase pairs in multiple models in one traversal without storing it all in memory
@ -210,6 +210,9 @@ class Moses():
                for line in model:

                    line = line.rstrip().split(b' ||| ')
+                    if line[-1].endswith(b' |||'):
+                      line[-1] = line[-1][:-4]
+                      line.append('')
                
                    if increment != line[0]:
                        stack[i] = line
@ -300,20 +303,21 @@ class Moses():
    def store_info(self,src,target,line):
        """store alignment info and comment section for re-use in output"""
        
-        if len(line) == 5:
-            self.phrase_pairs[src][target][1] = line[3:5]
+        if len(line) >= 5:
+            if not self.phrase_pairs[src][target][1]:
+                self.phrase_pairs[src][target][1] = line[3:]
        
        # assuming that alignment is empty
        elif len(line) == 4:
            if self.require_alignment:
                sys.stderr.write('Error: unexpected phrase table format. Your current configuration requires alignment information. Make sure you trained your model with -phrase-word-alignment (default in newer Moses versions)\n')
-                exit()
+                exit(1)
            
            self.phrase_pairs[src][target][1] = [b'',line[3].lstrip(b'| ')]
   
        else:
            sys.stderr.write('Error: unexpected phrase table format. Are you using a very old/new version of Moses with different formatting?\n')
-            exit()
+            exit(1)
   
   
    def get_word_alignments(self,src,target,cache=False,mycache={}):
@ -373,7 +377,8 @@ class Moses():
            return ''
        
        # information specific to Moses model: alignment info and comment section with target and source counts
-        alignment,comments = self.phrase_pairs[src][target][1]
+        additional_entries = self.phrase_pairs[src][target][1]
+        alignment = additional_entries[0]
        if alignment:
            extra_space = b' '
        else:
@ -384,7 +389,7 @@ class Moses():
            i_f2e = flags['i_f2e']
            srccount =  dot_product(self.phrase_source[src],weights[i_f2e])
            targetcount = dot_product(self.phrase_target[target],weights[i_e2f])
-            comments = b"%s %s" %(targetcount,srccount)
+            additional_entries[1] = b"%s %s" %(targetcount,srccount)
            
        features = b' '.join([b'%.6g' %(f) for f in features])
        
@ -397,7 +402,7 @@ class Moses():
          phrase_penalty = b' 2.718'
        else:
          phrase_penalty = b''
-        line = b"%s ||| %s ||| %s%s %s||| %s%s||| %s\n" %(src,target,features,origin_features,phrase_penalty,alignment,extra_space,comments)
+        line = b"%s ||| %s ||| %s%s %s||| %s%s||| %s\n" %(src,target,features,origin_features,phrase_penalty,alignment,extra_space,b' ||| '.join(additional_entries[1:]))
        return line
        
        
@ -473,8 +478,15 @@ class Moses():
        for line,line2 in izip(pt_normal,pt_inverse):
            
            line = line.split(b' ||| ')
+            if line[-1].endswith(b' |||'):
+                line[-1] = line[-1][:-4]
+                line.append('')
+
            line2 = line2.split(b' ||| ')
-            
+            if line2[-1].endswith(b' |||'):
+                line2[-1] = line2[-1][:-4]
+                line2.append('')
+
            #scores
            mid = int(self.number_of_features/2)
            scores1 = line[2].split()
@ -483,11 +495,11 @@ class Moses():
            
            # marginal counts
            if mode == 'counts':
-                src_count = line[-1].split()[1]
+                src_count = line[4].split()[1]
                target_count = line2[-1].split()[0]
-                line[-1] = b' '.join([target_count,src_count]) + b'\n'
+                line[4] = b' '.join([target_count,src_count])
            
-            pt_out.write(b' ||| '.join(line))
+            pt_out.write(b' ||| '.join(line)+ b'\n')
            
        pt_normal.close()
        pt_inverse.close()
@ -515,7 +527,7 @@ class TigerXML():
        
        if not src or not target:
            sys.stderr.write('Error: Source and/or target language not specified. Required for TigerXML extraction.\n')
-            exit()
+            exit(1)
        
        alignments = self._get_aligned_ids(src,target)
        self._textualize_alignments(src,target,alignments)
@ -685,7 +697,10 @@ class Moses_Alignment():
        for line in fileobj:
            
            line = line.split(b' ||| ')
-            
+            if line[-1].endswith(b' |||'):
+                line[-1] = line[-1][:-4]
+                line.append('')
+
            src = line[0]
            target = line[1]
            
@ -1261,7 +1276,7 @@ def handle_file(filename,action,fileobj=None,mode='r'):
                    sys.stderr.write('For a weighted counts combination, we need statistics that Moses doesn\'t write to disk by default.\n')
                    sys.stderr.write('Repeat step 4 of Moses training for all models with the option -write-lexical-counts.\n')
                
-                exit()
+                exit(1)

        if filename.endswith('.gz'):
            fileobj = gzip.open(filename,mode)
@ -1435,7 +1450,7 @@ class Combine_TMs():

        if mode not in ['interpolate','loglinear','counts']:
            sys.stderr.write('Error: mode must be either "interpolate", "loglinear" or "counts"\n')
-            sys.exit()
+            sys.exit(1)

        models,number_of_features,weights = self._sanity_checks(models,number_of_features,weights)
        
@ -1528,6 +1543,9 @@ class Combine_TMs():
                        sys.stderr.write('...'+str(j))
                    j += 1
                    line = line.rstrip().split(b' ||| ')
+                    if line[-1].endswith(b' |||'):
+                        line[-1] = line[-1][:-4]
+                        line.append('')
                    self.model_interface.load_phrase_features(line,priority,i,store='all',mode=self.mode,filter_by=self.reference_interface.word_pairs,filter_by_src=self.reference_interface.word_source,filter_by_target=self.reference_interface.word_target,flags=self.flags)
                sys.stderr.write(' done\n')

@ -1553,6 +1571,9 @@ class Combine_TMs():
                        sys.stderr.write('...'+str(j))
                    j += 1
                    line = line.rstrip().split(b' ||| ')
+                    if line[-1].endswith(b' |||'):
+                        line[-1] = line[-1][:-4]
+                        line.append('')
                    self.model_interface.load_phrase_features(line,priority,i,mode=self.mode,store='target',flags=self.flags)
                sys.stderr.write(' done\n')

--- a/jam-files/sanity.jam
+++ b/jam-files/sanity.jam
@ -288,11 +288,11 @@ rule failure-message ( ok ? ) {
      echo "If you need support, attach the full output to your e-mail." ;
    } else {
      echo "The build failed.  If you need support, run:" ;
-      echo "  $(args) --debug-configuration -d2 >build.log" ;
-      echo "then attach build.log to your e-mail." ;
+      echo "  $(args) --debug-configuration -d2 |gzip >build.log.gz" ;
+      echo "then attach build.log.gz to your e-mail." ;
      echo "You MUST do 3 things before sending to the mailing list:" ;
      echo "   1. Subscribe to the mailing list at http://mailman.mit.edu/mailman/listinfo/moses-support" ;
-      echo "   2. Zip up your build.log file before attaching it to the email" ;
+      echo "   2. Attach build.log.gz to your e-mail" ;
      echo "   3. Say what is the EXACT command you executed when you got the error" ;
    }
    echo "ERROR" ;
--- a/lm/bhiksha.cc
+++ b/lm/bhiksha.cc
@ -1,4 +1,6 @@
 #include "lm/bhiksha.hh"
+
+#include "lm/binary_format.hh"
 #include "lm/config.hh"
 #include "util/file.hh"
 #include "util/exception.hh"
@ -15,11 +17,11 @@ DontBhiksha::DontBhiksha(const void * /*base*/, uint64_t /*max_offset*/, uint64_
 const uint8_t kArrayBhikshaVersion = 0;

 // TODO: put this in binary file header instead when I change the binary file format again.  
-void ArrayBhiksha::UpdateConfigFromBinary(int fd, Config &config) {
-  uint8_t version;
-  uint8_t configured_bits;
-  util::ReadOrThrow(fd, &version, 1);
-  util::ReadOrThrow(fd, &configured_bits, 1);
+void ArrayBhiksha::UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config) {
+  uint8_t buffer[2];
+  file.ReadForConfig(buffer, 2, offset);
+  uint8_t version = buffer[0];
+  uint8_t configured_bits = buffer[1];
  if (version != kArrayBhikshaVersion) UTIL_THROW(FormatLoadException, "This file has sorted array compression version " << (unsigned) version << " but the code expects version " << (unsigned)kArrayBhikshaVersion);
  config.pointer_bhiksha_bits = configured_bits;
 }
@ -87,9 +89,6 @@ void ArrayBhiksha::FinishedLoading(const Config &config) {
  *(head_write++) = config.pointer_bhiksha_bits;
 }

-void ArrayBhiksha::LoadedBinary() {
-}
-
 } // namespace trie
 } // namespace ngram
 } // namespace lm
--- a/lm/bhiksha.hh
+++ b/lm/bhiksha.hh
@ -24,6 +24,7 @@
 namespace lm {
 namespace ngram {
 struct Config;
+class BinaryFormat;

 namespace trie {

@ -31,7 +32,7 @@ class DontBhiksha {
  public:
    static const ModelType kModelTypeAdd = static_cast<ModelType>(0);

-    static void UpdateConfigFromBinary(int /*fd*/, Config &/*config*/) {}
+    static void UpdateConfigFromBinary(const BinaryFormat &, uint64_t, Config &/*config*/) {}

    static uint64_t Size(uint64_t /*max_offset*/, uint64_t /*max_next*/, const Config &/*config*/) { return 0; }

@ -53,8 +54,6 @@ class DontBhiksha {

    void FinishedLoading(const Config &/*config*/) {}

-    void LoadedBinary() {}
-
    uint8_t InlineBits() const { return next_.bits; }

  private:
@ -65,7 +64,7 @@ class ArrayBhiksha {
  public:
    static const ModelType kModelTypeAdd = kArrayAdd;

-    static void UpdateConfigFromBinary(int fd, Config &config);
+    static void UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config);

    static uint64_t Size(uint64_t max_offset, uint64_t max_next, const Config &config);

@ -93,8 +92,6 @@ class ArrayBhiksha {

    void FinishedLoading(const Config &config);

-    void LoadedBinary();
-
    uint8_t InlineBits() const { return next_inline_.bits; }

  private:
--- a/lm/binary_format.cc
+++ b/lm/binary_format.cc
@ -14,6 +14,9 @@

 namespace lm {
 namespace ngram {
+
+const char *kModelNames[6] = {"probing hash tables", "probing hash tables with rest costs", "trie", "trie with quantization", "trie with array-compressed pointers", "trie with quantization and array-compressed pointers"};
+
 namespace {
 const char kMagicBeforeVersion[] = "mmap lm http://kheafield.com/code format version";
 const char kMagicBytes[] = "mmap lm http://kheafield.com/code format version 5\n\0";
@ -58,8 +61,6 @@ struct Sanity {
  }
 };

-const char *kModelNames[6] = {"probing hash tables", "probing hash tables with rest costs", "trie", "trie with quantization", "trie with array-compressed pointers", "trie with quantization and array-compressed pointers"};
-
 std::size_t TotalHeaderSize(unsigned char order) {
  return ALIGN8(sizeof(Sanity) + sizeof(FixedWidthParameters) + sizeof(uint64_t) * order);
 }
@ -81,83 +82,6 @@ void WriteHeader(void *to, const Parameters &params) {

 } // namespace

-uint8_t *SetupJustVocab(const Config &config, uint8_t order, std::size_t memory_size, Backing &backing) {
-  if (config.write_mmap) {
-    std::size_t total = TotalHeaderSize(order) + memory_size;
-    backing.file.reset(util::CreateOrThrow(config.write_mmap));
-    if (config.write_method == Config::WRITE_MMAP) {
-      backing.vocab.reset(util::MapZeroedWrite(backing.file.get(), total), total, util::scoped_memory::MMAP_ALLOCATED);
-    } else {
-      util::ResizeOrThrow(backing.file.get(), 0);
-      util::MapAnonymous(total, backing.vocab);
-    }
-    strncpy(reinterpret_cast<char*>(backing.vocab.get()), kMagicIncomplete, TotalHeaderSize(order));
-    return reinterpret_cast<uint8_t*>(backing.vocab.get()) + TotalHeaderSize(order);
-  } else {
-    util::MapAnonymous(memory_size, backing.vocab);
-    return reinterpret_cast<uint8_t*>(backing.vocab.get());
-  }
-}
-
-uint8_t *GrowForSearch(const Config &config, std::size_t vocab_pad, std::size_t memory_size, Backing &backing) {
-  std::size_t adjusted_vocab = backing.vocab.size() + vocab_pad;
-  if (config.write_mmap) {
-    // Grow the file to accomodate the search, using zeros.
-    try {
-      util::ResizeOrThrow(backing.file.get(), adjusted_vocab + memory_size);
-    } catch (util::ErrnoException &e) {
-      e << " for file " << config.write_mmap;
-      throw e;
-    }
-
-    if (config.write_method == Config::WRITE_AFTER) {
-      util::MapAnonymous(memory_size, backing.search);
-      return reinterpret_cast<uint8_t*>(backing.search.get());
-    }
-    // mmap it now.
-    // We're skipping over the header and vocab for the search space mmap.  mmap likes page aligned offsets, so some arithmetic to round the offset down.
-    std::size_t page_size = util::SizePage();
-    std::size_t alignment_cruft = adjusted_vocab % page_size;
-    backing.search.reset(util::MapOrThrow(alignment_cruft + memory_size, true, util::kFileFlags, false, backing.file.get(), adjusted_vocab - alignment_cruft), alignment_cruft + memory_size, util::scoped_memory::MMAP_ALLOCATED);
-    return reinterpret_cast<uint8_t*>(backing.search.get()) + alignment_cruft;
-  } else {
-    util::MapAnonymous(memory_size, backing.search);
-    return reinterpret_cast<uint8_t*>(backing.search.get());
-  }
-}
-
-void FinishFile(const Config &config, ModelType model_type, unsigned int search_version, const std::vector<uint64_t> &counts, std::size_t vocab_pad, Backing &backing) {
-  if (!config.write_mmap) return;
-  switch (config.write_method) {
-    case Config::WRITE_MMAP:
-      util::SyncOrThrow(backing.vocab.get(), backing.vocab.size());
-      util::SyncOrThrow(backing.search.get(), backing.search.size());
-      break;
-    case Config::WRITE_AFTER:
-      util::SeekOrThrow(backing.file.get(), 0);
-      util::WriteOrThrow(backing.file.get(), backing.vocab.get(), backing.vocab.size());
-      util::SeekOrThrow(backing.file.get(), backing.vocab.size() + vocab_pad);
-      util::WriteOrThrow(backing.file.get(), backing.search.get(), backing.search.size());
-      util::FSyncOrThrow(backing.file.get());
-      break;
-  }
-  // header and vocab share the same mmap.  The header is written here because we know the counts.
-  Parameters params = Parameters();
-  params.counts = counts;
-  params.fixed.order = counts.size();
-  params.fixed.probing_multiplier = config.probing_multiplier;
-  params.fixed.model_type = model_type;
-  params.fixed.has_vocabulary = config.include_vocab;
-  params.fixed.search_version = search_version;
-  WriteHeader(backing.vocab.get(), params);
-  if (config.write_method == Config::WRITE_AFTER) {
-    util::SeekOrThrow(backing.file.get(), 0);
-    util::WriteOrThrow(backing.file.get(), backing.vocab.get(), TotalHeaderSize(counts.size()));
-  }
-}
-
-namespace detail {
-
 bool IsBinaryFormat(int fd) {
  const uint64_t size = util::SizeFile(fd);
  if (size == util::kBadSize || (size <= static_cast<uint64_t>(sizeof(Sanity)))) return false;
@ -209,44 +133,164 @@ void MatchCheck(ModelType model_type, unsigned int search_version, const Paramet
  UTIL_THROW_IF(search_version != params.fixed.search_version, FormatLoadException, "The binary file has " << kModelNames[params.fixed.model_type] << " version " << params.fixed.search_version << " but this code expects " << kModelNames[params.fixed.model_type] << " version " << search_version);
 }

-void SeekPastHeader(int fd, const Parameters &params) {
-  util::SeekOrThrow(fd, TotalHeaderSize(params.counts.size()));
+const std::size_t kInvalidSize = static_cast<std::size_t>(-1);
+
+BinaryFormat::BinaryFormat(const Config &config) 
+  : write_method_(config.write_method), write_mmap_(config.write_mmap), load_method_(config.load_method),
+    header_size_(kInvalidSize), vocab_size_(kInvalidSize), vocab_string_offset_(kInvalidOffset) {}
+
+void BinaryFormat::InitializeBinary(int fd, ModelType model_type, unsigned int search_version, Parameters &params) {
+  file_.reset(fd);
+  write_mmap_ = NULL; // Ignore write requests; this is already in binary format.
+  ReadHeader(fd, params);
+  MatchCheck(model_type, search_version, params);
+  header_size_ = TotalHeaderSize(params.counts.size());
 }

-uint8_t *SetupBinary(const Config &config, const Parameters &params, uint64_t memory_size, Backing &backing) {
-  const uint64_t file_size = util::SizeFile(backing.file.get());
+void BinaryFormat::ReadForConfig(void *to, std::size_t amount, uint64_t offset_excluding_header) const {
+  assert(header_size_ != kInvalidSize);
+  util::PReadOrThrow(file_.get(), to, amount, offset_excluding_header + header_size_);
+}
+
+void *BinaryFormat::LoadBinary(std::size_t size) {
+  assert(header_size_ != kInvalidSize);
+  const uint64_t file_size = util::SizeFile(file_.get());
  // The header is smaller than a page, so we have to map the whole header as well.
-  std::size_t total_map = util::CheckOverflow(TotalHeaderSize(params.counts.size()) + memory_size);
-  if (file_size != util::kBadSize && static_cast<uint64_t>(file_size) < total_map)
-    UTIL_THROW(FormatLoadException, "Binary file has size " << file_size << " but the headers say it should be at least " << total_map);
+  uint64_t total_map = static_cast<uint64_t>(header_size_) + static_cast<uint64_t>(size);
+  UTIL_THROW_IF(file_size != util::kBadSize && file_size < total_map, FormatLoadException, "Binary file has size " << file_size << " but the headers say it should be at least " << total_map);

-  util::MapRead(config.load_method, backing.file.get(), 0, total_map, backing.search);
+  util::MapRead(load_method_, file_.get(), 0, util::CheckOverflow(total_map), mapping_);

-  if (config.enumerate_vocab && !params.fixed.has_vocabulary)
-    UTIL_THROW(FormatLoadException, "The decoder requested all the vocabulary strings, but this binary file does not have them.  You may need to rebuild the binary file with an updated version of build_binary.");
-
-  // Seek to vocabulary words
-  util::SeekOrThrow(backing.file.get(), total_map);
-  return reinterpret_cast<uint8_t*>(backing.search.get()) + TotalHeaderSize(params.counts.size());
+  vocab_string_offset_ = total_map;
+  return reinterpret_cast<uint8_t*>(mapping_.get()) + header_size_;
 }

-void ComplainAboutARPA(const Config &config, ModelType model_type) {
-  if (config.write_mmap || !config.messages) return;
-  if (config.arpa_complain == Config::ALL) {
-    *config.messages << "Loading the LM will be faster if you build a binary file." << std::endl;
-  } else if (config.arpa_complain == Config::EXPENSIVE &&
-             (model_type == TRIE || model_type == QUANT_TRIE || model_type == ARRAY_TRIE || model_type == QUANT_ARRAY_TRIE)) {
-    *config.messages << "Building " << kModelNames[model_type] << " from ARPA is expensive.  Save time by building a binary format." << std::endl;
+void *BinaryFormat::SetupJustVocab(std::size_t memory_size, uint8_t order) {
+  vocab_size_ = memory_size;
+  if (!write_mmap_) {
+    header_size_ = 0;
+    util::MapAnonymous(memory_size, memory_vocab_);
+    return reinterpret_cast<uint8_t*>(memory_vocab_.get());
+  }
+  header_size_ = TotalHeaderSize(order);
+  std::size_t total = util::CheckOverflow(static_cast<uint64_t>(header_size_) + static_cast<uint64_t>(memory_size));
+  file_.reset(util::CreateOrThrow(write_mmap_));
+  // some gccs complain about uninitialized variables even though all enum values are covered.
+  void *vocab_base = NULL;
+  switch (write_method_) {
+    case Config::WRITE_MMAP:
+      mapping_.reset(util::MapZeroedWrite(file_.get(), total), total, util::scoped_memory::MMAP_ALLOCATED);
+      vocab_base = mapping_.get();
+      break;
+    case Config::WRITE_AFTER:
+      util::ResizeOrThrow(file_.get(), 0);
+      util::MapAnonymous(total, memory_vocab_);
+      vocab_base = memory_vocab_.get();
+      break;
+  }
+  strncpy(reinterpret_cast<char*>(vocab_base), kMagicIncomplete, header_size_);
+  return reinterpret_cast<uint8_t*>(vocab_base) + header_size_;
+}
+
+void *BinaryFormat::GrowForSearch(std::size_t memory_size, std::size_t vocab_pad, void *&vocab_base) {
+  assert(vocab_size_ != kInvalidSize);
+  vocab_pad_ = vocab_pad;
+  std::size_t new_size = header_size_ + vocab_size_ + vocab_pad_ + memory_size;
+  vocab_string_offset_ = new_size;
+  if (!write_mmap_ || write_method_ == Config::WRITE_AFTER) {
+    util::MapAnonymous(memory_size, memory_search_);
+    assert(header_size_ == 0 || write_mmap_);
+    vocab_base = reinterpret_cast<uint8_t*>(memory_vocab_.get()) + header_size_;
+    return reinterpret_cast<uint8_t*>(memory_search_.get());
+  }
+
+  assert(write_method_ == Config::WRITE_MMAP);
+  // Also known as total size without vocab words.
+  // Grow the file to accommodate the search, using zeros.
+  // According to man mmap, behavior is undefined when the file is resized
+  // underneath a mmap that is not a multiple of the page size.  So to be
+  // safe, we'll unmap it and map it again.
+  mapping_.reset();
+  util::ResizeOrThrow(file_.get(), new_size);
+  void *ret;
+  MapFile(vocab_base, ret);
+  return ret;
+}
+
+void BinaryFormat::WriteVocabWords(const std::string &buffer, void *&vocab_base, void *&search_base) {
+  // Checking Config's include_vocab is the responsibility of the caller.
+  assert(header_size_ != kInvalidSize && vocab_size_ != kInvalidSize);
+  if (!write_mmap_) {
+    // Unchanged base.
+    vocab_base = reinterpret_cast<uint8_t*>(memory_vocab_.get());
+    search_base = reinterpret_cast<uint8_t*>(memory_search_.get());
+    return;
+  }
+  if (write_method_ == Config::WRITE_MMAP) {
+    mapping_.reset();
+  }
+  util::SeekOrThrow(file_.get(), VocabStringReadingOffset());
+  util::WriteOrThrow(file_.get(), &buffer[0], buffer.size());
+  if (write_method_ == Config::WRITE_MMAP) {
+    MapFile(vocab_base, search_base);
+  } else {
+    vocab_base = reinterpret_cast<uint8_t*>(memory_vocab_.get()) + header_size_;
+    search_base = reinterpret_cast<uint8_t*>(memory_search_.get());
  }
 }

-} // namespace detail
+void BinaryFormat::FinishFile(const Config &config, ModelType model_type, unsigned int search_version, const std::vector<uint64_t> &counts) {
+  if (!write_mmap_) return; // Nothing to finish when no binary file is being written.
+  switch (write_method_) {
+    case Config::WRITE_MMAP:
+      util::SyncOrThrow(mapping_.get(), mapping_.size());
+      break;
+    case Config::WRITE_AFTER:
+      util::SeekOrThrow(file_.get(), 0);
+      util::WriteOrThrow(file_.get(), memory_vocab_.get(), memory_vocab_.size());
+      util::SeekOrThrow(file_.get(), header_size_ + vocab_size_ + vocab_pad_);
+      util::WriteOrThrow(file_.get(), memory_search_.get(), memory_search_.size());
+      util::FSyncOrThrow(file_.get());
+      break;
+  }
+  // The data above is flushed before the real header is written below.  header and vocab share the same mmap.
+  Parameters params = Parameters();
+  memset(&params.fixed, 0, sizeof(params.fixed)); // Zero only the scalar part: params.counts is a std::vector and must not be memset.
+  params.counts = counts;
+  params.fixed.order = counts.size();
+  params.fixed.probing_multiplier = config.probing_multiplier;
+  params.fixed.model_type = model_type;
+  params.fixed.has_vocabulary = config.include_vocab;
+  params.fixed.search_version = search_version;
+  switch (write_method_) {
+    case Config::WRITE_MMAP:
+      WriteHeader(mapping_.get(), params);
+      util::SyncOrThrow(mapping_.get(), mapping_.size());
+      break;
+    case Config::WRITE_AFTER:
+      {
+        std::vector<uint8_t> buffer(TotalHeaderSize(counts.size()));
+        WriteHeader(&buffer[0], params);
+        util::SeekOrThrow(file_.get(), 0);
+        util::WriteOrThrow(file_.get(), &buffer[0], buffer.size());
+      }
+      break;
+  }
+}
+
+void BinaryFormat::MapFile(void *&vocab_base, void *&search_base) {
+  mapping_.reset(util::MapOrThrow(vocab_string_offset_, true, util::kFileFlags, false, file_.get()), vocab_string_offset_, util::scoped_memory::MMAP_ALLOCATED);
+  vocab_base = reinterpret_cast<uint8_t*>(mapping_.get()) + header_size_;
+  search_base = reinterpret_cast<uint8_t*>(mapping_.get()) + header_size_ + vocab_size_ + vocab_pad_;
+}

 bool RecognizeBinary(const char *file, ModelType &recognized) {
  util::scoped_fd fd(util::OpenReadOrThrow(file));
-  if (!detail::IsBinaryFormat(fd.get())) return false;
+  if (!IsBinaryFormat(fd.get())) {
+    return false;
+  }
  Parameters params;
-  detail::ReadHeader(fd.get(), params);
+  ReadHeader(fd.get(), params);
  recognized = params.fixed.model_type;
  return true;
 }
--- a/lm/binary_format.hh
+++ b/lm/binary_format.hh
@ -17,6 +17,8 @@
 namespace lm {
 namespace ngram {

+extern const char *kModelNames[6];
+
 /*Inspect a file to determine if it is a binary lm.  If not, return false.  
 * If so, return true and set recognized to the type.  This is the only API in
 * this header designed for use by decoder authors.  
@ -42,67 +44,63 @@ struct Parameters {
  std::vector<uint64_t> counts;
 };

-struct Backing {
-  // File behind memory, if any.  
-  util::scoped_fd file;
-  // Vocabulary lookup table.  Not to be confused with the vocab words themselves.  
-  util::scoped_memory vocab;
-  // Raw block of memory backing the language model data structures
-  util::scoped_memory search;
+class BinaryFormat {
+  public:
+    explicit BinaryFormat(const Config &config);
+
+    // Reading a binary file:
+    // Takes ownership of fd
+    void InitializeBinary(int fd, ModelType model_type, unsigned int search_version, Parameters &params);
+    // Used to read parts of the file to update the config object before figuring out full size.
+    void ReadForConfig(void *to, std::size_t amount, uint64_t offset_excluding_header) const;
+    // Actually load the binary file and return a pointer to the beginning of the search area.
+    void *LoadBinary(std::size_t size);
+
+    uint64_t VocabStringReadingOffset() const {
+      assert(vocab_string_offset_ != kInvalidOffset);
+      return vocab_string_offset_;
+    }
+
+    // Writing a binary file or initializing in RAM from ARPA:
+    // Size for vocabulary.
+    void *SetupJustVocab(std::size_t memory_size, uint8_t order);
+    // Warning: can change the vocabulary base pointer.
+    void *GrowForSearch(std::size_t memory_size, std::size_t vocab_pad, void *&vocab_base);
+    // Warning: can change vocabulary and search base addresses.
+    void WriteVocabWords(const std::string &buffer, void *&vocab_base, void *&search_base);
+    // Write the header at the beginning of the file.
+    void FinishFile(const Config &config, ModelType model_type, unsigned int search_version, const std::vector<uint64_t> &counts);
+
+  private:
+    void MapFile(void *&vocab_base, void *&search_base);
+
+    // Copied from configuration.
+    const Config::WriteMethod write_method_;
+    const char *write_mmap_;
+    util::LoadMethod load_method_;
+
+    // File behind memory, if any.  
+    util::scoped_fd file_;
+
+    // If there is a file involved, a single mapping.
+    util::scoped_memory mapping_;
+
+    // If the data is only in memory, separately allocate each because the trie
+    // knows vocab's size before it knows search's size (because SRILM might
+    // have pruned).
+    util::scoped_memory memory_vocab_, memory_search_;
+
+    // Memory ranges.  Note that these may not be contiguous and may not all
+    // exist.
+    std::size_t header_size_, vocab_size_, vocab_pad_;
+    // aka end of search.
+    uint64_t vocab_string_offset_;
+
+    static const uint64_t kInvalidOffset = (uint64_t)-1;
 };

-// Create just enough of a binary file to write vocabulary to it.  
-uint8_t *SetupJustVocab(const Config &config, uint8_t order, std::size_t memory_size, Backing &backing);
-// Grow the binary file for the search data structure and set backing.search, returning the memory address where the search data structure should begin.  
-uint8_t *GrowForSearch(const Config &config, std::size_t vocab_pad, std::size_t memory_size, Backing &backing);
-
-// Write header to binary file.  This is done last to prevent incomplete files
-// from loading.   
-void FinishFile(const Config &config, ModelType model_type, unsigned int search_version, const std::vector<uint64_t> &counts,  std::size_t vocab_pad, Backing &backing);
-
-namespace detail {
-
 bool IsBinaryFormat(int fd);

-void ReadHeader(int fd, Parameters &params);
-
-void MatchCheck(ModelType model_type, unsigned int search_version, const Parameters &params);
-
-void SeekPastHeader(int fd, const Parameters &params);
-
-uint8_t *SetupBinary(const Config &config, const Parameters &params, uint64_t memory_size, Backing &backing);
-
-void ComplainAboutARPA(const Config &config, ModelType model_type);
-
-} // namespace detail
-
-template <class To> void LoadLM(const char *file, const Config &config, To &to) {
-  Backing &backing = to.MutableBacking();
-  backing.file.reset(util::OpenReadOrThrow(file));
-
-  try {
-    if (detail::IsBinaryFormat(backing.file.get())) {
-      Parameters params;
-      detail::ReadHeader(backing.file.get(), params);
-      detail::MatchCheck(To::kModelType, To::kVersion, params);
-      // Replace the run-time configured probing_multiplier with the one in the file.  
-      Config new_config(config);
-      new_config.probing_multiplier = params.fixed.probing_multiplier;
-      detail::SeekPastHeader(backing.file.get(), params);
-      To::UpdateConfigFromBinary(backing.file.get(), params.counts, new_config);
-      uint64_t memory_size = To::Size(params.counts, new_config);
-      uint8_t *start = detail::SetupBinary(new_config, params, memory_size, backing);
-      to.InitializeFromBinary(start, params, new_config, backing.file.get());
-    } else {
-      detail::ComplainAboutARPA(config, To::kModelType);
-      to.InitializeFromARPA(file, config);
-    }
-  } catch (util::Exception &e) {
-    e << " File: " << file;
-    throw;
-  }
-}
-
 } // namespace ngram
 } // namespace lm
 #endif // LM_BINARY_FORMAT__
--- a/lm/builder/corpus_count.cc
+++ b/lm/builder/corpus_count.cc
@ -87,7 +87,7 @@ class VocabHandout {
    Table table_;

    std::size_t double_cutoff_;
-
+    
    util::FakeOFStream word_list_;
 };

@ -98,7 +98,7 @@ class DedupeHash : public std::unary_function<const WordIndex *, bool> {
    std::size_t operator()(const WordIndex *start) const {
      return util::MurmurHashNative(start, size_);
    }
-
+    
  private:
    const std::size_t size_;
 };
@ -106,11 +106,11 @@ class DedupeHash : public std::unary_function<const WordIndex *, bool> {
 class DedupeEquals : public std::binary_function<const WordIndex *, const WordIndex *, bool> {
  public:
    explicit DedupeEquals(std::size_t order) : size_(order * sizeof(WordIndex)) {}
-
+    
    bool operator()(const WordIndex *first, const WordIndex *second) const {
      return !memcmp(first, second, size_);
-    }
-
+    } 
+    
  private:
    const std::size_t size_;
 };
@ -131,7 +131,7 @@ typedef util::ProbingHashTable<DedupeEntry, DedupeHash, DedupeEquals> Dedupe;

 class Writer {
  public:
-    Writer(std::size_t order, const util::stream::ChainPosition &position, void *dedupe_mem, std::size_t dedupe_mem_size)
+    Writer(std::size_t order, const util::stream::ChainPosition &position, void *dedupe_mem, std::size_t dedupe_mem_size) 
      : block_(position), gram_(block_->Get(), order),
        dedupe_invalid_(order, std::numeric_limits<WordIndex>::max()),
        dedupe_(dedupe_mem, dedupe_mem_size, &dedupe_invalid_[0], DedupeHash(order), DedupeEquals(order)),
@ -140,7 +140,7 @@ class Writer {
      dedupe_.Clear();
      assert(Dedupe::Size(position.GetChain().BlockSize() / position.GetChain().EntrySize(), kProbingMultiplier) == dedupe_mem_size);
      if (order == 1) {
-        // Add special words.  AdjustCounts is responsible if order != 1.
+        // Add special words.  AdjustCounts is responsible if order != 1.    
        AddUnigramWord(kUNK);
        AddUnigramWord(kBOS);
      }
@ -170,16 +170,16 @@ class Writer {
        memmove(gram_.begin(), gram_.begin() + 1, sizeof(WordIndex) * (gram_.Order() - 1));
        return;
      }
-      // Complete the write.
+      // Complete the write.  
      gram_.Count() = 1;
-      // Prepare the next n-gram.
+      // Prepare the next n-gram.  
      if (reinterpret_cast<uint8_t*>(gram_.begin()) + gram_.TotalSize() != static_cast<uint8_t*>(block_->Get()) + block_size_) {
        NGram last(gram_);
        gram_.NextInMemory();
        std::copy(last.begin() + 1, last.end(), gram_.begin());
        return;
      }
-      // Block end.  Need to store the context in a temporary buffer.
+      // Block end.  Need to store the context in a temporary buffer.  
      std::copy(gram_.begin() + 1, gram_.end(), buffer_.get());
      dedupe_.Clear();
      block_->SetValidSize(block_size_);
@ -207,7 +207,7 @@ class Writer {
    // Hash table combiner implementation.
    Dedupe dedupe_;

-    // Small buffer to hold existing ngrams when shifting across a block boundary.
+    // Small buffer to hold existing ngrams when shifting across a block boundary.  
    boost::scoped_array<WordIndex> buffer_;

    const std::size_t block_size_;
@ -223,7 +223,7 @@ std::size_t CorpusCount::VocabUsage(std::size_t vocab_estimate) {
  return VocabHandout::MemUsage(vocab_estimate);
 }

-CorpusCount::CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::size_t entries_per_block)
+CorpusCount::CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::size_t entries_per_block) 
  : from_(from), vocab_write_(vocab_write), token_count_(token_count), type_count_(type_count),
    dedupe_mem_size_(Dedupe::Size(entries_per_block, kProbingMultiplier)),
    dedupe_mem_(util::MallocOrThrow(dedupe_mem_size_)) {
@ -240,7 +240,10 @@ void CorpusCount::Run(const util::stream::ChainPosition &position) {
  uint64_t count = 0;
  bool delimiters[256];
  memset(delimiters, 0, sizeof(delimiters));
-  delimiters['\0'] = delimiters['\t'] = delimiters['\n'] = delimiters['\r'] = delimiters[' '] = true;
+  const char kDelimiterSet[] = "\0\t\n\r ";
+  for (const char *i = kDelimiterSet; i < kDelimiterSet + sizeof(kDelimiterSet); ++i) {
+    delimiters[static_cast<unsigned char>(*i)] = true;
+  }
  try {
    while(true) {
      StringPiece line(from_.ReadLine());
--- a/lm/builder/interpolate.cc
+++ b/lm/builder/interpolate.cc
@ -33,12 +33,12 @@ class Callback {
      pay.complete.prob = pay.uninterp.prob + pay.uninterp.gamma * probs_[order_minus_1];
      probs_[order_minus_1 + 1] = pay.complete.prob;
      pay.complete.prob = log10(pay.complete.prob);
-      // TODO: this is a hack to skip n-grams that don't appear as context.  Pruning will require some different handling.
-      if (order_minus_1 < backoffs_.size() && *(gram.end() - 1) != kUNK && *(gram.end() - 1) != kEOS && backoffs_[order_minus_1].Get()) { // check valid pointer at tht end
+      // TODO: this is a hack to skip n-grams that don't appear as context.  Pruning will require some different handling.  
+      if (order_minus_1 < backoffs_.size() && *(gram.end() - 1) != kUNK && *(gram.end() - 1) != kEOS) {
        pay.complete.backoff = log10(*static_cast<const float*>(backoffs_[order_minus_1].Get()));
        ++backoffs_[order_minus_1];
      } else {
-        // Not a context.
+        // Not a context.  
        pay.complete.backoff = 0.0;
      }
    }
@ -52,7 +52,7 @@ class Callback {
 };
 } // namespace

-Interpolate::Interpolate(uint64_t unigram_count, const ChainPositions &backoffs)
+Interpolate::Interpolate(uint64_t unigram_count, const ChainPositions &backoffs) 
  : uniform_prob_(1.0 / static_cast<float>(unigram_count - 1)), backoffs_(backoffs) {}

 // perform order-wise interpolation
--- a/lm/config.cc
+++ b/lm/config.cc
@ -11,11 +11,7 @@ Config::Config() :
  enumerate_vocab(NULL),
  unknown_missing(COMPLAIN),
  sentence_marker_missing(THROW_UP),
-#if defined(_WIN32) || defined(_WIN64)
-  positive_log_probability(SILENT),
-#else
  positive_log_probability(THROW_UP),
-#endif
  unknown_missing_logprob(-100.0),
  probing_multiplier(1.5),
  building_memory(1073741824ULL), // 1 GB
--- a/lm/facade.hh
+++ b/lm/facade.hh
@ -17,14 +17,14 @@ template <class Child, class StateT, class VocabularyT> class ModelFacade : publ
    typedef VocabularyT Vocabulary;

    /* Translate from void* to State */
-    FullScoreReturn FullScore(const void *in_state, const WordIndex new_word, void *out_state) const {
+    FullScoreReturn BaseFullScore(const void *in_state, const WordIndex new_word, void *out_state) const {
      return static_cast<const Child*>(this)->FullScore(
          *reinterpret_cast<const State*>(in_state),
          new_word,
          *reinterpret_cast<State*>(out_state));
    }

-    FullScoreReturn FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, void *out_state) const {
+    FullScoreReturn BaseFullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, void *out_state) const {
      return static_cast<const Child*>(this)->FullScoreForgotState(
          context_rbegin,
          context_rend,
@ -37,7 +37,7 @@ template <class Child, class StateT, class VocabularyT> class ModelFacade : publ
      return static_cast<const Child*>(this)->FullScore(in_state, new_word, out_state).prob;
    }

-    float Score(const void *in_state, const WordIndex new_word, void *out_state) const {
+    float BaseScore(const void *in_state, const WordIndex new_word, void *out_state) const {
      return static_cast<const Child*>(this)->Score(
          *reinterpret_cast<const State*>(in_state),
          new_word,
--- a/lm/filter/arpa_io.hh
+++ b/lm/filter/arpa_io.hh
@ -14,10 +14,6 @@
 #include <string>
 #include <vector>

-#if !defined __MINGW32__
-#include <err.h>
-#endif
-
 #include <string.h>
 #include <stdint.h>

--- a/lm/filter/count_io.hh
+++ b/lm/filter/count_io.hh
@ -5,27 +5,18 @@
 #include <iostream>
 #include <string>

-#if !defined __MINGW32__
-#include <err.h>
-#endif
-
+#include "util/fake_ofstream.hh"
+#include "util/file.hh"
 #include "util/file_piece.hh"

 namespace lm {

 class CountOutput : boost::noncopyable {
  public:
-    explicit CountOutput(const char *name) : file_(name, std::ios::out) {}
+    explicit CountOutput(const char *name) : file_(util::CreateOrThrow(name)) {}

    void AddNGram(const StringPiece &line) {
-      if (!(file_ << line << '\n')) {
-#if defined __MINGW32__
-        std::cerr<<"Writing counts file failed"<<std::endl;
-        exit(3);
-#else
-        err(3, "Writing counts file failed");
-#endif
-      }
+      file_ << line << '\n';
    }

    template <class Iterator> void AddNGram(const Iterator &begin, const Iterator &end, const StringPiece &line) {
@ -37,12 +28,12 @@ class CountOutput : boost::noncopyable {
    }

  private:
-    std::fstream file_;
+    util::FakeOFStream file_;
 };

 class CountBatch {
  public:
-    explicit CountBatch(std::streamsize initial_read)
+    explicit CountBatch(std::streamsize initial_read) 
      : initial_read_(initial_read) {
      buffer_.reserve(initial_read);
    }
@ -75,7 +66,7 @@ class CountBatch {
  private:
    std::streamsize initial_read_;

-    // This could have been a std::string but that's less happy with raw writes.
+    // This could have been a std::string but that's less happy with raw writes.  
    std::vector<char> buffer_;
 };

--- a/lm/filter/filter_main.cc
+++ b/lm/filter/filter_main.cc
@ -6,6 +6,7 @@
 #endif
 #include "lm/filter/vocab.hh"
 #include "lm/filter/wrapper.hh"
+#include "util/exception.hh"
 #include "util/file_piece.hh"

 #include <boost/ptr_container/ptr_vector.hpp>
@ -57,7 +58,7 @@ typedef enum {MODE_COPY, MODE_SINGLE, MODE_MULTIPLE, MODE_UNION, MODE_UNSET} Fil
 typedef enum {FORMAT_ARPA, FORMAT_COUNT} Format;

 struct Config {
-  Config() :
+  Config() : 
 #ifndef NTHREAD
  batch_size(25000),
  threads(boost::thread::hardware_concurrency()),
@ -157,102 +158,96 @@ template <class Format> void DispatchFilterModes(const Config &config, std::istr
 } // namespace lm

 int main(int argc, char *argv[]) {
-  if (argc < 4) {
-    lm::DisplayHelp(argv[0]);
-    return 1;
-  }
-
-  // I used to have boost::program_options, but some users didn't want to compile boost.
-  lm::Config config;
-  config.mode = lm::MODE_UNSET;
-  for (int i = 1; i < argc - 2; ++i) {
-    const char *str = argv[i];
-    if (!std::strcmp(str, "copy")) {
-      config.mode = lm::MODE_COPY;
-    } else if (!std::strcmp(str, "single")) {
-      config.mode = lm::MODE_SINGLE;
-    } else if (!std::strcmp(str, "multiple")) {
-      config.mode = lm::MODE_MULTIPLE;
-    } else if (!std::strcmp(str, "union")) {
-      config.mode = lm::MODE_UNION;
-    } else if (!std::strcmp(str, "phrase")) {
-      config.phrase = true;
-    } else if (!std::strcmp(str, "context")) {
-      config.context = true;
-    } else if (!std::strcmp(str, "arpa")) {
-      config.format = lm::FORMAT_ARPA;
-    } else if (!std::strcmp(str, "raw")) {
-      config.format = lm::FORMAT_COUNT;
-#ifndef NTHREAD
-    } else if (!std::strncmp(str, "threads:", 8)) {
-      config.threads = boost::lexical_cast<size_t>(str + 8);
-      if (!config.threads) {
-        std::cerr << "Specify at least one thread." << std::endl;
-        return 1;
-      }
-    } else if (!std::strncmp(str, "batch_size:", 11)) {
-      config.batch_size = boost::lexical_cast<size_t>(str + 11);
-      if (config.batch_size < 5000) {
-        std::cerr << "Batch size must be at least one and should probably be >= 5000" << std::endl;
-        if (!config.batch_size) return 1;
-      }
-#endif
-    } else {
+  try {
+    if (argc < 4) {
      lm::DisplayHelp(argv[0]);
      return 1;
    }
-  }

-  if (config.mode == lm::MODE_UNSET) {
-    lm::DisplayHelp(argv[0]);
-    return 1;
-  }
-
-  if (config.phrase && config.mode != lm::MODE_UNION && config.mode != lm::MODE_MULTIPLE) {
-    std::cerr << "Phrase constraint currently only works in multiple or union mode.  If you really need it for single, put everything on one line and use union." << std::endl;
-    return 1;
-  }
-
-  bool cmd_is_model = true;
-  const char *cmd_input = argv[argc - 2];
-  if (!strncmp(cmd_input, "vocab:", 6)) {
-    cmd_is_model = false;
-    cmd_input += 6;
-  } else if (!strncmp(cmd_input, "model:", 6)) {
-    cmd_input += 6;
-  } else if (strchr(cmd_input, ':')) {
-#if defined __MINGW32__
-    std::cerr << "Specify vocab: or model: before the input file name, not " << cmd_input << std::endl;
-    exit(1);
-#else
-    errx(1, "Specify vocab: or model: before the input file name, not \"%s\"", cmd_input);
-#endif // defined
-  } else {
-    std::cerr << "Assuming that " << cmd_input << " is a model file" << std::endl;
-  }
-  std::ifstream cmd_file;
-  std::istream *vocab;
-  if (cmd_is_model) {
-    vocab = &std::cin;
-  } else {
-    cmd_file.open(cmd_input, std::ios::in);
-    if (!cmd_file) {
-#if defined __MINGW32__
-      std::cerr << "Could not open input file " << cmd_input << std::endl;
-      exit(2);
-#else
-      err(2, "Could not open input file %s", cmd_input);
-#endif // defined
+    // I used to have boost::program_options, but some users didn't want to compile boost.
+    lm::Config config;
+    config.mode = lm::MODE_UNSET;
+    for (int i = 1; i < argc - 2; ++i) {
+      const char *str = argv[i];
+      if (!std::strcmp(str, "copy")) {
+        config.mode = lm::MODE_COPY;
+      } else if (!std::strcmp(str, "single")) {
+        config.mode = lm::MODE_SINGLE;
+      } else if (!std::strcmp(str, "multiple")) {
+        config.mode = lm::MODE_MULTIPLE;
+      } else if (!std::strcmp(str, "union")) {
+        config.mode = lm::MODE_UNION;
+      } else if (!std::strcmp(str, "phrase")) {
+        config.phrase = true;
+      } else if (!std::strcmp(str, "context")) {
+        config.context = true;
+      } else if (!std::strcmp(str, "arpa")) {
+        config.format = lm::FORMAT_ARPA;
+      } else if (!std::strcmp(str, "raw")) {
+        config.format = lm::FORMAT_COUNT;
+#ifndef NTHREAD
+      } else if (!std::strncmp(str, "threads:", 8)) {
+        config.threads = boost::lexical_cast<size_t>(str + 8);
+        if (!config.threads) {
+          std::cerr << "Specify at least one thread." << std::endl;
+          return 1;
+        }
+      } else if (!std::strncmp(str, "batch_size:", 11)) {
+        config.batch_size = boost::lexical_cast<size_t>(str + 11);
+        if (config.batch_size < 5000) {
+          std::cerr << "Batch size must be at least one and should probably be >= 5000" << std::endl;
+          if (!config.batch_size) return 1;
+        }
+#endif
+      } else {
+        lm::DisplayHelp(argv[0]);
+        return 1;
+      }
    }
-    vocab = &cmd_file;
-  }

-  util::FilePiece model(cmd_is_model ? util::OpenReadOrThrow(cmd_input) : 0, cmd_is_model ? cmd_input : NULL, &std::cerr);
+    if (config.mode == lm::MODE_UNSET) {
+      lm::DisplayHelp(argv[0]);
+      return 1;
+    }

-  if (config.format == lm::FORMAT_ARPA) {
-    lm::DispatchFilterModes<lm::ARPAFormat>(config, *vocab, model, argv[argc - 1]);
-  } else if (config.format == lm::FORMAT_COUNT) {
-    lm::DispatchFilterModes<lm::CountFormat>(config, *vocab, model, argv[argc - 1]);
+    if (config.phrase && config.mode != lm::MODE_UNION && config.mode != lm::MODE_MULTIPLE) {
+      std::cerr << "Phrase constraint currently only works in multiple or union mode.  If you really need it for single, put everything on one line and use union." << std::endl;
+      return 1;
+    }
+
+    bool cmd_is_model = true;
+    const char *cmd_input = argv[argc - 2];
+    if (!strncmp(cmd_input, "vocab:", 6)) {
+      cmd_is_model = false;
+      cmd_input += 6;
+    } else if (!strncmp(cmd_input, "model:", 6)) {
+      cmd_input += 6;
+    } else if (strchr(cmd_input, ':')) {
+      std::cerr << "Specify vocab: or model: before the input file name, not " << cmd_input << std::endl;
+      return 1;
+    } else {
+      std::cerr << "Assuming that " << cmd_input << " is a model file" << std::endl;
+    }
+    std::ifstream cmd_file;
+    std::istream *vocab;
+    if (cmd_is_model) {
+      vocab = &std::cin;
+    } else {
+      cmd_file.open(cmd_input, std::ios::in);
+      UTIL_THROW_IF(!cmd_file, util::ErrnoException, "Failed to open " << cmd_input);
+      vocab = &cmd_file;
+    }
+
+    util::FilePiece model(cmd_is_model ? util::OpenReadOrThrow(cmd_input) : 0, cmd_is_model ? cmd_input : NULL, &std::cerr);
+
+    if (config.format == lm::FORMAT_ARPA) {
+      lm::DispatchFilterModes<lm::ARPAFormat>(config, *vocab, model, argv[argc - 1]);
+    } else if (config.format == lm::FORMAT_COUNT) {
+      lm::DispatchFilterModes<lm::CountFormat>(config, *vocab, model, argv[argc - 1]);
+    }
+    return 0;
+  } catch (const std::exception &e) {
+    std::cerr << e.what() << std::endl;
+    return 1;
  }
-  return 0;
 }
--- a/lm/filter/format.hh
+++ b/lm/filter/format.hh
@ -1,5 +1,5 @@
 #ifndef LM_FILTER_FORMAT_H__
-#define LM_FITLER_FORMAT_H__
+#define LM_FILTER_FORMAT_H__

 #include "lm/filter/arpa_io.hh"
 #include "lm/filter/count_io.hh"
--- a/lm/filter/vocab.cc
+++ b/lm/filter/vocab.cc
@ -5,10 +5,6 @@

 #include <ctype.h>

-#if !defined __MINGW32__
-#include <err.h>
-#endif
-
 namespace lm {
 namespace vocab {

@ -34,7 +30,7 @@ bool IsLineEnd(std::istream &in) {
 }// namespace

 // Read space separated words in enter separated lines.  These lines can be
-// very long, so don't read an entire line at a time.
+// very long, so don't read an entire line at a time.  
 unsigned int ReadMultiple(std::istream &in, boost::unordered_map<std::string, std::vector<unsigned int> > &out) {
  in.exceptions(std::istream::badbit);
  unsigned int sentence = 0;
--- a/lm/model.cc
+++ b/lm/model.cc
@ -34,8 +34,47 @@ template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT
  if (static_cast<std::size_t>(start - static_cast<uint8_t*>(base)) != goal_size) UTIL_THROW(FormatLoadException, "The data structures took " << (start - static_cast<uint8_t*>(base)) << " but Size says they should take " << goal_size);
 }

-template <class Search, class VocabularyT> GenericModel<Search, VocabularyT>::GenericModel(const char *file, const Config &config) {
-  LoadLM(file, config, *this);
+namespace {
+void ComplainAboutARPA(const Config &config, ModelType model_type) {
+  if (config.write_mmap || !config.messages) return;
+  if (config.arpa_complain == Config::ALL) {
+    *config.messages << "Loading the LM will be faster if you build a binary file." << std::endl;
+  } else if (config.arpa_complain == Config::EXPENSIVE &&
+             (model_type == TRIE || model_type == QUANT_TRIE || model_type == ARRAY_TRIE || model_type == QUANT_ARRAY_TRIE)) {
+    *config.messages << "Building " << kModelNames[model_type] << " from ARPA is expensive.  Save time by building a binary format." << std::endl;
+  }
+}
+
+void CheckCounts(const std::vector<uint64_t> &counts) {
+  UTIL_THROW_IF(counts.size() > KENLM_MAX_ORDER, FormatLoadException, "This model has order " << counts.size() << " but KenLM was compiled to support up to " << KENLM_MAX_ORDER << ".  " << KENLM_ORDER_MESSAGE);
+  if (sizeof(uint64_t) > sizeof(std::size_t)) {
+    for (std::vector<uint64_t>::const_iterator i = counts.begin(); i != counts.end(); ++i) {
+      UTIL_THROW_IF(*i > static_cast<uint64_t>(std::numeric_limits<size_t>::max()), util::OverflowException, "This model has " << *i << " " << (i - counts.begin() + 1) << "-grams which is too many for 32-bit machines.");
+    }
+  }
+}
+
+} // namespace
+
+template <class Search, class VocabularyT> GenericModel<Search, VocabularyT>::GenericModel(const char *file, const Config &init_config) : backing_(init_config) {
+  util::scoped_fd fd(util::OpenReadOrThrow(file));
+  if (IsBinaryFormat(fd.get())) {
+    Parameters parameters;
+    int fd_shallow = fd.release();
+    backing_.InitializeBinary(fd_shallow, kModelType, kVersion, parameters);
+    CheckCounts(parameters.counts);
+
+    Config new_config(init_config);
+    new_config.probing_multiplier = parameters.fixed.probing_multiplier;
+    Search::UpdateConfigFromBinary(backing_, parameters.counts, VocabularyT::Size(parameters.counts[0], new_config), new_config);
+    UTIL_THROW_IF(new_config.enumerate_vocab && !parameters.fixed.has_vocabulary, FormatLoadException, "The decoder requested all the vocabulary strings, but this binary file does not have them.  You may need to rebuild the binary file with an updated version of build_binary.");
+
+    SetupMemory(backing_.LoadBinary(Size(parameters.counts, new_config)), parameters.counts, new_config);
+    vocab_.LoadedBinary(parameters.fixed.has_vocabulary, fd_shallow, new_config.enumerate_vocab, backing_.VocabStringReadingOffset());
+  } else {
+    ComplainAboutARPA(init_config, kModelType);
+    InitializeFromARPA(fd.release(), file, init_config);
+  }

  // g++ prints warnings unless these are fully initialized.
  State begin_sentence = State();
@ -50,27 +89,9 @@ template <class Search, class VocabularyT> GenericModel<Search, VocabularyT>::Ge
  P::Init(begin_sentence, null_context, vocab_, search_.Order());
 }

-namespace {
-void CheckCounts(const std::vector<uint64_t> &counts) {
-  UTIL_THROW_IF(counts.size() > KENLM_MAX_ORDER, FormatLoadException, "This model has order " << counts.size() << " but KenLM was compiled to support up to " << KENLM_MAX_ORDER << ".  " << KENLM_ORDER_MESSAGE);
-  if (sizeof(uint64_t) > sizeof(std::size_t)) {
-    for (std::vector<uint64_t>::const_iterator i = counts.begin(); i != counts.end(); ++i) {
-      UTIL_THROW_IF(*i > static_cast<uint64_t>(std::numeric_limits<size_t>::max()), util::OverflowException, "This model has " << *i << " " << (i - counts.begin() + 1) << "-grams which is too many for 32-bit machines.");
-    }
-  }
-}
-} // namespace
-
-template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::InitializeFromBinary(void *start, const Parameters &params, const Config &config, int fd) {
-  CheckCounts(params.counts);
-  SetupMemory(start, params.counts, config);
-  vocab_.LoadedBinary(params.fixed.has_vocabulary, fd, config.enumerate_vocab);
-  search_.LoadedBinary();
-}
-
-template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::InitializeFromARPA(const char *file, const Config &config) {
-  // Backing file is the ARPA.  Steal it so we can make the backing file the mmap output if any.
-  util::FilePiece f(backing_.file.release(), file, config.ProgressMessages());
+template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::InitializeFromARPA(int fd, const char *file, const Config &config) {
+  // Backing file is the ARPA.
+  util::FilePiece f(fd, file, config.ProgressMessages());
  try {
    std::vector<uint64_t> counts;
    // File counts do not include pruned trigrams that extend to quadgrams etc.   These will be fixed by search_.
@ -81,13 +102,17 @@ template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT

    std::size_t vocab_size = util::CheckOverflow(VocabularyT::Size(counts[0], config));
    // Setup the binary file for writing the vocab lookup table.  The search_ is responsible for growing the binary file to its needs.
-    vocab_.SetupMemory(SetupJustVocab(config, counts.size(), vocab_size, backing_), vocab_size, counts[0], config);
+    vocab_.SetupMemory(backing_.SetupJustVocab(vocab_size, counts.size()), vocab_size, counts[0], config);

-    if (config.write_mmap) {
+    if (config.write_mmap && config.include_vocab) {
      WriteWordsWrapper wrap(config.enumerate_vocab);
      vocab_.ConfigureEnumerate(&wrap, counts[0]);
      search_.InitializeFromARPA(file, f, counts, config, vocab_, backing_);
-      wrap.Write(backing_.file.get(), backing_.vocab.size() + vocab_.UnkCountChangePadding() + Search::Size(counts, config));
+      void *vocab_rebase, *search_rebase;
+      backing_.WriteVocabWords(wrap.Buffer(), vocab_rebase, search_rebase);
+      // Due to writing at the end of file, mmap may have relocated data.  So remap.
+      vocab_.Relocate(vocab_rebase);
+      search_.SetupMemory(reinterpret_cast<uint8_t*>(search_rebase), counts, config);
    } else {
      vocab_.ConfigureEnumerate(config.enumerate_vocab, counts[0]);
      search_.InitializeFromARPA(file, f, counts, config, vocab_, backing_);
@ -99,18 +124,13 @@ template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT
      search_.UnknownUnigram().backoff = 0.0;
      search_.UnknownUnigram().prob = config.unknown_missing_logprob;
    }
-    FinishFile(config, kModelType, kVersion, counts, vocab_.UnkCountChangePadding(), backing_);
+    backing_.FinishFile(config, kModelType, kVersion, counts);
  } catch (util::Exception &e) {
    e << " Byte: " << f.Offset();
    throw;
  }
 }

-template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::UpdateConfigFromBinary(int fd, const std::vector<uint64_t> &counts, Config &config) {
-  util::AdvanceOrThrow(fd, VocabularyT::Size(counts[0], config));
-  Search::UpdateConfigFromBinary(fd, counts, config);
-}
-
 template <class Search, class VocabularyT> FullScoreReturn GenericModel<Search, VocabularyT>::FullScore(const State &in_state, const WordIndex new_word, State &out_state) const {
  FullScoreReturn ret = ScoreExceptBackoff(in_state.words, in_state.words + in_state.length, new_word, out_state);
  for (const float *i = in_state.backoff + ret.ngram_length - 1; i < in_state.backoff + in_state.length; ++i) {
--- a/lm/model.hh
+++ b/lm/model.hh
@ -104,10 +104,6 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
    }

  private:
-    friend void lm::ngram::LoadLM<>(const char *file, const Config &config, GenericModel<Search, VocabularyT> &to);
-
-    static void UpdateConfigFromBinary(int fd, const std::vector<uint64_t> &counts, Config &config);
-
    FullScoreReturn ScoreExceptBackoff(const WordIndex *const context_rbegin, const WordIndex *const context_rend, const WordIndex new_word, State &out_state) const;

    // Score bigrams and above.  Do not include backoff.   
@ -116,15 +112,11 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
    // Appears after Size in the cc file.
    void SetupMemory(void *start, const std::vector<uint64_t> &counts, const Config &config);

-    void InitializeFromBinary(void *start, const Parameters &params, const Config &config, int fd);
-
-    void InitializeFromARPA(const char *file, const Config &config);
+    void InitializeFromARPA(int fd, const char *file, const Config &config);

    float InternalUnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const;

-    Backing &MutableBacking() { return backing_; }
-
-    Backing backing_;
+    BinaryFormat backing_;
    
    VocabularyT vocab_;

--- a/lm/model_test.cc
+++ b/lm/model_test.cc
@ -360,10 +360,11 @@ BOOST_AUTO_TEST_CASE(quant_bhiksha_trie) {
  LoadingTest<QuantArrayTrieModel>();
 }

-template <class ModelT> void BinaryTest() {
+template <class ModelT> void BinaryTest(Config::WriteMethod write_method) {
  Config config;
  config.write_mmap = "test.binary";
  config.messages = NULL;
+  config.write_method = write_method;
  ExpectEnumerateVocab enumerate;
  config.enumerate_vocab = &enumerate;

@ -406,6 +407,11 @@ template <class ModelT> void BinaryTest() {
  unlink("test_nounk.binary");
 }

+template <class ModelT> void BinaryTest() {
+  BinaryTest<ModelT>(Config::WRITE_MMAP);
+  BinaryTest<ModelT>(Config::WRITE_AFTER);
+}
+
 BOOST_AUTO_TEST_CASE(write_and_read_probing) {
  BinaryTest<ProbingModel>();
 }
--- a/lm/quantize.cc
+++ b/lm/quantize.cc
@ -38,13 +38,13 @@ const char kSeparatelyQuantizeVersion = 2;

 } // namespace

-void SeparatelyQuantize::UpdateConfigFromBinary(int fd, const std::vector<uint64_t> &/*counts*/, Config &config) {
-  char version;
-  util::ReadOrThrow(fd, &version, 1);
-  util::ReadOrThrow(fd, &config.prob_bits, 1);
-  util::ReadOrThrow(fd, &config.backoff_bits, 1);
+void SeparatelyQuantize::UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config) {
+  unsigned char buffer[3];
+  file.ReadForConfig(buffer, 3, offset);
+  char version = buffer[0];
+  config.prob_bits = buffer[1];
+  config.backoff_bits = buffer[2];
  if (version != kSeparatelyQuantizeVersion) UTIL_THROW(FormatLoadException, "This file has quantization version " << (unsigned)version << " but the code expects version " << (unsigned)kSeparatelyQuantizeVersion);
-  util::AdvanceOrThrow(fd, -3);
 }

 void SeparatelyQuantize::SetupMemory(void *base, unsigned char order, const Config &config) {
--- a/Show More
+++ b/Show More