Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Lane Schwartz 2016-03-23 10:26:07 -05:00
commit 167def1d52
78 changed files with 3386 additions and 1223 deletions

View File

@ -208,7 +208,7 @@ if [ option.get "with-icu" : : "yes" ]
# for probing pt
external-lib boost_serialization ;
requirements += <library>boost_serialization ;
requirements += <library>boost_serialization/<runtime-link>static ;
if [ option.get "with-vw" ] {
requirements += <define>HAVE_VW ;
@ -247,6 +247,7 @@ if [ option.get "with-mm-extras" : : "yes" ]
moses/TranslationModel/UG//bitext-find
moses/TranslationModel/UG//ptable-describe-features
moses/TranslationModel/UG//count-ptable-features
moses/TranslationModel/UG//ptable-sigtest-filter
moses/TranslationModel/UG//ptable-lookup
moses/TranslationModel/UG//ptable-lookup-corpus
moses/TranslationModel/UG//check-coverage

View File

@ -93,7 +93,7 @@ void SuffixArray::Create(const string& fileName )
CheckAllocation(m_sentenceLength != NULL, "m_sentenceLength");
if (m_useDocument) {
m_document = (INDEX*) calloc( sizeof( INDEX ), m_documentCount );
m_documentName = (INDEX*) calloc( sizeof( char ), m_documentCount );
m_documentName = (INDEX*) calloc( sizeof( INDEX ), m_documentCount );
m_documentNameBuffer = (char*) calloc( sizeof( char ), m_documentNameLength );
CheckAllocation(m_document != NULL, "m_document");
CheckAllocation(m_documentName != NULL, "m_documentName");

View File

@ -11,12 +11,12 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -72,13 +72,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.701931933" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@ -55,6 +55,41 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h</locationURI>
</link>
<link>
<name>SentenceAlignmentWithSyntax.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.cpp</locationURI>
</link>
<link>
<name>SentenceAlignmentWithSyntax.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h</locationURI>
</link>
<link>
<name>SyntaxNodeCollection.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.cpp</locationURI>
</link>
<link>
<name>SyntaxNodeCollection.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.h</locationURI>
</link>
<link>
<name>XmlException.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlException.h</locationURI>
</link>
<link>
<name>XmlTree.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.cpp</locationURI>
</link>
<link>
<name>XmlTree.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h</locationURI>
</link>
<link>
<name>extract-main.cpp</name>
<type>1</type>

View File

@ -11,11 +11,11 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -74,7 +74,7 @@
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.871386239" name="LDHT.h" rcbsApplicability="disable" resourcePath="LM/LDHT.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1761300858" name="ParallelBackoff.h" rcbsApplicability="disable" resourcePath="LM/ParallelBackoff.h" toolsToInvoke=""/>
<sourceEntries>
<entry excluding="TranslationModel/UG/ptable-lookup.cc|TranslationModel/UG/ptable-lookup-corpus.cc|TranslationModel/UG/mm/test-http-client.cc|TranslationModel/UG/ptable-describe-features.cc|TranslationModel/UG/count-ptable-features.cc|TranslationModel/UG/try-align2.cc|TranslationModel/UG/try-align.cc|TranslationModel/UG/spe-check-coverage3.cc|TranslationModel/UG/spe-check-coverage2.cc|TranslationModel/UG/spe-check-coverage.cc|TranslationModel/UG/sim-pe.cc|TranslationModel/UG/generic/stringdist|TranslationModel/UG/mm/test-dynamic-im-tsa.cc|TranslationModel/UG/mm/mtt.count.cc|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/bilingual-lm|LM/MaxEntSRI.h|LM/MaxEntSRI.cpp|LM/BilingualLM.h|LM/BilingualLM.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp|LM/ORLM.h|LM/ORLM.cpp|LM/NeuralLMWrapper.h|LM/NeuralLMWrapper.cpp|LM/SRI.h|LM/SRI.cpp|LM/IRST.h|LM/IRST.cpp|LM/DALMWrapper.h|LM/DALMWrapper.cpp|LM/oxlm|TranslationModel/UG/util" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
<entry excluding="TranslationModel/UG/ptable-lookup.cc|TranslationModel/UG/ptable-lookup-corpus.cc|TranslationModel/UG/mm/test-http-client.cc|TranslationModel/UG/ptable-describe-features.cc|TranslationModel/UG/count-ptable-features.cc|TranslationModel/UG/try-align2.cc|TranslationModel/UG/try-align.cc|TranslationModel/UG/spe-check-coverage3.cc|TranslationModel/UG/spe-check-coverage2.cc|TranslationModel/UG/spe-check-coverage.cc|TranslationModel/UG/sim-pe.cc|TranslationModel/UG/generic/stringdist|TranslationModel/UG/mm/test-dynamic-im-tsa.cc|TranslationModel/UG/mm/mtt.count.cc|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/bilingual-lm|LM/MaxEntSRI.h|LM/MaxEntSRI.cpp|LM/BilingualLM.h|LM/BilingualLM.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp|LM/ORLM.h|LM/ORLM.cpp|LM/NeuralLMWrapper.h|LM/NeuralLMWrapper.cpp|LM/SRI.h|LM/SRI.cpp|LM/IRST.h|LM/IRST.cpp|LM/DALMWrapper.h|LM/DALMWrapper.cpp|LM/oxlm|TranslationModel/UG/util" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
@ -84,12 +84,12 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1911984684" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@ -1625,6 +1625,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetBigramFeature.h</locationURI>
</link>
<link>
<name>FF/TargetConstituentAdjacencyFeature.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetConstituentAdjacencyFeature.cpp</locationURI>
</link>
<link>
<name>FF/TargetConstituentAdjacencyFeature.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetConstituentAdjacencyFeature.h</locationURI>
</link>
<link>
<name>FF/TargetNgramFeature.cpp</name>
<type>1</type>
@ -1635,6 +1645,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetNgramFeature.h</locationURI>
</link>
<link>
<name>FF/TargetPreferencesFeature.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetPreferencesFeature.cpp</locationURI>
</link>
<link>
<name>FF/TargetPreferencesFeature.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetPreferencesFeature.h</locationURI>
</link>
<link>
<name>FF/TargetWordInsertionFeature.cpp</name>
<type>1</type>
@ -1995,6 +2015,36 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/SpanLengthPhraseProperty.h</locationURI>
</link>
<link>
<name>PP/TargetConstituentBoundariesLeftPhraseProperty.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/TargetConstituentBoundariesLeftPhraseProperty.cpp</locationURI>
</link>
<link>
<name>PP/TargetConstituentBoundariesLeftPhraseProperty.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h</locationURI>
</link>
<link>
<name>PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.cpp</locationURI>
</link>
<link>
<name>PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h</locationURI>
</link>
<link>
<name>PP/TargetPreferencesPhraseProperty.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/TargetPreferencesPhraseProperty.cpp</locationURI>
</link>
<link>
<name>PP/TargetPreferencesPhraseProperty.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/TargetPreferencesPhraseProperty.h</locationURI>
</link>
<link>
<name>PP/TreeStructurePhraseProperty.h</name>
<type>1</type>
@ -2495,6 +2545,56 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SyntaxOptions.h</locationURI>
</link>
<link>
<name>FF/LexicalReordering/BidirectionalReorderingState.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/BidirectionalReorderingState.cpp</locationURI>
</link>
<link>
<name>FF/LexicalReordering/BidirectionalReorderingState.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/BidirectionalReorderingState.h</locationURI>
</link>
<link>
<name>FF/LexicalReordering/HReorderingBackwardState.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/HReorderingBackwardState.cpp</locationURI>
</link>
<link>
<name>FF/LexicalReordering/HReorderingBackwardState.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/HReorderingBackwardState.h</locationURI>
</link>
<link>
<name>FF/LexicalReordering/HReorderingForwardState.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/HReorderingForwardState.cpp</locationURI>
</link>
<link>
<name>FF/LexicalReordering/HReorderingForwardState.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/HReorderingForwardState.h</locationURI>
</link>
<link>
<name>FF/LexicalReordering/LRModel.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/LRModel.cpp</locationURI>
</link>
<link>
<name>FF/LexicalReordering/LRModel.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/LRModel.h</locationURI>
</link>
<link>
<name>FF/LexicalReordering/LRState.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/LRState.cpp</locationURI>
</link>
<link>
<name>FF/LexicalReordering/LRState.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/LRState.h</locationURI>
</link>
<link>
<name>FF/LexicalReordering/LexicalReordering.cpp</name>
<type>1</type>
@ -2505,16 +2605,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/LexicalReordering.h</locationURI>
</link>
<link>
<name>FF/LexicalReordering/LexicalReorderingState.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/LexicalReorderingState.cpp</locationURI>
</link>
<link>
<name>FF/LexicalReordering/LexicalReorderingState.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/LexicalReorderingState.h</locationURI>
</link>
<link>
<name>FF/LexicalReordering/LexicalReorderingTable.cpp</name>
<type>1</type>
@ -2525,6 +2615,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/LexicalReorderingTable.h</locationURI>
</link>
<link>
<name>FF/LexicalReordering/PhraseBasedReorderingState.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/PhraseBasedReorderingState.cpp</locationURI>
</link>
<link>
<name>FF/LexicalReordering/PhraseBasedReorderingState.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/PhraseBasedReorderingState.h</locationURI>
</link>
<link>
<name>FF/LexicalReordering/ReorderingStack.cpp</name>
<type>1</type>

View File

@ -37,4 +37,4 @@ for local p in [ glob *_main.cc ] {
exes += $(name) ;
}
alias programs : $(exes) filter//filter builder//dump_counts : <threading>multi:<source>builder//lmplz ;
alias programs : $(exes) filter//filter filter//phrase_table_vocab builder//dump_counts : <threading>multi:<source>builder//lmplz ;

View File

@ -1,26 +1,31 @@
#include "DistortionScoreProducer.h"
#include "FFState.h"
#include "moses/InputPath.h"
#include "moses/Range.h"
#include "moses/StaticData.h"
#include "moses/Hypothesis.h"
#include "moses/Manager.h"
#include "moses/FactorCollection.h"
#include <cmath>
using namespace std;
namespace Moses
{
struct DistortionState_traditional : public FFState {
struct DistortionState : public FFState {
Range range;
int first_gap;
DistortionState_traditional(const Range& wr, int fg) : range(wr), first_gap(fg) {}
bool inSubordinateConjunction;
DistortionState(const Range& wr, int fg, bool subord=false) : range(wr), first_gap(fg), inSubordinateConjunction(subord) {}
size_t hash() const {
return range.GetEndPos();
}
virtual bool operator==(const FFState& other) const {
const DistortionState_traditional& o =
static_cast<const DistortionState_traditional&>(other);
return range.GetEndPos() == o.range.GetEndPos();
const DistortionState& o =
static_cast<const DistortionState&>(other);
return ( (range.GetEndPos() == o.range.GetEndPos()) && (inSubordinateConjunction == o.inSubordinateConjunction) );
}
};
@ -29,11 +34,36 @@ std::vector<const DistortionScoreProducer*> DistortionScoreProducer::s_staticCol
DistortionScoreProducer::DistortionScoreProducer(const std::string &line)
: StatefulFeatureFunction(1, line)
, m_useSparse(false)
, m_sparseDistance(false)
, m_sparseSubordinate(false)
{
s_staticColl.push_back(this);
ReadParameters();
}
void DistortionScoreProducer::SetParameter(const std::string& key, const std::string& value)
{
if (key == "sparse") {
m_useSparse = Scan<bool>(value);
} else if (key == "sparse-distance") {
m_sparseDistance = Scan<bool>(value);
} else if (key == "sparse-input-factor") {
m_sparseFactorTypeSource = Scan<FactorType>(value);
} else if (key == "sparse-output-factor") {
m_sparseFactorTypeTarget = Scan<FactorType>(value);
} else if (key == "sparse-subordinate") {
std::string subordinateConjunctionTag = Scan<std::string>(value);
FactorCollection &factorCollection = FactorCollection::Instance();
m_subordinateConjunctionTagFactor = factorCollection.AddFactor(subordinateConjunctionTag,false);
m_sparseSubordinate = true;
} else if (key == "sparse-subordinate-output-factor") {
m_sparseFactorTypeTargetSubordinate = Scan<FactorType>(value);
} else {
StatefulFeatureFunction::SetParameter(key, value);
}
}
const FFState* DistortionScoreProducer::EmptyHypothesisState(const InputType &input) const
{
// fake previous translated phrase start and end
@ -44,7 +74,7 @@ const FFState* DistortionScoreProducer::EmptyHypothesisState(const InputType &in
start = 0;
end = input.m_frontSpanCoveredLength -1;
}
return new DistortionState_traditional(
return new DistortionState(
Range(start, end),
NOT_FOUND);
}
@ -101,17 +131,184 @@ FFState* DistortionScoreProducer::EvaluateWhenApplied(
const FFState* prev_state,
ScoreComponentCollection* out) const
{
const DistortionState_traditional* prev = static_cast<const DistortionState_traditional*>(prev_state);
const DistortionState* prev = static_cast<const DistortionState*>(prev_state);
bool subordinateConjunction = prev->inSubordinateConjunction;
if (m_useSparse) {
int jumpFromPos = prev->range.GetEndPos()+1;
int jumpToPos = hypo.GetCurrSourceWordsRange().GetStartPos();
size_t distance = std::abs( jumpFromPos - jumpToPos );
const Sentence& sentence = static_cast<const Sentence&>(hypo.GetInput());
StringPiece jumpFromSourceFactorPrev;
StringPiece jumpFromSourceFactor;
StringPiece jumpToSourceFactor;
if (jumpFromPos < (int)sentence.GetSize()) {
jumpFromSourceFactor = sentence.GetWord(jumpFromPos).GetFactor(m_sparseFactorTypeSource)->GetString();
} else {
jumpFromSourceFactor = "</s>";
}
if (jumpFromPos > 0) {
jumpFromSourceFactorPrev = sentence.GetWord(jumpFromPos-1).GetFactor(m_sparseFactorTypeSource)->GetString();
} else {
jumpFromSourceFactorPrev = "<s>";
}
jumpToSourceFactor = sentence.GetWord(jumpToPos).GetFactor(m_sparseFactorTypeSource)->GetString();
const TargetPhrase& currTargetPhrase = hypo.GetCurrTargetPhrase();
StringPiece jumpToTargetFactor = currTargetPhrase.GetWord(0).GetFactor(m_sparseFactorTypeTarget)->GetString();
util::StringStream featureName;
// source factor (start position)
featureName = util::StringStream();
featureName << m_description << "_";
if ( jumpToPos > jumpFromPos ) {
featureName << "R";
} else if ( jumpToPos < jumpFromPos ) {
featureName << "L";
} else {
featureName << "M";
}
if (m_sparseDistance) {
featureName << distance;
}
featureName << "_SFS_" << jumpFromSourceFactor;
if (m_sparseSubordinate && subordinateConjunction) {
featureName << "_SUBORD";
}
out->SparsePlusEquals(featureName.str(), 1);
// source factor (start position minus 1)
featureName = util::StringStream();
featureName << m_description << "_";
if ( jumpToPos > jumpFromPos ) {
featureName << "R";
} else if ( jumpToPos < jumpFromPos ) {
featureName << "L";
} else {
featureName << "M";
}
if (m_sparseDistance) {
featureName << distance;
}
featureName << "_SFP_" << jumpFromSourceFactorPrev;
if (m_sparseSubordinate && subordinateConjunction) {
featureName << "_SUBORD";
}
out->SparsePlusEquals(featureName.str(), 1);
// source factor (end position)
featureName = util::StringStream();
featureName << m_description << "_";
if ( jumpToPos > jumpFromPos ) {
featureName << "R";
} else if ( jumpToPos < jumpFromPos ) {
featureName << "L";
} else {
featureName << "M";
}
if (m_sparseDistance) {
featureName << distance;
}
featureName << "_SFE_" << jumpToSourceFactor;
if (m_sparseSubordinate && subordinateConjunction) {
featureName << "_SUBORD";
}
out->SparsePlusEquals(featureName.str(), 1);
// target factor (end position)
featureName = util::StringStream();
featureName << m_description << "_";
if ( jumpToPos > jumpFromPos ) {
featureName << "R";
} else if ( jumpToPos < jumpFromPos ) {
featureName << "L";
} else {
featureName << "M";
}
if (m_sparseDistance) {
featureName << distance;
}
featureName << "_TFE_" << jumpToTargetFactor;
if (m_sparseSubordinate && subordinateConjunction) {
featureName << "_SUBORD";
}
out->SparsePlusEquals(featureName.str(), 1);
// relative source sentence position
featureName = util::StringStream();
featureName << m_description << "_";
if ( jumpToPos > jumpFromPos ) {
featureName << "R";
} else if ( jumpToPos < jumpFromPos ) {
featureName << "L";
} else {
featureName << "M";
}
if (m_sparseDistance) {
featureName << distance;
}
size_t relativeSourceSentencePosBin = std::floor( 5 * (float)jumpFromPos / (sentence.GetSize()+1) );
featureName << "_P_" << relativeSourceSentencePosBin;
if (m_sparseSubordinate && subordinateConjunction) {
featureName << "_SUBORD";
}
out->SparsePlusEquals(featureName.str(), 1);
// source sentence length bin
featureName = util::StringStream();
featureName << m_description << "_";
if ( jumpToPos > jumpFromPos ) {
featureName << "R";
} else if ( jumpToPos < jumpFromPos ) {
featureName << "L";
} else {
featureName << "M";
}
if (m_sparseDistance) {
featureName << distance;
}
size_t sourceSentenceLengthBin = 3;
if (sentence.GetSize() < 15) {
sourceSentenceLengthBin = 0;
} else if (sentence.GetSize() < 23) {
sourceSentenceLengthBin = 1;
} else if (sentence.GetSize() < 33) {
sourceSentenceLengthBin = 2;
}
featureName << "_SL_" << sourceSentenceLengthBin;
if (m_sparseSubordinate && subordinateConjunction) {
featureName << "_SUBORD";
}
out->SparsePlusEquals(featureName.str(), 1);
if (m_sparseSubordinate) {
for (size_t posT=0; posT<currTargetPhrase.GetSize(); ++posT) {
const Word &wordT = currTargetPhrase.GetWord(posT);
if (wordT[m_sparseFactorTypeTargetSubordinate] == m_subordinateConjunctionTagFactor) {
subordinateConjunction = true;
} else if (wordT[m_sparseFactorTypeTargetSubordinate]->GetString()[0] == 'V') {
subordinateConjunction = false;
}
};
}
}
const float distortionScore = CalculateDistortionScore(
hypo,
prev->range,
hypo.GetCurrSourceWordsRange(),
prev->first_gap);
out->PlusEquals(this, distortionScore);
DistortionState_traditional* res = new DistortionState_traditional(
DistortionState* state = new DistortionState(
hypo.GetCurrSourceWordsRange(),
hypo.GetWordsBitmap().GetFirstGapPos());
return res;
hypo.GetWordsBitmap().GetFirstGapPos(),
subordinateConjunction);
return state;
}

View File

@ -1,16 +1,11 @@
#pragma once
#include <stdexcept>
#include <string>
#include "StatefulFeatureFunction.h"
#include "moses/Range.h"
namespace Moses
{
class FFState;
class ScoreComponentCollection;
class Hypothesis;
class ChartHypothesis;
class Range;
/** Calculates Distortion scores
*/
@ -19,6 +14,14 @@ class DistortionScoreProducer : public StatefulFeatureFunction
protected:
static std::vector<const DistortionScoreProducer*> s_staticColl;
FactorType m_sparseFactorTypeSource;
FactorType m_sparseFactorTypeTarget;
bool m_useSparse;
bool m_sparseDistance;
bool m_sparseSubordinate;
FactorType m_sparseFactorTypeTargetSubordinate;
const Factor* m_subordinateConjunctionTagFactor;
public:
static const std::vector<const DistortionScoreProducer*>& GetDistortionFeatureFunctions() {
return s_staticColl;
@ -26,6 +29,8 @@ public:
DistortionScoreProducer(const std::string &line);
void SetParameter(const std::string& key, const std::string& value);
bool IsUseable(const FactorMask &mask) const {
return true;
}
@ -44,7 +49,7 @@ public:
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection*) const {
throw std::logic_error("DistortionScoreProducer not supported in chart decoder, yet");
UTIL_THROW(util::Exception, "DIstortion not implemented in chart decoder");
}
};

View File

@ -42,6 +42,7 @@
#include "moses/FF/ControlRecombination.h"
#include "moses/FF/ConstrainedDecoding.h"
#include "moses/FF/SoftSourceSyntacticConstraintsFeature.h"
#include "moses/FF/TargetConstituentAdjacencyFeature.h"
#include "moses/FF/TargetPreferencesFeature.h"
#include "moses/FF/CoveredReferenceFeature.h"
#include "moses/FF/TreeStructureFeature.h"
@ -264,6 +265,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(CoveredReferenceFeature);
MOSES_FNAME(SourceGHKMTreeInputMatchFeature);
MOSES_FNAME(SoftSourceSyntacticConstraintsFeature);
MOSES_FNAME(TargetConstituentAdjacencyFeature);
MOSES_FNAME(TargetPreferencesFeature);
MOSES_FNAME(TreeStructureFeature);
MOSES_FNAME(SoftMatchingFeature);

View File

@ -0,0 +1,38 @@
#include "BidirectionalReorderingState.h"

namespace Moses
{

///////////////////////////
// BidirectionalReorderingState

/// Recombination hash: combines the hashes of the backward and forward
/// sub-states.
size_t BidirectionalReorderingState::hash() const
{
  size_t ret = m_backward->hash();
  boost::hash_combine(ret, m_forward->hash());
  return ret;
}

/// Two bidirectional states are equal iff both of their sub-states are equal.
bool BidirectionalReorderingState::operator==(const FFState& o) const
{
  // BUG FIX: the original returned 0 (i.e. false) for self-comparison;
  // equality must be reflexive — an object always equals itself.
  if (&o == this) return true;

  BidirectionalReorderingState const &other
  = static_cast<BidirectionalReorderingState const&>(o);

  bool ret = (*m_backward == *other.m_backward) && (*m_forward == *other.m_forward);
  return ret;
}

/// Expand both sub-states with the applied translation option and wrap the
/// results in a fresh bidirectional state (which takes ownership of them).
LRState*
BidirectionalReorderingState::
Expand(const TranslationOption& topt, const InputType& input,
       ScoreComponentCollection* scores) const
{
  LRState *newbwd = m_backward->Expand(topt, input, scores);
  LRState *newfwd = m_forward->Expand(topt, input, scores);
  return new BidirectionalReorderingState(m_configuration, newbwd, newfwd, m_offset);
}

}

View File

@ -0,0 +1,38 @@
#pragma once
#include "LRState.h"

namespace Moses
{

/// Pairs a backward-looking and a forward-looking lexicalized-reordering
/// state so that both directions are tracked within one feature state.
class BidirectionalReorderingState
  : public LRState
{
private:
  // Sub-states owned by this object; deleted in the destructor.
  // NOTE(review): copying this class would double-delete these pointers —
  // confirm the implicitly-declared copy operations are never used.
  const LRState *m_backward;
  const LRState *m_forward;

public:
  /// Takes ownership of @p bw and @p fw.
  BidirectionalReorderingState(const LRModel &config,
                               const LRState *bw,
                               const LRState *fw, size_t offset)
    : LRState(config,
              LRModel::Bidirectional,
              offset)
    , m_backward(bw)
    , m_forward(fw)
  { }

  ~BidirectionalReorderingState() {
    delete m_backward;
    delete m_forward;
  }

  /// Combines the hashes of both sub-states.
  virtual size_t hash() const;
  /// Equal iff both sub-states compare equal.
  virtual bool operator==(const FFState& other) const;

  /// Expands both sub-states; the returned state owns the results.
  LRState*
  Expand(const TranslationOption& topt, const InputType& input,
         ScoreComponentCollection* scores) const;
};

}

View File

@ -0,0 +1,50 @@
#include "HReorderingBackwardState.h"

namespace Moses
{

/////////////////////////////////////////
// HierarchicalReorderingBackwardState

// Initial (sentence-start) state.
HReorderingBackwardState::
HReorderingBackwardState(const LRModel &config, size_t offset)
  : LRState(config, LRModel::Backward, offset)
{ }

// Successor state: remembers the previous option and carries the stack.
HReorderingBackwardState::
HReorderingBackwardState(const HReorderingBackwardState *prev,
                         const TranslationOption &topt,
                         ReorderingStack reoStack)
  : LRState(prev, topt), m_reoStack(reoStack)
{ }

// The reordering stack fully determines recombination identity.
size_t HReorderingBackwardState::hash() const
{
  return m_reoStack.hash();
}

bool HReorderingBackwardState::operator==(const FFState& o) const
{
  const HReorderingBackwardState& rhs
  = static_cast<const HReorderingBackwardState&>(o);
  return m_reoStack == rhs.m_reoStack;
}

// Push the new source span onto the successor's stack, derive the
// orientation from the resulting jump distance, and score it.
LRState*
HReorderingBackwardState::
Expand(const TranslationOption& topt, const InputType& input,
       ScoreComponentCollection* scores) const
{
  HReorderingBackwardState* succ
  = new HReorderingBackwardState(this, topt, m_reoStack);
  Range span = topt.GetSourceWordsRange();
  int jump = succ->m_reoStack.ShiftReduce(span);
  ReorderingType orient = m_configuration.GetOrientation(jump);
  CopyScores(scores, topt, input, orient);
  return succ;
}

}

View File

@ -0,0 +1,33 @@
#pragma once
#include "LRState.h"
#include "ReorderingStack.h"

namespace Moses
{

//! State for a hierarchical reordering model (see Galley and Manning, A
//! Simple and Effective Hierarchical Phrase Reordering Model, EMNLP 2008)
//! backward state (conditioned on the previous phrase)
class HReorderingBackwardState : public LRState
{
private:
  // Stack of covered source spans; its hash/equality define recombination.
  ReorderingStack m_reoStack;
public:
  /// Initial (empty) state.
  HReorderingBackwardState(const LRModel &config, size_t offset);
  /// Successor state carrying a (copied) reordering stack.
  HReorderingBackwardState(const HReorderingBackwardState *prev,
                           const TranslationOption &topt,
                           ReorderingStack reoStack);
  virtual size_t hash() const;
  virtual bool operator==(const FFState& other) const;
  virtual LRState* Expand(const TranslationOption& hypo, const InputType& input,
                          ScoreComponentCollection* scores) const;

private:
  // NOTE(review): these helpers are declared here but not defined in the
  // accompanying .cpp — they look like dead declarations; confirm no friend
  // or other TU defines/uses them before removing.
  ReorderingType GetOrientationTypeMSD(int reoDistance) const;
  ReorderingType GetOrientationTypeMSLR(int reoDistance) const;
  ReorderingType GetOrientationTypeMonotonic(int reoDistance) const;
  ReorderingType GetOrientationTypeLeftRight(int reoDistance) const;
};

}

View File

@ -0,0 +1,78 @@
#include "HReorderingForwardState.h"

namespace Moses
{

///////////////////////////
//HReorderingForwardState

// Initial state: nothing translated yet; empty coverage bitmap of the
// given sentence size.
HReorderingForwardState::
HReorderingForwardState(const LRModel &config,
                        size_t size, size_t offset)
  : LRState(config, LRModel::Forward, offset)
  , m_first(true)
  , m_prevRange(NOT_FOUND,NOT_FOUND)
  , m_coverage(size)
{ }

// Successor state: remember the applied option's source span and extend
// the coverage bitmap with it.
HReorderingForwardState::
HReorderingForwardState(const HReorderingForwardState *prev,
                        const TranslationOption &topt)
  : LRState(prev, topt)
  , m_first(false)
  , m_prevRange(topt.GetSourceWordsRange())
  , m_coverage(prev->m_coverage, topt.GetSourceWordsRange())
{
}

// Recombination hash: only the previous source range is hashed.
// NOTE(review): operator== additionally compares previous-option scores,
// so states with equal hashes may still compare unequal.
size_t HReorderingForwardState::hash() const
{
  size_t ret;
  ret = hash_value(m_prevRange);
  return ret;
}

bool HReorderingForwardState::operator==(const FFState& o) const
{
  if (&o == this) return true;

  HReorderingForwardState const& other
  = static_cast<HReorderingForwardState const&>(o);

  // Three-way comparison: ranges first, then (on a tie) the scores of the
  // previous translation option; 0 means the states may recombine.
  int compareScores = ((m_prevRange == other.m_prevRange)
                       ? ComparePrevScores(other.m_prevOption)
                       : (m_prevRange < other.m_prevRange) ? -1 : 1);
  return compareScores == 0;
}

// For compatibility with the phrase-based reordering model, scoring is one
// step delayed.
// The forward model takes determines orientations heuristically as follows:
//  mono:   if the next phrase comes after the conditioning phrase and
//          - there is a gap to the right of the conditioning phrase, or
//          - the next phrase immediately follows it
//  swap:   if the next phrase goes before the conditioning phrase and
//          - there is a gap to the left of the conditioning phrase, or
//          - the next phrase immediately precedes it
//  dright: if the next phrase follows the conditioning phrase and other
//          stuff comes in between
//  dleft:  if the next phrase precedes the conditioning phrase and other
//          stuff comes in between
LRState*
HReorderingForwardState::
Expand(TranslationOption const& topt, InputType const& input,
       ScoreComponentCollection* scores) const
{
  const Range cur = topt.GetSourceWordsRange();
  // keep track of the current coverage ourselves so we don't need the hypothesis
  Bitmap cov(m_coverage, cur);
  if (!m_first) {
    // Score the *previous* phrase's orientation now that we know what
    // follows it (the one-step delay described above).
    LRModel::ReorderingType reoType;
    reoType = m_configuration.GetOrientation(m_prevRange,cur,cov);
    CopyScores(scores, topt, input, reoType);
  }
  return new HReorderingForwardState(this, topt);
}

}

View File

@ -0,0 +1,33 @@
#pragma once
#include "LRState.h"
#include "moses/Range.h"
#include "moses/Bitmap.h"

namespace Moses
{

//!forward state (conditioned on the next phrase)
class HReorderingForwardState : public LRState
{
private:
  bool m_first;       // true only for the initial (sentence-start) state
  Range m_prevRange;  // source span of the conditioning (previous) phrase
  Bitmap m_coverage;  // source coverage tracked independently of the hypothesis
public:
  /// Initial state for a source sentence of @p sentenceLength words.
  HReorderingForwardState(const LRModel &config, size_t sentenceLength,
                          size_t offset);
  /// Successor state conditioned on the newly applied option.
  HReorderingForwardState(const HReorderingForwardState *prev,
                          const TranslationOption &topt);
  virtual size_t hash() const;
  virtual bool operator==(const FFState& other) const;
  /// Scores the (one-step-delayed) forward orientation and returns the
  /// successor state.
  virtual LRState* Expand(const TranslationOption& hypo,
                          const InputType& input,
                          ScoreComponentCollection* scores) const;
};

}

View File

@ -0,0 +1,219 @@
#include "LRModel.h"
#include "moses/Range.h"
#include "moses/Bitmap.h"
#include "moses/InputType.h"
#include "HReorderingForwardState.h"
#include "HReorderingBackwardState.h"
#include "PhraseBasedReorderingState.h"
#include "BidirectionalReorderingState.h"
#include "SparseReordering.h"
namespace Moses
{
// True iff the step from the previous phrase to the current one counts as
// monotone: the current phrase starts immediately after the previous one,
// or the word just past the previous phrase is still uncovered (so the gap
// can still be filled later).
bool
IsMonotonicStep(Range const& prev, // words range of last source phrase
                Range const& cur,  // words range of current source phrase
                Bitmap const& cov) // coverage bitmap
{
  size_t const nxt = prev.GetEndPos() + 1;
  size_t const beg = cur.GetStartPos();
  if (beg == nxt) return true;
  return beg > nxt && !cov.GetValue(nxt);
}
// True iff the current phrase is a "swap" relative to the previous one:
// it ends immediately before the previous phrase starts, or the word just
// before the previous phrase is still uncovered.
bool
IsSwap(Range const& prev, Range const& cur, Bitmap const& cov)
{
  size_t const prevStart = prev.GetStartPos();
  size_t const curEnd    = cur.GetEndPos();
  if (curEnd + 1 == prevStart) return true;
  return curEnd < prevStart && !cov.GetValue(prevStart - 1);
}
// Number of distinct reordering orientations the model distinguishes:
// MSLR = 4 (M/S/DL/DR), MSD = 3 (M/S/D), Monotonic and LeftRight = 2.
size_t
LRModel::
GetNumberOfTypes() const
{
  switch (m_modelType) {
  case MSD:  return 3;
  case MSLR: return 4;
  default:   return 2;
  }
}
size_t
LRModel::
GetNumScoreComponents() const
{
size_t score_per_dir = m_collapseScores ? 1 : GetNumberOfTypes();
return ((m_direction == Bidirectional)
? 2 * score_per_dir + m_additionalScoreComponents
: score_per_dir + m_additionalScoreComponents);
}
// Instantiate the sparse reordering component iff any sparse-feature
// arguments were supplied on the feature line.
void
LRModel::
ConfigureSparse(const std::map<std::string,std::string>& sparseArgs,
                const LexicalReordering* producer)
{
  if (!sparseArgs.empty()) {
    m_sparse.reset(new SparseReordering(sparseArgs, producer));
  }
}
// Record how many extra (non-orientation) score components the owning
// feature contributes; these are included in GetNumScoreComponents().
void
LRModel::
SetAdditionalScoreComponents(size_t number)
{
  m_additionalScoreComponents = number;
}
/// Return the orientation assigned to the very first phrase of a
/// hypothesis (there is no previous phrase to compare against).
LRModel::ReorderingType
LRModel::
GetOrientation(Range const& cur) const
{
  UTIL_THROW_IF2(m_modelType == None, "Reordering Model Type is None");
  if (m_modelType == LeftRight) return R;  // first phrase is always "right"
  if (cur.GetStartPos() == 0) return M;    // starts at sentence start: monotone
  if (m_modelType == MSD) return D;        // discontinuous
  return (m_modelType == MSLR) ? DR : NM;  // discontinuous-right / non-monotone
}
// Orientation of the current phrase relative to the previous one, judged
// purely by adjacency of source ranges (classic phrase-based heuristic).
LRModel::ReorderingType
LRModel::
GetOrientation(Range const& prev, Range const& cur) const
{
  UTIL_THROW_IF2(m_modelType == None, "No reordering model type specified");
  if (m_modelType == LeftRight)
    return (prev.GetEndPos() <= cur.GetStartPos()) ? R : L;
  if (cur.GetStartPos() == prev.GetEndPos() + 1) return M;  // right-adjacent
  if (m_modelType == Monotonic) return NM;
  if (prev.GetStartPos() == cur.GetEndPos() + 1) return S;  // left-adjacent: swap
  if (m_modelType == MSD) return D;
  return (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL;
}
// Orientation from a precomputed reordering distance (used by the
// hierarchical backward state): +1 = monotone, -1 = swap, else discontinuous.
LRModel::ReorderingType
LRModel::
GetOrientation(int const reoDistance) const
{
  // this one is for HierarchicalReorderingBackwardState
  if (m_modelType == LeftRight)
    return (reoDistance >= 1) ? R : L;
  if (reoDistance == 1) return M;
  if (m_modelType == Monotonic) return NM;
  if (reoDistance == -1) return S;
  if (m_modelType == MSD) return D;
  return (reoDistance > 1) ? DR : DL;
}
// Orientation under the hierarchical model: adjacency is judged with the
// coverage bitmap (IsMonotonicStep / IsSwap), so gaps that can still be
// filled later also count as monotone / swap.
LRModel::ReorderingType
LRModel::
GetOrientation(Range const& prev, Range const& cur,
               Bitmap const& cov) const
{
  if (m_modelType == LeftRight)
    return (cur.GetStartPos() > prev.GetEndPos()) ? R : L;
  if (IsMonotonicStep(prev, cur, cov)) return M;
  if (m_modelType == Monotonic) return NM;
  if (IsSwap(prev, cur, cov)) return S;
  if (m_modelType == MSD) return D;
  return (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL;
}
// Parse a dash-separated model specification string, e.g.
// "wbe-msd-bidirectional-fe-allff", and configure the model accordingly.
// An unknown token or a missing orientation type is a fatal error.
LRModel::
LRModel(const std::string &modelType)
  : m_modelString(modelType)
  , m_scoreProducer(NULL)
  , m_modelType(None)
  , m_phraseBased(true)
  , m_collapseScores(false)
  , m_direction(Backward)
  , m_condition(FE) // BUGFIX: m_condition was never initialized, so reads via
                    // GetCondition() were undefined whenever the spec string
                    // contained neither "f" nor "fe". Default to "fe".
  , m_additionalScoreComponents(0)
{
  std::vector<std::string> config = Tokenize<std::string>(modelType, "-");
  for (size_t i=0; i<config.size(); ++i) {
    // granularity
    if (config[i] == "hier") {
      m_phraseBased = false;
    } else if (config[i] == "phrase") {
      m_phraseBased = true;
    } else if (config[i] == "wbe") {
      m_phraseBased = true;
    }
    // no word-based decoding available, fall-back to phrase-based
    // This is the old lexical reordering model combination of moses

    // orientation granularity
    else if (config[i] == "msd") {
      m_modelType = MSD;
    } else if (config[i] == "mslr") {
      m_modelType = MSLR;
    } else if (config[i] == "monotonicity") {
      m_modelType = Monotonic;
    } else if (config[i] == "leftright") {
      m_modelType = LeftRight;
    }
    // direction; unidirectional is deprecated, use backward instead
    else if (config[i] == "unidirectional") {
      m_direction = Backward;
    } else if (config[i] == "backward") {
      m_direction = Backward;
    } else if (config[i] == "forward") {
      m_direction = Forward;
    } else if (config[i] == "bidirectional") {
      m_direction = Bidirectional;
    }
    // conditioning side
    else if (config[i] == "f") {
      m_condition = F;
    } else if (config[i] == "fe") {
      m_condition = FE;
    }
    // score collapsing
    else if (config[i] == "collapseff") {
      m_collapseScores = true;
    } else if (config[i] == "allff") {
      m_collapseScores = false;
    } else {
      std::cerr
          << "Illegal part in the lexical reordering configuration string: "
          << config[i] << std::endl;
      exit(1);
    }
  }
  if (m_modelType == None) {
    std::cerr
        << "You need to specify the type of the reordering model "
        << "(msd, monotonicity,...)" << std::endl;
    exit(1);
  }
}
// Create the initial decoder state: a backward state, a forward state, or
// a bidirectional wrapper around both. 'offset' locates each component's
// scores inside the feature's dense score vector.
LRState *
LRModel::
CreateLRState(const InputType &input) const
{
  LRState *bwd = NULL;
  LRState *fwd = NULL;
  size_t offset = 0;

  bool const needBwd = (m_direction == Backward || m_direction == Bidirectional);
  bool const needFwd = (m_direction == Forward  || m_direction == Bidirectional);

  if (needBwd) {
    if (m_phraseBased)
      bwd = new PhraseBasedReorderingState(*this, Backward, offset);
    else
      bwd = new HReorderingBackwardState(*this, offset);
    offset += m_collapseScores ? 1 : GetNumberOfTypes();
    if (m_direction == Backward) return bwd;
  }
  if (needFwd) {
    if (m_phraseBased)
      fwd = new PhraseBasedReorderingState(*this, Forward, offset);
    else
      fwd = new HReorderingForwardState(*this, input.GetSize(), offset);
    offset += m_collapseScores ? 1 : GetNumberOfTypes();
    if (m_direction == Forward) return fwd;
  }
  return new BidirectionalReorderingState(*this, bwd, fwd, 0);
}
}

View File

@ -0,0 +1,133 @@
#pragma once
#include <string>
#include <map>
#include <boost/scoped_ptr.hpp>
namespace Moses
{
class Range;
class Bitmap;
class InputType;
class LRState;
class LexicalReordering;
class SparseReordering;
//! Factory class for lexical reordering states: parses the model
//! specification string and creates the matching chain of LRState objects.
class LRModel
{
public:
  friend class LexicalReordering;
  // Orientation granularity, scoring direction(s), and conditioning side.
  enum ModelType { Monotonic, MSD, MSLR, LeftRight, None };
  enum Direction { Forward, Backward, Bidirectional };
  enum Condition { F, E, FE };

  // constants for the different types of reordering
  // (correspond to indices in the respective table)
#if 0
  typedef int ReorderingType;
  static const ReorderingType M = 0;   // monotonic
  static const ReorderingType NM = 1;  // non-monotonic
  static const ReorderingType S = 1;   // swap
  static const ReorderingType D = 2;   // discontinuous
  static const ReorderingType DL = 2;  // discontinuous, left
  static const ReorderingType DR = 3;  // discontinuous, right
  static const ReorderingType R = 0;   // right
  static const ReorderingType L = 1;   // left
  static const ReorderingType MAX = 3; // largest possible
#else
  enum ReorderingType {
    M    = 0, // monotonic
    NM   = 1, // non-monotonic
    S    = 1, // swap
    D    = 2, // discontinuous
    DL   = 2, // discontinuous, left
    DR   = 3, // discontinuous, right
    R    = 0, // right
    L    = 1, // left
    MAX  = 3, // largest possible
    NONE = 4  // largest possible
  };
#endif
  // determine orientation, depending on model:
  ReorderingType // for first phrase in phrase-based
  GetOrientation(Range const& cur) const;

  ReorderingType // for non-first phrases in phrase-based
  GetOrientation(Range const& prev, Range const& cur) const;

  ReorderingType // for HReorderingForwardState
  GetOrientation(Range const& prev, Range const& cur,
                 Bitmap const& cov) const;

  ReorderingType // for HReorderingBackwarddState
  GetOrientation(int const reoDistance) const;

  // Parse a dash-separated spec string, e.g. "wbe-msd-bidirectional-fe".
  LRModel(const std::string &modelType);

  // Set up the sparse component iff sparse arguments were given.
  void
  ConfigureSparse(const std::map<std::string,std::string>& sparseArgs,
                  const LexicalReordering* producer);

  // Create the initial decoder state (backward, forward, or both).
  LRState*
  CreateLRState(const InputType &input) const;

  // Number of orientation classes (2, 3, or 4).
  size_t GetNumberOfTypes() const;
  // Total dense score components produced by this model.
  size_t GetNumScoreComponents() const;
  // Extra (non-orientation) components contributed by the owning feature.
  void SetAdditionalScoreComponents(size_t number);

  LexicalReordering*
  GetScoreProducer() const {
    return m_scoreProducer;
  }

  ModelType GetModelType() const {
    return m_modelType;
  }

  Direction GetDirection() const {
    return m_direction;
  }

  Condition GetCondition() const {
    return m_condition;
  }

  bool
  IsPhraseBased() const {
    return m_phraseBased;
  }

  bool
  CollapseScores() const {
    return m_collapseScores;
  }

  SparseReordering const*
  GetSparseReordering() const {
    return m_sparse.get();
  }

private:
  void
  SetScoreProducer(LexicalReordering* scoreProducer) {
    m_scoreProducer = scoreProducer;
  }

  std::string const&
  GetModelString() const {
    return m_modelString;
  }

  std::string m_modelString;          // original specification string
  LexicalReordering *m_scoreProducer; // owning feature function (not owned here)
  ModelType m_modelType;              // orientation granularity
  bool m_phraseBased;                 // phrase-based vs. hierarchical states
  bool m_collapseScores;              // single collapsed score per direction?
  Direction m_direction;              // scoring direction(s)
  Condition m_condition;              // conditioning side (f / fe)
  size_t m_additionalScoreComponents; // extra components beyond orientations
  boost::scoped_ptr<SparseReordering> m_sparse; // optional sparse component
};
}

View File

@ -0,0 +1,88 @@
// -*- c++ -*-
#include <vector>
#include <string>
#include "LRState.h"
#include "moses/FF/FFState.h"
#include "moses/Hypothesis.h"
#include "moses/Range.h"
#include "moses/TranslationOption.h"
#include "moses/Util.h"
#include "LexicalReordering.h"
namespace Moses
{
// Add the reordering score for the given orientation to 'accum'.  Backward
// scoring reads the scores cached on 'topt' itself; forward scoring reads
// the (delayed) scores of the remembered previous option.
void
LRState::
CopyScores(ScoreComponentCollection* accum,
           const TranslationOption &topt,
           const InputType& input,
           ReorderingType reoType) const
{
  // don't call this on a bidirectional object
  UTIL_THROW_IF2(m_direction != LRModel::Backward &&
                 m_direction != LRModel::Forward,
                 "Unknown direction: " << m_direction);
  TranslationOption const* relevantOpt = ((m_direction == LRModel::Backward)
                                          ? &topt : m_prevOption);
  LexicalReordering* producer = m_configuration.GetScoreProducer();
  Scores const* cached = relevantOpt->GetLexReorderingScores(producer);
  // The approach here is bizarre! Why create a whole vector and do
  // vector addition (acumm->PlusEquals) to update a single value? - UG

  // off_remote indexes the per-orientation score vector cached on the
  // option; off_local is the slot in this feature's dense vector (a single
  // shared slot when scores are collapsed).
  size_t off_remote = m_offset + reoType;
  size_t off_local = m_configuration.CollapseScores() ? m_offset : off_remote;
  UTIL_THROW_IF2(off_local >= producer->GetNumScoreComponents(),
                 "offset out of vector bounds!");
  // look up the applicable score in the vector of scores
  if(cached) {
    UTIL_THROW_IF2(off_remote >= cached->size(), "offset out of vector bounds!");
    Scores scores(producer->GetNumScoreComponents(),0);
    scores[off_local ] = (*cached)[off_remote];
    accum->PlusEquals(producer, scores);
  }
  // else: use default scores (if specified)
  else if (producer->GetHaveDefaultScores()) {
    Scores scores(producer->GetNumScoreComponents(),0);
    scores[off_local] = producer->GetDefaultScore(off_remote);
    accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
  }
  // note: if no default score, no cost
  const SparseReordering* sparse = m_configuration.GetSparseReordering();
  if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType,
                                 m_direction, accum);
}
int
LRState::
ComparePrevScores(const TranslationOption *other) const
{
LexicalReordering* producer = m_configuration.GetScoreProducer();
const Scores* myScores = m_prevOption->GetLexReorderingScores(producer);
const Scores* yrScores = other->GetLexReorderingScores(producer);
if(myScores == yrScores) return 0;
// The pointers are NULL if a phrase pair isn't found in the reordering table.
if(yrScores == NULL) return -1;
if(myScores == NULL) return 1;
size_t stop = m_offset + m_configuration.GetNumberOfTypes();
for(size_t i = m_offset; i < stop; i++) {
if((*myScores)[i] < (*yrScores)[i]) return -1;
if((*myScores)[i] > (*yrScores)[i]) return 1;
}
return 0;
}
}

View File

@ -0,0 +1,81 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#pragma once
#include <vector>
#include <string>
#include "moses/Hypothesis.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/Range.h"
#include "moses/Bitmap.h"
#include "moses/TranslationOption.h"
#include "moses/FF/FFState.h"
#include "LRModel.h"
namespace Moses
{
//! Abstract base class for lexical reordering model states carried on the
//! hypothesis (one per scoring direction; configured by an LRModel).
class LRState : public FFState
{
public:
  typedef LRModel::ReorderingType ReorderingType;

  // Produce the successor state after applying 'hypo', adding this step's
  // reordering scores to 'scores'.
  virtual
  LRState*
  Expand(const TranslationOption& hypo, const InputType& input,
         ScoreComponentCollection* scores) const = 0;

  static
  LRState*
  CreateLRState(const std::vector<std::string>& config,
                LRModel::Direction dir,
                const InputType &input);

protected:
  const LRModel& m_configuration; // shared model configuration
  // The following is the true direction of the object, which can be
  // Backward or Forward even if the Configuration has Bidirectional.
  LRModel::Direction m_direction;
  size_t m_offset; // index of this model's first score in the dense vector
  //forward scores are conditioned on prev option, so need to remember it
  const TranslationOption *m_prevOption;

  // Successor-state constructor: inherit configuration, remember topt.
  inline
  LRState(const LRState *prev,
          const TranslationOption &topt)
    : m_configuration(prev->m_configuration)
    , m_direction(prev->m_direction)
    , m_offset(prev->m_offset)
    , m_prevOption(&topt)
  { }

  // Initial-state constructor (no previous option yet).
  inline
  LRState(const LRModel &config,
          LRModel::Direction dir,
          size_t offset)
    : m_configuration(config)
    , m_direction(dir)
    , m_offset(offset)
    , m_prevOption(NULL)
  { }

  // copy the right scores in the right places, taking into account
  // forward/backward, offset, collapse
  void
  CopyScores(ScoreComponentCollection* scores,
             const TranslationOption& topt,
             const InputType& input, ReorderingType reoType) const;

  int
  ComparePrevScores(const TranslationOption *other) const;
};
}

View File

@ -5,7 +5,7 @@
#include "moses/FF/FFState.h"
#include "moses/TranslationOptionList.h"
#include "LexicalReordering.h"
#include "LexicalReorderingState.h"
#include "LRState.h"
#include "moses/StaticData.h"
#include "moses/Util.h"
#include "moses/InputPath.h"

View File

@ -14,7 +14,7 @@
#include "moses/FF/StatefulFeatureFunction.h"
#include "util/exception.hh"
#include "LexicalReorderingState.h"
#include "LRState.h"
#include "LexicalReorderingTable.h"
#include "SparseReordering.h"

View File

@ -1,506 +0,0 @@
// -*- c++ -*-
#include <vector>
#include <string>
#include "moses/FF/FFState.h"
#include "moses/Hypothesis.h"
#include "moses/Range.h"
#include "moses/TranslationOption.h"
#include "moses/Util.h"
#include "LexicalReordering.h"
#include "LexicalReorderingState.h"
#include "ReorderingStack.h"
namespace Moses
{
// True iff the step from the previous phrase to the current one counts as
// monotone: the current phrase starts immediately after the previous one,
// or the word just past the previous phrase is still uncovered (so the gap
// can still be filled later).
bool
IsMonotonicStep(Range const& prev, // words range of last source phrase
                Range const& cur,  // words range of current source phrase
                Bitmap const& cov) // coverage bitmap
{
  size_t const nxt = prev.GetEndPos() + 1;
  size_t const beg = cur.GetStartPos();
  if (beg == nxt) return true;
  return beg > nxt && !cov.GetValue(nxt);
}
// True iff the current phrase is a "swap" relative to the previous one:
// it ends immediately before the previous phrase starts, or the word just
// before the previous phrase is still uncovered.
bool
IsSwap(Range const& prev, Range const& cur, Bitmap const& cov)
{
  size_t const prevStart = prev.GetStartPos();
  size_t const curEnd    = cur.GetEndPos();
  if (curEnd + 1 == prevStart) return true;
  return curEnd < prevStart && !cov.GetValue(prevStart - 1);
}
// Number of distinct reordering orientations the model distinguishes:
// MSLR = 4 (M/S/DL/DR), MSD = 3 (M/S/D), Monotonic and LeftRight = 2.
size_t
LRModel::
GetNumberOfTypes() const
{
  switch (m_modelType) {
  case MSD:  return 3;
  case MSLR: return 4;
  default:   return 2;
  }
}
size_t
LRModel::
GetNumScoreComponents() const
{
size_t score_per_dir = m_collapseScores ? 1 : GetNumberOfTypes();
return ((m_direction == Bidirectional)
? 2 * score_per_dir + m_additionalScoreComponents
: score_per_dir + m_additionalScoreComponents);
}
// Instantiate the sparse reordering component iff any sparse-feature
// arguments were supplied on the feature line.
void
LRModel::
ConfigureSparse(const std::map<std::string,std::string>& sparseArgs,
                const LexicalReordering* producer)
{
  if (!sparseArgs.empty()) {
    m_sparse.reset(new SparseReordering(sparseArgs, producer));
  }
}
// Record how many extra (non-orientation) score components the owning
// feature contributes; these are included in GetNumScoreComponents().
void
LRModel::
SetAdditionalScoreComponents(size_t number)
{
  m_additionalScoreComponents = number;
}
/// Return the orientation assigned to the very first phrase of a
/// hypothesis (there is no previous phrase to compare against).
LRModel::ReorderingType
LRModel::
GetOrientation(Range const& cur) const
{
  UTIL_THROW_IF2(m_modelType == None, "Reordering Model Type is None");
  if (m_modelType == LeftRight) return R;  // first phrase is always "right"
  if (cur.GetStartPos() == 0) return M;    // starts at sentence start: monotone
  if (m_modelType == MSD) return D;        // discontinuous
  return (m_modelType == MSLR) ? DR : NM;  // discontinuous-right / non-monotone
}
// Orientation of the current phrase relative to the previous one, judged
// purely by adjacency of source ranges (classic phrase-based heuristic).
LRModel::ReorderingType
LRModel::
GetOrientation(Range const& prev, Range const& cur) const
{
  UTIL_THROW_IF2(m_modelType == None, "No reordering model type specified");
  if (m_modelType == LeftRight)
    return (prev.GetEndPos() <= cur.GetStartPos()) ? R : L;
  if (cur.GetStartPos() == prev.GetEndPos() + 1) return M;  // right-adjacent
  if (m_modelType == Monotonic) return NM;
  if (prev.GetStartPos() == cur.GetEndPos() + 1) return S;  // left-adjacent: swap
  if (m_modelType == MSD) return D;
  return (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL;
}
// Orientation from a precomputed reordering distance (used by the
// hierarchical backward state): +1 = monotone, -1 = swap, else discontinuous.
LRModel::ReorderingType
LRModel::
GetOrientation(int const reoDistance) const
{
  // this one is for HierarchicalReorderingBackwardState
  if (m_modelType == LeftRight)
    return (reoDistance >= 1) ? R : L;
  if (reoDistance == 1) return M;
  if (m_modelType == Monotonic) return NM;
  if (reoDistance == -1) return S;
  if (m_modelType == MSD) return D;
  return (reoDistance > 1) ? DR : DL;
}
// Orientation under the hierarchical model: adjacency is judged with the
// coverage bitmap (IsMonotonicStep / IsSwap), so gaps that can still be
// filled later also count as monotone / swap.
LRModel::ReorderingType
LRModel::
GetOrientation(Range const& prev, Range const& cur,
               Bitmap const& cov) const
{
  if (m_modelType == LeftRight)
    return (cur.GetStartPos() > prev.GetEndPos()) ? R : L;
  if (IsMonotonicStep(prev, cur, cov)) return M;
  if (m_modelType == Monotonic) return NM;
  if (IsSwap(prev, cur, cov)) return S;
  if (m_modelType == MSD) return D;
  return (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL;
}
// Parse a dash-separated model specification string, e.g.
// "wbe-msd-bidirectional-fe-allff", and configure the model accordingly.
// An unknown token or a missing orientation type is a fatal error.
LRModel::
LRModel(const std::string &modelType)
  : m_modelString(modelType)
  , m_scoreProducer(NULL)
  , m_modelType(None)
  , m_phraseBased(true)
  , m_collapseScores(false)
  , m_direction(Backward)
  , m_condition(FE) // BUGFIX: m_condition was never initialized, so reads via
                    // GetCondition() were undefined whenever the spec string
                    // contained neither "f" nor "fe". Default to "fe".
  , m_additionalScoreComponents(0)
{
  std::vector<std::string> config = Tokenize<std::string>(modelType, "-");
  for (size_t i=0; i<config.size(); ++i) {
    // granularity
    if (config[i] == "hier") {
      m_phraseBased = false;
    } else if (config[i] == "phrase") {
      m_phraseBased = true;
    } else if (config[i] == "wbe") {
      m_phraseBased = true;
    }
    // no word-based decoding available, fall-back to phrase-based
    // This is the old lexical reordering model combination of moses

    // orientation granularity
    else if (config[i] == "msd") {
      m_modelType = MSD;
    } else if (config[i] == "mslr") {
      m_modelType = MSLR;
    } else if (config[i] == "monotonicity") {
      m_modelType = Monotonic;
    } else if (config[i] == "leftright") {
      m_modelType = LeftRight;
    }
    // direction; unidirectional is deprecated, use backward instead
    else if (config[i] == "unidirectional") {
      m_direction = Backward;
    } else if (config[i] == "backward") {
      m_direction = Backward;
    } else if (config[i] == "forward") {
      m_direction = Forward;
    } else if (config[i] == "bidirectional") {
      m_direction = Bidirectional;
    }
    // conditioning side
    else if (config[i] == "f") {
      m_condition = F;
    } else if (config[i] == "fe") {
      m_condition = FE;
    }
    // score collapsing
    else if (config[i] == "collapseff") {
      m_collapseScores = true;
    } else if (config[i] == "allff") {
      m_collapseScores = false;
    } else {
      std::cerr
          << "Illegal part in the lexical reordering configuration string: "
          << config[i] << std::endl;
      exit(1);
    }
  }
  if (m_modelType == None) {
    std::cerr
        << "You need to specify the type of the reordering model "
        << "(msd, monotonicity,...)" << std::endl;
    exit(1);
  }
}
// Create the initial decoder state: a backward state, a forward state, or
// a bidirectional wrapper around both. 'offset' locates each component's
// scores inside the feature's dense score vector.
LRState *
LRModel::
CreateLRState(const InputType &input) const
{
  LRState *bwd = NULL;
  LRState *fwd = NULL;
  size_t offset = 0;

  bool const needBwd = (m_direction == Backward || m_direction == Bidirectional);
  bool const needFwd = (m_direction == Forward  || m_direction == Bidirectional);

  if (needBwd) {
    if (m_phraseBased)
      bwd = new PhraseBasedReorderingState(*this, Backward, offset);
    else
      bwd = new HReorderingBackwardState(*this, offset);
    offset += m_collapseScores ? 1 : GetNumberOfTypes();
    if (m_direction == Backward) return bwd;
  }
  if (needFwd) {
    if (m_phraseBased)
      fwd = new PhraseBasedReorderingState(*this, Forward, offset);
    else
      fwd = new HReorderingForwardState(*this, input.GetSize(), offset);
    offset += m_collapseScores ? 1 : GetNumberOfTypes();
    if (m_direction == Forward) return fwd;
  }
  return new BidirectionalReorderingState(*this, bwd, fwd, 0);
}
// Add the reordering score for the given orientation to 'accum'.  Backward
// scoring reads the scores cached on 'topt' itself; forward scoring reads
// the (delayed) scores of the remembered previous option.
void
LRState::
CopyScores(ScoreComponentCollection* accum,
           const TranslationOption &topt,
           const InputType& input,
           ReorderingType reoType) const
{
  // don't call this on a bidirectional object
  UTIL_THROW_IF2(m_direction != LRModel::Backward &&
                 m_direction != LRModel::Forward,
                 "Unknown direction: " << m_direction);
  TranslationOption const* relevantOpt = ((m_direction == LRModel::Backward)
                                          ? &topt : m_prevOption);
  LexicalReordering* producer = m_configuration.GetScoreProducer();
  Scores const* cached = relevantOpt->GetLexReorderingScores(producer);
  // The approach here is bizarre! Why create a whole vector and do
  // vector addition (acumm->PlusEquals) to update a single value? - UG

  // off_remote indexes the per-orientation score vector cached on the
  // option; off_local is the slot in this feature's dense vector (a single
  // shared slot when scores are collapsed).
  size_t off_remote = m_offset + reoType;
  size_t off_local = m_configuration.CollapseScores() ? m_offset : off_remote;
  UTIL_THROW_IF2(off_local >= producer->GetNumScoreComponents(),
                 "offset out of vector bounds!");
  // look up the applicable score in the vector of scores
  if(cached) {
    UTIL_THROW_IF2(off_remote >= cached->size(), "offset out of vector bounds!");
    Scores scores(producer->GetNumScoreComponents(),0);
    scores[off_local ] = (*cached)[off_remote];
    accum->PlusEquals(producer, scores);
  }
  // else: use default scores (if specified)
  else if (producer->GetHaveDefaultScores()) {
    Scores scores(producer->GetNumScoreComponents(),0);
    scores[off_local] = producer->GetDefaultScore(off_remote);
    accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
  }
  // note: if no default score, no cost
  const SparseReordering* sparse = m_configuration.GetSparseReordering();
  if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType,
                                 m_direction, accum);
}
int
LRState::
ComparePrevScores(const TranslationOption *other) const
{
LexicalReordering* producer = m_configuration.GetScoreProducer();
const Scores* myScores = m_prevOption->GetLexReorderingScores(producer);
const Scores* yrScores = other->GetLexReorderingScores(producer);
if(myScores == yrScores) return 0;
// The pointers are NULL if a phrase pair isn't found in the reordering table.
if(yrScores == NULL) return -1;
if(myScores == NULL) return 1;
size_t stop = m_offset + m_configuration.GetNumberOfTypes();
for(size_t i = m_offset; i < stop; i++) {
if((*myScores)[i] < (*yrScores)[i]) return -1;
if((*myScores)[i] > (*yrScores)[i]) return 1;
}
return 0;
}
// ===========================================================================
// PHRASE BASED REORDERING STATE
// ===========================================================================
// Whether the backward model also scores the very first phrase of a
// hypothesis; a global toggle.
bool PhraseBasedReorderingState::m_useFirstBackwardScore = true;

// Successor state: remember the source range of the option just applied.
PhraseBasedReorderingState::
PhraseBasedReorderingState(const PhraseBasedReorderingState *prev,
                           const TranslationOption &topt)
  : LRState(prev, topt)
  , m_prevRange(topt.GetSourceWordsRange())
  , m_first(false)
{ }

// Initial state: no previous range yet (NOT_FOUND sentinel).
PhraseBasedReorderingState::
PhraseBasedReorderingState(const LRModel &config,
                           LRModel::Direction dir, size_t offset)
  : LRState(config, dir, offset)
  , m_prevRange(NOT_FOUND,NOT_FOUND)
  , m_first(true)
{ }
// Recombination hash: previous source range combined with scoring direction.
size_t PhraseBasedReorderingState::hash() const
{
  size_t seed = hash_value(m_prevRange);
  boost::hash_combine(seed, m_direction);
  return seed;
}
// Equal iff the previous source ranges match; forward-conditioned states
// must additionally carry equal cached scores for the previous option.
bool PhraseBasedReorderingState::operator==(const FFState& o) const
{
  if (&o == this) return true;
  const PhraseBasedReorderingState &other
  = static_cast<const PhraseBasedReorderingState&>(o);
  if (!(m_prevRange == other.m_prevRange)) return false;
  if (m_direction != LRModel::Forward) return true;
  return ComparePrevScores(other.m_prevOption) == 0;
}
// Score the orientation of this expansion (including the first phrase when
// the backward model is configured to score it) and return the successor.
LRState*
PhraseBasedReorderingState::
Expand(const TranslationOption& topt, const InputType& input,
       ScoreComponentCollection* scores) const
{
  bool const scoreThisStep
  = !m_first || (m_direction != LRModel::Forward && m_useFirstBackwardScore);
  if (scoreThisStep) {
    LRModel const& lrmodel = m_configuration;
    Range const currRange = topt.GetSourceWordsRange();
    LRModel::ReorderingType const orient
    = m_first ? lrmodel.GetOrientation(currRange)
      : lrmodel.GetOrientation(m_prevRange, currRange);
    CopyScores(scores, topt, input, orient);
  }
  return new PhraseBasedReorderingState(this, topt);
}
///////////////////////////
//BidirectionalReorderingState
// Combine the hashes of the two component states.
size_t BidirectionalReorderingState::hash() const
{
  size_t seed = m_backward->hash();
  boost::hash_combine(seed, m_forward->hash());
  return seed;
}
// Two bidirectional states are equal iff both their backward and their
// forward component states compare equal.
bool BidirectionalReorderingState::operator==(const FFState& o) const
{
  // BUGFIX: this used to 'return 0' (i.e. false) for self-comparison,
  // reporting a state as unequal to itself and defeating recombination.
  if (&o == this) return true;
  BidirectionalReorderingState const &other
  = static_cast<BidirectionalReorderingState const&>(o);
  bool ret = (*m_backward == *other.m_backward) && (*m_forward == *other.m_forward);
  return ret;
}
// Expand both component states and wrap the results in a fresh
// bidirectional state (which takes ownership of them).
LRState*
BidirectionalReorderingState::
Expand(const TranslationOption& topt, const InputType& input,
       ScoreComponentCollection* scores) const
{
  LRState *bwd = m_backward->Expand(topt, input, scores);
  LRState *fwd = m_forward->Expand(topt, input, scores);
  return new BidirectionalReorderingState(m_configuration, bwd, fwd, m_offset);
}
///////////////////////////
//HierarchicalReorderingBackwardState
// Successor state: copy the predecessor's reordering stack (it is updated
// in Expand via ShiftReduce on the new state).
HReorderingBackwardState::
HReorderingBackwardState(const HReorderingBackwardState *prev,
                         const TranslationOption &topt,
                         ReorderingStack reoStack)
  : LRState(prev, topt), m_reoStack(reoStack)
{ }

// Initial state with a default (empty) reordering stack.
HReorderingBackwardState::
HReorderingBackwardState(const LRModel &config, size_t offset)
  : LRState(config, LRModel::Backward, offset)
{ }
// Recombination hash: delegates entirely to the reordering stack.
size_t HReorderingBackwardState::hash() const
{
  return m_reoStack.hash();
}
// Equal iff the reordering stacks are equal.
bool HReorderingBackwardState::operator==(const FFState& o) const
{
  HReorderingBackwardState const& other
  = static_cast<HReorderingBackwardState const&>(o);
  return m_reoStack == other.m_reoStack;
}
// Create the successor state, shift-reduce the new source range onto its
// stack, and score the orientation implied by the resulting distance.
LRState*
HReorderingBackwardState::
Expand(const TranslationOption& topt, const InputType& input,
       ScoreComponentCollection* scores) const
{
  HReorderingBackwardState* succ
  = new HReorderingBackwardState(this, topt, m_reoStack);
  int const reoDistance
  = succ->m_reoStack.ShiftReduce(topt.GetSourceWordsRange());
  ReorderingType const orient = m_configuration.GetOrientation(reoDistance);
  CopyScores(scores, topt, input, orient);
  return succ;
}
///////////////////////////
//HReorderingForwardState
// Initial state for a sentence with 'size' source words: empty coverage
// and no previous range yet (NOT_FOUND sentinel).
HReorderingForwardState::
HReorderingForwardState(const LRModel &config,
                        size_t size, size_t offset)
  : LRState(config, LRModel::Forward, offset)
  , m_first(true)
  , m_prevRange(NOT_FOUND,NOT_FOUND)
  , m_coverage(size)
{ }

// Successor state: remember topt's source range and extend the coverage.
HReorderingForwardState::
HReorderingForwardState(const HReorderingForwardState *prev,
                        const TranslationOption &topt)
  : LRState(prev, topt)
  , m_first(false)
  , m_prevRange(topt.GetSourceWordsRange())
  , m_coverage(prev->m_coverage, topt.GetSourceWordsRange())
{
}
// Recombination hash: the source range of the previous phrase.
size_t HReorderingForwardState::hash() const
{
  return hash_value(m_prevRange);
}
// Equal iff the previous ranges match and the cached reordering scores of
// the previous options compare equal (scoring is delayed by one step).
bool HReorderingForwardState::operator==(const FFState& o) const
{
  if (&o == this) return true;
  HReorderingForwardState const& other
  = static_cast<HReorderingForwardState const&>(o);
  if (!(m_prevRange == other.m_prevRange)) return false;
  return ComparePrevScores(other.m_prevOption) == 0;
}
// For compatibility with the phrase-based reordering model, scoring is one
// step delayed.
// The forward model determines orientations heuristically as follows:
// mono: if the next phrase comes after the conditioning phrase and
// - there is a gap to the right of the conditioning phrase, or
// - the next phrase immediately follows it
// swap: if the next phrase goes before the conditioning phrase and
// - there is a gap to the left of the conditioning phrase, or
// - the next phrase immediately precedes it
// dright: if the next phrase follows the conditioning phrase and other
// stuff comes in between
// dleft: if the next phrase precedes the conditioning phrase and other
// stuff comes in between
// Produce the successor state after applying topt, scoring the orientation
// of the *previous* phrase now that we know what follows it (one-step delay).
LRState*
HReorderingForwardState::
Expand(TranslationOption const& topt, InputType const& input,
       ScoreComponentCollection* scores) const
{
  // Source span consumed by the option we are expanding with.
  Range const currRange = topt.GetSourceWordsRange();
  // Maintain coverage locally so we never need to consult the hypothesis.
  Bitmap const updatedCov(m_coverage, currRange);
  if (!m_first) {
    LRModel::ReorderingType const orient
    = m_configuration.GetOrientation(m_prevRange, currRange, updatedCov);
    CopyScores(scores, topt, input, orient);
  }
  return new HReorderingForwardState(this, topt);
}
}

View File

@ -1,308 +0,0 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#pragma once
#include <vector>
#include <string>
#include <boost/scoped_ptr.hpp>
#include "moses/Hypothesis.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/Range.h"
#include "moses/Bitmap.h"
#include "moses/TranslationOption.h"
#include "moses/FF/FFState.h"
#include "ReorderingStack.h"
namespace Moses
{
class LRState;
class LexicalReordering;
class SparseReordering;
//! Factory class for lexical reordering states: parses the model
//! specification string and creates the matching chain of LRState objects.
class LRModel
{
public:
  friend class LexicalReordering;
  // Orientation granularity, scoring direction(s), and conditioning side.
  enum ModelType { Monotonic, MSD, MSLR, LeftRight, None };
  enum Direction { Forward, Backward, Bidirectional };
  enum Condition { F, E, FE };

  // constants for the different types of reordering
  // (correspond to indices in the respective table)
#if 0
  typedef int ReorderingType;
  static const ReorderingType M = 0;   // monotonic
  static const ReorderingType NM = 1;  // non-monotonic
  static const ReorderingType S = 1;   // swap
  static const ReorderingType D = 2;   // discontinuous
  static const ReorderingType DL = 2;  // discontinuous, left
  static const ReorderingType DR = 3;  // discontinuous, right
  static const ReorderingType R = 0;   // right
  static const ReorderingType L = 1;   // left
  static const ReorderingType MAX = 3; // largest possible
#else
  enum ReorderingType {
    M    = 0, // monotonic
    NM   = 1, // non-monotonic
    S    = 1, // swap
    D    = 2, // discontinuous
    DL   = 2, // discontinuous, left
    DR   = 3, // discontinuous, right
    R    = 0, // right
    L    = 1, // left
    MAX  = 3, // largest possible
    NONE = 4  // largest possible
  };
#endif
  // determine orientation, depending on model:
  ReorderingType // for first phrase in phrase-based
  GetOrientation(Range const& cur) const;

  ReorderingType // for non-first phrases in phrase-based
  GetOrientation(Range const& prev, Range const& cur) const;

  ReorderingType // for HReorderingForwardState
  GetOrientation(Range const& prev, Range const& cur,
                 Bitmap const& cov) const;

  ReorderingType // for HReorderingBackwarddState
  GetOrientation(int const reoDistance) const;

  // Parse a dash-separated spec string, e.g. "wbe-msd-bidirectional-fe".
  LRModel(const std::string &modelType);

  // Set up the sparse component iff sparse arguments were given.
  void
  ConfigureSparse(const std::map<std::string,std::string>& sparseArgs,
                  const LexicalReordering* producer);

  // Create the initial decoder state (backward, forward, or both).
  LRState*
  CreateLRState(const InputType &input) const;

  // Number of orientation classes (2, 3, or 4).
  size_t GetNumberOfTypes() const;
  // Total dense score components produced by this model.
  size_t GetNumScoreComponents() const;
  // Extra (non-orientation) components contributed by the owning feature.
  void SetAdditionalScoreComponents(size_t number);

  LexicalReordering*
  GetScoreProducer() const {
    return m_scoreProducer;
  }

  ModelType GetModelType() const {
    return m_modelType;
  }

  Direction GetDirection() const {
    return m_direction;
  }

  Condition GetCondition() const {
    return m_condition;
  }

  bool
  IsPhraseBased() const {
    return m_phraseBased;
  }

  bool
  CollapseScores() const {
    return m_collapseScores;
  }

  SparseReordering const*
  GetSparseReordering() const {
    return m_sparse.get();
  }

private:
  void
  SetScoreProducer(LexicalReordering* scoreProducer) {
    m_scoreProducer = scoreProducer;
  }

  std::string const&
  GetModelString() const {
    return m_modelString;
  }

  std::string m_modelString;          // original specification string
  LexicalReordering *m_scoreProducer; // owning feature function (not owned here)
  ModelType m_modelType;              // orientation granularity
  bool m_phraseBased;                 // phrase-based vs. hierarchical states
  bool m_collapseScores;              // single collapsed score per direction?
  Direction m_direction;              // scoring direction(s)
  Condition m_condition;              // conditioning side (f / fe)
  size_t m_additionalScoreComponents; // extra components beyond orientations
  boost::scoped_ptr<SparseReordering> m_sparse; // optional sparse component
};
//! Abstract base class for lexical reordering model states carried on the
//! hypothesis (one per scoring direction; configured by an LRModel).
class LRState : public FFState
{
public:
  typedef LRModel::ReorderingType ReorderingType;

  // Produce the successor state after applying 'hypo', adding this step's
  // reordering scores to 'scores'.
  virtual
  LRState*
  Expand(const TranslationOption& hypo, const InputType& input,
         ScoreComponentCollection* scores) const = 0;

  static
  LRState*
  CreateLRState(const std::vector<std::string>& config,
                LRModel::Direction dir,
                const InputType &input);

protected:
  const LRModel& m_configuration; // shared model configuration
  // The following is the true direction of the object, which can be
  // Backward or Forward even if the Configuration has Bidirectional.
  LRModel::Direction m_direction;
  size_t m_offset; // index of this model's first score in the dense vector
  //forward scores are conditioned on prev option, so need to remember it
  const TranslationOption *m_prevOption;

  // Successor-state constructor: inherit configuration, remember topt.
  inline
  LRState(const LRState *prev,
          const TranslationOption &topt)
    : m_configuration(prev->m_configuration)
    , m_direction(prev->m_direction)
    , m_offset(prev->m_offset)
    , m_prevOption(&topt)
  { }

  // Initial-state constructor (no previous option yet).
  inline
  LRState(const LRModel &config,
          LRModel::Direction dir,
          size_t offset)
    : m_configuration(config)
    , m_direction(dir)
    , m_offset(offset)
    , m_prevOption(NULL)
  { }

  // copy the right scores in the right places, taking into account
  // forward/backward, offset, collapse
  void
  CopyScores(ScoreComponentCollection* scores,
             const TranslationOption& topt,
             const InputType& input, ReorderingType reoType) const;

  int
  ComparePrevScores(const TranslationOption *other) const;
};
//! Pair of a backward- and a forward-conditioned reordering state scored
//! jointly; owns and deletes both component states.
class BidirectionalReorderingState
  : public LRState
{
private:
  const LRState *m_backward; // owned
  const LRState *m_forward;  // owned
public:
  BidirectionalReorderingState(const LRModel &config,
                               const LRState *bw,
                               const LRState *fw, size_t offset)
    : LRState(config,
              LRModel::Bidirectional,
              offset)
    , m_backward(bw)
    , m_forward(fw)
  { }

  ~BidirectionalReorderingState() {
    delete m_backward;
    delete m_forward;
  }

  virtual size_t hash() const;
  virtual bool operator==(const FFState& other) const;

  LRState*
  Expand(const TranslationOption& topt, const InputType& input,
         ScoreComponentCollection* scores) const;
};
//! State for the standard Moses implementation of lexical reordering models
//! (see Koehn et al, Edinburgh System Description for the 2005 NIST MT
//! Evaluation)
class PhraseBasedReorderingState
: public LRState
{
private:
// Source range covered by the previously translated option.
Range m_prevRange;
// True only for the initial state (no phrase translated yet).
bool m_first;
public:
// Global switch: when false, backward models do not score the very
// first phrase of a hypothesis.
static bool m_useFirstBackwardScore;
PhraseBasedReorderingState(const LRModel &config,
LRModel::Direction dir,
size_t offset);
PhraseBasedReorderingState(const PhraseBasedReorderingState *prev,
const TranslationOption &topt);
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
virtual
LRState*
Expand(const TranslationOption& topt,const InputType& input,
ScoreComponentCollection* scores) const;
// Orientation classifiers for the supported model types, comparing
// 'currRange' against m_prevRange.
ReorderingType GetOrientationTypeMSD(Range currRange) const;
ReorderingType GetOrientationTypeMSLR(Range currRange) const;
ReorderingType GetOrientationTypeMonotonic(Range currRange) const;
ReorderingType GetOrientationTypeLeftRight(Range currRange) const;
};
//! State for a hierarchical reordering model (see Galley and Manning, A
//! Simple and Effective Hierarchical Phrase Reordering Model, EMNLP 2008)
//! backward state (conditioned on the previous phrase)
class HReorderingBackwardState : public LRState
{
private:
// Stack of source ranges used to compute hierarchical reordering
// distances (Galley & Manning 2008).
ReorderingStack m_reoStack;
public:
HReorderingBackwardState(const LRModel &config, size_t offset);
HReorderingBackwardState(const HReorderingBackwardState *prev,
const TranslationOption &topt,
ReorderingStack reoStack);
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
virtual LRState* Expand(const TranslationOption& hypo, const InputType& input,
ScoreComponentCollection* scores) const;
private:
// Orientation classifiers keyed on the signed reordering distance.
ReorderingType GetOrientationTypeMSD(int reoDistance) const;
ReorderingType GetOrientationTypeMSLR(int reoDistance) const;
ReorderingType GetOrientationTypeMonotonic(int reoDistance) const;
ReorderingType GetOrientationTypeLeftRight(int reoDistance) const;
};
//!forward state (conditioned on the next phrase)
//!forward state (conditioned on the next phrase)
class HReorderingForwardState : public LRState
{
private:
// True only for the initial state (no phrase translated yet).
bool m_first;
// Source range covered by the previous option.
Range m_prevRange;
// Source-side coverage bitmap of the hypothesis so far.
Bitmap m_coverage;
public:
HReorderingForwardState(const LRModel &config, size_t sentenceLength,
size_t offset);
HReorderingForwardState(const HReorderingForwardState *prev,
const TranslationOption &topt);
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
virtual LRState* Expand(const TranslationOption& hypo,
const InputType& input,
ScoreComponentCollection* scores) const;
};
}

View File

@ -0,0 +1,72 @@
#include "PhraseBasedReorderingState.h"
namespace Moses
{
// ===========================================================================
// PHRASE BASED REORDERING STATE
// ===========================================================================
bool PhraseBasedReorderingState::m_useFirstBackwardScore = true;
// Successor-state constructor: remember the source range covered by
// 'topt'; after the first expansion the state is no longer initial.
PhraseBasedReorderingState::
PhraseBasedReorderingState(const PhraseBasedReorderingState *prev,
const TranslationOption &topt)
: LRState(prev, topt)
, m_prevRange(topt.GetSourceWordsRange())
, m_first(false)
{ }
// Initial-state constructor: no previous phrase yet, marked by the
// sentinel range (NOT_FOUND, NOT_FOUND) and m_first == true.
PhraseBasedReorderingState::
PhraseBasedReorderingState(const LRModel &config,
LRModel::Direction dir, size_t offset)
: LRState(config, dir, offset)
, m_prevRange(NOT_FOUND,NOT_FOUND)
, m_first(true)
{ }
// Hash over the fields that operator== inspects: the previous source
// range and the model direction.
size_t PhraseBasedReorderingState::hash() const
{
  size_t seed = hash_value(m_prevRange);
  boost::hash_combine(seed, m_direction);
  return seed;
}
// Recombination check: states are equal when the previous source ranges
// match; forward states additionally require equal previous-option scores.
bool PhraseBasedReorderingState::operator==(const FFState& o) const
{
  if (this == &o) return true;
  const PhraseBasedReorderingState &rhs =
    static_cast<const PhraseBasedReorderingState&>(o);
  if (!(m_prevRange == rhs.m_prevRange)) return false;
  if (m_direction != LRModel::Forward) return true;
  // Forward scores are conditioned on the previous option, so two states
  // only recombine if those options score identically.
  return ComparePrevScores(rhs.m_prevOption) == 0;
}
// Score 'topt' relative to this state and return the successor state.
// Scoring is skipped only when this is the initial state AND either the
// model is forward or first-backward scoring is disabled.
LRState*
PhraseBasedReorderingState::
Expand(const TranslationOption& topt, const InputType& input,
ScoreComponentCollection* scores) const
{
// const LRModel::ModelType modelType = m_configuration.GetModelType();
if ((m_direction != LRModel::Forward && m_useFirstBackwardScore) || !m_first) {
LRState const& lrmodel = m_configuration;
Range const cur = topt.GetSourceWordsRange();
// For the very first phrase there is no previous range, so the
// one-argument orientation (relative to sentence start) is used.
LRModel::ReorderingType reoType = (m_first ? lrmodel.GetOrientation(cur)
: lrmodel.GetOrientation(m_prevRange,cur));
CopyScores(scores, topt, input, reoType);
}
return new PhraseBasedReorderingState(this, topt);
}
}

View File

@ -0,0 +1,38 @@
#pragma once
#include "LRState.h"
namespace Moses
{
//! State for the standard Moses implementation of lexical reordering models
//! (see Koehn et al, Edinburgh System Description for the 2005 NIST MT
//! Evaluation)
class PhraseBasedReorderingState
: public LRState
{
private:
// Source range covered by the previously translated option.
Range m_prevRange;
// True only for the initial state (no phrase translated yet).
bool m_first;
public:
// Global switch: when false, backward models do not score the very
// first phrase of a hypothesis.
static bool m_useFirstBackwardScore;
PhraseBasedReorderingState(const LRModel &config,
LRModel::Direction dir,
size_t offset);
PhraseBasedReorderingState(const PhraseBasedReorderingState *prev,
const TranslationOption &topt);
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
virtual
LRState*
Expand(const TranslationOption& topt,const InputType& input,
ScoreComponentCollection* scores) const;
// Orientation classifiers for the supported model types, comparing
// 'currRange' against m_prevRange.
ReorderingType GetOrientationTypeMSD(Range currRange) const;
ReorderingType GetOrientationTypeMSLR(Range currRange) const;
ReorderingType GetOrientationTypeMonotonic(Range currRange) const;
ReorderingType GetOrientationTypeLeftRight(Range currRange) const;
};
}

View File

@ -19,7 +19,7 @@
#include "moses/FeatureVector.h"
#include "moses/ScoreComponentCollection.h"
#include "LexicalReorderingState.h"
#include "LRState.h"
/**
Configuration of sparse reordering:

View File

@ -140,6 +140,8 @@ float Model1LexicalTable::GetProbability(const Factor* wordS, const Factor* word
Model1Feature::Model1Feature(const std::string &line)
: StatelessFeatureFunction(1, line)
, m_skipTargetPunctuation(false)
, m_is_syntax(false)
{
VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
ReadParameters();
@ -150,10 +152,12 @@ void Model1Feature::SetParameter(const std::string& key, const std::string& valu
{
if (key == "path") {
m_fileNameModel1 = value;
} else if (key == "sourceVocabulary") {
} else if (key == "source-vocabulary") {
m_fileNameVcbS = value;
} else if (key == "targetVocabulary") {
} else if (key == "target-vocabulary") {
m_fileNameVcbT = value;
} else if (key == "skip-target-punctuation") {
m_skipTargetPunctuation = Scan<bool>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
@ -162,6 +166,8 @@ void Model1Feature::SetParameter(const std::string& key, const std::string& valu
void Model1Feature::Load(AllOptions::ptr const& opts)
{
m_options = opts;
m_is_syntax = is_syntax(opts->search.algo);
FEATUREVERBOSE(2, GetScoreProducerDescription() << ": Loading source vocabulary from file " << m_fileNameVcbS << " ...");
Model1Vocabulary vcbS;
vcbS.Load(m_fileNameVcbS);
@ -177,6 +183,16 @@ void Model1Feature::Load(AllOptions::ptr const& opts)
m_emptyWord = factorCollection.GetFactor(Model1Vocabulary::GIZANULL,false);
UTIL_THROW_IF2(m_emptyWord==NULL, GetScoreProducerDescription()
<< ": Factor for GIZA empty word does not exist.");
if (m_skipTargetPunctuation) {
const std::string punctuation = ",;.:!?";
for (size_t i=0; i<punctuation.size(); ++i) {
const std::string punct = punctuation.substr(i,1);
FactorCollection &factorCollection = FactorCollection::Instance();
const Factor* punctFactor = factorCollection.AddFactor(punct,false);
std::pair<std::set<const Factor*>::iterator,bool> inserted = m_punctuation.insert(punctFactor);
}
}
}
void Model1Feature::EvaluateWithSourceContext(const InputType &input
@ -192,6 +208,12 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input
for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) {
const Word &wordT = targetPhrase.GetWord(posT);
if (m_skipTargetPunctuation) {
std::set<const Factor*>::const_iterator foundPunctuation = m_punctuation.find(wordT[0]);
if (foundPunctuation != m_punctuation.end()) {
continue;
}
}
if ( !wordT.IsNonTerminal() ) {
float thisWordProb = m_model1.GetProbability(m_emptyWord,wordT[0]); // probability conditioned on empty word
@ -213,7 +235,7 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input
}
if (!foundInCache) {
for (size_t posS=1; posS<sentence.GetSize()-1; ++posS) { // ignore <s> and </s>
for (size_t posS=(m_is_syntax?1:0); posS<(m_is_syntax?sentence.GetSize()-1:sentence.GetSize()); ++posS) { // ignore <s> and </s>
const Word &wordS = sentence.GetWord(posS);
float modelProb = m_model1.GetProbability(wordS[0],wordT[0]);
FEATUREVERBOSE(4, "p( " << wordT << " | " << wordS << " ) = " << modelProb << std::endl);

View File

@ -2,6 +2,7 @@
#include <string>
#include <limits>
#include <set>
#include <boost/unordered_map.hpp>
#include "StatelessFeatureFunction.h"
#include "moses/Factor.h"
@ -98,6 +99,9 @@ private:
std::string m_fileNameModel1;
Model1LexicalTable m_model1;
const Factor* m_emptyWord;
bool m_skipTargetPunctuation;
std::set<const Factor*> m_punctuation;
bool m_is_syntax;
void Load(AllOptions::ptr const& opts);

View File

@ -0,0 +1,189 @@
#include "TargetConstituentAdjacencyFeature.h"
#include "moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h"
#include "moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h"
#include "moses/StaticData.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/Hypothesis.h"
#include "moses/FactorCollection.h"
#include "moses/TreeInput.h"
#include <algorithm>
using namespace std;
namespace Moses
{
// Hash over the constituent keys (values are ignored, matching
// operator==). With recombination enabled all states hash alike so the
// decoder always recombines them.
size_t TargetConstituentAdjacencyFeatureState::hash() const
{
  if (m_recombine) {
    return 0;
  }
  size_t seed = 0;
  boost::hash_combine(seed, m_collection.size());
  std::map<const Factor*, float>::const_iterator it;
  for (it = m_collection.begin(); it != m_collection.end(); ++it) {
    boost::hash_combine(seed, it->first);
  }
  return seed;
}
// Two states are equal when their constituent key sets are identical;
// the stored float values are deliberately not compared. With
// recombination enabled every state compares equal.
bool TargetConstituentAdjacencyFeatureState::operator==(const FFState& other) const
{
  if (m_recombine) return true;
  if (this == &other) return true;

  const TargetConstituentAdjacencyFeatureState* rhs =
    dynamic_cast<const TargetConstituentAdjacencyFeatureState*>(&other);
  UTIL_THROW_IF2(rhs == NULL, "Wrong state type");

  const std::map<const Factor*, float>& mine = m_collection;
  const std::map<const Factor*, float>& theirs = rhs->m_collection;
  if (mine.size() != theirs.size()) return false;

  // Equal sizes, and std::map iterates in sorted key order, so a single
  // parallel sweep suffices.
  std::map<const Factor*, float>::const_iterator a = mine.begin();
  std::map<const Factor*, float>::const_iterator b = theirs.begin();
  for (; a != mine.end(); ++a, ++b) {
    if (a->first != b->first) return false;
  }
  return true;
}
// Construct from a moses.ini feature line. Registers two dense score
// components (see EvaluateWhenApplied: [0] adjacency score, [1] penalty
// count). Defaults: variant 0, recombination off.
TargetConstituentAdjacencyFeature::TargetConstituentAdjacencyFeature(const std::string &line)
: StatefulFeatureFunction(2, line)
, m_featureVariant(0)
, m_recombine(false)
{
VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
ReadParameters();
VERBOSE(1, " Done." << std::endl);
VERBOSE(1, " Feature variant: " << m_featureVariant << "." << std::endl);
}
// Parse one key=value pair from the feature line; unknown keys are
// delegated to the base class.
void TargetConstituentAdjacencyFeature::SetParameter(const std::string& key, const std::string& value)
{
  if (key == "variant") {
    m_featureVariant = Scan<size_t>(value);
    return;
  }
  if (key == "recombine") {
    m_recombine = Scan<bool>(value);
    return;
  }
  StatefulFeatureFunction::SetParameter(key, value);
}
// Score the boundary between the previous hypothesis and the phrase just
// applied: intersect the previous state's right-adjacent constituent set
// with this phrase's left-boundary constituent set. Emits two dense
// scores: [0] a log-transformed match score, [1] a mismatch penalty count.
// Returns the new state carrying this phrase's right-adjacent set.
FFState* TargetConstituentAdjacencyFeature::EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
{
// dense scores
std::vector<float> newScores(m_numScoreComponents,0); // m_numScoreComponents == 2
// state
const TargetConstituentAdjacencyFeatureState *prevState = static_cast<const TargetConstituentAdjacencyFeatureState*>(prev_state);
// read TargetConstituentAdjacency property
const TargetPhrase &currTarPhr = cur_hypo.GetCurrTargetPhrase();
FEATUREVERBOSE(2, "Phrase: " << currTarPhr << std::endl);
if (const PhraseProperty *property = currTarPhr.GetProperty("TargetConstituentBoundariesLeft")) {
const TargetConstituentBoundariesLeftPhraseProperty *targetConstituentBoundariesLeftPhraseProperty =
static_cast<const TargetConstituentBoundariesLeftPhraseProperty*>(property);
const TargetConstituentBoundariesLeftCollection& leftConstituentCollection =
targetConstituentBoundariesLeftPhraseProperty->GetCollection();
float prob = 0;
size_t numMatch = 0;
size_t numOverall = 0;
if ( !cur_hypo.GetPrevHypo()->GetPrevHypo() ) {
// previous hypothesis is initial, i.e. target sentence starts here
++numOverall;
FactorCollection &factorCollection = FactorCollection::Instance();
const Factor* bosFactor = factorCollection.AddFactor("BOS_",false);
TargetConstituentBoundariesLeftCollection::const_iterator found =
leftConstituentCollection.find(bosFactor);
if ( found != leftConstituentCollection.end() ) {
++numMatch;
prob += found->second;
}
} else {
// Sorted-merge intersection of the two maps; for matching
// constituents keep the maximum product of the two weights.
const std::map<const Factor*, float>& hypConstituentCollection = prevState->m_collection;
std::map<const Factor*, float>::const_iterator iter1 = hypConstituentCollection.begin();
std::map<const Factor*, float>::const_iterator iter2 = leftConstituentCollection.begin();
while ( iter1 != hypConstituentCollection.end() && iter2 != leftConstituentCollection.end() ) {
// NOTE(review): numOverall counts merge steps, not the union size
// of both sets — confirm this is the intended denominator.
++numOverall;
if ( iter1->first < iter2->first ) {
++iter1;
} else if ( iter2->first < iter1->first ) {
++iter2;
} else {
++numMatch;
float currProb = iter1->second * iter2->second;
if (currProb > prob)
prob = currProb;
++iter1;
++iter2;
}
}
}
if ( (numMatch == 0) || (prob == 0) ) {
// No adjacency evidence: count a penalty instead of a score.
++newScores[1];
} else {
// Variant 1 scores the best joint weight; the default scores the
// fraction of matched constituents.
if ( m_featureVariant == 1 ) {
newScores[0] += TransformScore(prob);
} else {
newScores[0] += TransformScore( (float)numMatch/numOverall );
}
}
} else {
// abort with error message if the phrase does not translate an unknown word
UTIL_THROW_IF2(!currTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
<< ": Missing TargetConstituentBoundariesLeft property.");
++newScores[1];
}
// Build the successor state from this phrase's right-adjacent
// constituent set (empty for OOV phrases).
TargetConstituentAdjacencyFeatureState *newState = new TargetConstituentAdjacencyFeatureState(m_recombine);
if (const PhraseProperty *property = currTarPhr.GetProperty("TargetConstituentBoundariesRightAdjacent")) {
const TargetConstituentBoundariesRightAdjacentPhraseProperty *targetConstituentBoundariesRightAdjacentPhraseProperty =
static_cast<const TargetConstituentBoundariesRightAdjacentPhraseProperty*>(property);
const TargetConstituentBoundariesLeftCollection& rightAdjacentConstituentCollection = targetConstituentBoundariesRightAdjacentPhraseProperty->GetCollection();
std::copy(rightAdjacentConstituentCollection.begin(), rightAdjacentConstituentCollection.end(),
std::inserter(newState->m_collection, newState->m_collection.begin()));
} else {
// abort with error message if the phrase does not translate an unknown word
UTIL_THROW_IF2(!currTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
<< ": Missing TargetConstituentBoundariesRightAdjacent property.");
}
// add scores
accumulator->PlusEquals(this, newScores);
return newState;
}
}

View File

@ -0,0 +1,101 @@
#pragma once
#include <string>
#include <vector>
#include <set>
#include <iostream>
#include "StatefulFeatureFunction.h"
#include "FFState.h"
#include "util/exception.hh"
#include <stdint.h>
namespace Moses
{
// Decoder state for TargetConstituentAdjacencyFeature: the set of target
// constituents adjacent to the right edge of the hypothesis so far, with
// a weight per constituent. When m_recombine is true, hash/operator==
// make all states equivalent so hypotheses always recombine.
class TargetConstituentAdjacencyFeatureState : public FFState
{
public:
friend class TargetConstituentAdjacencyFeature;
TargetConstituentAdjacencyFeatureState(bool recombine)
: m_recombine(recombine)
{};
size_t hash() const;
virtual bool operator==(const FFState& other) const;
private:
const bool m_recombine;
// Constituent label -> weight, filled from the phrase property by the
// owning feature.
std::map<const Factor*, float> m_collection;
};
// Stateful feature scoring whether adjacent target phrases join at
// constituent boundaries, using the TargetConstituentBoundariesLeft /
// ...RightAdjacent phrase properties. Phrase-based decoding only.
class TargetConstituentAdjacencyFeature : public StatefulFeatureFunction
{
public:
TargetConstituentAdjacencyFeature(const std::string &line);
~TargetConstituentAdjacencyFeature()
{};
// Usable for any factor configuration.
bool IsUseable(const FactorMask &mask) const {
return true;
};
virtual const FFState* EmptyHypothesisState(const InputType &input) const {
return new TargetConstituentAdjacencyFeatureState(m_recombine);
};
void SetParameter(const std::string& key, const std::string& value);
// No model files to load.
void Load(AllOptions::ptr const& opts)
{};
// Scoring happens only when a hypothesis is extended; the isolated /
// source-context evaluations are no-ops.
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{};
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{};
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
{};
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
// Chart-based decoding is not supported; throws at runtime.
FFState* EvaluateWhenApplied(
const ChartHypothesis& cur_hypo,
int featureID, // used to index the state in the previous hypotheses
ScoreComponentCollection* accumulator) const {
UTIL_THROW2(GetScoreProducerDescription() << ": feature currently not implemented for chart-based decoding.");
return new TargetConstituentAdjacencyFeatureState(m_recombine);
};
private:
// Scoring variant (0: match fraction, 1: best joint weight).
size_t m_featureVariant;
// When true, states always recombine (feature becomes stateless-ish).
bool m_recombine;
};
}

View File

@ -72,7 +72,7 @@ private:
std::string MakeNGram(const TargetPhrase &phrase, size_t start, size_t end) const {
std::vector<std::string> words;
while (start != end) {
words.push_back(phrase.GetWord(start).GetString(StaticData::Instance().options().output.factor_order, false));
words.push_back(phrase.GetWord(start).GetString(StaticData::Instance().options()->output.factor_order, false));
start++;
}
return Join(" ", words);

View File

@ -323,7 +323,7 @@ public:
Phrase *target = new Phrase();
target->CreateFromString(
Output
, StaticData::Instance().options().output.factor_order
, StaticData::Instance().options()->output.factor_order
, tabbedSentence.GetColumns()[0]
, NULL);

View File

@ -111,8 +111,7 @@ void WordTranslationFeature::Load(AllOptions::ptr const& opts)
}
inFileSource.close();
} else if (!m_filePathSource.empty() || !m_filePathTarget.empty()) {
return;
} else {
// restricted source word vocabulary
ifstream inFileSource(m_filePathSource.c_str());
UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);

View File

@ -19,8 +19,7 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_Factor_h
#define moses_Factor_h
#pragma once
#include <ostream>
#include <string>
@ -98,4 +97,4 @@ public:
size_t hash_value(const Factor &f);
}
#endif

View File

@ -175,7 +175,7 @@ void FVector::resize(size_t newsize)
void FVector::clear()
{
m_coreFeatures.resize(0);
m_coreFeatures.resize(m_coreFeatures.size(), 0);
m_features.clear();
}

View File

@ -40,7 +40,8 @@ namespace Moses
{
/** Constructs a new backward language model. */
template <class Model> BackwardLanguageModel<Model>::BackwardLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line,file,factorType,lazy)
// TODO(lane): load_method instead of lazy bool
template <class Model> BackwardLanguageModel<Model>::BackwardLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line,file,factorType, lazy ? util::LAZY : util::POPULATE_OR_READ)
{
//
// This space intentionally left blank

View File

@ -69,63 +69,6 @@ struct KenLMState : public FFState {
};
///*
// * An implementation of single factor LM using Ken's code.
// */
//template <class Model> class LanguageModelKen : public LanguageModel
//{
//public:
// LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
//
// const FFState *EmptyHypothesisState(const InputType &/*input*/) const {
// KenLMState *ret = new KenLMState();
// ret->state = m_ngram->BeginSentenceState();
// return ret;
// }
//
// void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
//
// FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
//
// FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
//
// void IncrementalCallback(Incremental::Manager &manager) const {
// manager.LMCallback(*m_ngram, m_lmIdLookup);
// }
//
// bool IsUseable(const FactorMask &mask) const;
//private:
// LanguageModelKen(const LanguageModelKen<Model> &copy_from);
//
// lm::WordIndex TranslateID(const Word &word) const {
// std::size_t factor = word.GetFactor(m_factorType)->GetId();
// return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
// }
//
// // Convert last words of hypothesis into vocab ids, returning an end pointer.
// lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const {
// lm::WordIndex *index = indices;
// lm::WordIndex *end = indices + m_ngram->Order() - 1;
// int position = hypo.GetCurrTargetWordsRange().GetEndPos();
// for (; ; ++index, --position) {
// if (index == end) return index;
// if (position == -1) {
// *index = m_ngram->GetVocabulary().BeginSentence();
// return index + 1;
// }
// *index = TranslateID(hypo.GetWord(position));
// }
// }
//
// boost::shared_ptr<Model> m_ngram;
//
// std::vector<lm::WordIndex> m_lmIdLookup;
//
// FactorType m_factorType;
//
// const Factor *m_beginSentenceFactor;
//};
class MappingBuilder : public lm::EnumerateVocab
{
public:
@ -148,7 +91,7 @@ private:
} // namespace
template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string &file, bool lazy)
template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string &file, util::LoadMethod load_method)
{
m_lmIdLookup.clear();
@ -161,18 +104,18 @@ template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string
FactorCollection &collection = FactorCollection::Instance();
MappingBuilder builder(collection, m_lmIdLookup);
config.enumerate_vocab = &builder;
config.load_method = lazy ? util::LAZY : util::POPULATE_OR_READ;
config.load_method = load_method;
m_ngram.reset(new Model(file.c_str(), config));
}
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method)
:LanguageModel(line)
,m_factorType(factorType)
,m_beginSentenceFactor(FactorCollection::Instance().AddFactor(BOS_))
{
ReadParameters();
LoadModel(file, lazy);
LoadModel(file, load_method);
}
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const LanguageModelKen<Model> &copy_from)
@ -480,7 +423,7 @@ LanguageModel *ConstructKenLM(const std::string &lineOrig)
{
FactorType factorType = 0;
string filePath;
bool lazy = false;
util::LoadMethod load_method = util::POPULATE_OR_READ;
util::TokenIter<util::SingleCharacter, true> argument(lineOrig, ' ');
++argument; // KENLM
@ -501,38 +444,53 @@ LanguageModel *ConstructKenLM(const std::string &lineOrig)
} else if (name == "path") {
filePath.assign(value.data(), value.size());
} else if (name == "lazyken") {
lazy = boost::lexical_cast<bool>(value);
// deprecated: use load instead.
load_method = boost::lexical_cast<bool>(value) ? util::LAZY : util::POPULATE_OR_READ;
} else if (name == "load") {
if (value == "lazy") {
load_method = util::LAZY;
} else if (value == "populate_or_lazy") {
load_method = util::POPULATE_OR_LAZY;
} else if (value == "populate_or_read" || value == "populate") {
load_method = util::POPULATE_OR_READ;
} else if (value == "read") {
load_method = util::READ;
} else if (value == "parallel_read") {
load_method = util::PARALLEL_READ;
} else {
UTIL_THROW2("Unknown KenLM load method " << value);
}
} else {
// pass to base class to interpret
line << " " << name << "=" << value;
}
}
return ConstructKenLM(line.str(), filePath, factorType, lazy);
return ConstructKenLM(line.str(), filePath, factorType, load_method);
}
LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method)
{
lm::ngram::ModelType model_type;
if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
switch(model_type) {
case lm::ngram::PROBING:
return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, load_method);
case lm::ngram::REST_PROBING:
return new LanguageModelKen<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
return new LanguageModelKen<lm::ngram::RestProbingModel>(line, file, factorType, load_method);
case lm::ngram::TRIE:
return new LanguageModelKen<lm::ngram::TrieModel>(line, file, factorType, lazy);
return new LanguageModelKen<lm::ngram::TrieModel>(line, file, factorType, load_method);
case lm::ngram::QUANT_TRIE:
return new LanguageModelKen<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
return new LanguageModelKen<lm::ngram::QuantTrieModel>(line, file, factorType, load_method);
case lm::ngram::ARRAY_TRIE:
return new LanguageModelKen<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
return new LanguageModelKen<lm::ngram::ArrayTrieModel>(line, file, factorType, load_method);
case lm::ngram::QUANT_ARRAY_TRIE:
return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(line, file, factorType, load_method);
default:
UTIL_THROW2("Unrecognized kenlm model type " << model_type);
}
} else {
return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, load_method);
}
}

View File

@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <boost/shared_ptr.hpp>
#include "lm/word_index.hh"
#include "util/mmap.hh"
#include "moses/LM/Base.h"
#include "moses/Hypothesis.h"
@ -41,7 +42,7 @@ class FFState;
LanguageModel *ConstructKenLM(const std::string &line);
//! This will also load. Returns a templated KenLM class
LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method);
/*
* An implementation of single factor LM using Kenneth's code.
@ -49,7 +50,7 @@ LanguageModel *ConstructKenLM(const std::string &line, const std::string &file,
template <class Model> class LanguageModelKen : public LanguageModel
{
public:
LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method);
virtual const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
@ -73,7 +74,7 @@ protected:
FactorType m_factorType;
void LoadModel(const std::string &file, bool lazy);
void LoadModel(const std::string &file, util::LoadMethod load_method);
lm::WordIndex TranslateID(const Word &word) const {
std::size_t factor = word.GetFactor(m_factorType)->GetId();

View File

@ -73,7 +73,7 @@ template <class Model> FFState *ReloadingLanguageModel<Model>::EvaluateWhenAppli
std::auto_ptr<FFState> kenlmState(LanguageModelKen<Model>::EvaluateWhenApplied(hypo, ps, out));
const lm::ngram::State &out_state = static_cast<const ReloadingLMState&>(*kenlmState).state;
std::auto_ptr<ReloadingLMState> ret(new ReloadingLMState());
ret->state = out_state;

View File

@ -64,18 +64,18 @@ private:
template <class Model> class ReloadingLanguageModel : public LanguageModelKen<Model>
{
public:
ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line, file, factorType, lazy), m_file(file), m_lazy(lazy)
{
// TODO(Lane) copy less code, update to load_method
ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line, file, factorType, lazy ? util::LAZY : util::POPULATE_OR_READ), m_file(file), m_lazy(lazy) {
std::cerr << "ReloadingLM constructor: " << m_file << std::endl;
// std::cerr << std::string(line).replace(0,11,"KENLM") << std::endl;
}
virtual void InitializeForInput(ttasksptr const& ttask) {
virtual void InitializeForInput(ttasksptr const& ttask) {
std::cerr << "ReloadingLM InitializeForInput" << std::endl;
LanguageModelKen<Model>::LoadModel(m_file, m_lazy);
// TODO(lane): load_method
LanguageModelKen<Model>::LoadModel(m_file, m_lazy ? util::LAZY : util::POPULATE_OR_READ);
/*
lm::ngram::Config config;
if(this->m_verbosity >= 1) {
@ -87,15 +87,15 @@ public:
MappingBuilder builder(collection, m_lmIdLookup);
config.enumerate_vocab = &builder;
config.load_method = m_lazy ? util::LAZY : util::POPULATE_OR_READ;
m_ngram.reset(new Model(m_file.c_str(), config));
m_beginSentenceFactor = collection.AddFactor(BOS_);
*/
};
/*
ReloadingLanguageModel(const std::string &line) : LanguageModelKen<Model>(ConstructKenLM(std::string(line).replace(0,11,"KENLM"))) {
ReloadingLanguageModel(const std::string &line) : LanguageModelKen<Model>(ConstructKenLM(std::string(line).replace(0,11,"KENLM"))) {
std::cerr << "ReloadingLM constructor" << std::endl;
std::cerr << std::string(line).replace(0,11,"KENLM") << std::endl;
}
@ -138,12 +138,12 @@ public:
}
private:
private:
LanguageModel *m_lm;
*/
protected:
protected:
using LanguageModelKen<Model>::m_ngram;
using LanguageModelKen<Model>::m_lmIdLookup;

View File

@ -58,7 +58,14 @@ public:
// Write to 'fname', throwing std::ios_base::failure on any I/O error
// instead of failing silently.
void Write(const std::string& fname) const {
std::ofstream out(fname.c_str());
// Little-known fact: ofstream tracks failures but does not, by default,
// report them. You have to tell it to, or check for errors yourself.
// (ifstream::failbit == ofstream::failbit: both come from ios_base.)
out.exceptions(std::ifstream::failbit | std::ifstream::badbit);
Write(out);
// Make sure the file is flushed, so that any errors are reported. If we
// flush implicitly in the destructor, it won't be able to throw
// exceptions.
out.close();
}
void Write(std::ostream& out) const {
for(int i=data.size()-1; i>=0; --i)

View File

@ -11,6 +11,8 @@
#include "moses/PP/SpanLengthPhraseProperty.h"
#include "moses/PP/NonTermContextProperty.h"
#include "moses/PP/OrientationPhraseProperty.h"
#include "moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h"
#include "moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h"
namespace Moses
{
@ -58,6 +60,8 @@ PhrasePropertyFactory::PhrasePropertyFactory()
MOSES_PNAME2("Counts", CountsPhraseProperty);
MOSES_PNAME2("SourceLabels", SourceLabelsPhraseProperty);
MOSES_PNAME2("TargetConstituentBoundariesLeft", TargetConstituentBoundariesLeftPhraseProperty);
MOSES_PNAME2("TargetConstituentBoundariesRightAdjacent", TargetConstituentBoundariesRightAdjacentPhraseProperty);
MOSES_PNAME2("TargetPreferences", TargetPreferencesPhraseProperty);
MOSES_PNAME2("Tree",TreeStructurePhraseProperty);
MOSES_PNAME2("SpanLength", SpanLengthPhraseProperty);

View File

@ -5,9 +5,14 @@ namespace Moses
std::ostream& operator<<(std::ostream &out, const PhraseProperty &obj)
{
out << "Base phrase property";
obj.Print(out);
return out;
}
void PhraseProperty::Print(std::ostream &out) const
{
out << "Base phrase property";
}
}

View File

@ -28,6 +28,8 @@ public:
protected:
virtual void Print(std::ostream& out) const;
std::string *m_value;
};

View File

@ -0,0 +1,63 @@
#include "moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h"
#include "moses/FactorCollection.h"
#include "moses/Util.h"
#include <iostream>
#include <queue>
#include <ostream>
namespace Moses
{
// Parse the property value string: whitespace-separated pairs of a
// '<'-joined list of constituent labels followed by a float count,
// e.g. "NP<S 2 VP 1". Labels duplicated within one list are counted
// once; counts for the same label accumulate across pairs into
// m_constituentsCollection.
void TargetConstituentBoundariesLeftPhraseProperty::ProcessValue(const std::string &value)
{
  FactorCollection &factorCollection = FactorCollection::Instance();

  std::vector<std::string> tokens;
  Tokenize(tokens, value, " ");
  std::vector<std::string>::const_iterator tokenIter = tokens.begin();
  while (tokenIter != tokens.end()) {
    try {
      std::vector<std::string> constituents;
      Tokenize(constituents, *tokenIter, "<");
      ++tokenIter;
      // A label token must be followed by its count. Dereferencing
      // tokens.end() would be undefined behavior (not a catchable
      // exception), so check explicitly before reading the count.
      if (tokenIter == tokens.end()) {
        UTIL_THROW2("TargetConstituentBoundariesLeftPhraseProperty: Missing count. Flawed property? " << value);
      }
      float count = std::atof( tokenIter->c_str() );
      ++tokenIter;

      // De-duplicate labels within this token before accumulating counts.
      std::set<const Factor* > dedup;
      for ( std::vector<std::string>::iterator constituentIter = constituents.begin();
            constituentIter != constituents.end(); ++constituentIter ) {
        const Factor* constituentFactor = factorCollection.AddFactor(*constituentIter,false);
        std::pair< std::set<const Factor* >::iterator, bool > dedupIns =
          dedup.insert(constituentFactor);
        if ( dedupIns.second ) {
          std::pair< TargetConstituentBoundariesLeftCollection::iterator, bool > inserted =
            m_constituentsCollection.insert(std::make_pair(constituentFactor,count));
          if ( !inserted.second ) {
            // Label seen in an earlier pair: accumulate its count.
            (inserted.first)->second += count;
          }
        }
      }
    } catch (const std::exception &e) {
      UTIL_THROW2("TargetConstituentBoundariesLeftPhraseProperty: Read error. Flawed property? " << value);
    }
  }
}
void TargetConstituentBoundariesLeftPhraseProperty::Print(std::ostream& out) const
{
for ( TargetConstituentBoundariesLeftCollection::const_iterator it = m_constituentsCollection.begin();
it != m_constituentsCollection.end(); ++it ) {
if ( it != m_constituentsCollection.begin() ) {
out << " ";
}
out << *(it->first) << " " << it->second;
}
}
} // namespace Moses

View File

@ -0,0 +1,40 @@
#pragma once
#include "moses/PP/PhraseProperty.h"
#include "moses/Factor.h"
#include "util/exception.hh"
#include <map>
#include <string>
namespace Moses
{
// Accumulated count per constituent-label Factor.
typedef std::map<const Factor*, float> TargetConstituentBoundariesLeftCollection;

// Phrase property holding a count per constituent label — presumably the
// constituents observed at the left boundary of the target phrase
// (TODO confirm against the extractor that emits this property).
class TargetConstituentBoundariesLeftPhraseProperty : public PhraseProperty
{
public:
  TargetConstituentBoundariesLeftPhraseProperty()
  {};

  // Parse the property value string and fill m_constituentsCollection.
  virtual void ProcessValue(const std::string &value);

  // Read-only access to the parsed label -> count map.
  const TargetConstituentBoundariesLeftCollection &GetCollection() const {
    return m_constituentsCollection;
  };

  // The raw value string is not retained; only the parsed collection is
  // available, so this always throws.
  virtual const std::string *GetValueString() const {
    UTIL_THROW2("TargetConstituentBoundariesLeftPhraseProperty: value string not available in this phrase property");
    return NULL;
  };

protected:
  virtual void Print(std::ostream& out) const;

  TargetConstituentBoundariesLeftCollection m_constituentsCollection;
};
} // namespace Moses

View File

@ -0,0 +1,63 @@
#include "moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h"
#include "moses/FactorCollection.h"
#include "moses/Util.h"
#include <iostream>
#include <queue>
#include <ostream>
namespace Moses
{
// Parse the property value string: whitespace-separated pairs of a
// '<'-joined list of constituent labels followed by a float count,
// e.g. "NP<S 2 VP 1". Labels duplicated within one list are counted
// once; counts for the same label accumulate across pairs into
// m_constituentsCollection.
void TargetConstituentBoundariesRightAdjacentPhraseProperty::ProcessValue(const std::string &value)
{
  FactorCollection &factorCollection = FactorCollection::Instance();

  std::vector<std::string> tokens;
  Tokenize(tokens, value, " ");
  std::vector<std::string>::const_iterator tokenIter = tokens.begin();
  while (tokenIter != tokens.end()) {
    try {
      std::vector<std::string> constituents;
      Tokenize(constituents, *tokenIter, "<");
      ++tokenIter;
      // A label token must be followed by its count. Dereferencing
      // tokens.end() would be undefined behavior (not a catchable
      // exception), so check explicitly before reading the count.
      if (tokenIter == tokens.end()) {
        UTIL_THROW2("TargetConstituentBoundariesRightAdjacentPhraseProperty: Missing count. Flawed property? " << value);
      }
      float count = std::atof( tokenIter->c_str() );
      ++tokenIter;

      // De-duplicate labels within this token before accumulating counts.
      std::set<const Factor* > dedup;
      for ( std::vector<std::string>::iterator constituentIter = constituents.begin();
            constituentIter != constituents.end(); ++constituentIter ) {
        const Factor* constituentFactor = factorCollection.AddFactor(*constituentIter,false);
        std::pair< std::set<const Factor* >::iterator, bool > dedupIns =
          dedup.insert(constituentFactor);
        if ( dedupIns.second ) {
          std::pair< TargetConstituentBoundariesRightAdjacentCollection::iterator, bool > inserted =
            m_constituentsCollection.insert(std::make_pair(constituentFactor,count));
          if ( !inserted.second ) {
            // Label seen in an earlier pair: accumulate its count.
            (inserted.first)->second += count;
          }
        }
      }
    } catch (const std::exception &e) {
      UTIL_THROW2("TargetConstituentBoundariesRightAdjacentPhraseProperty: Read error. Flawed property? " << value);
    }
  }
}
void TargetConstituentBoundariesRightAdjacentPhraseProperty::Print(std::ostream& out) const
{
for ( TargetConstituentBoundariesRightAdjacentCollection::const_iterator it = m_constituentsCollection.begin();
it != m_constituentsCollection.end(); ++it ) {
if ( it != m_constituentsCollection.begin() ) {
out << " ";
}
out << *(it->first) << " " << it->second;
}
}
} // namespace Moses

View File

@ -0,0 +1,40 @@
#pragma once
#include "moses/PP/PhraseProperty.h"
#include "moses/Factor.h"
#include "util/exception.hh"
#include <map>
#include <string>
namespace Moses
{
// Accumulated count per constituent-label Factor.
typedef std::map<const Factor*, float> TargetConstituentBoundariesRightAdjacentCollection;

// Phrase property holding a count per constituent label — presumably the
// constituents adjacent to the right boundary of the target phrase
// (TODO confirm against the extractor that emits this property).
class TargetConstituentBoundariesRightAdjacentPhraseProperty : public PhraseProperty
{
public:
  TargetConstituentBoundariesRightAdjacentPhraseProperty()
  {};

  // Parse the property value string and fill m_constituentsCollection.
  virtual void ProcessValue(const std::string &value);

  // Read-only access to the parsed label -> count map.
  const TargetConstituentBoundariesRightAdjacentCollection &GetCollection() const {
    return m_constituentsCollection;
  };

  // The raw value string is not retained; only the parsed collection is
  // available, so this always throws.
  virtual const std::string *GetValueString() const {
    UTIL_THROW2("TargetConstituentBoundariesRightAdjacentPhraseProperty: value string not available in this phrase property");
    return NULL;
  };

protected:
  virtual void Print(std::ostream& out) const;

  TargetConstituentBoundariesRightAdjacentCollection m_constituentsCollection;
};
} // namespace Moses

View File

@ -1,3 +1,13 @@
exe ptable-sigtest-filter :
filter-pt.cc
$(TOP)/moses//moses
$(TOP)/moses/TranslationModel/UG/generic//generic
$(TOP)//boost_iostreams
$(TOP)//boost_program_options
$(TOP)/moses/TranslationModel/UG/mm//mm
$(TOP)/moses/TranslationModel/UG//mmsapt
$(TOP)/util//kenutil
;
exe try-align :
try-align.cc
$(TOP)/moses//moses

View File

@ -0,0 +1,669 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
// significance filtering for phrase tables as described in
// H. Johnson, et al. (2007) Improving Translation Quality
// by Discarding Most of the Phrasetable. EMNLP 2007.
// Implemented by Marcin Junczys-Dowmunt
// recommended use: -l a+e -n <ttable-limit>
#include <cstring>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <fstream>
#include <sstream>
#include <vector>
#include <iostream>
#include <set>
#include <boost/thread/tss.hpp>
#include <boost/thread.hpp>
#include <boost/unordered_map.hpp>
#include <boost/program_options.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/foreach.hpp>
#ifdef WIN32
#include "WIN32_functions.h"
#else
#include <unistd.h>
#endif
#include "mm/ug_bitext.h"
// constants
const size_t MINIMUM_SIZE_TO_KEEP = 10000; // increase this to improve memory usage,
// reduce for speed
const std::string SEPARATOR = " ||| ";
const double ALPHA_PLUS_EPS = -1000.0; // dummy value
const double ALPHA_MINUS_EPS = -2000.0; // dummy value
// configuration params
int pfe_filter_limit = 0; // 0 = don't filter anything based on P(f|e)
bool print_cooc_counts = false; // add cooc counts to phrase table?
bool print_neglog_significance = false; // add -log(p) to phrase table?
double sig_filter_limit = 0; // keep phrase pairs with -log(sig) > sig_filter_limit
// higher = filter-more
bool pef_filter_only = false; // only filter based on pef
bool hierarchical = false;
double p_111 = 0.0; // alpha
size_t pt_lines = 0;
size_t nremoved_sigfilter = 0;
size_t nremoved_pfefilter = 0;
typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
typedef sapt::mmTtrack<Token> ttrack_t;
typedef sapt::mmTSA<Token> tsa_t;
typedef sapt::TokenIndex tind_t;
int num_lines;
boost::mutex in_mutex;
boost::mutex out_mutex;
boost::mutex err_mutex;
typedef size_t TextLenType;
typedef boost::shared_ptr<std::vector<TextLenType> > SentIdSet;
// Thread-safe LRU-ish cache mapping a phrase string to the (shared) set
// of sentence ids in which it occurs. Entries carry a clock() timestamp;
// prune() evicts the oldest entries once the global size cap is exceeded.
class Cache {
  typedef std::pair<SentIdSet, clock_t> ClockedSet;
  typedef boost::unordered_map<std::string, ClockedSet> ClockedMap;

public:
  // Return the cached set for `phrase` (refreshing its timestamp), or a
  // fresh empty set if the phrase is not cached.
  SentIdSet get(const std::string& phrase) {
    boost::shared_lock<boost::shared_mutex> lock(m_mutex);
    // Single lookup instead of the original count() + operator[] pair;
    // also avoids operator[]'s insert path on a non-const map.
    ClockedMap::iterator it = m_cont.find(phrase);
    if(it != m_cont.end()) {
      // NOTE(review): the timestamp refresh is a write performed under a
      // shared (read) lock — benign for the LRU heuristic, but technically
      // a data race; confirm whether an upgrade lock is warranted.
      it->second.second = clock();
      return it->second.first;
    }
    return SentIdSet( new SentIdSet::element_type() );
  }

  // Insert or overwrite the cached set for `phrase`.
  void put(const std::string& phrase, const SentIdSet set) {
    boost::unique_lock<boost::shared_mutex> lock(m_mutex);
    m_cont[phrase] = std::make_pair(set, clock());
  }

  // Global cap shared by all Cache instances; 0 disables pruning.
  static void set_max_cache(size_t max_cache) {
    s_max_cache = max_cache;
  }

  // Evict the least-recently-used entries until at most s_max_cache
  // entries remain (approximately: ties at the cutoff timestamp survive).
  void prune() {
    if(s_max_cache > 0) {
      boost::upgrade_lock<boost::shared_mutex> lock(m_mutex);
      if(m_cont.size() > s_max_cache) {
        // Find the timestamp cutoff below which entries are evicted.
        std::vector<clock_t> clocks;
        for(ClockedMap::iterator it = m_cont.begin(); it != m_cont.end(); ++it)
          clocks.push_back(it->second.second);

        std::sort(clocks.begin(), clocks.end());
        clock_t out = clocks[m_cont.size() - s_max_cache];

        boost::upgrade_to_unique_lock<boost::shared_mutex> uniq_lock(lock);
        // Advance via the iterator returned by erase(): the original code
        // incremented an iterator after erasing it, which is undefined
        // behavior for unordered_map.
        for(ClockedMap::iterator it = m_cont.begin(); it != m_cont.end(); ) {
          if(it->second.second < out)
            it = m_cont.erase(it);
          else
            ++it;
        }
      }
    }
  }

private:
  ClockedMap m_cont;
  boost::shared_mutex m_mutex;
  static size_t s_max_cache;
};
size_t Cache::s_max_cache = 0;
// One suffix-array-backed corpus side: vocabulary (V), token track (T),
// suffix array (I), plus a per-corpus cache of phrase-occurrence sets.
struct SA {
  tind_t V;
  boost::shared_ptr<ttrack_t> T;
  tsa_t I;
  Cache cache;
};

// Suffix arrays for the e (english/target) and f (french/source) sides;
// several corpora may be supplied on the command line (see main()).
std::vector<boost::shared_ptr<SA> > e_sas;
std::vector<boost::shared_ptr<SA> > f_sas;
#undef min
// Print a short description of the tool (and the paper it implements)
// to stderr.
void usage()
{
  static const char description[] =
    "\nFilter phrase table using significance testing as described\n"
    "in H. Johnson, et al. (2007) Improving Translation Quality\n"
    "by Discarding Most of the Phrasetable. EMNLP 2007.\n";
  std::cerr << description;
}
// One parsed phrase-table line plus the co-occurrence statistics the
// significance filter attaches to it.
struct PTEntry {
  PTEntry(const std::string& str, int index);
  std::string f_phrase;  // source (f) phrase, first ||| field
  std::string e_phrase;  // target (e) phrase, second ||| field
  std::string extra;     // any ||| fields after the scores
  std::string scores;    // raw scores field
  float pfe;             // score at position `index` (P(f|e)), used by the -n filter
  int cf;                // source phrase occurrence count
  int ce;                // target phrase occurrence count
  int cfe;               // joint occurrence count
  float nlog_pte;        // -log(significance) from Fisher's exact test
  // Record the co-occurrence counts and significance for this entry.
  void set_cooc_stats(int _cef, int _cf, int _ce, float nlp) {
    cfe = _cef;
    cf = _cf;
    ce = _ce;
    nlog_pte = nlp;
  }
};
// Parse one phrase-table line of the form
//   "f ||| e ||| scores [||| extra ...]".
// `index` selects which space-separated token of the scores field is
// read into pfe (the P(f|e) estimate used for -n pre-filtering).
PTEntry::PTEntry(const std::string& str, int index) :
  cf(0), ce(0), cfe(0), nlog_pte(0.0)
{
  size_t pos = 0;
  std::string::size_type nextPos = str.find(SEPARATOR, pos);
  // pos is 0 here, so substr(pos, nextPos) equals substr(pos, nextPos-pos).
  this->f_phrase = str.substr(pos,nextPos);

  pos = nextPos + SEPARATOR.size();
  nextPos = str.find(SEPARATOR, pos);
  this->e_phrase = str.substr(pos,nextPos-pos);
  pos = nextPos + SEPARATOR.size();

  nextPos = str.find(SEPARATOR, pos);
  if (nextPos < str.size()) {
    // Four or more fields: scores plus trailing extra (e.g. alignments).
    this->scores = str.substr(pos,nextPos-pos);
    pos = nextPos + SEPARATOR.size();
    this->extra = str.substr(pos);
  } else {
    this->scores = str.substr(pos,str.size()-pos);
  }

  // Skip `index` separators to reach the requested score token.
  int c = 0;
  std::string::iterator i=scores.begin();
  if (index > 0) {
    for (; i != scores.end(); ++i) {
      if ((*i) == ' ') {
        c++;
        if (c == index) break;
      }
    }
  }
  if (i != scores.end()) {
    ++i;
  }
  // Copy the token into a std::string rather than the original fixed
  // char f[24] buffer, whose unbounded copy loop could overflow on
  // unusually long score tokens.
  std::string token;
  while (i != scores.end() && *i != ' ') {
    token += *i++;
  }
  this->pfe = atof(token.c_str());
}
struct PfeComparer {
bool operator()(const PTEntry* a, const PTEntry* b) const {
return a->pfe > b->pfe;
}
};
// Predicate for std::remove_if: true (and deletes the entry) when its
// -log(significance) is below the threshold t.
struct NlogSigThresholder {
  NlogSigThresholder(float threshold) : t(threshold) {}
  float t;
  bool operator()(const PTEntry* a) const {
    if (a->nlog_pte < t) {
      // NOTE(review): deleting inside a remove_if predicate works here
      // because the stale tail pointers are only erased afterwards, never
      // dereferenced — fragile pattern; confirm before reusing elsewhere.
      delete a;
      return true;
    } else return false;
  }
};
// Serialize an entry back to the "f ||| e ||| scores [||| extra]" format,
// optionally appending co-occurrence counts and -log(significance)
// according to the global print flags.
std::ostream& operator << (std::ostream& os, const PTEntry& pp)
{
  os << pp.f_phrase << " ||| " << pp.e_phrase << " ||| " << pp.scores;
  if (!pp.extra.empty())
    os << " ||| " << pp.extra;
  if (print_cooc_counts)
    os << " ||| " << pp.cfe << " " << pp.cf << " " << pp.ce;
  if (print_neglog_significance)
    os << " ||| " << pp.nlog_pte;
  return os;
}
// Debug helper: dump a 2x2 contingency table (a b / c d), the table
// probability P, and the hypergeometric update factor xf to stderr.
void print(int a, int b, int c, int d, float p)
{
  double xf = (double)(b)*(double)(c)/(double)(a+1)/(double)(d+1);
  std::cerr << a << "\t" << b << "\t P=" << p << "\n";
  std::cerr << c << "\t" << d << "\t xf=" << xf << "\n\n";
}
// One-sided 2x2 Fisher's exact test for the co-occurrence table of a
// phrase pair; see B. Moore (2004), "On Log Likelihood and the
// Significance of Rare Events". Uses the file-global num_lines as the
// corpus size. Returns the tail probability (the significance level).
double fisher_exact(int cfe, int ce, int cf)
{
  assert(cfe <= ce);
  assert(cfe <= cf);

  // Contingency-table cells.
  int a = cfe;
  int b = (cf - cfe);
  int c = (ce - cfe);
  int d = (num_lines - ce - cf + cfe);
  int n = a + b + c + d;

  // Hypergeometric probability of the observed table, via log-gamma to
  // avoid overflow in the factorials.
  double cp = exp(lgamma(1+a+c) + lgamma(1+b+d) + lgamma(1+a+b) + lgamma(1+c+d)
                  - lgamma(1+n) - lgamma(1+a) - lgamma(1+b) - lgamma(1+c)
                  - lgamma(1+d));

  // Walk the tail of more extreme tables, updating cp incrementally.
  double total_p = 0.0;
  int tail = std::min(b,c);
  int step = 0;
  while (step <= tail) {
    total_p += cp;
    double coef = (double)(b)*(double)(c)/(double)(a+1)/(double)(d+1);
    cp *= coef;
    ++a;
    --c;
    ++d;
    --b;
    ++step;
  }
  return total_p;
}
// Intersect two sorted id collections (smart pointers to sorted,
// duplicate-free vectors), appending the common elements to *out in
// order. Both inputs must already be sorted (lookup_phrase guarantees
// this by sort+unique before caching).
template <class setType>
void ordered_set_intersect(setType& out, const setType lhs, const setType rhs)
{
  std::set_intersection(lhs->begin(), lhs->end(),
                        rhs->begin(), rhs->end(),
                        std::inserter(*out, out->begin()));
}
// Collect the ids of all sentences containing `phrase`, using the suffix
// array my_sa with vocabulary my_v. Results come from `cache` when
// available; large fresh result sets are stored back into the cache.
void lookup_phrase(SentIdSet& ids, const std::string& phrase,
                   tsa_t &my_sa, tind_t &my_v, Cache& cache)
{
  ids = cache.get(phrase);
  if(ids->empty()) {  // cache miss (the cache returns an empty set then)
    std::vector<sapt::id_type> snt;
    my_v.fillIdSeq(phrase, snt);

    tsa_t::tree_iterator m(&my_sa);
    size_t k = 0;
    // Descend the suffix tree one token at a time; extend() fails if the
    // phrase does not occur, leaving k short of snt.size().
    while (k < snt.size() && m.extend(snt[k])) ++k;
    if(k == snt.size()) {
      ids->reserve(m.approxOccurrenceCount()+10);
      // Enumerate all occurrences between the node's lower and upper bound.
      sapt::tsa::ArrayEntry I(m.lower_bound(-1));
      char const* stop = m.upper_bound(-1);
      do {
        m.root->readEntry(I.next,I);
        ids->push_back(I.sid);
      } while (I.next != stop);
      // A sentence may contain the phrase more than once; keep each
      // sentence id exactly once so set intersection gives document counts.
      std::sort(ids->begin(), ids->end());
      SentIdSet::element_type::iterator it =
        std::unique(ids->begin(), ids->end());
      ids->resize(it - ids->begin());

      // Only cache sets big enough to be worth the memory.
      if(ids->size() >= MINIMUM_SIZE_TO_KEEP)
        cache.put(phrase, ids);
    }
  }
}
// Find the sentences that contain all terminal sequences in `phrases`
// by looking each one up separately and intersecting the resulting
// sorted sentence-id sets. `rule` is passed in but not used here.
void lookup_multiple_phrases(SentIdSet& ids, std::vector<std::string> & phrases,
                             tsa_t & my_sa, tind_t &my_v,
                             const std::string & rule, Cache& cache)
{
  if (phrases.size() == 1) {
    // Single terminal sequence: plain phrase lookup.
    lookup_phrase(ids, phrases.front(), my_sa, my_v, cache);
  }
  else {
    SentIdSet main_set( new SentIdSet::element_type() );
    bool first = true;
    SentIdSet first_set( new SentIdSet::element_type() );
    lookup_phrase(first_set, phrases.front(), my_sa, my_v, cache);

    // Fold the remaining sequences in, intersecting as we go.
    for (std::vector<std::string>::iterator phrase=phrases.begin()+1;
         phrase != phrases.end(); ++phrase) {
      SentIdSet temp_set( new SentIdSet::element_type() );
      lookup_phrase(temp_set, *phrase, my_sa, my_v, cache);
      if (first) {
        ordered_set_intersect(main_set, first_set, temp_set);
        first = false;
      }
      else {
        SentIdSet new_set( new SentIdSet::element_type() );
        ordered_set_intersect(new_set, main_set, temp_set);
        main_set->swap(*new_set);
      }
    }
    ids->swap(*main_set);
  }
}
// Look up the sentence ids in which `rule` occurs. For hierarchical
// rules the non-terminals ("[X]"-style tokens) are stripped out and the
// remaining terminal sequences are looked up and intersected.
void find_occurrences(SentIdSet& ids, const std::string& rule,
                      tsa_t& my_sa, tind_t &my_v, Cache& cache)
{
  // we search for hierarchical rules by stripping away NT and looking for terminals sequences
  // if a rule contains multiple sequences of terminals, we intersect their occurrences.
  if (hierarchical) {
    // std::cerr << "splitting up phrase: " << phrase << "\n";
    int pos = 0;
    int NTStartPos, NTEndPos;
    std::vector<std::string> phrases;
    // Each "] " marks the end of a non-terminal token inside the rule.
    while (rule.find("] ", pos) < rule.size()) {
      NTStartPos = rule.find("[",pos) - 1; // -1 to cut space before NT
      NTEndPos = rule.find("] ",pos);
      if (NTStartPos < pos) { // no space: NT at start of rule (or two consecutive NTs)
        pos = NTEndPos + 2;
        continue;
      }
      phrases.push_back(rule.substr(pos,NTStartPos-pos));
      pos = NTEndPos + 2;
    }
    // NOTE(review): this assumes every hierarchical rule ends with its
    // "[LHS]" non-terminal, so find() cannot return npos here — confirm
    // behavior on malformed input (npos - 1 would wrap).
    NTStartPos = rule.find("[",pos) - 1; // LHS of rule
    if (NTStartPos > pos) {
      // Trailing terminal sequence before the LHS marker.
      phrases.push_back(rule.substr(pos,NTStartPos-pos));
    }
    lookup_multiple_phrases(ids, phrases, my_sa, my_v, rule, cache);
  }
  else {
    lookup_phrase(ids, rule, my_sa, my_v, cache);
  }
}
// input: unordered list of translation options for a single source phrase
// Step 1 (optional, -n): keep only the pfe_filter_limit options with the
// highest P(f|e). Step 2 (unless pef_filter_only): compute co-occurrence
// counts against the suffix arrays and delete options whose
// -log(significance) falls below sig_filter_limit.
void compute_cooc_stats_and_filter(std::vector<PTEntry*>& options)
{
  if (pfe_filter_limit > 0 && options.size() > pfe_filter_limit) {
    // int/size_t comparison is safe here because pfe_filter_limit > 0.
    nremoved_pfefilter += (options.size() - pfe_filter_limit);
    // Partition so the best pfe_filter_limit options (by P(f|e)) come
    // first; no full sort needed.
    std::nth_element(options.begin(), options.begin() + pfe_filter_limit,
                     options.end(), PfeComparer());
    for (std::vector<PTEntry*>::iterator i = options.begin() + pfe_filter_limit;
         i != options.end(); ++i)
      delete *i;
    options.erase(options.begin() + pfe_filter_limit,options.end());
  }
  if (pef_filter_only)
    return;

  if (options.empty())
    return;

  // Source-side occurrence sets, one per f-corpus; cf is the total count.
  // All options share the same source phrase, so this is done once.
  size_t cf = 0;
  std::vector<SentIdSet> fsets;
  BOOST_FOREACH(boost::shared_ptr<SA>& f_sa, f_sas) {
    fsets.push_back( boost::shared_ptr<SentIdSet::element_type>(new SentIdSet::element_type()) );
    find_occurrences(fsets.back(), options.front()->f_phrase, f_sa->I, f_sa->V, f_sa->cache);
    cf += fsets.back()->size();
  }
  for (std::vector<PTEntry*>::iterator i = options.begin();
       i != options.end(); ++i) {
    const std::string& e_phrase = (*i)->e_phrase;
    size_t ce = 0;
    // Target-side occurrence sets, parallel to fsets (one per corpus pair).
    std::vector<SentIdSet> esets;
    BOOST_FOREACH(boost::shared_ptr<SA>& e_sa, e_sas) {
      esets.push_back( boost::shared_ptr<SentIdSet::element_type>(new SentIdSet::element_type()) );
      find_occurrences(esets.back(), e_phrase, e_sa->I, e_sa->V, e_sa->cache);
      ce += esets.back()->size();
    }
    // Joint count: intersect source and target sets corpus by corpus.
    size_t cef = 0;
    for(size_t j = 0; j < fsets.size(); ++j) {
      SentIdSet efset( new SentIdSet::element_type() );
      ordered_set_intersect(efset, fsets[j], esets[j]);
      cef += efset->size();
    }
    double nlp = -log(fisher_exact(cef, cf, ce));
    (*i)->set_cooc_stats(cef, cf, ce, nlp);
  }
  // Drop (and delete) the options below the significance threshold.
  std::vector<PTEntry*>::iterator new_end =
    std::remove_if(options.begin(), options.end(),
                   NlogSigThresholder(sig_filter_limit));
  nremoved_sigfilter += (options.end() - new_end);
  options.erase(new_end,options.end());
}
// Worker-thread entry point: repeatedly grabs a batch of phrase-table
// lines from *in, groups consecutive lines by source phrase, runs the
// significance filter on each completed group, and writes the survivors
// to *out. Relies on the table being sorted by source phrase so that a
// group never spans two batches read by different threads only at group
// boundaries within a single thread's batch.
void filter_thread(std::istream* in, std::ostream* out, int pfe_index) {

  std::vector<std::string> lines;
  std::string prev = "";
  std::vector<PTEntry*> options;
  while(true) {
    {
      // Read up to 500k lines per batch under the input lock so threads
      // don't contend on every single line.
      boost::mutex::scoped_lock lock(in_mutex);
      if(in->eof())
        break;

      lines.clear();
      std::string line;
      while(getline(*in, line) && lines.size() < 500000)
        lines.push_back(line);
    }

    // Output is buffered per batch and flushed under out_mutex below.
    std::stringstream out_temp;
    for(std::vector<std::string>::iterator it = lines.begin(); it != lines.end(); it++) {
      // NOTE(review): pt_lines and the nremoved_* counters are plain
      // globals updated from multiple threads without synchronization,
      // so the progress numbers below are approximate — confirm before
      // relying on them for anything but logging.
      size_t tmp_lines = ++pt_lines;
      if(tmp_lines % 10000 == 0) {
        boost::mutex::scoped_lock lock(err_mutex);
        std::cerr << ".";

        if(tmp_lines % 500000 == 0)
          std::cerr << "[n:" << tmp_lines << "]\n";

        // Periodic interim statistics.
        if(tmp_lines % 10000000 == 0) {
          float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines;
          float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines;
          std::cerr << "------------------------------------------------------\n"
                    << " unfiltered phrases pairs: " << pt_lines << "\n"
                    << "\n"
                    << " P(f|e) filter [first]: " << nremoved_pfefilter << " (" << pfefper << "%)\n"
                    << " significance filter: " << nremoved_sigfilter << " (" << sigfper << "%)\n"
                    << " TOTAL FILTERED: " << (nremoved_pfefilter + nremoved_sigfilter) << " (" << (sigfper + pfefper) << "%)\n"
                    << "\n"
                    << " FILTERED phrase pairs: " << (pt_lines - nremoved_pfefilter - nremoved_sigfilter) << " (" << (100.0-sigfper - pfefper) << "%)\n"
                    << "------------------------------------------------------\n";
        }
      }

      // Periodically evict old entries from the per-corpus caches.
      if(pt_lines % 10000 == 0) {
        BOOST_FOREACH(boost::shared_ptr<SA> f_sa, f_sas)
          f_sa->cache.prune();
        BOOST_FOREACH(boost::shared_ptr<SA> e_sa, e_sas)
          e_sa->cache.prune();
      }

      if(it->length() > 0) {
        PTEntry* pp = new PTEntry(it->c_str(), pfe_index);
        if (prev != pp->f_phrase) {
          // New source phrase: filter and emit the finished group.
          prev = pp->f_phrase;

          if (!options.empty()) { // always true after first line
            compute_cooc_stats_and_filter(options);
          }

          for (std::vector<PTEntry*>::iterator i = options.begin();
               i != options.end(); ++i) {
            out_temp << **i << '\n';
            delete *i;
          }

          options.clear();
          options.push_back(pp);

        } else {
          options.push_back(pp);
        }
      }
    }
    boost::mutex::scoped_lock lock(out_mutex);
    *out << out_temp.str() << std::flush;
  }

  // Flush the final group after EOF.
  compute_cooc_stats_and_filter(options);

  boost::mutex::scoped_lock lock(out_mutex);
  for (std::vector<PTEntry*>::iterator i = options.begin();
       i != options.end(); ++i) {
    *out << **i << '\n';
    delete *i;
  }
  *out << std::flush;
}
namespace po = boost::program_options;
// Entry point: parse options, load the e- and f-side suffix arrays,
// derive the significance threshold (possibly relative to alpha =
// -log p(1,1,1)), then filter the phrase table streamed on stdin to
// stdout using `threads` worker threads.
int main(int argc, char * argv[])
{
  bool help;
  std::vector<std::string> efiles;
  std::vector<std::string> ffiles;
  int pfe_index = 2;
  int threads = 1;
  size_t max_cache = 0;
  std::string str_sig_filter_limit;

  po::options_description general("General options");
  general.add_options()
  ("english,e", po::value<std::vector<std::string> >(&efiles)->multitoken(),
   "english.suf-arr")
  ("french,f", po::value<std::vector<std::string> >(&ffiles)->multitoken(),
   "french.suf-arr")
  ("pfe-index,i", po::value(&pfe_index)->default_value(2),
   "Index of P(f|e) in phrase table")
  ("pfe-filter-limit,n", po::value(&pfe_filter_limit)->default_value(0),
   "0, 1...: 0=no filtering, >0 sort by P(e|f) and keep the top num elements")
  ("threads,t", po::value(&threads)->default_value(1),
   "number of threads to use")
  ("max-cache,m", po::value(&max_cache)->default_value(0),
   "limit cache to arg most recent phrases")
  ("print-cooc,c", po::value(&print_cooc_counts)->zero_tokens()->default_value(false),
   "add the coocurrence counts to the phrase table")
  ("print-significance,p", po::value(&print_neglog_significance)->zero_tokens()->default_value(false),
   "add -log(significance) to the phrase table")
  ("hierarchical,x", po::value(&hierarchical)->zero_tokens()->default_value(false),
   "filter hierarchical rule table")
  ("sig-filter-limit,l", po::value(&str_sig_filter_limit),
   ">0.0, a+e, or a-e: keep values that have a -log significance > this")
  ("help,h", po::value(&help)->zero_tokens()->default_value(false),
   "display this message")
  ;

  po::options_description cmdline_options("Allowed options");
  cmdline_options.add(general);
  po::variables_map vm;

  try {
    po::store(po::command_line_parser(argc,argv).
              options(cmdline_options).run(), vm);
    po::notify(vm);
  }
  catch (std::exception& e) {
    std::cout << "Error: " << e.what() << std::endl << std::endl;
    usage();
    std::cout << cmdline_options << std::endl;
    exit(0);
  }

  if(vm["help"].as<bool>()) {
    usage();
    std::cout << cmdline_options << std::endl;
    exit(0);
  }

  if(vm.count("pfe-filter-limit"))
    std::cerr << "P(f|e) filter limit: " << pfe_filter_limit << std::endl;
  if(vm.count("threads"))
    std::cerr << "Using threads: " << threads << std::endl;
  if(vm.count("max-cache"))
    std::cerr << "Using max phrases in caches: " << max_cache << std::endl;

  // -l accepts "a+e" / "a-e" (alpha +/- a small epsilon, resolved once
  // alpha is known below) or a plain non-negative number.
  if (strcmp(str_sig_filter_limit.c_str(),"a+e") == 0) {
    sig_filter_limit = ALPHA_PLUS_EPS;
  } else if (strcmp(str_sig_filter_limit.c_str(),"a-e") == 0) {
    sig_filter_limit = ALPHA_MINUS_EPS;
  } else {
    char *x;
    sig_filter_limit = strtod(str_sig_filter_limit.c_str(), &x);
    if (sig_filter_limit < 0.0) {
      std::cerr << "Filter limit (-l) must be either 'a+e', 'a-e' or a real number >= 0.0\n";
      usage();
    }
  }

  // A zero threshold means only the P(f|e) count filter is active.
  if (sig_filter_limit == 0.0) pef_filter_only = true;
  //-----------------------------------------------------------------------------
  // NOTE(review): optind comes from getopt but argument parsing uses
  // program_options, so optind stays at its initial value; usage() does
  // not exit, so this check only prints the blurb — confirm intent.
  if (optind != argc || ((efiles.empty() || ffiles.empty()) && !pef_filter_only)) {
    usage();
  }

  if (!pef_filter_only) {
    // Load one suffix array (.tdx vocabulary, .mct token track, .sfa
    // suffix array) per corpus on each side.
    size_t elines = 0;
    BOOST_FOREACH(std::string& efile, efiles) {
      e_sas.push_back(boost::shared_ptr<SA>(new SA()));
      e_sas.back()->V.open(efile + ".tdx");
      e_sas.back()->T.reset(new ttrack_t());
      e_sas.back()->T->open(efile + ".mct");
      e_sas.back()->I.open(efile + ".sfa", e_sas.back()->T);
      elines += e_sas.back()->T->size();
    }
    size_t flines = 0;
    BOOST_FOREACH(std::string& ffile, ffiles) {
      f_sas.push_back(boost::shared_ptr<SA>(new SA()));
      f_sas.back()->V.open(ffile + ".tdx");
      f_sas.back()->T.reset(new ttrack_t());
      f_sas.back()->T->open(ffile + ".mct");
      f_sas.back()->I.open(ffile + ".sfa", f_sas.back()->T);
      flines += f_sas.back()->T->size();
    }

    // The two sides must be sentence-parallel.
    if (elines != flines) {
      std::cerr << "Number of lines in e-corpus != number of lines in f-corpus!\n";
      usage();
      exit(1);
    } else {
      std::cerr << "Training corpus: " << elines << " lines\n";
      num_lines = elines;
    }

    // alpha = significance of a phrase pair seen exactly once each.
    p_111 = -log(fisher_exact(1,1,1));
    std::cerr << "\\alpha = " << p_111 << "\n";
    if (sig_filter_limit == ALPHA_MINUS_EPS) {
      sig_filter_limit = p_111 - 0.001;
    } else if (sig_filter_limit == ALPHA_PLUS_EPS) {
      sig_filter_limit = p_111 + 0.001;
    }
    std::cerr << "Sig filter threshold is = " << sig_filter_limit << "\n";
  } else {
    std::cerr << "Filtering using P(e|f) only. n=" << pfe_filter_limit << std::endl;
  }

  Cache::set_max_cache(max_cache);
  std::ios_base::sync_with_stdio(false);

  // All workers share stdin/stdout, synchronized inside filter_thread.
  boost::thread_group threadGroup;
  for(int i = 0; i < threads; i++)
    threadGroup.add_thread(new boost::thread(filter_thread, &std::cin, &std::cout, pfe_index));
  threadGroup.join_all();

  // Final statistics.
  float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines;
  float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines;
  std::cerr << "\n\n------------------------------------------------------\n"
            << " unfiltered phrases pairs: " << pt_lines << "\n"
            << "\n"
            << " P(f|e) filter [first]: " << nremoved_pfefilter << " (" << pfefper << "%)\n"
            << " significance filter: " << nremoved_sigfilter << " (" << sigfper << "%)\n"
            << " TOTAL FILTERED: " << (nremoved_pfefilter + nremoved_sigfilter) << " (" << (sigfper + pfefper) << "%)\n"
            << "\n"
            << " FILTERED phrase pairs: " << (pt_lines - nremoved_pfefilter - nremoved_sigfilter) << " (" << (100.0-sigfper - pfefper) << "%)\n"
            << "------------------------------------------------------\n";
}

View File

@ -5,7 +5,7 @@
#include <vector>
#ifndef NO_MOSES
#include "moses/FF/LexicalReordering/LexicalReorderingState.h"
#include "moses/FF/LexicalReordering/LRState.h"
#endif
namespace sapt {

View File

@ -4,7 +4,7 @@
#include "ug_typedefs.h"
#include "ug_bitext_pstats.h"
#ifndef NO_MOSES
#include "moses/FF/LexicalReordering/LexicalReorderingState.h"
#include "moses/FF/LexicalReordering/LRState.h"
#endif
#include "boost/format.hpp"
#include "tpt_tokenindex.h"

View File

@ -42,7 +42,7 @@ TrellisPath::TrellisPath(const Hypothesis *hypo)
void TrellisPath::InitTotalScore()
{
m_totalScore = m_path[0]->GetWinningHypo()->GetFutureScore();
m_totalScore = m_path[0]->GetWinningHypo()->GetFutureScore();
//calc score
size_t sizePath = m_path.size();
@ -50,7 +50,7 @@ void TrellisPath::InitTotalScore()
const Hypothesis *hypo = m_path[pos];
const Hypothesis *winningHypo = hypo->GetWinningHypo();
if (hypo != winningHypo) {
m_totalScore = m_totalScore - winningHypo->GetFutureScore() + hypo->GetFutureScore();
m_totalScore += hypo->GetFutureScore() - winningHypo->GetFutureScore();
}
}
}
@ -169,9 +169,6 @@ TrellisPath::
GetScoreBreakdown() const
{
if (!m_scoreBreakdown) {
float totalScore = m_path[0]->GetWinningHypo()->GetFutureScore();
// calculated for sanity check only
m_scoreBreakdown.reset(new ScoreComponentCollection());
m_scoreBreakdown->PlusEquals(m_path[0]->GetWinningHypo()->GetScoreBreakdown());
@ -184,13 +181,10 @@ GetScoreBreakdown() const
const Hypothesis *hypo = m_path[pos];
const Hypothesis *winningHypo = hypo->GetWinningHypo();
if (hypo != winningHypo) {
totalScore += hypo->GetFutureScore() - winningHypo->GetFutureScore();
m_scoreBreakdown->MinusEquals(winningHypo->GetScoreBreakdown());
m_scoreBreakdown->PlusEquals(hypo->GetScoreBreakdown());
}
}
assert(totalScore == m_totalScore);
}
return m_scoreBreakdown;

View File

@ -3,6 +3,7 @@
#include "moses/ContextScope.h"
#include <boost/foreach.hpp>
#include "moses/Util.h"
#include "moses/TreeInput.h"
#include "moses/Hypothesis.h"
namespace MosesServer
@ -24,6 +25,7 @@ using Moses::FValue;
using Moses::PhraseDictionaryMultiModel;
using Moses::FindPhraseDictionary;
using Moses::Sentence;
using Moses::TreeInput;
boost::shared_ptr<TranslationRequest>
TranslationRequest::
@ -317,7 +319,13 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
// for (size_t i = 1; i < tmp.size(); i += 2)
// m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]);
// }
m_source.reset(new Sentence(m_options,0,m_source_string));
if (is_syntax(m_options->search.algo)) {
m_source.reset(new TreeInput(m_options));
istringstream in(m_source_string + "\n");
m_source->Read(in);
} else {
m_source.reset(new Sentence(m_options,0,m_source_string));
}
} // end of Translationtask::parse_request()
@ -334,7 +342,7 @@ run_chart_decoder()
const Moses::ChartHypothesis *hypo = manager.GetBestHypothesis();
ostringstream out;
outputChartHypo(out,hypo);
if (hypo) outputChartHypo(out,hypo);
m_target_string = out.str();
m_retData["text"] = xmlrpc_c::value_string(m_target_string);

View File

@ -311,12 +311,14 @@ std::string ExtractionPhrasePair::CollectAllPropertyValues(const std::string &ke
std::ostringstream oss;
for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
iter!=allPropertyValues->end(); ++iter) {
if (iter!=allPropertyValues->begin()) {
if (!(iter->first).empty()) {
if (iter!=allPropertyValues->begin()) {
oss << " ";
}
oss << iter->first;
oss << " ";
oss << iter->second;
}
oss << iter->first;
oss << " ";
oss << iter->second;
}
std::string allPropertyValuesString(oss.str());

View File

@ -50,7 +50,10 @@ private:
bool onlyOutputSpanInfo;
bool gzOutput;
std::string instanceWeightsFile; //weights for each sentence
bool targetConstituentConstrainedFlag;
bool targetConstituentBoundariesFlag;
bool flexScoreFlag;
bool singleWordHeuristicFlag;
public:
std::vector<std::string> placeholders;
@ -72,7 +75,10 @@ public:
includeSentenceIdFlag(false),
onlyOutputSpanInfo(false),
gzOutput(false),
targetConstituentConstrainedFlag(false),
targetConstituentBoundariesFlag(false),
flexScoreFlag(false),
singleWordHeuristicFlag(false),
debug(false) {
}
@ -116,9 +122,18 @@ public:
void initInstanceWeightsFile(const char* initInstanceWeightsFile) {
instanceWeightsFile = std::string(initInstanceWeightsFile);
}
void initTargetConstituentConstrainedFlag(const bool initTargetConstituentConstrainedFlag) {
targetConstituentConstrainedFlag = initTargetConstituentConstrainedFlag;
}
void initTargetConstituentBoundariesFlag(const bool initTargetConstituentBoundariesFlag) {
targetConstituentBoundariesFlag = initTargetConstituentBoundariesFlag;
}
void initFlexScoreFlag(const bool initflexScoreFlag) {
flexScoreFlag=initflexScoreFlag;
}
void initSingleWordHeuristicFlag(const bool initSingleWordHeuristicFlag) {
singleWordHeuristicFlag = initSingleWordHeuristicFlag;
}
// functions for getting values
bool isAllModelsOutputFlag() const {
@ -160,9 +175,18 @@ public:
std::string getInstanceWeightsFile() const {
return instanceWeightsFile;
}
bool isTargetConstituentConstrainedFlag() const {
return targetConstituentConstrainedFlag;
}
bool isTargetConstituentBoundariesFlag() const {
return targetConstituentBoundariesFlag;
}
bool isFlexScoreFlag() const {
return flexScoreFlag;
}
bool isSingleWordHeuristicFlag() const {
return singleWordHeuristicFlag;
}
};
}

View File

@ -18,8 +18,6 @@
***********************************************************************/
#pragma once
#ifndef RULEEXTRACTIONOPTIONS_H_INCLUDED_
#define RULEEXTRACTIONOPTIONS_H_INCLUDED_
namespace MosesTraining
{
@ -95,4 +93,3 @@ public:
}
#endif

View File

@ -35,7 +35,7 @@ namespace MosesTraining
bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetString, int sentenceID, bool boundaryRules)
{
if (!m_options.targetSyntax) {
if (!m_targetSyntax) {
return SentenceAlignment::processTargetSentence(targetString, sentenceID, boundaryRules);
}
@ -56,7 +56,7 @@ bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetStrin
bool SentenceAlignmentWithSyntax::processSourceSentence(const char * sourceString, int sentenceID, bool boundaryRules)
{
if (!m_options.sourceSyntax) {
if (!m_sourceSyntax) {
return SentenceAlignment::processSourceSentence(sourceString, sentenceID, boundaryRules);
}

View File

@ -18,8 +18,6 @@
***********************************************************************/
#pragma once
#ifndef SENTENCEALIGNMENTWITHSYNTAX_H_INCLUDED_
#define SENTENCEALIGNMENTWITHSYNTAX_H_INCLUDED_
#include <map>
#include <set>
@ -42,18 +40,20 @@ public:
std::set<std::string> & m_sourceLabelCollection;
std::map<std::string, int> & m_targetTopLabelCollection;
std::map<std::string, int> & m_sourceTopLabelCollection;
const RuleExtractionOptions & m_options;
const bool m_targetSyntax, m_sourceSyntax;
SentenceAlignmentWithSyntax(std::set<std::string> & tgtLabelColl,
std::set<std::string> & srcLabelColl,
std::map<std::string,int> & tgtTopLabelColl,
std::map<std::string,int> & srcTopLabelColl,
const RuleExtractionOptions & options)
bool targetSyntax,
bool sourceSyntax)
: m_targetLabelCollection(tgtLabelColl)
, m_sourceLabelCollection(srcLabelColl)
, m_targetTopLabelCollection(tgtTopLabelColl)
, m_sourceTopLabelCollection(srcTopLabelColl)
, m_options(options) {
, m_targetSyntax(targetSyntax)
, m_sourceSyntax(sourceSyntax) {
}
virtual ~SentenceAlignmentWithSyntax() {}
@ -67,4 +67,3 @@ public:
}
#endif

View File

@ -47,6 +47,8 @@ SyntaxNode *SyntaxNodeCollection::AddNode(int startPos, int endPos,
SyntaxNode* newNode = new SyntaxNode(label, startPos, endPos);
m_nodes.push_back( newNode );
m_index[ startPos ][ endPos ].push_back( newNode );
m_endPositionsIndex[ endPos ].push_back( newNode );
m_startPositionsIndex[ startPos ].push_back( newNode ); // TODO: may not need this: access m_index by startPos and iterate over its InnerNodeIndex (= end positions)?
m_numWords = std::max(endPos+1, m_numWords);
return newNode;
}
@ -70,6 +72,36 @@ const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodes(
return endIndex->second;
}
bool SyntaxNodeCollection::HasNodeStartingAtPosition( int startPos ) const
{
return GetNodesByStartPosition(startPos).size() > 0;
}
const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodesByStartPosition(
int startPos ) const
{
InnerNodeIndex::const_iterator startIndex = m_startPositionsIndex.find( startPos );
if (startIndex == m_startPositionsIndex.end() )
return m_emptyNode;
return startIndex->second;
}
bool SyntaxNodeCollection::HasNodeEndingAtPosition( int endPos ) const
{
return GetNodesByEndPosition(endPos).size() > 0;
}
const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodesByEndPosition(
int endPos ) const
{
InnerNodeIndex::const_iterator endIndex = m_endPositionsIndex.find( endPos );
if (endIndex == m_endPositionsIndex.end() )
return m_emptyNode;
return endIndex->second;
}
std::auto_ptr<SyntaxTree> SyntaxNodeCollection::ExtractTree()
{
std::map<SyntaxNode *, SyntaxTree *> nodeToTree;

View File

@ -50,6 +50,11 @@ public:
//! Lookup the SyntaxNodes for a given span.
const std::vector< SyntaxNode* >& GetNodes( int startPos, int endPos ) const;
bool HasNodeStartingAtPosition( int startPos ) const;
const std::vector< SyntaxNode* >& GetNodesByStartPosition( int startPos ) const;
bool HasNodeEndingAtPosition( int endPos ) const;
const std::vector< SyntaxNode* >& GetNodesByEndPosition( int endPos ) const;
//! Get a vector of pointers to all SyntaxNodes (unordered).
const std::vector< SyntaxNode* >& GetAllNodes() {
return m_nodes;
@ -78,6 +83,9 @@ private:
NodeIndex m_index;
int m_numWords;
std::vector< SyntaxNode* > m_emptyNode;
InnerNodeIndex m_endPositionsIndex;
InnerNodeIndex m_startPositionsIndex;
};
} // namespace MosesTraining

View File

@ -1,11 +1,3 @@
/*
* extract.cpp
* Modified by: Rohit Gupta CDAC, Mumbai, India
* on July 15, 2012 to implement parallel processing
* Modified by: Nadi Tomeh - LIMSI/CNRS
* Machine Translation Marathon 2010, Dublin
*/
#include <cstdio>
#include <iostream>
#include <fstream>
@ -20,11 +12,13 @@
#include <vector>
#include <limits>
#include "SentenceAlignment.h"
#include "tables-core.h"
#include "InputFileStream.h"
#include "OutputFileStream.h"
#include "PhraseExtractionOptions.h"
#include "SentenceAlignmentWithSyntax.h"
#include "SyntaxNode.h"
#include "moses/Util.h"
using namespace std;
using namespace MosesTraining;
@ -46,14 +40,14 @@ typedef vector < HPhrase > HPhraseVector;
// The key of the map is the English index and the value is a set of the source ones
typedef map <int, set<int> > HSentenceVertices;
REO_POS getOrientWordModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
REO_POS getOrientWordModel(SentenceAlignmentWithSyntax &, REO_MODEL_TYPE, bool, bool,
int, int, int, int, int, int, int,
bool (*)(int, int), bool (*)(int, int));
REO_POS getOrientPhraseModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
REO_POS getOrientPhraseModel(SentenceAlignmentWithSyntax &, REO_MODEL_TYPE, bool, bool,
int, int, int, int, int, int, int,
bool (*)(int, int), bool (*)(int, int),
const HSentenceVertices &, const HSentenceVertices &);
REO_POS getOrientHierModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
REO_POS getOrientHierModel(SentenceAlignmentWithSyntax &, REO_MODEL_TYPE, bool, bool,
int, int, int, int, int, int, int,
bool (*)(int, int), bool (*)(int, int),
const HSentenceVertices &, const HSentenceVertices &,
@ -69,25 +63,16 @@ bool ge(int, int);
bool le(int, int);
bool lt(int, int);
bool isAligned (SentenceAlignment &, int, int);
bool isAligned (SentenceAlignmentWithSyntax &, int, int);
int sentenceOffset = 0;
std::vector<std::string> Tokenize(const std::string& str,
const std::string& delimiters = " \t");
bool flexScoreFlag = false;
}
namespace MosesTraining
{
class ExtractTask
{
public:
ExtractTask(
size_t id, SentenceAlignment &sentence,
size_t id, SentenceAlignmentWithSyntax &sentence,
PhraseExtractionOptions &initoptions,
Moses::OutputFileStream &extractFile,
Moses::OutputFileStream &extractFileInv,
@ -109,14 +94,26 @@ private:
vector< string > m_extractedPhrasesSid;
vector< string > m_extractedPhrasesContext;
vector< string > m_extractedPhrasesContextInv;
void extractBase(SentenceAlignment &);
void extract(SentenceAlignment &);
void addPhrase(SentenceAlignment &, int, int, int, int, string &);
void extractBase();
void extract();
void addPhrase(int, int, int, int, const std::string &);
void writePhrasesToFile();
bool checkPlaceholders (const SentenceAlignment &sentence, int startE, int endE, int startF, int endF);
bool isPlaceholder(const string &word);
bool checkPlaceholders(int startE, int endE, int startF, int endF) const;
bool isPlaceholder(const string &word) const;
bool checkTargetConstituentBoundaries(int startE, int endE, int startF, int endF,
ostringstream &outextractstrPhraseProperties) const;
void getOrientationInfo(int startE, int endE, int startF, int endF,
const HSentenceVertices& inTopLeft,
const HSentenceVertices& inTopRight,
const HSentenceVertices& inBottomLeft,
const HSentenceVertices& inBottomRight,
const HSentenceVertices& outTopLeft,
const HSentenceVertices& outTopRight,
const HSentenceVertices& outBottomLeft,
const HSentenceVertices& outBottomRight,
std::string &orientationInfo) const;
SentenceAlignment &m_sentence;
SentenceAlignmentWithSyntax &m_sentence;
const PhraseExtractionOptions &m_options;
Moses::OutputFileStream &m_extractFile;
Moses::OutputFileStream &m_extractFileInv;
@ -128,12 +125,13 @@ private:
int main(int argc, char* argv[])
{
cerr << "PhraseExtract v1.4, written by Philipp Koehn\n"
<< "phrase extraction from an aligned parallel corpus\n";
cerr << "PhraseExtract v1.5, written by Philipp Koehn et al." << std::endl
<< "phrase extraction from an aligned parallel corpus" << std::endl;
if (argc < 6) {
cerr << "syntax: extract en de align extract max-length [orientation [ --model [wbe|phrase|hier]-[msd|mslr|mono] ] ";
cerr<<"| --OnlyOutputSpanInfo | --NoTTable | --GZOutput | --IncludeSentenceId | --SentenceOffset n | --InstanceWeights filename ]\n";
cerr << "| --OnlyOutputSpanInfo | --NoTTable | --GZOutput | --IncludeSentenceId | --SentenceOffset n | --InstanceWeights filename ";
cerr << "| --TargetConstituentConstrained | --TargetConstituentBoundaries ]" << std::endl;
exit(1);
}
@ -153,8 +151,14 @@ int main(int argc, char* argv[])
options.initOnlyOutputSpanInfo(true);
} else if (strcmp(argv[i],"orientation") == 0 || strcmp(argv[i],"--Orientation") == 0) {
options.initOrientationFlag(true);
} else if (strcmp(argv[i],"--TargetConstituentConstrained") == 0) {
options.initTargetConstituentConstrainedFlag(true);
} else if (strcmp(argv[i],"--TargetConstituentBoundaries") == 0) {
options.initTargetConstituentBoundariesFlag(true);
} else if (strcmp(argv[i],"--FlexibilityScore") == 0) {
options.initFlexScoreFlag(true);
} else if (strcmp(argv[i],"--SingleWordHeuristic") == 0) {
options.initSingleWordHeuristicFlag(true);
} else if (strcmp(argv[i],"--NoTTable") == 0) {
options.initTranslationFlag(false);
} else if (strcmp(argv[i], "--IncludeSentenceId") == 0) {
@ -231,9 +235,9 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i], "--Placeholders") == 0) {
++i;
string str = argv[i];
options.placeholders = Tokenize(str.c_str(), ",");
Moses::Tokenize(options.placeholders, str.c_str(), ",");
} else {
cerr << "extract: syntax error, unknown option '" << string(argv[i]) << "'\n";
cerr << "extract: syntax error, unknown option '" << string(argv[i]) << "'" << std::endl;
exit(1);
}
}
@ -278,11 +282,16 @@ int main(int argc, char* argv[])
extractFileContextInv.Open(fileNameExtractContextInv.c_str());
}
// stats on labels for glue grammar and unknown word label probabilities
set< string > targetLabelCollection, sourceLabelCollection;
map< string, int > targetTopLabelCollection, sourceTopLabelCollection;
const bool targetSyntax = true;
int i = sentenceOffset;
string englishString, foreignString, alignmentString, weightString;
while(getline(*eFileP, englishString)) {
while (getline(*eFileP, englishString)) {
// Print progress dots to stderr.
i++;
if (i%10000 == 0) cerr << "." << flush;
@ -293,7 +302,10 @@ int main(int argc, char* argv[])
getline(*iwFileP, weightString);
}
SentenceAlignment sentence;
SentenceAlignmentWithSyntax sentence
(targetLabelCollection, sourceLabelCollection,
targetTopLabelCollection, sourceTopLabelCollection,
targetSyntax, false);
// cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl;
//az: output src, tgt, and alingment line
if (options.isOnlyOutputSpanInfo()) {
@ -347,7 +359,7 @@ namespace MosesTraining
{
void ExtractTask::Run()
{
extract(m_sentence);
extract();
writePhrasesToFile();
m_extractedPhrases.clear();
m_extractedPhrasesInv.clear();
@ -358,10 +370,10 @@ void ExtractTask::Run()
}
void ExtractTask::extract(SentenceAlignment &sentence)
void ExtractTask::extract()
{
int countE = sentence.target.size();
int countF = sentence.source.size();
int countE = m_sentence.target.size();
int countF = m_sentence.source.size();
HPhraseVector inboundPhrases;
@ -376,21 +388,20 @@ void ExtractTask::extract(SentenceAlignment &sentence)
HSentenceVertices outBottomRight;
bool relaxLimit = m_options.isHierModel();
bool buildExtraStructure = m_options.isPhraseModel() || m_options.isHierModel();
// check alignments for target phrase startE...endE
// loop over extracted phrases which are compatible with the word-alignments
for(int startE=0; startE<countE; startE++) {
for(int endE=startE;
(endE<countE && (relaxLimit || endE<startE+m_options.maxPhraseLength));
endE++) {
for (int startE=0; startE<countE; startE++) {
for (int endE=startE;
(endE<countE && (relaxLimit || endE<startE+m_options.maxPhraseLength));
endE++) {
int minF = std::numeric_limits<int>::max();
int maxF = -1;
vector< int > usedF = sentence.alignedCountS;
for(int ei=startE; ei<=endE; ei++) {
for(size_t i=0; i<sentence.alignedToT[ei].size(); i++) {
int fi = sentence.alignedToT[ei][i];
vector< int > usedF = m_sentence.alignedCountS;
for (int ei=startE; ei<=endE; ei++) {
for (size_t i=0; i<m_sentence.alignedToT[ei].size(); i++) {
int fi = m_sentence.alignedToT[ei][i];
if (fi<minF) {
minF = fi;
}
@ -406,111 +417,142 @@ void ExtractTask::extract(SentenceAlignment &sentence)
// check if source words are aligned to out of bound target words
bool out_of_bounds = false;
for(int fi=minF; fi<=maxF && !out_of_bounds; fi++)
for (int fi=minF; fi<=maxF && !out_of_bounds; fi++)
if (usedF[fi]>0) {
// cout << "ouf of bounds: " << fi << "\n";
// cout << "ouf of bounds: " << fi << std::endl;
out_of_bounds = true;
}
// cout << "doing if for ( " << minF << "-" << maxF << ", " << startE << "," << endE << ")\n";
// cout << "doing if for ( " << minF << "-" << maxF << ", " << startE << "," << endE << ")" << std::endl;
if (!out_of_bounds) {
// start point of source phrase may retreat over unaligned
for(int startF=minF;
(startF>=0 &&
(relaxLimit || startF>maxF-m_options.maxPhraseLength) && // within length limit
(startF==minF || sentence.alignedCountS[startF]==0)); // unaligned
startF--)
for (int startF=minF;
(startF>=0 &&
(relaxLimit || startF>maxF-m_options.maxPhraseLength) && // within length limit
(startF==minF || m_sentence.alignedCountS[startF]==0)); // unaligned
startF--) {
// end point of source phrase may advance over unaligned
for(int endF=maxF;
(endF<countF &&
(relaxLimit || endF<startF+m_options.maxPhraseLength) && // within length limit
(endF==maxF || sentence.alignedCountS[endF]==0)); // unaligned
endF++) { // at this point we have extracted a phrase
if(buildExtraStructure) { // phrase || hier
if(endE-startE < m_options.maxPhraseLength && endF-startF < m_options.maxPhraseLength) { // within limit
inboundPhrases.push_back(HPhrase(HPhraseVertex(startF,startE),
HPhraseVertex(endF,endE)));
insertPhraseVertices(inTopLeft, inTopRight, inBottomLeft, inBottomRight,
startF, startE, endF, endE);
} else
insertPhraseVertices(outTopLeft, outTopRight, outBottomLeft, outBottomRight,
startF, startE, endF, endE);
for (int endF=maxF;
(endF<countF &&
(relaxLimit || endF<startF+m_options.maxPhraseLength) && // within length limit
(endF==maxF || m_sentence.alignedCountS[endF]==0)); // unaligned
endF++) { // at this point we have extracted a phrase
if(endE-startE < m_options.maxPhraseLength && endF-startF < m_options.maxPhraseLength) { // within limit
inboundPhrases.push_back(HPhrase(HPhraseVertex(startF,startE),
HPhraseVertex(endF,endE)));
insertPhraseVertices(inTopLeft, inTopRight, inBottomLeft, inBottomRight,
startF, startE, endF, endE);
} else {
string orientationInfo = "";
if(m_options.isWordModel()) {
REO_POS wordPrevOrient, wordNextOrient;
bool connectedLeftTopP = isAligned( sentence, startF-1, startE-1 );
bool connectedRightTopP = isAligned( sentence, endF+1, startE-1 );
bool connectedLeftTopN = isAligned( sentence, endF+1, endE+1 );
bool connectedRightTopN = isAligned( sentence, startF-1, endE+1 );
wordPrevOrient = getOrientWordModel(sentence, m_options.isWordType(), connectedLeftTopP, connectedRightTopP, startF, endF, startE, endE, countF, 0, 1, &ge, &lt);
wordNextOrient = getOrientWordModel(sentence, m_options.isWordType(), connectedLeftTopN, connectedRightTopN, endF, startF, endE, startE, 0, countF, -1, &lt, &ge);
orientationInfo += getOrientString(wordPrevOrient, m_options.isWordType()) + " " + getOrientString(wordNextOrient, m_options.isWordType());
// if(m_options.isAllModelsOutputFlag())
// " | | ";
}
addPhrase(sentence, startE, endE, startF, endF, orientationInfo);
insertPhraseVertices(outTopLeft, outTopRight, outBottomLeft, outBottomRight,
startF, startE, endF, endE);
}
}
}
}
}
}
}
if(buildExtraStructure) { // phrase || hier
string orientationInfo = "";
REO_POS wordPrevOrient=UNKNOWN, wordNextOrient=UNKNOWN, phrasePrevOrient, phraseNextOrient, hierPrevOrient, hierNextOrient;
std::string orientationInfo = "";
for(size_t i = 0; i < inboundPhrases.size(); i++) {
int startF = inboundPhrases[i].first.first;
int startE = inboundPhrases[i].first.second;
int endF = inboundPhrases[i].second.first;
int endE = inboundPhrases[i].second.second;
for (size_t i = 0; i < inboundPhrases.size(); i++) {
bool connectedLeftTopP = isAligned( sentence, startF-1, startE-1 );
bool connectedRightTopP = isAligned( sentence, endF+1, startE-1 );
bool connectedLeftTopN = isAligned( sentence, endF+1, endE+1 );
bool connectedRightTopN = isAligned( sentence, startF-1, endE+1 );
int startF = inboundPhrases[i].first.first;
int startE = inboundPhrases[i].first.second;
int endF = inboundPhrases[i].second.first;
int endE = inboundPhrases[i].second.second;
if(m_options.isWordModel()) {
wordPrevOrient = getOrientWordModel(sentence, m_options.isWordType(),
connectedLeftTopP, connectedRightTopP,
startF, endF, startE, endE, countF, 0, 1,
&ge, &lt);
wordNextOrient = getOrientWordModel(sentence, m_options.isWordType(),
connectedLeftTopN, connectedRightTopN,
endF, startF, endE, startE, 0, countF, -1,
&lt, &ge);
getOrientationInfo(startE, endE, startF, endF,
inTopLeft, inTopRight, inBottomLeft, inBottomRight,
outTopLeft, outTopRight, outBottomLeft, outBottomRight,
orientationInfo);
addPhrase(startE, endE, startF, endF, orientationInfo);
}
if (m_options.isSingleWordHeuristicFlag()) {
// add single word phrases that are not consistent with the word alignment
m_sentence.invertAlignment();
for (int ei=0; ei<countE; ei++) {
for (size_t i=0; i<m_sentence.alignedToT[ei].size(); i++) {
int fi = m_sentence.alignedToT[ei][i];
if ((m_sentence.alignedToT[ei].size() > 1) || (m_sentence.alignedToS[fi].size() > 1)) {
if (m_options.isOrientationFlag()) {
getOrientationInfo(ei, ei, fi, fi,
inTopLeft, inTopRight, inBottomLeft, inBottomRight,
outTopLeft, outTopRight, outBottomLeft, outBottomRight,
orientationInfo);
}
addPhrase(ei, ei, fi, fi, orientationInfo);
}
}
if (m_options.isPhraseModel()) {
phrasePrevOrient = getOrientPhraseModel(sentence, m_options.isPhraseType(),
connectedLeftTopP, connectedRightTopP,
startF, endF, startE, endE, countF-1, 0, 1, &ge, &lt, inBottomRight, inBottomLeft);
phraseNextOrient = getOrientPhraseModel(sentence, m_options.isPhraseType(),
connectedLeftTopN, connectedRightTopN,
endF, startF, endE, startE, 0, countF-1, -1, &lt, &ge, inBottomLeft, inBottomRight);
} else {
phrasePrevOrient = phraseNextOrient = UNKNOWN;
}
if(m_options.isHierModel()) {
hierPrevOrient = getOrientHierModel(sentence, m_options.isHierType(),
connectedLeftTopP, connectedRightTopP,
startF, endF, startE, endE, countF-1, 0, 1, &ge, &lt, inBottomRight, inBottomLeft, outBottomRight, outBottomLeft, phrasePrevOrient);
hierNextOrient = getOrientHierModel(sentence, m_options.isHierType(),
connectedLeftTopN, connectedRightTopN,
endF, startF, endE, startE, 0, countF-1, -1, &lt, &ge, inBottomLeft, inBottomRight, outBottomLeft, outBottomRight, phraseNextOrient);
}
orientationInfo = ((m_options.isWordModel())? getOrientString(wordPrevOrient, m_options.isWordType()) + " " + getOrientString(wordNextOrient, m_options.isWordType()) : "") + " | " +
((m_options.isPhraseModel())? getOrientString(phrasePrevOrient, m_options.isPhraseType()) + " " + getOrientString(phraseNextOrient, m_options.isPhraseType()) : "") + " | " +
((m_options.isHierModel())? getOrientString(hierPrevOrient, m_options.isHierType()) + " " + getOrientString(hierNextOrient, m_options.isHierType()) : "");
addPhrase(sentence, startE, endE, startF, endF, orientationInfo);
}
}
}
REO_POS getOrientWordModel(SentenceAlignment & sentence, REO_MODEL_TYPE modelType,
void ExtractTask::getOrientationInfo(int startE, int endE, int startF, int endF,
const HSentenceVertices& inTopLeft,
const HSentenceVertices& inTopRight,
const HSentenceVertices& inBottomLeft,
const HSentenceVertices& inBottomRight,
const HSentenceVertices& outTopLeft,
const HSentenceVertices& outTopRight,
const HSentenceVertices& outBottomLeft,
const HSentenceVertices& outBottomRight,
std::string &orientationInfo) const
{
REO_POS wordPrevOrient=UNKNOWN, wordNextOrient=UNKNOWN;
REO_POS phrasePrevOrient=UNKNOWN, phraseNextOrient=UNKNOWN;
REO_POS hierPrevOrient=UNKNOWN, hierNextOrient=UNKNOWN;
bool connectedLeftTopP = isAligned( m_sentence, startF-1, startE-1 );
bool connectedRightTopP = isAligned( m_sentence, endF+1, startE-1 );
bool connectedLeftTopN = isAligned( m_sentence, endF+1, endE+1 );
bool connectedRightTopN = isAligned( m_sentence, startF-1, endE+1 );
const int countF = m_sentence.source.size();
if (m_options.isWordModel()) {
wordPrevOrient = getOrientWordModel(m_sentence, m_options.isWordType(),
connectedLeftTopP, connectedRightTopP,
startF, endF, startE, endE, countF, 0, 1,
&ge, &lt);
wordNextOrient = getOrientWordModel(m_sentence, m_options.isWordType(),
connectedLeftTopN, connectedRightTopN,
endF, startF, endE, startE, 0, countF, -1,
&lt, &ge);
}
if (m_options.isPhraseModel()) {
phrasePrevOrient = getOrientPhraseModel(m_sentence, m_options.isPhraseType(),
connectedLeftTopP, connectedRightTopP,
startF, endF, startE, endE, countF-1, 0, 1, &ge, &lt, inBottomRight, inBottomLeft);
phraseNextOrient = getOrientPhraseModel(m_sentence, m_options.isPhraseType(),
connectedLeftTopN, connectedRightTopN,
endF, startF, endE, startE, 0, countF-1, -1, &lt, &ge, inBottomLeft, inBottomRight);
}
if (m_options.isHierModel()) {
hierPrevOrient = getOrientHierModel(m_sentence, m_options.isHierType(),
connectedLeftTopP, connectedRightTopP,
startF, endF, startE, endE, countF-1, 0, 1, &ge, &lt, inBottomRight, inBottomLeft, outBottomRight, outBottomLeft, phrasePrevOrient);
hierNextOrient = getOrientHierModel(m_sentence, m_options.isHierType(),
connectedLeftTopN, connectedRightTopN,
endF, startF, endE, startE, 0, countF-1, -1, &lt, &ge, inBottomLeft, inBottomRight, outBottomLeft, outBottomRight, phraseNextOrient);
}
if (m_options.isWordModel()) {
orientationInfo = getOrientString(wordPrevOrient, m_options.isWordType()) + " " + getOrientString(wordNextOrient, m_options.isWordType());
} else {
orientationInfo = " | " +
((m_options.isPhraseModel())? getOrientString(phrasePrevOrient, m_options.isPhraseType()) + " " + getOrientString(phraseNextOrient, m_options.isPhraseType()) : "") + " | " +
((m_options.isHierModel())? getOrientString(hierPrevOrient, m_options.isHierType()) + " " + getOrientString(hierNextOrient, m_options.isHierType()) : "");
}
}
REO_POS getOrientWordModel(SentenceAlignmentWithSyntax & sentence, REO_MODEL_TYPE modelType,
bool connectedLeftTop, bool connectedRightTop,
int startF, int endF, int startE, int endE, int countF, int zero, int unit,
bool (*ge)(int, int), bool (*lt)(int, int) )
@ -536,7 +578,7 @@ REO_POS getOrientWordModel(SentenceAlignment & sentence, REO_MODEL_TYPE modelTyp
}
// to be called with countF-1 instead of countF
REO_POS getOrientPhraseModel (SentenceAlignment & sentence, REO_MODEL_TYPE modelType,
REO_POS getOrientPhraseModel (SentenceAlignmentWithSyntax & sentence, REO_MODEL_TYPE modelType,
bool connectedLeftTop, bool connectedRightTop,
int startF, int endF, int startE, int endE, int countF, int zero, int unit,
bool (*ge)(int, int), bool (*lt)(int, int),
@ -572,7 +614,7 @@ REO_POS getOrientPhraseModel (SentenceAlignment & sentence, REO_MODEL_TYPE model
}
// to be called with countF-1 instead of countF
REO_POS getOrientHierModel (SentenceAlignment & sentence, REO_MODEL_TYPE modelType,
REO_POS getOrientHierModel (SentenceAlignmentWithSyntax & sentence, REO_MODEL_TYPE modelType,
bool connectedLeftTop, bool connectedRightTop,
int startF, int endF, int startE, int endE, int countF, int zero, int unit,
bool (*ge)(int, int), bool (*lt)(int, int),
@ -624,7 +666,7 @@ REO_POS getOrientHierModel (SentenceAlignment & sentence, REO_MODEL_TYPE modelTy
return UNKNOWN;
}
bool isAligned ( SentenceAlignment &sentence, int fi, int ei )
bool isAligned ( SentenceAlignmentWithSyntax &sentence, int fi, int ei )
{
if (ei == -1 && fi == -1)
return true;
@ -660,7 +702,7 @@ void insertVertex( HSentenceVertices & corners, int x, int y )
set<int> tmp;
tmp.insert(x);
pair< HSentenceVertices::iterator, bool > ret = corners.insert( pair<int, set<int> > (y, tmp) );
if(ret.second == false) {
if (ret.second == false) {
ret.first->second.insert(x);
}
}
@ -711,41 +753,174 @@ string getOrientString(REO_POS orient, REO_MODEL_TYPE modelType)
return "";
}
void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE, int startF, int endF , string &orientationInfo)
bool ExtractTask::checkTargetConstituentBoundaries(int startE, int endE, int startF, int endF,
ostringstream &outextractstrPhraseProperties) const
{
// source
// // cout << "adding ( " << startF << "-" << endF << ", " << startE << "-" << endE << ")\n";
if (m_options.isTargetConstituentBoundariesFlag()) {
outextractstrPhraseProperties << " {{TargetConstituentBoundariesLeft ";
}
bool validTargetConstituentBoundaries = false;
bool outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst = true;
if (m_options.isTargetConstituentBoundariesFlag()) {
if (startE==0) {
outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst = false;
outextractstrPhraseProperties << "BOS_";
}
}
if (!m_sentence.targetTree.HasNodeStartingAtPosition(startE)) {
validTargetConstituentBoundaries = false;
} else {
const std::vector< SyntaxNode* >& startingNodes = m_sentence.targetTree.GetNodesByStartPosition(startE);
for ( std::vector< SyntaxNode* >::const_reverse_iterator iter = startingNodes.rbegin(); iter != startingNodes.rend(); ++iter ) {
if ( (*iter)->end == endE ) {
validTargetConstituentBoundaries = true;
if (!m_options.isTargetConstituentBoundariesFlag()) {
break;
}
}
if (m_options.isTargetConstituentBoundariesFlag()) {
if (outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst) {
outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst = false;
} else {
outextractstrPhraseProperties << "<";
}
outextractstrPhraseProperties << (*iter)->label;
}
}
}
if (m_options.isTargetConstituentBoundariesFlag()) {
if (outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst) {
outextractstrPhraseProperties << "<";
}
outextractstrPhraseProperties << "}}";
}
if (m_options.isTargetConstituentConstrainedFlag() && !validTargetConstituentBoundaries) {
// skip over all boundary punctuation and check again
bool relaxedValidTargetConstituentBoundaries = false;
int relaxedStartE = startE;
int relaxedEndE = endE;
const std::string punctuation = ",;.:!?";
while ( (relaxedStartE < endE) &&
(m_sentence.target[relaxedStartE].size() == 1) &&
(punctuation.find(m_sentence.target[relaxedStartE].at(0)) != std::string::npos) ) {
++relaxedStartE;
}
while ( (relaxedEndE > relaxedStartE) &&
(m_sentence.target[relaxedEndE].size() == 1) &&
(punctuation.find(m_sentence.target[relaxedEndE].at(0)) != std::string::npos) ) {
--relaxedEndE;
}
if ( (relaxedStartE != startE) || (relaxedEndE !=endE) ) {
const std::vector< SyntaxNode* >& startingNodes = m_sentence.targetTree.GetNodesByStartPosition(relaxedStartE);
for ( std::vector< SyntaxNode* >::const_reverse_iterator iter = startingNodes.rbegin();
(iter != startingNodes.rend() && !relaxedValidTargetConstituentBoundaries);
++iter ) {
if ( (*iter)->end == relaxedEndE ) {
relaxedValidTargetConstituentBoundaries = true;
}
}
}
if (!relaxedValidTargetConstituentBoundaries) {
return false;
}
}
if (m_options.isTargetConstituentBoundariesFlag()) {
outextractstrPhraseProperties << " {{TargetConstituentBoundariesRightAdjacent ";
outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst = true;
if (endE==(int)m_sentence.target.size()-1) {
outextractstrPhraseProperties << "EOS_";
outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst = false;
} else {
const std::vector< SyntaxNode* >& adjacentNodes = m_sentence.targetTree.GetNodesByStartPosition(endE+1);
for ( std::vector< SyntaxNode* >::const_reverse_iterator iter = adjacentNodes.rbegin(); iter != adjacentNodes.rend(); ++iter ) {
if (outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst) {
outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst = false;
} else {
outextractstrPhraseProperties << "<";
}
outextractstrPhraseProperties << (*iter)->label;
}
}
if (outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst) {
outextractstrPhraseProperties << "<";
}
outextractstrPhraseProperties << "}}";
}
return true;
}
void ExtractTask::addPhrase( int startE, int endE, int startF, int endF,
const std::string &orientationInfo)
{
ostringstream outextractstrPhraseProperties;
if (m_options.isTargetConstituentBoundariesFlag() || m_options.isTargetConstituentConstrainedFlag()) {
bool isTargetConstituentCovered = checkTargetConstituentBoundaries(startE, endE, startF, endF, outextractstrPhraseProperties);
if (m_options.isTargetConstituentBoundariesFlag() && !isTargetConstituentCovered) {
return;
}
}
if (m_options.placeholders.size() && !checkPlaceholders(startE, endE, startF, endF)) {
return;
}
if (m_options.isOnlyOutputSpanInfo()) {
cout << startF << " " << endF << " " << startE << " " << endE << std::endl;
return;
}
ostringstream outextractstr;
ostringstream outextractstrInv;
ostringstream outextractstrOrientation;
if (m_options.isOnlyOutputSpanInfo()) {
cout << startF << " " << endF << " " << startE << " " << endE << endl;
return;
}
if (m_options.placeholders.size() && !checkPlaceholders(sentence, startE, endE, startF, endF)) {
return;
}
if (m_options.debug) {
outextractstr << "sentenceID=" << sentence.sentenceID << " ";
outextractstrInv << "sentenceID=" << sentence.sentenceID << " ";
outextractstrOrientation << "sentenceID=" << sentence.sentenceID << " ";
outextractstr << "sentenceID=" << m_sentence.sentenceID << " ";
outextractstrInv << "sentenceID=" << m_sentence.sentenceID << " ";
outextractstrOrientation << "sentenceID=" << m_sentence.sentenceID << " ";
}
// source
for(int fi=startF; fi<=endF; fi++) {
if (m_options.isTranslationFlag()) outextractstr << sentence.source[fi] << " ";
if (m_options.isOrientationFlag()) outextractstrOrientation << sentence.source[fi] << " ";
if (m_options.isTranslationFlag()) outextractstr << m_sentence.source[fi] << " ";
if (m_options.isOrientationFlag()) outextractstrOrientation << m_sentence.source[fi] << " ";
}
if (m_options.isTranslationFlag()) outextractstr << "||| ";
if (m_options.isOrientationFlag()) outextractstrOrientation << "||| ";
// target
for(int ei=startE; ei<=endE; ei++) {
if (m_options.isTranslationFlag()) outextractstr << sentence.target[ei] << " ";
if (m_options.isTranslationFlag()) outextractstrInv << sentence.target[ei] << " ";
if (m_options.isOrientationFlag()) outextractstrOrientation << sentence.target[ei] << " ";
if (m_options.isTranslationFlag()) {
outextractstr << m_sentence.target[ei] << " ";
outextractstrInv << m_sentence.target[ei] << " ";
}
if (m_options.isOrientationFlag()) {
outextractstrOrientation << m_sentence.target[ei] << " ";
}
}
if (m_options.isTranslationFlag()) outextractstr << "|||";
if (m_options.isTranslationFlag()) outextractstrInv << "||| ";
@ -755,17 +930,22 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
if (m_options.isTranslationFlag()) {
for(int fi=startF; fi<=endF; fi++)
outextractstrInv << sentence.source[fi] << " ";
outextractstrInv << m_sentence.source[fi] << " ";
outextractstrInv << "|||";
}
// alignment
if (m_options.isTranslationFlag()) {
for(int ei=startE; ei<=endE; ei++) {
for(unsigned int i=0; i<sentence.alignedToT[ei].size(); i++) {
int fi = sentence.alignedToT[ei][i];
outextractstr << " " << fi-startF << "-" << ei-startE;
outextractstrInv << " " << ei-startE << "-" << fi-startF;
if (m_options.isSingleWordHeuristicFlag() && (startE==endE) && (startF==endF)) {
outextractstr << " 0-0";
outextractstrInv << " 0-0";
} else {
for(int ei=startE; ei<=endE; ei++) {
for(unsigned int i=0; i<m_sentence.alignedToT[ei].size(); i++) {
int fi = m_sentence.alignedToT[ei][i];
outextractstr << " " << fi-startF << "-" << ei-startE;
outextractstrInv << " " << ei-startE << "-" << fi-startF;
}
}
}
}
@ -774,20 +954,20 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
outextractstrOrientation << orientationInfo;
if (m_options.isIncludeSentenceIdFlag()) {
outextractstr << " ||| " << sentence.sentenceID;
outextractstr << " ||| " << m_sentence.sentenceID;
}
if (m_options.getInstanceWeightsFile().length()) {
if (m_options.isTranslationFlag()) {
outextractstr << " ||| " << sentence.weightString;
outextractstrInv << " ||| " << sentence.weightString;
outextractstr << " ||| " << m_sentence.weightString;
outextractstrInv << " ||| " << m_sentence.weightString;
}
if (m_options.isOrientationFlag()) {
outextractstrOrientation << " ||| " << sentence.weightString;
outextractstrOrientation << " ||| " << m_sentence.weightString;
}
}
outextractstr << outextractstrPhraseProperties.str();
// generate two lines for every extracted phrase:
// once with left, once with right context
@ -797,20 +977,20 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
ostringstream outextractstrContextInv;
for(int fi=startF; fi<=endF; fi++) {
outextractstrContext << sentence.source[fi] << " ";
outextractstrContext << m_sentence.source[fi] << " ";
}
outextractstrContext << "||| ";
// target
for(int ei=startE; ei<=endE; ei++) {
outextractstrContext << sentence.target[ei] << " ";
outextractstrContextInv << sentence.target[ei] << " ";
outextractstrContext << m_sentence.target[ei] << " ";
outextractstrContextInv << m_sentence.target[ei] << " ";
}
outextractstrContext << "||| ";
outextractstrContextInv << "||| ";
for(int fi=startF; fi<=endF; fi++)
outextractstrContextInv << sentence.source[fi] << " ";
outextractstrContextInv << m_sentence.source[fi] << " ";
outextractstrContextInv << "|||";
@ -823,25 +1003,25 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
// write context to left
outextractstrContext << "< ";
if (startF == 0) outextractstrContext << "<s>";
else outextractstrContext << sentence.source[startF-1];
else outextractstrContext << m_sentence.source[startF-1];
outextractstrContextInv << " < ";
if (startE == 0) outextractstrContextInv << "<s>";
else outextractstrContextInv << sentence.target[startE-1];
else outextractstrContextInv << m_sentence.target[startE-1];
// write context to right
outextractstrContextRight << "> ";
if (endF+1 == sentence.source.size()) outextractstrContextRight << "<s>";
else outextractstrContextRight << sentence.source[endF+1];
if (endF+1 == (int)m_sentence.source.size()) outextractstrContextRight << "<s>";
else outextractstrContextRight << m_sentence.source[endF+1];
outextractstrContextRightInv << " > ";
if (endE+1 == sentence.target.size()) outextractstrContextRightInv << "<s>";
else outextractstrContextRightInv << sentence.target[endE+1];
if (endE+1 == (int)m_sentence.target.size()) outextractstrContextRightInv << "<s>";
else outextractstrContextRightInv << m_sentence.target[endE+1];
outextractstrContext << "\n";
outextractstrContextInv << "\n";
outextractstrContextRight << "\n";
outextractstrContextRightInv << "\n";
outextractstrContext << std::endl;
outextractstrContextInv << std::endl;
outextractstrContextRight << std::endl;
outextractstrContextRightInv << std::endl;
m_extractedPhrasesContext.push_back(outextractstrContext.str());
m_extractedPhrasesContextInv.push_back(outextractstrContextInv.str());
@ -849,9 +1029,9 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
m_extractedPhrasesContextInv.push_back(outextractstrContextRightInv.str());
}
if (m_options.isTranslationFlag()) outextractstr << "\n";
if (m_options.isTranslationFlag()) outextractstrInv << "\n";
if (m_options.isOrientationFlag()) outextractstrOrientation << "\n";
if (m_options.isTranslationFlag()) outextractstr << std::endl;
if (m_options.isTranslationFlag()) outextractstrInv << std::endl;
if (m_options.isOrientationFlag()) outextractstrOrientation << std::endl;
m_extractedPhrases.push_back(outextractstr.str());
@ -896,30 +1076,30 @@ void ExtractTask::writePhrasesToFile()
// if proper conditioning, we need the number of times a source phrase occured
void ExtractTask::extractBase( SentenceAlignment &sentence )
void ExtractTask::extractBase()
{
ostringstream outextractFile;
ostringstream outextractFileInv;
int countF = sentence.source.size();
int countF = m_sentence.source.size();
for(int startF=0; startF<countF; startF++) {
for(int endF=startF;
(endF<countF && endF<startF+m_options.maxPhraseLength);
endF++) {
for(int fi=startF; fi<=endF; fi++) {
outextractFile << sentence.source[fi] << " ";
outextractFile << m_sentence.source[fi] << " ";
}
outextractFile << "|||" << endl;
}
}
int countE = sentence.target.size();
int countE = m_sentence.target.size();
for(int startE=0; startE<countE; startE++) {
for(int endE=startE;
(endE<countE && endE<startE+m_options.maxPhraseLength);
endE++) {
for(int ei=startE; ei<=endE; ei++) {
outextractFileInv << sentence.target[ei] << " ";
outextractFileInv << m_sentence.target[ei] << " ";
}
outextractFileInv << "|||" << endl;
}
@ -930,17 +1110,17 @@ void ExtractTask::extractBase( SentenceAlignment &sentence )
}
bool ExtractTask::checkPlaceholders (const SentenceAlignment &sentence, int startE, int endE, int startF, int endF)
bool ExtractTask::checkPlaceholders(int startE, int endE, int startF, int endF) const
{
for (size_t pos = startF; pos <= endF; ++pos) {
const string &sourceWord = sentence.source[pos];
for (int pos = startF; pos <= endF; ++pos) {
const string &sourceWord = m_sentence.source[pos];
if (isPlaceholder(sourceWord)) {
if (sentence.alignedToS.at(pos).size() != 1) {
if (m_sentence.alignedToS.at(pos).size() != 1) {
return false;
} else {
// check it actually lines up to another placeholder
int targetPos = sentence.alignedToS.at(pos).at(0);
const string &otherWord = sentence.target[targetPos];
int targetPos = m_sentence.alignedToS.at(pos).at(0);
const string &otherWord = m_sentence.target[targetPos];
if (!isPlaceholder(otherWord)) {
return false;
}
@ -948,15 +1128,15 @@ bool ExtractTask::checkPlaceholders (const SentenceAlignment &sentence, int star
}
}
for (size_t pos = startE; pos <= endE; ++pos) {
const string &targetWord = sentence.target[pos];
for (int pos = startE; pos <= endE; ++pos) {
const string &targetWord = m_sentence.target[pos];
if (isPlaceholder(targetWord)) {
if (sentence.alignedToT.at(pos).size() != 1) {
if (m_sentence.alignedToT.at(pos).size() != 1) {
return false;
} else {
// check it actually lines up to another placeholder
int sourcePos = sentence.alignedToT.at(pos).at(0);
const string &otherWord = sentence.source[sourcePos];
int sourcePos = m_sentence.alignedToT.at(pos).at(0);
const string &otherWord = m_sentence.source[sourcePos];
if (!isPlaceholder(otherWord)) {
return false;
}
@ -966,7 +1146,7 @@ bool ExtractTask::checkPlaceholders (const SentenceAlignment &sentence, int star
return true;
}
bool ExtractTask::isPlaceholder(const string &word)
bool ExtractTask::isPlaceholder(const string &word) const
{
for (size_t i = 0; i < m_options.placeholders.size(); ++i) {
const string &placeholder = m_options.placeholders[i];
@ -976,28 +1156,5 @@ bool ExtractTask::isPlaceholder(const string &word)
}
return false;
}
/** Split a string into tokens wherever a character from `delimiters` occurs.
    Consecutive delimiter characters act as one separator, so the result
    contains no empty tokens.  Each delimiter is a single character; the
    usual delimiters are space and tab.
*/
std::vector<std::string> Tokenize(const std::string& str,
                                  const std::string& delimiters)
{
  std::vector<std::string> tokens;
  // Index of the first character of the current token (npos when done).
  std::string::size_type tokenStart = str.find_first_not_of(delimiters, 0);
  // Index of the delimiter terminating the current token (npos at the end).
  std::string::size_type tokenEnd = str.find_first_of(delimiters, tokenStart);
  while (tokenStart != std::string::npos || tokenEnd != std::string::npos) {
    // Emit the token spanning [tokenStart, tokenEnd).
    tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
    // Advance past the delimiter run to the start of the next token.
    tokenStart = str.find_first_not_of(delimiters, tokenEnd);
    tokenEnd = str.find_first_of(delimiters, tokenStart);
  }
  return tokens;
}
}

View File

@ -347,7 +347,8 @@ int main(int argc, char* argv[])
SentenceAlignmentWithSyntax sentence
(targetLabelCollection, sourceLabelCollection,
targetTopLabelCollection, sourceTopLabelCollection, options);
targetTopLabelCollection, sourceTopLabelCollection,
options.targetSyntax, options.sourceSyntax);
//az: output src, tgt, and alingment line
if (options.onlyOutputSpanInfo) {
cout << "LOG: SRC: " << sourceString << endl;

View File

@ -68,6 +68,7 @@ bool spanLength = false;
bool ruleLength = false;
bool nonTermContext = false;
bool nonTermContextTarget = false;
bool targetConstituentBoundariesFlag = false;
int countOfCounts[COC_MAX+1];
int totalDistinct = 0;
@ -286,6 +287,9 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--NonTermContextTarget") == 0) {
nonTermContextTarget = true;
std::cerr << "non-term context (target)" << std::endl;
} else if (strcmp(argv[i],"--TargetConstituentBoundaries") == 0) {
targetConstituentBoundariesFlag = true;
std::cerr << "including target constituent boundaries information" << std::endl;
} else {
featureArgs.push_back(argv[i]);
++i;
@ -957,6 +961,18 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
}
}
// target constituent boundaries
if (targetConstituentBoundariesFlag && !inverseFlag) {
const std::string targetConstituentBoundariesLeftValues = phrasePair.CollectAllPropertyValues("TargetConstituentBoundariesLeft");
if (!targetConstituentBoundariesLeftValues.empty()) {
phraseTableFile << " {{TargetConstituentBoundariesLeft " << targetConstituentBoundariesLeftValues << "}}";
}
const std::string targetConstituentBoundariesRightAdjacentValues = phrasePair.CollectAllPropertyValues("TargetConstituentBoundariesRightAdjacent");
if (!targetConstituentBoundariesRightAdjacentValues.empty()) {
phraseTableFile << " {{TargetConstituentBoundariesRightAdjacent " << targetConstituentBoundariesRightAdjacentValues << "}}";
}
}
phraseTableFile << std::endl;
}

View File

@ -53,18 +53,18 @@ git submodule update regtest
# -- compile from scratch with server, run regtests
set -x
if [ "$full" == true ] ; then
./bjam -j$j --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph --no-xmlrpc-c --with-regtest=$regtest -a $skipcompact $@ $q || exit $?
./bjam -j$j --with-mm --with-mm-extras --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph --no-xmlrpc-c --with-regtest=$regtest -a $skipcompact $@ $q || exit $?
if ./regression-testing/run-single-test.perl --server --startuptest ; then
./bjam -j$j --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph $xmlrpc --with-regtest=$regtest -a $skipcompact $@ $q
./bjam -j$j --with-mm --with-mm-extras --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph $xmlrpc --with-regtest=$regtest -a $skipcompact $@ $q
fi
else
# when investigating failures, always run single-threaded
if [ "$q" == "-q" ] ; then j=1; fi
if ./regression-testing/run-single-test.perl --server --startuptest ; then
./bjam -j$j $q $a --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph $xmlrpc --with-regtest=$regtest $skipcompact $@
./bjam -j$j --with-mm $q $a --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph $xmlrpc --with-regtest=$regtest $skipcompact $@
else
./bjam -j$j $q $a --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph --no-xmlrpc-c --with-regtest=$regtest $skipcompact $@
./bjam -j$j --with-mm --with-mm-extras $q $a --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph --no-xmlrpc-c --with-regtest=$regtest $skipcompact $@
fi
fi

View File

@ -811,7 +811,8 @@ generation-prune
in: generation-table
out: generation-table-pruned
rerun-on-change: TRAINING:prune-generation
ignore-unless: AND TRAINING:prune-generation
pass-unless: TRAINING:prune-generation
ignore-unless: generation-factors
default-name: model/generation-table-pruned
final-model: yes
template: $TRAINING:prune-generation IN OUT

View File

@ -384,11 +384,11 @@ sub read_config {
$resolve = 0;
foreach my $parameter (keys %CONFIG) {
foreach (@{$CONFIG{$parameter}}) {
next unless /\$/;
next unless /\$[a-z\{]/i;
my $escaped = 0;
die ("BAD USE OF \$ IN VALUE used in parameter $parameter")
if ! ( /^(.*)\$([a-z\-\:\d]+)(.*)$/i ||
(/^(.*)\$\{([a-z\-\:\d]+)\}(.*)$/i && ($escaped = 1)));
if ! ( /^(.*)\$([a-z][a-z\-\:\d]*)(.*)$/i ||
(/^(.*)\$\{([a-z][a-z\-\:\d]*)\}(.*)$/i && ($escaped = 1)));
my ($pre,$substitution,$post) = ($1,$2,$3);
my $pattern = $substitution;
if ($substitution !~ /\:/) { # handle local variables
@ -1800,6 +1800,10 @@ sub define_lm_train_bilingual_lm {
my $epochs = &get_bilingual_lm_epochs($set);
$cmd .= " -e $epochs" if defined($epochs);
my $nnjm_settings = backoff_and_get("LM:$set:nnjm-settings");
$cmd .= " ";
$cmd .= $nnjm_settings;
my $nplm_settings = backoff_and_get("LM:$set:nplm-settings");
$cmd .= " --extra-settings \"$nplm_settings\"" if defined($nplm_settings);
@ -2403,6 +2407,12 @@ sub define_training_extract_phrases {
if (&get("TRAINING:ghkm-strip-bitpar-nonterminal-labels")) {
$cmd .= "-ghkm-strip-bitpar-nonterminal-labels ";
}
} else { # !hierarchical-rule-set
if (&get("TRAINING:target-constituent-boundaries")) {
$cmd .= "-target-constituent-boundaries ";
}
}
my $extract_settings = &get("TRAINING:extract-settings");
@ -2460,6 +2470,12 @@ sub define_training_build_ttable {
my $parts_of_speech_labels_file = &versionize(&long_file_name("parts-of-speech","model",""));
$cmd .= "-ghkm-parts-of-speech-file $parts_of_speech_labels_file ";
}
} else { # !hierarchical-rule-set
if (&get("TRAINING:target-constituent-boundaries")) {
$cmd .= "-target-constituent-boundaries ";
}
}
&create_step($step_id,$cmd);
@ -2674,6 +2690,10 @@ sub define_training_create_config {
$cmd .= "-ghkm-parts-of-speech-file $parts_of_speech_labels_file ";
}
if (&get("TRAINING:target-constituent-boundaries")) {
$cmd .= "-target-constituent-boundaries ";
}
# sparse lexical features provide additional content for config file
my @additional_ini_files;
push (@additional_ini_files, "$sparse_lexical_features.ini") if $sparse_lexical_features;
@ -3601,8 +3621,8 @@ sub define_template {
print "\tcmd is $cmd\n" if $VERBOSE;
# replace variables
while ($cmd =~ /^([\S\s]*)\$(\??)\{([^\s\/\"\']+)\}([\S\s]*)$/ ||
$cmd =~ /^([\S\s]*)\$(\??)([^\s\/\"\']+)([\S\s]*)$/) {
while ($cmd =~ /^([\S\s]*)\$(\??)\{([a-z][^\s\/\"\']*)\}([\S\s]*)$/i ||
$cmd =~ /^([\S\s]*)\$(\??)([a-z][^\s\/\"\']*)([\S\s]*)$/i) {
my ($pre,$optional,$variable,$post) = ($1,$2,$3,$4);
my $value;
if ($optional eq '?') {
@ -3616,7 +3636,8 @@ sub define_template {
}
# deal with pipelined commands
$cmd =~ s/\|(.*)(\<\s*\S+) /$2 \| $1 /g;
$cmd =~ s/\|(.*[^\\])(\<\s*\S+) /$2 \| $1 /g;
$cmd =~ s/\\\</\</g;
# deal with gzipped input
my $c = "";

View File

@ -782,7 +782,8 @@ sub hs_scan_line {
if ($line =~ /^Trans Opt/) {
# Old format
$line =~ /^Trans Opt (\d+) \[(\d+)\.\.(\d+)\]: (.+) : (\S+) \-\>(.+) :([\(\),\d\- ]*): pC=[\d\.\-e]+, c=/ ||
$line =~ /^Trans Opt (\d+) \[(\d+)\.\.(\d+)\]: (.+) : (\S+) \-\>\S+ \-\> (.+) :([\(\),\d\- ]*): c=/ || return 0;
$line =~ /^Trans Opt (\d+) \[(\d+)\.\.(\d+)\]: (.+) : (\S+) \-\>\S+ \-\> (.+) :([\(\),\d\- ]*): c=/ ||
$line =~ /^Trans Opt (\d+) \[(\d+)\.\.(\d+)\]: (.+) : (\S+) \-\>\S+ \-\> (.+) :([\(\),\d\- ]*): term=.*: nonterm=.*: c=/ || return 0;
my ($sentence,$start,$end,$spans,$rule_lhs,$rule_rhs,$alignment) = ($1,$2,$3,$4,$5,$6,$7);
${$ref_sentence} = $sentence;
@ -1202,7 +1203,8 @@ sub process_search_graph {
if (/^(\d+) (\d+)\-?\>?(\S*) (\S+) =\> (.+) :(.*): pC=([\de\-\.]+), c=([\de\-\.]+) \[(\d+)\.\.(\d+)\] (.*)\[total=([\d\-\.]+)\] \<\</) {
($sentence,$id,$recomb,$lhs,$output,$alignment,$rule_score,$heuristic_rule_score,$from,$to,$children,$hyp_score) = ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12);
}
elsif (/^(\d+) (\d+)\-?\>?(\S*) (\S+) =\> (.+) :(.*): c=([\de\-\.]+) \[(\d+)\.\.(\d+)\] (.*)\[total=([\d\-\.]+)\] core/) {
elsif (/^(\d+) (\d+)\-?\>?(\S*) (\S+) =\> (.+) :(.*): c=([\de\-\.]+) \[(\d+)\.\.(\d+)\] (.*)\[total=([\de\-\.]+)\] core/ ||
/^(\d+) (\d+)\-?\>?(\S*) (\S+) =\> (.+) :(.*): c=([\de\-\.]+) core=\(.*\) \[(\d+)\.\.(\d+)\] (.*)\[total=([\de\-\.]+)\] core/) {
($sentence,$id,$recomb,$lhs,$output,$alignment,$rule_score,$from,$to,$children,$hyp_score) = ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12);
$heuristic_rule_score = $rule_score; # hmmmm....
}

View File

@ -472,6 +472,7 @@ def compose_score_command(extract_file, context_file, half_file,
command += [
'&&',
find_first_executable(['bzcat']),
half_file,
'|',
quote(args.flexibility_score),
quote(context_file),

View File

@ -0,0 +1,79 @@
# N-best List Re-Scorer
Written by Michael Denkowski
These scripts simplify running N-best re-ranking experiments with Moses. You
can score N-best lists with external tools (such as models that would be very
costly to integrate with Moses just for feasibility experiments), then use the
extended feature set to select translations that may be of a higher quality than
those preferred by the Moses features alone. In some cases, training a
re-ranker even without any new features can yield improvement.
### Training
* Use Moses to generate large N-best lists for a dev set. Use a config file
(moses.ini) that has been optimized with MERT, MIRA, or similar:
```
cat dev-src.txt |moses -f moses.ini -n-best-list dev.best1000.out 1000 distinct
```
* (Optionally) add new feature scores to the N-best list using any external
tools. Make sure the features are added to the correct field using the correct
format. You don't need to update the final scores (right now your new features
have zero weight):
```
0 ||| some translation ||| Feature0= -1.75645 Feature1= -1.38629 -2.19722 -2.31428 -0.81093 AwesomeNewFeature= -1.38629 ||| -4.42063
```
* Run the optimizer (currently K-best MIRA) to learn new re-ranking weights for
all features in your N-best list. Supply the reference translation for the dev
set:
```
python train.py --nbest dev.best1000.with-new-features --ref dev-ref.txt --working-dir rescore-work
```
* You now have a new config file that contains N-best re-scoring weights:
```
rescore-work/rescore.ini
```
### Test
* Use the **original** config file to generate N-best lists for the test set:
```
cat test-src.txt |moses -f moses.ini -n-best-list test.best1000.out 1000 distinct
```
* Add any new features you added for training
* Re-score the N-best list (update total scores) using the **re-scoring**
weights file:
```
python rescore.py rescore-work/rescore.ini <test.best1000.with-new-features >test.best1000.rescored
```
* The N-best list is **not** re-sorted, so the entries will be out of order.
Use the top-best script to extract the highest scoring entry for each sentence:
```
python topbest.py <test.best1000.rescored >test.topbest
```
### Not implemented yet
The following could be relatively easily implemented by replicating the
behavior of mert-moses.pl:
* Sparse features (sparse weight file)
* Other optimizers (MERT, PRO, etc.)
* Other objective functions (TER, Meteor, etc.)
* Multiple reference translations

View File

@ -0,0 +1,56 @@
#!/usr/bin/env python
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

"""Re-score a Moses N-best list with re-ranking weights.

Reads weights from the [weight] section of a config file (argv[1]) and
rewrites the total-score field of every N-best entry on stdin as the dot
product of the entry's feature values with those weights.  Entries are
not re-sorted; use topbest.py to select the best entry per sentence.
"""

import sys

# Fields of the Moses N-best format: id ||| hypothesis ||| features ||| score
FEAT_FIELD = 2
SCORE_FIELD = 3


def main():
    # Exactly one argument (the weights file) is required.
    if len(sys.argv[1:]) != 1:
        sys.stderr.write('Usage: {} moses.ini <nbest.with-new-features >nbest.rescored\n'.format(sys.argv[0]))
        sys.stderr.write('Entries are _not_ re-sorted based on new score. Use topbest.py\n')
        sys.exit(2)
    weights = {}
    # moses.ini: scan for the [weight] section, then read one
    # "Name= w1 w2 ..." entry per line until the next section starts.
    # A context manager closes the file (previously it was leaked).
    with open(sys.argv[1]) as ini:
        while True:
            line = ini.readline()
            if not line:
                sys.stderr.write('Error: no [weight] section\n')
                sys.exit(1)
            if line.strip() == '[weight]':
                break
        while True:
            line = ini.readline()
            if not line or line.strip().startswith('['):
                break
            if line.strip() == '':
                continue
            fields = line.split()
            # The key keeps its trailing '=' so it matches the feature
            # tags that appear in the N-best feature field.
            weights[fields[0]] = [float(f) for f in fields[1:]]
    # N-best: recompute each entry's score as the weighted feature sum.
    for line in sys.stdin:
        fields = [f.strip() for f in line.split('|||')]
        feats = fields[FEAT_FIELD].split()
        key = ''
        i = 0
        score = 0
        for f in feats:
            if f.endswith('='):
                # Feature-name tag: subsequent values belong to this feature.
                key = f
                i = 0
            else:
                score += (float(f) * weights[key][i])
                i += 1
        fields[SCORE_FIELD] = str(score)
        sys.stdout.write('{}\n'.format(' ||| '.join(fields)))


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,30 @@
#!/usr/bin/env python
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

"""Print the top-scoring hypothesis for each sentence in an N-best list.

Input (stdin) need not be sorted by score, but all entries for a given
sentence id must be contiguous, as produced by Moses and rescore.py.
"""

import sys

# Index of the total-score field: id ||| hypothesis ||| features ||| score
SCORE_FIELD = 3


def main():
    cur_id = ''    # sentence id currently being scanned ('' = none yet)
    best_hyp = ''  # best hypothesis seen so far for cur_id
    best = 0       # score of best_hyp
    for line in sys.stdin:
        fields = [f.strip() for f in line.split('|||')]
        sent_id = fields[0]
        if cur_id != sent_id:
            # New sentence: emit the winner of the previous one.
            if cur_id:
                sys.stdout.write('{}\n'.format(best_hyp))
        score = float(fields[SCORE_FIELD])
        # Take this entry if it beats the current best, or unconditionally
        # when it is the first entry of a new sentence.
        if score > best or cur_id != sent_id:
            cur_id = sent_id
            best_hyp = fields[1]
            best = score
    # Flush the last sentence; on empty input print nothing
    # (previously a spurious blank line was emitted).
    if cur_id:
        sys.stdout.write('{}\n'.format(best_hyp))


if __name__ == '__main__':
    main()

116
scripts/nbest-rescore/train.py Executable file
View File

@ -0,0 +1,116 @@
#!/usr/bin/env python
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

"""Learn N-best re-ranking weights with K-best MIRA.

Reads a dev-set N-best list (optionally augmented with extra features),
extracts score/feature data with the Moses `extractor` binary, runs
`kbmira` once from all-zero weights, and writes the normalized optimized
weights to <working-dir>/rescore.ini for use with rescore.py.
"""

import argparse
import os
import subprocess
import sys

# Feature field in N-best format (id ||| hypothesis ||| features ||| score)
FEAT_FIELD = 2

# Location of mert, kbmira, etc. in relation to this script
BIN_DIR = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'bin')


def main():
    # Args
    parser = argparse.ArgumentParser(
        description='Learn N-best rescoring weights')
    parser.add_argument('--nbest', metavar='nbest',
                        help='Dev set N-best list augmented with new features',
                        required=True)
    parser.add_argument('--ref', metavar='ref',
                        help='Dev set reference translation', required=True)
    parser.add_argument('--working-dir', metavar='rescore-work',
                        help='Optimizer working directory', required=True)
    parser.add_argument('--bin-dir', metavar='DIR',
                        help='Moses bin dir, containing kbmira, evaluator, etc.',
                        default=BIN_DIR)
    # Since we're starting with uniform weights and only running kbmira once,
    # run a gratuitous number of iterations.  (mert-moses.pl default is 60
    # iterations for each Moses run)
    parser.add_argument('--iterations', metavar='N', type=int,
                        help='Number of K-best MIRA iterations to run (default: 300)',
                        default=300)
    args = parser.parse_args()

    # Find executables
    extractor = os.path.join(args.bin_dir, 'extractor')
    kbmira = os.path.join(args.bin_dir, 'kbmira')
    for exe in (extractor, kbmira):
        if not os.path.exists(exe):
            sys.stderr.write('Error: cannot find executable "{}" in "{}", please specify --bin-dir\n'.format(exe, args.bin_dir))
            sys.exit(1)

    # rescore-work dir
    if not os.path.exists(args.working_dir):
        os.mkdir(args.working_dir)

    # Feature names and numbers of weights from N-best list.
    # Assume all features are dense (present for each entry).
    init_weights = []
    # Context manager closes the N-best file (the handle was leaked before).
    with open(args.nbest) as nbest_file:
        fields = [f.strip() for f in nbest_file.readline().split('|||')]
    feats = fields[FEAT_FIELD].split()
    for i in range(len(feats)):
        if feats[i].endswith('='):
            # Count the weight values that follow this feature-name tag.
            n_weights = 0
            j = i + 1
            while j < len(feats):
                if feats[j].endswith('='):
                    break
                n_weights += 1
                j += 1
            # Start all weights at 0
            init_weights.append([feats[i], [0] * n_weights])

    # Extract score and feature data from N-best list.
    # NOTE(review): return codes of the external tools are not checked,
    # matching the original behavior — failures surface later as missing
    # or malformed data files.
    extractor_cmd = [extractor,
                     '--sctype', 'BLEU', '--scconfig', 'case:true',
                     '--scfile', os.path.join(args.working_dir, 'scores.dat'),
                     '--ffile', os.path.join(args.working_dir, 'features.dat'),
                     '-r', args.ref,
                     '-n', args.nbest]
    subprocess.call(extractor_cmd)

    # Write dense feature list
    with open(os.path.join(args.working_dir, 'init.dense'), 'w') as out:
        for (feat, weights) in init_weights:
            for w in weights:
                out.write('{} {}\n'.format(feat, w))

    # Run K-best MIRA optimizer
    kbmira_cmd = [kbmira,
                  '--dense-init', os.path.join(args.working_dir, 'init.dense'),
                  '--ffile', os.path.join(args.working_dir, 'features.dat'),
                  '--scfile', os.path.join(args.working_dir, 'scores.dat'),
                  '-o', os.path.join(args.working_dir, 'mert.out'),
                  '--iters', str(args.iterations)]
    subprocess.call(kbmira_cmd)

    # Read optimized weights, sum for normalization
    opt_weights = []
    total = 0
    with open(os.path.join(args.working_dir, 'mert.out')) as inp:
        # Same structure as original weight list
        for (feat, weights) in init_weights:
            opt_weights.append([feat, []])
            for _ in weights:
                w = float(inp.readline().split()[1])
                opt_weights[-1][1].append(w)
                # Sum for normalization
                total += abs(w)

    # Normalize weights.  Guard the degenerate all-zero case, which
    # previously raised ZeroDivisionError.
    if total == 0:
        total = 1.0
    for (_, weights) in opt_weights:
        for i in range(len(weights)):
            weights[i] /= total

    # Generate rescore.ini
    with open(os.path.join(args.working_dir, 'rescore.ini'), 'w') as out:
        out.write('# For use with Moses N-best rescorer "scripts/nbest-rescore/rescore.py"\n')
        out.write('\n')
        out.write('[weight]\n')
        for (feat, weights) in opt_weights:
            out.write('{} {}\n'.format(feat, ' '.join(str(w) for w in weights)))


if __name__ == '__main__':
    main()

View File

@ -348,6 +348,9 @@ sub tokenize
$text =~ s/^ //g;
$text =~ s/ $//g;
# .' at end of sentence is missed
$text =~ s/\.\' ?$/ . ' /;
# restore protected
for (my $i = 0; $i < scalar(@protected); ++$i) {
my $subst = sprintf("THISISPROTECTED%.3d", $i);

View File

@ -234,7 +234,7 @@ while(my $line = <INI>) {
$w = $args[1];
}
elsif ($args[0] eq "input-factor") {
$source_factor = chomp($args[1]);
$source_factor = $args[1];
}
elsif ($args[0] eq "output-factor") {
#$t = chomp($args[1]);

View File

@ -134,6 +134,7 @@ my($_EXTERNAL_BINDIR,
$_LMODEL_OOV_FEATURE,
$_NUM_LATTICE_FEATURES,
$IGNORE,
$_TARGET_CONSTITUENT_BOUNDARIES,
$_FLEXIBILITY_SCORE,
$_FEATURE_LINES,
$_WEIGHT_LINES,
@ -258,6 +259,7 @@ $_HELP = 1
'instance-weights-file=s' => \$_INSTANCE_WEIGHTS_FILE,
'lmodel-oov-feature' => \$_LMODEL_OOV_FEATURE,
'num-lattice-features=i' => \$_NUM_LATTICE_FEATURES,
'target-constituent-boundaries' => \$_TARGET_CONSTITUENT_BOUNDARIES,
'flexibility-score' => \$_FLEXIBILITY_SCORE,
'config-add-feature-lines=s' => \$_FEATURE_LINES,
'config-add-weight-lines=s' => \$_WEIGHT_LINES,
@ -321,7 +323,6 @@ my $_ADDITIONAL_INI; # allow multiple switches
foreach (@_ADDITIONAL_INI) { $_ADDITIONAL_INI .= $_." "; }
chop($_ADDITIONAL_INI) if $_ADDITIONAL_INI;
$_HIERARCHICAL = 1 if $_SOURCE_SYNTAX || $_TARGET_SYNTAX;
$_XML = 1 if $_SOURCE_SYNTAX || $_TARGET_SYNTAX;
my $___FACTOR_DELIMITER = $_FACTOR_DELIMITER;
$___FACTOR_DELIMITER = '|' unless ($_FACTOR_DELIMITER);
@ -1608,6 +1609,7 @@ sub extract_phrase {
$cmd .= " --GZOutput ";
$cmd .= " --InstanceWeights $_INSTANCE_WEIGHTS_FILE " if defined $_INSTANCE_WEIGHTS_FILE;
$cmd .= " --BaselineExtract $_BASELINE_EXTRACT" if defined($_BASELINE_EXTRACT) && $PHRASE_EXTRACT =~ /extract-parallel.perl/;
$cmd .= " --TargetConstituentBoundaries" if $_TARGET_CONSTITUENT_BOUNDARIES;
$cmd .= " --FlexibilityScore" if $_FLEXIBILITY_SCORE;
$cmd .= " --NoTTable" if $_MMSAPT;
@ -1765,9 +1767,10 @@ sub score_phrase_phrase_extract {
$cmd .= " --SourceLabels $_GHKM_SOURCE_LABELS_FILE" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
$cmd .= " --TargetSyntacticPreferences $_TARGET_SYNTACTIC_PREFERENCES_LABELS_FILE" if $_TARGET_SYNTACTIC_PREFERENCES && defined($_TARGET_SYNTACTIC_PREFERENCES_LABELS_FILE);
$cmd .= " --PartsOfSpeech $_GHKM_PARTS_OF_SPEECH_FILE" if $_GHKM_PARTS_OF_SPEECH && defined($_GHKM_PARTS_OF_SPEECH_FILE);
$cmd .= " --TargetConstituentBoundaries" if $_TARGET_CONSTITUENT_BOUNDARIES;
$cmd .= " --FlexibilityScore=$FLEX_SCORER" if $_FLEXIBILITY_SCORE;
$cmd .= " $DOMAIN" if $DOMAIN;
$cmd .= " $CORE_SCORE_OPTIONS" if defined($_SCORE_OPTIONS);
$cmd .= " --FlexibilityScore=$FLEX_SCORER" if $_FLEXIBILITY_SCORE;
# sorting
if ($direction eq "e2f" || $_ALT_DIRECT_RULE_SCORE_1 || $_ALT_DIRECT_RULE_SCORE_2) {
@ -1904,7 +1907,7 @@ sub get_reordering {
# * the value stored in $REORDERING_MODEL_TYPES{$mtype} is a concatenation of the "orient"
# attributes such as "msd"
# * the "filename" attribute is appended to the filename, but actually serves as the main configuration specification
# for reordering scoring. it holds a string such as "wbe-msd-didirectional-fe"
# for reordering scoring. it holds a string such as "wbe-msd-bidirectional-fe"
# which has the more general format type-orient-dir-lang
$cmd .= " --model \"$mtype $REORDERING_MODEL_TYPES{$mtype}";
foreach my $model (@REORDERING_MODELS) {
@ -2325,7 +2328,7 @@ sub create_ini {
# hierarchical model settings
print INI "\n";
if ($_HIERARCHICAL) {
print INI "[unknown-lhs]\n$_UNKNOWN_WORD_LABEL_FILE\n\n" if $_TARGET_SYNTAX && defined($_UNKNOWN_WORD_LABEL_FILE);
print INI "[unknown-lhs]\n$_UNKNOWN_WORD_LABEL_FILE\n\n" if $_TARGET_SYNTAX && !$_TARGET_SYNTACTIC_PREFERENCES && defined($_UNKNOWN_WORD_LABEL_FILE);
print INI "[cube-pruning-pop-limit]\n1000\n\n";
print INI "[non-terminals]\nX\n\n";
print INI "[search-algorithm]\n3\n\n";
@ -2382,6 +2385,12 @@ sub create_ini {
chomp($TOPLABEL);
print INI " glue-label=$TOPLABEL\n";
}
if ($_HIERARCHICAL && $_TARGET_SYNTAX && $_TARGET_SYNTACTIC_PREFERENCES && defined($_TARGET_SYNTACTIC_PREFERENCES_LABELS_FILE)) {
print INI "TargetPreferencesFeature label-set-file=$_TARGET_SYNTACTIC_PREFERENCES_LABELS_FILE";
print INI " unknown-word-labels-file=$_UNKNOWN_WORD_LABEL_FILE" if defined($_UNKNOWN_WORD_LABEL_FILE);
print INI "\n";
}
print INI "TargetConstituentAdjacencyFeature\n" if $_TARGET_CONSTITUENT_BOUNDARIES;
print INI $feature_spec;
print INI "\n# dense weights for feature functions\n";
@ -2393,6 +2402,8 @@ sub create_ini {
print INI "PhrasePenalty0= 0.2\n";
print INI "SoftSourceSyntacticConstraintsFeature0= -0.2 -0.2 -0.2 0.1 0.1 0.1\n" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
print INI "PhraseOrientationFeature0= 0.05 0.05 0.05 0.05 0.05 0.05\n" if $_PHRASE_ORIENTATION;
print INI "TargetPreferencesFeature0= 0.2 -0.2\n" if $_HIERARCHICAL && $_TARGET_SYNTAX && $_TARGET_SYNTACTIC_PREFERENCES && defined($_TARGET_SYNTACTIC_PREFERENCES_LABELS_FILE);
print INI "TargetConstituentAdjacencyFeature0= 0.05 -0.1\n" if $_TARGET_CONSTITUENT_BOUNDARIES;
print INI $weight_spec;
close(INI);
}

View File

@ -58,6 +58,7 @@ class FileStream : public FakeOStream<FileStream> {
}
FileStream &seekp(uint64_t to) {
flush();
util::SeekOrThrow(fd_, to);
return *this;
}