Merge branch 'master' of github.com:moses-smt/mosesdecoder

2024-12-25 04:43:03 +03:00 · 2014-08-11 11:49:38 +01:00 · 2014-08-11 11:49:38 +01:00 · 27eee55a57
commit 27eee55a57
parent c5b3f67877 79b6b60237
79 changed files with 1449 additions and 217 deletions
--- a/5
+++ b/5
@ -1,5 +0,0 @@
-This code includes data from Daniel Naber's Language Tools (czech abbreviations).
-
-This code includes data from czech wiktionary (also czech abbreviations).
-
-
--- a/OnDiskPt/TargetPhrase.cpp
+++ b/OnDiskPt/TargetPhrase.cpp
@ -312,7 +312,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
  // property
  ret->SetProperties(m_property);

-  ret->Evaluate(mosesSP, phraseDict.GetFeaturesToApply());
+  ret->EvaluateInIsolation(mosesSP, phraseDict.GetFeaturesToApply());

  return ret;
 }
--- a/contrib/other-builds/consolidate/.cproject
+++ b/contrib/other-builds/consolidate/.cproject
@ -36,11 +36,15 @@
 							<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1950007837" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
 							<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.110628197" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
 								<option id="gnu.cpp.link.option.libs.1393924562" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
+									<listOptionValue builtIn="false" value="moses"/>
+									<listOptionValue builtIn="false" value="util"/>
 									<listOptionValue builtIn="false" value="boost_iostreams"/>
 									<listOptionValue builtIn="false" value="z"/>
 								</option>
 								<option id="gnu.cpp.link.option.paths.1967422094" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
 								</option>
 								<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1093223502" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
 									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
@ -52,6 +56,15 @@
 							</tool>
 						</toolChain>
 					</folderInfo>
+					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.2091728208.911524129" name="PropertiesConsolidator.cpp" rcbsApplicability="disable" resourcePath="PropertiesConsolidator.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1626949654.741737356">
+						<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1626949654.741737356" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1626949654">
+							<option id="gnu.cpp.compiler.option.include.paths.858416673" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+								<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
+								<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
+							</option>
+							<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2042647079" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+						</tool>
+					</fileInfo>
 				</configuration>
 			</storageModule>
 			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
--- a/contrib/other-builds/consolidate/.project
+++ b/contrib/other-builds/consolidate/.project
@ -3,6 +3,8 @@
 	<name>consolidate</name>
 	<comment></comment>
 	<projects>
+		<project>moses</project>
+		<project>util</project>
 	</projects>
 	<buildSpec>
 		<buildCommand>
@ -45,6 +47,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
 		</link>
+		<link>
+			<name>PropertiesConsolidator.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/PropertiesConsolidator.cpp</locationURI>
+		</link>
+		<link>
+			<name>PropertiesConsolidator.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/PropertiesConsolidator.h</locationURI>
+		</link>
 		<link>
 			<name>consolidate-main.cpp</name>
 			<type>1</type>
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@ -1306,6 +1306,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SoftMatchingFeature.h</locationURI>
 		</link>
+		<link>
+			<name>FF/SoftSourceSyntacticConstraintsFeature.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/SoftSourceSyntacticConstraintsFeature.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SoftSourceSyntacticConstraintsFeature.h</locationURI>
+		</link>
 		<link>
 			<name>FF/SourceGHKMTreeInputMatchFeature.cpp</name>
 			<type>1</type>
@ -1686,6 +1696,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/PP/NonTermContextProperty.h</locationURI>
 		</link>
+		<link>
+			<name>PP/OrientationPhraseProperty.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/PP/OrientationPhraseProperty.cpp</locationURI>
+		</link>
+		<link>
+			<name>PP/OrientationPhraseProperty.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/PP/OrientationPhraseProperty.h</locationURI>
+		</link>
 		<link>
 			<name>PP/PhraseProperty.cpp</name>
 			<type>1</type>
--- a/doc/PhraseDictionaryBitextSampling.howto
+++ b/doc/PhraseDictionaryBitextSampling.howto
@ -1,3 +1,4 @@
 The documentation for memory-mapped, dynamic suffix arrays has moved to 
   http://www.statmt.org/moses/?n=Moses.AdvancedFeatures#ntoc40

+Search for PhraseDictionaryBitextSampling.
--- a/moses/BitmapContainer.cpp
+++ b/moses/BitmapContainer.cpp
@ -215,7 +215,7 @@ Hypothesis *BackwardsEdge::CreateHypothesis(const Hypothesis &hypothesis, const
  IFVERBOSE(2) {
    hypothesis.GetManager().GetSentenceStats().StopTimeBuildHyp();
  }
-  newHypo->Evaluate(m_futurescore);
+  newHypo->EvaluateWhenApplied(m_futurescore);

  return newHypo;
 }
--- a/moses/ChartCell.cpp
+++ b/moses/ChartCell.cpp
@ -60,7 +60,7 @@ ChartCell::~ChartCell() {}

 /** Add the given hypothesis to the cell.
 *  Returns true if added, false if not. Maybe it already exists in the collection or score falls below threshold etc.
- *  This function just calls the correspondind AddHypothesis() in ChartHypothesisCollection
+ *  This function just calls the corresponding AddHypothesis() in ChartHypothesisCollection
 *  \param hypo Hypothesis to be added
 */
 bool ChartCell::AddHypothesis(ChartHypothesis *hypo)
--- a/moses/ChartHypothesis.cpp
+++ b/moses/ChartHypothesis.cpp
@ -212,7 +212,7 @@ int ChartHypothesis::RecombineCompare(const ChartHypothesis &compare) const
 /** calculate total score
  * @todo this should be in ScoreBreakdown
 */
-void ChartHypothesis::Evaluate()
+void ChartHypothesis::EvaluateWhenApplied()
 {
  const StaticData &staticData = StaticData::Instance();
  // total scores from prev hypos
--- a/moses/ChartHypothesis.h
+++ b/moses/ChartHypothesis.h
@ -144,7 +144,7 @@ public:

  int RecombineCompare(const ChartHypothesis &compare) const;

-  void Evaluate();
+  void EvaluateWhenApplied();

  void AddArc(ChartHypothesis *loserHypo);
  void CleanupArcList();
--- a/moses/ChartHypothesisCollection.cpp
+++ b/moses/ChartHypothesisCollection.cpp
@ -56,7 +56,7 @@ ChartHypothesisCollection::~ChartHypothesisCollection()
 /** public function to add hypothesis to this collection.
 * Returns false if equiv hypo exists in collection, otherwise returns true.
 * Takes care of update arc list for n-best list creation.
- * Will delete hypo is it exist - once this function is call don't delete hypothesis.
+ * Will delete hypo if it exists - once this function is called, don't delete hypothesis.
 * \param hypo hypothesis to add
 * \param manager pointer back to manager
 */
--- a/moses/ChartManager.cpp
+++ b/moses/ChartManager.cpp
@ -87,7 +87,7 @@ void ChartManager::ProcessSentence()
      m_translationOptionList.ApplyThreshold();

      const InputPath &inputPath = m_parser.GetInputPath(range);
-      m_translationOptionList.Evaluate(m_source, inputPath);
+      m_translationOptionList.EvaluateWithSourceContext(m_source, inputPath);

      // decode
      ChartCell &cell = m_hypoStackColl.Get(range);
@ -143,7 +143,7 @@ void ChartManager::AddXmlChartOptions()

    RuleCubeItem* item = new RuleCubeItem( *opt, m_hypoStackColl );
    ChartHypothesis* hypo = new ChartHypothesis(*opt, *item, *this);
-    hypo->Evaluate();
+    hypo->EvaluateWhenApplied();


    ChartCell &cell = m_hypoStackColl.Get(range);
--- a/moses/ChartParser.cpp
+++ b/moses/ChartParser.cpp
@ -68,6 +68,12 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range

  m_unksrcs.push_back(unksrc);

+  // hack. Once the OOV FF is a phrase table, get rid of this
+  PhraseDictionary *firstPt = NULL; // stays NULL when no phrase table is registered
+  if (PhraseDictionary::GetColl().size() > 0) { // only index element 0 of a non-empty collection
+    firstPt = PhraseDictionary::GetColl()[0];
+  }
+
  //TranslationOption *transOpt;
  if (! staticData.GetDropUnknown() || isDigit) {
    // loop
@ -85,7 +91,7 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
      UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");

      // add to dictionary
-      TargetPhrase *targetPhrase = new TargetPhrase(NULL);
+      TargetPhrase *targetPhrase = new TargetPhrase(firstPt);
      Word &targetWord = targetPhrase->AddWord();
      targetWord.CreateUnknownWord(sourceWord);

@ -93,7 +99,7 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
      float unknownScore = FloorScore(TransformScore(prob));

      targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
-      targetPhrase->Evaluate(*unksrc);
+      targetPhrase->EvaluateInIsolation(*unksrc);

      targetPhrase->SetTargetLHS(targetLHS);
      targetPhrase->SetAlignmentInfo("0-0");
@ -108,7 +114,7 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
    // drop source word. create blank trans opt
    float unknownScore = FloorScore(-numeric_limits<float>::infinity());

-    TargetPhrase *targetPhrase = new TargetPhrase(NULL);
+    TargetPhrase *targetPhrase = new TargetPhrase(firstPt);
    // loop
    const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
    UnknownLHSList::const_iterator iterLHS;
@ -121,7 +127,7 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
      UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");

      targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
-      targetPhrase->Evaluate(*unksrc);
+      targetPhrase->EvaluateInIsolation(*unksrc);

      targetPhrase->SetTargetLHS(targetLHS);

--- a/moses/ChartParserCallback.h
+++ b/moses/ChartParserCallback.h
@ -25,7 +25,7 @@ public:

  virtual void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range) = 0;

-  virtual void Evaluate(const InputType &input, const InputPath &inputPath) = 0;
+  virtual void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath) = 0;

  virtual float GetBestScore(const ChartCellLabel *chartCell) const = 0;

--- a/moses/ChartTranslationOption.cpp
+++ b/moses/ChartTranslationOption.cpp
@ -10,7 +10,7 @@ ChartTranslationOption::ChartTranslationOption(const TargetPhrase &targetPhrase)
 {
 }

-void ChartTranslationOption::Evaluate(const InputType &input,
+void ChartTranslationOption::EvaluateWithSourceContext(const InputType &input,
 		const InputPath &inputPath,
 		const StackVec &stackVec)
 {
--- a/moses/ChartTranslationOption.h
+++ b/moses/ChartTranslationOption.h
@ -44,7 +44,7 @@ public:
    return m_scoreBreakdown;
  }

-  void Evaluate(const InputType &input,
+  void EvaluateWithSourceContext(const InputType &input,
 		  const InputPath &inputPath,
 		  const StackVec &stackVec);
 };
--- a/moses/ChartTranslationOptionList.cpp
+++ b/moses/ChartTranslationOptionList.cpp
@ -168,13 +168,13 @@ float ChartTranslationOptionList::GetBestScore(const ChartCellLabel *chartCell)
 	return bestHypo.GetTotalScore();
 }

-void ChartTranslationOptionList::Evaluate(const InputType &input, const InputPath &inputPath)
+void ChartTranslationOptionList::EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath)
 {
  // NEVER iterate over ALL of the collection. Just over the first m_size
  CollType::iterator iter;
  for (iter = m_collection.begin(); iter != m_collection.begin() + m_size; ++iter) {
    ChartTranslationOptions &transOpts = **iter;
-    transOpts.Evaluate(input, inputPath);
+    transOpts.EvaluateWithSourceContext(input, inputPath);
  }

  // get rid of empty trans opts
--- a/moses/ChartTranslationOptionList.h
+++ b/moses/ChartTranslationOptionList.h
@ -65,7 +65,7 @@ public:

  void Clear();
  void ApplyThreshold();
-  void Evaluate(const InputType &input, const InputPath &inputPath);
+  void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath);

 private:
  typedef std::vector<ChartTranslationOptions*> CollType;
--- a/moses/ChartTranslationOptions.cpp
+++ b/moses/ChartTranslationOptions.cpp
@ -51,7 +51,7 @@ ChartTranslationOptions::~ChartTranslationOptions()

 }

-void ChartTranslationOptions::Evaluate(const InputType &input, const InputPath &inputPath)
+void ChartTranslationOptions::EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath)
 {
  SetInputPath(&inputPath);
  if (StaticData::Instance().GetPlaceholderFactor() != NOT_FOUND) {
@ -62,7 +62,7 @@ void ChartTranslationOptions::Evaluate(const InputType &input, const InputPath &
  for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
    ChartTranslationOption &transOpt = **iter;
    transOpt.SetInputPath(&inputPath);
-    transOpt.Evaluate(input, inputPath, m_stackVec);
+    transOpt.EvaluateWithSourceContext(input, inputPath, m_stackVec);
  }

  // get rid of -inf trans opts
--- a/moses/ChartTranslationOptions.h
+++ b/moses/ChartTranslationOptions.h
@ -85,7 +85,7 @@ public:
    return m_estimateOfBestScore;
  }

-  void Evaluate(const InputType &input, const InputPath &inputPath);
+  void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath);

  void SetInputPath(const InputPath *inputPath);

--- a/moses/DecodeStepGeneration.cpp
+++ b/moses/DecodeStepGeneration.cpp
@ -148,7 +148,7 @@ void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOp
    outPhrase.GetScoreBreakdown().PlusEquals(generationScore);

    outPhrase.MergeFactors(genPhrase, m_newOutputFactors);
-    outPhrase.Evaluate(inputPath.GetPhrase(), m_featuresToApply);
+    outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply);

    const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();

--- a/moses/DecodeStepTranslation.cpp
+++ b/moses/DecodeStepTranslation.cpp
@ -84,7 +84,7 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
      }

      outPhrase.Merge(targetPhrase, m_newOutputFactors);
-      outPhrase.Evaluate(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
+      outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up

      TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase);
      assert(newTransOpt != NULL);
@ -258,7 +258,7 @@ void DecodeStepTranslation::ProcessLEGACY(const TranslationOption &inputPartialT
      }

      outPhrase.Merge(targetPhrase, m_newOutputFactors);
-      outPhrase.Evaluate(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
+      outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up


      TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase);
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@ -35,6 +35,7 @@
 #include "moses/FF/ControlRecombination.h"
 #include "moses/FF/ExternalFeature.h"
 #include "moses/FF/ConstrainedDecoding.h"
+#include "moses/FF/SoftSourceSyntacticConstraintsFeature.h"
 #include "moses/FF/CoveredReferenceFeature.h"
 #include "moses/FF/TreeStructureFeature.h"
 #include "moses/FF/SoftMatchingFeature.h"
@ -48,11 +49,11 @@
 #include "NieceTerminal.h"
 #include "SpanLength.h"
 #include "SyntaxRHS.h"
-#include "SkeletonChangeInput.h"

 #include "moses/FF/SkeletonStatelessFF.h"
 #include "moses/FF/SkeletonStatefulFF.h"
 #include "moses/LM/SkeletonLM.h"
+#include "SkeletonChangeInput.h"
 #include "moses/TranslationModel/SkeletonPT.h"

 #ifdef HAVE_CMPH
@ -197,6 +198,7 @@ FeatureRegistry::FeatureRegistry()
  MOSES_FNAME(CoveredReferenceFeature);
  MOSES_FNAME(ExternalFeature);
  MOSES_FNAME(SourceGHKMTreeInputMatchFeature);
+  MOSES_FNAME(SoftSourceSyntacticConstraintsFeature);
  MOSES_FNAME(TreeStructureFeature);
  MOSES_FNAME(SoftMatchingFeature);
  MOSES_FNAME(HyperParameterAsWeight);
@ -209,11 +211,11 @@ FeatureRegistry::FeatureRegistry()
  MOSES_FNAME(SparseHieroReorderingFeature);
  MOSES_FNAME(SpanLength);
  MOSES_FNAME(SyntaxRHS);
-  MOSES_FNAME(SkeletonChangeInput);

  MOSES_FNAME(SkeletonStatelessFF);
  MOSES_FNAME(SkeletonStatefulFF);
  MOSES_FNAME(SkeletonLM);
+  MOSES_FNAME(SkeletonChangeInput);
  MOSES_FNAME(SkeletonPT);

 #ifdef HAVE_CMPH
--- a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
+++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
@ -0,0 +1,536 @@
+#include <vector>
+#include <limits>
+#include <assert.h>
+#include "SoftSourceSyntacticConstraintsFeature.h"
+#include "moses/StaticData.h"
+#include "moses/InputFileStream.h"
+#include "moses/ScoreComponentCollection.h"
+#include "moses/Hypothesis.h"
+#include "moses/ChartHypothesis.h"
+#include "moses/ChartManager.h"
+#include "moses/FactorCollection.h"
+#include "moses/TreeInput.h"
+#include "moses/PP/SourceLabelsPhraseProperty.h"
+
+
+using namespace std;
+
+namespace Moses
+{
+
+// Builds the feature from its configuration line.
+// Passes 3 as the score component count to StatelessFeatureFunction and starts
+// with feature variant 0 (SetParameter() assigns another value for the key
+// "featureVariant"). ReadParameters() is called before the variant is logged.
+// \param line feature configuration line, forwarded to the base class
+SoftSourceSyntacticConstraintsFeature::SoftSourceSyntacticConstraintsFeature(const std::string &line)
+  : StatelessFeatureFunction(3, line)
+  , m_featureVariant(0)
+{
+  // progress messages are emitted at verbosity level 1
+  VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
+
+  ReadParameters();
+
+  VERBOSE(1, " Done.");
+  VERBOSE(1, " Feature variant: " << m_featureVariant << "." << std::endl);
+}
+
+// Handles one key/value pair of the feature's configuration.
+// Keys recognised here are stored in the matching member; every other key is
+// forwarded to StatelessFeatureFunction::SetParameter().
+// \param key   option name
+// \param value option value as text; converted with Scan<> for non-string options
+void SoftSourceSyntacticConstraintsFeature::SetParameter(const std::string& key, const std::string& value)
+{
+  if (key == "sourceLabelSetFile") {
+    m_sourceLabelSetFile = value;
+    return;
+  }
+  if (key == "coreSourceLabelSetFile") {
+    m_coreSourceLabelSetFile = value;
+    return;
+  }
+  if (key == "targetSourceLeftHandSideJointCountFile") {
+    m_targetSourceLHSJointCountFile = value;
+    return;
+  }
+  if (key == "tuneable") {
+    m_tuneable = Scan<bool>(value);
+    return;
+  }
+  if (key == "featureVariant") {
+    // 0: only dense features, 1: no mismatches (also set weights 1 0 0 and tuneable=false),
+    // 2: with sparse features, 3: with sparse features for core labels only
+    m_featureVariant = Scan<size_t>(value);
+    return;
+  }
+
+  // not one of this feature's own options
+  StatelessFeatureFunction::SetParameter(key, value);
+}
+
+
+// Loads the resource files of this feature.
+// The source label set is always read first: both later loaders look labels up
+// in m_sourceLabels. The core label set is only read for feature variant 3, and
+// the joint count file only when a file name has been configured.
+void SoftSourceSyntacticConstraintsFeature::Load()
+{
+  const bool needsCoreLabels = (m_featureVariant == 3);
+  const bool hasJointCountFile = !m_targetSourceLHSJointCountFile.empty();
+
+  // don't change the loading order!
+  LoadSourceLabelSet();
+
+  if (needsCoreLabels) {
+    LoadCoreSourceLabelSet();
+  }
+
+  if (hasJointCountFile) {
+    LoadTargetSourceLeftHandSideJointCountFile();
+  }
+}
+
+// Reads the source label set file, one "<label> <index>" entry per line.
+// Fills m_sourceLabels (label -> index), m_sourceLabelsByIndex (index -> label)
+// and m_sourceLabelIndexesByFactor (factor -> index). Afterwards it checks that
+// the special labels "GlueTop" and "GlueX" are present and stores the index of
+// "GlueTop" in m_GlueTopLabel.
+// Throws (via UTIL_THROW_IF2) on a line that does not hold a label followed by
+// an index, on a label that occurs twice, and on a missing special label.
+void SoftSourceSyntacticConstraintsFeature::LoadSourceLabelSet()
+{
+  VERBOSE(2, GetScoreProducerDescription() << ": Loading source label set from file " << m_sourceLabelSetFile << std::endl);
+  InputFileStream inFile(m_sourceLabelSetFile);
+
+  FactorCollection &factorCollection = FactorCollection::Instance();
+
+  // read source label set
+  std::string line;
+  m_sourceLabels.clear();
+  m_sourceLabelsByIndex.clear();
+  m_sourceLabelIndexesByFactor.clear();
+  while (getline(inFile, line)) {
+    std::istringstream tokenizer(line);
+    std::string label;
+    size_t index = 0;
+    // No exception mask is set on tokenizer, so a failed extraction is only
+    // visible in the stream state. Test it explicitly: a malformed (or empty)
+    // line must not reach the table updates below with an unset index.
+    UTIL_THROW_IF2(!(tokenizer >> label >> index), GetScoreProducerDescription()
+                   << ": Error reading source label set file " << m_sourceLabelSetFile << " .");
+
+    std::pair< boost::unordered_map<std::string,size_t>::iterator, bool > inserted = m_sourceLabels.insert( std::pair<std::string,size_t>(label,index) );
+    UTIL_THROW_IF2(!inserted.second, GetScoreProducerDescription()
+                   << ": Source label set file " << m_sourceLabelSetFile << " should contain each syntactic label only once.");
+
+    // grow the index -> label table on demand; indexes need not arrive in order
+    if (index >= m_sourceLabelsByIndex.size()) {
+      m_sourceLabelsByIndex.resize(index+1);
+    }
+    m_sourceLabelsByIndex[index] = label;
+    const Factor* sourceLabelFactor = factorCollection.AddFactor(label,true);
+    m_sourceLabelIndexesByFactor[sourceLabelFactor] = index;
+  }
+
+  inFile.Close();
+
+  // every special label must have an entry in the file
+  std::list<std::string> specialLabels;
+  specialLabels.push_back("GlueTop");
+  specialLabels.push_back("GlueX");
+//  specialLabels.push_back("XRHS");
+//  specialLabels.push_back("XLHS");
+  for (std::list<std::string>::const_iterator iter=specialLabels.begin();
+       iter!=specialLabels.end(); ++iter) {
+    boost::unordered_map<std::string,size_t>::iterator found = m_sourceLabels.find(*iter);
+    UTIL_THROW_IF2(found == m_sourceLabels.end(), GetScoreProducerDescription()
+                   << ": Source label set file " << m_sourceLabelSetFile << " should contain an entry for the special label \"" << *iter << "\".");
+    if (found->first == "GlueTop") {
+      m_GlueTopLabel = found->second;
+//    } else if (found->first == "XRHS") {
+//      m_XRHSLabel = found->second;
+//    } else if (found->first == "XLHS") {
+//      m_XLHSLabel = found->second;
+    }
+  }
+}
+
+// Reads the core source label set file and records in m_coreSourceLabels the
+// index of every listed label that is known from the source label set.
+// Only the first whitespace-delimited token of each line is used. A label that
+// is missing from m_sourceLabels is skipped with a message at verbosity 2.
+void SoftSourceSyntacticConstraintsFeature::LoadCoreSourceLabelSet()
+{
+  VERBOSE(2, GetScoreProducerDescription() << ": Loading core source label set from file " << m_coreSourceLabelSetFile << std::endl);
+  InputFileStream inFile(m_coreSourceLabelSetFile);
+
+  // start from an empty core label set
+  m_coreSourceLabels.clear();
+
+  std::string line;
+  while (getline(inFile, line)) {
+    std::string label;
+    istringstream tokenizer(line);
+    tokenizer >> label;
+
+    boost::unordered_map<std::string,size_t>::iterator known = m_sourceLabels.find( label );
+    if ( known == m_sourceLabels.end() ) {
+      VERBOSE(2, GetScoreProducerDescription()
+              << ": Ignoring unknown source label \"" << label << "\" "
+              << "from core source label set file " << m_coreSourceLabelSetFile << "."
+              << std::endl);
+      continue;
+    }
+
+    // store the label's index, not its name
+    m_coreSourceLabels.insert(known->second);
+  }
+
+  inFile.Close();
+}
+
+// Reads target/source left-hand-side label joint counts and converts them into
+// relative frequencies stored in m_labelPairProbabilities.
+// Each line holds "<targetLabel> <sourceLabel> <count>". For every target label
+// the map keeps a heap-allocated vector with one (first,second) pair per source
+// label index. After normalization, first is the joint count divided by the
+// total count of the target label, and second is the joint count divided by
+// the total count of the source label.
+// Source labels are resolved through m_sourceLabels, so the source label set
+// has to be loaded beforehand.
+// Throws on a line without all three fields, on an unknown source label, and
+// (std::out_of_range from at()) on a source label index that does not fit into
+// a table of m_sourceLabels.size() entries.
+void SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCountFile()
+{
+  typedef std::vector< std::pair<float,float> > ProbabilityVector;
+  typedef boost::unordered_map<const Factor*, ProbabilityVector* > LabelPairMap;
+  typedef boost::unordered_map<const Factor*,float> TargetCountMap;
+
+  VERBOSE(2, GetScoreProducerDescription() << ": Loading target/source label joint counts from file " << m_targetSourceLHSJointCountFile << std::endl);
+  InputFileStream inFile(m_targetSourceLHSJointCountFile);
+
+  // release the vectors of a previous load before refilling the map
+  for (LabelPairMap::iterator iter=m_labelPairProbabilities.begin();
+       iter!=m_labelPairProbabilities.end(); ++iter) {
+    delete iter->second;
+  }
+  m_labelPairProbabilities.clear();
+
+  // read joint counts
+  std::string line;
+  FactorCollection &factorCollection = FactorCollection::Instance();
+  TargetCountMap targetLHSCounts;
+  std::vector<float> sourceLHSCounts(m_sourceLabels.size(),0.0);
+
+  while (getline(inFile, line)) {
+    istringstream tokenizer(line);
+    std::string targetLabel;
+    std::string sourceLabel;
+    float count = 0;
+    // A failed extraction only shows in the stream state; without this check a
+    // line lacking the count would add an unset value to the tables.
+    UTIL_THROW_IF2(!(tokenizer >> targetLabel >> sourceLabel >> count), GetScoreProducerDescription()
+                   << ": Error reading target/source label joint count file " << m_targetSourceLHSJointCountFile << " .");
+
+    boost::unordered_map<std::string,size_t>::iterator foundSourceLabelIndex = m_sourceLabels.find( sourceLabel );
+    UTIL_THROW_IF2(foundSourceLabelIndex == m_sourceLabels.end(), GetScoreProducerDescription()
+                   << ": Target/source label joint count file " << m_targetSourceLHSJointCountFile
+                   << " contains unknown source label \"" << sourceLabel << "\".");
+    const size_t sourceIndex = foundSourceLabelIndex->second;
+
+    const Factor* targetLabelFactor = factorCollection.AddFactor(targetLabel,true);
+
+    // at() instead of []: the index comes from the label set file and is not
+    // guaranteed to be below m_sourceLabels.size(). Checking here also means
+    // the failure happens before any allocation further down.
+    sourceLHSCounts.at(sourceIndex) += count;
+
+    std::pair< TargetCountMap::iterator, bool > insertedTargetLHSCount =
+      targetLHSCounts.insert( std::pair<const Factor*,float>(targetLabelFactor,count) );
+    if (!insertedTargetLHSCount.second) {
+      // target label seen before: accumulate into its existing entries
+      (insertedTargetLHSCount.first)->second += count;
+      LabelPairMap::iterator jointCountIt = m_labelPairProbabilities.find( targetLabelFactor );
+      assert(jointCountIt != m_labelPairProbabilities.end());
+      std::pair<float,float> &jointCount = (jointCountIt->second)->at(sourceIndex);
+      jointCount.first += count;
+      jointCount.second += count;
+    } else {
+      // first occurrence of this target label: create its vector of pairs
+      std::pair<float,float> init(0.0,0.0);
+      ProbabilityVector* sourceVector = new ProbabilityVector(m_sourceLabels.size(),init);
+      sourceVector->at(sourceIndex) = std::pair<float,float>(count,count);
+      std::pair< LabelPairMap::iterator, bool > insertedJointCount =
+        m_labelPairProbabilities.insert( std::pair<const Factor*, ProbabilityVector* >(targetLabelFactor,sourceVector) );
+      assert(insertedJointCount.second);
+    }
+  }
+
+  // normalization
+  for (LabelPairMap::iterator iter=m_labelPairProbabilities.begin();
+       iter!=m_labelPairProbabilities.end(); ++iter) {
+    float targetLHSCount = 0;
+    TargetCountMap::const_iterator targetLHSCountIt = targetLHSCounts.find( iter->first );
+    if ( targetLHSCountIt != targetLHSCounts.end() ) {
+      targetLHSCount = targetLHSCountIt->second;
+    }
+    ProbabilityVector &probabilities = *(iter->second);
+    for (size_t index=0; index<probabilities.size(); ++index) {
+      // zero entries are left untouched, so no division by a zero total occurs
+      if ( probabilities[index].first != 0 ) {
+        assert(targetLHSCount != 0);
+        probabilities[index].first  /= targetLHSCount;
+      }
+      if ( probabilities[index].second != 0 ) {
+        assert(sourceLHSCounts[index] != 0);
+        probabilities[index].second /= sourceLHSCounts[index];
+      }
+    }
+  }
+
+  inFile.Close();
+}
+
+
+void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
+  const ChartHypothesis& hypo,
+  ScoreComponentCollection* accumulator) const
+{
+  // dense scores
+  std::vector<float> newScores(m_numScoreComponents,0); // m_numScoreComponents == 3
+
+  const InputType& input = hypo.GetManager().GetSource();
+  const TreeInput& treeInput = static_cast<const TreeInput&>(input);
+  const StaticData& staticData = StaticData::Instance();
+  const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal();
+
+  size_t nNTs = 1;
+  bool treeInputMismatchLHSBinary = true;
+  size_t treeInputMismatchRHSCount = 0;
+  bool hasCompleteTreeInputMatch = false;
+  float t2sLabelsProb = 1;
+  float s2tLabelsProb = 1;
+  float ruleLabelledProbability = 1;
+
+  // read SourceLabels property
+  const TargetPhrase &currTarPhr = hypo.GetCurrTargetPhrase();
+  const Factor* targetLHS = currTarPhr.GetTargetLHS()[0];
+  bool isGlueGrammarRule = false;
+  bool isUnkRule = false;
+
+  if (const PhraseProperty *property = currTarPhr.GetProperty("SourceLabels")) {
+
+    const SourceLabelsPhraseProperty *sourceLabelsPhraseProperty = static_cast<const SourceLabelsPhraseProperty*>(property); 
+
+    nNTs = sourceLabelsPhraseProperty->GetNumberOfNonTerminals(); 
+    float totalCount = sourceLabelsPhraseProperty->GetTotalCount();
+  
+    // prepare for input tree label matching
+    std::vector< boost::unordered_set<size_t> > treeInputLabelsRHS(nNTs-1);
+    boost::unordered_set<size_t> treeInputLabelsLHS;
+
+    // get index map for underlying hypotheses
+    const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+      currTarPhr.GetAlignNonTerm().GetNonTermIndexMap();
+
+    std::vector<const Factor*> targetLabelsRHS;
+    if (nNTs > 1) { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
+      size_t nonTerminalNumber = 0;
+  
+      for (size_t phrasePos=0; phrasePos<currTarPhr.GetSize(); ++phrasePos) {
+        // consult rule for either word or non-terminal
+        const Word &word = currTarPhr.GetWord(phrasePos);
+        if ( word.IsNonTerminal() ) {
+          // non-terminal: consult subderivation
+          size_t nonTermIndex = nonTermIndexMap[phrasePos];
+          const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
+          targetLabelsRHS.push_back( prevHypo->GetTargetLHS()[0] );
+
+          // retrieve information that is required for input tree label matching (RHS)
+          const WordsRange& prevWordsRange = prevHypo->GetCurrSourceRange();
+          size_t prevStartPos = prevWordsRange.GetStartPos();
+          size_t prevEndPos = prevWordsRange.GetEndPos();
+          const NonTerminalSet& prevTreeInputLabels = treeInput.GetLabelSet(prevStartPos,prevEndPos);
+
+          for (NonTerminalSet::const_iterator prevTreeInputLabelsIt = prevTreeInputLabels.begin();
+               prevTreeInputLabelsIt != prevTreeInputLabels.end(); ++prevTreeInputLabelsIt) {
+            if (*prevTreeInputLabelsIt != outputDefaultNonTerminal) {
+              boost::unordered_map<const Factor*,size_t>::const_iterator foundPrevTreeInputLabel 
+                = m_sourceLabelIndexesByFactor.find((*prevTreeInputLabelsIt)[0]);
+              if (foundPrevTreeInputLabel != m_sourceLabelIndexesByFactor.end()) {
+                size_t prevTreeInputLabelIndex = foundPrevTreeInputLabel->second;
+                treeInputLabelsRHS[nonTerminalNumber].insert(prevTreeInputLabelIndex);
+              }
+            }
+          }
+    
+          ++nonTerminalNumber;
+        }
+      }
+    }
+
+    // retrieve information that is required for input tree label matching (LHS)
+    const WordsRange& wordsRange = hypo.GetCurrSourceRange();
+    size_t startPos = wordsRange.GetStartPos();
+    size_t endPos = wordsRange.GetEndPos();
+    const NonTerminalSet& treeInputLabels = treeInput.GetLabelSet(startPos,endPos);
+
+    for (NonTerminalSet::const_iterator treeInputLabelsIt = treeInputLabels.begin();
+         treeInputLabelsIt != treeInputLabels.end(); ++treeInputLabelsIt) {
+      if (*treeInputLabelsIt != outputDefaultNonTerminal) {
+        boost::unordered_map<const Factor*,size_t>::const_iterator foundTreeInputLabel 
+          = m_sourceLabelIndexesByFactor.find((*treeInputLabelsIt)[0]);
+        if (foundTreeInputLabel != m_sourceLabelIndexesByFactor.end()) {
+          size_t treeInputLabelIndex = foundTreeInputLabel->second;
+          treeInputLabelsLHS.insert(treeInputLabelIndex);
+        }
+      }
+    }
+
+  
+    // inspect source-labelled rule items
+
+    std::vector< boost::unordered_set<size_t> > sparseScoredTreeInputLabelsRHS(nNTs-1);
+    boost::unordered_set<size_t> sparseScoredTreeInputLabelsLHS;
+
+    std::vector<bool> sourceLabelSeenAsLHS(m_sourceLabels.size(),false);
+    std::vector<bool> treeInputMatchRHSCountByNonTerminal(nNTs-1,false);
+
+    const std::list<SourceLabelsPhrasePropertyItem> &sourceLabelItems = sourceLabelsPhraseProperty->GetSourceLabelItems();
+
+    for (std::list<SourceLabelsPhrasePropertyItem>::const_iterator sourceLabelItem = sourceLabelItems.begin();
+         sourceLabelItem != sourceLabelItems.end() && !hasCompleteTreeInputMatch; ++sourceLabelItem) {
+
+      const std::list<size_t> &sourceLabelsRHS = sourceLabelItem->GetSourceLabelsRHS();
+      // float sourceLabelsRHSCount = sourceLabelItem->GetSourceLabelsRHSCount();
+      const std::list< std::pair<size_t,float> > &sourceLabelsLHSList = sourceLabelItem->GetSourceLabelsLHSList();
+
+      assert(sourceLabelsRHS.size() == nNTs-1);
+
+      bool currentSourceLabelItemIsCompleteTreeInputMatch = true;
+
+      size_t nonTerminalNumber=0;
+      for (std::list<size_t>::const_iterator sourceLabelsRHSIt = sourceLabelsRHS.begin();
+           sourceLabelsRHSIt != sourceLabelsRHS.end(); ++sourceLabelsRHSIt, ++nonTerminalNumber) {
+
+        if (treeInputLabelsRHS[nonTerminalNumber].find(*sourceLabelsRHSIt) != treeInputLabelsRHS[nonTerminalNumber].end()) {
+
+          treeInputMatchRHSCountByNonTerminal[nonTerminalNumber] = true;
+
+          if ( m_featureVariant == 2 || 
+               (m_featureVariant == 3 && m_coreSourceLabels.find(*sourceLabelsRHSIt) != m_coreSourceLabels.end()) ) {
+            // score sparse features: RHS match
+            if (sparseScoredTreeInputLabelsRHS[nonTerminalNumber].find(*sourceLabelsRHSIt) == sparseScoredTreeInputLabelsRHS[nonTerminalNumber].end()) {
+            // (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem)
+              float score_RHS_1 = (float)1/treeInputLabelsRHS[nonTerminalNumber].size();
+              accumulator->PlusEquals(this,
+                                      std::string("RHS_1_" + m_sourceLabelsByIndex[*sourceLabelsRHSIt]),
+                                      score_RHS_1); 
+              sparseScoredTreeInputLabelsRHS[nonTerminalNumber].insert(*sourceLabelsRHSIt);
+            }
+          }
+
+        } else {
+
+          currentSourceLabelItemIsCompleteTreeInputMatch = false;
+
+        }
+      }
+
+      // LHS source non-terminal labels seen with this RHS
+      bool currentSourceLabelItemHasLHSTreeInputMatch = false;
+      //float ruleLabelledCount = 0;
+      std::list< std::pair<size_t,float> >::const_iterator sourceLabelsLHSIt;
+
+      for (sourceLabelsLHSIt = sourceLabelsLHSList.begin(); sourceLabelsLHSIt != sourceLabelsLHSList.end(); ++sourceLabelsLHSIt) {
+
+        if ( sourceLabelsLHSIt->first == m_GlueTopLabel ) {
+          isGlueGrammarRule = true;
+        }
+
+        if (treeInputLabelsLHS.find(sourceLabelsLHSIt->first) != treeInputLabelsLHS.end()) {
+
+          currentSourceLabelItemHasLHSTreeInputMatch = true;
+
+          if ( m_featureVariant == 2 || 
+               (m_featureVariant == 3 && m_coreSourceLabels.find(sourceLabelsLHSIt->first) != m_coreSourceLabels.end()) ) {
+            // score sparse features: LHS match
+            if (sparseScoredTreeInputLabelsLHS.find(sourceLabelsLHSIt->first) == sparseScoredTreeInputLabelsLHS.end()) {
+            // (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem)
+              float score_LHS_1 = (float)1/treeInputLabelsLHS.size();
+              accumulator->PlusEquals(this,
+                                      std::string("LHS_1_" + m_sourceLabelsByIndex[sourceLabelsLHSIt->first]),
+                                      score_LHS_1); 
+              sparseScoredTreeInputLabelsLHS.insert(sourceLabelsLHSIt->first);
+            }
+          }
+          break;
+
+        }
+      }
+
+      if (currentSourceLabelItemHasLHSTreeInputMatch) {
+        // input tree matching (LHS)
+        treeInputMismatchLHSBinary = false;
+      } else {
+        currentSourceLabelItemIsCompleteTreeInputMatch = false;
+      }
+
+      if (currentSourceLabelItemIsCompleteTreeInputMatch) {
+        hasCompleteTreeInputMatch = true;
+
+        ruleLabelledProbability = sourceLabelsLHSIt->second / totalCount;
+        std::pair<float,float> probPair = GetLabelPairProbabilities( targetLHS, sourceLabelsLHSIt->first);
+        t2sLabelsProb = probPair.first;
+        s2tLabelsProb = probPair.second;
+        nonTerminalNumber=0;
+        for (std::list<size_t>::const_iterator sourceLabelsRHSIt = sourceLabelsRHS.begin();
+             sourceLabelsRHSIt != sourceLabelsRHS.end(); ++sourceLabelsRHSIt, ++nonTerminalNumber) {
+          probPair = GetLabelPairProbabilities( targetLabelsRHS[nonTerminalNumber], *sourceLabelsRHSIt );
+          t2sLabelsProb += probPair.first;
+          s2tLabelsProb += probPair.second;
+        }
+        t2sLabelsProb /= nNTs;
+        s2tLabelsProb /= nNTs;
+        assert(t2sLabelsProb != 0);
+        assert(s2tLabelsProb != 0);
+      }
+
+    }
+
+    // input tree matching (RHS)
+    if ( !hasCompleteTreeInputMatch ) {
+      treeInputMismatchRHSCount = nNTs-1;
+      for (std::vector<bool>::const_iterator treeInputMatchRHSCountByNonTerminalIt = treeInputMatchRHSCountByNonTerminal.begin();
+           treeInputMatchRHSCountByNonTerminalIt != treeInputMatchRHSCountByNonTerminal.end(); ++treeInputMatchRHSCountByNonTerminalIt) {
+        if (*treeInputMatchRHSCountByNonTerminalIt) {
+          --treeInputMismatchRHSCount;
+        }
+      }
+    }
+
+    // score sparse features: mismatches
+    if ( m_featureVariant == 2 || m_featureVariant == 3 ) {
+
+      // RHS
+
+      for (size_t nonTerminalNumber = 0; nonTerminalNumber < nNTs-1; ++nonTerminalNumber) {
+      // nNTs-1 because nNTs also counts the left-hand side non-terminal
+
+        float score_RHS_0 = (float)1/treeInputLabelsRHS[nonTerminalNumber].size();
+        for (boost::unordered_set<size_t>::const_iterator treeInputLabelsRHSIt = treeInputLabelsRHS[nonTerminalNumber].begin();
+             treeInputLabelsRHSIt != treeInputLabelsRHS[nonTerminalNumber].end(); ++treeInputLabelsRHSIt) {
+
+          if ( m_featureVariant == 2 || 
+               (m_featureVariant == 3 && m_coreSourceLabels.find(*treeInputLabelsRHSIt) != m_coreSourceLabels.end()) ) {
+
+            if (sparseScoredTreeInputLabelsRHS[nonTerminalNumber].find(*treeInputLabelsRHSIt) == sparseScoredTreeInputLabelsRHS[nonTerminalNumber].end()) {
+              // score sparse features: RHS mismatch
+              accumulator->PlusEquals(this,
+                                      std::string("RHS_0_" + m_sourceLabelsByIndex[*treeInputLabelsRHSIt]),
+                                      score_RHS_0);
+            }
+          }
+        }
+      }
+
+      // LHS
+
+      float score_LHS_0 = (float)1/treeInputLabelsLHS.size();
+      for (boost::unordered_set<size_t>::const_iterator treeInputLabelsLHSIt = treeInputLabelsLHS.begin();
+           treeInputLabelsLHSIt != treeInputLabelsLHS.end(); ++treeInputLabelsLHSIt) {
+
+        if ( m_featureVariant == 2 || 
+             (m_featureVariant == 3 && m_coreSourceLabels.find(*treeInputLabelsLHSIt) != m_coreSourceLabels.end()) ) {
+
+          if (sparseScoredTreeInputLabelsLHS.find(*treeInputLabelsLHSIt) == sparseScoredTreeInputLabelsLHS.end()) {
+            // score sparse features: LHS mismatch
+            accumulator->PlusEquals(this,
+                                    std::string("LHS_0_" + m_sourceLabelsByIndex[*treeInputLabelsLHSIt]),
+                                    score_LHS_0);
+          }
+        }
+      }
+
+    }
+      
+  } else {
+
+    // abort with error message if the phrase does not translate an unknown word
+    UTIL_THROW_IF2(!currTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
+                   << ": Missing SourceLabels property. "
+                   << "Please check phrase table and glue rules.");
+
+    // unknown word
+    isUnkRule = true;
+
+  }
+
+  // add scores
+
+  // input tree matching
+  switch (m_featureVariant) {
+
+    case 0:
+      newScores[0] = hasCompleteTreeInputMatch;
+      break;
+
+    case 1:
+      newScores[0] = ( (hasCompleteTreeInputMatch || isGlueGrammarRule || isUnkRule) ? 0 : std::numeric_limits<float>::min() );
+      break;
+
+    default:
+      newScores[0] = hasCompleteTreeInputMatch;
+  }
+  newScores[1] = treeInputMismatchLHSBinary;
+  newScores[2] = treeInputMismatchRHSCount;
+//  newScores[3] = hasCompleteTreeInputMatch ? std::log(t2sLabelsProb) : 0;
+//  newScores[4] = hasCompleteTreeInputMatch ? std::log(s2tLabelsProb) : 0;
+//  newScores[3] = hasCompleteTreeInputMatch ? std::log(ruleLabelledProbability) : 0;
+
+  accumulator->PlusEquals(this, newScores);
+}
+
+ 
+// Returns the probability pair stored for (target, source), or (0,0) if target has no entry.
+std::pair<float,float> SoftSourceSyntacticConstraintsFeature::GetLabelPairProbabilities(
+  const Factor* target, const size_t source) const
+{
+  typedef boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* > LabelPairMap;
+  const LabelPairMap::const_iterator entry = m_labelPairProbabilities.find(target);
+  if ( entry != m_labelPairProbabilities.end() ) {
+    return entry->second->at(source);  // at() range-checks the source label index
+  }
+  return std::pair<float,float>(0,0);
+}
+
+ 
+}
+
--- a/moses/FF/SoftSourceSyntacticConstraintsFeature.h
+++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.h
@ -0,0 +1,87 @@
+#pragma once
+
+#include <string>
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
+#include "StatelessFeatureFunction.h"
+#include "FFState.h"
+#include "moses/Factor.h"
+
+namespace Moses
+{
+
+
+// Stateless feature function for soft source-syntactic constraints. Only the
+// ChartHypothesis overload of EvaluateWhenApplied() is defined out of line; the
+// other evaluation hooks have empty inline bodies.
+class SoftSourceSyntacticConstraintsFeature : public StatelessFeatureFunction
+{
+public:
+  SoftSourceSyntacticConstraintsFeature(const std::string &line);
+
+  // Deletes the vectors held as values of m_labelPairProbabilities.
+  ~SoftSourceSyntacticConstraintsFeature() {
+    boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* >::iterator entry;
+    for (entry = m_labelPairProbabilities.begin(); entry != m_labelPairProbabilities.end(); ++entry) {
+      delete entry->second;
+    }
+  }
+
+  // Reports the feature as usable for any factor mask.
+  bool IsUseable(const FactorMask &mask) const { return true; }
+
+  void SetParameter(const std::string& key, const std::string& value);
+
+  // The three hooks below have empty bodies.
+  void EvaluateInIsolation(const Phrase &source,
+                           const TargetPhrase &targetPhrase,
+                           ScoreComponentCollection &scoreBreakdown,
+                           ScoreComponentCollection &estimatedFutureScore) const {}
+
+  void EvaluateWithSourceContext(const InputType &input,
+                                 const InputPath &inputPath,
+                                 const TargetPhrase &targetPhrase,
+                                 const StackVec *stackVec,
+                                 ScoreComponentCollection &scoreBreakdown,
+                                 ScoreComponentCollection *estimatedFutureScore = NULL) const {}
+
+  void EvaluateWhenApplied(const Hypothesis& cur_hypo,
+                           ScoreComponentCollection* accumulator) const {}
+
+  // Declared only; the ChartHypothesis overload is implemented elsewhere.
+  void EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
+                           ScoreComponentCollection* accumulator) const;
+
+private:
+  std::string m_sourceLabelSetFile;
+  std::string m_coreSourceLabelSetFile;
+  std::string m_targetSourceLHSJointCountFile;
+  std::string m_unknownLeftHandSideFile;
+  size_t m_featureVariant;
+
+  boost::unordered_map<std::string,size_t> m_sourceLabels;
+  std::vector<std::string> m_sourceLabelsByIndex;
+  boost::unordered_set<size_t> m_coreSourceLabels;
+  boost::unordered_map<const Factor*,size_t> m_sourceLabelIndexesByFactor;
+  size_t m_GlueTopLabel;
+//  mutable size_t m_XRHSLabel;
+//  mutable size_t m_XLHSLabel;
+
+  // The mapped vectors are deleted in the destructor.
+  boost::unordered_map<const Factor*, std::vector< std::pair<float,float> >* > m_labelPairProbabilities;
+  boost::unordered_map<size_t,float> m_unknownLHSProbabilities;
+  float m_smoothingWeight;
+  float m_unseenLHSSmoothingFactorForUnknowns;
+
+  void Load();
+  void LoadSourceLabelSet();
+  void LoadCoreSourceLabelSet();
+  void LoadTargetSourceLeftHandSideJointCountFile();
+
+  std::pair<float,float> GetLabelPairProbabilities(const Factor* target,
+                                                   const size_t source) const;
+};
+
+
+}
+
--- a/moses/FactorCollection.cpp
+++ b/moses/FactorCollection.cpp
@ -67,6 +67,23 @@ const Factor *FactorCollection::AddFactor(const StringPiece &factorString, bool
  return &ret.first->in;
 }

+// Looks up an already stored factor without inserting it; returns NULL if it is not found.
+const Factor *FactorCollection::GetFactor(const StringPiece &factorString, bool isNonTerminal)
+{
+  FactorFriend probe;
+  probe.in.m_string = factorString;
+  probe.in.m_id = isNonTerminal ? m_factorIdNonTerminal : m_factorId;
+  // NOTE(review): set choice looks swapped w.r.t. the member names; presumably mirrors AddFactor -- confirm.
+  Set &candidates = isNonTerminal ? m_set : m_setNonTerminal;
+#ifdef WITH_THREADS
+  boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock); // read lock, released on return
+#endif // WITH_THREADS
+  const Set::const_iterator hit = candidates.find(probe);
+  if (hit == candidates.end()) return NULL;
+  return &hit->in;
+}
+
+
 FactorCollection::~FactorCollection() {}

 TO_STRING_BODY(FactorCollection);
--- a/moses/FactorCollection.h
+++ b/moses/FactorCollection.h
@ -114,6 +114,8 @@ public:
    return m_factorIdNonTerminal;
  }

+  const Factor *GetFactor(const StringPiece &factorString, bool isNonTerminal = false);
+
  // TODO: remove calls to this function, replacing them with the simpler AddFactor(factorString)
  const Factor *AddFactor(FactorDirection /*direction*/, FactorType /*factorType*/, const StringPiece &factorString, bool isNonTerminal = false) {
    return AddFactor(factorString, isNonTerminal);
--- a/moses/Hypothesis.cpp
+++ b/moses/Hypothesis.cpp
@ -205,7 +205,7 @@ int Hypothesis::RecombineCompare(const Hypothesis &compare) const
  return 0;
 }

-void Hypothesis::EvaluateWith(const StatefulFeatureFunction &sfff,
+void Hypothesis::EvaluateWhenApplied(const StatefulFeatureFunction &sfff,
                              int state_idx)
 {
  const StaticData &staticData = StaticData::Instance();
@ -217,7 +217,7 @@ void Hypothesis::EvaluateWith(const StatefulFeatureFunction &sfff,
  }
 }

-void Hypothesis::EvaluateWith(const StatelessFeatureFunction& slff)
+void Hypothesis::EvaluateWhenApplied(const StatelessFeatureFunction& slff)
 {
  const StaticData &staticData = StaticData::Instance();
  if (! staticData.IsFeatureFunctionIgnored( slff )) {
@ -228,7 +228,7 @@ void Hypothesis::EvaluateWith(const StatelessFeatureFunction& slff)
 /***
 * calculate the logarithm of our total translation score (sum up components)
 */
-void Hypothesis::Evaluate(const SquareMatrix &futureScore)
+void Hypothesis::EvaluateWhenApplied(const SquareMatrix &futureScore)
 {
  IFVERBOSE(2) {
    m_manager.GetSentenceStats().StartTimeOtherScore();
@ -244,7 +244,7 @@ void Hypothesis::Evaluate(const SquareMatrix &futureScore)
    StatelessFeatureFunction::GetStatelessFeatureFunctions();
  for (unsigned i = 0; i < sfs.size(); ++i) {
    const StatelessFeatureFunction &ff = *sfs[i];
-    EvaluateWith(ff);
+    EvaluateWhenApplied(ff);
  }

  const vector<const StatefulFeatureFunction*>& ffs =
@ -332,7 +332,7 @@ void Hypothesis::CleanupArcList()
   */
  const StaticData &staticData = StaticData::Instance();
  size_t nBestSize = staticData.GetNBestSize();
-  bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphSLF() || staticData.GetOutputSearchGraphHypergraph() || staticData.UseLatticeMBR() ;
+  bool distinctNBest = staticData.GetDistinctNBest() || staticData.GetLatticeSamplesSize() ||  staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphSLF() || staticData.GetOutputSearchGraphHypergraph() || staticData.UseLatticeMBR() ;

  if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
    // prune arc list only if there too many arcs
--- a/moses/Hypothesis.h
+++ b/moses/Hypothesis.h
@ -142,7 +142,7 @@ public:
    return m_currTargetWordsRange.GetNumWordsCovered();
  }

-  void Evaluate(const SquareMatrix &futureScore);
+  void EvaluateWhenApplied(const SquareMatrix &futureScore);

  int GetId()const {
    return m_id;
@ -256,8 +256,8 @@ public:
  }

  // Added by oliver.wilson@ed.ac.uk for async lm stuff.
-  void EvaluateWith(const StatefulFeatureFunction &sfff, int state_idx);
-  void EvaluateWith(const StatelessFeatureFunction &slff);
+  void EvaluateWhenApplied(const StatefulFeatureFunction &sfff, int state_idx);
+  void EvaluateWhenApplied(const StatelessFeatureFunction &slff);

  //! target span that trans opt would populate if applied to this hypo. Used for alignment check
  size_t GetNextStartPos(const TranslationOption &transOpt) const;
--- a/moses/Incremental.cpp
+++ b/moses/Incremental.cpp
@ -102,7 +102,7 @@ public:
    return vertex.BestChild();
  }

-  void Evaluate(const InputType &input, const InputPath &inputPath) {
+  void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath) {
    // TODO for input lattice
  }
 private:
--- a/moses/Jamfile
+++ b/moses/Jamfile
@ -12,7 +12,7 @@ if $(with-dlib) {

 with-lbllm = [ option.get "with-lbllm" ] ;
 if $(with-lbllm) {
-  lbllm2 = <cxxflags>-std=c++0x <define>LM_LBL <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
+  lbllm2 = <cxxflags>-std=c++0x <define>LM_LBL <include>$(with-lbllm)/src <include>$(with-lbllm)/third_party/eigen ;
 } else {
  lbllm2 = ;
 }
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@ -94,9 +94,10 @@ if $(with-nplm) {
 local with-lbllm = [ option.get "with-lbllm" ] ;
 if $(with-lbllm) {
  lib lbl : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
-  obj LBLLM.o : oxlm/LBLLM.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
-  obj Mapper.o : oxlm/Mapper.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
-  alias lbllm : LBLLM.o Mapper.o lbl : : : <cxxflags>-std=c++0x <define>LM_LBL ;
+  lib murmurhash : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
+  obj LBLLM.o : oxlm/LBLLM.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/third_party/eigen ;
+  obj Mapper.o : oxlm/Mapper.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/third_party/eigen ;
+  alias lbllm : LBLLM.o Mapper.o lbl murmurhash /top//boost_filesystem : : : <cxxflags>-std=c++0x <define>LM_LBL ;
  dependencies += lbllm ;
  lmmacros += LM_LBL ;
 }
--- a/moses/LM/oxlm/LBLLM.cpp
+++ b/moses/LM/oxlm/LBLLM.cpp
@ -1,11 +1,171 @@
-
 #include "LBLLM.h"

+#include <boost/archive/binary_iarchive.hpp>
+#include <boost/archive/binary_oarchive.hpp>
+#include <boost/filesystem.hpp>
+#include <boost/functional/hash.hpp>
+
+#include "moses/FactorCollection.h"
+#include "moses/InputType.h"
+
 using namespace std;
+using namespace oxlm;

 namespace Moses
 {

+// Reads the feature parameters and registers the sentence start/end factors.
+template<class Model>
+LBLLM<Model>::LBLLM(const string &line)
+  : LanguageModelSingleFactor(line)
+  , persistentCache(false)  // otherwise only assigned by SetParameter("persistent-cache", ...)
+  , cacheHits(0)
+  , totalHits(0) {
+  ReadParameters();
+  // needed by parent language model classes. Why didn't they set these themselves?
+  FactorCollection &factorCollection = FactorCollection::Instance();
+  m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
+  m_sentenceStartWord[m_factorType] = m_sentenceStart;
+  m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
+  m_sentenceEndWord[m_factorType] = m_sentenceEnd;
+}
+
+
+// Reports the cache hit ratio (percent) on stderr; skipped when no query was counted (no 0/0).
+template<class Model>
+LBLLM<Model>::~LBLLM() {
+  if (persistentCache && totalHits > 0) {
+    cerr << "Cache hit ratio: " << 100.0 * cacheHits / totalHits << endl;
+  }
+}
+
+
+// Handles the "persistent-cache" key locally; all other keys are passed on to
+// LanguageModelSingleFactor::SetParameter().
+template<class Model>
+void LBLLM<Model>::SetParameter(const string& key, const string& value) {
+  const bool isCacheSwitch = (key == "persistent-cache");
+  if (isCacheSwitch) persistentCache = Scan<bool>(value);
+  else LanguageModelSingleFactor::SetParameter(key, value);
+}
+
+// Loads the model from m_filePath, builds the mapper from its dictionary, caches the
+// ids of "<s>", "</s>", "<unk>", and throws if the model order differs from m_nGramOrder.
+template<class Model>
+void LBLLM<Model>::Load() {
+  model.load(m_filePath);
+  Dict vocabulary = model.getDict();
+  mapper = boost::make_shared<OXLMMapper>(vocabulary);
+
+  kSTART = vocabulary.Convert("<s>");
+  kSTOP = vocabulary.Convert("</s>");
+  kUNKNOWN = vocabulary.Convert("<unk>");
+
+  const size_t ngram_order = model.getConfig()->ngram_order;
+  UTIL_THROW_IF2(m_nGramOrder != ngram_order,
+      "Wrong order for LBLLM: LM has " << ngram_order << ", but Moses expects " << m_nGramOrder);
+}
+
+// Scores the n-gram in contextFactor, which the mapper converts into a word id
+// and its context ids. The context is resized to m_nGramOrder - 1 ids, filled with
+// kSTART when its last id is kSTART and with kUNKNOWN otherwise. finalState, when
+// non-NULL, receives a hash of the word and context ids stored as a pointer value.
+template<class Model>
+LMResult LBLLM<Model>::GetValue(
+    const vector<const Word*> &contextFactor, State* finalState) const {
+  if (!cache.get()) {
+    cache.reset(new QueryCache());  // created on first use by the calling thread
+  }
+
+  vector<int> context;
+  int word;
+  mapper->convert(contextFactor, context, word);
+
+  const size_t context_width = m_nGramOrder - 1;
+  const bool endsWithStart = !context.empty() && context.back() == kSTART;
+  context.resize(context_width, endsWithStart ? kSTART : kUNKNOWN);
+
+  double score;
+  if (persistentCache) {
+    // NOTE(review): counters are plain ints bumped without locking -- confirm for multi-threaded use.
+    ++totalHits;
+    NGram query(word, context);
+    pair<double, bool> ret = cache->get(query);
+    if (ret.second) {
+      score = ret.first;
+      ++cacheHits;
+    } else {
+      score = model.predict(word, context);
+      cache->put(query, score);
+    }
+  } else {
+    score = model.predict(word, context);
+  }
+  LMResult ret;
+  ret.score = score;
+  ret.unknown = (word == kUNKNOWN);
+
+  // The declaration defaults finalState to 0, so the state (a hash of the word and
+  // of up to context_width - 1 leading context ids) is only written when it is given.
+  if (finalState) {
+    size_t seed = 0;
+    boost::hash_combine(seed, word);
+    for (size_t i = 0; i < context.size() && i < context_width - 1; ++i) {
+      boost::hash_combine(seed, context[i]);
+    }
+    (*finalState) = (State*) seed;
+  }
+  return ret;
+}
+
+// Runs the base-class per-sentence setup; with the persistent cache enabled, also
+// loads the calling thread's query cache from
+// "<m_filePath>.<translation id>.cache.bin" if that file exists.
+template<class Model>
+void LBLLM<Model>::InitializeForInput(const InputType& source) {
+  LanguageModelSingleFactor::InitializeForInput(source);
+  if (!persistentCache) return;
+  if (!cache.get()) {
+    cache.reset(new QueryCache());
+  }
+  const int sentence_id = source.GetTranslationId();
+  const string cacheFile = m_filePath + "." + to_string(sentence_id) + ".cache.bin";
+  if (!boost::filesystem::exists(cacheFile)) {
+    cerr << "Cache file not found" << endl;
+    return;
+  }
+  // NOTE(review): the stream is not opened with ios::binary -- confirm this is intended.
+  ifstream f(cacheFile);
+  boost::archive::binary_iarchive iar(f);
+  cerr << "Loading n-gram probability cache from " << cacheFile << endl;
+  iar >> *cache;
+  cerr << "Done loading " << cache->size() << " n-gram probabilities..." << endl;
+}
+
+// Clears the model's cache; with the persistent cache enabled, serialises the query
+// cache to "<m_filePath>.<translation id>.cache.bin" and empties it; then runs the
+// base-class cleanup.
+template<class Model>
+void LBLLM<Model>::CleanUpAfterSentenceProcessing(const InputType& source) {
+  model.clearCache();
+  if (persistentCache) {
+    const int sentence_id = source.GetTranslationId();
+    const string cacheFile = m_filePath + "." + to_string(sentence_id) + ".cache.bin";
+    // NOTE(review): the stream is not opened with ios::binary -- confirm this is intended.
+    ofstream f(cacheFile);
+    boost::archive::binary_oarchive oar(f);
+    cerr << "Saving persistent cache to " << cacheFile << endl;
+    oar << *cache;
+    cerr << "Done saving " << cache->size() << " n-gram probabilities..." << endl;
+    cache->clear();
+  }  // f and oar go out of scope here, before the base-class call
+  LanguageModelSingleFactor::CleanUpAfterSentenceProcessing(source);
+}
+
+template class LBLLM<LM>;
+template class LBLLM<FactoredLM>;
+template class LBLLM<FactoredMaxentLM>;
+
 }


--- a/moses/LM/oxlm/LBLLM.h
+++ b/moses/LM/oxlm/LBLLM.h
@ -2,15 +2,12 @@
 #pragma once

 #include <vector>
-#include <boost/functional/hash.hpp>
+
 #include "moses/LM/SingleFactor.h"
-#include "moses/FactorCollection.h"

 // lbl stuff
 #include "corpus/corpus.h"
-#include "lbl/lbl_features.h"
 #include "lbl/model.h"
-#include "lbl/process_identifier.h"
 #include "lbl/query_cache.h"

 #include "Mapper.h"
@ -22,100 +19,34 @@ namespace Moses
 template<class Model>
 class LBLLM : public LanguageModelSingleFactor
 {
-protected:
-
 public:
-	LBLLM(const std::string &line)
-	:LanguageModelSingleFactor(line)
-	{
-		ReadParameters();
+	LBLLM(const std::string &line);

-		FactorCollection &factorCollection = FactorCollection::Instance();
+  ~LBLLM();

-		// needed by parent language model classes. Why didn't they set these themselves?
-		m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
-		m_sentenceStartWord[m_factorType] = m_sentenceStart;
+  void SetParameter(const std::string& key, const std::string& value);

-		m_sentenceEnd		= factorCollection.AddFactor(Output, m_factorType, EOS_);
-		m_sentenceEndWord[m_factorType] = m_sentenceEnd;
-	}
+  void Load();

-  ~LBLLM()
-  {}
+  virtual LMResult GetValue(
+      const std::vector<const Word*> &contextFactor,
+      State* finalState = 0) const;

-  void Load()
-  {
-    model.load(m_filePath);
+  virtual void InitializeForInput(const InputType& source);

-    config = model.getConfig();
-    int context_width = config->ngram_order - 1;
-    // For each state, we store at most context_width word ids to the left and
-    // to the right and a kSTAR separator. The last bit represents the actual
-    // size of the state.
-    //int max_state_size = (2 * context_width + 1) * sizeof(int) + 1;
-    //FeatureFunction::SetStateSize(max_state_size);
-
-    dict = model.getDict();
-    mapper = boost::make_shared<OXLMMapper>(dict);
-    //stateConverter = boost::make_shared<CdecStateConverter>(max_state_size - 1);
-    //ruleConverter = boost::make_shared<CdecRuleConverter>(mapper, stateConverter);
-
-    kSTART = dict.Convert("<s>");
-    kSTOP = dict.Convert("</s>");
-    kUNKNOWN = dict.Convert("<unk>");
-  }
-
-
-  virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const
-  {
-    std::vector<int> context;
-    int word;
-    mapper->convert(contextFactor, context, word);
-
-    size_t context_width = m_nGramOrder - 1;
-
-    if (!context.empty() && context.back() == kSTART) {
-      context.resize(context_width, kSTART);
-    } else {
-      context.resize(context_width, kUNKNOWN);
-    }
-
-
-    double score;
-    score = model.predict(word, context);
-
-    /*
-	std::string str = DebugContextFactor(contextFactor);
-    std::cerr << "contextFactor=" << str << " " << score << std::endl;
-	*/
-
-    LMResult ret;
-    ret.score = score;
-    ret.unknown = (word == kUNKNOWN);
-
-    // calc state from hash of last n-1 words
-    size_t seed = 0;
-    boost::hash_combine(seed, word);
-    for (size_t i = 0; i < context.size() && i < context_width - 1; ++i) {
-    	int id = context[i];
-    	boost::hash_combine(seed, id);
-    }
-
-    (*finalState) = (State*) seed;
-    return ret;
-  }
+  virtual void CleanUpAfterSentenceProcessing(const InputType& source);

 protected:
-  oxlm::Dict dict;
-  boost::shared_ptr<oxlm::ModelData> config;
  Model model;
+  boost::shared_ptr<OXLMMapper> mapper;

  int kSTART;
  int kSTOP;
  int kUNKNOWN;

-  boost::shared_ptr<OXLMMapper> mapper;
-
+  bool persistentCache;
+  mutable boost::thread_specific_ptr<oxlm::QueryCache> cache;
+  mutable int cacheHits, totalHits;
 };


--- a/moses/PDTAimp.cpp
+++ b/moses/PDTAimp.cpp
@ -422,7 +422,7 @@ void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
  }

  targetPhrase.GetScoreBreakdown().Assign(m_obj, transVector);
-  targetPhrase.Evaluate(*srcPtr, m_obj->GetFeaturesToApply());
+  targetPhrase.EvaluateInIsolation(*srcPtr, m_obj->GetFeaturesToApply());
 }

 TargetPhraseCollectionWithSourcePhrase* PDTAimp::PruneTargetCandidates
--- a/moses/PP/Factory.cpp
+++ b/moses/PP/Factory.cpp
@ -9,6 +9,7 @@
 #include "moses/PP/TreeStructurePhraseProperty.h"
 #include "moses/PP/SpanLengthPhraseProperty.h"
 #include "moses/PP/NonTermContextProperty.h"
+#include "moses/PP/OrientationPhraseProperty.h"

 namespace Moses
 {
@ -59,6 +60,7 @@ PhrasePropertyFactory::PhrasePropertyFactory()
  MOSES_PNAME2("Tree",TreeStructurePhraseProperty);
  MOSES_PNAME2("SpanLength", SpanLengthPhraseProperty);
  MOSES_PNAME2("NonTermContext", NonTermContextProperty);
+  MOSES_PNAME2("Orientation", OrientationPhraseProperty);
 }

 PhrasePropertyFactory::~PhrasePropertyFactory()
--- a/moses/PP/OrientationPhraseProperty.cpp
+++ b/moses/PP/OrientationPhraseProperty.cpp
@ -0,0 +1,26 @@
+#include "moses/PP/OrientationPhraseProperty.h"
+#include <iostream>
+
+
+namespace Moses
+{
+
+// Reads the eight probabilities of a bidirectional MSLR phrase orientation model
+// (2x4 orientation classes) from value, in the order
+// mono swap dright dleft, first left-to-right and then right-to-left.
+// Raises an error through UTIL_THROW2 when the eight values cannot be read.
+void OrientationPhraseProperty::ProcessValue(const std::string &value)
+{
+  std::istringstream tokenizer(value);
+
+  // Deliberately not wrapped in try/catch(std::exception): such a handler would
+  // also intercept the error raised below (assuming it derives from
+  // std::exception) and replace this message with a less specific one.
+  if (! (tokenizer >> m_l2rMonoProbability >> m_l2rSwapProbability >> m_l2rDrightProbability >> m_l2rDleftProbability
+                   >> m_r2lMonoProbability >> m_r2lSwapProbability >> m_r2lDrightProbability >> m_r2lDleftProbability)) {
+    UTIL_THROW2("OrientationPhraseProperty: Not able to read value. Flawed property? " << value);
+  }
+}
+
+} // namespace Moses
+
--- a/moses/PP/OrientationPhraseProperty.h
+++ b/moses/PP/OrientationPhraseProperty.h
@ -0,0 +1,65 @@
+
+#pragma once
+
+#include "moses/PP/PhraseProperty.h"
+#include "util/exception.hh"
+#include <string>
+
+namespace Moses
+{
+
+// Phrase property holding the probabilities of a bidirectional phrase orientation
+// model with four classes (mono, swap, dright, dleft) per direction.
+// ProcessValue() assigns the values; the constructor does not initialise them.
+// The members are floats; the getters return them as double.
+class OrientationPhraseProperty : public PhraseProperty
+{
+public:
+  OrientationPhraseProperty() {}
+
+  virtual void ProcessValue(const std::string &value);
+
+  // left-to-right direction
+
+  double GetLeftToRightProbabilityMono() const { return m_l2rMonoProbability; }
+
+  double GetLeftToRightProbabilitySwap() const { return m_l2rSwapProbability; }
+
+  double GetLeftToRightProbabilityDright() const { return m_l2rDrightProbability; }
+
+  double GetLeftToRightProbabilityDleft() const { return m_l2rDleftProbability; }
+
+  // right-to-left direction
+
+  double GetRightToLeftProbabilityMono() const { return m_r2lMonoProbability; }
+
+  double GetRightToLeftProbabilitySwap() const { return m_r2lSwapProbability; }
+
+  double GetRightToLeftProbabilityDright() const { return m_r2lDrightProbability; }
+
+  double GetRightToLeftProbabilityDleft() const { return m_r2lDleftProbability; }
+
+  // No value string is provided for this property: the call raises an error through
+  // UTIL_THROW2; the return after it is presumably never reached.
+  virtual const std::string *GetValueString() const {
+    UTIL_THROW2("OrientationPhraseProperty: value string not available in this phrase property");
+    return NULL;
+  }
+
+protected:
+
+  // left-to-right
+  float m_l2rMonoProbability;
+  float m_l2rSwapProbability;
+  float m_l2rDrightProbability;
+  float m_l2rDleftProbability;
+
+  // right-to-left
+  float m_r2lMonoProbability;
+  float m_r2lSwapProbability;
+  float m_r2lDrightProbability;
+  float m_r2lDleftProbability;
+};
+
+} // namespace Moses
+
--- a/moses/PP/SourceLabelsPhraseProperty.cpp
+++ b/moses/PP/SourceLabelsPhraseProperty.cpp
@ -16,12 +16,12 @@ void SourceLabelsPhraseProperty::ProcessValue(const std::string &value)
  std::istringstream tokenizer(value);

  if (! (tokenizer >> m_nNTs)) { // first token: number of non-terminals (incl. left-hand side)
-    UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read number of non-terminals. Flawed property?");
+    UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read number of non-terminals. Flawed property? " << value);
  }
  assert( m_nNTs > 0 );

  if (! (tokenizer >> m_totalCount)) { // second token: overall rule count
-    UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read overall rule count. Flawed property?");
+    UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read overall rule count. Flawed property? " << value);
  }
  assert( m_totalCount > 0.0 );

@ -32,7 +32,7 @@ void SourceLabelsPhraseProperty::ProcessValue(const std::string &value)
  std::priority_queue<float> ruleLabelledCountsPQ;

  while (tokenizer.peek() != EOF) {
-    try {
+//    try {

      SourceLabelsPhrasePropertyItem item;
      size_t numberOfLHSsGivenRHS = std::numeric_limits<std::size_t>::max();
@ -46,28 +46,28 @@ void SourceLabelsPhraseProperty::ProcessValue(const std::string &value)
        for (size_t i=0; i<m_nNTs-1; ++i) { // RHS source non-terminal labels
          size_t sourceLabelRHS;
          if (! (tokenizer >> sourceLabelRHS) ) { // RHS source non-terminal label
-            UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side label index. Flawed property?");
+            UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side label index. Flawed property? " << value);
          }
          item.m_sourceLabelsRHS.push_back(sourceLabelRHS);
        }

        if (! (tokenizer >> item.m_sourceLabelsRHSCount)) {
-          UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side count. Flawed property?");
+          UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read right-hand side count. Flawed property? " << value);
        }

        if (! (tokenizer >> numberOfLHSsGivenRHS)) {
-          UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read number of left-hand sides. Flawed property?");
+          UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read number of left-hand sides. Flawed property? " << value);
        }
      }

      for (size_t i=0; i<numberOfLHSsGivenRHS && tokenizer.peek()!=EOF; ++i) { // LHS source non-terminal labels seen with this RHS
        size_t sourceLabelLHS;
        if (! (tokenizer >> sourceLabelLHS)) { // LHS source non-terminal label
-          UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read left-hand side label index. Flawed property?");
+          UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read left-hand side label index. Flawed property? " << value);
        }
        float ruleSourceLabelledCount;
        if (! (tokenizer >> ruleSourceLabelledCount)) {
-          UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read count. Flawed property?");
+          UTIL_THROW2("SourceLabelsPhraseProperty: Not able to read count. Flawed property? " << value);
        }
        item.m_sourceLabelsLHSList.push_back( std::make_pair(sourceLabelLHS,ruleSourceLabelledCount) );
        ruleLabelledCountsPQ.push(ruleSourceLabelledCount);
@ -75,9 +75,9 @@ void SourceLabelsPhraseProperty::ProcessValue(const std::string &value)

      m_sourceLabelItems.push_back(item);

-    } catch (const std::exception &e) {
-      UTIL_THROW2("SourceLabelsPhraseProperty: Read error. Flawed property?");
-    }
+//    } catch (const std::exception &e) {
+//      UTIL_THROW2("SourceLabelsPhraseProperty: Read error. Flawed property?");
+//    }
  }

  // keep only top N label vectors
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@ -50,7 +50,7 @@ Parameter::Parameter()
  AddParam("factor-delimiter", "fd", "specify a different factor delimiter than the default");
  AddParam("input-factors", "list of factors in the input");
  AddParam("input-file", "i", "location of the input file to be translated");
-  AddParam("inputtype", "text (0), confusion network (1), word lattice (2) (default = 0)");
+  AddParam("inputtype", "text (0), confusion network (1), word lattice (2), tree (3) (default = 0)");
  AddParam("labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
  AddParam("mark-unknown", "mu", "mark unknown words in output");
  AddParam("max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)");
--- a/moses/RuleCubeItem.cpp
+++ b/moses/RuleCubeItem.cpp
@ -79,7 +79,7 @@ void RuleCubeItem::CreateHypothesis(const ChartTranslationOptions &transOpt,
                                    ChartManager &manager)
 {
  m_hypothesis = new ChartHypothesis(transOpt, *this, manager);
-  m_hypothesis->Evaluate();
+  m_hypothesis->EvaluateWhenApplied();
  m_score = m_hypothesis->GetTotalScore();
 }

--- a/moses/ScoreComponentCollection.h
+++ b/moses/ScoreComponentCollection.h
@ -261,6 +261,11 @@ public:

  void PlusEquals(const FeatureFunction* sp, const ScorePair &scorePair);

+  // Add `score` to the m_scores entry at position `index` (additive counterpart of Assign(index, score))
+  void PlusEquals(size_t index, float score) {
+    m_scores[index] += score;
+  }
+
  //For features which have an unbounded number of components
  void SparsePlusEquals(const std::string& full_name, float score) {
    FName fname(full_name);
@ -283,7 +288,7 @@ public:
    m_scores[indexes.first] = score;
  }

-  // Assign core weight by index
+  // Assign score by index
  void Assign(size_t index, float score) {
    m_scores[index] = score;
  }
@ -354,6 +359,11 @@ public:
    m_scores.capMin(minValue);
  }

+  //! public wrapper: hands back the index pair that GetIndexes() reports for this feature function
+  std::pair<size_t,size_t> GetIndexesForProducer(const FeatureFunction* sp) const {
+    return GetIndexes(sp);
+  }
+
  //! if a FeatureFunction produces a single score (for example, a language model score)
  //! this will return it.  If not, this method will throw
  float GetScoreForProducer(const FeatureFunction* sp) const {
--- a/moses/SearchNormal.cpp
+++ b/moses/SearchNormal.cpp
@ -288,7 +288,7 @@ void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis, const Translat
      stats.StopTimeBuildHyp();
    }
    if (newHypo==NULL) return;
-    newHypo->Evaluate(m_transOptColl.GetFutureScore());
+    newHypo->EvaluateWhenApplied(m_transOptColl.GetFutureScore());
  } else
    // early discarding: check if hypothesis is too bad to build
  {
--- a/moses/SearchNormalBatch.cpp
+++ b/moses/SearchNormalBatch.cpp
@ -159,13 +159,13 @@ void SearchNormalBatch::EvalAndMergePartialHypos()
         ++sfff_iter) {
      const StatefulFeatureFunction &ff = *(sfff_iter->second);
      int state_idx = sfff_iter->first;
-      hypo->EvaluateWith(ff, state_idx);
+      hypo->EvaluateWhenApplied(ff, state_idx);
    }
    std::vector<const StatelessFeatureFunction*>::iterator slff_iter;
    for (slff_iter = m_stateless_ffs.begin();
         slff_iter != m_stateless_ffs.end();
         ++slff_iter) {
-      hypo->EvaluateWith(**slff_iter);
+      hypo->EvaluateWhenApplied(**slff_iter);
    }
  }

@ -190,7 +190,7 @@ void SearchNormalBatch::EvalAndMergePartialHypos()
         dlm_iter != m_dlm_ffs.end();
         ++dlm_iter) {
      LanguageModel &lm = *(dlm_iter->second);
-      hypo->EvaluateWith(lm, (*dlm_iter).first);
+      hypo->EvaluateWhenApplied(lm, (*dlm_iter).first);
    }

    // Put completed hypothesis onto its stack.
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@ -125,6 +125,9 @@ bool StaticData::LoadData(Parameter *parameter)
  if (m_inputType == 2) {
    s_it = "word lattice";
  }
+  if (m_inputType == 3) {
+    s_it = "tree";
+  }
  VERBOSE(2,"input type is: "<<s_it<<"\n");

  if(m_parameter->GetParam("recover-input-path").size()) {
--- a/moses/TargetPhrase.cpp
+++ b/moses/TargetPhrase.cpp
@ -101,13 +101,13 @@ void TargetPhrase::WriteToRulePB(hgmert::Rule* pb) const
 }
 #endif

-void TargetPhrase::Evaluate(const Phrase &source)
+void TargetPhrase::EvaluateInIsolation(const Phrase &source)
 {
  const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
-  Evaluate(source, ffs);
+  EvaluateInIsolation(source, ffs);
 }

-void TargetPhrase::Evaluate(const Phrase &source, const std::vector<FeatureFunction*> &ffs)
+void TargetPhrase::EvaluateInIsolation(const Phrase &source, const std::vector<FeatureFunction*> &ffs)
 {
  if (ffs.size()) {
    const StaticData &staticData = StaticData::Instance();
@ -126,7 +126,7 @@ void TargetPhrase::Evaluate(const Phrase &source, const std::vector<FeatureFunct
  }
 }

-void TargetPhrase::Evaluate(const InputType &input, const InputPath &inputPath)
+void TargetPhrase::EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath)
 {
  const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
  const StaticData &staticData = StaticData::Instance();
--- a/moses/TargetPhrase.h
+++ b/moses/TargetPhrase.h
@ -71,14 +71,14 @@ public:
  ~TargetPhrase();

  // 1st evaluate method. Called during loading of phrase table.
-  void Evaluate(const Phrase &source, const std::vector<FeatureFunction*> &ffs);
+  void EvaluateInIsolation(const Phrase &source, const std::vector<FeatureFunction*> &ffs);

  // as above, score with ALL FFs
  // Used only for OOV processing. Doesn't have a phrase table connect with it
-  void Evaluate(const Phrase &source);
+  void EvaluateInIsolation(const Phrase &source);

  // 'inputPath' is guaranteed to be the raw substring from the input. No factors were added or taken away
-  void Evaluate(const InputType &input, const InputPath &inputPath);
+  void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath);

  void SetSparseScore(const FeatureFunction* translationScoreProducer, const StringPiece &sparseString);

--- a/moses/TranslationModel/CompactPT/PhraseDecoder.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseDecoder.cpp
@ -418,7 +418,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
      }

      if(eval) {
-        targetPhrase->Evaluate(sourcePhrase, m_phraseDictionary.GetFeaturesToApply());
+        targetPhrase->EvaluateInIsolation(sourcePhrase, m_phraseDictionary.GetFeaturesToApply());
      }

      if(m_coding == PREnc) {
--- a/moses/TranslationModel/PhraseDictionaryDynSuffixArray.cpp
+++ b/moses/TranslationModel/PhraseDictionaryDynSuffixArray.cpp
@ -60,7 +60,7 @@ GetTargetPhraseCollectionLEGACY(const Phrase& src) const
  BOOST_FOREACH(pstat_entry & e, pstats) {
    TargetPhrase* tp = m_biSA->GetMosesFactorIDs(e.first, src, this);
    tp->GetScoreBreakdown().Assign(this,e.second);
-    tp->Evaluate(src);
+    tp->EvaluateInIsolation(src);
    ret->Add(tp);
  }
  // return ret;
--- a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
@ -147,7 +147,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
          vector<FeatureFunction*> pd_feature;
          pd_feature.push_back(m_pd[i]);
          const vector<FeatureFunction*> pd_feature_const(pd_feature);
-          statistics->targetPhrase->Evaluate(src, pd_feature_const);
+          statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
          // zero out scores from original phrase table
          statistics->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);

@ -186,7 +186,7 @@ TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollection
    vector<FeatureFunction*> pd_feature;
    pd_feature.push_back(const_cast<PhraseDictionaryMultiModel*>(this));
    const vector<FeatureFunction*> pd_feature_const(pd_feature);
-    statistics->targetPhrase->Evaluate(src, pd_feature_const);
+    statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const);

    ret->Add(new TargetPhrase(*statistics->targetPhrase));
  }
--- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
@ -189,7 +189,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
          vector<FeatureFunction*> pd_feature;
          pd_feature.push_back(m_pd[i]);
          const vector<FeatureFunction*> pd_feature_const(pd_feature);
-          statistics->targetPhrase->Evaluate(src, pd_feature_const);
+          statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
          // zero out scores from original phrase table
          statistics->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);

@ -251,7 +251,7 @@ TargetPhraseCollection* PhraseDictionaryMultiModelCounts::CreateTargetPhraseColl
      vector<FeatureFunction*> pd_feature;
      pd_feature.push_back(const_cast<PhraseDictionaryMultiModelCounts*>(this));
      const vector<FeatureFunction*> pd_feature_const(pd_feature);
-      statistics->targetPhrase->Evaluate(src, pd_feature_const);
+      statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
    } catch (AlignmentException& e) {
      continue;
    }
--- a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
+++ b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
@ -132,7 +132,7 @@ std::vector<TargetPhrase*> PhraseDictionaryTransliteration::CreateTargetPhrases(
 	  tp->GetScoreBreakdown().PlusEquals(this, score);

 	  // score of all other ff when this rule is being loaded
-	  tp->Evaluate(sourcePhrase, GetFeaturesToApply());
+	  tp->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());

 	  ret.push_back(tp);
 	}
--- a/moses/TranslationModel/ProbingPT/ProbingPT.cpp
+++ b/moses/TranslationModel/ProbingPT/ProbingPT.cpp
@ -181,7 +181,7 @@ TargetPhrase *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase, const ta
  */

  // score of all other ff when this rule is being loaded
-  tp->Evaluate(sourcePhrase, GetFeaturesToApply());
+  tp->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
  return tp;
 }

--- a/moses/TranslationModel/RuleTable/LoaderCompact.cpp
+++ b/moses/TranslationModel/RuleTable/LoaderCompact.cpp
@ -226,7 +226,7 @@ bool RuleTableLoaderCompact::LoadRuleSection(
    targetPhrase->SetAlignNonTerm(alignNonTerm);
    targetPhrase->SetTargetLHS(targetLhs);

-    targetPhrase->Evaluate(sourcePhrase, ruleTable.GetFeaturesToApply());
+    targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply());

    // Insert rule into table.
    TargetPhraseCollection &coll = GetOrCreateTargetPhraseCollection(
--- a/moses/TranslationModel/RuleTable/LoaderStandard.cpp
+++ b/moses/TranslationModel/RuleTable/LoaderStandard.cpp
@ -247,7 +247,7 @@ bool RuleTableLoaderStandard::Load(FormatType format
    }

    targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector);
-    targetPhrase->Evaluate(sourcePhrase, ruleTable.GetFeaturesToApply());
+    targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply());

    TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS);
    phraseColl.Add(targetPhrase);
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
@ -284,7 +284,7 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSenten
    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);

    targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
-    targetPhrase->Evaluate(sourcePhrase, GetFeaturesToApply());
+    targetPhrase->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());

    TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS);
    phraseColl.Add(targetPhrase);
--- a/moses/TranslationModel/SkeletonPT.cpp
+++ b/moses/TranslationModel/SkeletonPT.cpp
@ -62,7 +62,7 @@ TargetPhrase *SkeletonPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
  tp->GetScoreBreakdown().PlusEquals(this, scores);

  // score of all other ff when this rule is being loaded
-  tp->Evaluate(sourcePhrase, GetFeaturesToApply());
+  tp->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());

  return tp;
 }
--- a/moses/TranslationModel/UG/mm/ug_bitext.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext.h
@ -499,6 +499,16 @@ namespace Moses {
 		aln[k] += s2 - s1;
 	      Token const* o = (j->fwd ? ag.bt.T2 : ag.bt.T1)->sntStart(sid);
 	      float sample_weight = 1./((s2-s1+1)*(e2-e1+1));
+
+	      vector<uint64_t> seen; 
+	      seen.reserve(100);
+	      // It is possible that the phrase extraction extracts the same
+	      // phrase twice, e.g., when word a co-occurs with sequence b b b
+	      // but is aligned only to the middle word. We can only count
+	      // each phrase pair once per source phrase occurrence, or else
+	      // run the risk of having more joint counts than marginal
+	      // counts.
+
 	      for (size_t s = s1; s <= s2; ++s)
 		{
 		  sptr<iter> b = (j->fwd ? ag.bt.I2 : ag.bt.I1)->find(o+s,e1-s);
@ -507,7 +517,26 @@ namespace Moses {
 		  // assert(b);
 		  for (size_t i = e1; i <= e2; ++i)
 		    {
-		      if (! j->stats->add(b->getPid(),sample_weight,aln,
+		      uint64_t tpid = b->getPid();
+		      size_t s = 0;
+		      while (s < seen.size() && seen[s] != tpid) ++s;
+		      if (s < seen.size())
+			{
+#if 0
+			  size_t sid, off, len;
+			  parse_pid(tpid,sid,off,len);
+			  cerr << "HA, gotcha! " << sid << ":" << off << " at " << HERE << endl;
+			  for (size_t z = 0; z < len; ++z)
+			    {
+			      id_type tid = ag.bt.T2->sntStart(sid)[off+z].id();
+			      cerr << (*ag.bt.V2)[tid] << " "; 
+			    }
+			  cerr << endl;
+#endif
+			  continue;
+			}
+		      seen.push_back(tpid);
+		      if (! j->stats->add(tpid,sample_weight,aln,
 					  b->approxOccurrenceCount(),
 					  po_fwd,po_bwd))
 			{
--- a/moses/TranslationModel/UG/mmsapt.cpp
+++ b/moses/TranslationModel/UG/mmsapt.cpp
@ -476,7 +476,7 @@ namespace Moses
 	tp->AddWord(w);
      }
    tp->GetScoreBreakdown().Assign(this, fvals);
-    tp->Evaluate(src);
+    tp->EvaluateInIsolation(src);
    return tp;
  }

--- a/moses/TranslationOption.cpp
+++ b/moses/TranslationOption.cpp
@ -71,10 +71,10 @@ void TranslationOption::CacheLexReorderingScores(const LexicalReordering &produc
  m_lexReorderingScores[&producer] = score;
 }

-void TranslationOption::Evaluate(const InputType &input)
+void TranslationOption::EvaluateWithSourceContext(const InputType &input)
 {
  const InputPath &inputPath = GetInputPath();
-  m_targetPhrase.Evaluate(input, inputPath);
+  m_targetPhrase.EvaluateWithSourceContext(input, inputPath);
 }

 const InputPath &TranslationOption::GetInputPath() const
--- a/moses/TranslationOption.h
+++ b/moses/TranslationOption.h
@ -135,7 +135,7 @@ public:
    return m_targetPhrase.GetScoreBreakdown();
  }

-  void Evaluate(const InputType &input);
+  void EvaluateWithSourceContext(const InputType &input);

  /** returns cached scores */
  inline const Scores *GetLexReorderingScores(const LexicalReordering *scoreProducer) const {
--- a/moses/TranslationOptionCollection.cpp
+++ b/moses/TranslationOptionCollection.cpp
@ -212,6 +212,12 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const InputPath &inputPa
  float unknownScore = FloorScore(TransformScore(0));
  const Word &sourceWord = inputPath.GetPhrase().GetWord(0);

+  // hack. Once the OOV FF is a phrase table, get rid of this. firstPt stays NULL without any phrase table
+  PhraseDictionary *firstPt = NULL;
+  if (PhraseDictionary::GetColl().size() != 0) { // only index the collection when it is non-empty
+    firstPt = PhraseDictionary::GetColl()[0];
+  }
+
  // unknown word, add as trans opt
  FactorCollection &factorCollection = FactorCollection::Instance();

@ -231,7 +237,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const InputPath &inputPa
    // modify the starting bitmap
  }

-  TargetPhrase targetPhrase(NULL);
+  TargetPhrase targetPhrase(firstPt);

  if (!(staticData.GetDropUnknown() || isEpsilon) || isDigit) {
    // add to dictionary
@ -266,7 +272,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const InputPath &inputPa
  m_unksrcs.push_back(&sourcePhrase);
  WordsRange range(sourcePos, sourcePos + length - 1);

-  targetPhrase.Evaluate(sourcePhrase);
+  targetPhrase.EvaluateInIsolation(sourcePhrase);

  TranslationOption *transOpt = new TranslationOption(range, targetPhrase);
  transOpt->SetInputPath(inputPath);
@ -410,7 +416,7 @@ void TranslationOptionCollection::CreateTranslationOptions()

  ProcessUnknownWord();

-  EvaluateWithSource();
+  EvaluateWithSourceContext();

  // Prune
  Prune();
@ -535,7 +541,7 @@ void TranslationOptionCollection::SetInputScore(const InputPath &inputPath, Part
  }
 }

-void TranslationOptionCollection::EvaluateWithSource()
+void TranslationOptionCollection::EvaluateWithSourceContext()
 {
  const size_t size = m_source.GetSize();
  for (size_t startPos = 0 ; startPos < size ; ++startPos) {
@ -549,7 +555,7 @@ void TranslationOptionCollection::EvaluateWithSource()
      TranslationOptionList::const_iterator iterTransOpt;
      for(iterTransOpt = transOptList.begin() ; iterTransOpt != transOptList.end() ; ++iterTransOpt) {
        TranslationOption &transOpt = **iterTransOpt;
-        transOpt.Evaluate(m_source);
+        transOpt.EvaluateWithSourceContext(m_source);
      }
    }
  }
--- a/moses/TranslationOptionCollection.h
+++ b/moses/TranslationOptionCollection.h
@ -96,7 +96,7 @@ protected:
  //! implemented by inherited class, called by this class
  virtual void ProcessUnknownWord(size_t sourcePos)=0;

-  void EvaluateWithSource();
+  void EvaluateWithSourceContext();

  void CacheLexReordering();

--- a/moses/TranslationOptionCollectionLattice.cpp
+++ b/moses/TranslationOptionCollectionLattice.cpp
@ -147,7 +147,7 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()
    		const TargetPhrase &tp = **iter;
    		TranslationOption *transOpt = new TranslationOption(range, tp);
    		transOpt->SetInputPath(path);
-    		transOpt->Evaluate(m_source);
+    		transOpt->EvaluateWithSourceContext(m_source);

    		Add(transOpt);
    	}
--- a/moses/TreeInput.cpp
+++ b/moses/TreeInput.cpp
@ -5,6 +5,7 @@
 #include "Util.h"
 #include "XmlOption.h"
 #include "FactorCollection.h"
+#include "moses/TranslationModel/PhraseDictionary.h"

 using namespace std;

@ -30,6 +31,12 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
    return true;
  }

+  // hack. What pt should XML trans opt be assigned to? For now the first one; NULL if there is none
+  PhraseDictionary *firstPt = NULL;
+  if (PhraseDictionary::GetColl().size() != 0) { // only index the collection when it is non-empty
+    firstPt = PhraseDictionary::GetColl()[0];
+  }
+
  // break up input into a vector of xml tags and text
  // example: (this), (<b>), (is a), (</b>), (test .)
  vector<string> xmlTokens = TokenizeXml(line);
@ -173,7 +180,7 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
          //TRACE_ERR("number of translations: " << altTexts.size() << endl);
          for (size_t i=0; i<altTexts.size(); ++i) {
            // set target phrase
-            TargetPhrase targetPhrase(NULL);
+            TargetPhrase targetPhrase(firstPt);
            // targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i],factorDelimiter, NULL);
            targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i], NULL);

@ -203,7 +210,7 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
            // convert from prob to log-prob
            float scoreValue = FloorScore(TransformScore(probValue));
            targetPhrase.SetXMLScore(scoreValue);
-            targetPhrase.Evaluate(sourcePhrase);
+            targetPhrase.EvaluateInIsolation(sourcePhrase);

            // set span and create XmlOption
            WordsRange range(startPos+1,endPos);
--- a/moses/XmlOption.cpp
+++ b/moses/XmlOption.cpp
@ -30,6 +30,7 @@
 #include "TargetPhrase.h"
 #include "ReorderingConstraint.h"
 #include "FactorCollection.h"
+#include "moses/TranslationModel/PhraseDictionary.h"

 namespace Moses
 {
@ -160,6 +161,12 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon

  const StaticData &staticData = StaticData::Instance();

+  // hack. What pt should XML trans opt be assigned to? For now the first one; NULL if there is none
+  PhraseDictionary *firstPt = NULL;
+  if (PhraseDictionary::GetColl().size() != 0) { // only index the collection when it is non-empty
+    firstPt = PhraseDictionary::GetColl()[0];
+  }
+
  // no xml tag? we're done.
 //if (line.find_first_of('<') == string::npos) {
  if (line.find(lbrackStr) == string::npos) {
@ -361,7 +368,7 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
              float scoreValue = FloorScore(TransformScore(probValue));

              WordsRange range(startPos + offset,endPos-1 + offset); // span covered by phrase
-              TargetPhrase targetPhrase(NULL);
+              TargetPhrase targetPhrase(firstPt);
              // targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i],factorDelimiter, NULL);
              targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i], NULL);

@ -375,7 +382,7 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
              }

              targetPhrase.SetXMLScore(scoreValue);
-              targetPhrase.Evaluate(sourcePhrase);
+              targetPhrase.EvaluateInIsolation(sourcePhrase);

              XmlOption *option = new XmlOption(range,targetPhrase);
              assert(option);
--- a/phrase-extract/PropertiesConsolidator.cpp
+++ b/phrase-extract/PropertiesConsolidator.cpp
@ -0,0 +1,159 @@
+/***********************************************************************
+  Moses - factored phrase-based language decoder
+  Copyright (C) University of Edinburgh
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 2.1 of the License, or (at your option) any later version.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#include "PropertiesConsolidator.h"
+
+#include <sstream>
+#include <limits>
+#include <vector>
+
+#include "moses/Util.h"
+#include "phrase-extract/InputFileStream.h"
+#include "phrase-extract/OutputFileStream.h"
+
+
+namespace MosesTraining
+{
+
+// Reads "<label> <index>" lines from sourceLabelSetFile into m_sourceLabels and
+// switches SourceLabels processing on. Throws on an unreadable or duplicate entry.
+void PropertiesConsolidator::ActivateSourceLabelsProcessing(const std::string &sourceLabelSetFile)
+{
+  Moses::InputFileStream inFile(sourceLabelSetFile);
+  // read source label set
+  m_sourceLabels.clear();
+  std::string line;
+  while (getline(inFile, line)) {
+    std::istringstream tokenizer(line);
+    std::string label;
+    size_t index;
+    if (! (tokenizer >> label)) continue; // blank line: nothing to register
+    if (! (tokenizer >> index)) { // operator>> reports failure via the stream state, it does not throw
+      UTIL_THROW2("Error reading source label set file " << sourceLabelSetFile << " .");
+    }
+    std::pair< std::map<std::string,size_t>::iterator, bool > inserted = m_sourceLabels.insert( std::pair<std::string,size_t>(label,index) );
+    UTIL_THROW_IF2(!inserted.second,"Source label set file " << sourceLabelSetFile << " should contain each syntactic label only once.");
+  }
+
+  inFile.Close();
+
+  m_sourceLabelsFlag = true;
+}
+
+
+// Rewrites a properties string of the form "{{Key value}} {{Key value}} ...".
+// A SourceLabels value has its label strings replaced by the indices loaded by
+// ActivateSourceLabelsProcessing(); every other property is copied through.
+// Throws via UTIL_THROW2 / UTIL_THROW_IF2 on a malformed property or unknown label.
+std::string PropertiesConsolidator::ProcessPropertiesString(const std::string &propertiesString) const
+{
+  if ( propertiesString.empty() ) {
+    return propertiesString;
+  }
+
+  std::ostringstream out;
+  std::vector<std::string> toks;
+  Moses::TokenizeMultiCharSeparator(toks, propertiesString, "{{");
+  for (size_t i = 1; i < toks.size(); ++i) { // toks[0] (text before the first "{{") is skipped
+    std::string &tok = toks[i];
+    if (tok.empty()) {
+      continue;
+    }
+    size_t endPos = tok.rfind("}");
+    tok = tok.substr(0, endPos - 1); // drop the last '}', the character before it, and anything after
+    std::vector<std::string> keyValue = Moses::TokenizeFirstOnly(tok, " ");
+    // Run-time check instead of assert(): keyValue[1] is read below even when NDEBUG is set.
+    UTIL_THROW_IF2(keyValue.size() != 2, "Flawed property, expected key and value: " << tok);
+
+    if ( keyValue[0] != "SourceLabels" || !m_sourceLabelsFlag ) {
+      // any other property, or source label processing not activated: copy through
+      out << " {{" << keyValue[0] << " " << keyValue[1] << "}}";
+      continue;
+    }
+
+    // SourceLabels additional property: replace strings with vocabulary indices
+    out << " {{" << keyValue[0];
+
+    std::istringstream tokenizer(keyValue[1]);
+    size_t nNTs;
+    double totalCount;
+
+    if (! (tokenizer >> nNTs)) { // first token: number of non-terminals (incl. left-hand side)
+      UTIL_THROW2("Not able to read number of non-terminals from SourceLabels property. "
+                  << "Flawed SourceLabels property?");
+    }
+    assert( nNTs > 0 );
+    out << " " << nNTs;
+
+    if (! (tokenizer >> totalCount)) { // second token: overall rule count
+      UTIL_THROW2("Not able to read overall rule count from SourceLabels property. "
+                  << "Flawed SourceLabels property?");
+    }
+    assert( totalCount > 0.0 );
+    out << " " << totalCount;
+
+    // A failed operator>> only sets the stream state, so every read below is checked;
+    // an unchecked one would write a stale or empty token to the output.
+    while (tokenizer.peek() != EOF) {
+      size_t numberOfLHSsGivenRHS = std::numeric_limits<std::size_t>::max();
+      std::string token;
+      if (nNTs > 1) { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
+        for (size_t j=0; j<nNTs-1; ++j) { // RHS source non-terminal labels
+          if (! (tokenizer >> token)) { // RHS source non-terminal label
+            UTIL_THROW2("Flawed item in SourceLabels property? " << keyValue[1]);
+          }
+          std::map<std::string,size_t>::const_iterator found = m_sourceLabels.find(token);
+          UTIL_THROW_IF2(found == m_sourceLabels.end(), "Label \"" << token << "\" from the phrase table not found in given label set.");
+          out << " " << found->second;
+        }
+
+        if (! (tokenizer >> token)) { // sourceLabelsRHSCount
+          UTIL_THROW2("Flawed item in SourceLabels property? " << keyValue[1]);
+        }
+        out << " " << token;
+
+        if (! (tokenizer >> numberOfLHSsGivenRHS)) {
+          UTIL_THROW2("Flawed item in SourceLabels property? " << keyValue[1]);
+        }
+        out << " " << numberOfLHSsGivenRHS;
+      }
+
+      for (size_t j=0; j<numberOfLHSsGivenRHS && tokenizer.peek()!=EOF; ++j) { // LHS source non-terminal labels seen with this RHS
+        if (! (tokenizer >> token)) { // LHS source non-terminal label
+          UTIL_THROW2("Flawed item in SourceLabels property? " << keyValue[1]);
+        }
+        std::map<std::string,size_t>::const_iterator found = m_sourceLabels.find(token);
+        UTIL_THROW_IF2(found == m_sourceLabels.end() ,"Label \"" << token << "\" from the phrase table not found in given label set.");
+        out << " " << found->second;
+
+        if (! (tokenizer >> token)) { // ruleSourceLabelledCount
+          UTIL_THROW2("Flawed item in SourceLabels property? " << keyValue[1]);
+        }
+        out << " " << token;
+      }
+    }
+
+    out << "}}";
+  }
+
+  return out.str();
+}
+
+}  // namespace MosesTraining
+
--- a/phrase-extract/PropertiesConsolidator.h
+++ b/phrase-extract/PropertiesConsolidator.h
@ -0,0 +1,48 @@
+/***********************************************************************
+  Moses - factored phrase-based language decoder
+  Copyright (C) University of Edinburgh
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 2.1 of the License, or (at your option) any later version.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+
+#pragma once
+
+#include <string>
+#include <map>
+
+
+namespace MosesTraining
+{
+
+// Rewrites the property strings of phrase table entries (see ProcessPropertiesString).
+class PropertiesConsolidator
+{
+public:
+  // source label processing starts switched off
+  PropertiesConsolidator() : m_sourceLabelsFlag(false) {};
+  // loads the label set file and switches source label processing on
+  void ActivateSourceLabelsProcessing(const std::string &sourceLabelSetFile);
+  // returns propertiesString with SourceLabels label strings replaced by their indices (if activated)
+  std::string ProcessPropertiesString(const std::string &propertiesString) const;
+
+private:
+  bool m_sourceLabelsFlag; // set by ActivateSourceLabelsProcessing()
+  std::map<std::string,size_t> m_sourceLabels; // label string -> index read from the label set file
+
+};
+
+}  // namespace MosesTraining
+
--- a/phrase-extract/consolidate-main.cpp
+++ b/phrase-extract/consolidate-main.cpp
@ -28,6 +28,7 @@
 #include "tables-core.h"
 #include "InputFileStream.h"
 #include "OutputFileStream.h"
+#include "PropertiesConsolidator.h"

 using namespace std;

@ -37,13 +38,14 @@ bool phraseCountFlag = false;
 bool lowCountFlag = false;
 bool goodTuringFlag = false;
 bool kneserNeyFlag = false;
+bool sourceLabelsFlag = false;
 bool logProbFlag = false;
 inline float maybeLogProb( float a )
 {
  return logProbFlag ? log(a) : a;
 }

-void processFiles( char*, char*, char*, char* );
+void processFiles( char*, char*, char*, char*, char* );
 void loadCountOfCounts( char* );
 void breakdownCoreAndSparse( string combined, string &core, string &sparse );
 bool getLine( istream &fileP, vector< string > &item );
@ -57,13 +59,14 @@ int main(int argc, char* argv[])
       << "consolidating direct and indirect rule tables\n";

  if (argc < 4) {
-    cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect] [--PhraseCount] \n";
+    cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect] [--PhraseCount] [--GoodTuring counts-of-counts-file] [--KneserNey counts-of-counts-file] [--LowCountFeature] [--SourceLabels source-labels-file] \n";
    exit(1);
  }
  char* &fileNameDirect = argv[1];
  char* &fileNameIndirect = argv[2];
  char* &fileNameConsolidated = argv[3];
  char* fileNameCountOfCounts;
+  char* fileNameSourceLabelSet;

  for(int i=4; i<argc; i++) {
    if (strcmp(argv[i],"--Hierarchical") == 0) {
@ -114,13 +117,21 @@ int main(int argc, char* argv[])
    } else if (strcmp(argv[i],"--LogProb") == 0) {
      logProbFlag = true;
      cerr << "using log-probabilities\n";
+    } else if (strcmp(argv[i],"--SourceLabels") == 0) {
+      sourceLabelsFlag = true;
+      if (i+1==argc) {
+        cerr << "ERROR: specify source label set file!\n";
+        exit(1);
+      }
+      fileNameSourceLabelSet = argv[++i];
+      cerr << "processing source labels property\n";
    } else {
      cerr << "ERROR: unknown option " << argv[i] << endl;
      exit(1);
    }
  }

-  processFiles( fileNameDirect, fileNameIndirect, fileNameConsolidated, fileNameCountOfCounts );
+  processFiles( fileNameDirect, fileNameIndirect, fileNameConsolidated, fileNameCountOfCounts, fileNameSourceLabelSet );
 }

 vector< float > countOfCounts;
@ -169,7 +180,7 @@ void loadCountOfCounts( char* fileNameCountOfCounts )
  if (kneserNey_D3 > 2.9) kneserNey_D3 = 2.9;
 }

-void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameConsolidated, char* fileNameCountOfCounts )
+void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameConsolidated, char* fileNameCountOfCounts, char* fileNameSourceLabelSet )
 {
  if (goodTuringFlag || kneserNeyFlag)
    loadCountOfCounts( fileNameCountOfCounts );
@ -198,6 +209,13 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
    exit(1);
  }

+  // Properties consolidator: the properties field of every entry is passed
+  // through it further down; source label processing is only activated when sourceLabelsFlag is set.
+  MosesTraining::PropertiesConsolidator propertiesConsolidator;
+  if (sourceLabelsFlag) {
+    propertiesConsolidator.ActivateSourceLabelsProcessing(fileNameSourceLabelSet);
+  }
+
  // loop through all extracted phrase translations
  int i=0;
  while(true) {
@ -307,12 +325,13 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
    // counts, for debugging
    fileConsolidated << "||| " << countE << " " << countF << " " << countEF;

-    // count bin feature (as a sparse feature)
+    // sparse features
    fileConsolidated << " |||";
    if (directSparseScores.compare("") != 0)
      fileConsolidated << " " << directSparseScores;
    if (indirectSparseScores.compare("") != 0)
      fileConsolidated << " " << indirectSparseScores;
+    // count bin feature (as a sparse feature)
    if (sparseCountBinFeatureFlag) {
      bool foundBin = false;
      for(size_t i=0; i < countBin.size(); i++) {
@ -332,9 +351,13 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
    }

    // arbitrary key-value pairs
-    fileConsolidated << " ||| ";
+    fileConsolidated << " |||";
    if (itemDirect.size() >= 6) {
-      fileConsolidated << itemDirect[5];
+      // The properties field always goes through the consolidator, regardless
+      // of sourceLabelsFlag. Each property it emits is preceded by a blank,
+      // which is why the separator written just above is " |||" without one.
+        fileConsolidated << propertiesConsolidator.ProcessPropertiesString(itemDirect[5]);
+      // (A sourceLabelsFlag switch around this call was left disabled.)
    }

    fileConsolidated << endl;
--- a/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
@ -248,7 +248,7 @@ int ExtractGHKM::Main(int argc, char *argv[])

      const std::vector<const Subgraph *> &rules = (*p)->GetRules();

-      REO_POS l2rOrientation, r2lOrientation;
+      REO_POS l2rOrientation=UNKNOWN, r2lOrientation=UNKNOWN; // defined default: the block below only runs if options.phraseOrientation && !rules.empty()
      if (options.phraseOrientation && !rules.empty()) {
        int sourceSpanBegin = *((*p)->GetSpan().begin());
        int sourceSpanEnd   = *((*p)->GetSpan().rbegin());
@ -617,9 +617,8 @@ void ExtractGHKM::WriteGlueGrammar(
    }
  }

-  std::string sourceTopLabel = "TOPLABEL";
-  std::string sourceSLabel = "S";
-  std::string sourceSomeLabel = "SOMELABEL";
+  size_t sourceLabelGlueTop = 0; // numeric id used for the top glue label; matches the "GlueTop 0" entry score-parallel.perl puts first in the merged source label file
+  size_t sourceLabelGlueX = 1; // numeric id used for the generic glue label; matches the "GlueX 1" entry score-parallel.perl writes second

  // basic rules
  out << "<s> [X] ||| <s> [" << topLabel << "] ||| 1 ||| ||| ||| |||";
@ -627,7 +626,7 @@ void ExtractGHKM::WriteGlueGrammar(
    out << " {{Tree [" << topLabel << " <s>]}}";
  }
  if (options.sourceLabels) {
-    out << " {{SourceLabels 1 1 " << sourceTopLabel << " 1}}";
+    out << " {{SourceLabels 1 1 " << sourceLabelGlueTop << " 1}}";
  }
  out << std::endl;

@ -636,7 +635,7 @@ void ExtractGHKM::WriteGlueGrammar(
    out << " {{Tree [" << topLabel << " [" << topLabel << "] </s>]}}";
  }
  if (options.sourceLabels) {
-    out << " {{SourceLabels 2 1 " << sourceTopLabel << " 1 1 " << sourceTopLabel << " 1}}";
+    out << " {{SourceLabels 2 1 " << sourceLabelGlueTop << " 1 1 " << sourceLabelGlueTop << " 1}}";
  }
  out << std::endl;

@ -648,7 +647,7 @@ void ExtractGHKM::WriteGlueGrammar(
      out << " {{Tree [" << topLabel << " <s> [" << i->first << "] </s>]}}";
    }
    if (options.sourceLabels) {
-      out << " {{SourceLabels 2 1 " << sourceSLabel << " 1 1 " << sourceTopLabel << " 1}}";
+      out << " {{SourceLabels 2 1 " << sourceLabelGlueX << " 1 1 " << sourceLabelGlueTop << " 1}}";
    }
    out << std::endl;
  }
@ -661,7 +660,7 @@ void ExtractGHKM::WriteGlueGrammar(
      out << " {{Tree [" << topLabel << " ["<< topLabel << "] [" << *i << "]]}}";
    }
    if (options.sourceLabels) {
-      out << " {{SourceLabels 3 2.718 " << sourceTopLabel << " " << sourceSomeLabel << " 2.718 1 " << sourceTopLabel << " 2.718}}"; // TODO: there should be better options than using "SOMELABEL" 
+      out << " {{SourceLabels 3 2.718 " << sourceLabelGlueTop << " " << sourceLabelGlueX << " 2.718 1 " << sourceLabelGlueTop << " 2.718}}"; // TODO: there should be better options than using the generic glue label (sourceLabelGlueX)
    }
    out << std::endl;
  }
@ -672,7 +671,7 @@ void ExtractGHKM::WriteGlueGrammar(
    out << " {{Tree [" << topLabel << " [" << topLabel << "] [X]]}}";
  }
  if (options.sourceLabels) {
-    out << " {{SourceLabels 3 1 " << sourceTopLabel << " " << sourceSomeLabel << " 1 1 " << sourceTopLabel << " 1}}"; // TODO: there should be better options than using "SOMELABEL"
+    out << " {{SourceLabels 3 1 " << sourceLabelGlueTop << " " << sourceLabelGlueX << " 1 1 " << sourceLabelGlueTop << " 1}}"; // TODO: there should be better options than using the generic glue label (sourceLabelGlueX)
  }
  out << std::endl;
 }
--- a/phrase-extract/extract-ghkm/PhraseOrientation.cpp
+++ b/phrase-extract/extract-ghkm/PhraseOrientation.cpp
@ -187,7 +187,7 @@ const std::string PhraseOrientation::GetOrientationInfoString(int startF, int en

 const std::string PhraseOrientation::GetOrientationInfoString(int startF, int startE, int endF, int endE, REO_DIR direction) const
 {
-  REO_POS hierPrevOrient, hierNextOrient;
+  REO_POS hierPrevOrient=UNKNOWN, hierNextOrient=UNKNOWN; // start from a defined value; presumably not every path below assigns both (rest of the function not visible here - confirm)

  bool connectedLeftTopP  = IsAligned( startF-1, startE-1 );
  bool connectedRightTopP = IsAligned( endF+1,   startE-1 );
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@ -1860,7 +1860,7 @@ sub define_tuning_tune {
 	$cmd .= " --lambdas \"$lambda\"" if $lambda;
 	$cmd .= " --continue" if $tune_continue;
 	$cmd .= " --skip-decoder" if $skip_decoder;
-	$cmd .= " --inputtype $tune_inputtype" if $tune_inputtype;
+	$cmd .= " --inputtype $tune_inputtype" if defined($tune_inputtype); # defined() rather than truth test, so that an input type of 0 is still passed on
    
 	my $qsub_args = &get_qsub_args("TUNING");
 	$cmd .= " --queue-flags=\"$qsub_args\"" if ($CLUSTER && $qsub_args);
@ -2217,6 +2217,10 @@ sub define_training_extract_phrases {
        my $phrase_orientation_priors_file = &versionize(&long_file_name("phrase-orientation-priors","model",""));
        $cmd .= "-phrase-orientation-priors-file $phrase_orientation_priors_file ";
      }
+
+      if (&get("TRAINING:ghkm-source-labels")) { # experiment setting that enables GHKM source labels
+        $cmd .= "-ghkm-source-labels "; # same option name as 'ghkm-source-labels' in train-model.perl's option list
+      }
    }

    my $extract_settings = &get("TRAINING:extract-settings");
@ -2254,6 +2258,11 @@ sub define_training_build_ttable {
        my $phrase_orientation_priors_file = &versionize(&long_file_name("phrase-orientation-priors","model",""));
        $cmd .= "-phrase-orientation-priors-file $phrase_orientation_priors_file ";
      }
+      if (&get("TRAINING:ghkm-source-labels")) { # experiment setting that enables GHKM source labels
+        $cmd .= "-ghkm-source-labels ";
+        my $source_labels_file = &versionize(&long_file_name("source-labels","model","")); # same expression as in define_training_create_config, so both steps name the same file
+        $cmd .= "-ghkm-source-labels-file $source_labels_file "; # same option name as 'ghkm-source-labels-file=s' in train-model.perl's option list
+      }
    }
    
    &create_step($step_id,$cmd);
@ -2438,6 +2447,12 @@ sub define_training_create_config {
      }
    }

+    if (&get("TRAINING:ghkm-source-labels")) { # experiment setting that enables GHKM source labels
+      $cmd .= "-ghkm-source-labels ";
+      my $source_labels_file = &versionize(&long_file_name("source-labels","model","")); # same expression as in define_training_build_ttable, so both steps name the same file
+      $cmd .= "-ghkm-source-labels-file $source_labels_file "; # same option name as 'ghkm-source-labels-file=s' in train-model.perl's option list
+    }
+
    # sparse lexical features provide additional content for config file
    $cmd .= "-additional-ini-file $sparse_lexical_features.ini " if $sparse_lexical_features;

@ -3412,7 +3427,7 @@ sub check_backoff_and_get_array {
 # the following two functions deal with getting information about
 # files that are passed between steps. this are either specified
 # in the meta file (default) or in the configuration file (here called
-# 'specified', in the step management refered to as 'given').
+# 'specified', in the step management referred to as 'given').

 sub get_specified_or_default_file {
    my ($specified_module,$specified_set,$specified_parameter,
--- a/scripts/generic/extract-parallel.perl
+++ b/scripts/generic/extract-parallel.perl
@ -219,14 +219,14 @@ foreach (@children) {
 	waitpid($_, 0);
 }

-# glue rules
+# merge glue rules
 if (defined($glueFile)) {
  my $cmd = "cat $TMPDIR/glue.* | LC_ALL=C sort | uniq > $glueFile";
  print STDERR "Merging glue rules: $cmd \n";
  print STDERR `$cmd`;
 }

-# phrase orientation priors (GHKM extraction)
+# merge phrase orientation priors (GHKM extraction)
 if ($phraseOrientation && defined($phraseOrientationPriorsFile)) {
  print STDERR "Merging phrase orientation priors\n";

--- a/scripts/generic/score-parallel.perl
+++ b/scripts/generic/score-parallel.perl
@ -27,10 +27,22 @@ my $scoreCmd		= $ARGV[2];
 my $extractFile = $ARGV[3]; # 1st arg of extract argument
 my $lexFile 		= $ARGV[4]; 
 my $ptHalf 			= $ARGV[5]; # output
+my $inverse = 0;
+my $sourceLabelsFile;

 my $otherExtractArgs= "";
 for (my $i = 6; $i < $#ARGV; ++$i)
 {
+  if ($ARGV[$i] eq '--SourceLabels') { # takes one value: the merged source label file to write at the end of this script
+    $sourceLabelsFile = $ARGV[++$i]; # consume the file name here so it is not appended to $otherExtractArgs
+    $otherExtractArgs .= "--SourceLabels --SourceLabelCountsLHS --SourceLabelSet "; # these flags are appended in place of the file name
+    next;
+  }
+  if ($ARGV[$i] eq '--Inverse') {
+    $inverse = 1; # remembered because the source label files are only merged when this is not set
+    $otherExtractArgs .= $ARGV[$i] ." "; # --Inverse itself is still appended
+    next;
+  }
  $otherExtractArgs .= $ARGV[$i] ." ";
 }
 #$scoreCmd $extractFile $lexFile $ptHalf $otherExtractArgs
@ -258,6 +270,14 @@ if (-e $cocPath)
  close(FHCOC);
 }

+# merge source label files (non-inverse run only): ids 0 and 1 are reserved for GlueTop and GlueX, the sorted unique labels of all parts follow with ids from 2 (perl's $. counts lines from 1, hence $.+1)
+if (!$inverse && defined($sourceLabelsFile)) 
+{
+  my $cmd = "(echo \"GlueTop 0\"; echo \"GlueX 1\"; cat $TMPDIR/phrase-table.half.*.gz.syntaxLabels.src | LC_ALL=C sort | uniq | perl -pe \"s/\$/ \@{[\$.+1]}/\") > $sourceLabelsFile";
+  print STDERR "Merging source label files: $cmd \n";
+  systemCheck($cmd); # checked like the clean-up command below, instead of discarding the exit status via backticks
+}
+
 $cmd = "rm -rf $TMPDIR \n";
 print STDERR $cmd;
 systemCheck($cmd);
--- a/scripts/share/nonbreaking_prefixes/README.txt
+++ b/scripts/share/nonbreaking_prefixes/README.txt
@ -2,4 +2,7 @@ The language suffix can be found here:

 http://www.loc.gov/standards/iso639-2/php/code_list.php

+This code includes data from Daniel Naber's LanguageTool (Czech abbreviations).
+This code includes data from the Czech Wiktionary (also Czech abbreviations).
+

--- a/scripts/tokenizer/basic-protected-patterns
+++ b/scripts/tokenizer/basic-protected-patterns
@ -0,0 +1,5 @@
+<\/?\S+\/?>
+<\S+( [a-zA-Z0-9]+\=\"?[^\"]*\")+ ?\/?>
+<\S+( [a-zA-Z0-9]+\=\'?[^\']*\')+ ?\/?>
+[\w\-\_\.]+\@([\w\-\_]+\.)+[a-zA-Z]{2,}
+(http[s]?|ftp):\/\/[^:\/\s]+(\/\w+)*\/[\w\-\.]+
--- a/scripts/tokenizer/tokenizer.perl
+++ b/scripts/tokenizer/tokenizer.perl
@ -232,15 +232,20 @@ sub tokenize
    # Find protected patterns
    my @protected = ();
    foreach my $protected_pattern (@protected_patterns) {
-      foreach ($text =~ /($protected_pattern)/) {
-        push @protected, $_;
+      my $t = $text; # work on a copy; $text itself is only rewritten after all matches are collected
+      while ($t =~ /(?<PATTERN>$protected_pattern)(?<TAIL>.*)$/) { # named groups: a pattern may contain capture groups of its own, so the tail is not necessarily $2
+        push @protected, $+{PATTERN};
+        $t = $+{TAIL}; # keep searching in the text after this match
      }
    }

    for (my $i = 0; $i < scalar(@protected); ++$i) {
      my $subst = sprintf("THISISPROTECTED%.3d", $i);
-      $text =~ s,\Q$protected[$i],$subst,g;
+      $text =~ s,\Q$protected[$i], $subst ,g;
    }
+    $text =~ s/ +/ /g;
+    $text =~ s/^ //g;
+    $text =~ s/ $//g;

    # seperate out all "other" special characters
    $text =~ s/([^\p{IsAlnum}\s\.\'\`\,\-])/ $1 /g;
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@ -127,8 +127,8 @@ my $___NOCASE = 0;
 # Use "--nonorm" to non normalize translation before computing scores
 my $___NONORM = 0;

-# set 0 if input type is text, set 1 if input type is confusion network
-my $___INPUTTYPE = 0;
+# set 0 if input type is text, set 1 if input type is confusion network, set 3 if input type is parse tree; undefined by default, in which case no -inputtype option is added to the decoder command lines
+my $___INPUTTYPE;


 my $mertdir = undef; # path to new mert directory
@ -1228,14 +1228,18 @@ sub run_decoder {

    if (defined $___JOBS && $___JOBS > 0) {
      die "Hypergraph mira not supported by moses-parallel" if $___HG_MIRA;
-      $decoder_cmd = "$moses_parallel_cmd $pass_old_sge -config $___CONFIG -inputtype $___INPUTTYPE -qsub-prefix mert$run -queue-parameters \"$queue_flags\" -decoder-parameters \"$___DECODER_FLAGS $decoder_config\" $lsamp_cmd -n-best-list \"$filename $___N_BEST_LIST_SIZE\" -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > run$run.out";
+      $decoder_cmd = "$moses_parallel_cmd $pass_old_sge -config $___CONFIG";
+      $decoder_cmd .= " -inputtype $___INPUTTYPE" if defined($___INPUTTYPE); 
+      $decoder_cmd .= " -qsub-prefix mert$run -queue-parameters \"$queue_flags\" -decoder-parameters \"$___DECODER_FLAGS $decoder_config\" $lsamp_cmd -n-best-list \"$filename $___N_BEST_LIST_SIZE distinct\" -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > run$run.out";
    } else {
-      my $nbest_list_cmd = "-n-best-list $filename $___N_BEST_LIST_SIZE";
+      my $nbest_list_cmd = "-n-best-list $filename $___N_BEST_LIST_SIZE distinct";
      if ($___HG_MIRA) {
        safesystem("rm -rf $hypergraph_dir");
        $nbest_list_cmd = "-output-search-graph-hypergraph true gz";
      }
-      $decoder_cmd = "$___DECODER $___DECODER_FLAGS  -config $___CONFIG -inputtype $___INPUTTYPE $decoder_config $lsamp_cmd $nbest_list_cmd  -input-file $___DEV_F > run$run.out";
+      $decoder_cmd = "$___DECODER $___DECODER_FLAGS  -config $___CONFIG";
+      $decoder_cmd .= " -inputtype $___INPUTTYPE" if defined($___INPUTTYPE);
+      $decoder_cmd .= " $decoder_config $lsamp_cmd $nbest_list_cmd  -input-file $___DEV_F > run$run.out";
    }

    print STDERR "Executing: $decoder_cmd \n";
@ -1309,7 +1313,9 @@ sub get_featlist_from_moses {
    print STDERR "Using cached features list: $featlistfn\n";
  } else {
    print STDERR "Asking moses for feature names and values from $___CONFIG\n";
-    my $cmd = "$___DECODER $___DECODER_FLAGS -config $configfn  -inputtype $___INPUTTYPE -show-weights > $featlistfn";
+    my $cmd = "$___DECODER $___DECODER_FLAGS -config $configfn";
+    $cmd .= " -inputtype $___INPUTTYPE" if defined($___INPUTTYPE); # leave the option out entirely when no input type was set
+    $cmd .= " -show-weights > $featlistfn"; # feature names and values are written to $featlistfn
    print STDERR "Executing: $cmd\n";
    safesystem($cmd) or die "Failed to run moses with the config $configfn";
  }
--- a/scripts/training/train-model.perl
+++ b/scripts/training/train-model.perl
@ -32,7 +32,7 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_
   $_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE,
   @_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS,
   $_DONT_ZIP,  $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, $_OSM, $_OSM_FACTORS, $_POST_DECODING_TRANSLIT, $_TRANSLITERATION_PHRASE_TABLE,
-   $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_GHKM_TREE_FRAGMENTS,$_GHKM_PHRASE_ORIENTATION,$_PHRASE_ORIENTATION_PRIORS_FILE,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS,
+   $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_GHKM_TREE_FRAGMENTS,$_GHKM_PHRASE_ORIENTATION,$_PHRASE_ORIENTATION_PRIORS_FILE,$_GHKM_SOURCE_LABELS,$_GHKM_SOURCE_LABELS_FILE,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS,
   $_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2, $_UNKNOWN_WORD_SOFT_MATCHES_FILE,
   $_OMIT_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
   $_MEMSCORE, $_FINAL_ALIGNMENT_MODEL,
@ -112,6 +112,8 @@ $_HELP = 1
 		       'ghkm-tree-fragments' => \$_GHKM_TREE_FRAGMENTS,
 		       'ghkm-phrase-orientation' => \$_GHKM_PHRASE_ORIENTATION,
 		       'phrase-orientation-priors-file=s' => \$_PHRASE_ORIENTATION_PRIORS_FILE, # currently relevant for GHKM extraction only; phrase orientation for PBT has different implementation
+               'ghkm-source-labels' => \$_GHKM_SOURCE_LABELS,
+               'ghkm-source-labels-file=s' => \$_GHKM_SOURCE_LABELS_FILE,
 		       'pcfg' => \$_PCFG,
 		       'alt-direct-rule-score-1' => \$_ALT_DIRECT_RULE_SCORE_1,
 		       'alt-direct-rule-score-2' => \$_ALT_DIRECT_RULE_SCORE_2,
@ -1427,10 +1429,15 @@ sub extract_phrase {
        $cmd .= " --PCFG" if $_PCFG;
        $cmd .= " --UnpairedExtractFormat" if $_ALT_DIRECT_RULE_SCORE_1 || $_ALT_DIRECT_RULE_SCORE_2;
        $cmd .= " --ConditionOnTargetLHS" if $_ALT_DIRECT_RULE_SCORE_1;
-        $cmd .= " --TreeFragments" if $_GHKM_TREE_FRAGMENTS;
-        $cmd .= " --PhraseOrientation" if $_GHKM_PHRASE_ORIENTATION;
-        $cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if defined($_PHRASE_ORIENTATION_PRIORS_FILE);
-        if (!defined($_GHKM)) {
+        if (defined($_GHKM)) 
+        {
+          $cmd .= " --TreeFragments" if $_GHKM_TREE_FRAGMENTS;
+          $cmd .= " --PhraseOrientation" if $_GHKM_PHRASE_ORIENTATION;
+          $cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if defined($_PHRASE_ORIENTATION_PRIORS_FILE);
+          $cmd .= " --SourceLabels" if $_GHKM_SOURCE_LABELS;
+        }
+        else
+        {
          $cmd .= " --SourceSyntax" if $_SOURCE_SYNTAX;
          $cmd .= " --TargetSyntax" if $_TARGET_SYNTAX;
          $cmd .= " --MaxSpan $max_length";
@ -1609,6 +1616,7 @@ sub score_phrase_phrase_extract {
        $cmd .= " --TreeFragments" if $_GHKM_TREE_FRAGMENTS;
        $cmd .= " --PhraseOrientation" if $_GHKM_PHRASE_ORIENTATION;
        $cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if $_GHKM_PHRASE_ORIENTATION && defined($_PHRASE_ORIENTATION_PRIORS_FILE);
+        $cmd .= " --SourceLabels $_GHKM_SOURCE_LABELS_FILE" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
        $cmd .= " $DOMAIN" if $DOMAIN;
        $cmd .= " $CORE_SCORE_OPTIONS" if defined($_SCORE_OPTIONS);
        $cmd .= " --FlexibilityScore=$FLEX_SCORER" if $_FLEXIBILITY_SCORE;
@ -1659,6 +1667,7 @@ sub score_phrase_phrase_extract {
    $cmd .= " --SparseCountBinFeature $SPARSE_COUNT_BIN" if $SPARSE_COUNT_BIN;
    $cmd .= " --GoodTuring $ttable_file.half.f2e.gz.coc" if $GOOD_TURING;
    $cmd .= " --KneserNey $ttable_file.half.f2e.gz.coc" if $KNESER_NEY;
+    $cmd .= " --SourceLabels $_GHKM_SOURCE_LABELS_FILE" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
    
    $cmd .= " | gzip -c > $ttable_file.gz";
    
@ -2164,6 +2173,7 @@ sub create_ini {
  print INI "WordPenalty\n";
  print INI "PhrasePenalty\n";
  print INI "SoftMatchingFeature name=SM0 path=$_UNKNOWN_WORD_SOFT_MATCHES_FILE\n" if $_TARGET_SYNTAX && defined($_UNKNOWN_WORD_SOFT_MATCHES_FILE);
+  print INI "SoftSourceSyntacticConstraintsFeature sourceLabelSetFile=$_GHKM_SOURCE_LABELS_FILE\n" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
  print INI $feature_spec;

  print INI "\n# dense weights for feature functions\n";
@ -2171,6 +2181,7 @@ sub create_ini {
  print INI "UnknownWordPenalty0= 1\n";
  print INI "WordPenalty0= -1\n";
  print INI "PhrasePenalty0= 0.2\n";
+  print INI "SoftSourceSyntacticConstraintsFeature0= 0.3 -0.3 -0.3\n" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
  print INI $weight_spec;
  close(INI);
 }