Merge branch 'master' of github.com:moses-smt/mosesdecoder

2024-09-20 07:42:21 +03:00 · 2013-07-24 11:52:21 +01:00 · 2013-07-24 11:52:21 +01:00 · 68779c66b9
commit 68779c66b9
parent 08f64dea28 b5584fdecf
41 changed files with 724 additions and 348 deletions
--- a/contrib/other-builds/moses-chart-cmd/.cproject
+++ b/contrib/other-builds/moses-chart-cmd/.cproject
@ -1,7 +1,5 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<?fileVersion 4.0.0?>
-
-<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
 	<storageModule moduleId="org.eclipse.cdt.core.settings">
 		<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.162355801">
 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.162355801" moduleId="org.eclipse.cdt.core.settings" name="Debug">
@ -55,6 +53,7 @@
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../randlm/lib&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/macosx&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686-m64&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686&quot;"/>
@ -77,6 +76,7 @@
 									<listOptionValue builtIn="false" value="OnDiskPt"/>
 									<listOptionValue builtIn="false" value="lm"/>
 									<listOptionValue builtIn="false" value="util"/>
+									<listOptionValue builtIn="false" value="RandLM"/>
 									<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
 									<listOptionValue builtIn="false" value="boost_system-mt"/>
 									<listOptionValue builtIn="false" value="boost_thread-mt"/>
--- a/contrib/other-builds/moses-cmd/.cproject
+++ b/contrib/other-builds/moses-cmd/.cproject
@ -1,7 +1,5 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<?fileVersion 4.0.0?>
-
-<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
 	<storageModule moduleId="org.eclipse.cdt.core.settings">
 		<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.461114338">
 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.461114338" moduleId="org.eclipse.cdt.core.settings" name="Debug">
@ -49,6 +47,7 @@
 							<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1546774818" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
 								<option id="gnu.cpp.link.option.paths.523170942" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../randlm/lib&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../cmph/lib&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/macosx&quot;"/>
@ -73,6 +72,7 @@
 									<listOptionValue builtIn="false" value="OnDiskPt"/>
 									<listOptionValue builtIn="false" value="lm"/>
 									<listOptionValue builtIn="false" value="util"/>
+									<listOptionValue builtIn="false" value="RandLM"/>
 									<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
 									<listOptionValue builtIn="false" value="boost_system-mt"/>
 									<listOptionValue builtIn="false" value="boost_thread-mt"/>
--- a/contrib/other-builds/moses/.cproject
+++ b/contrib/other-builds/moses/.cproject
@ -35,6 +35,7 @@
 									<listOptionValue builtIn="false" value="/opt/local/include/"/>
 									<listOptionValue builtIn="false" value="${workspace_loc}/../../irstlm/include"/>
 									<listOptionValue builtIn="false" value="${workspace_loc}/../../srilm/include"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../randlm/include/RandLM&quot;"/>
 									<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
 								</option>
 								<option id="gnu.cpp.compiler.option.preprocessor.def.752586397" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
@ -46,6 +47,7 @@
 									<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
 									<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
 									<listOptionValue builtIn="false" value="LM_IRST"/>
+									<listOptionValue builtIn="false" value="LM_RAND"/>
 									<listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
 									<listOptionValue builtIn="false" value="_LARGE_FILES"/>
 								</option>
@ -68,8 +70,9 @@
 							</tool>
 						</toolChain>
 					</folderInfo>
+					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
 					<sourceEntries>
-						<entry excluding="FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+						<entry excluding="FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
 					</sourceEntries>
 				</configuration>
 			</storageModule>
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@ -1061,6 +1061,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ChartBasedFeatureContext.h</locationURI>
 		</link>
+		<link>
+			<name>FF/ControlRecombination.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ControlRecombination.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/ControlRecombination.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ControlRecombination.h</locationURI>
+		</link>
 		<link>
 			<name>FF/DistortionScoreProducer.cpp</name>
 			<type>1</type>
@ -1081,6 +1091,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/FFState.h</locationURI>
 		</link>
+		<link>
+			<name>FF/Factory.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/Factory.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/Factory.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/Factory.h</locationURI>
+		</link>
 		<link>
 			<name>FF/FeatureFunction.cpp</name>
 			<type>1</type>
--- a/moses/FF/ControlRecombination.cpp
+++ b/moses/FF/ControlRecombination.cpp
@ -0,0 +1,69 @@
+#include "ControlRecombination.h"
+#include "moses/Hypothesis.h"
+#include "util/exception.hh"
+
+using namespace std;
+
+namespace Moses {
+
+// Builds the feature from its configuration line. The mode defaults to Output
+// and can be overridden with a "type=<n>" argument on that line.
+ControlRecombination::ControlRecombination(const std::string &line)
+:StatefulFeatureFunction("ControlRecombination", 0, line)
+,m_type(Output)
+{
+  // Without this call the arguments parsed from the line are never handed to
+  // SetParameter(), so "type=" would be silently ignored.
+  ReadParameters();
+}
+
+// Handles the "type" key by storing the requested mode in m_type; every other
+// key is forwarded to StatefulFeatureFunction::SetParameter.
+// Throws util::Exception when the value does not name a Type enumerator.
+void ControlRecombination::SetParameter(const std::string& key, const std::string& value)
+{
+  if (key == "type") {
+    const size_t requested = Scan<size_t>(value);
+    // Segmentation is the last enumerator; anything larger would be cast into
+    // an out-of-range enum value.
+    UTIL_THROW_IF(requested > static_cast<size_t>(Segmentation), util::Exception,
+                  "Unknown ControlRecombination type " << value);
+    m_type = static_cast<Type>(requested);
+  } else {
+    StatefulFeatureFunction::SetParameter(key, value);
+  }
+}
+
+// Wraps the current hypothesis in a newly allocated state object. Neither the
+// previous state nor the score accumulator is read or modified.
+FFState* ControlRecombination::Evaluate(
+  const Hypothesis& cur_hypo,
+  const FFState* /* prev_state */,
+  ScoreComponentCollection* /* accumulator */) const
+{
+  return new ControlRecombinationState(&cur_hypo);
+}
+
+// Chart decoding is not supported by this feature: every call throws
+// util::Exception.
+FFState* ControlRecombination::EvaluateChart(
+  const ChartHypothesis& /* cur_hypo */,
+  int /* featureID - used to index the state in the previous hypotheses */,
+  ScoreComponentCollection* /* accumulator */) const
+{
+  UTIL_THROW(util::Exception, "Not implemented yet");
+}
+
+// Returns a newly allocated state that is not attached to any hypothesis
+// (its m_hypo is NULL).
+const FFState* ControlRecombination::EmptyHypothesisState(const InputType &input) const
+{
+  // The state must actually be returned: flowing off the end of a non-void
+  // function is undefined behaviour and would also leak the allocation.
+  return new ControlRecombinationState();
+}
+
+// Default state: not attached to any hypothesis.
+ControlRecombinationState::ControlRecombinationState()
+  : m_hypo(NULL)
+{}
+
+// State for a given hypothesis; only the pointer is stored.
+ControlRecombinationState::ControlRecombinationState(const Hypothesis *hypo)
+  : m_hypo(hypo)
+{}
+
+// Orders two states by the output phrases of their hypotheses, using
+// Phrase::Compare. A state without a hypothesis (the default-constructed one)
+// sorts before any state that has one; two such states compare equal.
+int ControlRecombinationState::Compare(const FFState& other) const
+{
+  const ControlRecombinationState &rhs = static_cast<const ControlRecombinationState&>(other);
+  const Hypothesis *otherHypo = rhs.m_hypo;
+
+  // The default constructor leaves m_hypo NULL, so guard before dereferencing.
+  if (m_hypo == NULL || otherHypo == NULL) {
+    if (m_hypo == otherHypo) {
+      return 0;
+    }
+    return (m_hypo == NULL) ? -1 : 1;
+  }
+
+  Phrase thisOutputPhrase, otherOutputPhrase;
+  m_hypo->GetOutputPhrase(thisOutputPhrase);
+  otherHypo->GetOutputPhrase(otherOutputPhrase);
+
+  return thisOutputPhrase.Compare(otherOutputPhrase);
+}
+
+}
--- a/moses/FF/ControlRecombination.h
+++ b/moses/FF/ControlRecombination.h
@ -0,0 +1,58 @@
+#pragma once
+
+#include <string>
+#include "StatefulFeatureFunction.h"
+#include "moses/FF/FFState.h"
+
+namespace Moses {
+
+class ControlRecombinationState;
+
+// Forces hypotheses NOT to recombine. Intended for forced decoding.
+class ControlRecombination : public StatefulFeatureFunction
+{
+public:
+  // Modes selectable through the "type" parameter, which is read as an
+  // integer and converted to this enum.
+  enum Type {
+    None,
+    Output,
+    Segmentation
+  };
+
+  ControlRecombination(const std::string &line);
+
+  // Usable whatever the factor mask is.
+  bool IsUseable(const FactorMask &mask) const {
+    return true;
+  }
+
+  // Returns a newly allocated state for the given hypothesis.
+  virtual FFState* Evaluate(
+    const Hypothesis& cur_hypo,
+    const FFState* prev_state,
+    ScoreComponentCollection* accumulator) const;
+
+  // Chart-decoding counterpart of Evaluate().
+  virtual FFState* EvaluateChart(
+    const ChartHypothesis& /* cur_hypo */,
+    int /* featureID - used to index the state in the previous hypotheses */,
+    ScoreComponentCollection* accumulator) const;
+
+  //! return the state associated with the empty hypothesis for a given sentence
+  virtual const FFState* EmptyHypothesisState(const InputType &input) const;
+
+  // Handles the "type" key; other keys go to the base class.
+  void SetParameter(const std::string& key, const std::string& value);
+
+protected:
+  Type m_type;
+};
+
+// FFState that records which hypothesis it was created for.
+class ControlRecombinationState : public FFState
+{
+public:
+  ControlRecombinationState();
+  ControlRecombinationState(const Hypothesis *hypo);
+
+  // Compares the output phrases of the two states' hypotheses.
+  int Compare(const FFState& other) const;
+
+protected:
+  // Hypothesis this state belongs to; NULL for the default-constructed state.
+  const Hypothesis *m_hypo;
+};
+
+} // namespace
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@ -0,0 +1,177 @@
+#include "moses/FF/Factory.h"
+#include "moses/StaticData.h"
+
+#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
+#include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
+#include "moses/TranslationModel/PhraseDictionaryMemory.h"
+#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
+#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
+#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
+#include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
+#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
+
+#include "moses/LexicalReordering.h"
+
+#include "moses/FF/BleuScoreFeature.h"
+#include "moses/FF/TargetWordInsertionFeature.h"
+#include "moses/FF/SourceWordDeletionFeature.h"
+#include "moses/FF/GlobalLexicalModel.h"
+#include "moses/FF/GlobalLexicalModelUnlimited.h"
+#include "moses/FF/UnknownWordPenaltyProducer.h"
+#include "moses/FF/WordTranslationFeature.h"
+#include "moses/FF/TargetBigramFeature.h"
+#include "moses/FF/TargetNgramFeature.h"
+#include "moses/FF/PhraseBoundaryFeature.h"
+#include "moses/FF/PhrasePairFeature.h"
+#include "moses/FF/PhraseLengthFeature.h"
+#include "moses/FF/DistortionScoreProducer.h"
+#include "moses/FF/WordPenaltyProducer.h"
+#include "moses/FF/InputFeature.h"
+#include "moses/FF/PhrasePenalty.h"
+#include "moses/FF/OSM-Feature/OpSequenceModel.h"
+#include "moses/FF/ControlRecombination.h"
+
+#include "moses/LM/Ken.h"
+#ifdef LM_IRST
+#include "moses/LM/IRST.h"
+#endif
+
+#ifdef LM_SRI
+#include "moses/LM/SRI.h"
+#endif
+
+#ifdef LM_RAND
+#include "moses/LM/Rand.h"
+#endif
+
+#ifdef HAVE_SYNLM
+#include "moses/SyntacticLanguageModel.h"
+#endif
+
+#include "util/exception.hh"
+
+#include <vector>
+
+namespace Moses
+{
+
+// Abstract base class of the per-feature factories held by FeatureRegistry.
+class FeatureFactory
+{
+public:
+  virtual ~FeatureFactory() {}
+
+  // Builds one feature from its configuration line.
+  virtual void Create(const std::string &line) = 0;
+
+protected:
+  // Only derived factories may be constructed.
+  FeatureFactory() {}
+
+  // Registers the weights of a newly built feature with StaticData.
+  template <class F> static void DefaultSetup(F *feature);
+};
+
+// Registers the weights for `feature` with StaticData. The weights looked up
+// under the feature's description are used when the feature is tuneable or
+// when any were found; otherwise the feature's DefaultWeights() are used.
+template <class F> void FeatureFactory::DefaultSetup(F *feature)
+{
+  StaticData &staticData = StaticData::InstanceNonConst();
+  std::vector<float> &iniWeights = staticData.GetParameter()->GetWeights(feature->GetScoreProducerDescription());
+
+  if (!feature->IsTuneable() && iniWeights.empty()) {
+    // Not tuneable and nothing configured: fall back to the built-in defaults.
+    std::vector<float> defaultWeights = feature->DefaultWeights();
+    staticData.SetWeights(feature, defaultWeights);
+    return;
+  }
+
+  // if it's tuneable, ini file MUST have weights
+  // even if it's not tuneable, people can still set the weights in the ini file
+  staticData.SetWeights(feature, iniWeights);
+}
+
+namespace
+{
+
+// Factory for any feature class F that can be constructed from its
+// configuration line.
+template <class F> class DefaultFeatureFactory : public FeatureFactory
+{
+public:
+  void Create(const std::string &line) {
+    F *feature = new F(line);
+    DefaultSetup(feature);
+  }
+};
+
+// Factory for KenLM, which is built through ConstructKenLM() rather than a
+// constructor call.
+class KenFactory : public FeatureFactory
+{
+public:
+  void Create(const std::string &line) {
+    LanguageModel *lm = ConstructKenLM(line);
+    DefaultSetup(lm);
+  }
+};
+
+} // namespace
+
+// Registers a factory for every feature name that may appear in the
+// configuration. Features backed by optional libraries are registered only
+// when the matching HAVE_SYNLM / LM_IRST / LM_SRI / LM_RAND macro is defined.
+FeatureRegistry::FeatureRegistry()
+{
+// Feature with same name as class
+#define MOSES_FNAME(name) Add(#name, new DefaultFeatureFactory< name >())
+// Feature with different name than class.
+#define MOSES_FNAME2(name, type) Add(name, new DefaultFeatureFactory< type >())
+
+  MOSES_FNAME(GlobalLexicalModel);
+  //MOSES_FNAME(GlobalLexicalModelUnlimited); This was commented out in the original
+  MOSES_FNAME(SourceWordDeletionFeature);
+  MOSES_FNAME(TargetWordInsertionFeature);
+  MOSES_FNAME(PhraseBoundaryFeature);
+  MOSES_FNAME(PhraseLengthFeature);
+  MOSES_FNAME(WordTranslationFeature);
+  MOSES_FNAME(TargetBigramFeature);
+  MOSES_FNAME(TargetNgramFeature);
+  MOSES_FNAME(PhrasePairFeature);
+  MOSES_FNAME(LexicalReordering);
+  MOSES_FNAME2("Generation", GenerationDictionary);
+  MOSES_FNAME(BleuScoreFeature);
+  MOSES_FNAME2("Distortion", DistortionScoreProducer);
+  MOSES_FNAME2("WordPenalty", WordPenaltyProducer);
+  MOSES_FNAME(InputFeature);
+
+  // Phrase tables
+  MOSES_FNAME2("PhraseDictionaryBinary", PhraseDictionaryTreeAdaptor);
+  MOSES_FNAME(PhraseDictionaryOnDisk);
+  MOSES_FNAME(PhraseDictionaryMemory);
+  MOSES_FNAME(PhraseDictionaryCompact);
+  MOSES_FNAME(PhraseDictionaryMultiModel);
+  MOSES_FNAME(PhraseDictionaryMultiModelCounts);
+  MOSES_FNAME(PhraseDictionaryALSuffixArray);
+  MOSES_FNAME(PhraseDictionaryDynSuffixArray);
+
+  MOSES_FNAME(OpSequenceModel);
+  MOSES_FNAME(PhrasePenalty);
+  MOSES_FNAME2("UnknownWordPenalty", UnknownWordPenaltyProducer);
+  MOSES_FNAME(ControlRecombination);
+
+  // Optional features, compiled in only when their library is available
+#ifdef HAVE_SYNLM
+  MOSES_FNAME(SyntacticLanguageModel);
+#endif
+#ifdef LM_IRST
+  MOSES_FNAME2("IRSTLM", LanguageModelIRST);
+#endif
+#ifdef LM_SRI
+  MOSES_FNAME2("SRILM", LanguageModelSRI);
+#endif
+#ifdef LM_RAND
+  MOSES_FNAME2("RANDLM", LanguageModelRandLM);
+#endif
+
+  // KenLM goes through its own factory instead of DefaultFeatureFactory.
+  Add("KENLM", new KenFactory());
+
+// The helper macros are only meant for use inside this constructor.
+#undef MOSES_FNAME2
+#undef MOSES_FNAME
+}
+
+// Nothing to release by hand: the map's shared_ptr values own the factories.
+FeatureRegistry::~FeatureRegistry()
+{
+}
+
+// Registers `factory` under `name` and takes ownership of it.
+// Throws util::Exception when the name is already registered.
+void FeatureRegistry::Add(const std::string &name, FeatureFactory *factory)
+{
+  // Wrap the raw pointer first so the factory is released even when the
+  // insertion is rejected.
+  boost::shared_ptr<FeatureFactory> owned(factory);
+  const bool inserted = registry_.insert(Map::value_type(name, owned)).second;
+  UTIL_THROW_IF(!inserted, util::Exception, "Duplicate feature name " << name);
+}
+
+namespace
+{
+// Thrown by FeatureRegistry::Construct for names that were never registered.
+class UnknownFeatureException : public util::Exception {};
+}
+
+// Looks up the factory registered under `name` and lets it create the feature
+// described by `line`. Throws UnknownFeatureException for an unknown name.
+void FeatureRegistry::Construct(const std::string &name, const std::string &line)
+{
+  const Map::iterator found = registry_.find(name);
+  UTIL_THROW_IF(found == registry_.end(), UnknownFeatureException, "Feature name " << name << " is not registered.");
+  found->second->Create(line);
+}
+
+} // namespace Moses
--- a/moses/FF/Factory.h
+++ b/moses/FF/Factory.h
@ -0,0 +1,30 @@
+#pragma once
+
+#include <string>
+
+#include <boost/shared_ptr.hpp>
+#include <boost/unordered_map.hpp>
+
+namespace Moses
+{
+
+class FeatureFactory;
+
+// Maps feature names to the factories that build those features.
+class FeatureRegistry
+{
+public:
+  // Registers the factories for all known feature names.
+  FeatureRegistry();
+
+  ~FeatureRegistry();
+
+  // Creates the feature registered under `name` from its configuration line.
+  void Construct(const std::string &name, const std::string &line);
+
+private:
+  typedef boost::unordered_map<std::string, boost::shared_ptr<FeatureFactory> > Map;
+
+  // Stores `factory` under `name`; the registry owns it from then on.
+  void Add(const std::string &name, FeatureFactory *factory);
+
+  Map registry_;
+};
+
+} // namespace Moses
--- a/moses/FF/FeatureFunction.cpp
+++ b/moses/FF/FeatureFunction.cpp
@ -74,7 +74,7 @@ void FeatureFunction::ParseLine(const std::string& description, const std::strin
  set<string> keys;

  for (size_t i = 1; i < toks.size(); ++i) {
-    vector<string> args = Tokenize(toks[i], "=");
+    vector<string> args = TokenizeFirstOnly(toks[i], "=");
    CHECK(args.size() == 2);

    pair<set<string>::iterator,bool> ret = keys.insert(args[0]);
@ -109,5 +109,10 @@ void FeatureFunction::ReadParameters()
  }
 }

+// Base implementation: always throws util::Exception. A feature that has
+// built-in default weights overrides this to return them.
+std::vector<float> FeatureFunction::DefaultWeights() const
+{
+  UTIL_THROW(util::Exception, "No default weights");
+}
+
 }

--- a/moses/FF/FeatureFunction.h
+++ b/moses/FF/FeatureFunction.h
@ -78,6 +78,7 @@ public:
  virtual bool IsTuneable() const {
    return m_tuneable;
  }
+  virtual std::vector<float> DefaultWeights() const;

  //! Called before search and collecting of translation options
  virtual void InitializeForInput(InputType const& source) {
--- a/moses/FF/UnknownWordPenaltyProducer.cpp
+++ b/moses/FF/UnknownWordPenaltyProducer.cpp
@ -13,5 +13,11 @@ UnknownWordPenaltyProducer::UnknownWordPenaltyProducer(const std::string &line)
  ReadParameters();
 }

+// Default for this feature: a single weight of 1.0.
+std::vector<float> UnknownWordPenaltyProducer::DefaultWeights() const
+{
+  return std::vector<float>(1, 1.0f);
+}
+
 }

--- a/moses/FF/UnknownWordPenaltyProducer.h
+++ b/moses/FF/UnknownWordPenaltyProducer.h
@ -20,6 +20,7 @@ public:
  bool IsUseable(const FactorMask &mask) const {
    return true;
  }
+  std::vector<float> DefaultWeights() const;

 };

--- a/moses/Hypothesis.cpp
+++ b/moses/Hypothesis.cpp
@ -401,6 +401,14 @@ void Hypothesis::CleanupArcList()
  }
 }

+// Appends to `out` the target phrases of all predecessor hypotheses, earliest
+// first, followed by this hypothesis' own target phrase.
+void Hypothesis::GetOutputPhrase(Phrase &out) const
+{
+  // Recurse into the predecessor first so the phrases are appended in order.
+  if (m_prevHypo) {
+    m_prevHypo->GetOutputPhrase(out);
+  }
+
+  out.Append(GetCurrTargetPhrase());
+}
+
 TO_STRING_BODY(Hypothesis)

 // friend
--- a/moses/Hypothesis.h
+++ b/moses/Hypothesis.h
@ -200,11 +200,12 @@ public:

  int RecombineCompare(const Hypothesis &compare) const;

+  void GetOutputPhrase(Phrase &out) const;
+
  void ToStream(std::ostream& out) const {
-    if (m_prevHypo != NULL) {
-      m_prevHypo->ToStream(out);
-    }
-    out << (Phrase) GetCurrTargetPhrase();
+    Phrase ret;
+    GetOutputPhrase(ret);
+    out << ret;
  }

  void ToStringStream(std::stringstream& out) const {
--- a/moses/Jamfile
+++ b/moses/Jamfile
@ -30,6 +30,25 @@ if $(have-clock[2]) = 0 {
  alias rt ;
 }

+#This is a kludge to force rebuilding if different --with options are passed. 
+#Could have used features like <srilm>on but getting these to apply only to
+#linking was ugly and it still didn't trigger an install (since the install
+#path doesn't encode features).  It stores a file lm.log with the previous
+#options and forces a rebuild if the current options differ.  
+local current = ;
+for local i in srilm irstlm randlm {
+  local optval = [ option.get "with-$(i)" ] ;
+  if $(optval) {
+    current += "--with-$(i)=$(optval)" ;
+  }
+}
+current = $(current:J=" ") ;
+current ?= "" ;
+path-constant LM-LOG : bin/lm.log ;
+update-if-changed $(LM-LOG) $(current) ;
+
+obj FF_Factory.o : FF/Factory.cpp LM//macros headers ../lm//kenlm : <dependency>$(LM-LOG) ;
+
 lib moses :
 [ glob 
  *.cpp
@ -45,8 +64,9 @@ lib moses :
  ThreadPool.cpp
  SyntacticLanguageModel.cpp
  *Test.cpp Mock*.cpp
+  LM/Factory.cpp
 ]
-headers LM//LM TranslationModel/CompactPT//CompactPT synlm ThreadPool rt
+headers FF_Factory.o LM//LM TranslationModel/CompactPT//CompactPT synlm ThreadPool rt
 ..//search ../util/double-conversion//double-conversion ..//z ../OnDiskPt//OnDiskPt ;

 alias headers-to-install : [ glob-tree *.h ] ;
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@ -7,26 +7,8 @@

 import option path ;

-#This is a kludge to force rebuilding if different --with options are passed. 
-#Could have used features like <srilm>on but getting these to apply only to
-#linking was ugly and it still didn't trigger an install (since the install
-#path doesn't encode features).  It stores a file lm.log with the previous
-#options and forces a rebuild if the current options differ.  
-local current = ;
-for local i in srilm irstlm randlm {
-  local optval = [ option.get "with-$(i)" ] ;
-  if $(optval) {
-    current += "--with-$(i)=$(optval)" ;
-  }
-}
-current = $(current:J=" ") ;
-current ?= "" ;
-
-path-constant LM-LOG : bin/lm.log ;
-update-if-changed $(LM-LOG) $(current) ;
-
-
 local dependencies = ;
+local lmmacros = ;

 #IRSTLM
 local with-irstlm = [ option.get "with-irstlm" ] ;
@ -35,6 +17,7 @@ if $(with-irstlm) {
  obj IRST.o : IRST.cpp ..//headers : <include>$(with-irstlm)/include <include>$(with-irstlm)/include/irstlm ;
  alias irst : IRST.o irstlm : : : <define>LM_IRST ;
  dependencies += irst ;
+  lmmacros += LM_IRST ;
  echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
  echo "!!! You are linking the IRSTLM library; be sure the release is >= 5.70.02 !!!" ;
  echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
@ -63,6 +46,7 @@ if $(with-srilm) {
  obj ParallelBackoff.o : ParallelBackoff.cpp ..//headers : <include>$(with-srilm)/include <include>$(with-srilm)/include/srilm <warnings>off ;
  alias sri : SRI.o ParallelBackoff.o sri-libs : : : <define>LM_SRI ;
  dependencies += sri ;
+  lmmacros += LM_SRI ;
 }

 #RandLM
@ -72,6 +56,7 @@ if $(with-randlm) {
  obj Rand.o : Rand.cpp RandLM ..//headers : <include>$(with-randlm)/include <include>$(with-randlm)/include/RandLM ;
  alias rand : Rand.o RandLM : : : <define>LM_RAND ;
  dependencies += rand ;
+  lmmacros += LM_RAND ;
 }

 # LDHTLM
@ -82,6 +67,7 @@ if $(with-ldhtlm) {
  obj LDHT.o : LDHT.cpp LDHT ..//headers : <include>$(with-ldhtlm)/include <include>$(with-ldhtlm)/include/LDHT ;
  alias ldht : LDHT.o LDHT ticpp : : : <define>LM_LDHT ;
  dependencies += ldht ;
+  lmmacros += LM_LDHT ;
 }

 #ORLM is always compiled but needs special headers
@ -92,4 +78,4 @@ obj ORLM.o : ORLM.cpp ..//headers ../TranslationModel/DynSAInclude//dynsa : : :
 alias LM : Base.cpp Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp ORLM.o
  ../../lm//kenlm ..//headers $(dependencies) ;

-
+alias macros : : : : <define>$(lmmacros) ;
--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@ -66,7 +66,7 @@ struct KenLMState : public FFState {
 template <class Model> class LanguageModelKen : public LanguageModel
 {
 public:
-  LanguageModelKen(const std::string &description, const std::string &line, const std::string &file, FactorType factorType, bool lazy);
+  LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy);

  const FFState *EmptyHypothesisState(const InputType &/*input*/) const {
    KenLMState *ret = new KenLMState();
@ -137,8 +137,8 @@ private:
  std::vector<lm::WordIndex> &m_mapping;
 };

-template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &description, const std::string &line, const std::string &file, FactorType factorType, bool lazy)
-  :LanguageModel(description, line)
+template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
+  :LanguageModel("KENLM", line)
  ,m_factorType(factorType)
 {
  lm::ngram::Config config;
@ -351,7 +351,7 @@ bool LanguageModelKen<Model>::IsUseable(const FactorMask &mask) const

 } // namespace

-LanguageModel *ConstructKenLM(const std::string &description, const std::string &line)
+LanguageModel *ConstructKenLM(const std::string &line)
 {
  FactorType factorType;
  string filePath;
@ -375,10 +375,10 @@ LanguageModel *ConstructKenLM(const std::string &description, const std::string
    }
  }

-  return ConstructKenLM(description, line, filePath, factorType, lazy);
+  return ConstructKenLM(line, filePath, factorType, lazy);
 }

-LanguageModel *ConstructKenLM(const std::string &description, const std::string &line, const std::string &file, FactorType factorType, bool lazy)
+LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
 {
  try {
    lm::ngram::ModelType model_type;
@ -386,23 +386,23 @@ LanguageModel *ConstructKenLM(const std::string &description, const std::string

      switch(model_type) {
      case lm::ngram::PROBING:
-        return new LanguageModelKen<lm::ngram::ProbingModel>(description, line, file, factorType, lazy);
+        return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
      case lm::ngram::REST_PROBING:
-        return new LanguageModelKen<lm::ngram::RestProbingModel>(description, line, file, factorType, lazy);
+        return new LanguageModelKen<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
      case lm::ngram::TRIE:
-        return new LanguageModelKen<lm::ngram::TrieModel>(description, line, file, factorType, lazy);
+        return new LanguageModelKen<lm::ngram::TrieModel>(line, file, factorType, lazy);
      case lm::ngram::QUANT_TRIE:
-        return new LanguageModelKen<lm::ngram::QuantTrieModel>(description, line, file, factorType, lazy);
+        return new LanguageModelKen<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
      case lm::ngram::ARRAY_TRIE:
-        return new LanguageModelKen<lm::ngram::ArrayTrieModel>(description, line, file, factorType, lazy);
+        return new LanguageModelKen<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
      case lm::ngram::QUANT_ARRAY_TRIE:
-        return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(description, line, file, factorType, lazy);
+        return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
      default:
        std::cerr << "Unrecognized kenlm model type " << model_type << std::endl;
        abort();
      }
    } else {
-      return new LanguageModelKen<lm::ngram::ProbingModel>(description, line, file, factorType, lazy);
+      return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
    }
  } catch (std::exception &e) {
    std::cerr << e.what() << std::endl;
--- a/moses/LM/Ken.h
+++ b/moses/LM/Ken.h
@ -31,10 +31,10 @@ namespace Moses

 class LanguageModel;

-LanguageModel *ConstructKenLM(const std::string &description, const std::string &line);
+LanguageModel *ConstructKenLM(const std::string &line);

 //! This will also load. Returns a templated KenLM class
-LanguageModel *ConstructKenLM(const std::string &description, const std::string &line, const std::string &file, FactorType factorType, bool lazy);
+LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy);

 } // namespace Moses

--- a/moses/LM/Rand.cpp
+++ b/moses/LM/Rand.cpp
@ -20,11 +20,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include <limits>
 #include <iostream>
 #include <fstream>
-#include <string>
-#include <vector>

-#include "SingleFactor.h"
-#include "RandLM.h"
 #include "Rand.h"
 #include "moses/Factor.h"
 #include "moses/Util.h"
@ -33,62 +29,34 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "moses/InputFileStream.h"
 #include "moses/StaticData.h"
 #include "util/check.hh"
+#include "RandLM.h"

+using namespace std;

 namespace Moses
 {
-namespace
+
+LanguageModelRandLM::LanguageModelRandLM(const std::string &line)
+  :LanguageModelSingleFactor("RandLM", line)
+  , m_lm(0)
 {
-using namespace std;
+}

-class LanguageModelRandLM : public LanguageModelSingleFactor
-{
-public:
-  LanguageModelRandLM(const std::string &line)
-    :LanguageModelSingleFactor("RandLM", line)
-    , m_lm(0) {
-  }
-  bool Load(const std::string &filePath, FactorType factorType, size_t nGramOrder);
-  virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const;
-  ~LanguageModelRandLM() {
-    delete m_lm;
-  }
-  void InitializeForInput(InputType const& source) {
-    m_lm->initThreadSpecificData(); // Creates thread specific data iff                                    // compiled with multithreading.
-  }
-  void CleanUpAfterSentenceProcessing(const InputType& source) {
-    m_lm->clearCaches(); // clear caches
-  }
-protected:
-  std::vector<randlm::WordID> m_randlm_ids_vec;
-  randlm::RandLM* m_lm;
-  randlm::WordID m_oov_id;
-  void CreateFactors(FactorCollection &factorCollection);
-  randlm::WordID GetLmID( const std::string &str ) const;
-  randlm::WordID GetLmID( const Factor *factor ) const {
-    size_t factorId = factor->GetId();
-    return ( factorId >= m_randlm_ids_vec.size()) ? m_oov_id : m_randlm_ids_vec[factorId];
-  };
+// Releases the RandLM instance; m_lm is still 0 if Load() was never called,
+// in which case the delete is a no-op.
+LanguageModelRandLM::~LanguageModelRandLM()
+{
+  delete m_lm;
+}

-};
-
-
-bool LanguageModelRandLM::Load(const std::string &filePath, FactorType factorType,
-                               size_t nGramOrder)
+void LanguageModelRandLM::Load()
 {
  cerr << "Loading LanguageModelRandLM..." << endl;
  FactorCollection &factorCollection = FactorCollection::Instance();
-  m_filePath = filePath;
-  m_factorType = factorType;
-  m_nGramOrder = nGramOrder;
  int cache_MB = 50; // increase cache size
-  m_lm = randlm::RandLM::initRandLM(filePath, nGramOrder, cache_MB);
+  m_lm = randlm::RandLM::initRandLM(m_filePath, m_nGramOrder, cache_MB);
  CHECK(m_lm != NULL);
  // get special word ids
  m_oov_id = m_lm->getWordID(m_lm->getOOV());
  CreateFactors(factorCollection);
  m_lm->initThreadSpecificData();
-  return true;
 }

 void LanguageModelRandLM::CreateFactors(FactorCollection &factorCollection)   // add factors which have randlm id
@ -132,6 +100,11 @@ randlm::WordID LanguageModelRandLM::GetLmID( const std::string &str ) const
  return m_lm->getWordID(str);
 }

+// Maps a factor to its RandLM word id through m_randlm_ids_vec. Factor ids
+// beyond the end of that table map to the OOV id.
+randlm::WordID LanguageModelRandLM::GetLmID( const Factor *factor ) const
+{
+  const size_t factorId = factor->GetId();
+  if (factorId >= m_randlm_ids_vec.size()) {
+    return m_oov_id;
+  }
+  return m_randlm_ids_vec[factorId];
+}
+
 LMResult LanguageModelRandLM::GetValue(const vector<const Word*> &contextFactor,
                                       State* finalState) const
 {
@ -154,6 +127,11 @@ LMResult LanguageModelRandLM::GetValue(const vector<const Word*> &contextFactor,
  return ret;
 }

+void LanguageModelRandLM::InitializeForInput(InputType const& source) {
+  m_lm->initThreadSpecificData(); // Creates thread-specific data iff compiled with multithreading.
+}
+void LanguageModelRandLM::CleanUpAfterSentenceProcessing(const InputType& source) {
+  m_lm->clearCaches(); // clear the RandLM caches once the sentence has been processed
 }

 }
--- a/moses/LM/Rand.h
+++ b/moses/LM/Rand.h
@ -1,3 +1,5 @@
+#ifndef moses_LM_Rand_h
+#define moses_LM_Rand_h
 /***********************************************************************
 Moses - factored phrase-based language decoder
 Copyright (C) 2006 University of Edinburgh
@ -16,14 +18,43 @@ You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/
+#include <vector>
+#include <string>
+#include <stdint.h>
+#include "SingleFactor.h"
+#include "moses/TypeDef.h"
+#include "moses/Word.h"
+//#include "RandLM.h"

-#ifndef moses_LM_Rand_h
-#define moses_LM_Rand_h
+namespace randlm
+{
+ class RandLM;
+}

 namespace Moses
 {
-class LanguageModelPointerState;
-LanguageModelPointerState *NewRandLM();
+class LanguageModelRandLM : public LanguageModelSingleFactor // single-factor language model backed by a randlm::RandLM instance
+{
+public:
+  LanguageModelRandLM(const std::string &line);
+  ~LanguageModelRandLM(); // deletes m_lm
+
+  void Load(); // creates m_lm via randlm::RandLM::initRandLM, looks up the OOV id and fills the id table
+  virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const;
+  void InitializeForInput(InputType const& source);
+  void CleanUpAfterSentenceProcessing(const InputType& source);
+
+protected:
+  //std::vector<randlm::WordID> m_randlm_ids_vec;
+  std::vector<uint32_t> m_randlm_ids_vec; // indexed by Moses factor id; uint32_t stands in for randlm::WordID, presumably so RandLM.h need not be included here (Ken made me do this)
+
+  randlm::RandLM* m_lm; // owned: created in Load(), deleted in the destructor
+  uint32_t m_oov_id; // id returned for factors that have no entry in m_randlm_ids_vec
+  void CreateFactors(FactorCollection &factorCollection);
+  uint32_t GetLmID( const std::string &str ) const;
+  uint32_t GetLmID( const Factor *factor ) const;
+
+};

 }

--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@ -30,6 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "InputFileStream.h"
 #include "StaticData.h"
 #include "UserMessage.h"
+#include "util/exception.hh"

 using namespace std;

@ -38,25 +39,18 @@ namespace Moses
 /** define allowed parameters */
 Parameter::Parameter()
 {
+  AddParam("mapping", "description of decoding steps");
  AddParam("beam-threshold", "b", "threshold for threshold pruning");
  AddParam("config", "f", "location of the configuration file");
  AddParam("continue-partial-translation", "cpt", "start from nonempty hypothesis");
  AddParam("decoding-graph-backoff", "dpb", "only use subsequent decoding paths for unknown spans of given length");
-  AddParam("dlm-model", "Order, factor and vocabulary file for discriminative LM. Use * for filename to indicate unlimited vocabulary.");
  AddParam("drop-unknown", "du", "drop unknown words instead of copying them");
  AddParam("disable-discarding", "dd", "disable hypothesis discarding");
  AddParam("factor-delimiter", "fd", "specify a different factor delimiter than the default");
-  AddParam("generation-file", "location and properties of the generation table");
-  AddParam("global-lexical-file", "gl", "discriminatively trained global lexical translation model file");
-  AddParam("glm-feature", "discriminatively trained global lexical translation feature, sparse producer");
  AddParam("input-factors", "list of factors in the input");
  AddParam("input-file", "i", "location of the input file to be translated");
  AddParam("inputtype", "text (0), confusion network (1), word lattice (2) (default = 0)");
  AddParam("labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
-  AddParam("lmodel-file", "location and properties of the language models");
-  AddParam("lmodel-dub", "dictionary upper bounds of language models");
-  AddParam("lmodel-oov-feature", "add language model oov feature, one per model");
-  AddParam("mapping", "description of decoding steps");
  AddParam("max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)");
  AddParam("max-trans-opt-per-coverage", "maximum number of translation options per input span (after applying mapping steps)");
  AddParam("max-phrase-length", "maximum phrase length (default 20)");
@ -68,16 +62,10 @@ Parameter::Parameter()
  AddParam("phrase-drop-allowed", "da", "if present, allow dropping of source words"); //da = drop any (word); see -du for comparison
  AddParam("report-all-factors", "report all factors in output, not just first");
  AddParam("report-all-factors-in-n-best", "Report all factors in n-best-lists. Default is false");
-#ifdef HAVE_SYNLM
-  AddParam("slmodel-file", "location of the syntactic language model file(s)");
-  AddParam("slmodel-factor", "factor to use with syntactic language model");
-  AddParam("slmodel-beam", "beam width to use with syntactic language model's parser");
-#endif
  AddParam("stack", "s", "maximum stack size for histogram pruning");
  AddParam("stack-diversity", "sd", "minimum number of hypothesis of each coverage in stack (default 0)");
  AddParam("threads","th", "number of threads to use in decoding (defaults to single-threaded)");
  AddParam("translation-details", "T", "for each best hypothesis, report translation details to the given file");
-  AddParam("ttable-file", "location and properties of the translation tables");
  AddParam("translation-option-threshold", "tot", "threshold for translation options relative to best for input phrase");
  AddParam("early-discarding-threshold", "edt", "threshold for constructing hypotheses based on estimate cost");
  AddParam("verbose", "v", "verbosity level of the logging");
@ -103,6 +91,7 @@ Parameter::Parameter()
  AddParam("lmbr-r", "ngram precision decay value for lattice mbr");
  AddParam("lmbr-map-weight", "weight given to map solution when doing lattice MBR (default 0)");
  AddParam("lattice-hypo-set", "to use lattice as hypo set during lattice MBR");
+  AddParam("lmodel-oov-feature", "add language model oov feature, one per model");
  AddParam("clean-lm-cache", "clean language model caches after N translations (default N=1)");
  AddParam("use-persistent-cache", "cache translation options across sentences (default true)");
  AddParam("persistent-cache-size", "maximum size of cache for translation options (default 10,000 input phrases)");
@ -129,13 +118,6 @@ Parameter::Parameter()
  AddParam("source-label-overlap", "What happens if a span already has a label. 0=add more. 1=replace. 2=discard. Default is 0");
  AddParam("output-hypo-score", "Output the hypo score to stdout with the output string. For search error analysis. Default is false");
  AddParam("unknown-lhs", "file containing target lhs of unknown words. 1 per line: LHS prob");
-  AddParam("phrase-pair-feature", "Source and target factors for phrase pair feature");
-  AddParam("phrase-boundary-source-feature", "Source factors for phrase boundary feature");
-  AddParam("phrase-boundary-target-feature", "Target factors for phrase boundary feature");
-  AddParam("phrase-length-feature", "Count features for source length, target length, both of each phrase");
-  AddParam("target-word-insertion-feature", "Count feature for each unaligned target word");
-  AddParam("source-word-deletion-feature", "Count feature for each unaligned source word");
-  AddParam("word-translation-feature", "Count feature for word translation according to word alignment");
  AddParam("cube-pruning-lazy-scoring", "cbls", "Don't fully score a hypothesis until it is popped");
  AddParam("parsing-algorithm", "Which parsing algorithm to use. 0=CYK+, 1=scope-3. (default = 0)");
  AddParam("search-algorithm", "Which search algorithm to use. 0=normal stack, 1=cube pruning, 2=cube growing, 4=stack with batched lm requests (default = 0)");
@ -185,6 +167,27 @@ Parameter::Parameter()
  AddParam("text-type", "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features");
  AddParam("input-scores", "DEPRECATED. DO NOT USE. 2 numbers on 2 lines - [1] of scores on each edge of a confusion network or lattice input (default=1). [2] Number of 'real' word scores (0 or 1. default=0)");

+  AddParam("dlm-model", "DEPRECATED. DO NOT USE. Order, factor and vocabulary file for discriminative LM. Use * for filename to indicate unlimited vocabulary.");
+  AddParam("generation-file", "DEPRECATED. DO NOT USE. location and properties of the generation table");
+  AddParam("global-lexical-file", "gl", "DEPRECATED. DO NOT USE. discriminatively trained global lexical translation model file");
+  AddParam("glm-feature", "DEPRECATED. DO NOT USE. discriminatively trained global lexical translation feature, sparse producer");
+  AddParam("lmodel-file", "DEPRECATED. DO NOT USE. location and properties of the language models");
+  AddParam("lmodel-dub", "DEPRECATED. DO NOT USE. dictionary upper bounds of language models");
+
+  #ifdef HAVE_SYNLM
+  AddParam("slmodel-file", "DEPRECATED. DO NOT USE. location of the syntactic language model file(s)");
+  AddParam("slmodel-factor", "DEPRECATED. DO NOT USE. factor to use with syntactic language model");
+  AddParam("slmodel-beam", "DEPRECATED. DO NOT USE. beam width to use with syntactic language model's parser");
+#endif
+AddParam("ttable-file", "DEPRECATED. DO NOT USE. location and properties of the translation tables");
+  AddParam("phrase-pair-feature", "DEPRECATED. DO NOT USE. Source and target factors for phrase pair feature");
+  AddParam("phrase-boundary-source-feature", "DEPRECATED. DO NOT USE. Source factors for phrase boundary feature");
+  AddParam("phrase-boundary-target-feature", "DEPRECATED. DO NOT USE. Target factors for phrase boundary feature");
+  AddParam("phrase-length-feature", "DEPRECATED. DO NOT USE. Count features for source length, target length, both of each phrase");
+  AddParam("target-word-insertion-feature", "DEPRECATED. DO NOT USE. Count feature for each unaligned target word");
+  AddParam("source-word-deletion-feature", "DEPRECATED. DO NOT USE. Count feature for each unaligned source word");
+  AddParam("word-translation-feature", "DEPRECATED. DO NOT USE. Count feature for word translation according to word alignment");
+
  AddParam("weight-file", "wf", "feature weights file. Do *not* put weights for 'core' features in here - they go in moses.ini");

  AddParam("weight", "weights for ALL models, 1 per line 'WeightName value'. Weight names can be repeated");
@ -195,6 +198,8 @@ Parameter::Parameter()
  AddParam("print-id", "prefix translations with id. Default if false");

  AddParam("alternate-weight-setting", "aws", "alternate set of weights to used per xml specification");
+
+  AddParam("placeholder-factor", "Which factor to use to store the original text for placeholders");
 }

 Parameter::~Parameter()
@ -305,9 +310,27 @@ bool Parameter::LoadParam(int argc, char* argv[])
    }
  }

+  // don't mix old and new format: "feature"/"weight" (new style) may not be combined with any legacy weight-* or model parameter
+  if ((isParamSpecified("feature") || isParamSpecified("weight"))
+	&& (isParamSpecified("weight-slm") || isParamSpecified("weight-bl") || isParamSpecified("weight-d") ||
+		isParamSpecified("weight-dlm") || isParamSpecified("weight-lrl") || isParamSpecified("weight-generation") ||
+		isParamSpecified("weight-i") || isParamSpecified("weight-l") || isParamSpecified("weight-lex") ||
+		isParamSpecified("weight-glm") || isParamSpecified("weight-wt") || isParamSpecified("weight-pp") ||
+		isParamSpecified("weight-pb") || isParamSpecified("weight-t") || isParamSpecified("weight-w") ||
+		isParamSpecified("weight-u") || isParamSpecified("weight-e") ||
+		isParamSpecified("dlm-model") || isParamSpecified("generation-file") || isParamSpecified("global-lexical-file") ||
+		isParamSpecified("glm-feature") || isParamSpecified("lmodel-file") || isParamSpecified("lmodel-dub") ||
+		isParamSpecified("slmodel-file") || isParamSpecified("slmodel-factor") ||
+		isParamSpecified("slmodel-beam") || isParamSpecified("ttable-file") || isParamSpecified("phrase-pair-feature") ||
+		isParamSpecified("phrase-boundary-source-feature") || isParamSpecified("phrase-boundary-target-feature") || isParamSpecified("phrase-length-feature") ||
+		isParamSpecified("target-word-insertion-feature") || isParamSpecified("source-word-deletion-feature") || isParamSpecified("word-translation-feature")
+		)
+	 ) {
+	  UTIL_THROW(util::Exception, "Don't mix old and new ini file format");
+  }
+
  // convert old weights args to new format
-  // WHAT IS GOING ON HERE??? - UG
-  if (!isParamSpecified("feature")) // UG
+  if (!isParamSpecified("feature"))
    ConvertWeightArgs();
  CreateWeightsMap();
  WeightOverwrite();
--- a/moses/Sentence.cpp
+++ b/moses/Sentence.cpp
@ -121,15 +121,22 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
  //const StaticData &staticData = StaticData::Instance();
  std::vector<XmlOption*> xmlOptionsList(0);
  std::vector< size_t > xmlWalls;
+  std::vector< std::pair<size_t, std::string> > placeholders;
+
  if (staticData.GetXmlInputType() != XmlPassThrough) {
-    if (!ProcessAndStripXMLTags(line, xmlOptionsList, m_reorderingConstraint, xmlWalls, staticData.GetXmlBrackets().first, staticData.GetXmlBrackets().second)) {
+    if (!ProcessAndStripXMLTags(line, xmlOptionsList, m_reorderingConstraint, xmlWalls, placeholders,
+                                staticData.GetXmlBrackets().first, staticData.GetXmlBrackets().second)) {
      const string msg("Unable to parse XML in line: " + line);
      TRACE_ERR(msg << endl);
      throw runtime_error(msg);
    }
  }
+
  Phrase::CreateFromString(Input, factorOrder, line, factorDelimiter, NULL);

+  // placeholders
+  ProcessPlaceholders(placeholders);
+
  if (staticData.IsChart()) {
    InitStartEndWord();
  }
@ -194,6 +201,22 @@ void Sentence::InitStartEndWord()
  AddWord(endWord);
 }

+void Sentence::ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders)
+{ // stores each placeholder's text in the placeholder factor of the word at its position
+  FactorType factorType = StaticData::Instance().GetPlaceholderFactor();
+  if (factorType == NOT_FOUND) { // no placeholder-factor configured: leave the words untouched
+    return;
+  }
+
+  for (size_t i = 0; i < placeholders.size(); ++i) {
+    size_t pos = placeholders[i].first; // word position in this sentence
+    const string &str = placeholders[i].second; // text to store for that word
+    const Factor *factor = FactorCollection::Instance().AddFactor(str);
+    Word &word = Phrase::GetWord(pos); // NOTE(review): pos is not range-checked here - confirm the XML parser only emits valid positions
+    word[factorType] = factor;
+  }
+}
+
 TranslationOptionCollection*
 Sentence::CreateTranslationOptionCollection() const
 {
--- a/moses/Sentence.h
+++ b/moses/Sentence.h
@ -57,6 +57,7 @@ private:
  NonTerminalSet m_defaultLabelSet;

  void InitStartEndWord();
+  void ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders);


 public:
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@ -22,14 +22,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

 #include <string>
 #include "util/check.hh"
-#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
-#include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
-#include "moses/TranslationModel/PhraseDictionaryMemory.h"
-#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
-#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
-#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
-#include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
-#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
+
+#include "moses/FF/Factory.h"
+#include "moses/FF/WordPenaltyProducer.h"
+#include "moses/FF/UnknownWordPenaltyProducer.h"
+#include "moses/FF/InputFeature.h"

 #include "DecodeStepTranslation.h"
 #include "DecodeStepGeneration.h"
@ -46,37 +43,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "InputFileStream.h"
 #include "ScoreComponentCollection.h"

-#include "moses/FF/BleuScoreFeature.h"
-#include "moses/FF/TargetWordInsertionFeature.h"
-#include "moses/FF/SourceWordDeletionFeature.h"
-#include "moses/FF/GlobalLexicalModel.h"
-#include "moses/FF/GlobalLexicalModelUnlimited.h"
-#include "moses/FF/UnknownWordPenaltyProducer.h"
-#include "moses/FF/WordTranslationFeature.h"
-#include "moses/FF/TargetBigramFeature.h"
-#include "moses/FF/TargetNgramFeature.h"
-#include "moses/FF/PhraseBoundaryFeature.h"
-#include "moses/FF/PhrasePairFeature.h"
-#include "moses/FF/PhraseLengthFeature.h"
-#include "moses/FF/DistortionScoreProducer.h"
-#include "moses/FF/WordPenaltyProducer.h"
-#include "moses/FF/InputFeature.h"
-#include "moses/FF/PhrasePenalty.h"
-#include "moses/FF/OSM-Feature/OpSequenceModel.h"
-
-#include "LM/Ken.h"
-#ifdef LM_IRST
-#include "LM/IRST.h"
-#endif
-
-#ifdef LM_SRI
-#include "LM/SRI.h"
-#endif
-
-#ifdef HAVE_SYNLM
-#include "SyntacticLanguageModel.h"
-#endif
-
 #ifdef WITH_THREADS
 #include <boost/thread.hpp>
 #endif
@ -556,10 +522,18 @@ bool StaticData::LoadData(Parameter *parameter)
    cerr << "XML tags opening and closing brackets for XML input are: " << m_xmlBrackets.first << " and " << m_xmlBrackets.second << endl;
  }

+  if (m_parameter->GetParam("placeholder-factor").size() > 0) {
+    m_placeHolderFactor = Scan<FactorType>(m_parameter->GetParam("placeholder-factor")[0]);
+  } else {
+    m_placeHolderFactor = NOT_FOUND;
+  }
+
+
  // all features
  map<string, int> featureIndexMap;

  const vector<string> &features = m_parameter->GetParam("feature");
+  FeatureRegistry registry;
  for (size_t i = 0; i < features.size(); ++i) {
    const string &line = Trim(features[i]);
    cerr << "line=" << line << endl;
@ -569,151 +543,8 @@ bool StaticData::LoadData(Parameter *parameter)
    vector<string> toks = Tokenize(line);

    const string &feature = toks[0];
-    //int featureIndex = GetFeatureIndex(featureIndexMap, feature);

-    if (feature == "GlobalLexicalModel") {
-      GlobalLexicalModel *model = new GlobalLexicalModel(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "GlobalLexicalModelUnlimited") {
-      GlobalLexicalModelUnlimited *model = NULL; //new GlobalLexicalModelUnlimited(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "SourceWordDeletionFeature") {
-      SourceWordDeletionFeature *model = new SourceWordDeletionFeature(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      //SetWeights(model, weights);
-    } else if (feature == "TargetWordInsertionFeature") {
-      TargetWordInsertionFeature *model = new TargetWordInsertionFeature(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      //SetWeights(model, weights);
-    } else if (feature == "PhraseBoundaryFeature") {
-      PhraseBoundaryFeature *model = new PhraseBoundaryFeature(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      //SetWeights(model, weights);
-    } else if (feature == "PhraseLengthFeature") {
-      PhraseLengthFeature *model = new PhraseLengthFeature(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      //SetWeights(model, weights);
-    } else if (feature == "WordTranslationFeature") {
-      WordTranslationFeature *model = new WordTranslationFeature(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      //SetWeights(model, weights);
-    } else if (feature == "TargetBigramFeature") {
-      TargetBigramFeature *model = new TargetBigramFeature(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      //SetWeights(model, weights);
-    } else if (feature == "TargetNgramFeature") {
-      TargetNgramFeature *model = new TargetNgramFeature(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      //SetWeights(model, weights);
-    } else if (feature == "PhrasePairFeature") {
-      PhrasePairFeature *model = new PhrasePairFeature(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      //SetWeights(model, weights);
-    } else if (feature == "LexicalReordering") {
-      LexicalReordering *model = new LexicalReordering(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "KENLM") {
-      LanguageModel *model = ConstructKenLM(feature, line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    }
-#ifdef LM_IRST
-    else if (feature == "IRSTLM") {
-      LanguageModelIRST *model = new LanguageModelIRST(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    }
-#endif
-#ifdef LM_SRI
-    else if (feature == "SRILM") {
-      LanguageModelSRI *model = new LanguageModelSRI(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    }
-#endif
-    else if (feature == "Generation") {
-      GenerationDictionary *model = new GenerationDictionary(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "BleuScoreFeature") {
-      BleuScoreFeature *model = new BleuScoreFeature(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "Distortion") {
-      DistortionScoreProducer *model = new DistortionScoreProducer(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "WordPenalty") {
-      WordPenaltyProducer *model = new WordPenaltyProducer(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "UnknownWordPenalty") {
-      UnknownWordPenaltyProducer *model = new UnknownWordPenaltyProducer(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      if (weights.size() == 0)
-        weights.push_back(1.0f);
-      SetWeights(model, weights);
-    } else if (feature == "InputFeature") {
-      InputFeature *model = new InputFeature(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-
-    } else if (feature == "PhraseDictionaryBinary") {
-      PhraseDictionaryTreeAdaptor* model = new PhraseDictionaryTreeAdaptor(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "PhraseDictionaryOnDisk") {
-      PhraseDictionaryOnDisk* model = new PhraseDictionaryOnDisk(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "PhraseDictionaryMemory") {
-      PhraseDictionaryMemory* model = new PhraseDictionaryMemory(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "PhraseDictionaryCompact") {
-      PhraseDictionaryCompact* model = new PhraseDictionaryCompact(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "PhraseDictionaryMultiModel") {
-      PhraseDictionaryMultiModel* model = new PhraseDictionaryMultiModel(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "PhraseDictionaryMultiModelCounts") {
-      PhraseDictionaryMultiModelCounts* model = new PhraseDictionaryMultiModelCounts(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "PhraseDictionaryALSuffixArray") {
-      PhraseDictionaryALSuffixArray* model = new PhraseDictionaryALSuffixArray(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "PhraseDictionaryDynSuffixArray") {
-      PhraseDictionaryDynSuffixArray* model = new PhraseDictionaryDynSuffixArray(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "OpSequenceModel") {
-      OpSequenceModel* model = new OpSequenceModel(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    } else if (feature == "PhrasePenalty") {
-      PhrasePenalty* model = new PhrasePenalty(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    }
-
-#ifdef HAVE_SYNLM
-    else if (feature == "SyntacticLanguageModel") {
-      SyntacticLanguageModel *model = new SyntacticLanguageModel(line);
-      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-      SetWeights(model, weights);
-    }
-#endif
-    else {
-      UserMessage::Add("Unknown feature function:" + feature);
-      return false;
-    }
+    registry.Construct(feature, line);
  }

  OverrideFeatures();
@ -1317,7 +1148,7 @@ void StaticData::OverrideFeatures()
      CHECK(keyVal.size() == 2);

      VERBOSE(1, "Override " << ff.GetScoreProducerDescription() << " "
-    		     << keyVal[0] << "=" << keyVal[1] << endl);
+              << keyVal[0] << "=" << keyVal[1] << endl);

      ff.SetParameter(keyVal[0], keyVal[1]);

--- a/moses/StaticData.h
+++ b/moses/StaticData.h
@ -214,6 +214,8 @@ protected:
  std::map< std::string, std::set< std::string > > m_weightSettingIgnoreFF; // feature function
  std::map< std::string, std::set< size_t > > m_weightSettingIgnoreDP; // decoding path

+  FactorType m_placeHolderFactor;
+
  StaticData();

  void LoadChartDecodingParameters();
@ -768,6 +770,9 @@ public:

  void OverrideFeatures();

+  FactorType GetPlaceholderFactor() const { // factor chosen via the placeholder-factor parameter; NOT_FOUND when it is not set
+    return m_placeHolderFactor;
+  }
 };

 }
--- a/moses/TargetPhraseCollection.h
+++ b/moses/TargetPhraseCollection.h
@ -85,7 +85,7 @@ public:
  void Sort(bool adhereTableLimit, size_t tableLimit);

  void Clear() {
-    RemoveAllInColl(m_collection); 
+    RemoveAllInColl(m_collection);
  }

 };
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
@ -154,12 +154,10 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
    const PhraseDictionaryNodeMemory &node = dottedRule.GetLastNode();

    // look up target sides
-    const TargetPhraseCollection *tpc = node.GetTargetPhraseCollection();
+    const TargetPhraseCollection &tpc = node.GetTargetPhraseCollection();

    // add the fully expanded rule (with lexical target side)
-    if (tpc != NULL) {
-      AddCompletedRule(dottedRule, *tpc, range, outColl);
-    }
+    AddCompletedRule(dottedRule, tpc, range, outColl);
  }

  dottedRuleCol.Clear(relEndPos+1);
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp
@ -153,12 +153,10 @@ void ChartRuleLookupManagerMemoryPerSentence::GetChartRuleCollection(
    const PhraseDictionaryNodeMemory &node = dottedRule.GetLastNode();

    // look up target sides
-    const TargetPhraseCollection *tpc = node.GetTargetPhraseCollection();
+    const TargetPhraseCollection &tpc = node.GetTargetPhraseCollection();

    // add the fully expanded rule (with lexical target side)
-    if (tpc != NULL) {
-      AddCompletedRule(dottedRule, *tpc, range, outColl);
-    }
+    AddCompletedRule(dottedRule, tpc, range, outColl);
  }

  dottedRuleCol.Clear(relEndPos+1);
--- a/moses/TranslationModel/PhraseDictionaryMemory.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMemory.cpp
@ -51,7 +51,7 @@ TargetPhraseCollection &PhraseDictionaryMemory::GetOrCreateTargetPhraseCollectio
  , const Word *sourceLHS)
 {
  PhraseDictionaryNodeMemory &currNode = GetOrCreateNode(source, target, sourceLHS);
-  return currNode.GetOrCreateTargetPhraseCollection();
+  return currNode.GetTargetPhraseCollection();
 }

 const TargetPhraseCollection *PhraseDictionaryMemory::GetTargetPhraseCollection(const Phrase& sourceOrig) const
@ -70,7 +70,7 @@ const TargetPhraseCollection *PhraseDictionaryMemory::GetTargetPhraseCollection(
      return NULL;
  }

-  return currNode->GetTargetPhraseCollection();
+  return &currNode->GetTargetPhraseCollection();
 }

 PhraseDictionaryNodeMemory &PhraseDictionaryMemory::GetOrCreateNode(const Phrase &source
--- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
@ -223,7 +223,6 @@ void PhraseDictionaryMultiModelCounts::Load()

 const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseCollection(const Phrase& src) const
 {
-
  vector<vector<float> > multimodelweights;
  bool normalize;
  normalize = (m_mode == "interpolate") ? true : false;
@ -346,11 +345,11 @@ float PhraseDictionaryMultiModelCounts::GetTargetCount(const Phrase &target, siz
 {

  const PhraseDictionary &pd = *m_inverse_pd[modelIndex];
-  TargetPhraseCollection *ret_raw = (TargetPhraseCollection*)  pd.GetTargetPhraseCollection(target);
+  const TargetPhraseCollection *ret_raw = pd.GetTargetPhraseCollection(target);

  // in inverse mode, we want the first score of the first phrase pair (note: if we were to work with truly symmetric models, it would be the third score)
-  if (ret_raw != NULL) {
-    TargetPhrase * targetPhrase = *(ret_raw->begin());
+  if (ret_raw && ret_raw->GetSize() > 0) {
+    const TargetPhrase * targetPhrase = *(ret_raw->begin());
    return UntransformScore(targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd)[0]);
  }

--- a/moses/TranslationModel/PhraseDictionaryNodeMemory.h
+++ b/moses/TranslationModel/PhraseDictionaryNodeMemory.h
@ -133,10 +133,10 @@ public:
  const PhraseDictionaryNodeMemory *GetChild(const Word &sourceTerm) const;
  const PhraseDictionaryNodeMemory *GetChild(const Word &sourceNonTerm, const Word &targetNonTerm) const;

-  const TargetPhraseCollection *GetTargetPhraseCollection() const {
-    return &m_targetPhraseCollection;
+  const TargetPhraseCollection &GetTargetPhraseCollection() const {
+    return m_targetPhraseCollection;
  }
-  TargetPhraseCollection &GetOrCreateTargetPhraseCollection() {
+  TargetPhraseCollection &GetTargetPhraseCollection() {
    return m_targetPhraseCollection;
  }

--- a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
@ -263,7 +263,7 @@ TargetPhraseCollection &PhraseDictionaryFuzzyMatch::GetOrCreateTargetPhraseColle
    , const Word *sourceLHS)
 {
  PhraseDictionaryNodeMemory &currNode = GetOrCreateNode(rootNode, source, target, sourceLHS);
-  return currNode.GetOrCreateTargetPhraseCollection();
+  return currNode.GetTargetPhraseCollection();
 }

 PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetOrCreateNode(PhraseDictionaryNodeMemory &rootNode
--- a/moses/Util.h
+++ b/moses/Util.h
@ -233,6 +233,26 @@ inline void TokenizeMultiCharSeparator(std::vector<std::string> &output
  output.push_back(Trim(str.substr(pos, nextPos - pos)));
 }

+/** Split a string at the first occurrence of any delimiter character only.
+ * Used by class FeatureFunction to parse key=value pairs whose value may
+ * itself contain '='.
+ * \return {head, tail} when a delimiter is found (the delimiter itself is
+ *         dropped, the tail is kept verbatim), otherwise a single-element
+ *         vector holding the whole input string
+ */
+inline std::vector<std::string> TokenizeFirstOnly(const std::string& str,
+    const std::string& delimiters = " \t")
+{
+  const std::string::size_type splitAt = str.find_first_of(delimiters);
+
+  // No delimiter at all: the input is the only token.
+  if (splitAt == std::string::npos) {
+    return std::vector<std::string>(1, str);
+  }
+
+  std::vector<std::string> parts;
+  parts.reserve(2);
+  parts.push_back(str.substr(0, splitAt));
+  parts.push_back(str.substr(splitAt + 1));  // everything after the delimiter
+  return parts;
+}
+

 /**
 * Convert vector of type T to string
--- a/moses/Word.h
+++ b/moses/Word.h
@ -134,6 +134,11 @@ public:
    return Compare(*this, compare) != 0;
  }

+  /** Member-function convenience form of the static Compare():
+   *  forwards to Compare(*this, other) and returns its result unchanged.
+   */
+  int Compare(const Word &other) const {
+    return Compare(*this, other);
+  }
+
+
  /* static functions */

  /** transitive comparison of 2 word objects. Used by operator<.
--- a/moses/XmlOption.cpp
+++ b/moses/XmlOption.cpp
@ -150,10 +150,13 @@ vector<string> TokenizeXml(const string& str, const std::string& lbrackStr, cons
 * \param rbrackStr xml tag's right bracket string, typically ">"
 */
 bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingConstraint &reorderingConstraint, vector< size_t > &walls,
+                            std::vector< std::pair<size_t, std::string> > &placeholders,
                            const std::string& lbrackStr, const std::string& rbrackStr)
 {
  //parse XML markup in translation line

+  const StaticData &staticData = StaticData::Instance();
+
  // no xml tag? we're done.
 //if (line.find_first_of('<') == string::npos) {
  if (line.find(lbrackStr) == string::npos) {
@ -172,8 +175,8 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
  string cleanLine; // return string (text without xml)
  size_t wordPos = 0; // position in sentence (in terms of number of words)

-  const vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
-  const string &factorDelimiter = StaticData::Instance().GetFactorDelimiter();
+  const vector<FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
+  const string &factorDelimiter = staticData.GetFactorDelimiter();

  // loop through the tokens
  for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++) {
@ -290,6 +293,16 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
          reorderingConstraint.SetZone( startPos, endPos-1 );
        }

+        // name-entity placeholder
+        else if (tagName == "ne") {
+          if (startPos != (endPos - 1)) {
+            TRACE_ERR("ERROR: Placeholder must only span 1 word: " << line << endl);
+            return false;
+          }
+          string entity = ParseXmlTagAttribute(tagContent,"entity");
+          placeholders.push_back(std::pair<size_t, std::string>(startPos, entity));
+        }
+
        // default: opening tag that specifies translation options
        else {
          if (startPos >= endPos) {
@ -329,7 +342,7 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
          }

          // store translation options into members
-          if (StaticData::Instance().GetXmlInputType() != XmlIgnore) {
+          if (staticData.GetXmlInputType() != XmlIgnore) {
            // only store options if we aren't ignoring them
            for (size_t i=0; i<altTexts.size(); ++i) {
              Phrase sourcePhrase; // TODO don't know what the source phrase is
--- a/moses/XmlOption.h
+++ b/moses/XmlOption.h
@ -31,6 +31,7 @@ bool isXmlTag(const std::string& tag, const std::string& lbrackStr="<", const st
 std::vector<std::string> TokenizeXml(const std::string& str, const std::string& lbrackStr="<", const std::string& rbrackStr=">");

 bool ProcessAndStripXMLTags(std::string &line, std::vector<XmlOption*> &res, ReorderingConstraint &reorderingConstraint, std::vector< size_t > &walls,
+                            std::vector< std::pair<size_t, std::string> > &placeholders,
                            const std::string& lbrackStr="<", const std::string& rbrackStr=">");

 }
--- a/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
@ -388,6 +388,12 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
  if (vm.count("UnpairedExtractFormat")) {
    options.unpairedExtractFormat = true;
  }
+
+  // Workaround for extract-parallel issue.
+  if (options.sentenceOffset > 0) {
+    options.glueGrammarFile.clear();
+    options.unknownWordFile.clear();
+  }
 }

 void ExtractGHKM::Error(const std::string &msg) const
--- a/scripts/ems/example/config.basic
+++ b/scripts/ems/example/config.basic
@ -371,6 +371,23 @@ alignment-symmetrization-method = grow-diag-final-and
 #
 #biconcor = $moses-script-dir/ems/biconcor/biconcor

+############################################################
+
+### use of Operation Sequence Model 
+### Durrani, Schmid and Fraser. (2011): "A Joint Sequence Translation Model with Integrated Reordering"
+
+#operation-sequence-model = "yes"
+#operation-sequence-model-order = 5
+### compile Moses with --max-kenlm-order=9 if higher order is required
+
+### if OSM training should be skipped,
+# point to OSM Model 
+#
+# osm-model =
+
+############################################################
+
+
 ### lexicalized reordering: specify orientation type
 # (default: only distance-based reordering model)
 #
--- a/scripts/ems/example/config.toy
+++ b/scripts/ems/example/config.toy
@ -355,6 +355,23 @@ alignment-symmetrization-method = grow-diag-final-and
 #
 #biconcor = $moses-script-dir/ems/biconcor/biconcor

+############################################################
+
+### use of Operation Sequence Model 
+### Durrani, Schmid and Fraser. (2011): "A Joint Sequence Translation Model with Integrated Reordering"
+
+#operation-sequence-model = "yes"
+#operation-sequence-model-order = 5
+### compile Moses with --max-kenlm-order=9 if higher order is required
+
+### if OSM training should be skipped,
+# point to OSM Model 
+#
+# osm-model =
+
+############################################################
+
+
 ### lexicalized reordering: specify orientation type
 # (default: only distance-based reordering model)
 #
--- a/scripts/generic/ph_numbers.perl
+++ b/scripts/generic/ph_numbers.perl
@ -1,4 +1,5 @@
 #!/usr/bin/perl -w
+package ph_numbers;

 # Script to recognize and replace numbers in Moses training corpora
 # and decoder input
@ -7,45 +8,65 @@

 use strict;

+run() unless caller();
 use Getopt::Std;

 my $debug = $ENV{DEBUG} || 0;

-my %opts;
-if(!getopts('s:t:cm:hl',\%opts) || $opts{h}) {
-    print "Usage: perl $0 [-s source_locale][-t target_locale][-c][-h][-l][-m symbol] < in > out\n";
-    exit;
+# Command-line entry point: parse the options, then rewrite every input line
+# (read via <>) with recognize() and print the result followed by a newline.
+#   -s/-t  source/target locale (parsed and stored, not used further here)
+#   -c     handed to recognize() as its corpus-mode flag
+#   -l     handed to recognize() as its legacy-mode flag
+#   -m     placeholder symbol, default '@NUM@'
+#   -h     print the usage line and exit (also done when option parsing fails)
+sub run {
+    my %opts;
+    if(!getopts('s:t:cm:hl',\%opts) || $opts{h}) {
+	print "Usage: perl $0 [-s source_locale][-t target_locale][-c][-h][-l][-m symbol] < in > out\n";
+	exit;
+    }
+    my $sourceLocale = $opts{s} || "";
+    my $targetLocale = $opts{t} || "";
+    my $numberSymbol = $opts{m} || '@NUM@';
+    while(<>) {
+	chomp;
+	# recognize() reads exactly four arguments: line, corpus flag, legacy flag, symbol
+	print recognize($_,$opts{c},$opts{l},$numberSymbol),"\n";
+    }
 }
-my $sourceLocale = $opts{s} || "";
-my $targetLocale = $opts{t} || "";
-my $numberSymbol = $opts{m} || '@NUM@';

-while(<>) {
+# Replace every number in a line by a placeholder and return the new line.
+#   $line          text to process (no trailing newline expected)
+#   $corpusMode    true: emit the bare placeholder symbol
+#   $legacyMode    true (and not corpus mode): emit <ne translation="NUMBER">
+#                  otherwise emit <ne translation="SYMBOL" entity="NUMBER">
+#   $numberSymbol  placeholder symbol, default '@NUM@'
+# Digit groups separated only by whitespace (e.g. "1 000 000") are treated
+# as a single number. Text that contains no number is returned unchanged.
+sub recognize {
+    my $line = shift;
+    my $corpusMode = shift;
+    my $legacyMode = shift;
+    my $numberSymbol = shift || '@NUM@';
+
     # [-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?
     # while(/\G(.*?)\s*([+-]?\p{Digit}+[+-\p{Digit}\.,eE])/) {
-    chomp;
     my $output = "";
-    my $remainder = "";
-    while(/\G(.*?)(\s*)([+-]?\p{Digit}*[\.,]?\p{Digit}+[\p{Digit}\.,+-eE]*)/g) {
-	print STDERR "Between: x$1x\n" if $debug;
-	print STDERR "Number: x$3x\n" if $debug;
-	$output .= $1;
-	if($opts{c}) {
-	    $output .= $2.$numberSymbol;
+    # Until a number has been found the whole line is the unprocessed
+    # remainder, so that lines without any number are passed through.
+    my $remainder = $line;
+    # The hyphen in the trailing class is escaped: an unescaped "+-e" inside
+    # [...] is a range that also accepts characters such as < = > : A-Z.
+    while($line =~ /\G(.*?)(\s*)([+-]?\p{Digit}*[\.,]?\p{Digit}+[\p{Digit}\.,+\-eE]*)/g) {
+	# Copy the captures now; the nested match below must not disturb them.
+	my $between = $1;
+	my $space = $2;
+	my $number = $3;
+	print STDERR "Between: x${between}x\n" if $debug;
+	print STDERR "Number: x${number}x\n" if $debug;
+	# If there are more numbers separated by whitespace, add these.
+	# A continuation must start with a digit, otherwise plain whitespace
+	# would be swallowed. /c keeps pos($line) when this match finally
+	# fails, so the outer \G resumes right after the number instead of
+	# restarting at the beginning of the line.
+	while($line =~ /\G(\s+)(\p{Digit}[\p{Digit}\.,+\-eE]*)/gc) {
+	    $number .= $1.$2;
+	}
+	$output .= $between;
+	if($corpusMode) {
+	    $output .= $space.$numberSymbol;
 	}
 	else {
-	    if($opts{l}) {
-		$output .= $2."<ne translation=\"$3\">$numberSymbol</ne>";
+	    if($legacyMode) {
+		$output .= $space."<ne translation=\"$number\">$numberSymbol</ne>";
 	    }
 	    else {
-		$output .= $2."<ne translation=\"$numberSymbol\" entity=\"$3\">$numberSymbol</ne>";
+		$output .= $space."<ne translation=\"$numberSymbol\" entity=\"$number\">$numberSymbol</ne>";
 	    }
 	}
-	$remainder = $';
+	# Everything after the current match position is still unprocessed.
+	$remainder = substr($line, pos($line));
     }
-    print STDERR "Remainder: x".$remainder."x\n" if $debug;
-    print STDERR "\n" if $debug;
-    $output .= $remainder if $remainder;
-    $output .= "\n";
-    print $output;
+    print STDERR "Remainder: x".$remainder."x\n" if $debug;
+    print STDERR "\n" if $debug;
+    $output .= $remainder;
+    return $output;
 }
+
+1;
--- a/scripts/training/wrappers/parse-de-berkeley.perl
+++ b/scripts/training/wrappers/parse-de-berkeley.perl
@ -33,6 +33,9 @@ while(<STDIN>) {
  s/\(/*LRB*/g;
  s/\)/*RRB*/g;

+  # handle @ (the parser does something weird with these)
+  s/\@/\\\@/g;
+
  print TMP $_;
 }
 close(TMP);
@ -42,6 +45,7 @@ print STDERR $cmd."\n";

 open(PARSE,"$cmd|");
 while(<PARSE>) {
+  s/\\\@/\@/g;
  print $_;
 }
 close(PARSE);