Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Ian Johnson 2013-07-03 10:55:30 +01:00
commit e20fbc0754
123 changed files with 3041 additions and 1535 deletions

View File

@ -108,6 +108,10 @@ project : default-build
<link>static
;
#Apparently OS X likes to link against iconv for fgetsUTF8.
lib iconv ;
requirements += <os>MACOSX:<library>iconv ;
project : requirements
<threading>multi:<define>WITH_THREADS
<threading>multi:<library>boost_thread

View File

@ -1121,6 +1121,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/InputFeature.h</locationURI>
</link>
<link>
<name>FF/OSM-Feature</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>FF/PhraseBasedFeatureContext.cpp</name>
<type>1</type>
@ -1166,6 +1171,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/PhrasePairFeature.h</locationURI>
</link>
<link>
<name>FF/PhrasePenalty.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/PhrasePenalty.cpp</locationURI>
</link>
<link>
<name>FF/PhrasePenalty.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/PhrasePenalty.h</locationURI>
</link>
<link>
<name>FF/SourceWordDeletionFeature.cpp</name>
<type>1</type>
@ -1556,6 +1571,16 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/WordCoocTable.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/WordCoocTable.cpp</locationURI>
</link>
<link>
<name>TranslationModel/WordCoocTable.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/WordCoocTable.h</locationURI>
</link>
<link>
<name>TranslationModel/fuzzy-match</name>
<type>2</type>
@ -1581,6 +1606,26 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/bin/lm.log</locationURI>
</link>
<link>
<name>FF/OSM-Feature/OpSequenceModel.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/OpSequenceModel.cpp</locationURI>
</link>
<link>
<name>FF/OSM-Feature/OpSequenceModel.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/OpSequenceModel.h</locationURI>
</link>
<link>
<name>FF/OSM-Feature/osmHyp.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/osmHyp.cpp</locationURI>
</link>
<link>
<name>FF/OSM-Feature/osmHyp.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/osmHyp.h</locationURI>
</link>
<link>
<name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.cpp</name>
<type>1</type>

View File

@ -33,6 +33,8 @@ int main(int argc, char *argv[]) {
po::options_description options("Language model building options");
lm::builder::PipelineConfig pipeline;
std::string text, arpa;
options.add_options()
("order,o", po::value<std::size_t>(&pipeline.order)
#if BOOST_VERSION >= 104200
@ -47,18 +49,21 @@ int main(int argc, char *argv[]) {
("vocab_estimate", po::value<lm::WordIndex>(&pipeline.vocab_estimate)->default_value(1000000), "Assume this vocabulary size for purposes of calculating memory in step 1 (corpus count) and pre-sizing the hash table")
("block_count", po::value<std::size_t>(&pipeline.block_count)->default_value(2), "Block count (per order)")
("vocab_file", po::value<std::string>(&pipeline.vocab_file)->default_value(""), "Location to write vocabulary file")
("verbose_header", po::bool_switch(&pipeline.verbose_header), "Add a verbose header to the ARPA file that includes information such as token count, smoothing type, etc.");
("verbose_header", po::bool_switch(&pipeline.verbose_header), "Add a verbose header to the ARPA file that includes information such as token count, smoothing type, etc.")
("text", po::value<std::string>(&text), "Read text from a file instead of stdin")
("arpa", po::value<std::string>(&arpa), "Write ARPA to a file instead of stdout");
if (argc == 1) {
std::cerr <<
"Builds unpruned language models with modified Kneser-Ney smoothing.\n\n"
"Please cite:\n"
"@inproceedings{kenlm,\n"
"author = {Kenneth Heafield},\n"
"title = {{KenLM}: Faster and Smaller Language Model Queries},\n"
"booktitle = {Proceedings of the Sixth Workshop on Statistical Machine Translation},\n"
"month = {July}, year={2011},\n"
"address = {Edinburgh, UK},\n"
"publisher = {Association for Computational Linguistics},\n"
"@inproceedings{Heafield-estimate,\n"
" author = {Kenneth Heafield and Ivan Pouzyrevsky and Jonathan H. Clark and Philipp Koehn},\n"
" title = {Scalable Modified {Kneser-Ney} Language Model Estimation},\n"
" year = {2013},\n"
" month = {8},\n"
" booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics},\n"
" address = {Sofia, Bulgaria},\n"
" url = {http://kheafield.com/professional/edinburgh/estimate\\_paper.pdf},\n"
"}\n\n"
"Provide the corpus on stdin. The ARPA file will be written to stdout. Order of\n"
"the model (-o) is the only mandatory option. As this is an on-disk program,\n"
@ -91,9 +96,17 @@ int main(int argc, char *argv[]) {
initial.adder_out.block_count = 2;
pipeline.read_backoffs = initial.adder_out;
util::scoped_fd in(0), out(1);
if (vm.count("text")) {
in.reset(util::OpenReadOrThrow(text.c_str()));
}
if (vm.count("arpa")) {
out.reset(util::CreateOrThrow(arpa.c_str()));
}
// Read from stdin
try {
lm::builder::Pipeline(pipeline, 0, 1);
lm::builder::Pipeline(pipeline, in.release(), out.release());
} catch (const util::MallocException &e) {
std::cerr << e.what() << std::endl;
std::cerr << "Try rerunning with a more conservative -S setting than " << vm["memory"].as<std::string>() << std::endl;

View File

@ -53,7 +53,7 @@ class NGram {
Payload &Value() { return *reinterpret_cast<Payload *>(end_); }
uint64_t &Count() { return Value().count; }
const uint64_t Count() const { return Value().count; }
uint64_t Count() const { return Value().count; }
std::size_t Order() const { return end_ - begin_; }

View File

@ -304,5 +304,26 @@ template class GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::DontBhiks
template class GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::ArrayBhiksha>, SortedVocabulary>;
} // namespace detail
// Autodetect the on-disk format of `file_name` and construct the matching
// concrete KenLM model behind the virtual base class.
// NOTE(review): RecognizeBinary presumably overwrites model_type from the
// binary file header when the file is binary (model_type is the caller's
// fallback for ARPA input) — confirm against RecognizeBinary's declaration.
base::Model *LoadVirtual(const char *file_name, const Config &config, ModelType model_type) {
RecognizeBinary(file_name, model_type);
// Dispatch on the detected type; each branch heap-allocates the concrete
// model, and the caller takes ownership of the returned pointer.
switch (model_type) {
case PROBING:
return new ProbingModel(file_name, config);
case REST_PROBING:
return new RestProbingModel(file_name, config);
case TRIE:
return new TrieModel(file_name, config);
case QUANT_TRIE:
return new QuantTrieModel(file_name, config);
case ARRAY_TRIE:
return new ArrayTrieModel(file_name, config);
case QUANT_ARRAY_TRIE:
return new QuantArrayTrieModel(file_name, config);
default:
// Unrecognized enum value: refuse rather than guess a format.
UTIL_THROW(FormatLoadException, "Confused by model type " << model_type);
}
}
} // namespace ngram
} // namespace lm

View File

@ -67,7 +67,7 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
FullScoreReturn FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const;
/* Get the state for a context. Don't use this if you can avoid it. Use
* BeginSentenceState or EmptyContextState and extend from those. If
* BeginSentenceState or NullContextState and extend from those. If
* you're only going to use this state to call FullScore once, use
* FullScoreForgotState.
* To use this function, make an array of WordIndex containing the context
@ -153,6 +153,11 @@ LM_NAME_MODEL(QuantArrayTrieModel, detail::GenericModel<trie::TrieSearch<Separat
typedef ::lm::ngram::ProbingVocabulary Vocabulary;
typedef ProbingModel Model;
/* Autorecognize the file type, load, and return the virtual base class. Don't
* use the virtual base class if you can avoid it. Instead, use the above
* classes as template arguments to your own virtual feature function.*/
base::Model *LoadVirtual(const char *file_name, const Config &config = Config(), ModelType if_arpa = PROBING);
} // namespace ngram
} // namespace lm

View File

@ -54,7 +54,7 @@ template <class Weights> class ActivateUnigram {
Weights *modify_;
};
// Find the lower order entry, inserting blanks along the way as necessary.
// Find the lower order entry, inserting blanks along the way as necessary.
template <class Value> void FindLower(
const std::vector<uint64_t> &keys,
typename Value::Weights &unigram,
@ -64,7 +64,7 @@ template <class Value> void FindLower(
typename Value::ProbingEntry entry;
// Backoff will always be 0.0. We'll get the probability and rest in another pass.
entry.value.backoff = kNoExtensionBackoff;
// Go back and find the longest right-aligned entry, informing it that it extends left. Normally this will match immediately, but sometimes SRI is dumb.
// Go back and find the longest right-aligned entry, informing it that it extends left. Normally this will match immediately, but sometimes SRI is dumb.
for (int lower = keys.size() - 2; ; --lower) {
if (lower == -1) {
between.push_back(&unigram);
@ -77,11 +77,11 @@ template <class Value> void FindLower(
}
}
// Between usually has single entry, the value to adjust. But sometimes SRI stupidly pruned entries so it has unitialized blank values to be set here.
// Between usually has single entry, the value to adjust. But sometimes SRI stupidly pruned entries so it has unitialized blank values to be set here.
template <class Added, class Build> void AdjustLower(
const Added &added,
const Build &build,
std::vector<typename Build::Value::Weights *> &between,
std::vector<typename Build::Value::Weights *> &between,
const unsigned int n,
const std::vector<WordIndex> &vocab_ids,
typename Build::Value::Weights *unigrams,
@ -93,14 +93,14 @@ template <class Added, class Build> void AdjustLower(
}
typedef util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash> Middle;
float prob = -fabs(between.back()->prob);
// Order of the n-gram on which probabilities are based.
// Order of the n-gram on which probabilities are based.
unsigned char basis = n - between.size();
assert(basis != 0);
typename Build::Value::Weights **change = &between.back();
// Skip the basis.
--change;
if (basis == 1) {
// Hallucinate a bigram based on a unigram's backoff and a unigram probability.
// Hallucinate a bigram based on a unigram's backoff and a unigram probability.
float &backoff = unigrams[vocab_ids[1]].backoff;
SetExtension(backoff);
prob += backoff;
@ -128,14 +128,14 @@ template <class Added, class Build> void AdjustLower(
typename std::vector<typename Value::Weights *>::const_iterator i(between.begin());
build.MarkExtends(**i, added);
const typename Value::Weights *longer = *i;
// Everything has probability but is not marked as extending.
// Everything has probability but is not marked as extending.
for (++i; i != between.end(); ++i) {
build.MarkExtends(**i, *longer);
longer = *i;
}
}
// Continue marking lower entries even they know that they extend left. This is used for upper/lower bounds.
// Continue marking lower entries even they know that they extend left. This is used for upper/lower bounds.
template <class Build> void MarkLower(
const std::vector<uint64_t> &keys,
const Build &build,
@ -144,15 +144,15 @@ template <class Build> void MarkLower(
int start_order,
const typename Build::Value::Weights &longer) {
if (start_order == 0) return;
typename util::ProbingHashTable<typename Build::Value::ProbingEntry, util::IdentityHash>::MutableIterator iter;
// Hopefully the compiler will realize that if MarkExtends always returns false, it can simplify this code.
// Hopefully the compiler will realize that if MarkExtends always returns false, it can simplify this code.
for (int even_lower = start_order - 2 /* index in middle */; ; --even_lower) {
if (even_lower == -1) {
build.MarkExtends(unigram, longer);
return;
}
middle[even_lower].UnsafeMutableFind(keys[even_lower], iter);
if (!build.MarkExtends(iter->value, longer)) return;
if (!build.MarkExtends(
middle[even_lower].UnsafeMutableMustFind(keys[even_lower])->value,
longer)) return;
}
}
@ -168,7 +168,6 @@ template <class Build, class Activate, class Store> void ReadNGrams(
Store &store,
PositiveProbWarn &warn) {
typedef typename Build::Value Value;
typedef util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash> Middle;
assert(n >= 2);
ReadNGramHeader(f, n);
@ -186,7 +185,7 @@ template <class Build, class Activate, class Store> void ReadNGrams(
for (unsigned int h = 1; h < n - 1; ++h) {
keys[h] = detail::CombineWordHash(keys[h-1], vocab_ids[h+1]);
}
// Initially the sign bit is on, indicating it does not extend left. Most already have this but there might +0.0.
// Initially the sign bit is on, indicating it does not extend left. Most already have this but there might +0.0.
util::SetSign(entry.value.prob);
entry.key = keys[n-2];
@ -203,7 +202,7 @@ template <class Build, class Activate, class Store> void ReadNGrams(
} // namespace
namespace detail {
template <class Value> uint8_t *HashedSearch<Value>::SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) {
std::size_t allocated = Unigram::Size(counts[0]);
unigram_ = Unigram(start, counts[0], allocated);

View File

@ -71,7 +71,7 @@ template <class Value> class HashedSearch {
static const bool kDifferentRest = Value::kDifferentRest;
static const unsigned int kVersion = 0;
// TODO: move probing_multiplier here with next binary file format update.
// TODO: move probing_multiplier here with next binary file format update.
static void UpdateConfigFromBinary(int, const std::vector<uint64_t> &, Config &) {}
static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config) {
@ -102,14 +102,9 @@ template <class Value> class HashedSearch {
return ret;
}
#pragma GCC diagnostic ignored "-Wuninitialized"
MiddlePointer Unpack(uint64_t extend_pointer, unsigned char extend_length, Node &node) const {
node = extend_pointer;
typename Middle::ConstIterator found;
bool got = middle_[extend_length - 2].Find(extend_pointer, found);
assert(got);
(void)got;
return MiddlePointer(found->value);
return MiddlePointer(middle_[extend_length - 2].MustFind(extend_pointer)->value);
}
MiddlePointer LookupMiddle(unsigned char order_minus_2, WordIndex word, Node &node, bool &independent_left, uint64_t &extend_pointer) const {
@ -126,14 +121,14 @@ template <class Value> class HashedSearch {
}
LongestPointer LookupLongest(WordIndex word, const Node &node) const {
// Sign bit is always on because longest n-grams do not extend left.
// Sign bit is always on because longest n-grams do not extend left.
typename Longest::ConstIterator found;
if (!longest_.Find(CombineWordHash(node, word), found)) return LongestPointer();
return LongestPointer(found->value.prob);
}
// Generate a node without necessarily checking that it actually exists.
// Optionally return false if it's know to not exist.
// Generate a node without necessarily checking that it actually exists.
// Optionally return false if it's know to not exist.
bool FastMakeNode(const WordIndex *begin, const WordIndex *end, Node &node) const {
assert(begin != end);
node = static_cast<Node>(*begin);
@ -144,7 +139,7 @@ template <class Value> class HashedSearch {
}
private:
// Interpret config's rest cost build policy and pass the right template argument to ApplyBuild.
// Interpret config's rest cost build policy and pass the right template argument to ApplyBuild.
void DispatchBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn);
template <class Build> void ApplyBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const ProbingVocabulary &vocab, PositiveProbWarn &warn, const Build &build);
@ -153,7 +148,7 @@ template <class Value> class HashedSearch {
public:
Unigram() {}
Unigram(void *start, uint64_t count, std::size_t /*allocated*/) :
Unigram(void *start, uint64_t count, std::size_t /*allocated*/) :
unigram_(static_cast<typename Value::Weights*>(start))
#ifdef DEBUG
, count_(count)

View File

@ -6,6 +6,7 @@
#include "util/string_piece.hh"
#include <string>
#include <string.h>
namespace lm {
namespace base {
@ -119,7 +120,9 @@ class Model {
size_t StateSize() const { return state_size_; }
const void *BeginSentenceMemory() const { return begin_sentence_memory_; }
void BeginSentenceWrite(void *to) const { memcpy(to, begin_sentence_memory_, StateSize()); }
const void *NullContextMemory() const { return null_context_memory_; }
void NullContextWrite(void *to) const { memcpy(to, null_context_memory_, StateSize()); }
// Requires in_state != out_state
virtual float Score(const void *in_state, const WordIndex new_word, void *out_state) const = 0;

View File

@ -65,7 +65,7 @@ int main(int argc, char **argv)
sourcePhrase.CreateFromString(Input, input, line, "||dummy_string||", NULL);
TargetPhraseVectorPtr decodedPhraseColl
= pdc.GetTargetPhraseCollectionRaw(sourcePhrase);
= pdc.GetTargetPhraseCollectionRaw(sourcePhrase);
if(decodedPhraseColl != NULL) {
if(reportCounts)

View File

@ -61,6 +61,9 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
Phrase* unksrc = new Phrase(1);
unksrc->AddWord() = sourceWord;
Word &newWord = unksrc->GetWord(0);
newWord.SetIsOOV(true);
m_unksrcs.push_back(unksrc);
//TranslationOption *transOpt;

View File

@ -34,16 +34,6 @@ DecodeFeature::DecodeFeature( const std::string& description
: StatelessFeatureFunction(description, line)
{
VERBOSE(2,"DecodeFeature:" << std::endl);
size_t ind = 0;
while (ind < m_args.size()) {
vector<string> &args = m_args[ind];
bool consumed = SetParameter(args[0], args[1]);
if (consumed) {
m_args.erase(m_args.begin() + ind);
} else {
++ind;
}
}
}
DecodeFeature::DecodeFeature( const std::string& description
@ -67,7 +57,7 @@ DecodeFeature::DecodeFeature(const std::string& description
VERBOSE(2,"DecodeFeature: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);
}
bool DecodeFeature::SetParameter(const std::string& key, const std::string& value)
void DecodeFeature::SetParameter(const std::string& key, const std::string& value)
{
if (key == "input-factor") {
m_input =Tokenize<FactorType>(value, ",");
@ -76,9 +66,8 @@ bool DecodeFeature::SetParameter(const std::string& key, const std::string& valu
m_output =Tokenize<FactorType>(value, ",");
m_outputFactors = FactorMask(m_output);
} else {
return StatelessFeatureFunction::SetParameter(key, value);
StatelessFeatureFunction::SetParameter(key, value);
}
return true;
}

View File

@ -61,7 +61,7 @@ public:
const std::vector<FactorType>& GetOutput() const;
bool IsUseable(const FactorMask &mask) const;
virtual bool SetParameter(const std::string& key, const std::string& value);
void SetParameter(const std::string& key, const std::string& value);
protected:
std::vector<FactorType> m_input;

View File

@ -4,6 +4,8 @@
#include "moses/WordsRange.h"
#include "moses/StaticData.h"
using namespace std;
namespace Moses
{
struct DistortionState_traditional : public FFState {
@ -19,6 +21,12 @@ struct DistortionState_traditional : public FFState {
}
};
DistortionScoreProducer::DistortionScoreProducer(const std::string &line)
: StatefulFeatureFunction("Distortion", 1, line)
{
ReadParameters();
}
const FFState* DistortionScoreProducer::EmptyHypothesisState(const InputType &input) const
{
// fake previous translated phrase start and end

View File

@ -18,10 +18,7 @@ class WordsRange;
class DistortionScoreProducer : public StatefulFeatureFunction
{
public:
DistortionScoreProducer(const std::string &line)
: StatefulFeatureFunction("Distortion", 1, line) {
CHECK(m_args.size() == 0);
}
DistortionScoreProducer(const std::string &line);
bool IsUseable(const FactorMask &mask) const {
return true;

View File

@ -49,17 +49,6 @@ void FeatureFunction::Initialize(const std::string& description, const std::stri
{
ParseLine(description, line);
size_t ind = 0;
while (ind < m_args.size()) {
vector<string> &args = m_args[ind];
bool consumed = SetParameter(args[0], args[1]);
if (consumed) {
m_args.erase(m_args.begin() + ind);
} else {
++ind;
}
}
if (m_description == "") {
size_t index = description_counts.count(description);
@ -91,29 +80,33 @@ void FeatureFunction::ParseLine(const std::string& description, const std::strin
pair<set<string>::iterator,bool> ret = keys.insert(args[0]);
UTIL_THROW_IF(!ret.second, util::Exception, "Duplicate key in line " << line);
m_args.push_back(args);
if (args[0] == "num-features") {
m_numScoreComponents = Scan<size_t>(args[1]);
} else if (args[0] == "name") {
m_description = args[1];
} else {
m_args.push_back(args);
}
}
}
bool FeatureFunction::SetParameter(const std::string& key, const std::string& value)
void FeatureFunction::SetParameter(const std::string& key, const std::string& value)
{
if (key == "num-features") {
m_numScoreComponents = Scan<size_t>(value);
} else if (key == "name") {
m_description = value;
} else if (key == "tuneable") {
if (key == "tuneable") {
m_tuneable = Scan<bool>(value);
} else {
return false;
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
}
return true;
}
void FeatureFunction::OverrideParameter(const std::string& key, const std::string& value)
void FeatureFunction::ReadParameters()
{
bool ret = SetParameter(key, value);
UTIL_THROW_IF(!ret, util::Exception, "Unknown argument" << key);
while (!m_args.empty()) {
const vector<string> &args = m_args[0];
SetParameter(args[0], args[1]);
m_args.erase(m_args.begin());
}
}
}

View File

@ -106,8 +106,8 @@ public:
, ScoreComponentCollection &scoreBreakdown) const {
}
virtual bool SetParameter(const std::string& key, const std::string& value);
virtual void OverrideParameter(const std::string& key, const std::string& value);
virtual void SetParameter(const std::string& key, const std::string& value);
virtual void ReadParameters();
};
}

View File

@ -13,18 +13,7 @@ GlobalLexicalModel::GlobalLexicalModel(const std::string &line)
: StatelessFeatureFunction("GlobalLexicalModel",1, line)
{
std::cerr << "Creating global lexical model...\n";
size_t ind = 0;
while (ind < m_args.size()) {
vector<string> &args = m_args[ind];
bool consumed = SetParameter(args[0], args[1]);
if (consumed) {
m_args.erase(m_args.begin() + ind);
} else {
++ind;
}
}
CHECK(m_args.size() == 0);
ReadParameters();
// define bias word
FactorCollection &factorCollection = FactorCollection::Instance();
@ -34,7 +23,7 @@ GlobalLexicalModel::GlobalLexicalModel(const std::string &line)
}
bool GlobalLexicalModel::SetParameter(const std::string& key, const std::string& value)
void GlobalLexicalModel::SetParameter(const std::string& key, const std::string& value)
{
if (key == "file") {
m_filePath = value;
@ -43,9 +32,8 @@ bool GlobalLexicalModel::SetParameter(const std::string& key, const std::string&
} else if (key == "outputFactors") {
m_outputFactorsVec = Tokenize<FactorType>(value,",");
} else {
return StatelessFeatureFunction::SetParameter(key, value);
StatelessFeatureFunction::SetParameter(key, value);
}
return true;
}
GlobalLexicalModel::~GlobalLexicalModel()

View File

@ -77,7 +77,7 @@ public:
ScoreComponentCollection* accumulator) const {
throw std::logic_error("GlobalLexicalModel not supported in chart decoder, yet");
}
bool SetParameter(const std::string& key, const std::string& value);
void SetParameter(const std::string& key, const std::string& value);
};

View File

@ -0,0 +1,305 @@
#include <fstream>
#include "OpSequenceModel.h"
#include "osmHyp.h"
#include "util/check.hh"
#include "moses/Util.h"
using namespace std;
using namespace lm::ngram;
namespace Moses
{
// Operation Sequence Model stateful feature function with 5 score
// components.  Feature arguments from `line` are consumed via
// ReadParameters(), which routes each key/value to SetParameter().
OpSequenceModel::OpSequenceModel(const std::string &line)
:StatefulFeatureFunction("OpSequenceModel", 5, line )
{
ReadParameters();
}
// Load the operation-sequence language model and cache the score of the
// unknown-translation operation token ("_TRANS_SLF_").
// NOTE(review): the lmFile parameter is only used by the commented-out SRILM
// path; the live KenLM branch reads m_lmPath directly.  The `new Model` is
// never deleted here — presumably freed at process exit; confirm ownership.
void OpSequenceModel :: readLanguageModel(const char *lmFile)
{
string unkOp = "_TRANS_SLF_";
/*
// Code for SRILM
vector <int> numbers;
int nonWordFlag = 0;
ptrOp = new Api;
ptrOp -> read_lm(lmFile,lmOrder);
numbers.push_back(ptrOp->getLMID(const_cast <char *> (unkOp.c_str())));
unkOpProb = ptrOp->contextProbN(numbers,nonWordFlag);
*/
// Code to load KenLM
OSM = new Model(m_lmPath.c_str());
// Score the unknown operation from the null context so OOV operations get a
// consistent fallback probability in GetFutureScores().
State startState = OSM->NullContextState();
State endState;
unkOpProb = OSM->Score(startState,OSM->GetVocabulary().Index(unkOp),endState);
}
// Load-time initialization: reads the operation-sequence KenLM model.
// The commented-out block is a disabled future-cost table loader kept for
// reference (it populated m_futureCost from m_featurePath).
void OpSequenceModel::Load()
{
/*
// load future cost
//vector <string> input;
ifstream sr (m_featurePath.c_str());
char* tmp;
CHECK(sr.is_open());
vector<FactorType> factorOrder;
factorOrder.push_back(0);
string line;
while (std::getline(sr, line))
{
std::vector<std::string> tokens;
tokens = TokenizeMultiCharSeparator(line, "|||");
CHECK(tokens.size() == 3);
Phrase source, target;
source.CreateFromString(Input, factorOrder, tokens[0], "|", NULL);
target.CreateFromString(Output, factorOrder, tokens[1], "|", NULL);
ParallelPhrase pp(source, target);
Scores scores = Tokenize<float>(tokens[2], " ");
m_futureCost[pp] = scores;
// m_coll[pp] = scores;
}
*/
readLanguageModel(m_lmPath.c_str());
}
// Precompute an estimated-future-score contribution for a phrase pair at
// translation-option creation time.  Builds an osmHypothesis from a null LM
// context, the phrase-internal alignment, and string forms of both phrases,
// then folds the resulting 5 OSM scores into estimatedFutureScore.
// NOTE(review): scoreBreakdown is unused here — confirm that is intentional
// (only the future-score estimate is meant to be populated).
void OpSequenceModel:: Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
osmHypothesis obj;
obj.setState(OSM->NullContextState());
// Empty coverage bitmap: scoring the phrase pair in isolation.
WordsBitmap myBitmap(source.GetSize());
vector <string> mySourcePhrase;
vector <string> myTargetPhrase;
vector<float> scores(5);
vector <int> alignments;
int startIndex = 0;
int endIndex = source.GetSize();
// Flatten the term alignment into (source, target) index pairs.
const AlignmentInfo &align = targetPhrase.GetAlignTerm();
AlignmentInfo::const_iterator iter;
for (iter = align.begin(); iter != align.end(); ++iter)
{
alignments.push_back(iter->first);
alignments.push_back(iter->second);
}
// OOV target words are mapped to the unknown-operation token.
for (int i = 0; i < targetPhrase.GetSize(); i++)
{
if (targetPhrase.GetWord(i).IsOOV())
myTargetPhrase.push_back("_TRANS_SLF_");
else
myTargetPhrase.push_back(targetPhrase.GetWord(i).GetFactor(0)->GetString().as_string());
}
for (int i = 0; i < source.GetSize(); i++)
{
mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
}
// Build cepts, compute the OSM operation sequence, and score it with KenLM.
obj.setPhrases(mySourcePhrase , myTargetPhrase);
obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize());
obj.computeOSMFeature(startIndex,myBitmap);
obj.calculateOSMProb(*OSM);
obj.populateScores(scores);
estimatedFutureScore.PlusEquals(this, scores);
}
// Stateful evaluation during phrase-based search: extends the predecessor
// OSM state over the newly translated phrase, adds the 5 OSM scores to the
// accumulator, and returns the new search state (heap-allocated; the decoder
// takes ownership).
FFState* OpSequenceModel::Evaluate(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
{
const TargetPhrase &target = cur_hypo.GetCurrTargetPhrase();
const WordsBitmap &bitmap = cur_hypo.GetWordsBitmap();
// Work on a copy so the hypothesis' own coverage bitmap is not mutated.
WordsBitmap myBitmap = bitmap;
const Manager &manager = cur_hypo.GetManager();
const InputType &source = manager.GetSource();
// NOTE(review): sourceSentence is never used below — dead local.
const Sentence &sourceSentence = static_cast<const Sentence&>(source);
osmHypothesis obj;
vector <string> mySourcePhrase;
vector <string> myTargetPhrase;
vector<float> scores(5);
//target.GetWord(0)
//cerr << target <<" --- "<<target.GetSourcePhrase()<< endl; // English ...
//cerr << align << endl; // Alignments ...
//cerr << cur_hypo.GetCurrSourceWordsRange() << endl;
//cerr << source <<endl;
// int a = sourceRange.GetStartPos();
// cerr << source.GetWord(a);
//cerr <<a<<endl;
//const Sentence &sentence = static_cast<const Sentence&>(curr_hypo.GetManager().GetSource());
const WordsRange & sourceRange = cur_hypo.GetCurrSourceWordsRange();
int startIndex = sourceRange.GetStartPos();
int endIndex = sourceRange.GetEndPos();
const AlignmentInfo &align = cur_hypo.GetCurrTargetPhrase().GetAlignTerm();
// NOTE(review): statePtr is declared but never used — dead local.
osmState * statePtr;
// Flatten the term alignment into (source, target) index pairs.
vector <int> alignments;
AlignmentInfo::const_iterator iter;
for (iter = align.begin(); iter != align.end(); ++iter) {
//cerr << iter->first << "----" << iter->second << " ";
alignments.push_back(iter->first);
alignments.push_back(iter->second);
}
//cerr<<bitmap<<endl;
//cerr<<startIndex<<" "<<endIndex<<endl;
// Collect the source words of this phrase, clearing their coverage in the
// local bitmap so computeOSMFeature sees them as untranslated.
for (int i = startIndex; i <= endIndex; i++)
{
myBitmap.SetValue(i,0); // resetting coverage of this phrase ...
mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
// cerr<<mySourcePhrase[i]<<endl;
}
// OOV target words are mapped to the unknown-operation token.
for (int i = 0; i < target.GetSize(); i++)
{
if (target.GetWord(i).IsOOV())
myTargetPhrase.push_back("_TRANS_SLF_");
else
myTargetPhrase.push_back(target.GetWord(i).GetFactor(0)->GetString().as_string());
}
//cerr<<myBitmap<<endl;
// Seed from the predecessor state, build cepts for this phrase, compute the
// operation sequence and score it with the operation-sequence KenLM model.
obj.setState(prev_state);
obj.constructCepts(alignments,startIndex,endIndex,target.GetSize());
obj.setPhrases(mySourcePhrase , myTargetPhrase);
obj.computeOSMFeature(startIndex,myBitmap);
obj.calculateOSMProb(*OSM);
obj.populateScores(scores);
/*
if (bitmap.GetFirstGapPos() == NOT_FOUND)
{
int xx;
cerr<<bitmap<<endl;
int a = bitmap.GetFirstGapPos();
obj.print();
cin>>xx;
}
*/
/*
vector<float> scores(5);
scores[0] = 0.343423f;
scores[1] = 1.343423f;
scores[2] = 2.343423f;
scores[3] = 3.343423f;
scores[4] = 4.343423f;
*/
accumulator->PlusEquals(this, scores);
return obj.saveState();
//return statePtr;
// return NULL;
}
// Chart (hierarchical) decoding is not supported by the OSM feature; any
// call is a configuration error and terminates the process via abort().
FFState* OpSequenceModel::EvaluateChart(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
{
abort();
}
// Initial search state: wraps the KenLM begin-of-sentence state in an
// osmState.  The returned object is heap-allocated; the decoder owns it.
const FFState* OpSequenceModel::EmptyHypothesisState(const InputType &input) const
{
// NOTE(review): unconditional trace output on every sentence — consider
// guarding with VERBOSE.
cerr << "OpSequenceModel::EmptyHypothesisState()" << endl;
State startState = OSM->BeginSentenceState();
return new osmState(startState);
}
// Short weight name used in moses.ini weight sections; the same name is
// shared by all 5 score components (idx is ignored).
std::string OpSequenceModel::GetScoreProducerWeightShortName(unsigned idx) const
{
return "osm";
}
std::vector<float> OpSequenceModel::GetFutureScores(const Phrase &source, const Phrase &target) const
{
ParallelPhrase pp(source, target);
std::map<ParallelPhrase, Scores>::const_iterator iter;
iter = m_futureCost.find(pp);
//iter = m_coll.find(pp);
if (iter == m_futureCost.end()) {
vector<float> scores(5, 0);
scores[0] = unkOpProb;
return scores;
}
else {
const vector<float> &scores = iter->second;
return scores;
}
}
// Consume one feature argument.  Recognised keys:
//   feature-path — future-cost table file (m_featurePath)
//   path         — operation-sequence KenLM model file (m_lmPath)
//   order        — language model order (lmOrder)
// Unrecognised keys are delegated to the base class.
void OpSequenceModel::SetParameter(const std::string& key, const std::string& value)
{
  if (key == "feature-path") {
    m_featurePath = value;
    return;
  }
  if (key == "path") {
    m_lmPath = value;
    return;
  }
  if (key == "order") {
    lmOrder = Scan<int>(value);
    return;
  }
  StatefulFeatureFunction::SetParameter(key, value);
}
} // namespace

View File

@ -0,0 +1,69 @@
#pragma once
#include <string>
#include <map>
#include <vector>
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/Manager.h"
#include "moses/FF/OSM-Feature/osmHyp.h"
#include "lm/model.hh"
namespace Moses
{
// Operation Sequence Model (OSM) feature: a stateful feature function that
// scores the sequence of translation/reordering operations of a hypothesis
// with a KenLM model over operation tokens (5 score components).
class OpSequenceModel : public StatefulFeatureFunction
{
public:
// Operation-sequence KenLM model, allocated in readLanguageModel().
lm::ngram::Model * OSM;
// Order of the operation-sequence LM (set via the "order" argument).
int lmOrder;
// Cached LM score of the unknown-operation token "_TRANS_SLF_".
float unkOpProb;
OpSequenceModel(const std::string &line);
void readLanguageModel(const char *);
void Load();
// Stateful evaluation during search; returns a new heap-allocated osmState.
FFState* Evaluate(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
// Phrase-pair pre-scoring; contributes to estimatedFutureScore only.
void Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
// Not supported: aborts if called (OSM is phrase-based only).
virtual FFState* EvaluateChart(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const;
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
virtual std::string GetScoreProducerWeightShortName(unsigned idx=0) const;
// Looks up m_futureCost; falls back to unkOpProb for unseen pairs.
std::vector<float> GetFutureScores(const Phrase &source, const Phrase &target) const;
void SetParameter(const std::string& key, const std::string& value);
// OSM works on surface factor 0 only, so any factor mask is usable.
bool IsUseable(const FactorMask &mask) const
{ return true; }
protected:
typedef std::pair<Phrase, Phrase> ParallelPhrase;
typedef std::vector<float> Scores;
// Pre-computed future-cost table keyed by (source, target) phrase pair.
std::map<ParallelPhrase, Scores> m_futureCost;
// Cept structure of the current phrase: (source positions, target positions).
std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
// Target positions aligned to nothing (deletion operations).
std::set <int> targetNullWords;
// Paths from the "feature-path" and "path" arguments respectively.
std::string m_featurePath, m_lmPath;
};
} // namespace

View File

@ -0,0 +1,650 @@
#include "osmHyp.h"
#include <sstream>
using namespace std;
using namespace lm::ngram;
namespace Moses
{
// Construct a search state wrapping a KenLM state, with coverage position
// (j) and translated-word count (E) starting at zero.  Uses the member
// initializer list instead of assignment in the body.
osmState::osmState(const State & val)
  :j(0)
  ,E(0)
  ,lmState(val)
{
}
// Record the coverage position (jVal), translated-source-word count (eVal)
// and the open-gap map for this state.  Map assignment replaces the previous
// contents, so the redundant gap.clear() the original did first is dropped.
void osmState::saveState(int jVal, int eVal, map <int , string> & gapVal)
{
  gap = gapVal;
  j = jVal;
  E = eVal;
}
int osmState::Compare(const FFState& otherBase) const
{
const osmState &other = static_cast<const osmState&>(otherBase);
if (j != other.j)
return (j < other.j) ? -1 : +1;
if (E != other.E)
return (E < other.E) ? -1 : +1;
if (gap != other.gap)
return (gap < other.gap) ? -1 : +1;
if (lmState.length < other.lmState.length) return -1;
if (lmState.length > other.lmState.length) return 1;
return 0;
}
// Fixed diagnostic label for this state type.
std::string osmState :: getName() const
{
  static const std::string name("done");
  return name;
}
//////////////////////////////////////////////////
// Default-construct an empty OSM hypothesis: all counters and positions zeroed,
// no gap history. (Original body assigned `gapCount = 0` twice; the duplicate
// is dropped and the assignments are moved to the initializer list. `gap` is
// default-constructed empty, so the explicit clear() was redundant.)
// NOTE: initializer order follows the member declaration order in osmHyp.h.
osmHypothesis :: osmHypothesis()
  : j(0)
  , E(0)
  , gapCount(0)
  , deletionCount(0)
  , openGapCount(0)
  , gapWidth(0)
  , opProb(0)
{
}
void osmHypothesis :: setState(const FFState* prev_state)
{
if(prev_state != NULL)
{
j = static_cast <const osmState *> (prev_state)->getJ();
E = static_cast <const osmState *> (prev_state)->getE();
gap = static_cast <const osmState *> (prev_state)->getGap();
lmState = static_cast <const osmState *> (prev_state)->getLMState();
}
}
// Package the current hypothesis bookkeeping into a freshly allocated osmState.
// Ownership of the returned pointer passes to the caller (Moses FFState API).
osmState * osmHypothesis :: saveState()
{
  osmState * snapshot = new osmState(lmState);
  snapshot->saveState(j, E, gap);
  return snapshot;
}
// Return 1 if operations[x] is a translation operation (_TRANS_/_INS_/_DEL_),
// 0 if it is a reordering/bookkeeping operation (jump, gap insertion, cept
// continuation).
// Fix: std::string::find returns std::string::size_type; comparing it against
// the literal -1 relied on an implicit signed->unsigned conversion happening
// to equal npos. Compare against std::string::npos explicitly.
int osmHypothesis :: isTranslationOperation(int x)
{
  if (operations[x].find("_JMP_BCK_") != std::string::npos)
    return 0;

  if (operations[x].find("_JMP_FWD_") != std::string::npos)
    return 0;

  if (operations[x].find("_CONT_CEPT_") != std::string::npos)
    return 0;

  if (operations[x].find("_INS_GAP_") != std::string::npos)
    return 0;

  return 1;
}
// Strip all reordering/bookkeeping operations from `operations`, keeping only
// translation operations, and reset the reordering counters.
// Fixes: loop index was a signed int compared against the unsigned
// operations.size(); the filtered sequence is now swapped in rather than
// cleared-and-copied.
void osmHypothesis :: removeReorderingOperations()
{
  gapCount = 0;
  deletionCount = 0;
  openGapCount = 0;
  gapWidth = 0;

  std::vector <std::string> tupleSequence;
  for (size_t x = 0; x < operations.size(); x++) {
    if (isTranslationOperation(static_cast<int>(x)) == 1) {
      tupleSequence.push_back(operations[x]);
    }
  }

  operations.swap(tupleSequence);
}
// Score the accumulated operation sequence with the KenLM operation model.
// Starts from the LM state carried over from the previous hypothesis, feeds
// each operation token through the model (threading the state via temp ->
// currState), sums the log-probabilities into opProb, and stores the final
// LM state back into lmState for the next hypothesis extension.
void osmHypothesis :: calculateOSMProb(Model & ptrOp)
{
opProb = 0;
State currState = lmState;
State temp;
for (int i = 0; i<operations.size(); i++)
{
// Score(in_state, word_index, out_state): temp holds the state before this
// token; currState receives the state after it.
temp = currState;
opProb += ptrOp.Score(temp,ptrOp.GetVocabulary().Index(operations[i]),currState);
}
lmState = currState;
//print();
}
// Return the index of the first uncovered (value 0) source position in the
// coverage vector, or -1 when every position is covered.
// Fix: loop index was a signed int compared against the unsigned size();
// early return replaces the redundant firstOG bookkeeping.
int osmHypothesis :: firstOpenGap(vector <int> & coverageVector)
{
  for (size_t nd = 0; nd < coverageVector.size(); nd++) {
    if (coverageVector[nd] == 0)
      return static_cast<int>(nd);
  }
  return -1;
}
// Render an integer as its decimal string form (C++03-compatible; the
// codebase predates std::to_string).
string osmHypothesis :: intToString(int num)
{
  ostringstream buffer;
  buffer << num;
  return buffer.str();
}
// Emit the operation sequence needed to generate the source word at position
// j1 (sentence-absolute index), updating the operation list, coverage vector,
// gap history and counters as a side effect. `contFlag` selects the operation
// kind: 0 = first word of a (possibly multi-word) cept, 1 = continuation of a
// cept, 2 = deletion/insertion of an unaligned source word. `english`/`german`
// are the target/source surface strings of the current cept. May recurse (via
// the tail call at the bottom) to handle unaligned source words that follow j1.
// NOTE(review): statement order here mirrors the original OSM paper's
// operation semantics and is order-sensitive — code left byte-identical.
void osmHypothesis :: generateOperations(int & startIndex , int j1 , int contFlag , WordsBitmap & coverageVector , string english , string german , set <int> & targetNullWords , vector <string> & currF)
{
int gFlag = 0;
int gp = 0;
int ans;
if ( j < j1) // j1 is the index of the source word we are about to generate ...
{
//if(coverageVector[j]==0) // if source word at j is not generated yet ...
if(coverageVector.GetValue(j)==0) // if source word at j is not generated yet ...
{
// Leaving an ungenerated word behind: open a gap at j before moving on.
operations.push_back("_INS_GAP_");
gFlag++;
gap[j]="Unfilled";
}
if (j == E)
{
// Already at the frontier: just advance the position marker.
j = j1;
}
else
{
// Jump forward to the rightmost generated position first.
operations.push_back("_JMP_FWD_");
j=E;
}
}
if (j1 < j)
{
// Generating to the left of the current position: jump backward.
// if(j < E && coverageVector[j]==0)
if(j < E && coverageVector.GetValue(j)==0)
{
operations.push_back("_INS_GAP_");
gFlag++;
gap[j]="Unfilled";
}
// Land on the open gap closest to j1; gp receives its 1-based rank from the right.
j=closestGap(gap,j1,gp);
operations.push_back("_JMP_BCK_"+ intToString(gp));
//cout<<"I am j "<<j<<endl;
//cout<<"I am j1 "<<j1<<endl;
if(j==j1)
gap[j]="Filled";
}
if (j < j1)
{
// Jumped back into a gap that starts before j1: open a gap and advance.
operations.push_back("_INS_GAP_");
gap[j] = "Unfilled";
gFlag++;
j=j1;
}
if(contFlag == 0) // First words of the multi-word cept ...
{
if(english == "_TRANS_SLF_") // Unknown word ...
{
operations.push_back("_TRANS_SLF_");
}
else
{
operations.push_back("_TRANS_" + english + "_TO_" + german);
}
//ans = firstOpenGap(coverageVector);
ans = coverageVector.GetFirstGapPos();
if (ans != -1)
gapWidth += j - ans;
}
else if (contFlag == 2)
{
// Unaligned source word: emit an insertion operation and count the deletion.
operations.push_back("_INS_" + german);
ans = coverageVector.GetFirstGapPos();
if (ans != -1)
gapWidth += j - ans;
deletionCount++;
}
else
{
// Continuation of a multi-word cept.
operations.push_back("_CONT_CEPT_");
}
//coverageVector[j]=1;
coverageVector.SetValue(j,1);
j+=1;
if(E<j)
E=j;
if (gFlag > 0)
gapCount++;
openGapCount += getOpenGaps();
// If the next source position is unaligned, generate it immediately (recursive
// tail handles runs of unaligned words).
//if (coverageVector[j] == 0 && targetNullWords.find(j) != targetNullWords.end())
if (coverageVector.GetValue(j) == 0 && targetNullWords.find(j) != targetNullWords.end())
{
j1 = j;
german = currF[j1-startIndex];
english = "_INS_";
generateOperations(startIndex, j1, 2 , coverageVector , english , german , targetNullWords , currF);
}
}
// Debug dump: the operation sequence followed by the five feature counters,
// written to stderr.
void osmHypothesis :: print()
{
  for (size_t idx = 0; idx < operations.size(); idx++) {
    cerr << operations[idx] << " ";
  }
  cerr << endl << endl;

  cerr << "Operation Probability " << opProb << endl;
  cerr << "Gap Count " << gapCount << endl;
  cerr << "Open Gap Count " << openGapCount << endl;
  cerr << "Gap Width " << gapWidth << endl;
  cerr << "Deletion Count " << deletionCount << endl;
  cerr << "_______________" << endl;
}
// Find the open ("Unfilled") gap closest to (and at or before) position j1,
// scanning the gap map from highest position to lowest. On return, gp holds
// the 1-based rank of the chosen gap counted from the right (used to build the
// _JMP_BCK_<gp> operation). Returns the gap's position, j1 itself if j1 is an
// open gap, or -1 if no suitable gap exists.
// Fix: the original did `iter = gap.end(); iter--;` unconditionally, which is
// undefined behaviour when the map is empty — guard added. (The map is passed
// by value to match the declaration in osmHyp.h.)
int osmHypothesis :: closestGap(map <int,string> gap, int j1, int & gp)
{
  int dist = 1172;   // sentinel "infinity" for the distance search
  int value = -1;
  int temp = 0;
  gp = 0;
  int opGap = 0;

  if (gap.empty())   // decrementing end() of an empty map is UB
    return value;

  map <int,string> :: iterator iter = gap.end();
  do {
    iter--;
    //cout<<"Trapped "<<iter->first<<endl;
    if (iter->first == j1 && iter->second == "Unfilled") {
      // Exact match: j1 itself is an open gap.
      opGap++;
      gp = opGap;
      return j1;
    }

    if (iter->second == "Unfilled") {
      opGap++;
      temp = iter->first - j1;
      if (temp < 0)
        temp = temp * -1;
      // Only gaps strictly before j1 are candidates.
      if (dist > temp && iter->first < j1) {
        dist = temp;
        value = iter->first;
        gp = opGap;
      }
    }
  } while (iter != gap.begin());

  return value;
}
int osmHypothesis :: getOpenGaps()
{
map <int,string> :: iterator iter;
int nd = 0;
for (iter = gap.begin(); iter!=gap.end(); iter++)
{
if(iter->second == "Unfilled")
nd++;
}
return nd;
}
// Emit a _DEL_ operation for the unaligned target word `english` at
// currTargetIndex, then recurse to delete any run of following unaligned
// target words. Positions already consumed by a cept (doneTargetIndexes) are
// skipped. Reads the member sets sourceNullWords and currE.
void osmHypothesis :: generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes)
{
operations.push_back("_DEL_" + english);
currTargetIndex++;
// Skip target positions that were already generated as part of a cept.
while(doneTargetIndexes.find(currTargetIndex) != doneTargetIndexes.end())
{
currTargetIndex++;
}
// If the next position is also unaligned, delete it too.
if (sourceNullWords.find(currTargetIndex) != sourceNullWords.end())
{
english = currE[currTargetIndex];
generateDeleteOperations(english,currTargetIndex,doneTargetIndexes);
}
}
// Drive the operation-sequence generation for one phrase pair: walks the cepts
// extracted by constructCepts() in target order, emitting translate/jump/gap
// operations via generateOperations() and _DEL_ operations for unaligned
// target words via generateDeleteOperations(). `startIndex` is the absolute
// source position of the phrase; `coverageVector` is the sentence coverage
// bitmap updated in place. Relies on members ceptsInPhrase, targetNullWords,
// sourceNullWords, currE and currF set up beforehand.
void osmHypothesis :: computeOSMFeature(int startIndex , WordsBitmap & coverageVector)
{
set <int> doneTargetIndexes;
set <int> eSide;
set <int> fSide;
set <int> :: iterator iter;
string english;
string source;
int j1;
int start = 0;
int targetIndex = 0;
doneTargetIndexes.clear();
if (targetNullWords.size() != 0) // Source words to be deleted in the start of this phrase ...
{
iter = targetNullWords.begin();
if (*iter == startIndex)
{
j1 = startIndex;
source = currF[j1-startIndex];
english = "_INS_";
generateOperations(startIndex, j1, 2 , coverageVector , english , source , targetNullWords , currF);
}
}
if (sourceNullWords.find(targetIndex) != sourceNullWords.end()) // first word has to be deleted ...
{
english = currE[targetIndex];
generateDeleteOperations(english,targetIndex, doneTargetIndexes);
}
// One iteration per cept, in the order the cepts were extracted.
for (int i = 0; i < ceptsInPhrase.size(); i++)
{
source = "";
english = "";
fSide = ceptsInPhrase[i].first;
eSide = ceptsInPhrase[i].second;
// Join the target words of the cept with "^_^"; positions that are not
// contiguous with the first one are recorded in doneTargetIndexes so the
// unaligned-word scan below skips them.
iter = eSide.begin();
targetIndex = *iter;
english += currE[*iter];
iter++;
for (; iter != eSide.end(); iter++)
{
if(*iter == targetIndex+1)
targetIndex++;
else
doneTargetIndexes.insert(*iter);
english += "^_^";
english += currE[*iter];
}
// Join the source words of the cept with "^_^".
iter = fSide.begin();
source += currF[*iter];
iter++;
for (; iter != fSide.end(); iter++)
{
source += "^_^";
source += currF[*iter];
}
// First source word of the cept (contFlag 0), then its continuations (contFlag 1).
iter = fSide.begin();
j1 = *iter + startIndex;
iter++;
generateOperations(startIndex, j1, 0 , coverageVector , english , source , targetNullWords , currF);
for (; iter != fSide.end(); iter++)
{
j1 = *iter + startIndex;
generateOperations(startIndex, j1, 1 , coverageVector , english , source , targetNullWords , currF);
}
targetIndex++; // Check whether the next target word is unaligned ...
while(doneTargetIndexes.find(targetIndex) != doneTargetIndexes.end())
{
targetIndex++;
}
if(sourceNullWords.find(targetIndex) != sourceNullWords.end())
{
english = currE[targetIndex];
generateDeleteOperations(english,targetIndex, doneTargetIndexes);
}
}
//removeReorderingOperations();
//print();
}
// Grow (eSide, fSide) to the transitive closure of the alignment links: add
// every source word aligned to a word in eSide, then every target word aligned
// to a word in fSide, and recurse until eSide stops growing. tS maps target
// index -> aligned source indices; sT maps source index -> aligned target
// indices. Note: operator[] on tS/sT may insert empty entries for queried
// keys, matching the original behaviour that computeOSMFeature relies on.
// Fix: the growth check compared the unsigned eSide.size() against a signed
// int snapshot; the snapshot is now a size_t (inner loop indices likewise).
void osmHypothesis :: getMeCepts ( set <int> & eSide , set <int> & fSide , map <int , vector <int> > & tS , map <int , vector <int> > & sT)
{
  set <int> :: iterator iter;
  size_t sz = eSide.size();   // size before this expansion round
  vector <int> t;

  for (iter = eSide.begin(); iter != eSide.end(); iter++) {
    t = tS[*iter];
    for (size_t i = 0; i < t.size(); i++) {
      fSide.insert(t[i]);
    }
  }

  for (iter = fSide.begin(); iter != fSide.end(); iter++) {
    t = sT[*iter];
    for (size_t i = 0; i < t.size(); i++) {
      eSide.insert(t[i]);
    }
  }

  if (eSide.size() > sz) {
    // New target words were pulled in; keep closing over the alignment links.
    getMeCepts(eSide, fSide, tS, sT);
  }
}
// Partition the word alignment of one phrase pair into minimal translation
// units (cepts). `align` is a flat [src,tgt,src,tgt,...] list of phrase-local
// alignment points; startIndex/endIndex are the absolute source span of the
// phrase; targetPhraseLength is the number of target words. Side effects:
// fills ceptsInPhrase with (source-set, target-set) pairs, targetNullWords
// with absolute positions of unaligned source words, and sourceNullWords with
// phrase-local positions of unaligned target words.
void osmHypothesis :: constructCepts(vector <int> & align , int startIndex , int endIndex, int targetPhraseLength)
{
std::map <int , vector <int> > sT;
std::map <int , vector <int> > tS;
std::set <int> eSide;
std::set <int> fSide;
std::set <int> :: iterator iter;
std :: map <int , vector <int> > :: iterator iter2;
std :: pair < set <int> , set <int> > cept;
int src;
int tgt;
// Build the two alignment adjacency maps from the flat pair list.
for (int i = 0; i < align.size(); i+=2)
{
src = align[i];
tgt = align[i+1];
tS[tgt].push_back(src);
sT[src].push_back(tgt);
}
for (int i = startIndex; i<= endIndex; i++) // What are unaligned source words in this phrase ...
{
if (sT.find(i-startIndex) == sT.end())
{
targetNullWords.insert(i);
}
}
for (int i = 0; i < targetPhraseLength; i++) // What are unaligned target words in this phrase ...
{
if (tS.find(i) == tS.end())
{
sourceNullWords.insert(i);
}
}
// Repeatedly seed a cept from the lowest remaining target word, close it over
// the alignment links (getMeCepts), and remove its words from both maps.
while (tS.size() != 0 && sT.size() != 0)
{
iter2 = tS.begin();
eSide.clear();
fSide.clear();
eSide.insert (iter2->first);
getMeCepts(eSide, fSide, tS , sT);
for (iter = eSide.begin(); iter != eSide.end(); iter++)
{
iter2 = tS.find(*iter);
tS.erase(iter2);
}
for (iter = fSide.begin(); iter != fSide.end(); iter++)
{
iter2 = sT.find(*iter);
sT.erase(iter2);
}
cept = make_pair (fSide , eSide);
ceptsInPhrase.push_back(cept);
}
/*
cerr<<"Extracted Cepts "<<endl;
for (int i = 0; i < ceptsInPhrase.size(); i++)
{
fSide = ceptsInPhrase[i].first;
eSide = ceptsInPhrase[i].second;
for (iter = eSide.begin(); iter != eSide.end(); iter++)
{
cerr<<*iter<<" ";
}
cerr<<"<---> ";
for (iter = fSide.begin(); iter != fSide.end(); iter++)
{
cerr<<*iter<<" ";
}
cerr<<endl;
}
cerr<<endl;
cerr<<"Unaligned Target Words"<<endl;
for (iter = sourceNullWords.begin(); iter != sourceNullWords.end(); iter++)
cerr<<*iter<<"<--->"<<endl;
cerr<<"Unaligned Source Words"<<endl;
for (iter = targetNullWords.begin(); iter != targetNullWords.end(); iter++)
cerr<<*iter<<"<--->"<<endl;
*/
}
// Export the five OSM feature values (operation LM score, gap width, gap
// count, open gap count, deletion count) in their fixed order.
void osmHypothesis :: populateScores(vector <float> & scores)
{
  const float featureValues[] = {
    static_cast<float>(opProb),
    static_cast<float>(gapWidth),
    static_cast<float>(gapCount),
    static_cast<float>(openGapCount),
    static_cast<float>(deletionCount)
  };
  scores.assign(featureValues, featureValues + 5);
}
} // namespace

View File

@ -0,0 +1,89 @@
#pragma once
# include "moses/FF/FFState.h"
# include "moses/Manager.h"
#include "lm/model.hh"
# include <set>
# include <map>
# include <string>
# include <vector>
namespace Moses
{
// Recombination state for the Operation Sequence Model feature: the decoder
// positions (j, E), the gap history, and the KenLM state of the operation LM.
class osmState : public FFState
{
public:
osmState(const lm::ngram::State & val);
int Compare(const FFState& other) const;
// Store the decoder bookkeeping into this state.
void saveState(int jVal, int eVal, std::map <int , std::string> & gapVal);
int getJ()const {return j;}
int getE()const {return E;}
std::map <int , std::string> getGap() const { return gap;}
lm::ngram::State getLMState() const {return lmState;}
void print() const;
std::string getName() const;
protected:
int j, E;                           // position after last / rightmost generated source word
std::map <int,std::string> gap;     // gap position -> "Filled"/"Unfilled"
lm::ngram::State lmState;           // operation-LM state
};
// Working object that converts one phrase-pair extension into an OSM operation
// sequence and scores it; not a decoder hypothesis itself, but the scratch
// state used while evaluating one.
class osmHypothesis
{
private:
std::vector <std::string> operations; // List of operations required to generated this hyp ...
std::map <int,std::string> gap; // Maintains gap history ...
int j; // Position after the last source word generated ...
int E; // Position after the right most source word so far generated ...
lm::ngram::State lmState; // KenLM's Model State ...
int gapCount; // Number of gaps inserted ...
int deletionCount;
int openGapCount;
int gapWidth;
double opProb;
std::vector <std::string> currE;  // target words of the current phrase
std::vector <std::string> currF;  // source words of the current phrase
// One (source-index-set, target-index-set) pair per extracted cept.
std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
std::set <int> targetNullWords;   // unaligned source positions (absolute)
std::set <int> sourceNullWords;   // unaligned target positions (phrase-local)
// Nearest open gap at or before j1; gp gets its rank for _JMP_BCK_<gp>.
int closestGap(std::map <int,std::string> gap,int j1, int & gp);
int firstOpenGap(std::vector <int> & coverageVector);
std::string intToString(int);
int getOpenGaps();
int isTranslationOperation(int j);
void removeReorderingOperations();
// Transitive closure of alignment links to grow a cept.
void getMeCepts ( std::set <int> & eSide , std::set <int> & fSide , std::map <int , std::vector <int> > & tS , std::map <int , std::vector <int> > & sT);
public:
osmHypothesis();
~osmHypothesis(){};
// Emit the operations that generate the source word at j1 (see osmHyp.cpp).
void generateOperations(int & startIndex, int j1 , int contFlag , WordsBitmap & coverageVector , std::string english , std::string german , std::set <int> & targetNullWords , std::vector <std::string> & currF);
void generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes);
// Score the accumulated operation sequence with the KenLM operation model.
void calculateOSMProb(lm::ngram::Model & ptrOp);
void computeOSMFeature(int startIndex , WordsBitmap & coverageVector);
// Partition the phrase-pair alignment into cepts and null-word sets.
void constructCepts(std::vector <int> & align , int startIndex , int endIndex, int targetPhraseLength);
void setPhrases(std::vector <std::string> & val1 , std::vector <std::string> & val2){currF = val1; currE = val2;}
void setState(const FFState* prev_state);
osmState * saveState();
void print();
void populateScores(std::vector <float> & scores);
void setState(const lm::ngram::State & val){lmState = val;}
};
} // namespace

View File

@ -19,31 +19,18 @@ PhraseBoundaryFeature::PhraseBoundaryFeature(const std::string &line)
: StatefulFeatureFunction("PhraseBoundaryFeature", 0, line)
{
std::cerr << "Initializing source word deletion feature.." << std::endl;
size_t ind = 0;
while (ind < m_args.size()) {
vector<string> &args = m_args[ind];
bool consumed = SetParameter(args[0], args[1]);
if (consumed) {
m_args.erase(m_args.begin() + ind);
} else {
++ind;
}
}
CHECK(m_args.size() == 0);
ReadParameters();
}
bool PhraseBoundaryFeature::SetParameter(const std::string& key, const std::string& value)
void PhraseBoundaryFeature::SetParameter(const std::string& key, const std::string& value)
{
if (key == "source") {
m_sourceFactors = Tokenize<FactorType>(value, ",");
} else if (key == "target") {
m_targetFactors = Tokenize<FactorType>(value, ",");
} else {
return StatefulFeatureFunction::SetParameter(key, value);
StatefulFeatureFunction::SetParameter(key, value);
}
return true;
}
const FFState* PhraseBoundaryFeature::EmptyHypothesisState(const InputType &) const

View File

@ -52,7 +52,7 @@ public:
ScoreComponentCollection* ) const {
throw std::logic_error("PhraseBoundaryState not supported in chart decoder, yet");
}
bool SetParameter(const std::string& key, const std::string& value);
void SetParameter(const std::string& key, const std::string& value);
private:
void AddFeatures(

View File

@ -12,7 +12,7 @@ using namespace std;
PhraseLengthFeature::PhraseLengthFeature(const std::string &line)
:StatelessFeatureFunction("PhraseLengthFeature", 0, line)
{
CHECK(m_args.size() == 0);
ReadParameters();
}
void PhraseLengthFeature::Evaluate(const Phrase &source

View File

@ -17,18 +17,7 @@ PhrasePairFeature::PhrasePairFeature(const std::string &line)
:StatelessFeatureFunction("PhrasePairFeature", 0, line)
{
std::cerr << "Initializing PhrasePairFeature.." << std::endl;
size_t ind = 0;
while (ind < m_args.size()) {
vector<string> &args = m_args[ind];
bool consumed = SetParameter(args[0], args[1]);
if (consumed) {
m_args.erase(m_args.begin() + ind);
} else {
++ind;
}
}
CHECK(m_args.size() == 0);
ReadParameters();
if (m_simple == 1) std::cerr << "using simple phrase pairs.. ";
if (m_sourceContext == 1) std::cerr << "using source context.. ";
@ -43,7 +32,7 @@ PhrasePairFeature::PhrasePairFeature(const std::string &line)
}
}
bool PhrasePairFeature::SetParameter(const std::string& key, const std::string& value)
void PhrasePairFeature::SetParameter(const std::string& key, const std::string& value)
{
if (key == "input-factor") {
m_sourceFactorId = Scan<FactorType>(value);
@ -62,10 +51,8 @@ bool PhrasePairFeature::SetParameter(const std::string& key, const std::string&
} else if (key == "ignore-punctuation") {
m_filePathSource = value;
} else {
return StatelessFeatureFunction::SetParameter(key, value);
StatelessFeatureFunction::SetParameter(key, value);
}
return true;
}
void PhrasePairFeature::Load()

View File

@ -46,7 +46,7 @@ public:
}
void Load();
bool SetParameter(const std::string& key, const std::string& value);
void SetParameter(const std::string& key, const std::string& value);
};

View File

@ -0,0 +1,22 @@
#include "PhrasePenalty.h"
#include "moses/ScoreComponentCollection.h"
namespace Moses
{
// Construct the phrase-penalty feature (one dense score) from its moses.ini
// line; ReadParameters() consumes any key=value arguments on the line.
PhrasePenalty::PhrasePenalty(const std::string &line)
: StatelessFeatureFunction("PhrasePenalty",1, line)
{
ReadParameters();
}
// Assign the constant penalty of 1 to every phrase pair; the weight attached
// to this feature turns it into the classic phrase-count penalty.
void PhrasePenalty::Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
scoreBreakdown.Assign(this, 1.0f);
}
} // namespace

24
moses/FF/PhrasePenalty.h Normal file
View File

@ -0,0 +1,24 @@
#pragma once
#include "StatelessFeatureFunction.h"
namespace Moses
{
// Stateless feature that fires the constant value 1 per phrase pair,
// implementing the standard phrase-count penalty.
class PhrasePenalty : public StatelessFeatureFunction
{
public:
PhrasePenalty(const std::string &line);
bool IsUseable(const FactorMask &mask) const {
return true;
}
virtual void Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
};
} //namespace

View File

@ -22,30 +22,18 @@ SourceWordDeletionFeature::SourceWordDeletionFeature(const std::string &line)
m_unrestricted(true)
{
std::cerr << "Initializing source word deletion feature.." << std::endl;
size_t ind = 0;
while (ind < m_args.size()) {
vector<string> &args = m_args[ind];
bool consumed = SetParameter(args[0], args[1]);
if (consumed) {
m_args.erase(m_args.begin() + ind);
} else {
++ind;
}
}
CHECK(m_args.size() == 0);
ReadParameters();
}
bool SourceWordDeletionFeature::SetParameter(const std::string& key, const std::string& value)
void SourceWordDeletionFeature::SetParameter(const std::string& key, const std::string& value)
{
if (key == "factor") {
m_factorType = Scan<FactorType>(value);
} else if (key == "path") {
m_filename = value;
} else {
return StatelessFeatureFunction::SetParameter(key, value);
StatelessFeatureFunction::SetParameter(key, value);
}
return true;
}
void SourceWordDeletionFeature::Load()

View File

@ -37,7 +37,7 @@ public:
const TargetPhrase& targetPhrase,
ScoreComponentCollection* accumulator,
const AlignmentInfo &alignmentInfo) const;
bool SetParameter(const std::string& key, const std::string& value);
void SetParameter(const std::string& key, const std::string& value);
};

View File

@ -21,18 +21,7 @@ TargetBigramFeature::TargetBigramFeature(const std::string &line)
:StatefulFeatureFunction("TargetBigramFeature", 0, line)
{
std::cerr << "Initializing target bigram feature.." << std::endl;
size_t ind = 0;
while (ind < m_args.size()) {
vector<string> &args = m_args[ind];
bool consumed = SetParameter(args[0], args[1]);
if (consumed) {
m_args.erase(m_args.begin() + ind);
} else {
++ind;
}
}
CHECK(m_args.size() == 0);
ReadParameters();
FactorCollection& factorCollection = FactorCollection::Instance();
const Factor* bosFactor =
@ -41,7 +30,7 @@ TargetBigramFeature::TargetBigramFeature(const std::string &line)
}
bool TargetBigramFeature::SetParameter(const std::string& key, const std::string& value)
void TargetBigramFeature::SetParameter(const std::string& key, const std::string& value)
{
if (key == "factor") {
m_factorType = Scan<FactorType>(value);
@ -50,7 +39,6 @@ bool TargetBigramFeature::SetParameter(const std::string& key, const std::string
} else {
StatefulFeatureFunction::SetParameter(key, value);
}
return true;
}
void TargetBigramFeature::Load()

View File

@ -47,7 +47,7 @@ public:
ScoreComponentCollection* ) const {
abort();
}
bool SetParameter(const std::string& key, const std::string& value);
void SetParameter(const std::string& key, const std::string& value);
private:
FactorType m_factorType;

View File

@ -41,22 +41,10 @@ TargetNgramFeature::TargetNgramFeature(const std::string &line)
:StatefulFeatureFunction("TargetNgramFeature", 0, line)
{
std::cerr << "Initializing target ngram feature.." << std::endl;
size_t ind = 0;
while (ind < m_args.size()) {
vector<string> &args = m_args[ind];
bool consumed = SetParameter(args[0], args[1]);
if (consumed) {
m_args.erase(m_args.begin() + ind);
} else {
++ind;
}
}
CHECK(m_args.size() == 0);
ReadParameters();
}
bool TargetNgramFeature::SetParameter(const std::string& key, const std::string& value)
void TargetNgramFeature::SetParameter(const std::string& key, const std::string& value)
{
if (key == "factor") {
m_factorType = Scan<FactorType>(value);
@ -65,9 +53,8 @@ bool TargetNgramFeature::SetParameter(const std::string& key, const std::string&
} else if (key == "lower-ngrams") {
m_lower_ngrams = Scan<bool>(value);
} else {
return StatefulFeatureFunction::SetParameter(key, value);
StatefulFeatureFunction::SetParameter(key, value);
}
return true;
}
bool TargetNgramFeature::Load(const std::string &filePath)

View File

@ -191,7 +191,7 @@ public:
virtual FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureId,
ScoreComponentCollection* accumulator) const;
bool SetParameter(const std::string& key, const std::string& value);
void SetParameter(const std::string& key, const std::string& value);
private:
FactorType m_factorType;

View File

@ -20,30 +20,18 @@ TargetWordInsertionFeature::TargetWordInsertionFeature(const std::string &line)
m_unrestricted(true)
{
std::cerr << "Initializing target word insertion feature.." << std::endl;
size_t ind = 0;
while (ind < m_args.size()) {
vector<string> &args = m_args[ind];
bool consumed = SetParameter(args[0], args[1]);
if (consumed) {
m_args.erase(m_args.begin() + ind);
} else {
++ind;
}
}
CHECK(m_args.size() == 0);
ReadParameters();
}
bool TargetWordInsertionFeature::SetParameter(const std::string& key, const std::string& value)
void TargetWordInsertionFeature::SetParameter(const std::string& key, const std::string& value)
{
if (key == "factor") {
m_factorType = Scan<FactorType>(value);
} else if (key == "path") {
m_filename = value;
} else {
return StatelessFeatureFunction::SetParameter(key, value);
StatelessFeatureFunction::SetParameter(key, value);
}
return true;
}
void TargetWordInsertionFeature::Load()

View File

@ -37,7 +37,7 @@ public:
const TargetPhrase& targetPhrase,
ScoreComponentCollection* accumulator,
const AlignmentInfo &alignmentInfo) const;
bool SetParameter(const std::string& key, const std::string& value);
void SetParameter(const std::string& key, const std::string& value);
};

View File

@ -1,7 +1,17 @@
#include "UnknownWordPenaltyProducer.h"
#include <vector>
#include <string>
using namespace std;
namespace Moses
{
UnknownWordPenaltyProducer::UnknownWordPenaltyProducer(const std::string &line)
: StatelessFeatureFunction("UnknownWordPenalty",1, line)
{
m_tuneable = false;
ReadParameters();
}
}

View File

@ -15,11 +15,7 @@ class WordsRange;
class UnknownWordPenaltyProducer : public StatelessFeatureFunction
{
public:
UnknownWordPenaltyProducer(const std::string &line)
: StatelessFeatureFunction("UnknownWordPenalty",1, line) {
m_tuneable = false;
CHECK(m_args.size() == 0);
}
UnknownWordPenaltyProducer(const std::string &line);
bool IsUseable(const FactorMask &mask) const {
return true;

View File

@ -2,8 +2,16 @@
#include "moses/TargetPhrase.h"
#include "moses/ScoreComponentCollection.h"
using namespace std;
namespace Moses
{
WordPenaltyProducer::WordPenaltyProducer(const std::string &line)
: StatelessFeatureFunction("WordPenalty",1, line)
{
ReadParameters();
}
void WordPenaltyProducer::Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown

View File

@ -12,10 +12,7 @@ class ScoreComponentCollection;
class WordPenaltyProducer : public StatelessFeatureFunction
{
public:
WordPenaltyProducer(const std::string &line)
: StatelessFeatureFunction("WordPenalty",1, line) {
CHECK(m_args.size() == 0);
}
WordPenaltyProducer(const std::string &line);
bool IsUseable(const FactorMask &mask) const {
return true;

View File

@ -26,18 +26,7 @@ WordTranslationFeature::WordTranslationFeature(const std::string &line)
,m_domainTrigger(false)
{
std::cerr << "Initializing word translation feature.. " << endl;
size_t ind = 0;
while (ind < m_args.size()) {
vector<string> &args = m_args[ind];
bool consumed = SetParameter(args[0], args[1]);
if (consumed) {
m_args.erase(m_args.begin() + ind);
} else {
++ind;
}
}
CHECK(m_args.size() == 0);
ReadParameters();
if (m_simple == 1) std::cerr << "using simple word translations.. ";
if (m_sourceContext == 1) std::cerr << "using source context.. ";
@ -71,7 +60,7 @@ WordTranslationFeature::WordTranslationFeature(const std::string &line)
}
bool WordTranslationFeature::SetParameter(const std::string& key, const std::string& value)
void WordTranslationFeature::SetParameter(const std::string& key, const std::string& value)
{
if (key == "input-factor") {
m_factorTypeSource = Scan<FactorType>(value);
@ -94,9 +83,8 @@ bool WordTranslationFeature::SetParameter(const std::string& key, const std::str
} else if (key == "target-path") {
m_filePathTarget = value;
} else {
return StatelessFeatureFunction::SetParameter(key, value);
StatelessFeatureFunction::SetParameter(key, value);
}
return true;
}
void WordTranslationFeature::Load()

View File

@ -52,7 +52,7 @@ public:
void EvaluateChart(const ChartBasedFeatureContext& context,
ScoreComponentCollection* accumulator) const;
bool SetParameter(const std::string& key, const std::string& value);
void SetParameter(const std::string& key, const std::string& value);
};
}

View File

@ -38,14 +38,7 @@ namespace Moses
GenerationDictionary::GenerationDictionary(const std::string &line)
: DecodeFeature("Generation", line)
{
for (size_t i = 0; i < m_args.size(); ++i) {
const vector<string> &args = m_args[i];
if (args[0] == "path") {
m_filePath = args[1];
}
}
ReadParameters();
}
void GenerationDictionary::Load()
@ -133,5 +126,14 @@ const OutputWordCollection *GenerationDictionary::FindWord(const Word &word) con
return ret;
}
void GenerationDictionary::SetParameter(const std::string& key, const std::string& value)
{
if (key == "path") {
m_filePath = value;
} else {
DecodeFeature::SetParameter(key, value);
}
}
}

View File

@ -68,6 +68,7 @@ public:
* Or NULL if the input word isn't found. The search function used is the WordComparer functor
*/
const OutputWordCollection *FindWord(const Word &word) const;
void SetParameter(const std::string& key, const std::string& value);
};

View File

@ -11,7 +11,6 @@ if $(with-dlib) {
}
alias headers : ../util//kenutil : : : $(max-factors) $(dlib) ;
alias ThreadPool : ThreadPool.cpp ;
if [ option.get "with-synlm" : no : yes ] = yes
@ -41,6 +40,7 @@ lib moses :
TranslationModel/Scope3Parser/*.cpp
TranslationModel/CYKPlusParser/*.cpp
FF/*.cpp
FF/OSM-Feature/*.cpp
: #exceptions
ThreadPool.cpp
SyntacticLanguageModel.cpp

View File

@ -383,9 +383,10 @@ LanguageModel *ConstructKenLM(const std::string &description, const std::string
try {
lm::ngram::ModelType model_type;
if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
switch(model_type) {
case lm::ngram::PROBING:
return new LanguageModelKen<lm::ngram::ProbingModel>(description, line, file, factorType, lazy);
return new LanguageModelKen<lm::ngram::ProbingModel>(description, line, file, factorType, lazy);
case lm::ngram::REST_PROBING:
return new LanguageModelKen<lm::ngram::RestProbingModel>(description, line, file, factorType, lazy);
case lm::ngram::TRIE:

View File

@ -38,7 +38,7 @@ public:
}
LabelId add(const Key& k) {
std::pair<typename M::iterator,bool> p
=m.insert(std::make_pair(k,data.size()));
=m.insert(std::make_pair(k,data.size()));
if(p.second) data.push_back(k);
CHECK(static_cast<size_t>(p.first->second)<data.size());
return p.first->second;

View File

@ -68,6 +68,7 @@ Manager::~Manager()
{
delete m_transOptColl;
delete m_search;
// this is a comment ...
StaticData::Instance().CleanUpAfterSentenceProcessing(m_source);
}

View File

@ -275,13 +275,15 @@ bool Parameter::LoadParam(int argc, char* argv[])
}
// overwrite parameters with values from switches
for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++) {
for(PARAM_STRING::const_iterator iterParam = m_description.begin();
iterParam != m_description.end(); iterParam++) {
const string paramName = iterParam->first;
OverwriteParam("-" + paramName, paramName, argc, argv);
}
// ... also shortcuts
for(PARAM_STRING::const_iterator iterParam = m_abbreviation.begin(); iterParam != m_abbreviation.end(); iterParam++) {
for(PARAM_STRING::const_iterator iterParam = m_abbreviation.begin();
iterParam != m_abbreviation.end(); iterParam++) {
const string paramName = iterParam->first;
const string paramShortName = iterParam->second;
OverwriteParam("-" + paramShortName, paramName, argc, argv);
@ -294,7 +296,8 @@ bool Parameter::LoadParam(int argc, char* argv[])
verbose = Scan<int>(m_setting["verbose"][0]);
if (verbose >= 1) { // only if verbose
TRACE_ERR( "Defined parameters (per moses.ini or switch):" << endl);
for(PARAM_MAP::const_iterator iterParam = m_setting.begin() ; iterParam != m_setting.end(); iterParam++) {
for(PARAM_MAP::const_iterator iterParam = m_setting.begin() ;
iterParam != m_setting.end(); iterParam++) {
TRACE_ERR( "\t" << iterParam->first << ": ");
for ( size_t i = 0; i < iterParam->second.size(); i++ )
TRACE_ERR( iterParam->second[i] << " ");
@ -303,7 +306,8 @@ bool Parameter::LoadParam(int argc, char* argv[])
}
// convert old weights args to new format
if (!isParamSpecified("feature"))
// WHAT IS GOING ON HERE??? - UG
if (!isParamSpecified("feature")) // UG
ConvertWeightArgs();
CreateWeightsMap();
WeightOverwrite();
@ -331,11 +335,11 @@ std::vector<float> &Parameter::GetWeights(const std::string &name)
{
std::vector<float> &ret = m_weights[name];
cerr << "WEIGHT " << name << "=";
for (size_t i = 0; i < ret.size(); ++i) {
cerr << ret[i] << ",";
}
cerr << endl;
// cerr << "WEIGHT " << name << "=";
// for (size_t i = 0; i < ret.size(); ++i) {
// cerr << ret[i] << ",";
// }
// cerr << endl;
return ret;
}
@ -357,7 +361,10 @@ void Parameter::SetWeight(const std::string &name, size_t ind, const vector<floa
newWeights.push_back(line);
}
void Parameter::AddWeight(const std::string &name, size_t ind, const std::vector<float> &weights)
void
Parameter::
AddWeight(const std::string &name, size_t ind,
const std::vector<float> &weights)
{
PARAM_VEC &newWeights = m_setting["weight"];
@ -478,6 +485,12 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
case Compact:
ptType = "PhraseDictionaryCompact";
break;
case SuffixArray:
ptType = "PhraseDictionarySuffixArray";
break;
case DSuffixArray:
ptType = "PhraseDictionaryDynSuffixArray";
break;
default:
break;
}
@ -502,6 +515,9 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
++currOldInd;
}
// cerr << weights.size() << " PHRASE TABLE WEIGHTS "
// << __FILE__ << ":" << __LINE__ << endl;
AddWeight(ptType, ptInd, weights);
// actual pt
@ -527,7 +543,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
ptLine << "num-features=" << numScoreComponent << " ";
ptLine << "table-limit=" << maxTargetPhrase[currDict] << " ";
if (implementation == SuffixArray) {
if (implementation == SuffixArray || implementation == DSuffixArray) {
ptLine << "target-path=" << token[5] << " ";
ptLine << "alignment-path=" << token[6] << " ";
}

View File

@ -61,6 +61,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/FF/DistortionScoreProducer.h"
#include "moses/FF/WordPenaltyProducer.h"
#include "moses/FF/InputFeature.h"
#include "moses/FF/PhrasePenalty.h"
#include "moses/FF/OSM-Feature/OpSequenceModel.h"
#include "LM/Ken.h"
#ifdef LM_IRST
@ -691,6 +693,14 @@ bool StaticData::LoadData(Parameter *parameter)
PhraseDictionaryDynSuffixArray* model = new PhraseDictionaryDynSuffixArray(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "OpSequenceModel") {
OpSequenceModel* model = new OpSequenceModel(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "PhrasePenalty") {
PhrasePenalty* model = new PhrasePenalty(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
}
#ifdef HAVE_SYNLM
@ -938,7 +948,7 @@ const TranslationOptionList* StaticData::FindTransOptListInCache(const DecodeGra
boost::mutex::scoped_lock lock(m_transOptCacheMutex);
#endif
std::map<std::pair<std::pair<size_t, std::string>, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iter
= m_transOptCache.find(key);
= m_transOptCache.find(key);
if (iter == m_transOptCache.end())
return NULL;
iter->second.second = clock(); // update last used time
@ -1166,7 +1176,6 @@ void StaticData::LoadFeatureFunctions()
}
}
// load phrase table
for (size_t i = 0; i < m_phraseDictionary.size(); ++i) {
PhraseDictionary *pt = m_phraseDictionary[i];
pt->Load();

View File

@ -673,7 +673,7 @@ public:
return false;
}
std::map< std::string, std::set< std::string > >::const_iterator lookupIgnoreFF
= m_weightSettingIgnoreFF.find( m_currentWeightSetting );
= m_weightSettingIgnoreFF.find( m_currentWeightSetting );
if (lookupIgnoreFF == m_weightSettingIgnoreFF.end()) {
return false;
}
@ -691,7 +691,7 @@ public:
return false;
}
std::map< std::string, std::set< size_t > >::const_iterator lookupIgnoreDP
= m_weightSettingIgnoreDP.find( m_currentWeightSetting );
= m_weightSettingIgnoreDP.find( m_currentWeightSetting );
if (lookupIgnoreDP == m_weightSettingIgnoreDP.end()) {
return false;
}

View File

@ -35,11 +35,11 @@ struct CompareTargetPhrase {
void TargetPhraseCollection::NthElement(size_t tableLimit)
{
vector<TargetPhrase*>::iterator
iterMiddle = (tableLimit == 0 || m_collection.size() < tableLimit) ?m_collection.end() : m_collection.begin() + tableLimit;
//std::sort(m_collection.begin(), m_collection.end(), CompareTargetPhrase());
std::nth_element(m_collection.begin(), iterMiddle, m_collection.end(), CompareTargetPhrase());
vector<TargetPhrase*>::iterator nth;
nth = (tableLimit && tableLimit <= m_collection.size()
? m_collection.begin() + tableLimit
: m_collection.end());
std::nth_element(m_collection.begin(), nth, m_collection.end(), CompareTargetPhrase());
}
void TargetPhraseCollection::Prune(bool adhereTableLimit, size_t tableLimit)

File diff suppressed because it is too large Load Diff

View File

@ -5,23 +5,29 @@
#include "moses/TranslationModel/DynSAInclude/vocab.h"
#include "moses/TranslationModel/DynSAInclude/types.h"
#include "moses/TranslationModel/DynSAInclude/utils.h"
#include "moses/TranslationModel/WordCoocTable.h"
#include "moses/InputFileStream.h"
#include "moses/FactorTypeSet.h"
#include "moses/TargetPhrase.h"
#include <boost/dynamic_bitset.hpp>
#include "moses/TargetPhraseCollection.h"
#include <map>
using namespace std;
namespace Moses
{
class PhraseDictionaryDynSuffixArray;
/** @todo ask Abbey Levenberg
*/
class SAPhrase
{
public:
std::vector<wordID_t> words;
vector<wordID_t> words;
SAPhrase(size_t phraseSize)
:words(phraseSize) {
}
:words(phraseSize)
{}
void SetId(size_t pos, wordID_t id) {
CHECK(pos < words.size());
@ -43,12 +49,16 @@ public:
, m_endTarget(endTarget)
, m_startSource(startSource)
, m_endSource(endSource)
, m_sntIndex(sntIndex) {
}
, m_sntIndex(sntIndex)
{}
size_t GetTargetSize() const {
return m_endTarget - m_startTarget + 1;
}
size_t GetSourceSize() const {
return m_endSource - m_startSource + 1;
}
};
/** @todo ask Abbey Levenberg
@ -58,32 +68,43 @@ class SentenceAlignment
public:
SentenceAlignment(int sntIndex, int sourceSize, int targetSize);
int m_sntIndex;
std::vector<wordID_t>* trgSnt;
std::vector<wordID_t>* srcSnt;
std::vector<int> numberAligned;
std::vector< std::vector<int> > alignedList;
bool Extract(int maxPhraseLength, std::vector<PhrasePair*> &ret, int startSource, int endSource) const;
vector<wordID_t>* trgSnt;
vector<wordID_t>* srcSnt;
vector<int> numberAligned;
vector< vector<int> > alignedList;
bool Extract(int maxPhraseLength, vector<PhrasePair*> &ret,
int startSource, int endSource) const;
};
class ScoresComp
{
public:
ScoresComp(const std::vector<float>& weights): m_weights(weights) {}
ScoresComp(const vector<float>& weights): m_weights(weights) {}
bool operator()(const Scores& s1, const Scores& s2) const {
return s1[0] < s2[0]; // just p(e|f) as approximation
/*float score1(0), score2(0);
int idx1(0), idx2(0);
for (Scores::const_iterator itr = s1.begin();
itr != s1.end(); ++itr) {
score1 += log(*itr * m_weights.at(idx1++));
}
for (Scores::const_iterator itr = s2.begin();
itr != s2.end(); ++itr) {
score2 += log(*itr * m_weights.at(idx2++));
}
return score1 < score2;*/
// float score1(0), score2(0);
// int idx1(0), idx2(0);
// for (Scores::const_iterator itr = s1.begin();
// itr != s1.end(); ++itr) {
// score1 += log(*itr * m_weights.at(idx1++));
// }
// for (Scores::const_iterator itr = s2.begin();
// itr != s2.end(); ++itr) {
// score2 += log(*itr * m_weights.at(idx2++));
// }
// return score1 < score2;
}
private:
const std::vector<float>& m_weights;
const vector<float>& m_weights;
};
struct BetterPhrase {
ScoresComp const& cmp;
BetterPhrase(ScoresComp const& sc);
// bool operator()(pair<Scores, TargetPhrase const*> const& a,
// pair<Scores, TargetPhrase const*> const& b) const;
bool operator()(pair<Scores, SAPhrase const*> const& a,
pair<Scores, SAPhrase const*> const& b) const;
};
/** @todo ask Abbey Levenberg
@ -93,66 +114,70 @@ class BilingualDynSuffixArray
public:
BilingualDynSuffixArray();
~BilingualDynSuffixArray();
bool Load( const std::vector<FactorType>& inputFactors,
const std::vector<FactorType>& outputTactors,
std::string source, std::string target, std::string alignments,
const std::vector<float> &weight);
bool LoadTM( const std::vector<FactorType>& inputFactors,
const std::vector<FactorType>& outputTactors,
std::string source, std::string target, std::string alignments,
const std::vector<float> &weight);
void GetTargetPhrasesByLexicalWeight(const Phrase& src, std::vector< std::pair<Scores, TargetPhrase*> >& target) const;
void addSntPair(string& source, string& target, string& alignment);
private:
DynSuffixArray* m_srcSA;
DynSuffixArray* m_trgSA;
std::vector<wordID_t>* m_srcCorpus;
std::vector<wordID_t>* m_trgCorpus;
std::vector<FactorType> m_inputFactors;
std::vector<FactorType> m_outputFactors;
bool Load( const vector<FactorType>& inputFactors,
const vector<FactorType>& outputTactors,
string source, string target, string alignments,
const vector<float> &weight);
// bool LoadTM( const vector<FactorType>& inputFactors,
// const vector<FactorType>& outputTactors,
// string source, string target, string alignments,
// const vector<float> &weight);
void GetTargetPhrasesByLexicalWeight(const Phrase& src, vector< pair<Scores, TargetPhrase*> >& target) const;
std::vector<unsigned> m_srcSntBreaks, m_trgSntBreaks;
void CleanUp(const InputType& source);
void addSntPair(string& source, string& target, string& alignment);
pair<float,float>
GatherCands(Phrase const& src, map<SAPhrase, vector<float> >& pstats) const;
TargetPhrase*
GetMosesFactorIDs(const SAPhrase&, const Phrase& sourcePhrase) const;
private:
mutable WordCoocTable m_wrd_cooc;
DynSuffixArray * m_srcSA;
DynSuffixArray * m_trgSA;
vector<wordID_t>* m_srcCorpus;
vector<wordID_t>* m_trgCorpus;
vector<FactorType> m_inputFactors;
vector<FactorType> m_outputFactors;
vector<unsigned> m_srcSntBreaks, m_trgSntBreaks;
Vocab* m_srcVocab, *m_trgVocab;
ScoresComp* m_scoreCmp;
std::vector<SentenceAlignment> m_alignments;
std::vector<std::vector<short> > m_rawAlignments;
vector<SentenceAlignment> m_alignments;
vector<vector<short> > m_rawAlignments;
mutable std::map<std::pair<wordID_t, wordID_t>, std::pair<float, float> > m_wordPairCache;
mutable std::set<wordID_t> m_freqWordsCached;
mutable map<pair<wordID_t, wordID_t>, pair<float, float> > m_wordPairCache;
mutable set<wordID_t> m_freqWordsCached;
const size_t m_maxPhraseLength, m_maxSampleSize;
int LoadCorpus(FactorDirection direction, InputFileStream&, const std::vector<FactorType>& factors,
std::vector<wordID_t>&, std::vector<wordID_t>&,
const size_t m_maxPTEntries;
int LoadCorpus(FactorDirection direction,
InputFileStream&, const vector<FactorType>& factors,
vector<wordID_t>&, vector<wordID_t>&,
Vocab*);
int LoadAlignments(InputFileStream& aligs);
int LoadRawAlignments(InputFileStream& aligs);
int LoadRawAlignments(string& aligs);
bool ExtractPhrases(const int&, const int&, const int&, std::vector<PhrasePair*>&, bool=false) const;
bool ExtractPhrases(const int&, const int&, const int&, vector<PhrasePair*>&, bool=false) const;
SentenceAlignment GetSentenceAlignment(const int, bool=false) const;
int SampleSelection(std::vector<unsigned>&, int = 300) const;
int SampleSelection(vector<unsigned>&, int = 300) const;
std::vector<int> GetSntIndexes(std::vector<unsigned>&, int, const std::vector<unsigned>&) const;
TargetPhrase* GetMosesFactorIDs(const SAPhrase&, const Phrase& sourcePhrase) const;
vector<int> GetSntIndexes(vector<unsigned>&, int, const vector<unsigned>&) const;
SAPhrase TrgPhraseFromSntIdx(const PhrasePair&) const;
bool GetLocalVocabIDs(const Phrase&, SAPhrase &) const;
void CacheWordProbs(wordID_t) const;
void CacheFreqWords() const;
void ClearWordInCache(wordID_t);
std::pair<float, float> GetLexicalWeight(const PhrasePair&) const;
pair<float, float> GetLexicalWeight(const PhrasePair&) const;
int GetSourceSentenceSize(size_t sentenceId) const;
int GetTargetSentenceSize(size_t sentenceId) const;
int GetSourceSentenceSize(size_t sentenceId) const {
return (sentenceId==m_srcSntBreaks.size()-1) ?
m_srcCorpus->size() - m_srcSntBreaks.at(sentenceId) :
m_srcSntBreaks.at(sentenceId+1) - m_srcSntBreaks.at(sentenceId);
}
int GetTargetSentenceSize(size_t sentenceId) const {
return (sentenceId==m_trgSntBreaks.size()-1) ?
m_trgCorpus->size() - m_trgSntBreaks.at(sentenceId) :
m_trgSntBreaks.at(sentenceId+1) - m_trgSntBreaks.at(sentenceId);
}
};
} // end namespace
#endif

View File

@ -234,12 +234,12 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
std::vector<float> weightT = staticData.GetWeights(&m_dictionary);
targetPhraseCollection
= tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec
,m_outputFactorsVec
,m_dictionary
,weightT
,m_filePath
, m_dbWrapper.GetVocab());
= tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec
,m_outputFactorsVec
,m_dictionary
,weightT
,m_filePath
, m_dbWrapper.GetVocab());
delete tpcollBerkeleyDb;
m_cache[tpCollFilePos] = targetPhraseCollection;

View File

@ -428,7 +428,7 @@ void CompressionTaskReordering::operator()()
while(scoresNum < m_encodedScores.size()) {
std::string scores = m_encodedScores[scoresNum];
std::string compressedScores
= m_creator.CompressEncodedScores(scores);
= m_creator.CompressEncodedScores(scores);
std::string dummy;
PackedItem packedItem(scoresNum, dummy, compressedScores, 0);

View File

@ -61,7 +61,7 @@ PhraseDecoder::~PhraseDecoder()
inline unsigned PhraseDecoder::GetSourceSymbolId(std::string& symbol)
{
boost::unordered_map<std::string, unsigned>::iterator it
= m_sourceSymbolsMap.find(symbol);
= m_sourceSymbolsMap.find(symbol);
if(it != m_sourceSymbolsMap.end())
return it->second;
@ -200,7 +200,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &
if(m_coding == PREnc) {
std::pair<TargetPhraseVectorPtr, size_t> cachedPhraseColl
= m_decodingCache.Retrieve(sourcePhrase);
= m_decodingCache.Retrieve(sourcePhrase);
// Has been cached and is complete or does not need to be completed
if(cachedPhraseColl.first != NULL && (!topLevel || cachedPhraseColl.second == 0))
@ -255,7 +255,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
if(m_coding == REnc) {
for(size_t i = 0; i < sourcePhrase.GetSize(); i++) {
std::string sourceWord
= sourcePhrase.GetWord(i).GetString(*m_input, false);
= sourcePhrase.GetWord(i).GetString(*m_input, false);
unsigned idx = GetSourceSymbolId(sourceWord);
sourceWords.push_back(idx);
}

View File

@ -41,6 +41,17 @@ using namespace std;
namespace Moses
{
PhraseDictionaryCompact::PhraseDictionaryCompact(const std::string &line)
:PhraseDictionary("PhraseDictionaryCompact", line)
,m_inMemory(true)
,m_useAlignmentInfo(true)
,m_hash(10, 16)
,m_phraseDecoder(0)
,m_weight(0)
{
ReadParameters();
}
void PhraseDictionaryCompact::Load()
{
const StaticData &staticData = StaticData::Instance();
@ -106,7 +117,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) c
// Retrieve target phrase collection from phrase table
TargetPhraseVectorPtr decodedPhraseColl
= m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
= m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));

View File

@ -68,14 +68,7 @@ protected:
std::vector<float> m_weight;
public:
PhraseDictionaryCompact(const std::string &line)
:PhraseDictionary("PhraseDictionaryCompact", line)
,m_inMemory(true)
,m_useAlignmentInfo(true)
,m_hash(10, 16)
,m_phraseDecoder(0)
,m_weight(0) {
}
PhraseDictionaryCompact(const std::string &line);
~PhraseDictionaryCompact();

View File

@ -426,7 +426,7 @@ void PhraseTableCreator::AddTargetSymbolId(std::string& symbol)
unsigned PhraseTableCreator::GetSourceSymbolId(std::string& symbol)
{
boost::unordered_map<std::string, unsigned>::iterator it
= m_sourceSymbolsMap.find(symbol);
= m_sourceSymbolsMap.find(symbol);
if(it != m_sourceSymbolsMap.end())
return it->second;
@ -437,7 +437,7 @@ unsigned PhraseTableCreator::GetSourceSymbolId(std::string& symbol)
unsigned PhraseTableCreator::GetTargetSymbolId(std::string& symbol)
{
boost::unordered_map<std::string, unsigned>::iterator it
= m_targetSymbolsMap.find(symbol);
= m_targetSymbolsMap.find(symbol);
if(it != m_targetSymbolsMap.end())
return it->second;
@ -451,7 +451,7 @@ unsigned PhraseTableCreator::GetOrAddTargetSymbolId(std::string& symbol)
boost::mutex::scoped_lock lock(m_mutex);
#endif
boost::unordered_map<std::string, unsigned>::iterator it
= m_targetSymbolsMap.find(symbol);
= m_targetSymbolsMap.find(symbol);
if(it != m_targetSymbolsMap.end())
return it->second;
@ -1200,7 +1200,7 @@ void CompressionTask::operator()()
while(collectionNum < m_encodedCollections.size()) {
std::string collection = m_encodedCollections[collectionNum];
std::string compressedCollection
= m_creator.CompressEncodedCollection(collection);
= m_creator.CompressEncodedCollection(collection);
std::string dummy;
PackedItem packedItem(collectionNum, dummy, compressedCollection, 0);

View File

@ -143,7 +143,7 @@ public:
return data;
else {
typename std::vector<DataType>::iterator it
= std::lower_bound(m_bestVec.begin(), m_bestVec.end(), data);
= std::lower_bound(m_bestVec.begin(), m_bestVec.end(), data);
if(it != m_bestVec.end())
return *it;
else

View File

@ -1,5 +1,6 @@
#include "DynSuffixArray.h"
#include <iostream>
#include <boost/foreach.hpp>
using namespace std;
@ -215,8 +216,37 @@ void DynSuffixArray::Substitute(vuint_t* /* newSents */, unsigned /* newIndex */
return;
}
ComparePosition::
ComparePosition(vuint_t const& crp, vuint_t const& sfa)
: m_crp(crp), m_sfa(sfa) { }
bool
ComparePosition::
operator()(unsigned const& i, vector<wordID_t> const& phrase) const
{
unsigned const* x = &m_crp.at(i);
unsigned const* e = &m_crp.back();
size_t k = 0;
for (; k < phrase.size() && x < e; ++k, ++x)
if (*x != phrase[k]) return *x < phrase[k];
return (x == e && k < phrase.size());
}
bool
ComparePosition::
operator()(vector<wordID_t> const& phrase, unsigned const& i) const
{
unsigned const* x = &m_crp.at(i);
unsigned const* e = &m_crp.back();
size_t k = 0;
for (; k < phrase.size() && x < e; ++k, ++x)
if (*x != phrase[k]) return phrase[k] < *x;
return false; // (k == phrase.size() && x < e);
}
bool DynSuffixArray::GetCorpusIndex(const vuint_t* phrase, vuint_t* indices)
{
// DOES THIS EVEN WORK WHEN A DynSuffixArray has been saved and reloaded????
pair<vuint_t::iterator,vuint_t::iterator> bounds;
indices->clear();
size_t phrasesize = phrase->size();
@ -251,6 +281,16 @@ bool DynSuffixArray::GetCorpusIndex(const vuint_t* phrase, vuint_t* indices)
return (indices->size() > 0);
}
size_t
DynSuffixArray::
GetCount(vuint_t const& phrase) const
{
ComparePosition cmp(*m_corpus, *m_SA);
vuint_t::const_iterator lb = lower_bound(m_SA->begin(), m_SA->end(), phrase, cmp);
vuint_t::const_iterator ub = upper_bound(m_SA->begin(), m_SA->end(), phrase, cmp);
return ub-lb;
}
void DynSuffixArray::Save(FILE* fout)
{
fWriteVector(fout, *m_SA);

View File

@ -11,9 +11,25 @@
namespace Moses
{
using namespace std;
typedef std::vector<unsigned> vuint_t;
/// compare position /i/ in the suffix array /m_sfa/ into corpus /m_crp/
/// against reference phrase /phrase/
// added by Ulrich Germann
class ComparePosition
{
vuint_t const& m_crp;
vuint_t const& m_sfa;
public:
ComparePosition(vuint_t const& crp, vuint_t const& sfa);
bool operator()(unsigned const& i, vector<wordID_t> const& phrase) const;
bool operator()(vector<wordID_t> const& phrase, unsigned const& i) const;
};
/** @todo ask Abbey Levenberg
*/
class DynSuffixArray
@ -30,6 +46,8 @@ public:
void Delete(unsigned, unsigned);
void Substitute(vuint_t*, unsigned);
size_t GetCount(vuint_t const& phrase) const;
private:
vuint_t* m_SA;
vuint_t* m_ISA;
@ -46,10 +64,10 @@ private:
void PrintAuxArrays() {
std::cerr << "SA\tISA\tF\tL\n";
for(size_t i=0; i < m_SA->size(); ++i)
std::cerr << m_SA->at(i) << "\t" << m_ISA->at(i) << "\t" << m_F->at(i) << "\t" << m_L->at(i) << std::endl;
std::cerr << m_SA->at(i) << "\t" << m_ISA->at(i) << "\t"
<< m_F->at(i) << "\t" << m_L->at(i) << std::endl;
}
};
} //end namespace
#endif

View File

@ -34,16 +34,6 @@ PhraseDictionary::PhraseDictionary(const std::string &description, const std::st
:DecodeFeature(description, line)
,m_tableLimit(20) // default
{
size_t ind = 0;
while (ind < m_args.size()) {
vector<string> &args = m_args[ind];
bool consumed = SetParameter(args[0], args[1]);
if (consumed) {
m_args.erase(m_args.begin() + ind);
} else {
++ind;
}
}
}
@ -54,16 +44,15 @@ GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const
return GetTargetPhraseCollection(phrase);
}
bool PhraseDictionary::SetParameter(const std::string& key, const std::string& value)
void PhraseDictionary::SetParameter(const std::string& key, const std::string& value)
{
if (key == "path") {
m_filePath = value;
} else if (key == "table-limit") {
m_tableLimit = Scan<size_t>(value);
} else {
return DecodeFeature::SetParameter(key, value);
DecodeFeature::SetParameter(key, value);
}
return true;
}
void PhraseDictionary::SetFeaturesToApply()

View File

@ -91,7 +91,7 @@ public:
return m_featuresToApply;
}
bool SetParameter(const std::string& key, const std::string& value);
void SetParameter(const std::string& key, const std::string& value);
protected:
size_t m_tableLimit;

View File

@ -0,0 +1,4 @@
Specifying Dynamic Suffix Array-based Phrase Tables in moses.ini
[ttable-file]
14 0 0 5 <source language text file> <target language text file> <file with alignment info in symal format>

View File

@ -3,84 +3,35 @@
#include "moses/StaticData.h"
#include "moses/TargetPhrase.h"
#include <iomanip>
#include <boost/foreach.hpp>
using namespace std;
namespace Moses
{
PhraseDictionaryDynSuffixArray::PhraseDictionaryDynSuffixArray(const std::string &line)
:PhraseDictionary("PhraseDictionaryDynSuffixArray", line)
PhraseDictionaryDynSuffixArray::
PhraseDictionaryDynSuffixArray(const std::string &line)
: PhraseDictionary("PhraseDictionaryDynSuffixArray", line)
,m_biSA(new BilingualDynSuffixArray())
{
size_t ind = 0;
while (ind < m_args.size()) {
vector<string> &args = m_args[ind];
bool consumed = SetParameter(args[0], args[1]);
if (consumed) {
m_args.erase(m_args.begin() + ind);
} else {
++ind;
}
}
CHECK(m_args.size() == 0);
ReadParameters();
}
PhraseDictionaryDynSuffixArray::~PhraseDictionaryDynSuffixArray()
{
delete m_biSA;
}
void PhraseDictionaryDynSuffixArray::Load()
{
SetFeaturesToApply();
const StaticData &staticData = StaticData::Instance();
vector<float> weight = staticData.GetWeights(this);
m_biSA->Load( m_input, m_output, m_source, m_target, m_alignments, weight);
vector<float> weight = StaticData::Instance().GetWeights(this);
m_biSA->Load(m_input, m_output, m_source, m_target, m_alignments, weight);
}
const TargetPhraseCollection *PhraseDictionaryDynSuffixArray::GetTargetPhraseCollection(const Phrase& src) const
PhraseDictionaryDynSuffixArray::
~PhraseDictionaryDynSuffixArray()
{
TargetPhraseCollection *ret = new TargetPhraseCollection();
std::vector< std::pair< Scores, TargetPhrase*> > trg;
// extract target phrases and their scores from suffix array
m_biSA->GetTargetPhrasesByLexicalWeight( src, trg);
std::vector< std::pair< Scores, TargetPhrase*> >::iterator itr;
for(itr = trg.begin(); itr != trg.end(); ++itr) {
Scores scoreVector = itr->first;
TargetPhrase *targetPhrase = itr->second;
//std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),NegateScore);
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);
targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
targetPhrase->Evaluate(src);
//cout << *targetPhrase << "\t" << std::setprecision(8) << scoreVector[2] << endl;
ret->Add(targetPhrase);
}
ret->NthElement(m_tableLimit); // sort the phrases for the dcoder
return ret;
delete m_biSA;
}
void PhraseDictionaryDynSuffixArray::insertSnt(string& source, string& target, string& alignment)
{
m_biSA->addSntPair(source, target, alignment); // insert sentence pair into suffix arrays
//StaticData::Instance().ClearTransOptionCache(); // clear translation option cache
}
void PhraseDictionaryDynSuffixArray::deleteSnt(unsigned /* idx */, unsigned /* num2Del */)
{
// need to implement --
}
ChartRuleLookupManager *PhraseDictionaryDynSuffixArray::CreateRuleLookupManager(const InputType&, const ChartCellCollectionBase&)
{
throw "Chart decoding not supported by PhraseDictionaryDynSuffixArray";
}
bool PhraseDictionaryDynSuffixArray::SetParameter(const std::string& key, const std::string& value)
void PhraseDictionaryDynSuffixArray::SetParameter(const std::string& key, const std::string& value)
{
if (key == "source") {
m_source = value;
@ -89,9 +40,66 @@ bool PhraseDictionaryDynSuffixArray::SetParameter(const std::string& key, const
} else if (key == "alignment") {
m_alignments = value;
} else {
return PhraseDictionary::SetParameter(key, value);
PhraseDictionary::SetParameter(key, value);
}
return true;
}
const TargetPhraseCollection*
PhraseDictionaryDynSuffixArray::
GetTargetPhraseCollection(const Phrase& src) const
{
typedef map<SAPhrase, vector<float> >::value_type pstat_entry;
map<SAPhrase, vector<float> > pstats; // phrase (pair) statistics
m_biSA->GatherCands(src,pstats);
TargetPhraseCollection *ret = new TargetPhraseCollection();
BOOST_FOREACH(pstat_entry & e, pstats) {
TargetPhrase* tp = m_biSA->GetMosesFactorIDs(e.first, src);
tp->GetScoreBreakdown().Assign(this,e.second);
ret->Add(tp);
}
// return ret;
// TargetPhraseCollection *ret = new TargetPhraseCollection();
// std::vector< std::pair< Scores, TargetPhrase*> > trg;
//
// // extract target phrases and their scores from suffix array
// m_biSA->GetTargetPhrasesByLexicalWeight(src, trg);
//
// std::vector< std::pair< Scores, TargetPhrase*> >::iterator itr;
// for(itr = trg.begin(); itr != trg.end(); ++itr) {
// Scores scoreVector = itr->first;
// TargetPhrase *targetPhrase = itr->second;
// std::transform(scoreVector.begin(),scoreVector.end(),
// scoreVector.begin(),FloorScore);
// targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
// targetPhrase->Evaluate();
// ret->Add(targetPhrase);
// }
ret->NthElement(m_tableLimit); // sort the phrases for the decoder
return ret;
}
void
PhraseDictionaryDynSuffixArray::
insertSnt(string& source, string& target, string& alignment)
{
m_biSA->addSntPair(source, target, alignment); // insert sentence pair into suffix arrays
//StaticData::Instance().ClearTransOptionCache(); // clear translation option cache
}
void
PhraseDictionaryDynSuffixArray::
deleteSnt(unsigned /* idx */, unsigned /* num2Del */)
{
// need to implement --
}
ChartRuleLookupManager*
PhraseDictionaryDynSuffixArray::
CreateRuleLookupManager(const InputType&, const ChartCellCollectionBase&)
{
CHECK(false);
return 0;
}
}// end namepsace

View File

@ -17,21 +17,19 @@ class PhraseDictionaryDynSuffixArray: public PhraseDictionary
public:
PhraseDictionaryDynSuffixArray(const std::string &line);
~PhraseDictionaryDynSuffixArray();
bool InitDictionary();
void Load();
// functions below required by base class
const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase& src) const;
void insertSnt(string&, string&, string&);
void deleteSnt(unsigned, unsigned);
ChartRuleLookupManager *CreateRuleLookupManager(const InputType&, const ChartCellCollectionBase&);
bool SetParameter(const std::string& key, const std::string& value);
void SetParameter(const std::string& key, const std::string& value);
private:
BilingualDynSuffixArray *m_biSA;
std::string m_source, m_target, m_alignments;
std::vector<float> m_weight;
};
} // end namespace

View File

@ -39,6 +39,11 @@ using namespace std;
namespace Moses
{
PhraseDictionaryMemory::PhraseDictionaryMemory(const std::string &line)
: RuleTableTrie("PhraseDictionaryMemory", line)
{
ReadParameters();
}
TargetPhraseCollection &PhraseDictionaryMemory::GetOrCreateTargetPhraseCollection(
const Phrase &source

View File

@ -43,10 +43,7 @@ protected:
}
public:
PhraseDictionaryMemory(const std::string &line)
: RuleTableTrie("PhraseDictionaryMemory", line) {
CHECK(m_args.size() == 0);
}
PhraseDictionaryMemory(const std::string &line);
const PhraseDictionaryNodeMemory &GetRootNode() const {
return m_collection;

View File

@ -28,17 +28,7 @@ namespace Moses
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
:PhraseDictionary("PhraseDictionaryMultiModel", line)
{
size_t ind = 0;
while (ind < m_args.size()) {
vector<string> &args = m_args[ind];
bool consumed = SetParameter(args[0], args[1]);
if (consumed) {
m_args.erase(m_args.begin() + ind);
} else {
++ind;
}
}
CHECK(m_args.size() == 0);
ReadParameters();
if (m_mode != "interpolate") {
ostringstream msg;
@ -56,23 +46,12 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &description, const std::string &line)
:PhraseDictionary(description, line)
{
size_t ind = 0;
while (ind < m_args.size()) {
vector<string> &args = m_args[ind];
bool consumed = SetParameter(args[0], args[1]);
if (consumed) {
m_args.erase(m_args.begin() + ind);
} else {
++ind;
}
}
if (description == "PhraseDictionaryMultiModelCounts") {
CHECK(m_pdStr.size() == m_multimodelweights.size() || m_pdStr.size()*4 == m_multimodelweights.size());
}
}
bool PhraseDictionaryMultiModel::SetParameter(const std::string& key, const std::string& value)
void PhraseDictionaryMultiModel::SetParameter(const std::string& key, const std::string& value)
{
if (key == "mode") {
m_mode = value;
@ -82,9 +61,8 @@ bool PhraseDictionaryMultiModel::SetParameter(const std::string& key, const std:
} else if (key == "lambda") {
m_multimodelweights = Tokenize<float>(value, ",");
} else {
return PhraseDictionary::SetParameter(key, value);
PhraseDictionary::SetParameter(key, value);
}
return true;
}
PhraseDictionaryMultiModel::~PhraseDictionaryMultiModel()

View File

@ -81,7 +81,7 @@ public:
/* Don't do anything source specific here as this object is shared between threads.*/
}
ChartRuleLookupManager *CreateRuleLookupManager(const InputType&, const ChartCellCollectionBase&);
bool SetParameter(const std::string& key, const std::string& value);
void SetParameter(const std::string& key, const std::string& value);
const std::vector<float>* GetTemporaryMultiModelWeightsVector() const;
void SetTemporaryMultiModelWeightsVector(std::vector<float> weights);

View File

@ -68,17 +68,7 @@ PhraseDictionaryMultiModelCounts::PhraseDictionaryMultiModelCounts(const std::st
//m_mode = "interpolate";
//m_combineFunction = LinearInterpolationFromCounts;
cerr << "m_args=" << m_args.size() << endl;
size_t ind = 0;
while (ind < m_args.size()) {
vector<string> &args = m_args[ind];
bool consumed = SetParameter(args[0], args[1]);
if (consumed) {
m_args.erase(m_args.begin() + ind);
} else {
++ind;
}
}
CHECK(m_args.size() == 0);
ReadParameters();
CHECK(m_targetTable.size() == m_pdStr.size());
@ -94,7 +84,7 @@ PhraseDictionaryMultiModelCounts::PhraseDictionaryMultiModelCounts(const std::st
}
bool PhraseDictionaryMultiModelCounts::SetParameter(const std::string& key, const std::string& value)
void PhraseDictionaryMultiModelCounts::SetParameter(const std::string& key, const std::string& value)
{
if (key == "mode") {
m_mode = value;
@ -107,10 +97,8 @@ bool PhraseDictionaryMultiModelCounts::SetParameter(const std::string& key, cons
} else if (key == "target-table") {
m_targetTable = Tokenize(value, ",");
} else {
return PhraseDictionaryMultiModel::SetParameter(key, value);
PhraseDictionaryMultiModel::SetParameter(key, value);
}
return true;
}
PhraseDictionaryMultiModelCounts::~PhraseDictionaryMultiModelCounts()

View File

@ -103,7 +103,7 @@ public:
/* Don't do anything source specific here as this object is shared between threads.*/
}
bool SetParameter(const std::string& key, const std::string& value);
void SetParameter(const std::string& key, const std::string& value);
private:
std::vector<PhraseDictionary*> m_inverse_pd;

View File

@ -29,7 +29,7 @@ PhraseDictionaryTreeAdaptor::
PhraseDictionaryTreeAdaptor(const std::string &line)
: PhraseDictionary("PhraseDictionaryBinary", line)
{
CHECK(m_args.size() == 0);
ReadParameters();
}
PhraseDictionaryTreeAdaptor::~PhraseDictionaryTreeAdaptor()

View File

@ -3,11 +3,17 @@
#ifndef moses_PhraseDictionaryTreeAdaptor_h
#define moses_PhraseDictionaryTreeAdaptor_h
#include <vector>
#include "util/check.hh"
#include "moses/TypeDef.h"
#include "moses/TargetPhraseCollection.h"
#include "moses/TranslationModel/PhraseDictionary.h"
#include "util/check.hh"
#include <vector>
#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#else
#include <boost/scoped_ptr.hpp>
#endif
namespace Moses
{
@ -24,7 +30,11 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary
{
typedef PhraseDictionary MyBase;
#ifdef WITH_THREADS
boost::thread_specific_ptr<PDTAimp> m_implementation;
#else
boost::scoped_ptr<PDTAimp> m_implementation;
#endif
friend class PDTAimp;
PhraseDictionaryTreeAdaptor();

View File

@ -27,7 +27,8 @@ PhraseDictionaryALSuffixArray::PhraseDictionaryALSuffixArray(const std::string &
if (staticData.ThreadCount() > 1) {
throw runtime_error("Suffix array implementation is not threadsafe");
}
CHECK(m_args.size() == 0);
ReadParameters();
}
void PhraseDictionaryALSuffixArray::Load()

View File

@ -30,6 +30,12 @@ using namespace std;
namespace Moses
{
PhraseDictionaryOnDisk::PhraseDictionaryOnDisk(const std::string &line)
: MyBase("PhraseDictionaryOnDisk", line)
{
ReadParameters();
}
PhraseDictionaryOnDisk::~PhraseDictionaryOnDisk()
{
}

View File

@ -30,6 +30,12 @@
#include "OnDiskPt/PhraseNode.h"
#include "util/check.hh"
#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#else
#include <boost/scoped_ptr.hpp>
#endif
namespace Moses
{
class TargetPhraseCollection;
@ -43,16 +49,17 @@ class PhraseDictionaryOnDisk : public PhraseDictionary
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryOnDisk&);
protected:
#ifdef WITH_THREADS
boost::thread_specific_ptr<OnDiskPt::OnDiskWrapper> m_implementation;
#else
boost::scoped_ptr<OnDiskPt::OnDiskWrapper> m_implementation;
#endif
OnDiskPt::OnDiskWrapper &GetImplementation();
const OnDiskPt::OnDiskWrapper &GetImplementation() const;
public:
PhraseDictionaryOnDisk(const std::string &line)
: MyBase("PhraseDictionaryOnDisk", line) {
CHECK(m_args.size() == 0);
}
PhraseDictionaryOnDisk(const std::string &line);
~PhraseDictionaryOnDisk();
void Load();

View File

@ -48,12 +48,6 @@ public:
void Load();
// Required by PhraseDictionary.
virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase &) const {
CHECK(false);
return NULL;
}
private:
friend class RuleTableLoader;

View File

@ -0,0 +1,72 @@
#include "moses/TranslationModel/WordCoocTable.h"
using namespace std;
namespace Moses
{
/// Default constructor: pre-allocates room for one million word ids on
/// each side to reduce reallocation while counting.
WordCoocTable::
WordCoocTable()
{
  size_t const initial_capacity = 1000000;
  m_cooc.reserve(initial_capacity);
  m_marg1.reserve(initial_capacity);
  m_marg2.reserve(initial_capacity);
}
// Construct with known vocabulary sizes: the co-occurrence rows and both
// marginal vectors are allocated up front, marginals zero-initialized.
WordCoocTable::
WordCoocTable(wordID_t const VocabSize1, wordID_t const VocabSize2)
: m_cooc(VocabSize1), m_marg1(VocabSize1,0), m_marg2(VocabSize2, 0)
{}
/// Record one co-occurrence of word /a/ (side 1) with word /b/ (side 2).
/// Tables grow on demand, so ids beyond the current size are always legal.
void
WordCoocTable::
Count(size_t const a, size_t const b)
{
  if (a >= m_marg1.size()) {
    m_cooc.resize(a + 1);
    m_marg1.resize(a + 1, 0);
  }
  if (b >= m_marg2.size())
    m_marg2.resize(b + 1, 0);
  m_marg1[a] += 1;
  m_marg2[b] += 1;
  m_cooc[a][b] += 1;
}
/// Joint count of the pair (a,b); 0 if either id is out of range or the
/// two words were never counted together.
uint32_t
WordCoocTable::
GetJoint(size_t const a, size_t const b) const
{
  if (a >= m_marg1.size() || b >= m_marg2.size())
    return 0;
  my_map_t const &row = m_cooc.at(a);
  my_map_t::const_iterator hit = row.find(b);
  return hit == row.end() ? 0 : hit->second;
}
/// Marginal count of word /x/ on side 1 (0 if the id was never seen).
uint32_t
WordCoocTable::
GetMarg1(size_t const x) const
{
  if (x < m_marg1.size())
    return m_marg1[x];
  return 0;
}
/// Marginal count of word /x/ on side 2 (0 if the id was never seen).
uint32_t
WordCoocTable::
GetMarg2(size_t const x) const
{
  if (x < m_marg2.size())
    return m_marg2[x];
  return 0;
}
/// Forward conditional probability p(b|a) = joint(a,b) / marg1(a).
/// Returns 0 when /a/ has no counts instead of dividing by zero
/// (the unguarded division produced NaN for unseen words).
float
WordCoocTable::
pfwd(size_t const a, size_t const b) const
{
  uint32_t const marg = GetMarg1(a);
  // GetJoint(a,b) is necessarily 0 whenever marg is 0, so 0 is the
  // only consistent value for the degenerate case
  return marg ? float(GetJoint(a,b)) / marg : 0;
}
/// Backward conditional probability p(a|b) = joint(a,b) / marg2(b).
/// Returns 0 when /b/ has no counts instead of dividing by zero
/// (the unguarded division produced NaN for unseen words).
float
WordCoocTable::
pbwd(size_t const a, size_t const b) const
{
  uint32_t const marg = GetMarg2(b);
  return marg ? float(GetJoint(a,b)) / marg : 0;
}
}

View File

@ -0,0 +1,72 @@
#ifndef moses_WordCoocTable_h
#define moses_WordCoocTable_h
#include "moses/TranslationModel/DynSAInclude/vocab.h"
#include "moses/TranslationModel/DynSAInclude/types.h"
#include "moses/TranslationModel/DynSAInclude/utils.h"
#include "moses/InputFileStream.h"
#include "moses/FactorTypeSet.h"
#include "moses/TargetPhrase.h"
#include <boost/dynamic_bitset.hpp>
#include <map>
namespace Moses
{
using namespace std;
#ifndef bitvector
typedef boost::dynamic_bitset<uint64_t> bitvector;
#endif
/**
* Stores word cooccurrence counts
* @todo ask Uli Germann
*/
class WordCoocTable
{
// sparse row: target word id -> joint count
typedef map<wordID_t,uint32_t> my_map_t;
// m_cooc[a][b] = joint count of pair (a,b); kept the same length as m_marg1
vector<my_map_t> m_cooc;
// marginal counts per word id, side 1 and side 2
vector<uint32_t> m_marg1;
vector<uint32_t> m_marg2;
public:
WordCoocTable();
WordCoocTable(wordID_t const VocabSize1, wordID_t const VocabSize2);
// all accessors return 0 for ids that are out of range / unseen
uint32_t GetJoint(size_t const a, size_t const b) const;
uint32_t GetMarg1(size_t const x) const;
uint32_t GetMarg2(size_t const x) const;
// conditional probabilities: joint/marg1 (forward), joint/marg2 (backward)
float pfwd(size_t const a, size_t const b) const;
float pbwd(size_t const a, size_t const b) const;
// count a single word pair, growing the tables on demand
void
Count(size_t const a, size_t const b);
// count all pairs of one aligned sentence pair; aln is a flat list of
// alternating (source index, target index) pairs, unaligned words are
// counted against NULL1/NULL2
template<typename idvec, typename alnvec>
void
Count(idvec const& s1, idvec const& s2, alnvec const& aln,
wordID_t const NULL1, wordID_t const NULL2);
};
template<typename idvec, typename alnvec>
void
WordCoocTable::
Count(idvec const& s1, idvec const& s2, alnvec const& aln,
wordID_t const NULL1, wordID_t const NULL2)
{
boost::dynamic_bitset<uint64_t> check1(s1.size()), check2(s2.size());
check1.set();
check2.set();
for (size_t i = 0; i < aln.size(); i += 2) {
Count(s1[aln[i]], s2[aln[i+1]]);
check1.reset(aln[i]);
check2.reset(aln[i+1]);
}
for (size_t i = check1.find_first(); i < check1.size(); i = check1.find_next(i))
Count(s1[i], NULL2);
for (size_t i = check2.find_first(); i < check2.size(); i = check2.find_next(i))
Count(NULL1, s2[i]);
}
}
#endif

View File

@ -39,17 +39,6 @@ using namespace std;
namespace Moses
{
InputLatticeNode::InputLatticeNode(const Phrase &phrase, const WordsRange &range)
:m_phrase(phrase)
,m_range(range)
{
}
void InputLatticeNode::AddNext(const InputLatticeNode &next)
{
m_next.push_back(&next);
}
/** helper for pruning */
bool CompareTranslationOption(const TranslationOption *a, const TranslationOption *b)
{
@ -245,6 +234,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,s
// add to dictionary
Word &targetWord = targetPhrase.AddWord();
targetWord.SetIsOOV(true);
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) {
FactorType factorType = static_cast<FactorType>(currFactor);
@ -373,7 +363,6 @@ void TranslationOptionCollection::CreateTranslationOptions()
// in the phraseDictionary (which is the- possibly filtered-- phrase
// table loaded on initialization), generate TranslationOption objects
// for all phrases
const StaticData &staticData = StaticData::Instance();
// there may be multiple decoding graphs (factorizations of decoding)
const vector <DecodeGraph*> &decodeGraphList = StaticData::Instance().GetDecodeGraphs();
@ -384,13 +373,10 @@ void TranslationOptionCollection::CreateTranslationOptions()
// loop over all decoding graphs, each generates translation options
for (size_t graphInd = 0 ; graphInd < decodeGraphList.size() ; graphInd++) {
if (staticData.IsDecodingGraphIgnored( graphInd )) {
std::cerr << "ignoring decoding path " << graphInd << std::endl;
continue;
}
if (decodeGraphList.size() > 1) {
VERBOSE(3,"Creating translation options from decoding graph " << graphInd << endl);
}
const DecodeGraph &decodeGraph = *decodeGraphList[graphInd];
// generate phrases that start at startPos ...
for (size_t startPos = 0 ; startPos < size; startPos++) {
@ -401,12 +387,10 @@ void TranslationOptionCollection::CreateTranslationOptions()
// ... and that end at endPos
for (size_t endPos = startPos ; endPos < startPos + maxSize ; endPos++) {
if (graphInd > 0 && // only skip subsequent graphs
decodeGraphBackoff[graphInd] != 0 && // limited use of backoff specified
(endPos-startPos+1 > decodeGraphBackoff[graphInd] || // size exceeds backoff limit or ...
m_collection[startPos][endPos-startPos].size() > 0)) { // already covered
VERBOSE(3,"No backoff to graph " << graphInd << " for span [" << startPos << ";" << endPos << "]");
VERBOSE(3,", length limit: " << decodeGraphBackoff[graphInd]);
VERBOSE(3,", found so far: " << m_collection[startPos][endPos-startPos].size() << endl);
decodeGraphBackoff[graphInd] != 0 && // use of backoff specified
(endPos-startPos+1 >= decodeGraphBackoff[graphInd] || // size exceeds backoff limit or ...
m_collection[startPos][endPos-startPos].size() > 0)) { // no phrases found so far
VERBOSE(3,"No backoff to graph " << graphInd << " for span [" << startPos << ";" << endPos << "]" << endl);
// do not create more options
continue;
}
@ -472,6 +456,117 @@ void TranslationOptionCollection::Sort()
}
/** create translation options that exactly cover a specific input span.
* Called by CreateTranslationOptions() and ProcessUnknownWord()
* \param decodeGraph list of decoding steps
* \param startPos first position of the span in the input sentence
* \param endPos last position of the span in the input sentence
* \param adhereTableLimit whether phrase & generation table limits are adhered to
* \param graphInd index of the decoding graph currently being processed
*/
void TranslationOptionCollection::CreateTranslationOptionsForRange(
const DecodeGraph &decodeGraph
, size_t startPos
, size_t endPos
, bool adhereTableLimit
, size_t graphInd)
{
if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) {
Phrase *sourcePhrase = NULL; // can't initialise with substring, in case it's confusion network
// consult persistent (cross-sentence) cache for stored translation options
bool skipTransOptCreation = false
, useCache = StaticData::Instance().GetUseTransOptCache();
if (useCache) {
const WordsRange wordsRange(startPos, endPos);
sourcePhrase = new Phrase(m_source.GetSubString(wordsRange));
const TranslationOptionList *transOptList = StaticData::Instance().FindTransOptListInCache(decodeGraph, *sourcePhrase);
// is phrase in cache?
if (transOptList != NULL) {
skipTransOptCreation = true;
// clone the cached options, re-anchored to this span
TranslationOptionList::const_iterator iterTransOpt;
for (iterTransOpt = transOptList->begin() ; iterTransOpt != transOptList->end() ; ++iterTransOpt) {
TranslationOption *transOpt = new TranslationOption(**iterTransOpt, wordsRange);
Add(transOpt);
}
}
} // useCache
if (!skipTransOptCreation) {
// partial trans opt stored in here
PartialTranslOptColl* oldPtoc = new PartialTranslOptColl;
size_t totalEarlyPruned = 0;
// initial translation step
list <const DecodeStep* >::const_iterator iterStep = decodeGraph.begin();
const DecodeStep &decodeStep = **iterStep;
static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslation
(m_source, *oldPtoc
, startPos, endPos, adhereTableLimit );
// do rest of decode steps
// NOTE(review): indexStep is incremented but never read below — confirm
// whether it can be removed or was meant to be passed to Process()
int indexStep = 0;
for (++iterStep ; iterStep != decodeGraph.end() ; ++iterStep) {
const DecodeStep &decodeStep = **iterStep;
PartialTranslOptColl* newPtoc = new PartialTranslOptColl;
// go thru each intermediate trans opt just created
const vector<TranslationOption*>& partTransOptList = oldPtoc->GetList();
vector<TranslationOption*>::const_iterator iterPartialTranslOpt;
for (iterPartialTranslOpt = partTransOptList.begin() ; iterPartialTranslOpt != partTransOptList.end() ; ++iterPartialTranslOpt) {
TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt;
// NOTE(review): sourcePhrase is only allocated when useCache is true;
// if this branch runs with useCache == false, *sourcePhrase below
// dereferences NULL — confirm Process() never uses it in that case
decodeStep.Process(inputPartialTranslOpt
, decodeStep
, *newPtoc
, this
, adhereTableLimit
, *sourcePhrase);
}
// last but 1 partial trans not required anymore
totalEarlyPruned += newPtoc->GetPrunedCount();
delete oldPtoc;
oldPtoc = newPtoc;
indexStep++;
} // for (++iterStep
// add to fully formed translation option list
PartialTranslOptColl &lastPartialTranslOptColl = *oldPtoc;
const vector<TranslationOption*>& partTransOptList = lastPartialTranslOptColl.GetList();
vector<TranslationOption*>::const_iterator iterColl;
for (iterColl = partTransOptList.begin() ; iterColl != partTransOptList.end() ; ++iterColl) {
TranslationOption *transOpt = *iterColl;
Add(transOpt);
}
// storing translation options in persistent cache (kept across sentences)
if (useCache) {
if (partTransOptList.size() > 0) {
TranslationOptionList &transOptList = GetTranslationOptionList(startPos, endPos);
StaticData::Instance().AddTransOptListToCache(decodeGraph, *sourcePhrase, transOptList);
}
}
// hand ownership of the surviving options to this collection, then
// discard the (now empty) working list
lastPartialTranslOptColl.DetachAll();
totalEarlyPruned += oldPtoc->GetPrunedCount();
delete oldPtoc;
// TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl);
} // if (!skipTransOptCreation)
if (useCache)
delete sourcePhrase;
} // if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos))
if (graphInd == 0 && StaticData::Instance().GetXmlInputType() != XmlPassThrough && HasXmlOptionsOverlappingRange(startPos,endPos)) {
CreateXmlOptionsForRange(startPos, endPos);
}
}
/** Check if this range overlaps with any XML options. This doesn't need to be an exact match, only an overlap.
* by default, we don't support XML options. subclasses need to override this function.

View File

@ -43,25 +43,6 @@ class FactorMask;
class Word;
class DecodeGraph;
/** Each node contains
1. substring used to searching the phrase table
2. the source range it covers
3. a list of InputLatticeNode that it is a prefix of
This is for both sentence input, and confusion network/lattices
*/
class InputLatticeNode
{
protected:
Phrase m_phrase;
WordsRange m_range;
std::vector<const InputLatticeNode*> m_next;
public:
InputLatticeNode(const Phrase &phrase, const WordsRange &range);
void AddNext(const InputLatticeNode &next);
};
/** Contains all phrase translations applicable to current input type (a sentence or confusion network).
* A key insight into efficient decoding is that various input
* conditions (trelliss, factored input, normal text, xml markup)
@ -133,21 +114,12 @@ public:
//! Create all possible translations from the phrase tables
virtual void CreateTranslationOptions();
//! Create translation options that exactly cover a specific input span.
/** create translation options that exactly cover a specific input span.
* Called by CreateTranslationOptions() and ProcessUnknownWord()
* \param decodeGraph list of decoding steps
* \param factorCollection input sentence with all factors
* \param startPos first position in input sentence
* \param lastPos last position in input sentence
* \param adhereTableLimit whether phrase & generation table limits are adhered to
*/
virtual void CreateTranslationOptionsForRange(const DecodeGraph &decodeStepList
, size_t startPosition
, size_t endPosition
, bool adhereTableLimit
, size_t graphInd) = 0;
, size_t graphInd);
//!Check if this range has XML options
virtual bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;

View File

@ -1,15 +1,9 @@
// $Id$
#include <cassert>
#include <iostream>
#include "TranslationOptionCollectionConfusionNet.h"
#include "ConfusionNet.h"
#include "DecodeStep.h"
#include "FactorCollection.h"
#include "DecodeStepTranslation.h"
#include "DecodeStepGeneration.h"
#include "moses/FF/InputFeature.h"
using namespace std;
namespace Moses
{
@ -18,104 +12,7 @@ namespace Moses
TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet(
const ConfusionNet &input
, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
: TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold)
{
const StaticData &staticData = StaticData::Instance();
const InputFeature *inputFeature = staticData.GetInputFeature();
CHECK(inputFeature);
size_t size = input.GetSize();
// create matrix
for (size_t startPos = 0; startPos < size; ++startPos) {
std::vector<std::vector<SourcePath> > vec;
m_collection.push_back( vec );
size_t maxSize = size - startPos;
size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
maxSize = std::min(maxSize, maxSizePhrase);
for (size_t endPos = 0 ; endPos < maxSize ; ++endPos) {
std::vector<SourcePath> vec;
m_collection[startPos].push_back( vec );
}
// cut up confusion network into substrings
// start with 1-word phrases
std::vector<SourcePath> &subphrases = GetPhrases(startPos, startPos);
assert(subphrases.size() == 0);
const ConfusionNet::Column &col = input.GetColumn(startPos);
ConfusionNet::Column::const_iterator iter;
for (iter = col.begin(); iter != col.end(); ++iter) {
subphrases.push_back(SourcePath());
SourcePath &sourcePath = subphrases.back();
const std::pair<Word,std::vector<float> > &inputNode = *iter;
//cerr << "word=" << inputNode.first << " scores=" << inputNode.second.size() << endl;
sourcePath.first.AddWord(inputNode.first);
sourcePath.second.PlusEquals(inputFeature, inputNode.second);
} // for (iter = col.begin(); iter != col.end(); ++iter) {
} // for (size_t startPos = 0; startPos < size; ++startPos) {
// create subphrases by appending words to previous subphrases
for (size_t startPos = 0; startPos < size; ++startPos) {
size_t maxSize = size - startPos;
size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
maxSize = std::min(maxSize, maxSizePhrase);
size_t end = startPos + maxSize - 1;
for (size_t endPos = startPos + 1; endPos < end; ++endPos) {
std::vector<SourcePath> &newSubphrases = GetPhrases(startPos, endPos);
const std::vector<SourcePath> &prevSubphrases = GetPhrases(startPos, endPos - 1);
const ConfusionNet::Column &col = input.GetColumn(endPos);
CreateSubPhrases(newSubphrases, prevSubphrases, col, *inputFeature);
}
}
/*
for (size_t startPos = 0; startPos < size; ++startPos) {
for (size_t endPos = startPos; endPos < size; ++endPos) {
cerr << "RANGE=" << startPos << "-" << endPos << endl;
const std::vector<SourcePath> &subphrases = GetPhrases(startPos, endPos);
std::vector<SourcePath>::const_iterator iterSourcePath;
for (iterSourcePath = subphrases.begin(); iterSourcePath != subphrases.end(); ++iterSourcePath) {
const SourcePath &sourcePath = *iterSourcePath;
cerr << sourcePath.first << " " <<sourcePath.second << endl;
}
}
}
*/
}
void TranslationOptionCollectionConfusionNet::CreateSubPhrases(std::vector<SourcePath> &newSubphrases
, const std::vector<SourcePath> &prevSubphrases
, const ConfusionNet::Column &col
, const InputFeature &inputFeature)
{
std::vector<SourcePath>::const_iterator iterSourcePath;
for (iterSourcePath = prevSubphrases.begin(); iterSourcePath != prevSubphrases.end(); ++iterSourcePath) {
const SourcePath &sourcePath = *iterSourcePath;
const Phrase &prevSubPhrase = sourcePath.first;
const ScoreComponentCollection &prevScore = sourcePath.second;
ConfusionNet::Column::const_iterator iterCol;
for (iterCol = col.begin(); iterCol != col.end(); ++iterCol) {
const std::pair<Word,std::vector<float> > &node = *iterCol;
Phrase subphrase(prevSubPhrase);
subphrase.AddWord(node.first);
ScoreComponentCollection score(prevScore);
score.PlusEquals(&inputFeature, node.second);
SourcePath newSourcePath(subphrase, score);
newSubphrases.push_back(newSourcePath);
}
}
}
: TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold) {}
/* forcibly create translation option for a particular source word.
* call the base class' ProcessOneUnknownWord() for each possible word in the confusion network
@ -133,122 +30,6 @@ void TranslationOptionCollectionConfusionNet::ProcessUnknownWord(size_t sourcePo
}
const std::vector<TranslationOptionCollectionConfusionNet::SourcePath> &TranslationOptionCollectionConfusionNet::GetPhrases(size_t startPos, size_t endPos) const
{
size_t offset = endPos - startPos;
CHECK(offset < m_collection[startPos].size());
return m_collection[startPos][offset];
}
std::vector<TranslationOptionCollectionConfusionNet::SourcePath> &TranslationOptionCollectionConfusionNet::GetPhrases(size_t startPos, size_t endPos)
{
size_t offset = endPos - startPos;
CHECK(offset < m_collection[startPos].size());
return m_collection[startPos][offset];
}
void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRange(
const DecodeGraph &decodeGraph
, size_t startPos
, size_t endPos
, bool adhereTableLimit
, size_t graphInd)
{
if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) {
Phrase *sourcePhrase = NULL; // can't initialise with substring, in case it's confusion network
// consult persistent (cross-sentence) cache for stored translation options
bool skipTransOptCreation = false
, useCache = StaticData::Instance().GetUseTransOptCache();
if (useCache) {
const WordsRange wordsRange(startPos, endPos);
sourcePhrase = new Phrase(m_source.GetSubString(wordsRange));
const TranslationOptionList *transOptList = StaticData::Instance().FindTransOptListInCache(decodeGraph, *sourcePhrase);
// is phrase in cache?
if (transOptList != NULL) {
skipTransOptCreation = true;
TranslationOptionList::const_iterator iterTransOpt;
for (iterTransOpt = transOptList->begin() ; iterTransOpt != transOptList->end() ; ++iterTransOpt) {
TranslationOption *transOpt = new TranslationOption(**iterTransOpt, wordsRange);
Add(transOpt);
}
}
} // useCache
if (!skipTransOptCreation) {
// partial trans opt stored in here
PartialTranslOptColl* oldPtoc = new PartialTranslOptColl;
size_t totalEarlyPruned = 0;
// initial translation step
list <const DecodeStep* >::const_iterator iterStep = decodeGraph.begin();
const DecodeStep &decodeStep = **iterStep;
static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslation
(m_source, *oldPtoc
, startPos, endPos, adhereTableLimit );
// do rest of decode steps
int indexStep = 1;
for (++iterStep; iterStep != decodeGraph.end() ; ++iterStep, ++indexStep) {
const DecodeStep &decodeStep = **iterStep;
PartialTranslOptColl* newPtoc = new PartialTranslOptColl;
// go thru each intermediate trans opt just created
const vector<TranslationOption*>& partTransOptList = oldPtoc->GetList();
vector<TranslationOption*>::const_iterator iterPartialTranslOpt;
for (iterPartialTranslOpt = partTransOptList.begin() ; iterPartialTranslOpt != partTransOptList.end() ; ++iterPartialTranslOpt) {
TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt;
decodeStep.Process(inputPartialTranslOpt
, decodeStep
, *newPtoc
, this
, adhereTableLimit
, *sourcePhrase);
}
// last but 1 partial trans not required anymore
totalEarlyPruned += newPtoc->GetPrunedCount();
delete oldPtoc;
oldPtoc = newPtoc;
} // for (++iterStep
// add to fully formed translation option list
PartialTranslOptColl &lastPartialTranslOptColl = *oldPtoc;
const vector<TranslationOption*>& partTransOptList = lastPartialTranslOptColl.GetList();
vector<TranslationOption*>::const_iterator iterColl;
for (iterColl = partTransOptList.begin() ; iterColl != partTransOptList.end() ; ++iterColl) {
TranslationOption *transOpt = *iterColl;
Add(transOpt);
}
// storing translation options in persistent cache (kept across sentences)
if (useCache) {
if (partTransOptList.size() > 0) {
TranslationOptionList &transOptList = GetTranslationOptionList(startPos, endPos);
StaticData::Instance().AddTransOptListToCache(decodeGraph, *sourcePhrase, transOptList);
}
}
lastPartialTranslOptColl.DetachAll();
totalEarlyPruned += oldPtoc->GetPrunedCount();
delete oldPtoc;
// TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl);
} // if (!skipTransOptCreation)
if (useCache)
delete sourcePhrase;
} // if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos))
if (graphInd == 0 && StaticData::Instance().GetXmlInputType() != XmlPassThrough && HasXmlOptionsOverlappingRange(startPos,endPos)) {
CreateXmlOptionsForRange(startPos, endPos);
}
}
} // namespace

View File

@ -3,11 +3,11 @@
#define moses_TranslationOptionCollectionConfusionNet_h
#include "TranslationOptionCollection.h"
#include "ConfusionNet.h"
namespace Moses
{
class InputFeature;
class ConfusionNet;
/** Holds all translation options, for all spans, of a particular confusion network input
* Inherited from TranslationOptionCollection.
@ -15,28 +15,12 @@ class InputFeature;
class TranslationOptionCollectionConfusionNet : public TranslationOptionCollection
{
public:
typedef std::pair<Phrase, ScoreComponentCollection> SourcePath;
TranslationOptionCollectionConfusionNet(const ConfusionNet &source, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
void ProcessUnknownWord(size_t sourcePos);
const std::vector<SourcePath> &GetPhrases(size_t startPos, size_t endPos) const;
std::vector<SourcePath> &GetPhrases(size_t startPos, size_t endPos);
protected:
std::vector<std::vector<std::vector<SourcePath> > > m_collection;
void CreateSubPhrases(std::vector<SourcePath> &newSubphrases
, const std::vector<SourcePath> &prevSubphrases
, const ConfusionNet::Column &col
, const InputFeature &inputFeature);
void CreateTranslationOptionsForRange(const DecodeGraph &decodeStepList
, size_t startPosition
, size_t endPosition
, bool adhereTableLimit
, size_t graphInd);
};
}
#endif

View File

@ -24,44 +24,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "DecodeStep.h"
#include "FactorCollection.h"
#include "WordsRange.h"
#include "DecodeStepTranslation.h"
#include "DecodeStepGeneration.h"
using namespace std;
namespace Moses
{
/** constructor; just initialize the base class */
TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const &input
, size_t maxNoTransOptPerCoverage
, float translationOptionThreshold)
: TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold)
{
size_t size = input.GetSize();
m_collection.resize(size);
for (size_t startPos = 0; startPos < size; ++startPos) {
std::vector<InputLatticeNode> &vec = m_collection[startPos];
for (size_t endPos = startPos; endPos < size; ++endPos) {
Phrase subphrase(input.GetSubString(WordsRange(startPos, endPos)));
WordsRange range(startPos, endPos);
InputLatticeNode node(subphrase, range);
if (range.GetNumWordsCovered() > 1) {
InputLatticeNode prevNode = GetPhrase(startPos, endPos - 1);
node.AddNext(prevNode);
}
vec.push_back(node);
}
}
/*
for (size_t startPos = 0; startPos < size; ++startPos) {
for (size_t endPos = startPos; endPos < size; ++endPos) {
cerr << startPos << "-" << endPos << "=" << GetPhrase(startPos, endPos) << endl;
}
}
*/
}
TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const &inputSentence, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
: TranslationOptionCollection(inputSentence, maxNoTransOptPerCoverage, translationOptionThreshold) {}
/* forcibly create translation option for a particular source word.
* For text, this function is easy, just call the base class' ProcessOneUnknownWord()
@ -96,118 +66,10 @@ void TranslationOptionCollectionText::CreateXmlOptionsForRange(size_t startPosit
for(size_t i=0; i<xmlOptions.size(); i++) {
Add(xmlOptions[i]);
}
};
}
const InputLatticeNode &TranslationOptionCollectionText::GetPhrase(size_t startPos, size_t endPos) const
{
size_t offset = endPos - startPos;
CHECK(offset < m_collection[startPos].size());
return m_collection[startPos][offset];
}
void TranslationOptionCollectionText::CreateTranslationOptionsForRange(
const DecodeGraph &decodeGraph
, size_t startPos
, size_t endPos
, bool adhereTableLimit
, size_t graphInd)
{
if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) {
Phrase *sourcePhrase = NULL; // can't initialise with substring, in case it's confusion network
// consult persistent (cross-sentence) cache for stored translation options
bool skipTransOptCreation = false
, useCache = StaticData::Instance().GetUseTransOptCache();
if (useCache) {
const WordsRange wordsRange(startPos, endPos);
sourcePhrase = new Phrase(m_source.GetSubString(wordsRange));
const TranslationOptionList *transOptList = StaticData::Instance().FindTransOptListInCache(decodeGraph, *sourcePhrase);
// is phrase in cache?
if (transOptList != NULL) {
skipTransOptCreation = true;
TranslationOptionList::const_iterator iterTransOpt;
for (iterTransOpt = transOptList->begin() ; iterTransOpt != transOptList->end() ; ++iterTransOpt) {
TranslationOption *transOpt = new TranslationOption(**iterTransOpt, wordsRange);
Add(transOpt);
}
}
} // useCache
if (!skipTransOptCreation) {
// partial trans opt stored in here
PartialTranslOptColl* oldPtoc = new PartialTranslOptColl;
size_t totalEarlyPruned = 0;
// initial translation step
list <const DecodeStep* >::const_iterator iterStep = decodeGraph.begin();
const DecodeStep &decodeStep = **iterStep;
static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslation
(m_source, *oldPtoc
, startPos, endPos, adhereTableLimit );
// do rest of decode steps
int indexStep = 1;
for (++iterStep; iterStep != decodeGraph.end() ; ++iterStep, ++indexStep) {
const DecodeStep &decodeStep = **iterStep;
PartialTranslOptColl* newPtoc = new PartialTranslOptColl;
// go thru each intermediate trans opt just created
const vector<TranslationOption*>& partTransOptList = oldPtoc->GetList();
vector<TranslationOption*>::const_iterator iterPartialTranslOpt;
for (iterPartialTranslOpt = partTransOptList.begin() ; iterPartialTranslOpt != partTransOptList.end() ; ++iterPartialTranslOpt) {
TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt;
decodeStep.Process(inputPartialTranslOpt
, decodeStep
, *newPtoc
, this
, adhereTableLimit
, *sourcePhrase);
}
// last but 1 partial trans not required anymore
totalEarlyPruned += newPtoc->GetPrunedCount();
delete oldPtoc;
oldPtoc = newPtoc;
} // for (++iterStep
// add to fully formed translation option list
PartialTranslOptColl &lastPartialTranslOptColl = *oldPtoc;
const vector<TranslationOption*>& partTransOptList = lastPartialTranslOptColl.GetList();
vector<TranslationOption*>::const_iterator iterColl;
for (iterColl = partTransOptList.begin() ; iterColl != partTransOptList.end() ; ++iterColl) {
TranslationOption *transOpt = *iterColl;
Add(transOpt);
}
// storing translation options in persistent cache (kept across sentences)
if (useCache) {
if (partTransOptList.size() > 0) {
TranslationOptionList &transOptList = GetTranslationOptionList(startPos, endPos);
StaticData::Instance().AddTransOptListToCache(decodeGraph, *sourcePhrase, transOptList);
}
}
lastPartialTranslOptColl.DetachAll();
totalEarlyPruned += oldPtoc->GetPrunedCount();
delete oldPtoc;
// TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl);
} // if (!skipTransOptCreation)
if (useCache)
delete sourcePhrase;
} // if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos))
if (graphInd == 0 && StaticData::Instance().GetXmlInputType() != XmlPassThrough && HasXmlOptionsOverlappingRange(startPos,endPos)) {
CreateXmlOptionsForRange(startPos, endPos);
}
}
} // namespace

View File

@ -22,9 +22,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#ifndef moses_TranslationOptionCollectionText_h
#define moses_TranslationOptionCollectionText_h
#include <vector>
#include "TranslationOptionCollection.h"
#include "Phrase.h"
namespace Moses
{
@ -37,23 +35,18 @@ class Sentence;
class TranslationOptionCollectionText : public TranslationOptionCollection
{
public:
TranslationOptionCollectionText(Sentence const& input, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
void ProcessUnknownWord(size_t sourcePos);
TranslationOptionCollectionText(Sentence const& inputSentence, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition);
const InputLatticeNode &GetPhrase(size_t startPos, size_t endPos) const;
protected:
std::vector<std::vector<InputLatticeNode> > m_collection;
void CreateTranslationOptionsForRange(const DecodeGraph &decodeStepList
, size_t startPosition
, size_t endPosition
, bool adhereTableLimit
, size_t graphInd);
};
}
#endif

View File

@ -108,27 +108,28 @@ enum DistortionOrientationOptions {
enum PhraseTableImplementation {
Memory = 0
,Binary = 1
,OnDisk = 2
//,GlueRule = 3
//,Joshua = 4
//,MemorySourceLabel = 5
,SCFG = 6
//,BerkeleyDb = 7
,SuffixArray = 8
,Hiero = 9
,ALSuffixArray = 10
,FuzzyMatch = 11
,Compact = 12
,Interpolated = 13
,Binary = 1
,OnDisk = 2
//,GlueRule = 3
//,Joshua = 4
//,MemorySourceLabel = 5
,SCFG = 6
//,BerkeleyDb = 7
,SuffixArray = 8
,Hiero = 9
,ALSuffixArray = 10
,FuzzyMatch = 11
,Compact = 12
,Interpolated = 13
,DSuffixArray = 14
};
enum InputTypeEnum {
SentenceInput = 0
,ConfusionNetworkInput = 1
,WordLatticeInput = 2
,TreeInputType = 3
,WordLatticeInput2 = 4
,ConfusionNetworkInput = 1
,WordLatticeInput = 2
,TreeInputType = 3
,WordLatticeInput2 = 4
};
@ -141,7 +142,7 @@ enum XmlInputType {
enum DictionaryFind {
Best = 0
,All = 1
,All = 1
};
enum ParsingAlgorithm {
@ -151,22 +152,22 @@ enum ParsingAlgorithm {
enum SearchAlgorithm {
Normal = 0
,CubePruning = 1
,CubeGrowing = 2
,ChartDecoding= 3
,NormalBatch = 4
,ChartIncremental = 5
,CubePruning = 1
,CubeGrowing = 2
,ChartDecoding= 3
,NormalBatch = 4
,ChartIncremental = 5
};
enum SourceLabelOverlap {
SourceLabelOverlapAdd = 0
,SourceLabelOverlapReplace = 1
,SourceLabelOverlapDiscard = 2
,SourceLabelOverlapReplace = 1
,SourceLabelOverlapDiscard = 2
};
enum WordAlignmentSort {
NoSort = 0
,TargetOrder = 1
,TargetOrder = 1
};
enum FormatType {

View File

@ -52,11 +52,14 @@ protected:
FactorArray m_factorArray; /**< set of factors */
bool m_isNonTerminal;
bool m_isOOV;
public:
/** deep copy */
Word(const Word &copy)
:m_isNonTerminal(copy.m_isNonTerminal) {
:m_isNonTerminal(copy.m_isNonTerminal)
,m_isOOV(copy.m_isOOV)
{
std::memcpy(m_factorArray, copy.m_factorArray, sizeof(FactorArray));
}
@ -64,6 +67,7 @@ public:
explicit Word(bool isNonTerminal = false) {
std::memset(m_factorArray, 0, sizeof(FactorArray));
m_isNonTerminal = isNonTerminal;
m_isOOV = false;
}
~Word() {}
@ -92,6 +96,13 @@ public:
m_isNonTerminal = val;
}
inline bool IsOOV() const {
return m_isOOV;
}
inline void SetIsOOV(bool val) {
m_isOOV = val;
}
/** add the factors from sourceWord into this representation,
* NULL elements in sourceWord will be skipped */
void Merge(const Word &sourceWord);

View File

@ -0,0 +1,51 @@
#ifndef __sampling_h
#define __sampling_h
// Utility functions for proper sub-sampling.
// (c) 2007-2012 Ulrich Germann
namespace Moses
{

/// Return a pseudo-random integer drawn uniformly from [0,N).
/// Dividing rand() by RAND_MAX+1. keeps the scaled value strictly below N.
/// Not thread-safe: uses the global rand() state.
inline
size_t
randInt(size_t N)
{
  return N*(rand()/(RAND_MAX+1.));
}

// Select a random sample of size /s/ without replacement from the range of
// integers [0,N). On return v holds the sample; v.size() == min(s,N).
// Not thread-safe: relies on the global rand() state.
template<typename idx_t>
void
randomSample(std::vector<idx_t>& v, size_t s, size_t N)
{
  // see also Knuth: Art of Computer Programming Vol. 2, p. 142
  s = std::min(s,N);
  v.resize(s);
  // The first branch tries to be more efficient than O(N) when picking a
  // small sample from a large range: draw random values and re-draw on
  // collision. The threshold (s*10 < N) is an ad-hoc, off-the-cuff guess
  // for the break-even point between the linear sweep below and repeated
  // random draws that risk hitting the same number many times.
  if (s*10<N) {
    // std::vector<bool> is a compact bitmap; this replaces the former
    // boost::dynamic_bitset dependency with the standard library.
    std::vector<bool> check(N,false);
    for (size_t i = 0; i < v.size(); i++) {
      size_t x = randInt(N);
      while (check[x]) x = randInt(N);
      check[x] = true;
      v[i] = x;
    }
  } else {
    // Knuth's selection sampling (Algorithm S): one linear sweep; each t
    // is taken with probability (s-m)/(N-t).
    size_t m=0;
    // was 'm <= s': once m == s the sample is complete, so stop instead
    // of scanning (and drawing random numbers for) the rest of the range.
    for (size_t t = 0; m < s && t < N; t++)
      if (s==N || randInt(N-t) < s-m) v[m++] = t;
  }
}

} // namespace Moses
#endif

View File

@ -0,0 +1,85 @@
#ifndef __n_best_list_h
#define __n_best_list_h
#include <algorithm>
#include <cstdint>
#include <vector>
#include "moses/generic/sorting/VectorIndexSorter.h"
// NBest List; (c) 2007-2012 Ulrich Germann
//
// The 'trick' used in this implementation is to maintain a heap of size <= N
// such that the lowest-scoring item is on top of the heap. For each incoming
// item we can then determine easily if it is in the top N.
namespace Moses
{
// NOTE(review): kept for source compatibility with existing includers;
// avoid relying on this in new code.
using namespace std;

/// Bounded n-best list: keeps at most max_size items, evicting the worst
/// (according to CMP, where CMP(a,b) means "a is better than b") when a
/// better item arrives.
template<typename THINGY, typename CMP>
class
NBestList
{
  vector<uint32_t> m_heap;  // indices into m_list; worst item on top
  vector<THINGY> m_list;    // item storage (slots are reused on eviction)
  VectorIndexSorter<THINGY, CMP, uint32_t> m_better; // index comparator
  mutable vector<uint32_t> m_order;  // cached best-to-worst index order
  mutable bool m_changed;            // true if m_order is stale
  size_t m_max_size;                 // capacity bound N
public:
  NBestList(size_t const max_size, CMP const& cmp);
  NBestList(size_t const max_size);
  /// Insert /item/; returns true iff it made it into the top N.
  bool add(THINGY const& item);
  /// i-th best item (0 = best); negative i counts back from the worst end.
  THINGY const& operator[](int i) const;
  size_t size() const {
    return m_heap.size();
  }
};

template<typename THINGY, typename CMP>
NBestList<THINGY,CMP>::
NBestList(size_t const max_size, CMP const& cmp)
  : m_better(m_list, cmp), m_changed(false), m_max_size(max_size)
{
  m_heap.reserve(max_size);
}

template<typename THINGY, typename CMP>
NBestList<THINGY,CMP>::
NBestList(size_t const max_size)
  // was m_better(m_heap): the sorter must rank the items in m_list,
  // not the index heap itself (type mismatch, hidden until instantiation)
  : m_better(m_list), m_changed(false), m_max_size(max_size)
{
  m_heap.reserve(max_size);
}

template<typename THINGY, typename CMP>
bool
NBestList<THINGY,CMP>::
add(THINGY const& item)
{
  // Compare against the stored bound rather than m_heap.capacity():
  // reserve() only guarantees capacity() >= max_size, not equality.
  if (m_heap.size() >= m_max_size) {
    // Full: the new item must beat the current worst (heap top) to get in.
    if (m_better.Compare(item, m_list[m_heap.at(0)])) {
      pop_heap(m_heap.begin(),m_heap.end(),m_better);
      m_list[m_heap.back()] = item; // overwrite the evicted item's slot
    } else return false;
  } else {
    m_list.push_back(item);
    m_heap.push_back(m_heap.size());
  }
  push_heap(m_heap.begin(),m_heap.end(),m_better);
  return m_changed = true;
}

template<typename THINGY, typename CMP>
THINGY const&
NBestList<THINGY,CMP>::
operator[](int i) const
{
  if (m_changed) {
    // Heap-sort a copy of the index heap into best-first order. The
    // comparator must be m_better here: without it, pop_heap would order
    // by the raw index values instead of by item quality.
    m_order.assign(m_heap.begin(),m_heap.end());
    for (size_t k = m_heap.size(); k != 0; --k)
      pop_heap(m_order.begin(), m_order.begin()+k, m_better);
    m_changed = false;
  }
  if (i < 0) i += m_order.size();
  return m_list[m_order.at(i)];
}
}
#endif

View File

@ -0,0 +1,69 @@
#ifndef __vector_index_sorter_h
#define __vector_index_sorter_h
// VectorIndexSorter; (c) 2007-2012 Ulrich Germann
// A VectorIndexSorter is a function object for sorting indices into a vector
// of objects (instead of sorting the vector itself).
//
// typcial use:
// vector<thingy> my_vector;
// VectorIndexSorter<thingy,less<thingy>,int> sorter(my_vector);
// vector<int> order;
// sorter.get_order(order);
namespace Moses
{
template<typename VAL, typename COMP = greater<VAL>, typename IDX_T=size_t>
class
VectorIndexSorter : public binary_function<IDX_T const&, IDX_T const&, bool>
{
vector<VAL> const& m_vecref;
boost::shared_ptr<COMP> m_comp;
public:
COMP const& Compare;
VectorIndexSorter(vector<VAL> const& v, COMP const& comp)
: m_vecref(v), Compare(comp)
{ }
VectorIndexSorter(vector<VAL> const& v)
: m_vecref(v), m_comp(new COMP()), Compare(*m_comp)
{ }
bool operator()(IDX_T const & a, IDX_T const & b) const {
bool fwd = Compare(m_vecref.at(a) ,m_vecref.at(b));
bool bwd = Compare(m_vecref[b], m_vecref[a]);
return (fwd == bwd ? a < b : fwd);
}
boost::shared_ptr<vector<IDX_T> >
GetOrder() const;
void
GetOrder(vector<IDX_T> & order) const;
};
template<typename VAL, typename COMP, typename IDX_T>
boost::shared_ptr<vector<IDX_T> >
VectorIndexSorter<VAL,COMP,IDX_T>::
GetOrder() const
{
boost::shared_ptr<vector<IDX_T> > ret(new vector<IDX_T>(m_vecref.size()));
get_order(*ret);
return ret;
}
template<typename VAL, typename COMP, typename IDX_T>
void
VectorIndexSorter<VAL,COMP,IDX_T>::
GetOrder(vector<IDX_T> & order) const
{
order.resize(m_vecref.size());
for (IDX_T i = 0; i < IDX_T(m_vecref.size()); ++i) order[i] = i;
sort(order.begin(), order.end(), *this);
}
}
#endif

View File

@ -137,7 +137,7 @@ void LeftBinarize( SyntaxTree &tree, ParentNodes &parents )
const SplitPoints &point = *p;
if (point.size() > 3) {
const vector< SyntaxNode* >& topNodes
= tree.GetNodes( point[0], point[point.size()-1]-1);
= tree.GetNodes( point[0], point[point.size()-1]-1);
string topLabel = topNodes[0]->GetLabel();
for(size_t i=2; i<point.size()-1; i++) {
@ -155,7 +155,7 @@ void RightBinarize( SyntaxTree &tree, ParentNodes &parents )
if (point.size() > 3) {
int endPoint = point[point.size()-1]-1;
const vector< SyntaxNode* >& topNodes
= tree.GetNodes( point[0], endPoint);
= tree.GetNodes( point[0], endPoint);
string topLabel = topNodes[0]->GetLabel();
for(size_t i=1; i<point.size()-2; i++) {

View File

@ -2552,6 +2552,8 @@ sub define_tuningevaluation_filter {
# get model, and whether suffix array is used. Determines the pt implementation.
my $sa_exec_dir = &get("TRAINING:suffix-array");
my $sa_extractors = &get("GENERAL:sa_extractors");
$sa_extractors = 1 unless $sa_extractors;
my ($ptImpl, $numFF);
if ($hierarchical) {
@ -2564,7 +2566,7 @@ sub define_tuningevaluation_filter {
}
}
else {
$ptImpl = 0; # phrase-based
$ptImpl = 0; # phrase-based
}
# config file specified?
@ -2589,11 +2591,14 @@ sub define_tuningevaluation_filter {
# filter command
if ($sa_exec_dir) {
# suffix array
$cmd .= "$scripts/training/wrappers/adam-suffix-array/suffix-array-extract.sh $sa_exec_dir $phrase_translation_table $input_filter $filter_dir \n";
$cmd .= "$scripts/training/wrappers/adam-suffix-array/suffix-array-extract.sh $sa_exec_dir $phrase_translation_table $input_filter $filter_dir $sa_extractors \n";
my $escaped_filter_dir = $filter_dir;
$escaped_filter_dir =~ s/\//\\\\\//g;
$cmd .= "cat $config | sed s/10\\ 0\\ 0\\ 7.*/10\\ 0\\ 0\\ 7\\ $escaped_filter_dir/g > $filter_dir/moses.ini \n";
# kind of a hack -- the correct thing would be to make the generation of the config file ($filter_dir/moses.ini)
# set the PhraseDictionaryALSuffixArray's path to the filtered directory rather than to the suffix array itself
$cmd .= "sed -i 's%path=$phrase_translation_table%path=$filter_dir%' $filter_dir/moses.ini\n";
}
else {
# normal phrase table

51
scripts/generic/ph_numbers.perl Executable file
View File

@ -0,0 +1,51 @@
#!/usr/bin/perl -w
# Script to recognize and replace numbers in Moses training corpora
# and decoder input
#
# (c) 2013 TAUS
#
# Modes:
#   -c          corpus mode: replace each number with the placeholder symbol
#   -l          decoder input, simple form: <ne translation="NUMBER">SYM</ne>
#   (default)   decoder input, full form: <ne translation="SYM" entity="NUMBER">SYM</ne>
#   -m symbol   placeholder symbol (default '@NUM@')
use strict;
use Getopt::Std;
# Set DEBUG=1 in the environment to trace matching on STDERR.
my $debug = $ENV{DEBUG} || 0;
my %opts;
if(!getopts('s:t:cm:hl',\%opts) || $opts{h}) {
print "Usage: perl $0 [-s source_locale][-t target_locale][-c][-h][-l][-m symbol] < in > out\n";
exit;
}
# NOTE(review): the locales are parsed but not used anywhere below.
my $sourceLocale = $opts{s} || "";
my $targetLocale = $opts{t} || "";
my $numberSymbol = $opts{m} || '@NUM@';
while(<>) {
# [-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?
# while(/\G(.*?)\s*([+-]?\p{Digit}+[+-\p{Digit}\.,eE])/) {
chomp;
my $output = "";
my $remainder = "";
# Scan for number-like tokens: optional sign, digits with an optional
# decimal/thousands separator, then any run of digit-ish characters.
# The '-' is placed LAST in the final character class so it is literal;
# previously '+-e' formed an accidental character range from '+' to 'e'
# that also swallowed letters and punctuation following a number.
while(/\G(.*?)(\s*)([+-]?\p{Digit}*[\.,]?\p{Digit}+[\p{Digit}\.,+eE-]*)/g) {
print STDERR "Between: x$1x\n" if $debug;
print STDERR "Number: x$3x\n" if $debug;
$output .= $1;
if($opts{c}) {
# corpus mode: plain placeholder
$output .= $2.$numberSymbol;
}
else {
if($opts{l}) {
# simple named-entity form: keep the number as the translation
$output .= $2."<ne translation=\"$3\">$numberSymbol</ne>";
}
else {
# full named-entity form: keep the number as the entity attribute
$output .= $2."<ne translation=\"$numberSymbol\" entity=\"$3\">$numberSymbol</ne>";
}
}
# text after the last match ($' = postmatch)
$remainder = $';
}
print STDERR "Remainder: x".$remainder."x\n" if $debug;
print STDERR "\n" if $debug;
$output .= $remainder if $remainder;
$output .= "\n";
print $output;
}

View File

@ -1,88 +0,0 @@
#!/usr/bin/perl -w
# Compatible with sri LM-creating script, eg.
# ngram-count -order 5 -interpolate -wbdiscount -unk -text corpus.txt -lm lm.txt
# To use it in the EMS, add this to the [LM] section
# lm-training = "$moses-script-dir/generic/trainlm-irst.perl -cores $cores -irst-dir $irst-dir"
# settings = ""
# Also, make sure that $irst-dir is defined (in the [LM] or [GENERAL] section.
# It should point to the root of the LM toolkit, eg
# irst-dir = /Users/hieu/workspace/irstlm/trunk/bin
# And make sure that $cores is defined, eg $cores = 8
# And make sure the $settings variable is empty. This script doesn't understand some of the sri args like -unk and will complain.
use strict;
use FindBin qw($RealBin);
use Getopt::Long;
# Option defaults.
my $order = 3;          # n-gram order of the LM to build
my $corpusPath;         # training text (-text); may be gzipped
my $lmPath;             # output LM path (-lm); gzipped iff it ends in .gz
my $cores = 2;          # parallel jobs passed to build-lm.sh -k
my $irstPath;           # IRSTLM bin directory (-irst-dir)
my $tempPath = "tmp";   # scratch directory root (-temp-dir)
my $p = 1;
my $s;
my $temp;
GetOptions("order=s" => \$order,
"text=s" => \$corpusPath,
"lm=s" => \$lmPath,
"cores=s" => \$cores,
"irst-dir=s" => \$irstPath,
"temp-dir=s" => \$tempPath,
"p=i" => \$p, # irstlm parameter: delete singletons
"s=s" => \$s, # irstlm parameter: smoothing method
"interpolate!" => \$temp, #ignore
"kndiscount!" => \$temp #ignore
) or exit 1;
#die("ERROR: please set order") unless defined($order);
die("ERROR: please set text") unless defined($corpusPath);
die("ERROR: please set lm") unless defined($lmPath);
die("ERROR: please set irst-dir") unless defined($irstPath);
# The file extension decides whether the corpus must be zcat'ed first.
my $ext = ($corpusPath =~ m/([^.]+)$/)[0];
print "extension is $ext\n";
# Per-process scratch dir ($$ = PID) so concurrent runs don't collide.
$tempPath .= "/irstlm-build-tmp.$$";
`mkdir -p $tempPath`;
# NOTE(review): paths are interpolated into shell commands unquoted;
# spaces or shell metacharacters in any path will break these commands.
my $cmd;
# Step 1: wrap each sentence in start/end tags; store gzipped in scratch.
if ($ext eq "gz")
{
$cmd = "zcat $corpusPath | $irstPath/add-start-end.sh | gzip -c > $tempPath/monolingual.setagged.gz";
}
else
{
$cmd = "cat $corpusPath | $irstPath/add-start-end.sh | gzip -c > $tempPath/monolingual.setagged.gz";
}
print STDERR "EXECUTING $cmd\n";
`$cmd`;
# Step 2: build the LM in iARPA format with IRSTLM's build-lm.sh,
# forwarding the singleton-pruning (-p) and smoothing (-s) options.
$cmd = "IRSTLM=$irstPath/.. $irstPath/build-lm.sh -t $tempPath/stat4 -i \"gunzip -c $tempPath/monolingual.setagged.gz\" -n $order -o $tempPath/iarpa.gz -k $cores";
$cmd .= " -p" if $p;
$cmd .= " -s $s" if defined($s);
print STDERR "EXECUTING $cmd\n";
`$cmd`;
# Step 3: compile the iARPA file to the requested output,
# gzipping if the target path ends in .gz.
$ext = ($lmPath =~ m/([^.]+)$/)[0];
print "extension is $ext\n";
if ($ext eq "gz")
{
$cmd = "$irstPath/compile-lm --text $tempPath/iarpa.gz /dev/stdout | gzip -c > $lmPath";
}
else
{
$cmd = "$irstPath/compile-lm --text $tempPath/iarpa.gz $lmPath";
}
print STDERR "EXECUTING $cmd\n";
`$cmd`;
# Step 4: remove the scratch directory.
$cmd = "rm -rf $tempPath";
print STDERR "EXECUTING $cmd\n";
`$cmd`;
print STDERR "FINISH.\n";

View File

@ -0,0 +1,40 @@
#!/usr/bin/perl -w
# Compatible with sri LM-creating script, eg.
# ngram-count -order 5 -interpolate -wbdiscount -unk -text corpus.txt -lm lm.txt
# To use it in the EMS, add this to the [LM] section
# lm-training = "$moses-script-dir/generic/trainlm-lmplz.perl -lmplz $lmplz"
# settings = "-T $working-dir/tmp -S 10G"
# Also, make sure that $lmplz is defined (in the [LM] or [GENERAL] section.
# It should point to the binary file
# lmplz = /home/waziz/workspace/github/moses/bin/lmplz
use strict;
use FindBin qw($RealBin);
use Getopt::Long qw/GetOptionsFromArray/;
# Pass unrecognised options (e.g. -T, -S) through to lmplz itself.
Getopt::Long::Configure("pass_through", "no_ignore_case");

my $order = 3; # order of language model (default trigram)
my $corpus;    # input text data
my $lm;        # generated language model
my $lmplz;     # path to the lmplz binary (was mislabelled "bin directory of IRSTLM")

my @optconfig = (
"-order=s" => \$order,
"-text=s" => \$corpus,
"-lm=s" => \$lm,
"-lmplz=s" => \$lmplz,
);

GetOptionsFromArray(\@ARGV, @optconfig);
die("ERROR: please set text") unless defined($corpus);
die("ERROR: please set lm") unless defined($lm);
die("ERROR: please set lmplz") unless defined($lmplz);

# Everything left on the command line is forwarded verbatim to lmplz.
my $settings = join(' ', @ARGV);
my $cmd = "$lmplz --order $order $settings < $corpus > $lm";
print STDERR "EXECUTING $cmd\n";
`$cmd`;
# Fail loudly if lmplz did not exit cleanly; otherwise a truncated or
# empty LM file could silently make it into the rest of the pipeline.
die("ERROR: lmplz failed with exit code " . ($? >> 8) . "\n") if $? != 0;

Some files were not shown because too many files have changed in this diff Show More