Merge github.com:moses-smt/mosesdecoder into hieu_opt_input2

2024-12-28 14:32:38 +03:00 · 2013-07-08 10:37:53 +01:00 · 2013-07-08 10:37:53 +01:00 · a602e2052f
commit a602e2052f
parent ed6b5add67 d3b4c11be2
15 changed files with 657 additions and 692 deletions
--- a/mert/BleuDocScorer.cpp
+++ b/mert/BleuDocScorer.cpp
@ -31,11 +31,11 @@ const char REFLEN_CLOSEST[] = "closest";

 namespace MosesTuning
 {
-  
+

 BleuDocScorer::BleuDocScorer(const string& config)
-    : BleuScorer("BLEUDOC", config),
-      m_ref_length_type(CLOSEST)
+  : BleuScorer("BLEUDOC", config),
+    m_ref_length_type(CLOSEST)
 {
  const string reflen = getConfig(KEY_REFLEN, REFLEN_CLOSEST);
  if (reflen == REFLEN_AVERAGE) {
@ -63,41 +63,40 @@ bool BleuDocScorer::OpenReferenceStream(istream* is, size_t file_id)

    if (line.find("<doc docid") != std::string::npos) {  // new document
      doc_id++;
-      m_references.push_back(new ScopedVector<Reference>()); 
+      m_references.push_back(new ScopedVector<Reference>());
      sid = 0;
-    }
-    else if (line.find("<seg") != std::string::npos) {  //new sentence
+    } else if (line.find("<seg") != std::string::npos) { //new sentence
      int start = line.find_first_of('>') + 1;
      std::string trans = line.substr(start, line.find_last_of('<')-start);
      trans = preprocessSentence(trans);

      if (file_id == 0) {
-		  Reference* ref = new Reference;
-		  m_references[doc_id]->push_back(ref);    // Take ownership of the Reference object.
+        Reference* ref = new Reference;
+        m_references[doc_id]->push_back(ref);    // Take ownership of the Reference object.
      }

      if (m_references[doc_id]->size() <= sid) {
-      	return false;
+        return false;
      }
      NgramCounts counts;
      size_t length = CountNgrams(trans, counts, kBleuNgramOrder);
-      
+
      //for any counts larger than those already there, merge them in
      for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
-		  const NgramCounts::Key& ngram = ci->first;
-		  const NgramCounts::Value newcount = ci->second;
-	
-		  NgramCounts::Value oldcount = 0;
-		  m_references[doc_id]->get().at(sid)->get_counts()->Lookup(ngram, &oldcount);
-		  if (newcount > oldcount) {
-			  m_references[doc_id]->get().at(sid)->get_counts()->operator[](ngram) = newcount;
-		  }
+        const NgramCounts::Key& ngram = ci->first;
+        const NgramCounts::Value newcount = ci->second;
+
+        NgramCounts::Value oldcount = 0;
+        m_references[doc_id]->get().at(sid)->get_counts()->Lookup(ngram, &oldcount);
+        if (newcount > oldcount) {
+          m_references[doc_id]->get().at(sid)->get_counts()->operator[](ngram) = newcount;
+        }
      }
      //add in the length

-	  m_references[doc_id]->get().at(sid)->push_back(length);
+      m_references[doc_id]->get().at(sid)->push_back(length);
      if (sid > 0 && sid % 100 == 0) {
-		  TRACE_ERR(".");
+        TRACE_ERR(".");
      }
      ++sid;
    }
@ -127,14 +126,14 @@ void BleuDocScorer::prepareStats(size_t sid, const string& text, ScoreStats& ent

    //precision on each ngram type
    for (NgramCounts::const_iterator testcounts_it = testcounts.begin();
-	 testcounts_it != testcounts.end(); ++testcounts_it) {
+         testcounts_it != testcounts.end(); ++testcounts_it) {
      const NgramCounts::Value guess = testcounts_it->second;
      const size_t len = testcounts_it->first.size();
      NgramCounts::Value correct = 0;
-    
+
      NgramCounts::Value v = 0;
      if (m_references[sid]->get().at(i)->get_counts()->Lookup(testcounts_it->first, &v)) {
-	correct = min(v, guess);
+        correct = min(v, guess);
      }
      stats[len * 2 - 2] += correct;
      stats[len * 2 - 1] += guess;
@ -143,13 +142,13 @@ void BleuDocScorer::prepareStats(size_t sid, const string& text, ScoreStats& ent
    const int reference_len = CalcReferenceLength(sid, i, length);
    stats.push_back(reference_len);

-    //ADD stats to totStats 
-    std::transform(stats.begin(), stats.end(), totStats.begin(), 
-		   totStats.begin(), std::plus<int>());
+    //ADD stats to totStats
+    std::transform(stats.begin(), stats.end(), totStats.begin(),
+                   totStats.begin(), std::plus<int>());
  }
-  entry.set(totStats); 
+  entry.set(totStats);
 }
-  
+
 std::vector<std::string> BleuDocScorer::splitDoc(const std::string& text)
 {
  std::vector<std::string> res;
@ -188,18 +187,18 @@ statscore_t BleuDocScorer::calculateScore(const vector<int>& comps) const
 int BleuDocScorer::CalcReferenceLength(size_t doc_id, size_t sentence_id, size_t length)
 {
  switch (m_ref_length_type) {
-    case AVERAGE:
-      return m_references[doc_id]->get().at(sentence_id)->CalcAverage();
-      break;
-    case CLOSEST:
-      return m_references[doc_id]->get().at(sentence_id)->CalcClosest(length);
-      break;
-    case SHORTEST:
-      return m_references[doc_id]->get().at(sentence_id)->CalcShortest();
-      break;
-    default:
-      cerr << "unknown reference types." << endl;
-      exit(1);
+  case AVERAGE:
+    return m_references[doc_id]->get().at(sentence_id)->CalcAverage();
+    break;
+  case CLOSEST:
+    return m_references[doc_id]->get().at(sentence_id)->CalcClosest(length);
+    break;
+  case SHORTEST:
+    return m_references[doc_id]->get().at(sentence_id)->CalcShortest();
+    break;
+  default:
+    cerr << "unknown reference types." << endl;
+    exit(1);
  }
 }

--- a/mert/BleuDocScorer.h
+++ b/mert/BleuDocScorer.h
@ -29,7 +29,7 @@ public:
  virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
  virtual statscore_t calculateScore(const std::vector<int>& comps) const;

-  int CalcReferenceLength(std::size_t doc_id, std::size_t sentence_id, std::size_t length); 
+  int CalcReferenceLength(std::size_t doc_id, std::size_t sentence_id, std::size_t length);

  // NOTE: this function is used for unit testing.
  virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id);
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@ -67,7 +67,7 @@ public:
  // NOTE: this function is used for unit testing.
  virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id);

-	//private:
+  //private:
 protected:
  ReferenceLengthType m_ref_length_type;

@ -76,7 +76,7 @@ protected:

  // constructor used by subclasses
  BleuScorer(const std::string& name, const std::string& config): StatisticsBasedScorer(name,config) {}
-	
+
  // no copying allowed
  BleuScorer(const BleuScorer&);
  BleuScorer& operator=(const BleuScorer&);
--- a/misc/queryPhraseTableMin.cpp
+++ b/misc/queryPhraseTableMin.cpp
@ -51,12 +51,12 @@ int main(int argc, char **argv)
  const_cast<std::vector<std::string>&>(parameter->GetParam("factor-delimiter")).resize(1, "||dummy_string||");
  const_cast<std::vector<std::string>&>(parameter->GetParam("input-factors")).resize(1, "0");
  const_cast<std::vector<std::string>&>(parameter->GetParam("verbose")).resize(1, "0");
-  const_cast<std::vector<std::string>&>(parameter->GetParam("weight-w")).resize(1, "0");
-  const_cast<std::vector<std::string>&>(parameter->GetParam("weight-d")).resize(1, "0");
+  //const_cast<std::vector<std::string>&>(parameter->GetParam("weight-w")).resize(1, "0");
+  //const_cast<std::vector<std::string>&>(parameter->GetParam("weight-d")).resize(1, "0");

  StaticData::InstanceNonConst().LoadData(parameter);

-  PhraseDictionaryCompact pdc("input-factor=0 output-factor=0 num-features=5 path=" + ttable);
+  PhraseDictionaryCompact pdc("PhraseDictionaryCompact input-factor=0 output-factor=0 num-features=5 path=" + ttable);
  pdc.Load();

  std::string line;
--- a/moses/FF/OSM-Feature/OpSequenceModel.cpp
+++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp
@ -11,7 +11,7 @@ namespace Moses
 {

 OpSequenceModel::OpSequenceModel(const std::string &line)
-:StatefulFeatureFunction("OpSequenceModel", 5, line )
+  :StatefulFeatureFunction("OpSequenceModel", 5, line )
 {
  ReadParameters();
 }
@ -19,29 +19,29 @@ OpSequenceModel::OpSequenceModel(const std::string &line)
 void OpSequenceModel :: readLanguageModel(const char *lmFile)
 {

-    string unkOp = "_TRANS_SLF_";
+  string unkOp = "_TRANS_SLF_";

-	
-	/* 

-	// Code for SRILM	
+  /*

-	vector <int> numbers;
+  // Code for SRILM
+
+  vector <int> numbers;
        int nonWordFlag = 0;
-  
-	ptrOp = new Api;
-	ptrOp -> read_lm(lmFile,lmOrder);
-	numbers.push_back(ptrOp->getLMID(const_cast <char *> (unkOp.c_str())));
-	unkOpProb = ptrOp->contextProbN(numbers,nonWordFlag);
-	
-	*/

-	// Code to load KenLM
+  ptrOp = new Api;
+  ptrOp -> read_lm(lmFile,lmOrder);
+  numbers.push_back(ptrOp->getLMID(const_cast <char *> (unkOp.c_str())));
+  unkOpProb = ptrOp->contextProbN(numbers,nonWordFlag);

-	OSM = new Model(m_lmPath.c_str());
-	State startState = OSM->NullContextState();
-	State endState;
-	unkOpProb = OSM->Score(startState,OSM->GetVocabulary().Index(unkOp),endState);
+  */
+
+  // Code to load KenLM
+
+  OSM = new Model(m_lmPath.c_str());
+  State startState = OSM->NullContextState();
+  State endState;
+  unkOpProb = OSM->Score(startState,OSM->GetVocabulary().Index(unkOp),endState);
 }


@ -85,58 +85,55 @@ void OpSequenceModel::Load()


 void OpSequenceModel:: Evaluate(const Phrase &source
-                        , const TargetPhrase &targetPhrase
-                        , ScoreComponentCollection &scoreBreakdown
-                        , ScoreComponentCollection &estimatedFutureScore) const 
+                                , const TargetPhrase &targetPhrase
+                                , ScoreComponentCollection &scoreBreakdown
+                                , ScoreComponentCollection &estimatedFutureScore) const
 {

-	osmHypothesis obj;
-	obj.setState(OSM->NullContextState());
-	WordsBitmap myBitmap(source.GetSize());
-	vector <string> mySourcePhrase;
-  	vector <string> myTargetPhrase;
-  	vector<float> scores(5);
-	vector <int> alignments;
-	int startIndex = 0;
-	int endIndex = source.GetSize();
+  osmHypothesis obj;
+  obj.setState(OSM->NullContextState());
+  WordsBitmap myBitmap(source.GetSize());
+  vector <string> mySourcePhrase;
+  vector <string> myTargetPhrase;
+  vector<float> scores(5);
+  vector <int> alignments;
+  int startIndex = 0;
+  int endIndex = source.GetSize();

-	const AlignmentInfo &align = targetPhrase.GetAlignTerm();
-	AlignmentInfo::const_iterator iter;
+  const AlignmentInfo &align = targetPhrase.GetAlignTerm();
+  AlignmentInfo::const_iterator iter;


-      	for (iter = align.begin(); iter != align.end(); ++iter) 
-      	{
-      	 alignments.push_back(iter->first);
-    	 alignments.push_back(iter->second);
-      	}
+  for (iter = align.begin(); iter != align.end(); ++iter) {
+    alignments.push_back(iter->first);
+    alignments.push_back(iter->second);
+  }

-	for (int i = 0; i < targetPhrase.GetSize(); i++)
-  	{
-	  	if (targetPhrase.GetWord(i).IsOOV())
-		 myTargetPhrase.push_back("_TRANS_SLF_");
-	 	else
-		  myTargetPhrase.push_back(targetPhrase.GetWord(i).GetFactor(0)->GetString().as_string());
-   	 }
+  for (int i = 0; i < targetPhrase.GetSize(); i++) {
+    if (targetPhrase.GetWord(i).IsOOV())
+      myTargetPhrase.push_back("_TRANS_SLF_");
+    else
+      myTargetPhrase.push_back(targetPhrase.GetWord(i).GetFactor(0)->GetString().as_string());
+  }

-	 for (int i = 0; i < source.GetSize(); i++)
-  	 {
-		  mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
-   	 }
-	
-	 obj.setPhrases(mySourcePhrase , myTargetPhrase);
-	 obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize());
-	 obj.computeOSMFeature(startIndex,myBitmap);	
-	 obj.calculateOSMProb(*OSM);
-         obj.populateScores(scores);
-         estimatedFutureScore.PlusEquals(this, scores);
+  for (int i = 0; i < source.GetSize(); i++) {
+    mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
+  }
+
+  obj.setPhrases(mySourcePhrase , myTargetPhrase);
+  obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize());
+  obj.computeOSMFeature(startIndex,myBitmap);
+  obj.calculateOSMProb(*OSM);
+  obj.populateScores(scores);
+  estimatedFutureScore.PlusEquals(this, scores);

 }


 FFState* OpSequenceModel::Evaluate(
-    const Hypothesis& cur_hypo,
-    const FFState* prev_state,
-    ScoreComponentCollection* accumulator) const
+  const Hypothesis& cur_hypo,
+  const FFState* prev_state,
+  ScoreComponentCollection* accumulator) const
 {
  const TargetPhrase &target = cur_hypo.GetCurrTargetPhrase();
  const WordsBitmap &bitmap = cur_hypo.GetWordsBitmap();
@ -159,83 +156,81 @@ FFState* OpSequenceModel::Evaluate(

  //cerr << source <<endl;

- // int a = sourceRange.GetStartPos();
- // cerr << source.GetWord(a);
+// int a = sourceRange.GetStartPos();
+// cerr << source.GetWord(a);
  //cerr <<a<<endl;

  //const Sentence &sentence = static_cast<const Sentence&>(curr_hypo.GetManager().GetSource());


-   const WordsRange & sourceRange = cur_hypo.GetCurrSourceWordsRange();
-   int startIndex  = sourceRange.GetStartPos();
-   int endIndex = sourceRange.GetEndPos();
-   const AlignmentInfo &align = cur_hypo.GetCurrTargetPhrase().GetAlignTerm();
-   osmState * statePtr;
+  const WordsRange & sourceRange = cur_hypo.GetCurrSourceWordsRange();
+  int startIndex  = sourceRange.GetStartPos();
+  int endIndex = sourceRange.GetEndPos();
+  const AlignmentInfo &align = cur_hypo.GetCurrTargetPhrase().GetAlignTerm();
+  osmState * statePtr;

-   vector <int> alignments;
+  vector <int> alignments;



-   AlignmentInfo::const_iterator iter;
+  AlignmentInfo::const_iterator iter;

-      for (iter = align.begin(); iter != align.end(); ++iter) {
-        //cerr << iter->first << "----" << iter->second << " ";
-    	 alignments.push_back(iter->first);
-    	 alignments.push_back(iter->second);
-      }
-
-
-   //cerr<<bitmap<<endl;
-   //cerr<<startIndex<<" "<<endIndex<<endl;
-
-
-  for (int i = startIndex; i <= endIndex; i++)
-  {
-	  myBitmap.SetValue(i,0); // resetting coverage of this phrase ...
-	 mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
-	 // cerr<<mySourcePhrase[i]<<endl;
+  for (iter = align.begin(); iter != align.end(); ++iter) {
+    //cerr << iter->first << "----" << iter->second << " ";
+    alignments.push_back(iter->first);
+    alignments.push_back(iter->second);
  }

-  for (int i = 0; i < target.GetSize(); i++)
-  {

-	  if (target.GetWord(i).IsOOV())
-		  myTargetPhrase.push_back("_TRANS_SLF_");
-	  else
-		  myTargetPhrase.push_back(target.GetWord(i).GetFactor(0)->GetString().as_string());
+  //cerr<<bitmap<<endl;
+  //cerr<<startIndex<<" "<<endIndex<<endl;
+
+
+  for (int i = startIndex; i <= endIndex; i++) {
+    myBitmap.SetValue(i,0); // resetting coverage of this phrase ...
+    mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
+    // cerr<<mySourcePhrase[i]<<endl;
+  }
+
+  for (int i = 0; i < target.GetSize(); i++) {
+
+    if (target.GetWord(i).IsOOV())
+      myTargetPhrase.push_back("_TRANS_SLF_");
+    else
+      myTargetPhrase.push_back(target.GetWord(i).GetFactor(0)->GetString().as_string());

  }

- 
+
  //cerr<<myBitmap<<endl;

  obj.setState(prev_state);
  obj.constructCepts(alignments,startIndex,endIndex,target.GetSize());
  obj.setPhrases(mySourcePhrase , myTargetPhrase);
-  obj.computeOSMFeature(startIndex,myBitmap);	
+  obj.computeOSMFeature(startIndex,myBitmap);
  obj.calculateOSMProb(*OSM);
  obj.populateScores(scores);

-/*
-  if (bitmap.GetFirstGapPos() == NOT_FOUND)
-  {
+  /*
+    if (bitmap.GetFirstGapPos() == NOT_FOUND)
+    {

-    int xx;
-	 cerr<<bitmap<<endl;
-	 int a = bitmap.GetFirstGapPos();
-	 obj.print();
-    cin>>xx;
-  }
-  */
+      int xx;
+  	 cerr<<bitmap<<endl;
+  	 int a = bitmap.GetFirstGapPos();
+  	 obj.print();
+      cin>>xx;
+    }
+    */

-/*
-  vector<float> scores(5);
-  scores[0] = 0.343423f;
-  scores[1] = 1.343423f;
-  scores[2] = 2.343423f;
-  scores[3] = 3.343423f;
-  scores[4] = 4.343423f;
-  */
+  /*
+    vector<float> scores(5);
+    scores[0] = 0.343423f;
+    scores[1] = 1.343423f;
+    scores[2] = 2.343423f;
+    scores[3] = 3.343423f;
+    scores[4] = 4.343423f;
+    */

  accumulator->PlusEquals(this, scores);

@ -245,7 +240,7 @@ FFState* OpSequenceModel::Evaluate(


  //return statePtr;
- // return NULL;
+// return NULL;
 }

 FFState* OpSequenceModel::EvaluateChart(
@ -276,29 +271,28 @@ std::vector<float> OpSequenceModel::GetFutureScores(const Phrase &source, const
  ParallelPhrase pp(source, target);
  std::map<ParallelPhrase, Scores>::const_iterator iter;
  iter = m_futureCost.find(pp);
- //iter = m_coll.find(pp);
+//iter = m_coll.find(pp);
  if (iter == m_futureCost.end()) {
    vector<float> scores(5, 0);
    scores[0] = unkOpProb;
    return scores;
-  }
-  else {
+  } else {
    const vector<float> &scores = iter->second;
-	return scores;
+    return scores;
  }
 }

 void OpSequenceModel::SetParameter(const std::string& key, const std::string& value)
 {
-	  if (key == "feature-path") {
-		  m_featurePath = value;
-	  } else if (key == "path") {
-		  m_lmPath = value;
-	  } else if (key == "order") {
-		  lmOrder = Scan<int>(value);
-	  } else {
-		  StatefulFeatureFunction::SetParameter(key, value);
-	  }
+  if (key == "feature-path") {
+    m_featurePath = value;
+  } else if (key == "path") {
+    m_lmPath = value;
+  } else if (key == "order") {
+    lmOrder = Scan<int>(value);
+  } else {
+    StatefulFeatureFunction::SetParameter(key, value);
+  }
 }

 } // namespace
--- a/moses/FF/OSM-Feature/OpSequenceModel.h
+++ b/moses/FF/OSM-Feature/OpSequenceModel.h
@ -16,26 +16,26 @@ class OpSequenceModel : public StatefulFeatureFunction
 {
 public:

-	
-	lm::ngram::Model * OSM;
-	
-	int lmOrder;
-	float unkOpProb;

-	OpSequenceModel(const std::string &line);
+  lm::ngram::Model * OSM;

-	void readLanguageModel(const char *);
-	void Load();
+  int lmOrder;
+  float unkOpProb;

-	FFState* Evaluate(
-	    const Hypothesis& cur_hypo,
-	    const FFState* prev_state,
-	    ScoreComponentCollection* accumulator) const;
+  OpSequenceModel(const std::string &line);

-	void  Evaluate(const Phrase &source
-                        , const TargetPhrase &targetPhrase
-                        , ScoreComponentCollection &scoreBreakdown
-                        , ScoreComponentCollection &estimatedFutureScore) const;
+  void readLanguageModel(const char *);
+  void Load();
+
+  FFState* Evaluate(
+    const Hypothesis& cur_hypo,
+    const FFState* prev_state,
+    ScoreComponentCollection* accumulator) const;
+
+  void  Evaluate(const Phrase &source
+                 , const TargetPhrase &targetPhrase
+                 , ScoreComponentCollection &scoreBreakdown
+                 , ScoreComponentCollection &estimatedFutureScore) const;

  virtual FFState* EvaluateChart(
    const ChartHypothesis& /* cur_hypo */,
@ -49,17 +49,18 @@ public:
  std::vector<float> GetFutureScores(const Phrase &source, const Phrase &target) const;
  void SetParameter(const std::string& key, const std::string& value);

-  bool IsUseable(const FactorMask &mask) const
-  { return true; }
+  bool IsUseable(const FactorMask &mask) const {
+    return true;
+  }

 protected:
-	typedef std::pair<Phrase, Phrase> ParallelPhrase;
-	typedef std::vector<float> Scores;
-	std::map<ParallelPhrase, Scores> m_futureCost;
+  typedef std::pair<Phrase, Phrase> ParallelPhrase;
+  typedef std::vector<float> Scores;
+  std::map<ParallelPhrase, Scores> m_futureCost;

-	std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
-	std::set <int> targetNullWords;
-	std::string m_featurePath, m_lmPath;
+  std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
+  std::set <int> targetNullWords;
+  std::string m_featurePath, m_lmPath;



--- a/moses/FF/OSM-Feature/osmHyp.cpp
+++ b/moses/FF/OSM-Feature/osmHyp.cpp
--- a/moses/FF/OSM-Feature/osmHyp.h
+++ b/moses/FF/OSM-Feature/osmHyp.h
@ -17,15 +17,23 @@ public:
  osmState(const lm::ngram::State & val);
  int Compare(const FFState& other) const;
  void saveState(int jVal, int eVal, std::map <int , std::string> & gapVal);
-  int getJ()const {return j;}
-  int getE()const {return E;}
-  std::map <int , std::string> getGap() const { return gap;}
+  int getJ()const {
+    return j;
+  }
+  int getE()const {
+    return E;
+  }
+  std::map <int , std::string> getGap() const {
+    return gap;
+  }

-  lm::ngram::State getLMState() const {return lmState;}
+  lm::ngram::State getLMState() const {
+    return lmState;
+  }

  void print() const;
  std::string getName() const;
-  
+
 protected:
  int j, E;
  std::map <int,std::string> gap;
@ -35,51 +43,56 @@ protected:
 class osmHypothesis
 {

-	private:
-	
-	
-	std::vector <std::string> operations;	// List of operations required to generated this hyp ...
-	std::map <int,std::string> gap;	// Maintains gap history ...
-	int j;	// Position after the last source word generated ...
-	int E; // Position after the right most source word so far generated ...
-	lm::ngram::State lmState; // KenLM's Model State ...
+private:

-	int gapCount; // Number of gaps inserted ...
-	int deletionCount;
-	int openGapCount;
-	int gapWidth;	
-	double opProb;

-	std::vector <std::string> currE;
-	std::vector <std::string> currF;
-	std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
-	std::set <int> targetNullWords;
-	std::set <int> sourceNullWords;
+  std::vector <std::string> operations;	// List of operations required to generated this hyp ...
+  std::map <int,std::string> gap;	// Maintains gap history ...
+  int j;	// Position after the last source word generated ...
+  int E; // Position after the right most source word so far generated ...
+  lm::ngram::State lmState; // KenLM's Model State ...

-	int closestGap(std::map <int,std::string> gap,int j1, int & gp);
-	int firstOpenGap(std::vector <int> & coverageVector);
-	std::string intToString(int);
-	int  getOpenGaps();
-	int isTranslationOperation(int j);
-	void removeReorderingOperations();
+  int gapCount; // Number of gaps inserted ...
+  int deletionCount;
+  int openGapCount;
+  int gapWidth;
+  double opProb;

-	void getMeCepts ( std::set <int> & eSide , std::set <int> & fSide , std::map <int , std::vector <int> > & tS , std::map <int , std::vector <int> > & sT);
+  std::vector <std::string> currE;
+  std::vector <std::string> currF;
+  std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
+  std::set <int> targetNullWords;
+  std::set <int> sourceNullWords;

-	public:
+  int closestGap(std::map <int,std::string> gap,int j1, int & gp);
+  int firstOpenGap(std::vector <int> & coverageVector);
+  std::string intToString(int);
+  int  getOpenGaps();
+  int isTranslationOperation(int j);
+  void removeReorderingOperations();

-	osmHypothesis();
-	~osmHypothesis(){};
-	void generateOperations(int & startIndex, int j1 , int contFlag , WordsBitmap & coverageVector , std::string english , std::string german , std::set <int> & targetNullWords , std::vector <std::string> & currF);
-	void generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes);
-	void calculateOSMProb(lm::ngram::Model & ptrOp);
-	void computeOSMFeature(int startIndex , WordsBitmap & coverageVector);
-	void constructCepts(std::vector <int> & align , int startIndex , int endIndex, int targetPhraseLength);
-	void setPhrases(std::vector <std::string> & val1 , std::vector <std::string> & val2){currF = val1; currE = val2;}
-	void setState(const FFState* prev_state);
-	osmState * saveState();
-	void print();
-	void populateScores(std::vector <float> & scores);
-	void setState(const lm::ngram::State & val){lmState = val;}
+  void getMeCepts ( std::set <int> & eSide , std::set <int> & fSide , std::map <int , std::vector <int> > & tS , std::map <int , std::vector <int> > & sT);
+
+public:
+
+  osmHypothesis();
+  ~osmHypothesis() {};
+  void generateOperations(int & startIndex, int j1 , int contFlag , WordsBitmap & coverageVector , std::string english , std::string german , std::set <int> & targetNullWords , std::vector <std::string> & currF);
+  void generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes);
+  void calculateOSMProb(lm::ngram::Model & ptrOp);
+  void computeOSMFeature(int startIndex , WordsBitmap & coverageVector);
+  void constructCepts(std::vector <int> & align , int startIndex , int endIndex, int targetPhraseLength);
+  void setPhrases(std::vector <std::string> & val1 , std::vector <std::string> & val2) {
+    currF = val1;
+    currE = val2;
+  }
+  void setState(const FFState* prev_state);
+  osmState * saveState();
+  void print();
+  void populateScores(std::vector <float> & scores);
+  void setState(const lm::ngram::State & val) {
+    lmState = val;
+  }

 };

--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@ -383,7 +383,7 @@ LanguageModel *ConstructKenLM(const std::string &description, const std::string
  try {
    lm::ngram::ModelType model_type;
    if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
-	  
+
      switch(model_type) {
      case lm::ngram::PROBING:
        return new LanguageModelKen<lm::ngram::ProbingModel>(description, line, file, factorType, lazy);
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@ -694,9 +694,9 @@ bool StaticData::LoadData(Parameter *parameter)
      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
      SetWeights(model, weights);
    } else if (feature == "OpSequenceModel") {
-	  OpSequenceModel* model = new OpSequenceModel(line);
-	  vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-	  SetWeights(model, weights);
+      OpSequenceModel* model = new OpSequenceModel(line);
+      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
+      SetWeights(model, weights);
    } else if (feature == "PhrasePenalty") {
      PhrasePenalty* model = new PhrasePenalty(line);
      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
--- a/moses/TranslationModel/CompactPT/PhraseDecoder.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseDecoder.cpp
@ -190,7 +190,7 @@ std::string PhraseDecoder::MakeSourceKey(std::string &source)
  return source + m_separator;
 }

-TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel)
+TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel, bool eval)
 {

  // Not using TargetPhraseCollection avoiding "new" operator
@ -234,7 +234,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &

    // Decompress and decode target phrase collection
    TargetPhraseVectorPtr decodedPhraseColl =
-      DecodeCollection(tpv, encodedBitStream, sourcePhrase, topLevel);
+      DecodeCollection(tpv, encodedBitStream, sourcePhrase, topLevel, eval);

    return decodedPhraseColl;
  } else
@ -243,7 +243,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &

 TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
  TargetPhraseVectorPtr tpv, BitWrapper<> &encodedBitStream,
-  const Phrase &sourcePhrase, bool topLevel)
+  const Phrase &sourcePhrase, bool topLevel, bool eval)
 {

  bool extending = tpv->size();
@ -397,7 +397,8 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(

      if(scores.size() == m_numScoreComponent) {
        targetPhrase->GetScoreBreakdown().Assign(&m_phraseDictionary, scores);
-        targetPhrase->Evaluate(sourcePhrase);
+        if(eval)
+          targetPhrase->Evaluate(sourcePhrase);

        if(m_containsAlignmentInfo)
          state = Alignment;
--- a/moses/TranslationModel/CompactPT/PhraseDecoder.h
+++ b/moses/TranslationModel/CompactPT/PhraseDecoder.h
@ -131,12 +131,13 @@ public:
  size_t Load(std::FILE* in);

  TargetPhraseVectorPtr CreateTargetPhraseCollection(const Phrase &sourcePhrase,
-      bool topLevel = false);
+      bool topLevel = false, bool eval = true);

  TargetPhraseVectorPtr DecodeCollection(TargetPhraseVectorPtr tpv,
                                         BitWrapper<> &encodedBitStream,
                                         const Phrase &sourcePhrase,
-                                         bool topLevel);
+                                         bool topLevel,
+                                         bool eval);

  void PruneCache();
 };
--- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
@ -117,7 +117,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) c

  // Retrieve target phrase collection from phrase table
  TargetPhraseVectorPtr decodedPhraseColl
-  = m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
+  = m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, true);

  if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
    TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
@ -130,7 +130,6 @@ PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) c
    std::nth_element(tpv->begin(), nth, tpv->end(), CompareTargetPhrase());
    for(TargetPhraseVector::iterator it = tpv->begin(); it != nth; it++) {
      TargetPhrase *tp = new TargetPhrase(*it);
-      cerr << *tp << endl;
      phraseColl->Add(tp);
    }

@ -152,7 +151,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionRaw(const Phrase &sourcePhrase
    return TargetPhraseVectorPtr();

  // Retrieve target phrase collection from phrase table
-  return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
+  return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, false);
 }

 PhraseDictionaryCompact::~PhraseDictionaryCompact()
--- a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
@ -38,7 +38,7 @@ bool operator<(const PackedItem &pi1, const PackedItem &pi2)
 }

 std::string PhraseTableCreator::m_phraseStopSymbol = "__SPECIAL_STOP_SYMBOL__";
-std::string PhraseTableCreator::m_separator = " ||| ";
+std::string PhraseTableCreator::m_separator = "|||";

 PhraseTableCreator::PhraseTableCreator(std::string inPath,
                                       std::string outPath,
@ -332,12 +332,12 @@ void PhraseTableCreator::CreateRankHash()

 inline std::string PhraseTableCreator::MakeSourceKey(std::string &source)
 {
-  return source + m_separator;
+  return source + " " + m_separator + " ";
 }

 inline std::string PhraseTableCreator::MakeSourceTargetKey(std::string &source, std::string &target)
 {
-  return source + m_separator + target + m_separator;
+  return source + " " + m_separator + " " + target + " " + m_separator + " ";
 }

 void PhraseTableCreator::EncodeTargetPhrases()
@ -1034,17 +1034,24 @@ void RankingTask::operator()()
    for(size_t i = 0; i < lines.size(); i++) {
      std::vector<std::string> tokens;
      Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator);
-
-      if(tokens.size() < 3) {
+      
+      for(std::vector<std::string>::iterator it = tokens.begin(); it != tokens.end(); it++)
+        *it = Moses::Trim(*it);
+      
+      if(tokens.size() < 4) {
        std::cerr << "Error: It seems the following line has a wrong format:" << std::endl;
        std::cerr << "Line " << i << ": " << lines[i] << std::endl;
        abort();
      }
-      if(tokens.size() == 3 && m_creator.m_warnMe) {
-        std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl;
-        std::cerr << "but you are using PREnc encoding which makes use of alignment data. " << std::endl;
-        std::cerr << "Better use  -encoding None  or disable this warning with  -no-warnings ." << std::endl;
+      
+      if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) {
+        std::cerr << "Error: It seems the following line contains no alignment information, " << std::endl;
+        std::cerr << "but you are using ";
+        std::cerr << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
+        std::cerr << " encoding which makes use of alignment data. " << std::endl;
+        std::cerr << "Use -encoding None" << std::endl;
        std::cerr << "Line " << i << ": " << lines[i] << std::endl;
+        abort();
      }

      std::vector<float> scores = Tokenize<float>(tokens[2]);
@ -1125,18 +1132,23 @@ void EncodingTask::operator()()
      std::vector<std::string> tokens;
      Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator);

+      for(std::vector<std::string>::iterator it = tokens.begin(); it != tokens.end(); it++)
+        *it = Moses::Trim(*it);
+      
      if(tokens.size() < 3) {
        std::cerr << "Error: It seems the following line has a wrong format:" << std::endl;
        std::cerr << "Line " << i << ": " << lines[i] << std::endl;
        abort();
      }
-      if(tokens.size() == 3 && m_creator.m_coding != PhraseTableCreator::None && m_creator.m_warnMe) {
-        std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl;
+      
+      if((tokens.size() < 4 || tokens[3].size() <= 1) && m_creator.m_coding != PhraseTableCreator::None) { // size check first: the alignment field (tokens[3]) may be absent, the guard above only ensures 3 tokens
+        std::cerr << "Error: It seems the following line contains no alignment information, " << std::endl;
        std::cerr << "but you are using ";
        std::cerr << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
        std::cerr << " encoding which makes use of alignment data. " << std::endl;
-        std::cerr << "Better use -encoding None or disable this warning with -no-warnings." << std::endl;
+        std::cerr << "Use -encoding None" << std::endl;
        std::cerr << "Line " << i << ": " << lines[i] << std::endl;
+        abort();
      }

      size_t ownRank = 0;
--- a/moses/Word.h
+++ b/moses/Word.h
@ -59,8 +59,7 @@ public:
  /** deep copy */
  Word(const Word &copy)
    :m_isNonTerminal(copy.m_isNonTerminal)
-	,m_isOOV(copy.m_isOOV)
-  {
+    ,m_isOOV(copy.m_isOOV) {
    std::memcpy(m_factorArray, copy.m_factorArray, sizeof(FactorArray));
  }