Merge ../mosesdecoder into perf_moses2

2025-01-05 02:22:21 +03:00 · 2016-06-04 12:45:30 +01:00 · 2016-06-04 12:45:30 +01:00 · b75ef6f619
commit b75ef6f619
parent 81dd25ec0a defbf8d7c3
13 changed files with 819 additions and 600 deletions
--- a/mert/M2.cpp
+++ b/mert/M2.cpp
@ -3,21 +3,23 @@

 #include "M2.h"

-namespace MosesTuning {
+namespace MosesTuning
+{

-namespace M2 {
+namespace M2
+{

 bool Annot::lowercase = true;

- std::string Annot::transform(const std::string& e) {
-      std::string temp = e;                                                                                                                           
-      if(lowercase) {
-        boost::erase_all(temp, " ");                                                                                                                                                                   
-        return ToLower(temp);
-      }                                                                                                                                                               
-      else                                                                                                                                                                               
-        return e;                                                                                                                                                                        
-    }  
+std::string Annot::transform(const std::string& e)
+{
+  std::string temp = e;
+  if(lowercase) {
+    boost::erase_all(temp, " ");
+    return ToLower(temp);
+  } else
+    return e;
+}

 const std::string ToLower(const std::string& str)
 {
@ -27,27 +29,30 @@ const std::string ToLower(const std::string& str)
 }


-Edit operator+(Edit& e1, Edit& e2) {
-    std::string edit;
-    if(e1.edit.size() > 0 && e2.edit.size() > 0)
-        edit = e1.edit + " " + e2.edit;
-    else if(e1.edit.size() > 0)
-        edit = e1.edit;
-    else if(e2.edit.size() > 0)
-        edit = e2.edit;
-    
-    return Edit(e1.cost + e2.cost, e1.changed + e2.changed, e1.unchanged + e2.unchanged, edit);
+Edit operator+(Edit& e1, Edit& e2)
+{
+  std::string edit;
+  if(e1.edit.size() > 0 && e2.edit.size() > 0)
+    edit = e1.edit + " " + e2.edit;
+  else if(e1.edit.size() > 0)
+    edit = e1.edit;
+  else if(e2.edit.size() > 0)
+    edit = e2.edit;
+
+  return Edit(e1.cost + e2.cost, e1.changed + e2.changed, e1.unchanged + e2.unchanged, edit);
 }


-Edge operator+(Edge e1, Edge e2) {
-    return Edge(e1.v, e2.u, e1.edit + e2.edit);
+Edge operator+(Edge e1, Edge e2)
+{
+  return Edge(e1.v, e2.u, e1.edit + e2.edit);
 }

-std::ostream& operator<<(std::ostream& o, Sentence s) {
-    for(Sentence::iterator it = s.begin(); it != s.end(); it++)
-        o << *it << " ";
-    return o;
+std::ostream& operator<<(std::ostream& o, Sentence s)
+{
+  for(Sentence::iterator it = s.begin(); it != s.end(); it++)
+    o << *it << " ";
+  return o;
 }


--- a/mert/M2.h
+++ b/mert/M2.h
@ -16,9 +16,11 @@



-namespace MosesTuning {
+namespace MosesTuning
+{

-namespace M2 {
+namespace M2
+{

 typedef std::vector<float> Stats;

@ -29,44 +31,44 @@ std::ostream& operator<<(std::ostream& o, Sentence s);
 const std::string ToLower(const std::string& str);

 struct Annot {
-    size_t i;
-    size_t j;
-    
-    std::string type;
-    std::string edit;
-    
-    size_t annotator;
-    
-    bool operator<(Annot a) const {
-        return i < a.i || (i == a.i && j < a.j)
-            || (i == a.i && j == a.j && annotator < a.annotator)
-            || (i == a.i && j == a.j && annotator == a.annotator && transform(edit) < transform(a.edit));
-    }
+  size_t i;
+  size_t j;

-    bool operator==(Annot a) const {
-       return (!(*this < a) && !(a < *this));
-    }
+  std::string type;
+  std::string edit;

-    static std::string transform(const std::string& e);
+  size_t annotator;

-    static bool lowercase;
+  bool operator<(Annot a) const {
+    return i < a.i || (i == a.i && j < a.j)
+           || (i == a.i && j == a.j && annotator < a.annotator)
+           || (i == a.i && j == a.j && annotator == a.annotator && transform(edit) < transform(a.edit));
+  }
+
+  bool operator==(Annot a) const {
+    return (!(*this < a) && !(a < *this));
+  }
+
+  static std::string transform(const std::string& e);
+
+  static bool lowercase;
 };

 typedef std::set<Annot> Annots;
 typedef std::set<size_t> Users;

 struct Unit {
-    Sentence first;
-    Annots second;
-    Users third;
+  Sentence first;
+  Annots second;
+  Users third;
 };

 typedef std::vector<Unit> M2File;

 struct Edit {
  Edit(float c = 1.0, size_t ch = 0, size_t unch = 1, std::string e = "")
-  : cost(c), changed(ch), unchanged(unch), edit(e) {}
-  
+    : cost(c), changed(ch), unchanged(unch), edit(e) {}
+
  float cost;
  size_t changed;
  size_t unchanged;
@ -77,7 +79,7 @@ Edit operator+(Edit& e1, Edit& e2);

 struct Vertex {
  Vertex(size_t a = 0, size_t b = 0) : i(a), j(b) {}
-  
+
  bool operator<(const Vertex &v) const {
    return i < v.i || (i == v.i && j < v.j);
  }
@ -85,19 +87,19 @@ struct Vertex {
  bool operator==(const Vertex &v) const {
    return i == v.i && j == v.j;
  }
-  
+
  size_t i;
  size_t j;
 };

 struct Edge {
  Edge(Vertex vv = Vertex(), Vertex uu = Vertex(), Edit editt = Edit())
-  : v(vv), u(uu), edit(editt) {}
-   
+    : v(vv), u(uu), edit(editt) {}
+
  bool operator<(const Edge &e) const {
    return v < e.v || (v == e.v && u < e.u);
  }
-  
+
  Vertex v;
  Vertex u;
  Edit edit;
@ -110,7 +112,7 @@ typedef std::vector<Row> Matrix;

 struct Info {
  Info(Vertex vv = Vertex(), Edit editt = Edit())
-  : v(vv), edit(editt) {}
+    : v(vv), edit(editt) {}

  bool operator<(const Info &i) const {
    return v < i.v;
@ -127,352 +129,350 @@ typedef std::vector<TrackRow> TrackMatrix;
 typedef std::set<Vertex> Vertices;
 typedef std::set<Edge> Edges;

-class M2 {
-  private:
-    M2File m_m2;
-    
-    size_t m_max_unchanged;
-    float m_beta;
-    bool m_lowercase;
-    bool m_verbose;
-    
-  public:
-    M2() : m_max_unchanged(2), m_beta(0.5), m_lowercase(true), m_verbose(false) { }
-    M2(size_t max_unchanged, float beta, bool truecase, bool verbose = false) 
+class M2
+{
+private:
+  M2File m_m2;
+
+  size_t m_max_unchanged;
+  float m_beta;
+  bool m_lowercase;
+  bool m_verbose;
+
+public:
+  M2() : m_max_unchanged(2), m_beta(0.5), m_lowercase(true), m_verbose(false) { }
+  M2(size_t max_unchanged, float beta, bool truecase, bool verbose = false)
    : m_max_unchanged(max_unchanged), m_beta(beta), m_lowercase(!truecase), m_verbose(verbose) {
-       if(!m_lowercase) {
-          Annot::lowercase = false;
-       }
+    if(!m_lowercase) {
+      Annot::lowercase = false;
    }
-    
-    float Beta() {
-        return m_beta;
-    }
-    
-    void ReadM2(const std::string& filename) {
-        std::ifstream m2file(filename.c_str());
-        std::string line;
-        
-        Unit unit;
-        bool first = true;
-        
-        while(std::getline(m2file, line)) {
-            if(line.size() > 2) {
-              if(line.substr(0, 2) == "S ") {
-                if(!first) {
-                    if(unit.third.empty())
-                        unit.third.insert(0);
-                    m_m2.push_back(unit);
-                }
-                first = false;
-    
-                unit.first = Sentence();
-                unit.second = Annots();
-    
-                std::string sentenceLine = line.substr(2);
-                boost::split(unit.first, sentenceLine, boost::is_any_of(" "), boost::token_compress_on);
-              }
-              if(line.substr(0, 2) == "A ") {
-                std::string annotLine = line.substr(2);
-                
-                std::vector<std::string> annot;
-                boost::iter_split(annot, annotLine, boost::algorithm::first_finder("|||"));
-                
-                if(annot[1] != "noop") {
-                    Annot a;
-                    std::stringstream rangeStr(annot[0]);            
-                    rangeStr >> a.i >> a.j;
-                    a.type = annot[1];
-                    a.edit = annot[2];            
-                        
-                    std::stringstream annotStr(annot[5]);
-                    annotStr >> a.annotator;
-                    
-                    unit.third.insert(a.annotator);
-                    unit.second.insert(a);
-                }
-                else {
-                    std::stringstream annotStr(annot[5]);
-                    size_t annotator;
-                    annotStr >> annotator;
-                    unit.third.insert(annotator);
-                }
-              }
-            }
+  }
+
+  float Beta() {
+    return m_beta;
+  }
+
+  void ReadM2(const std::string& filename) {
+    std::ifstream m2file(filename.c_str());
+    std::string line;
+
+    Unit unit;
+    bool first = true;
+
+    while(std::getline(m2file, line)) {
+      if(line.size() > 2) {
+        if(line.substr(0, 2) == "S ") {
+          if(!first) {
+            if(unit.third.empty())
+              unit.third.insert(0);
+            m_m2.push_back(unit);
+          }
+          first = false;
+
+          unit.first = Sentence();
+          unit.second = Annots();
+
+          std::string sentenceLine = line.substr(2);
+          boost::split(unit.first, sentenceLine, boost::is_any_of(" "), boost::token_compress_on);
        }
-        if(unit.third.empty())
-            unit.third.insert(0);
-        m_m2.push_back(unit);   
-    }
-    
-    size_t LevenshteinMatrix(const Sentence &s1, const Sentence &s2, Matrix &d, TrackMatrix &bt) {
-        size_t n = s1.size();
-        size_t m = s2.size();
-          
-        if (n == 0)
-          return m;
-        if (m == 0)
-          return n;
-        
-        d.resize(n + 1, Row(m + 1, 0));
-        bt.resize(n + 1, TrackRow(m + 1));
-        
-        for(size_t i = 0; i <= n; ++i) {
-          d[i][0] = i;
-          if(i > 0)
-              bt[i][0].insert(Info(Vertex(i - 1, 0), Edit(1, 1, 0, "")));
-        }
-        for(size_t j = 0; j <= m; ++j) {
-          d[0][j] = j;
-          if(j > 0)
-              bt[0][j].insert(Info(Vertex(0, j - 1), Edit(1, 1, 0, s2[j - 1])));
-        }
-        
-        int cost;
-        for(size_t i = 1; i <= n; ++i) {
-          for(size_t j = 1; j <= m; ++j) {
-            if(Annot::transform(s1[i-1]) == Annot::transform(s2[j-1]))
-              cost = 0;
-            else 
-              cost = 2;
-              
-            size_t left = d[i][j - 1] + 1;
-            size_t down = d[i - 1][j] + 1;
-            size_t diag = d[i - 1][j - 1] + cost;
-                  
-            d[i][j] = std::min(left, std::min(down, diag));
-            
-            if(d[i][j] == left)
-              bt[i][j].insert(Info(Vertex(i, j - 1), Edit(1, 1, 0, s2[j - 1])));
-            if(d[i][j] == down)
-              bt[i][j].insert(Info(Vertex(i - 1, j), Edit(1, 1, 0, "")));
-            if(d[i][j] == diag)
-              bt[i][j].insert(Info(Vertex(i - 1, j - 1), cost ? Edit(1, 1, 0, s2[j - 1]) : Edit(1, 0, 1, s2[j - 1]) ));
+        if(line.substr(0, 2) == "A ") {
+          std::string annotLine = line.substr(2);
+
+          std::vector<std::string> annot;
+          boost::iter_split(annot, annotLine, boost::algorithm::first_finder("|||"));
+
+          if(annot[1] != "noop") {
+            Annot a;
+            std::stringstream rangeStr(annot[0]);
+            rangeStr >> a.i >> a.j;
+            a.type = annot[1];
+            a.edit = annot[2];
+
+            std::stringstream annotStr(annot[5]);
+            annotStr >> a.annotator;
+
+            unit.third.insert(a.annotator);
+            unit.second.insert(a);
+          } else {
+            std::stringstream annotStr(annot[5]);
+            size_t annotator;
+            annotStr >> annotator;
+            unit.third.insert(annotator);
          }
        }
-        return d[n][m];
+      }
    }
-      
-      
-    void BuildGraph(const TrackMatrix &bt, Vertices &V, Edges &E) {    
-        Vertex start(bt.size() - 1, bt[0].size() - 1);
-        
-        std::queue<Vertex> Q;
-        Q.push(start);
-        while(!Q.empty()) {
-            Vertex v = Q.front();
-            Q.pop();
-            if(V.count(v) > 0)
-                continue;
-            V.insert(v);
-            for(Track::iterator it = bt[v.i][v.j].begin();
-                it != bt[v.i][v.j].end(); ++it) {
-                Edge e(it->v, v, it->edit);
-                E.insert(e);
-                if(V.count(e.v) == 0)
-                    Q.push(e.v);
-            }
-        }
-        
-        Edges newE;
-        do {
-            newE.clear();
-            for(Edges::iterator it1 = E.begin(); it1 != E.end(); ++it1) {
-                for(Edges::iterator it2 = E.begin(); it2 != E.end(); ++it2) {
-                    if(it1->u == it2->v) {
-                        Edge e = *it1 + *it2;
-                        if(e.edit.changed > 0 &&
-                           e.edit.unchanged <= m_max_unchanged &&
-                           E.count(e) == 0)
-                            newE.insert(e);
-                    }
-                }
-            }
-            E.insert(newE.begin(), newE.end());
-        } while(newE.size() > 0); 
+    if(unit.third.empty())
+      unit.third.insert(0);
+    m_m2.push_back(unit);
+  }
+
+  size_t LevenshteinMatrix(const Sentence &s1, const Sentence &s2, Matrix &d, TrackMatrix &bt) {
+    size_t n = s1.size();
+    size_t m = s2.size();
+
+    if (n == 0)
+      return m;
+    if (m == 0)
+      return n;
+
+    d.resize(n + 1, Row(m + 1, 0));
+    bt.resize(n + 1, TrackRow(m + 1));
+
+    for(size_t i = 0; i <= n; ++i) {
+      d[i][0] = i;
+      if(i > 0)
+        bt[i][0].insert(Info(Vertex(i - 1, 0), Edit(1, 1, 0, "")));
    }
-    
-    void AddWeights(Edges &E, const Unit &u, size_t aid) {
-        for(Edges::iterator it1 = E.begin(); it1 != E.end(); ++it1) {
-            if(it1->edit.changed > 0) {
-                const_cast<float&>(it1->edit.cost) += 0.001;   
-                for(Annots::iterator it2 = u.second.begin(); it2 != u.second.end(); ++it2) {
-                    // if matches an annotator
-                    if(it1->v.i == it2->i && it1->u.i == it2->j 
-                       && Annot::transform(it1->edit.edit) == Annot::transform(it2->edit)
-                       && it2->annotator == aid) {
-                        int newWeight = -(m_max_unchanged + 1) * E.size();
-                        const_cast<float&>(it1->edit.cost) = newWeight;
-                    }
-                }
-            }
-        }
-    }
-    
-    void BellmanFord(Vertices &V, Edges &E) {
-        Vertex source(0, 0);
-        std::map<Vertex, float> distance;
-        std::map<Vertex, Vertex> predecessor;
-        
-        for(Vertices::iterator it = V.begin(); it != V.end(); ++it) {
-            if(*it == source)
-                distance[*it] = 0;
-            else {
-                distance[*it] = std::numeric_limits<float>::infinity();
-            }
-        }
-        
-        for(size_t i = 1; i < V.size(); ++i) {
-            for(Edges::iterator it = E.begin(); it != E.end(); ++it) {
-                if(distance[it->v] + it->edit.cost < distance[it->u]) {
-                    distance[it->u] = distance[it->v] + it->edit.cost;
-                    predecessor[it->u] = it->v;
-                }
-            }
-        }
-        
-        Edges newE;
-        
-        Vertex v = *V.rbegin();
-        while(true) {
-            //std::cout << predecessor[v] << " -> " << v << std::endl;
-            Edges::iterator it = E.find(Edge(predecessor[v], v));
-            if(it != E.end()) {
-                Edge f = *it;
-                //std::cout << f << std::endl;
-                newE.insert(f);
-                
-                v = predecessor[v];
-                if(v == source)
-                    break;
-            }
-            else {
-                std::cout << "Error" << std::endl;
-                break;
-            }
-        }
-        E.clear();
-        E.insert(newE.begin(), newE.end());
+    for(size_t j = 0; j <= m; ++j) {
+      d[0][j] = j;
+      if(j > 0)
+        bt[0][j].insert(Info(Vertex(0, j - 1), Edit(1, 1, 0, s2[j - 1])));
    }

-    void AddStats(const std::vector<Edges> &Es, const Unit &u, Stats &stats, size_t line) {
-        
-        std::map<size_t, Stats> statsPerAnnotator;
-        for(std::set<size_t>::iterator it = u.third.begin();
-            it != u.third.end(); ++it) {
-            statsPerAnnotator[*it] = Stats(4, 0);
-        }
+    int cost;
+    for(size_t i = 1; i <= n; ++i) {
+      for(size_t j = 1; j <= m; ++j) {
+        if(Annot::transform(s1[i-1]) == Annot::transform(s2[j-1]))
+          cost = 0;
+        else
+          cost = 2;

-        for(Annots::iterator it = u.second.begin(); it != u.second.end(); it++)
-            statsPerAnnotator[it->annotator][2]++;
+        size_t left = d[i][j - 1] + 1;
+        size_t down = d[i - 1][j] + 1;
+        size_t diag = d[i - 1][j - 1] + cost;

-        for(std::set<size_t>::iterator ait = u.third.begin();
-                ait != u.third.end(); ++ait) {    
-          for(Edges::iterator eit = Es[*ait].begin(); eit != Es[*ait].end(); ++eit) {
-            if(eit->edit.changed > 0) {
-                statsPerAnnotator[*ait][1]++;
-                Annot f;
-                f.i = eit->v.i;
-                f.j = eit->u.i;
-                f.annotator = *ait;
-                f.edit = eit->edit.edit;
-                for(Annots::iterator fit = u.second.begin(); fit != u.second.end(); fit++) {
-                  if(f == *fit) 
-                    statsPerAnnotator[*ait][0]++;
-                }
-            }
+        d[i][j] = std::min(left, std::min(down, diag));
+
+        if(d[i][j] == left)
+          bt[i][j].insert(Info(Vertex(i, j - 1), Edit(1, 1, 0, s2[j - 1])));
+        if(d[i][j] == down)
+          bt[i][j].insert(Info(Vertex(i - 1, j), Edit(1, 1, 0, "")));
+        if(d[i][j] == diag)
+          bt[i][j].insert(Info(Vertex(i - 1, j - 1), cost ? Edit(1, 1, 0, s2[j - 1]) : Edit(1, 0, 1, s2[j - 1]) ));
+      }
+    }
+    return d[n][m];
+  }
+
+
+  void BuildGraph(const TrackMatrix &bt, Vertices &V, Edges &E) {
+    Vertex start(bt.size() - 1, bt[0].size() - 1);
+
+    std::queue<Vertex> Q;
+    Q.push(start);
+    while(!Q.empty()) {
+      Vertex v = Q.front();
+      Q.pop();
+      if(V.count(v) > 0)
+        continue;
+      V.insert(v);
+      for(Track::iterator it = bt[v.i][v.j].begin();
+          it != bt[v.i][v.j].end(); ++it) {
+        Edge e(it->v, v, it->edit);
+        E.insert(e);
+        if(V.count(e.v) == 0)
+          Q.push(e.v);
+      }
+    }
+
+    Edges newE;
+    do {
+      newE.clear();
+      for(Edges::iterator it1 = E.begin(); it1 != E.end(); ++it1) {
+        for(Edges::iterator it2 = E.begin(); it2 != E.end(); ++it2) {
+          if(it1->u == it2->v) {
+            Edge e = *it1 + *it2;
+            if(e.edit.changed > 0 &&
+                e.edit.unchanged <= m_max_unchanged &&
+                E.count(e) == 0)
+              newE.insert(e);
          }
        }
-        size_t bestAnnot = 0;
-        float  bestF = -1;
-        for(std::set<size_t>::iterator it = u.third.begin();
-            it != u.third.end(); ++it) {
-            Stats localStats = stats;
-            localStats[0] += statsPerAnnotator[*it][0];
-            localStats[1] += statsPerAnnotator[*it][1];
-            localStats[2] += statsPerAnnotator[*it][2];
-            if(m_verbose)
-              std::cerr << *it << " : " << localStats[0] << " " << localStats[1] << " " << localStats[2] << std::endl;
-            float f = FScore(localStats);
-            if(m_verbose)
-              std::cerr << f << std::endl;
-            if(f > bestF) {
-                bestF = f;
-                bestAnnot = *it;
-            }
+      }
+      E.insert(newE.begin(), newE.end());
+    } while(newE.size() > 0);
+  }
+
+  void AddWeights(Edges &E, const Unit &u, size_t aid) {
+    for(Edges::iterator it1 = E.begin(); it1 != E.end(); ++it1) {
+      if(it1->edit.changed > 0) {
+        const_cast<float&>(it1->edit.cost) += 0.001;
+        for(Annots::iterator it2 = u.second.begin(); it2 != u.second.end(); ++it2) {
+          // if matches an annotator
+          if(it1->v.i == it2->i && it1->u.i == it2->j
+              && Annot::transform(it1->edit.edit) == Annot::transform(it2->edit)
+              && it2->annotator == aid) {
+            int newWeight = -(m_max_unchanged + 1) * E.size();
+            const_cast<float&>(it1->edit.cost) = newWeight;
+          }
        }
-        if(m_verbose)
-          std::cerr << ">> Chosen Annotator for line " << line + 1 << " : " << bestAnnot << std::endl;
-        stats[0] += statsPerAnnotator[bestAnnot][0];
-        stats[1] += statsPerAnnotator[bestAnnot][1];
-        stats[2] += statsPerAnnotator[bestAnnot][2];
+      }
    }
-    
-    void SufStats(const std::string &sStr, size_t i, Stats &stats) {                
-        std::string temp = sStr;
-        
-        Sentence s;
-        boost::split(s, temp, boost::is_any_of(" "), boost::token_compress_on);
-        
-        Unit &unit = m_m2[i];
-                
-        Matrix d;
-        TrackMatrix bt;
-        size_t distance = LevenshteinMatrix(unit.first, s, d, bt);
+  }

-        std::vector<Vertices> Vs(unit.third.size());
-        std::vector<Edges> Es(unit.third.size());
+  void BellmanFord(Vertices &V, Edges &E) {
+    Vertex source(0, 0);
+    std::map<Vertex, float> distance;
+    std::map<Vertex, Vertex> predecessor;

-        if(distance > unit.first.size()) {
-            std::cerr << "Levenshtein distance is greater than source size." << std::endl;
-            stats[0] = 0;
-            stats[1] = distance;
-            stats[2] = 0;
-            stats[3] = unit.first.size();
-            return;
-        }
-        else if(distance > 0) {
-            for(size_t j = 0; j < unit.third.size(); j++) { 
-              BuildGraph(bt, Vs[j], Es[j]);
-              AddWeights(Es[j], unit, j);    
-              BellmanFord(Vs[j], Es[j]);
-            }
-        }
-        AddStats(Es, unit, stats, i);
-        stats[3] = unit.first.size();
+    for(Vertices::iterator it = V.begin(); it != V.end(); ++it) {
+      if(*it == source)
+        distance[*it] = 0;
+      else {
+        distance[*it] = std::numeric_limits<float>::infinity();
+      }
    }

-    
-    float FScore(const Stats& stats) {
-        float p = 1.0;
-        if(stats[1] != 0)
-            p = (float)stats[0] / (float)stats[1];
-        
-        float r = 1.0;
-        if(stats[2] != 0)
-            r = (float)stats[0] / (float)stats[2];
-        
-        float denom = (m_beta * m_beta * p + r);
-        float f = 0.0;
-        if(denom != 0)
-            f = ((1 + m_beta * m_beta) * p * r) / denom;
-        return f;
+    for(size_t i = 1; i < V.size(); ++i) {
+      for(Edges::iterator it = E.begin(); it != E.end(); ++it) {
+        if(distance[it->v] + it->edit.cost < distance[it->u]) {
+          distance[it->u] = distance[it->v] + it->edit.cost;
+          predecessor[it->u] = it->v;
+        }
+      }
    }
-    
-    void FScore(const Stats& stats, float &p, float &r, float &f) {
-        p = 1.0;
-        if(stats[1] != 0)
-            p = (float)stats[0] / (float)stats[1];
-        
-        r = 1.0;
-        if(stats[2] != 0)
-            r = (float)stats[0] / (float)stats[2];
-        
-        float denom = (m_beta * m_beta * p + r);
-        f = 0.0;
-        if(denom != 0)
-            f = ((1 + m_beta * m_beta) * p * r) / denom;
+
+    Edges newE;
+
+    Vertex v = *V.rbegin();
+    while(true) {
+      //std::cout << predecessor[v] << " -> " << v << std::endl;
+      Edges::iterator it = E.find(Edge(predecessor[v], v));
+      if(it != E.end()) {
+        Edge f = *it;
+        //std::cout << f << std::endl;
+        newE.insert(f);
+
+        v = predecessor[v];
+        if(v == source)
+          break;
+      } else {
+        std::cout << "Error" << std::endl;
+        break;
+      }
    }
+    E.clear();
+    E.insert(newE.begin(), newE.end());
+  }
+
+  void AddStats(const std::vector<Edges> &Es, const Unit &u, Stats &stats, size_t line) {
+
+    std::map<size_t, Stats> statsPerAnnotator;
+    for(std::set<size_t>::iterator it = u.third.begin();
+        it != u.third.end(); ++it) {
+      statsPerAnnotator[*it] = Stats(4, 0);
+    }
+
+    for(Annots::iterator it = u.second.begin(); it != u.second.end(); it++)
+      statsPerAnnotator[it->annotator][2]++;
+
+    for(std::set<size_t>::iterator ait = u.third.begin();
+        ait != u.third.end(); ++ait) {
+      for(Edges::iterator eit = Es[*ait].begin(); eit != Es[*ait].end(); ++eit) {
+        if(eit->edit.changed > 0) {
+          statsPerAnnotator[*ait][1]++;
+          Annot f;
+          f.i = eit->v.i;
+          f.j = eit->u.i;
+          f.annotator = *ait;
+          f.edit = eit->edit.edit;
+          for(Annots::iterator fit = u.second.begin(); fit != u.second.end(); fit++) {
+            if(f == *fit)
+              statsPerAnnotator[*ait][0]++;
+          }
+        }
+      }
+    }
+    size_t bestAnnot = 0;
+    float  bestF = -1;
+    for(std::set<size_t>::iterator it = u.third.begin();
+        it != u.third.end(); ++it) {
+      Stats localStats = stats;
+      localStats[0] += statsPerAnnotator[*it][0];
+      localStats[1] += statsPerAnnotator[*it][1];
+      localStats[2] += statsPerAnnotator[*it][2];
+      if(m_verbose)
+        std::cerr << *it << " : " << localStats[0] << " " << localStats[1] << " " << localStats[2] << std::endl;
+      float f = FScore(localStats);
+      if(m_verbose)
+        std::cerr << f << std::endl;
+      if(f > bestF) {
+        bestF = f;
+        bestAnnot = *it;
+      }
+    }
+    if(m_verbose)
+      std::cerr << ">> Chosen Annotator for line " << line + 1 << " : " << bestAnnot << std::endl;
+    stats[0] += statsPerAnnotator[bestAnnot][0];
+    stats[1] += statsPerAnnotator[bestAnnot][1];
+    stats[2] += statsPerAnnotator[bestAnnot][2];
+  }
+
+  void SufStats(const std::string &sStr, size_t i, Stats &stats) {
+    std::string temp = sStr;
+
+    Sentence s;
+    boost::split(s, temp, boost::is_any_of(" "), boost::token_compress_on);
+
+    Unit &unit = m_m2[i];
+
+    Matrix d;
+    TrackMatrix bt;
+    size_t distance = LevenshteinMatrix(unit.first, s, d, bt);
+
+    std::vector<Vertices> Vs(unit.third.size());
+    std::vector<Edges> Es(unit.third.size());
+
+    if(distance > unit.first.size()) {
+      std::cerr << "Levenshtein distance is greater than source size." << std::endl;
+      stats[0] = 0;
+      stats[1] = distance;
+      stats[2] = 0;
+      stats[3] = unit.first.size();
+      return;
+    } else if(distance > 0) {
+      for(size_t j = 0; j < unit.third.size(); j++) {
+        BuildGraph(bt, Vs[j], Es[j]);
+        AddWeights(Es[j], unit, j);
+        BellmanFord(Vs[j], Es[j]);
+      }
+    }
+    AddStats(Es, unit, stats, i);
+    stats[3] = unit.first.size();
+  }
+
+
+  float FScore(const Stats& stats) {
+    float p = 1.0;
+    if(stats[1] != 0)
+      p = (float)stats[0] / (float)stats[1];
+
+    float r = 1.0;
+    if(stats[2] != 0)
+      r = (float)stats[0] / (float)stats[2];
+
+    float denom = (m_beta * m_beta * p + r);
+    float f = 0.0;
+    if(denom != 0)
+      f = ((1 + m_beta * m_beta) * p * r) / denom;
+    return f;
+  }
+
+  void FScore(const Stats& stats, float &p, float &r, float &f) {
+    p = 1.0;
+    if(stats[1] != 0)
+      p = (float)stats[0] / (float)stats[1];
+
+    r = 1.0;
+    if(stats[2] != 0)
+      r = (float)stats[0] / (float)stats[2];
+
+    float denom = (m_beta * m_beta * p + r);
+    f = 0.0;
+    if(denom != 0)
+      f = ((1 + m_beta * m_beta) * p * r) / denom;
+  }
 };

 }
--- a/mert/M2Scorer.cpp
+++ b/mert/M2Scorer.cpp
@ -45,90 +45,92 @@ float M2Scorer::calculateScore(const vector<ScoreStatsType>& comps) const
  if (comps.size() != NumberOfScores()) {
    throw runtime_error("Size of stat vector for M2Scorer is not " + NumberOfScores());
  }
-  
+
  float beta = beta_;
-  
-  
+
+
  float p = 0.0;
  float r = 0.0;
  float f = 0.0;
-    
+
  if(comps[1] != 0)
    p = comps[0] / (double)comps[1];
  else
    p = 1.0;
-    
+
  if(comps[2] != 0)
    r = comps[0] / (double)comps[2];
  else
    r = 1.0;
-  
+
  float denom = beta * beta * p + r;
  if(denom != 0)
    f = (1.0 + beta * beta) * p * r / denom;
  else
    f = 0.0;

-  if(verbose_) 
+  if(verbose_)
    std::cerr << comps[0] << " " << comps[1] << " " << comps[2] << std::endl;

-  if(verbose_) 
+  if(verbose_)
    std::cerr << p << " " << r << " " << f << std::endl;
-  
+
  return f;
 }

-float M2Scorer::getReferenceLength(const vector<ScoreStatsType>& comps) const {
+float M2Scorer::getReferenceLength(const vector<ScoreStatsType>& comps) const
+{
  return comps[3];
 }

-std::vector<ScoreStatsType> randomStats(float decay, int max) {
+std::vector<ScoreStatsType> randomStats(float decay, int max)
+{
  int gold = rand() % max;
  int prop = rand() % max;
  int corr = 0.0;
-  
+
  if(std::min(prop, gold) > 0)
    corr = rand() % std::min(prop, gold);
-  
+
  //std::cerr << corr << " " << prop << " " << gold << std::endl;
-  
+
  std::vector<ScoreStatsType> stats(3, 0.0);
  stats[0] = corr * decay;
  stats[1] = prop * decay;
  stats[2] = gold * decay;
-  
+
  return stats;
 }

 float sentenceM2(const std::vector<ScoreStatsType>& stats)
 {
  float beta = 0.5;
-  
+
  std::vector<ScoreStatsType> smoothStats(3, 0.0); // = randomStats(0.001, 5);
-  smoothStats[0] += stats[0]; 
-  smoothStats[1] += stats[1]; 
-  smoothStats[2] += stats[2]; 
-  
+  smoothStats[0] += stats[0];
+  smoothStats[1] += stats[1];
+  smoothStats[2] += stats[2];
+
  float p = 0.0;
  float r = 0.0;
  float f = 0.0;
-    
+
  if(smoothStats[1] != 0)
    p = smoothStats[0] / smoothStats[1];
  else
    p = 1.0;
-    
+
  if(smoothStats[2] != 0)
    r = smoothStats[0] / smoothStats[2];
  else
    r = 1.0;
-  
+
  float denom = beta * beta * p + r;
  if(denom != 0)
    f = (1.0 + beta * beta) * p * r / denom;
  else
    f = 0.0;
-  
+
  return f;
 }

--- a/mert/M2Scorer.h
+++ b/mert/M2Scorer.h
@ -31,15 +31,15 @@ public:
  virtual float calculateScore(const std::vector<ScoreStatsType>& comps) const;
  virtual float getReferenceLength(const std::vector<ScoreStatsType>& comps) const;

-private:  
-  float beta_; 
+private:
+  float beta_;
  int max_unchanged_words_;
  bool truecase_;
  bool verbose_;
  M2::M2 m2_;
-  
+
  std::map<std::pair<size_t, std::string>, std::vector<ScoreStatsType> > seen_;
-    
+
  // no copying allowed
  M2Scorer(const M2Scorer&);
  M2Scorer& operator=(const M2Scorer&);
--- a/moses/FF/CorrectionPattern.cpp
+++ b/moses/FF/CorrectionPattern.cpp
@ -23,21 +23,22 @@ namespace Moses

 using namespace std;

-std::string MakePair(const std::string &s1, const std::string &s2, bool general) {
+std::string MakePair(const std::string &s1, const std::string &s2, bool general)
+{
  std::vector<std::string> sourceList;
  std::vector<std::string> targetList;
-  
+
  if(general) {
    Diffs diffs = CreateDiff(s1, s2);
-    
+
    size_t i = 0, j = 0;
    char lastType = 'm';
-      
-    std::string source, target;  
+
+    std::string source, target;
    std::string match;
-    
+
    int count = 1;
-    
+
    BOOST_FOREACH(Diff type, diffs) {
      if(type == 'm') {
        if(lastType != 'm') {
@ -46,7 +47,7 @@ std::string MakePair(const std::string &s1, const std::string &s2, bool general)
        }
        source.clear();
        target.clear();
-        
+
        if(s1[i] == '+') {
          if(match.size() >= 3) {
            sourceList.push_back("(\\w{3,})·");
@ -54,56 +55,51 @@ std::string MakePair(const std::string &s1, const std::string &s2, bool general)
            sprintf((char*)temp.c_str(), "%d", count);
            targetList.push_back("\\" + temp + "·");
            count++;
-          }
-          else {
+          } else {
            sourceList.push_back(match + "·");
-            targetList.push_back(match + "·");  
+            targetList.push_back(match + "·");
          }
          match.clear();
-        }
-        else 
+        } else
          match.push_back(s1[i]);
-        
+
        i++;
        j++;
-      }
-      else if(type == 'd') {
+      } else if(type == 'd') {
        if(s1[i] == '+')
          source += "·";
        else
          source.push_back(s1[i]);
        i++;
-      }
-      else if(type == 'i') {
+      } else if(type == 'i') {
        if(s2[j] == '+')
          target += "·";
        else
          target.push_back(s2[j]);
        j++;
      }
-      if(type != 'm' && !match.empty()) {      
+      if(type != 'm' && !match.empty()) {
        if(match.size() >= 3) {
          sourceList.push_back("(\\w{3,})");
          std::string temp = "1";
          sprintf((char*)temp.c_str(), "%d", count);
          targetList.push_back("\\" + temp);
          count++;
-        }
-        else {
+        } else {
          sourceList.push_back(match);
-          targetList.push_back(match);  
+          targetList.push_back(match);
        }
-      
+
        match.clear();
      }
-  
+
      lastType = type;
    }
    if(lastType != 'm') {
      sourceList.push_back(source);
      targetList.push_back(target);
    }
-    
+
    if(!match.empty()) {
      if(match.size() >= 3) {
        sourceList.push_back("(\\w{3,})");
@ -111,45 +107,42 @@ std::string MakePair(const std::string &s1, const std::string &s2, bool general)
        sprintf((char*)temp.c_str(), "%d", count);
        targetList.push_back("\\"+ temp);
        count++;
-      }
-      else {
+      } else {
        sourceList.push_back(match);
-        targetList.push_back(match);  
+        targetList.push_back(match);
      }
    }
    match.clear();
-  }
-  else {
+  } else {
    std::string cs1 = s1;
    std::string cs2 = s2;
    boost::replace_all(cs1, "+", "·");
    boost::replace_all(cs2, "+", "·");
-    
+
    sourceList.push_back(cs1);
    targetList.push_back(cs2);
  }
-  
+
  std::stringstream out;
  out << "sub(«";
  out << boost::join(sourceList, "");
  out << "»,«";
  out << boost::join(targetList, "");
  out << "»)";
-  
+
  return out.str();
 }

-std::string CorrectionPattern::CreateSinglePattern(const Tokens &s1, const Tokens &s2) const {
+std::string CorrectionPattern::CreateSinglePattern(const Tokens &s1, const Tokens &s2) const
+{
  std::stringstream out;
  if(s1.empty()) {
    out << "ins(«" << boost::join(s2, "·") << "»)";
    return out.str();
-  }
-  else if(s2.empty()) {
+  } else if(s2.empty()) {
    out << "del(«" << boost::join(s1, "·") << "»)";
    return out.str();
-  }
-  else {
+  } else {
    typename Tokens::value_type v1 = boost::join(s1, "+");
    typename Tokens::value_type v2 = boost::join(s2, "+");
    out << MakePair(v1, v2, m_general);
@ -158,36 +151,36 @@ std::string CorrectionPattern::CreateSinglePattern(const Tokens &s1, const Token
 }

 std::vector<std::string> GetContext(size_t pos,
-                       size_t len,
-                       size_t window,
-                       const InputType &input,
-                       const InputPath &inputPath,
-                       const std::vector<FactorType>& factorTypes,
-                       bool isRight) {
+                                    size_t len,
+                                    size_t window,
+                                    const InputType &input,
+                                    const InputPath &inputPath,
+                                    const std::vector<FactorType>& factorTypes,
+                                    bool isRight)
+{

  const Sentence& sentence = static_cast<const Sentence&>(input);
-  const Range& range = inputPath.GetWordsRange(); 
-  
-  int leftPos  = range.GetStartPos() + pos - len - 1; 
-  int rightPos = range.GetStartPos() + pos; 
-  
+  const Range& range = inputPath.GetWordsRange();
+
+  int leftPos  = range.GetStartPos() + pos - len - 1;
+  int rightPos = range.GetStartPos() + pos;
+
  std::vector<std::string> contexts;
-  
+
  for(int length = 1; length <= (int)window; ++length) {
    std::vector<std::string> current;
    if(!isRight) {
      for(int i = 0; i < length; i++) {
        if(leftPos - i >= 0) {
          current.push_back(sentence.GetWord(leftPos - i).GetString(factorTypes, false));
-        }
-        else {
+        } else {
          current.push_back("<s>");
        }
      }
-      
+
      if(current.back() == "<s>" && current.size() >= 2 && current[current.size()-2] == "<s>")
        continue;
-      
+
      std::reverse(current.begin(), current.end());
      contexts.push_back("left(«" + boost::join(current, "·") + "»)_");
    }
@ -195,8 +188,7 @@ std::vector<std::string> GetContext(size_t pos,
      for(int i = 0; i < length; i++) {
        if(rightPos + i < (int)sentence.GetSize()) {
          current.push_back(sentence.GetWord(rightPos + i).GetString(factorTypes, false));
-        }
-        else {
+        } else {
          current.push_back("</s>");
        }
      }
@ -206,7 +198,7 @@ std::vector<std::string> GetContext(size_t pos,

      contexts.push_back("_right(«" + boost::join(current, "·") + "»)");
    }
-  }  
+  }
  return contexts;
 }

@ -214,8 +206,9 @@ std::vector<std::string>
 CorrectionPattern::CreatePattern(const Tokens &s1,
                                 const Tokens &s2,
                                 const InputType &input,
-                                 const InputPath &inputPath) const {
-    
+                                 const InputPath &inputPath) const
+{
+
  Diffs diffs = CreateDiff(s1, s2);
  size_t i = 0, j = 0;
  char lastType = 'm';
@ -226,20 +219,20 @@ CorrectionPattern::CreatePattern(const Tokens &s1,
      if(lastType != 'm') {
        std::string pattern = CreateSinglePattern(source, target);
        patternList.push_back(pattern);
-      
+
        if(m_context > 0) {
          std::vector<std::string> leftContexts =  GetContext(i, source.size(), m_context, input, inputPath, m_contextFactors, false);
          std::vector<std::string> rightContexts = GetContext(i, source.size(), m_context, input, inputPath, m_contextFactors, true);
-          
+
          BOOST_FOREACH(std::string left, leftContexts)
-            patternList.push_back(left + pattern);
+          patternList.push_back(left + pattern);

          BOOST_FOREACH(std::string right, rightContexts)
-            patternList.push_back(pattern + right);
-          
+          patternList.push_back(pattern + right);
+
          BOOST_FOREACH(std::string left, leftContexts)
-            BOOST_FOREACH(std::string right, rightContexts)
-              patternList.push_back(left + pattern + right);
+          BOOST_FOREACH(std::string right, rightContexts)
+          patternList.push_back(left + pattern + right);
        }
      }
      source.clear();
@ -250,12 +243,10 @@ CorrectionPattern::CreatePattern(const Tokens &s1,
      }
      i++;
      j++;
-    }
-    else if(type == 'd') {
+    } else if(type == 'd') {
      source.push_back(s1[i]);
      i++;
-    }
-    else if(type == 'i') {
+    } else if(type == 'i') {
      target.push_back(s2[j]);
      j++;
    }
@ -264,23 +255,23 @@ CorrectionPattern::CreatePattern(const Tokens &s1,
  if(lastType != 'm') {
    std::string pattern = CreateSinglePattern(source, target);
    patternList.push_back(pattern);
-    
+
    if(m_context > 0) {
      std::vector<std::string> leftContexts =  GetContext(i, source.size(), m_context, input, inputPath, m_contextFactors, false);
      std::vector<std::string> rightContexts = GetContext(i, source.size(), m_context, input, inputPath, m_contextFactors, true);
-      
+
      BOOST_FOREACH(std::string left, leftContexts)
-        patternList.push_back(left + pattern);
+      patternList.push_back(left + pattern);

      BOOST_FOREACH(std::string right, rightContexts)
-        patternList.push_back(pattern + right);
-      
+      patternList.push_back(pattern + right);
+
      BOOST_FOREACH(std::string left, leftContexts)
-        BOOST_FOREACH(std::string right, rightContexts)
-          patternList.push_back(left + pattern + right);
+      BOOST_FOREACH(std::string right, rightContexts)
+      patternList.push_back(left + pattern + right);
    }
  }
-  
+
  return patternList;
 }

@ -308,36 +299,36 @@ void CorrectionPattern::SetParameter(const std::string& key, const std::string&
 }

 void CorrectionPattern::EvaluateWithSourceContext(const InputType &input
-              , const InputPath &inputPath
-              , const TargetPhrase &targetPhrase
-              , const StackVec *stackVec
-              , ScoreComponentCollection &scoreBreakdown
-              , ScoreComponentCollection *estimatedFutureScore) const
+    , const InputPath &inputPath
+    , const TargetPhrase &targetPhrase
+    , const StackVec *stackVec
+    , ScoreComponentCollection &scoreBreakdown
+    , ScoreComponentCollection *estimatedFutureScore) const
 {
  ComputeFeatures(input, inputPath, targetPhrase, &scoreBreakdown);
 }

 void CorrectionPattern::ComputeFeatures(
-    const InputType &input,
-    const InputPath &inputPath,
-    const TargetPhrase& target,
-    ScoreComponentCollection* accumulator) const
+  const InputType &input,
+  const InputPath &inputPath,
+  const TargetPhrase& target,
+  ScoreComponentCollection* accumulator) const
 {
  const Phrase &source = inputPath.GetPhrase();
-  
+
  std::vector<std::string> sourceTokens;
  for(size_t i = 0; i < source.GetSize(); ++i)
    sourceTokens.push_back(source.GetWord(i).GetString(m_factors, false));
-  
+
  std::vector<std::string> targetTokens;
  for(size_t i = 0; i < target.GetSize(); ++i)
    targetTokens.push_back(target.GetWord(i).GetString(m_factors, false));
-  
+
  std::vector<std::string> patternList = CreatePattern(sourceTokens, targetTokens, input, inputPath);
  for(size_t i = 0; i < patternList.size(); ++i)
    accumulator->PlusEquals(this, patternList[i], 1);

-  /*  
+  /*
  BOOST_FOREACH(std::string w, sourceTokens)
    std::cerr << w << " ";
  std::cerr << std::endl;
--- a/moses/FF/CorrectionPattern.h
+++ b/moses/FF/CorrectionPattern.h
@ -29,41 +29,41 @@ public:
  bool IsUseable(const FactorMask &mask) const;

  void EvaluateInIsolation(const Phrase &source
-                        , const TargetPhrase &targetPhrase
-                        , ScoreComponentCollection &scoreBreakdown
-                        , ScoreComponentCollection &estimatedFutureScore) const
+                           , const TargetPhrase &targetPhrase
+                           , ScoreComponentCollection &scoreBreakdown
+                           , ScoreComponentCollection &estimatedFutureScore) const
+  {}
+
+  virtual void EvaluateWithSourceContext(const InputType &input
+                                         , const InputPath &inputPath
+                                         , const TargetPhrase &targetPhrase
+                                         , const StackVec *stackVec
+                                         , ScoreComponentCollection &scoreBreakdown
+                                         , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
+  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+      , const TranslationOptionList &translationOptionList) const
  {}
-  
-  virtual void EvaluateWithSourceContext(const InputType &input
-                , const InputPath &inputPath
-                , const TargetPhrase &targetPhrase
-                , const StackVec *stackVec
-                , ScoreComponentCollection &scoreBreakdown
-                , ScoreComponentCollection *estimatedFutureScore = NULL) const;

-  void EvaluateTranslationOptionListWithSourceContext(const InputType &input                                                                                                             
-              , const TranslationOptionList &translationOptionList) const                                                                                                                
-  {} 
-  
  void EvaluateWhenApplied(const Hypothesis& hypo,
-                ScoreComponentCollection* accumulator) const
+                           ScoreComponentCollection* accumulator) const
  {}
  void EvaluateWhenApplied(const ChartHypothesis &hypo,
-                     ScoreComponentCollection* accumulator) const
+                           ScoreComponentCollection* accumulator) const
  {}

  void ComputeFeatures(const InputType &input,
                       const InputPath &inputPath,
                       const TargetPhrase& targetPhrase,
                       ScoreComponentCollection* accumulator) const;
-  
+
  void SetParameter(const std::string& key, const std::string& value);
-  
+
  std::vector<std::string> CreatePattern(const Tokens &s1,
                                         const Tokens &s2,
                                         const InputType &input,
                                         const InputPath &inputPath) const;
-  
+
  std::string CreateSinglePattern(const Tokens &s1, const Tokens &s2) const;

 };
--- a/moses/FF/Diffs.h
+++ b/moses/FF/Diffs.h
@ -11,22 +11,21 @@ typedef std::vector<Diff> Diffs;

 template <class Sequence, class Pred>
 void CreateDiffRec(size_t** c,
-              const Sequence &s1,
-              const Sequence &s2,
-              size_t start,
-              size_t i,
-              size_t j,
-              Diffs& diffs,
-              Pred pred) {
+                   const Sequence &s1,
+                   const Sequence &s2,
+                   size_t start,
+                   size_t i,
+                   size_t j,
+                   Diffs& diffs,
+                   Pred pred)
+{
  if(i > 0 && j > 0 && pred(s1[i - 1 + start], s2[j - 1 + start])) {
    CreateDiffRec(c, s1, s2, start, i - 1, j - 1, diffs, pred);
    diffs.push_back(Diff('m'));
-  }
-  else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) {
+  } else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) {
    CreateDiffRec(c, s1, s2, start, i, j-1, diffs, pred);
    diffs.push_back(Diff('i'));
-  }
-  else if(i > 0 && (j == 0 || c[i][j-1] < c[i-1][j])) {
+  } else if(i > 0 && (j == 0 || c[i][j-1] < c[i-1][j])) {
    CreateDiffRec(c, s1, s2, start, i-1, j, diffs, pred);
    diffs.push_back(Diff('d'));
  }
@ -34,17 +33,18 @@ void CreateDiffRec(size_t** c,

 template <class Sequence, class Pred>
 Diffs CreateDiff(const Sequence& s1,
-           const Sequence& s2,
-           Pred pred) {
-  
+                 const Sequence& s2,
+                 Pred pred)
+{
+
  Diffs diffs;
-  
+
  size_t n = s2.size();
-  
+
  int start = 0;
  int m_end = s1.size() - 1;
  int n_end = s2.size() - 1;
-    
+
  while(start <= m_end && start <= n_end && pred(s1[start], s2[start])) {
    diffs.push_back(Diff('m'));
    start++;
@ -53,49 +53,51 @@ Diffs CreateDiff(const Sequence& s1,
    m_end--;
    n_end--;
  }
-  
+
  size_t m_new = m_end - start + 1;
  size_t n_new = n_end - start + 1;
-  
+
  size_t** c = new size_t*[m_new + 1];
  for(size_t i = 0; i <= m_new; ++i) {
    c[i] = new size_t[n_new + 1];
    c[i][0] = 0;
  }
  for(size_t j = 0; j <= n_new; ++j)
-    c[0][j] = 0;  
+    c[0][j] = 0;
  for(size_t i = 1; i <= m_new; ++i)
    for(size_t j = 1; j <= n_new; ++j)
      if(pred(s1[i - 1 + start], s2[j - 1 + start]))
        c[i][j] = c[i-1][j-1] + 1;
      else
        c[i][j] = c[i][j-1] > c[i-1][j] ? c[i][j-1] : c[i-1][j];
-  
+
  CreateDiffRec(c, s1, s2, start, m_new, n_new, diffs, pred);
-  
+
  for(size_t i = 0; i <= m_new; ++i)
    delete[] c[i];
  delete[] c;
-    
+
  for (size_t i = n_end + 1; i < n; ++i)
    diffs.push_back(Diff('m'));
-  
+
  return diffs;
 }

 template <class Sequence>
-Diffs CreateDiff(const Sequence& s1, const Sequence& s2) {
+Diffs CreateDiff(const Sequence& s1, const Sequence& s2)
+{
  return CreateDiff(s1, s2, std::equal_to<typename Sequence::value_type>());
 }

 template <class Sequence, class Sig, class Stats>
-void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats) {
+void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats)
+{
  if(sig.size() != stats.size())
    throw "Signature size differs from score array size.";
-  
+
  size_t m = 0, d = 0, i = 0, s = 0;
-  Diffs diff = CreateDiff(s1, s2);  
-  
+  Diffs diff = CreateDiff(s1, s2);
+
  for(int j = 0; j < (int)diff.size(); ++j) {
    if(diff[j] == 'm')
      m++;
@ -109,27 +111,36 @@ void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& sta
        k++;
      }
      j += k;
-    }
-    else if(diff[j] == 'i')
+    } else if(diff[j] == 'i')
      i++;
  }
-  
+
  for(size_t j = 0; j < sig.size(); ++j) {
    switch (sig[j]) {
-      case 'l': stats[j] += d + i + s; break;
-      case 'm': stats[j] += m; break;
-      case 'd': stats[j] += d; break;
-      case 'i': stats[j] += i; break;
-      case 's': stats[j] += s; break;
-      case 'r':
-        float macc = 1;
-        if (d + i + s + m)
-          macc = 1.0 - (float)(d + i + s)/(float)(d + i + s + m);
-        if(macc > 0)
-          stats[j] += log(macc);
-        else
-          stats[j] += log(1.0/(float)(d + i + s + m + 1));
-        break;
+    case 'l':
+      stats[j] += d + i + s;
+      break;
+    case 'm':
+      stats[j] += m;
+      break;
+    case 'd':
+      stats[j] += d;
+      break;
+    case 'i':
+      stats[j] += i;
+      break;
+    case 's':
+      stats[j] += s;
+      break;
+    case 'r':
+      float macc = 1;
+      if (d + i + s + m)
+        macc = 1.0 - (float)(d + i + s)/(float)(d + i + s + m);
+      if(macc > 0)
+        stats[j] += log(macc);
+      else
+        stats[j] += log(1.0/(float)(d + i + s + m + 1));
+      break;
    }
  }
 }
--- a/moses/FF/EditOps.cpp
+++ b/moses/FF/EditOps.cpp
@ -21,14 +21,15 @@ namespace Moses

 using namespace std;

-std::string ParseScores(const std::string &line, const std::string& defaultScores) {
+std::string ParseScores(const std::string &line, const std::string& defaultScores)
+{
  std::vector<std::string> toks = Tokenize(line);
  UTIL_THROW_IF2(toks.empty(), "Empty line");

  for (size_t i = 1; i < toks.size(); ++i) {
    std::vector<std::string> args = TokenizeFirstOnly(toks[i], "=");
    UTIL_THROW_IF2(args.size() != 2,
-      "Incorrect format for feature function arg: " << toks[i]);
+                   "Incorrect format for feature function arg: " << toks[i]);

    if (args[0] == "scores") {
      return args[1];
@ -62,30 +63,29 @@ void EditOps::Load()
 { }

 void EditOps::EvaluateInIsolation(const Phrase &source
-    , const TargetPhrase &target
-    , ScoreComponentCollection &scoreBreakdown
-    , ScoreComponentCollection &estimatedFutureScore) const
+                                  , const TargetPhrase &target
+                                  , ScoreComponentCollection &scoreBreakdown
+                                  , ScoreComponentCollection &estimatedFutureScore) const
 {
  ComputeFeatures(source, target, &scoreBreakdown);
 }

 void EditOps::ComputeFeatures(
-    const Phrase &source,
-    const TargetPhrase& target,
-    ScoreComponentCollection* accumulator) const
+  const Phrase &source,
+  const TargetPhrase& target,
+  ScoreComponentCollection* accumulator) const
 {
  std::vector<float> ops(GetNumScoreComponents(), 0);
-  
+
  if(m_chars) {
    std::vector<FactorType> factors;
    factors.push_back(m_factorType);
-    
+
    std::string sourceStr = source.GetStringRep(factors);
    std::string targetStr = target.GetStringRep(factors);
-    
+
    AddStats(sourceStr, targetStr, m_scores, ops);
-  }
-  else {
+  } else {
    std::vector<std::string> sourceTokens;
    //std::cerr << "Ed src: ";
    for(size_t i = 0; i < source.GetSize(); ++i) {
@ -94,7 +94,7 @@ void EditOps::ComputeFeatures(
      //std::cerr << sourceTokens.back() << " ";
    }
    //std::cerr << std::endl;
-    
+
    std::vector<std::string> targetTokens;
    //std::cerr << "Ed trg: ";
    for(size_t i = 0; i < target.GetSize(); ++i) {
@ -103,10 +103,10 @@ void EditOps::ComputeFeatures(
      //std::cerr << targetTokens.back() << " ";
    }
    //std::cerr << std::endl;
-    
+
    AddStats(sourceTokens, targetTokens, m_scores, ops);
  }
-  
+
  accumulator->PlusEquals(this, ops);
 }

--- a/moses/FF/EditOps.h
+++ b/moses/FF/EditOps.h
@ -32,26 +32,26 @@ public:
  void Load();

  virtual void EvaluateInIsolation(const Phrase &source
-                        , const TargetPhrase &targetPhrase
-                        , ScoreComponentCollection &scoreBreakdown
-                        , ScoreComponentCollection &estimatedFutureScore) const;
-  
+                                   , const TargetPhrase &targetPhrase
+                                   , ScoreComponentCollection &scoreBreakdown
+                                   , ScoreComponentCollection &estimatedFutureScore) const;
+
  void EvaluateWithSourceContext(const InputType &input
-                , const InputPath &inputPath
-                , const TargetPhrase &targetPhrase
-                , const StackVec *stackVec
-                , ScoreComponentCollection &scoreBreakdown
-                , ScoreComponentCollection *estimatedFutureScore = NULL) const
+                                 , const InputPath &inputPath
+                                 , const TargetPhrase &targetPhrase
+                                 , const StackVec *stackVec
+                                 , ScoreComponentCollection &scoreBreakdown
+                                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
  {}
  void EvaluateWhenApplied(const Hypothesis& hypo,
-                ScoreComponentCollection* accumulator) const
+                           ScoreComponentCollection* accumulator) const
  {}
  void EvaluateWhenApplied(const ChartHypothesis &hypo,
-                     ScoreComponentCollection* accumulator) const
+                           ScoreComponentCollection* accumulator) const
+  {}
+  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+      , const TranslationOptionList &translationOptionList) const
  {}
-  void EvaluateTranslationOptionListWithSourceContext(const InputType &input                                                                                                             
-              , const TranslationOptionList &translationOptionList) const                                                                                                                
-  {} 

  void ComputeFeatures(const Phrase &source,
                       const TargetPhrase& targetPhrase,
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@ -1540,6 +1540,150 @@ analysis-precision
 	rerun-on-change: precision-by-coverage-base
 	final-model: yes

+[QUALITY-ESTIMATION] single
+tokenize-input
+	in: raw-input
+	out: tokenized-input
+	default-name: quality-estimation/input.tok
+	pass-unless: input-tokenizer
+	template: $input-tokenizer < IN > OUT
+tokenize-input-devtest
+	in: raw-input-devtest
+	out: tokenized-input-devtest
+	default-name: quality-estimation/input.devtest.tok
+	pass-unless: input-tokenizer
+	template: $input-tokenizer < IN > OUT
+lowercase-input
+	in: tokenized-input
+	out: truecased-input
+	default-name: quality-estimation/input.lc
+	pass-unless: input-lowercaser
+	ignore-if: input-truecaser
+	template: $input-lowercaser < IN > OUT
+lowercase-input-devtest
+	in: tokenized-input-devtest
+	out: truecased-input-devtest
+	default-name: quality-estimation/input.devtest.lc
+	pass-unless: input-lowercaser
+	ignore-if: input-truecaser
+	template: $input-lowercaser < IN > OUT
+truecase-input
+	in: tokenized-input TRUECASER:truecase-model
+	out: truecased-input
+	rerun-on-change: input-truecaser
+	default-name: quality-estimation/input.tc
+	ignore-unless: input-truecaser
+	template: $input-truecaser -model IN1.$input-extension < IN > OUT
+truecase-input-devtest
+	in: tokenized-input-devtest TRUECASER:truecase-model
+	out: truecased-input-devtest
+	rerun-on-change: input-truecaser
+	ignore-unless: input-truecaser
+	default-name: quality-estimation/input.devtest.tc
+	template: $input-truecaser -model IN1.$input-extension < IN > OUT
+split-input
+	in: truecased-input SPLITTER:splitter-model
+	out: split-input
+	rerun-on-change: input-splitter
+	default-name: quality-estimation/input.split
+	pass-unless: input-splitter
+	template: $input-splitter -model IN1.$input-extension < IN > OUT
+split-input-devtest
+	in: truecased-input-devtest SPLITTER:splitter-model
+	out: split-input-devtest
+	rerun-on-change: input-splitter
+	default-name: quality-estimation/input.devtest.split
+	pass-unless: input-splitter
+	template: $input-splitter -model IN1.$input-extension < IN > OUT
+tokenize-reference
+	in: raw-reference
+	out: tokenized-reference
+	default-name: quality-estimation/reference.tok
+	pass-unless: output-tokenizer
+	multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+	template: $output-tokenizer < IN > OUT
+tokenize-reference-devtest
+	in: raw-reference-devtest
+	out: tokenized-reference-devtest
+	default-name: quality-estimation/reference.devtest.tok
+	pass-unless: output-tokenizer
+	multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+	template: $output-tokenizer < IN > OUT
+lowercase-reference
+	in: tokenized-reference
+	out: truecased-reference
+	default-name: quality-estimation/reference.lc
+	pass-unless: output-lowercaser
+	ignore-if: output-truecaser
+	multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+	template: $output-lowercaser < IN > OUT
+lowercase-reference-devtest
+	in: tokenized-reference-devtest
+	out: truecased-reference-devtest
+	default-name: quality-estimation/reference.devtest.lc
+	pass-unless: output-lowercaser
+	ignore-if: output-truecaser
+	multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+	template: $output-lowercaser < IN > OUT
+truecase-reference
+	in: tokenized-reference TRUECASER:truecase-model
+	out: truecased-reference
+	rerun-on-change: output-truecaser
+	default-name: quality-estimation/reference.tc
+	ignore-unless: output-truecaser
+	multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+	template: $output-truecaser -model IN1.$output-extension < IN > OUT
+truecase-reference-devtest
+	in: tokenized-reference-devtest TRUECASER:truecase-model
+	out: truecased-reference-devtest
+	rerun-on-change: output-truecaser
+	default-name: quality-estimation/reference.devtest.tc
+	ignore-unless: output-truecaser
+	multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+	template: $output-truecaser -model IN1.$output-extension < IN > OUT
+decode
+	in: TUNING:config-with-reused-weights split-input
+	out: rich-output
+	default-name: quality-estimation/output
+	template: $decoder -v 0 -tt -f IN < IN1 > OUT
+	error: Translation was not performed correctly
+	not-error: trans: No such file or directory
+decode-devtest
+	in: TUNING:config-with-reused-weights split-input-devtest
+	out: rich-output-devtest
+	default-name: quality-estimation/output-devtest
+	template: $decoder -v 0 -tt -f IN < IN1 > OUT
+	error: Translation was not performed correctly
+	not-error: trans: No such file or directory
+remove-markup
+	in: rich-output
+	out: cleaned-output
+	default-name: quality-estimation/tokenized-output
+	template: $moses-script-dir/ems/support/remove-segmentation-markup.perl < IN > OUT
+remove-markup-devtest
+	in: rich-output-devtest
+	out: cleaned-output-devtest
+	default-name: quality-estimation/tokenized-output-devtest
+	template: $moses-script-dir/ems/support/remove-segmentation-markup.perl < IN > OUT
+score-output
+	in: cleaned-output truecased-reference
+	out: scored-output
+	default-name: quality-estimation/output-scored
+	tmp-name: quality-estimation/ter
+	template: mkdir TMP ; $moses-script-dir/ems/support/ter.perl $tercom IN IN1 TMP > OUT
+score-output-devtest
+	in: cleaned-output-devtest truecased-reference-devtest
+	out: scored-output-devtest
+	default-name: quality-estimation/output-scored-devtest
+	tmp-name: quality-estimation/ter-devtest
+	template: mkdir TMP ; $moses-script-dir/ems/support/ter.perl $tercom IN IN1 TMP > OUT
+train
+	in: input rich-output scored-output input-devtest rich-output-devtest scored-output-devtest
+	out: quality-estimation-model
+	default-name: quality-estimation/model
+	template: $trainer --train-rich IN1 --train-ter IN2 --eval-rich IN4 --eval-ter IN5 --model OUT
+	final-model: yes
+
 [REPORTING] single
 report
 	in: EVALUATION:nist-bleu-score EVALUATION:nist-bleu-c-score EVALUATION:bolt-bleu-score EVALUATION:bolt-bleu-c-score EVALUATION:multi-bleu-score EVALUATION:multi-bleu-c-score EVALUATION:multi-bleu-detok-score EVALUATION:multi-bleu-c-detok-score EVALUATION:meteor-score EVALUATION:ter-score EVALUATION:wer-score EVALUATION:ibm-bleu-score EVALUATION:ibm-bleu-c-score EVALUATION:analysis EVALUATION:analysis-coverage EVALUATION:analysis-prec TRAINING:biconcor-model EVALUATION:wade-analysis
--- a/scripts/ems/support/create-xml.perl
+++ b/scripts/ems/support/create-xml.perl
@ -0,0 +1,51 @@
+#!/usr/bin/env perl
+#
+# This file is part of moses.  Its use is licensed under the GNU Lesser General
+# Public License version 2.1 or, at your option, any later version.
+#
+# Wraps plain text read from STDIN, one segment per line, in the opening and
+# closing tags of a srcset / tstset / refset document and prints it to STDOUT.
+# Usage: create-xml.perl (source|target|ref) < text > xml
+# Only the first letter of the argument is tested, case-insensitively.
+
+use warnings;
+use strict;
+
+# Fall back to "" so that a missing argument reaches the usage error below
+# without first raising "uninitialized value" warnings in the matches.
+my $type = defined($ARGV[0]) ? $ARGV[0] : "";
+if ($type =~ /^s/i) {
+	print "<srcset setid=\"test\" srclang=\"any\">\n";
+	print "<doc docid=\"doc\">\n";
+}
+elsif ($type =~ /^t/i) {
+	print "<tstset setid=\"test\" tgtlang=\"any\" srclang=\"any\">\n";
+	print "<doc sysid=\"moses\" docid=\"doc\">\n";
+}
+elsif ($type =~ /^r/i) {
+	print "<refset setid=\"test\" tgtlang=\"any\" srclang=\"any\">\n";
+	print "<doc sysid=\"ref\" docid=\"doc\">\n";
+}
+else {
+	die("ERROR: specify source / target / ref");
+}
+
+# NOTE(review): segment text is emitted verbatim, without escaping "&" or "<";
+# confirm that the consumer of this output expects unescaped text.
+my $i = 0;
+while(<STDIN>) {
+  chomp;
+  print "<seg id=\"".(++$i)."\">$_</seg>\n";
+}
+
+print "</doc>\n";
+
+if ($type =~ /^s/i) {
+	print "</srcset>\n";
+}
+elsif ($type =~ /^t/i) {
+	print "</tstset>\n";
+}
+elsif ($type =~ /^r/i) {
+	print "</refset>\n";
+}
--- a/scripts/ems/support/remove-segmentation-markup.perl
+++ b/scripts/ems/support/remove-segmentation-markup.perl
@ -9,7 +9,16 @@ use strict;
 $|++;

 while(<STDIN>) {
-  s/ \|\d+\-\d+\| / /g;
-  s/ \|\d+\-\d+\|$//;
-  print $_;
+  chomp;            # not chop: a last line without "\n" must keep its final character
+  s/\|[^\|]+\|//g;  # drop every |...| annotation
+  s/\s+/ /g;        # collapse whitespace runs left behind by the removal
+  s/^ //;
+  s/ $//;
+  print $_."\n";
 }
+
+#while(<STDIN>) {
+#  s/ \|\d+\-\d+\| / /g;
+#  s/ \|\d+\-\d+\|$//;
+#  print $_;
+#}
--- a/scripts/ems/support/ter.perl
+++ b/scripts/ems/support/ter.perl
@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+#
+# This file is part of moses.  Its use is licensed under the GNU Lesser General
+# Public License version 2.1 or, at your option, any later version.
+#
+# Wraps a hypothesis and a reference file with create-xml.perl, runs the given
+# jar on them via "java -jar" and prints the resulting <tmp-dir>/out.ter.
+# Usage: ter.perl tercom-jar hypothesis reference tmp-dir
+
+use strict;
+use warnings;
+use FindBin qw($RealBin);
+
+die("usage: $0 tercom-jar hypothesis reference tmp-dir\n") unless @ARGV >= 4;
+my ($jar, $hyp,$ref,$tmp) = @ARGV;
+
+# Runs a shell command and returns its captured STDOUT.  Capturing (as the
+# plain backticks did) keeps the helpers' own output out of our STDOUT, so
+# only the contents of out.ter are printed.  Dies on a non-zero status so a
+# failed step cannot pass silently and leave missing or stale files behind.
+sub run {
+  my ($cmd) = @_;
+  my $out = `$cmd`;
+  die("ERROR: command failed (status $?): $cmd\n") if $? != 0;
+  return $out;
+}
+
+run("mkdir -p $tmp");
+run("$RealBin/create-xml.perl test < $hyp > $tmp/hyp");
+run("$RealBin/create-xml.perl ref  < $ref > $tmp/ref");
+run("java -jar $jar -h $tmp/hyp -r $tmp/ref -o ter -n $tmp/out");
+print run("cat $tmp/out.ter");
+