Merge branch 'master' of github.com:moses-smt/mosesdecoder

2024-09-20 07:42:21 +03:00 · 2012-03-20 10:45:40 -04:00 · 2012-03-20 10:45:40 -04:00 · ee580d22be
commit ee580d22be
parent 989bb308a0 8e79cd5692
71 changed files with 2186 additions and 1542 deletions
--- a/contrib/other-builds/lm.xcodeproj/project.pbxproj
+++ b/contrib/other-builds/lm.xcodeproj/project.pbxproj
@ -66,8 +66,6 @@
 		1EBA458F14B97E92003CC0EA /* Jamfile in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA454814B97E92003CC0EA /* Jamfile */; };
 		1EBA459014B97E92003CC0EA /* joint_sort_test.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA454914B97E92003CC0EA /* joint_sort_test.cc */; };
 		1EBA459114B97E92003CC0EA /* joint_sort.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA454A14B97E92003CC0EA /* joint_sort.hh */; };
-		1EBA459214B97E92003CC0EA /* key_value_packing_test.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA454B14B97E92003CC0EA /* key_value_packing_test.cc */; };
-		1EBA459314B97E92003CC0EA /* key_value_packing.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA454C14B97E92003CC0EA /* key_value_packing.hh */; };
 		1EBA459414B97E92003CC0EA /* mmap.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA454E14B97E92003CC0EA /* mmap.cc */; };
 		1EBA459514B97E92003CC0EA /* mmap.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA454F14B97E92003CC0EA /* mmap.hh */; };
 		1EBA459614B97E92003CC0EA /* murmur_hash.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA455014B97E92003CC0EA /* murmur_hash.cc */; };
@ -165,8 +163,6 @@
 		1EBA454814B97E92003CC0EA /* Jamfile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.jam; name = Jamfile; path = ../../util/Jamfile; sourceTree = "<group>"; };
 		1EBA454914B97E92003CC0EA /* joint_sort_test.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = joint_sort_test.cc; path = ../../util/joint_sort_test.cc; sourceTree = "<group>"; };
 		1EBA454A14B97E92003CC0EA /* joint_sort.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = joint_sort.hh; path = ../../util/joint_sort.hh; sourceTree = "<group>"; };
-		1EBA454B14B97E92003CC0EA /* key_value_packing_test.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = key_value_packing_test.cc; path = ../../util/key_value_packing_test.cc; sourceTree = "<group>"; };
-		1EBA454C14B97E92003CC0EA /* key_value_packing.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = key_value_packing.hh; path = ../../util/key_value_packing.hh; sourceTree = "<group>"; };
 		1EBA454D14B97E92003CC0EA /* LICENSE */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = LICENSE; path = ../../util/LICENSE; sourceTree = "<group>"; };
 		1EBA454E14B97E92003CC0EA /* mmap.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mmap.cc; path = ../../util/mmap.cc; sourceTree = "<group>"; };
 		1EBA454F14B97E92003CC0EA /* mmap.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = mmap.hh; path = ../../util/mmap.hh; sourceTree = "<group>"; };
@ -277,8 +273,6 @@
 				1EBA454814B97E92003CC0EA /* Jamfile */,
 				1EBA454914B97E92003CC0EA /* joint_sort_test.cc */,
 				1EBA454A14B97E92003CC0EA /* joint_sort.hh */,
-				1EBA454B14B97E92003CC0EA /* key_value_packing_test.cc */,
-				1EBA454C14B97E92003CC0EA /* key_value_packing.hh */,
 				1EBA454D14B97E92003CC0EA /* LICENSE */,
 				1EBA454E14B97E92003CC0EA /* mmap.cc */,
 				1EBA454F14B97E92003CC0EA /* mmap.hh */,
@ -363,7 +357,6 @@
 				1EBA458D14B97E92003CC0EA /* getopt.hh in Headers */,
 				1EBA458E14B97E92003CC0EA /* have.hh in Headers */,
 				1EBA459114B97E92003CC0EA /* joint_sort.hh in Headers */,
-				1EBA459314B97E92003CC0EA /* key_value_packing.hh in Headers */,
 				1EBA459514B97E92003CC0EA /* mmap.hh in Headers */,
 				1EBA459714B97E92003CC0EA /* murmur_hash.hh in Headers */,
 				1EBA459914B97E92003CC0EA /* probing_hash_table.hh in Headers */,
@ -466,7 +459,6 @@
 				1EBA458C14B97E92003CC0EA /* getopt.c in Sources */,
 				1EBA458F14B97E92003CC0EA /* Jamfile in Sources */,
 				1EBA459014B97E92003CC0EA /* joint_sort_test.cc in Sources */,
-				1EBA459214B97E92003CC0EA /* key_value_packing_test.cc in Sources */,
 				1EBA459414B97E92003CC0EA /* mmap.cc in Sources */,
 				1EBA459614B97E92003CC0EA /* murmur_hash.cc in Sources */,
 				1EBA459814B97E92003CC0EA /* probing_hash_table_test.cc in Sources */,
--- a/lm/left.hh
+++ b/lm/left.hh
@ -112,7 +112,7 @@ inline size_t hash_value(const ChartState &state) {
  size_t hashes[2];
  hashes[0] = hash_value(state.left);
  hashes[1] = hash_value(state.right);
-  return util::MurmurHashNative(hashes, sizeof(size_t), state.full);
+  return util::MurmurHashNative(hashes, sizeof(size_t) * 2, state.full);
 }

 template <class M> class RuleScore {
--- a/lm/read_arpa.cc
+++ b/lm/read_arpa.cc
@ -7,6 +7,7 @@
 #include <vector>

 #include <ctype.h>
+#include <math.h>
 #include <string.h>
 #include <stdint.h>

@ -93,7 +94,11 @@ void ReadBackoff(util::FilePiece &in, ProbBackoff &weights) {
    case '\t':
      weights.backoff = in.ReadFloat();
      if (weights.backoff == ngram::kExtensionBackoff) weights.backoff = ngram::kNoExtensionBackoff;
-      if ((in.get() != '\n')) UTIL_THROW(FormatLoadException, "Expected newline after backoff");
+      {
+        int float_class = fpclassify(weights.backoff);
+        UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << weights.backoff);
+      }
+      UTIL_THROW_IF((in.get() != '\n'), FormatLoadException, "Expected newline after backoff");
      break;
    case '\n':
      weights.backoff = ngram::kNoExtensionBackoff;
--- a/lm/read_arpa.hh
+++ b/lm/read_arpa.hh
@ -10,6 +10,8 @@
 #include <iosfwd>
 #include <vector>

+#include <math.h>
+
 namespace lm {

 void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number);
@ -29,20 +31,26 @@ class PositiveProbWarn {

    explicit PositiveProbWarn(WarningAction action) : action_(action) {}

-    void Warn(float prob);
+    float ReadProb(util::FilePiece &f) {
+      float prob = f.ReadFloat();
+      UTIL_THROW_IF(f.get() != '\t', FormatLoadException, "Expected tab after probability");
+      UTIL_THROW_IF(isnan(prob), FormatLoadException, "NaN probability");
+      if (prob > 0.0) {
+        Warn(prob);
+        prob = 0.0;
+      }
+      return prob;
+    }

  private:
+    void Warn(float prob);
+
    WarningAction action_;
 };

 template <class Voc> void Read1Gram(util::FilePiece &f, Voc &vocab, ProbBackoff *unigrams, PositiveProbWarn &warn) {
  try {
-    float prob = f.ReadFloat();
-    if (prob > 0.0) {
-      warn.Warn(prob);
-      prob = 0.0;
-    }
-    if (f.get() != '\t') UTIL_THROW(FormatLoadException, "Expected tab after probability");
+    float prob = warn.ReadProb(f);
    ProbBackoff &value = unigrams[vocab.Insert(f.ReadDelimited(kARPASpaces))];
    value.prob = prob;
    ReadBackoff(f, value);
@ -64,11 +72,7 @@ template <class Voc> void Read1Grams(util::FilePiece &f, std::size_t count, Voc
 // Return true if a positive log probability came out.
 template <class Voc, class Weights> void ReadNGram(util::FilePiece &f, const unsigned char n, const Voc &vocab, WordIndex *const reverse_indices, Weights &weights, PositiveProbWarn &warn) {
  try {
-    weights.prob = f.ReadFloat();
-    if (weights.prob > 0.0) {
-      warn.Warn(weights.prob);
-      weights.prob = 0.0;
-    }
+    weights.prob = warn.ReadProb(f);
    for (WordIndex *vocab_out = reverse_indices + n - 1; vocab_out >= reverse_indices; --vocab_out) {
      *vocab_out = vocab.Index(f.ReadDelimited(kARPASpaces));
    }
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@ -1,12 +1,16 @@
 #include "BleuScorer.h"

 #include <algorithm>
+#include <cassert>
 #include <cmath>
 #include <climits>
 #include <fstream>
 #include <iostream>
 #include <stdexcept>
+#include "Ngram.h"
+#include "Reference.h"
 #include "Util.h"
+#include "Vocabulary.h"

 namespace {

@ -18,74 +22,8 @@ const char REFLEN_CLOSEST[] = "closest";

 } // namespace

-// A simple STL-map based n-gram counts.
-// Basically, we provide typical accessors and mutaors, but
-// we intentionally does not allow erasing elements.
-class BleuScorer::NgramCounts {
- public:
-  // Used to construct the ngram map
-  struct NgramComparator {
-    bool operator()(const vector<int>& a, const vector<int>& b) const {
-      size_t i;
-      const size_t as = a.size();
-      const size_t bs = b.size();
-      for (i = 0; i < as && i < bs; ++i) {
-        if (a[i] < b[i]) {
-          return true;
-        }
-        if (a[i] > b[i]) {
-          return false;
-        }
-      }
-      // entries are equal, shortest wins
-      return as < bs;
-    }
-  };
-
-  typedef vector<int> Key;
-  typedef int Value;
-  typedef map<Key, Value, NgramComparator>::iterator iterator;
-  typedef map<Key, Value, NgramComparator>::const_iterator const_iterator;
-
-  NgramCounts() : kDefaultCount(1) { }
-  virtual ~NgramCounts() { }
-
-  // If the specified "ngram" is found, we add counts.
-  // If not, we insert the default count in the container.
-  void add(const Key& ngram) {
-    const_iterator it = find(ngram);
-    if (it != end()) {
-      m_counts[ngram] = it->second + 1;
-    } else {
-      m_counts[ngram] = kDefaultCount;
-    }
-  }
-
-  void clear() { m_counts.clear(); }
-
-  bool empty() const { return m_counts.empty(); }
-
-  size_t size() const { return m_counts.size(); }
-  size_t max_size() const { return m_counts.max_size(); }
-
-  iterator find(const Key& ngram) { return m_counts.find(ngram); }
-  const_iterator find(const Key& ngram) const { return m_counts.find(ngram); }
-
-  Value& operator[](const Key& ngram) { return m_counts[ngram]; }
-
-  iterator begin() { return m_counts.begin(); }
-  const_iterator begin() const { return m_counts.begin(); }
-  iterator end() { return m_counts.end(); }
-  const_iterator end() const { return m_counts.end(); }
-
- private:
-  const int kDefaultCount;
-  map<Key, Value, NgramComparator> m_counts;
-};
-
 BleuScorer::BleuScorer(const string& config)
    : StatisticsBasedScorer("BLEU", config),
-      kLENGTH(4),
      m_ref_length_type(CLOSEST) {
  const string reflen = getConfig(KEY_REFLEN, REFLEN_CLOSEST);
  if (reflen == REFLEN_AVERAGE) {
@ -101,9 +39,10 @@ BleuScorer::BleuScorer(const string& config)

 BleuScorer::~BleuScorer() {}

-size_t BleuScorer::countNgrams(const string& line, NgramCounts& counts,
+size_t BleuScorer::CountNgrams(const string& line, NgramCounts& counts,
                               unsigned int n)
 {
+  assert(n > 0);
  vector<int> encoded_tokens;
  TokenizeAndEncode(line, encoded_tokens);
  for (size_t k = 1; k <= n; ++k) {
@ -116,7 +55,7 @@ size_t BleuScorer::countNgrams(const string& line, NgramCounts& counts,
      for (size_t j = i; j < i+k && j < encoded_tokens.size(); ++j) {
        ngram.push_back(encoded_tokens[j]);
      }
-      counts.add(ngram);
+      counts.Add(ngram);
    }
  }
  return encoded_tokens.size();
@ -124,10 +63,9 @@ size_t BleuScorer::countNgrams(const string& line, NgramCounts& counts,

 void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
 {
-  //make sure reference data is clear
-  m_ref_counts.reset();
-  m_ref_lengths.clear();
-  ClearEncoder();
+  // Make sure reference data is clear
+  m_references.reset();
+  mert::VocabularyFactory::GetVocabulary()->clear();

  //load reference data
  for (size_t i = 0; i < referenceFiles.size(); ++i) {
@ -139,33 +77,30 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
    string line;
    size_t sid = 0; //sentence counter
    while (getline(refin,line)) {
-      line = this->applyFactors(line);
+      line = applyFactors(line);
      if (i == 0) {
-        NgramCounts *counts = new NgramCounts; //these get leaked
-        m_ref_counts.push_back(counts);
-        vector<size_t> lengths;
-        m_ref_lengths.push_back(lengths);
+        Reference* ref = new Reference;
+        m_references.push_back(ref);    // Take ownership of the Reference object.
      }
-      if (m_ref_counts.size() <= sid) {
+      if (m_references.size() <= sid) {
        throw runtime_error("File " + referenceFiles[i] + " has too many sentences");
      }
      NgramCounts counts;
-      size_t length = countNgrams(line, counts, kLENGTH);
+      size_t length = CountNgrams(line, counts, kBleuNgramOrder);

      //for any counts larger than those already there, merge them in
      for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
-        NgramCounts::const_iterator oldcount_it = m_ref_counts[sid]->find(ci->first);
-        int oldcount = 0;
-        if (oldcount_it != m_ref_counts[sid]->end()) {
-          oldcount = oldcount_it->second;
-        }
-        int newcount = ci->second;
+        const NgramCounts::Key& ngram = ci->first;
+        const NgramCounts::Value newcount = ci->second;
+
+        NgramCounts::Value oldcount = 0;
+        m_references[sid]->get_counts()->Lookup(ngram, &oldcount);
        if (newcount > oldcount) {
-          m_ref_counts[sid]->operator[](ci->first) = newcount;
+          m_references[sid]->get_counts()->operator[](ngram) = newcount;
        }
      }
      //add in the length
-      m_ref_lengths[sid].push_back(length);
+      m_references[sid]->push_back(length);
      if (sid > 0 && sid % 100 == 0) {
        TRACE_ERR(".");
      }
@ -177,44 +112,33 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)

 void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
 {
-  if (sid >= m_ref_counts.size()) {
+  if (sid >= m_references.size()) {
    stringstream msg;
    msg << "Sentence id (" << sid << ") not found in reference set";
    throw runtime_error(msg.str());
  }
  NgramCounts testcounts;
  // stats for this line
-  vector<ScoreStatsType> stats(kLENGTH * 2);
-  string sentence = this->applyFactors(text);
-  const size_t length = countNgrams(sentence, testcounts, kLENGTH);
+  vector<ScoreStatsType> stats(kBleuNgramOrder * 2);
+  string sentence = applyFactors(text);
+  const size_t length = CountNgrams(sentence, testcounts, kBleuNgramOrder);

-  // Calculate effective reference length.
-  switch (m_ref_length_type) {
-    case SHORTEST:
-      CalcShortest(sid, stats);
-      break;
-    case AVERAGE:
-      CalcAverage(sid, stats);
-      break;
-    case CLOSEST:
-      CalcClosest(sid, length, stats);
-      break;
-    default:
-      throw runtime_error("Unsupported reflength strategy");
-  }
+  const int reference_len = CalcReferenceLength(sid, length);
+  stats.push_back(reference_len);

  //precision on each ngram type
  for (NgramCounts::const_iterator testcounts_it = testcounts.begin();
       testcounts_it != testcounts.end(); ++testcounts_it) {
-    NgramCounts::const_iterator refcounts_it = m_ref_counts[sid]->find(testcounts_it->first);
-    int correct = 0;
-    const int guess = testcounts_it->second;
-    if (refcounts_it != m_ref_counts[sid]->end()) {
-      correct = min(refcounts_it->second,guess);
-    }
+    const NgramCounts::Value guess = testcounts_it->second;
    const size_t len = testcounts_it->first.size();
-    stats[len*2-2] += correct;
-    stats[len*2-1] += guess;
+    NgramCounts::Value correct = 0;
+
+    NgramCounts::Value v = 0;
+    if (m_references[sid]->get_counts()->Lookup(testcounts_it->first, &v)) {
+      correct = min(v, guess);
+    }
+    stats[len * 2 - 2] += correct;
+    stats[len * 2 - 1] += guess;
  }
  entry.set(stats);
 }
@ -222,23 +146,41 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
 float BleuScorer::calculateScore(const vector<int>& comps) const
 {
  float logbleu = 0.0;
-  for (int i = 0; i < kLENGTH; ++i) {
+  for (int i = 0; i < kBleuNgramOrder; ++i) {
    if (comps[2*i] == 0) {
      return 0.0;
    }
    logbleu += log(comps[2*i]) - log(comps[2*i+1]);

  }
-  logbleu /= kLENGTH;
-  const float brevity = 1.0 - static_cast<float>(comps[kLENGTH*2]) / comps[1];//reflength divided by test length
+  logbleu /= kBleuNgramOrder;
+  // reflength divided by test length
+  const float brevity = 1.0 - static_cast<float>(comps[kBleuNgramOrder * 2]) / comps[1];
  if (brevity < 0.0) {
    logbleu += brevity;
  }
  return exp(logbleu);
 }

-void BleuScorer::dump_counts(ostream* os,
-                             const NgramCounts& counts) const {
+// Returns the reference length of sentence `sentence_id` computed with the
+// configured m_ref_length_type; `length` is only forwarded to CalcClosest().
+// Throws runtime_error when the configured type is not a known strategy.
+int BleuScorer::CalcReferenceLength(size_t sentence_id, size_t length) {
+  switch (m_ref_length_type) {
+    case AVERAGE:
+      return m_references[sentence_id]->CalcAverage();
+    case CLOSEST:
+      return m_references[sentence_id]->CalcClosest(length);
+    case SHORTEST:
+      return m_references[sentence_id]->CalcShortest();
+    default:
+      // Throw (like the other errors in this file) rather than exit().
+      throw runtime_error("Unsupported reflength strategy");
+  }
+}
+
+void BleuScorer::DumpCounts(ostream* os,
+                            const NgramCounts& counts) const {
  for (NgramCounts::const_iterator it = counts.begin();
       it != counts.end(); ++it) {
    *os << "(";
@ -254,44 +196,3 @@ void BleuScorer::dump_counts(ostream* os,
  *os << endl;
 }

-void BleuScorer::CalcAverage(size_t sentence_id,
-                             vector<ScoreStatsType>& stats) const {
-  int total = 0;
-  for (size_t i = 0;
-       i < m_ref_lengths[sentence_id].size(); ++i) {
-    total += m_ref_lengths[sentence_id][i];
-  }
-  const float mean = static_cast<float>(total) /
-                     m_ref_lengths[sentence_id].size();
-  stats.push_back(static_cast<ScoreStatsType>(mean));
-}
-
-void BleuScorer::CalcClosest(size_t sentence_id,
-                             size_t length,
-                             vector<ScoreStatsType>& stats) const {
-  int min_diff = INT_MAX;
-  int min_idx = 0;
-  for (size_t i = 0; i < m_ref_lengths[sentence_id].size(); ++i) {
-    const int reflength = m_ref_lengths[sentence_id][i];
-    const int length_diff = abs(reflength - static_cast<int>(length));
-
-    // Look for the closest reference
-    if (length_diff < abs(min_diff)) {
-      min_diff = reflength - length;
-      min_idx = i;
-    // if two references has the same closest length, take the shortest
-    } else if (length_diff == abs(min_diff)) {
-      if (reflength < static_cast<int>(m_ref_lengths[sentence_id][min_idx])) {
-        min_idx = i;
-      }
-    }
-  }
-  stats.push_back(m_ref_lengths[sentence_id][min_idx]);
-}
-
-void BleuScorer::CalcShortest(size_t sentence_id,
-                              vector<ScoreStatsType>& stats) const {
-  const int shortest = *min_element(m_ref_lengths[sentence_id].begin(),
-                                    m_ref_lengths[sentence_id].end());
-  stats.push_back(shortest);
-}
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@ -12,55 +12,50 @@

 using namespace std;

+const int kBleuNgramOrder = 4;
+
+class NgramCounts;
+class Reference;
+
 /**
 * Bleu scoring
 */
 class BleuScorer: public StatisticsBasedScorer
 {
 public:
+  enum ReferenceLengthType {
+    AVERAGE,
+    CLOSEST,
+    SHORTEST
+  };
+
  explicit BleuScorer(const string& config = "");
  ~BleuScorer();

  virtual void setReferenceFiles(const vector<string>& referenceFiles);
  virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
  virtual float calculateScore(const vector<int>& comps) const;
-  virtual size_t NumberOfScores() const { return 2 * kLENGTH + 1; }
+  virtual size_t NumberOfScores() const { return 2 * kBleuNgramOrder + 1; }

-private:
-  enum ReferenceLengthType {
-    AVERAGE,
-    SHORTEST,
-    CLOSEST
-  };
+  int CalcReferenceLength(size_t sentence_id, size_t length);

-  /**
-   * A NgramCounts is a key-value store.
-   * Clients don't have to worry about the actual implementation
-   * since this type is used in internal only.
-   */
-  class NgramCounts;
+  ReferenceLengthType GetReferenceLengthType() const { return m_ref_length_type; }
+  void SetReferenceLengthType(ReferenceLengthType type) { m_ref_length_type = type; }
+
+  const std::vector<Reference*>& GetReferences() const { return m_references.get(); }

  /**
   * Count the ngrams of each type, up to the given length in the input line.
   */
-  size_t countNgrams(const string& line, NgramCounts& counts, unsigned int n);
+  size_t CountNgrams(const string& line, NgramCounts& counts, unsigned int n);

-  void dump_counts(std::ostream* os, const NgramCounts& counts) const;
+  void DumpCounts(std::ostream* os, const NgramCounts& counts) const;

-  // For calculating effective reference length.
-  void CalcAverage(size_t sentence_id,
-                   vector<ScoreStatsType>& stats) const;
-  void CalcClosest(size_t sentence_id, size_t length,
-                   vector<ScoreStatsType>& stats) const;
-  void CalcShortest(size_t sentence_id,
-                    vector<ScoreStatsType>& stats) const;
-
-  const int kLENGTH;
+private:
  ReferenceLengthType m_ref_length_type;

-  // data extracted from reference files
-  ScopedVector<NgramCounts> m_ref_counts;
-  vector<vector<size_t> > m_ref_lengths;
+  // reference translations.
+  ScopedVector<Reference> m_references;

  // no copying allowed
  BleuScorer(const BleuScorer&);
--- a/mert/BleuScorerTest.cpp
+++ b/mert/BleuScorerTest.cpp
@ -0,0 +1,155 @@
+#include "BleuScorer.h"
+
+#define BOOST_TEST_MODULE MertBleuScorer
+#include <boost/test/unit_test.hpp>
+
+#include "Ngram.h"
+#include "Vocabulary.h"
+#include "Util.h"
+
+namespace {
+
+NgramCounts* g_counts = NULL;
+
+NgramCounts* GetNgramCounts() {
+  assert(g_counts);
+  return g_counts;
+}
+
+void SetNgramCounts(NgramCounts* counts) {
+  g_counts = counts;
+}
+
+struct Unigram {
+  Unigram(const std::string& a) {
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
+  }
+  NgramCounts::Key instance;
+};
+
+struct Bigram {
+  Bigram(const std::string& a, const std::string& b) {
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(b));
+  }
+  NgramCounts::Key instance;
+};
+
+struct Trigram {
+  Trigram(const std::string& a, const std::string& b, const std::string& c) {
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(b));
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(c));
+  }
+  NgramCounts::Key instance;
+};
+
+struct Fourgram {
+  Fourgram(const std::string& a, const std::string& b,
+           const std::string& c, const std::string& d) {
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(b));
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(c));
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(d));
+  }
+  NgramCounts::Key instance;
+};
+
+bool CheckUnigram(const std::string& str) {
+  Unigram unigram(str);
+  NgramCounts::Value v;
+  return GetNgramCounts()->Lookup(unigram.instance, &v);
+}
+
+bool CheckBigram(const std::string& a, const std::string& b) {
+  Bigram bigram(a, b);
+  NgramCounts::Value v;
+  return GetNgramCounts()->Lookup(bigram.instance, &v);
+}
+
+bool CheckTrigram(const std::string& a, const std::string& b,
+                  const std::string& c) {
+  Trigram trigram(a, b, c);
+  NgramCounts::Value v;
+  return GetNgramCounts()->Lookup(trigram.instance, &v);
+}
+
+bool CheckFourgram(const std::string& a, const std::string& b,
+                   const std::string& c, const std::string& d) {
+  Fourgram fourgram(a, b, c, d);
+  NgramCounts::Value v;
+  return GetNgramCounts()->Lookup(fourgram.instance, &v);
+}
+
+} // namespace
+
+BOOST_AUTO_TEST_CASE(bleu_reference_type) {
+  BleuScorer scorer;
+  // BleuScorer will use "closest" by default.
+  BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::CLOSEST);
+
+  scorer.SetReferenceLengthType(BleuScorer::AVERAGE);
+  BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::AVERAGE);
+
+  scorer.SetReferenceLengthType(BleuScorer::SHORTEST);
+  BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::SHORTEST);
+}
+
+BOOST_AUTO_TEST_CASE(bleu_count_ngrams) {
+  BleuScorer scorer;
+
+  std::string line = "I saw a girl with a telescope .";
+  // The string above yields the following 25 distinct n-grams.
+  //
+  // unigram: "I", "saw", "a", "girl", "with", "telescope", "."
+  // bigram:  "I saw", "saw a", "a girl", "girl with", "with a", "a telescope"
+  //          "telescope ."
+  // trigram: "I saw a", "saw a girl", "a girl with", "girl with a",
+  //          "with a telescope", "a telescope ."
+  // 4-gram:  "I saw a girl", "saw a girl with", "a girl with a",
+  //          "girl with a telescope", "with a telescope ."
+  NgramCounts counts;
+  BOOST_REQUIRE(scorer.CountNgrams(line, counts, kBleuNgramOrder) == 8);
+  BOOST_CHECK_EQUAL(25, counts.size());
+
+  mert::Vocabulary* vocab = scorer.GetVocab();
+  BOOST_CHECK_EQUAL(7, vocab->size());
+
+  std::vector<std::string> res;
+  Tokenize(line.c_str(), ' ', &res);
+  std::vector<int> ids(res.size());
+  for (std::size_t i = 0; i < res.size(); ++i) {
+    BOOST_CHECK(vocab->Lookup(res[i], &ids[i]));
+  }
+
+  SetNgramCounts(&counts);
+
+  // unigram
+  for (std::size_t i = 0; i < res.size(); ++i) {
+    BOOST_CHECK(CheckUnigram(res[i]));
+  }
+
+  // bigram
+  BOOST_CHECK(CheckBigram("I", "saw"));
+  BOOST_CHECK(CheckBigram("saw", "a"));
+  BOOST_CHECK(CheckBigram("a", "girl"));
+  BOOST_CHECK(CheckBigram("girl", "with"));
+  BOOST_CHECK(CheckBigram("with", "a"));
+  BOOST_CHECK(CheckBigram("a", "telescope"));
+  BOOST_CHECK(CheckBigram("telescope", "."));
+
+  // trigram
+  BOOST_CHECK(CheckTrigram("I", "saw", "a"));
+  BOOST_CHECK(CheckTrigram("saw", "a", "girl"));
+  BOOST_CHECK(CheckTrigram("a", "girl", "with"));
+  BOOST_CHECK(CheckTrigram("girl", "with", "a"));
+  BOOST_CHECK(CheckTrigram("with", "a", "telescope"));
+  BOOST_CHECK(CheckTrigram("a", "telescope", "."));
+
+  // 4-gram
+  BOOST_CHECK(CheckFourgram("I", "saw", "a", "girl"));
+  BOOST_CHECK(CheckFourgram("saw", "a", "girl", "with"));
+  BOOST_CHECK(CheckFourgram("a", "girl", "with", "a"));
+  BOOST_CHECK(CheckFourgram("girl", "with", "a", "telescope"));
+  BOOST_CHECK(CheckFourgram("with", "a", "telescope", "."));
+}
--- a/mert/CderScorer.cpp
+++ b/mert/CderScorer.cpp
@ -70,6 +70,7 @@ float CderScorer::calculateScore(const vector<int>& comps) const
  if (comps.size() != 2) {
    throw runtime_error("Size of stat vector for CDER is not 2");
  }
+  if (comps[1] == 0) return 1.0f;
  return 1.0f - (comps[0] / static_cast<float>(comps[1]));
 }

--- a/mert/Data.cpp
+++ b/mert/Data.cpp
@ -7,7 +7,6 @@
 */

 #include <algorithm>
-#include "util/check.hh"
 #include <cmath>
 #include <fstream>

@ -16,87 +15,82 @@
 #include "Scorer.h"
 #include "ScorerFactory.h"
 #include "Util.h"
+#include "util/check.hh"

 Data::Data()
-  : theScorer(NULL),
-    number_of_scores(0),
-    _sparse_flag(false),
-    scoredata(),
-    featdata() {}
+  : m_scorer(NULL),
+    m_num_scores(0),
+    m_sparse_flag(false),
+    m_score_data(),
+    m_feature_data() {}

-Data::Data(Scorer& ptr)
-    : theScorer(&ptr),
-      score_type(theScorer->getName()),
-      number_of_scores(0),
-      _sparse_flag(false),
-      scoredata(new ScoreData(*theScorer)),
-      featdata(new FeatureData)
+Data::Data(Scorer* scorer)
+    : m_scorer(scorer),
+      m_score_type(m_scorer->getName()),
+      m_num_scores(0),
+      m_sparse_flag(false),
+      m_score_data(new ScoreData(m_scorer)),
+      m_feature_data(new FeatureData)
 {
-  TRACE_ERR("Data::score_type " << score_type << std::endl);
-  TRACE_ERR("Data::Scorer type from Scorer: " << theScorer->getName() << endl);
+  TRACE_ERR("Data::m_score_type " << m_score_type << endl);
+  TRACE_ERR("Data::Scorer type from Scorer: " << m_scorer->getName() << endl);
 }

 //ADDED BY TS
-void Data::remove_duplicates() {
+// TODO: This is too long; consider creating additional functions to
+// reduce the lines of this function.
+void Data::removeDuplicates() {
+  size_t nSentences = m_feature_data->size();
+  assert(m_score_data->size() == nSentences);

-  size_t nSentences = featdata->size();
-  assert(scoredata->size() == nSentences);
-
-  for (size_t s=0; s < nSentences; s++) {
-
-    FeatureArray& feat_array =  featdata->get(s);
-    ScoreArray& score_array =  scoredata->get(s);
+  for (size_t s = 0; s < nSentences; s++) {
+    FeatureArray& feat_array =  m_feature_data->get(s);
+    ScoreArray& score_array =  m_score_data->get(s);

    assert(feat_array.size() == score_array.size());

    //serves as a hash-map:
-    std::map<double, std::vector<size_t> > lookup;
+    map<double, vector<size_t> > lookup;

    size_t end_pos = feat_array.size() - 1;

    size_t nRemoved = 0;
-    for (size_t k=0; k <= end_pos; k++) {

+    for (size_t k = 0; k <= end_pos; k++) {
      const FeatureStats& cur_feats = feat_array.get(k);
-
      double sum = 0.0;
-      for (size_t l=0; l < cur_feats.size(); l++)
-	sum += cur_feats.get(l);
+      for (size_t l = 0; l < cur_feats.size(); l++)
+        sum += cur_feats.get(l);

      if (lookup.find(sum) != lookup.end()) {

-	//std::cerr << "hit" << std::endl;
+        //cerr << "hit" << endl;
+        vector<size_t>& cur_list = lookup[sum];

-	std::vector<size_t>& cur_list = lookup[sum];
+        // TODO: Confirm that reusing the name 'l' here is harmless; it was
+        // already used as the index of the feature-summing loop above. If it
+        // does not affect duplicate removal, rename this index for clarity.
+        size_t l = 0;
+        for (l = 0; l < cur_list.size(); l++) {
+          size_t j = cur_list[l];

-	size_t l=0;
-	for (l=0; l < cur_list.size(); l++) {
-	  
-	  size_t j=cur_list[l];
-
-	  if (cur_feats == feat_array.get(j)
-	      && score_array.get(k) == score_array.get(j)) {
-
-	    if (k < end_pos) {
-	      
-	      feat_array.swap(k,end_pos);
-	      score_array.swap(k,end_pos);
-	      
-	      k--;
-	    }
-	    
-	    end_pos--;
-	    nRemoved++;
-	    break;
-	  }
-	}
-
-	if (l == lookup[sum].size())
-	  cur_list.push_back(k);
+          if (cur_feats == feat_array.get(j)
+              && score_array.get(k) == score_array.get(j)) {
+            if (k < end_pos) {
+              feat_array.swap(k,end_pos);
+              score_array.swap(k,end_pos);
+              k--;
+            }
+            end_pos--;
+            nRemoved++;
+            break;
+          }
+        }
+        if (l == lookup[sum].size())
+          cur_list.push_back(k);
+      } else {
+        lookup[sum].push_back(k);
      }
-      else
-	lookup[sum].push_back(k);
-
      // for (size_t j=0; j < k; j++) {

      // 	if (feat_array.get(k) == feat_array.get(j)
@ -115,11 +109,9 @@ void Data::remove_duplicates() {
      //          break;
      // 	}
      // }
-    }
-
+    } // end for k

    if (nRemoved > 0) {
-
      feat_array.resize(end_pos+1);
      score_array.resize(end_pos+1);
    }
@ -127,124 +119,133 @@ void Data::remove_duplicates() {
 }
 //END_ADDED

+// Loads feature statistics from featfile and score statistics from
+// scorefile. The sparse flag is raised when the loaded feature data reports
+// sparse features; this function never clears it.
+void Data::load(const std::string &featfile, const std::string &scorefile) {
+  m_feature_data->load(featfile);
+  m_score_data->load(scorefile);
+
+  const bool loaded_sparse = m_feature_data->hasSparseFeatures();
+  if (loaded_sparse) {
+    m_sparse_flag = true;
+  }
+}

-void Data::loadnbest(const std::string &file)
+void Data::loadNBest(const string &file)
 {
-  TRACE_ERR("loading nbest from " << file << std::endl);
-
-  FeatureStats featentry;
-  ScoreStats scoreentry;
-  std::string sentence_index;
-
+  TRACE_ERR("loading nbest from " << file << endl);
  inputfilestream inp(file); // matches a stream with a file. Opens the file
-
  if (!inp.good())
    throw runtime_error("Unable to open: " + file);

-  std::string substring, subsubstring, stringBuf;
-  std::string theSentence;
-  std::string::size_type loc;
-
-  while (getline(inp,stringBuf,'\n')) {
-    if (stringBuf.empty()) continue;
-
-//              TRACE_ERR("stringBuf: " << stringBuf << std::endl);
-
-    getNextPound(stringBuf, substring, "|||"); //first field
-    sentence_index = substring;
-
-    getNextPound(stringBuf, substring, "|||"); //second field
-    theSentence = substring;
+  ScoreStats scoreentry;
+  string line, sentence_index, sentence, feature_str;

+  while (getline(inp, line, '\n')) {
+    if (line.empty()) continue;
    // adding statistics for error measures
-    featentry.reset();
    scoreentry.clear();

-    theScorer->prepareStats(sentence_index, theSentence, scoreentry);
+    getNextPound(line, sentence_index, "|||"); // first field
+    getNextPound(line, sentence, "|||");       // second field
+    getNextPound(line, feature_str, "|||");    // third field

-    scoredata->add(scoreentry, sentence_index);
-
-    getNextPound(stringBuf, substring, "|||"); //third field
+    m_scorer->prepareStats(sentence_index, sentence, scoreentry);
+    m_score_data->add(scoreentry, sentence_index);

    // examine first line for name of features
    if (!existsFeatureNames()) {
-      std::string stringsupport=substring;
-      std::string features="";
-      std::string tmpname="";
-
-      size_t tmpidx=0;
-      while (!stringsupport.empty()) {
-        //                      TRACE_ERR("Decompounding: " << substring << std::endl);
-        getNextPound(stringsupport, subsubstring);
-
-        // string ending with ":" are skipped, because they are the names of the features
-        if ((loc = subsubstring.find_last_of(":")) != subsubstring.length()-1) {
-          features+=tmpname+"_"+stringify(tmpidx)+" ";
-          tmpidx++;
-        }
-        // ignore sparse feature name
-        else if (subsubstring.find("_") != string::npos) {
-          // also ignore its value
-          getNextPound(stringsupport, subsubstring);
-        }
-        // update current feature name
-        else {
-          tmpidx=0;
-          tmpname=subsubstring.substr(0,subsubstring.size() - 1);
-        }
-      }
-
-      featdata->setFeatureMap(features);
+      InitFeatureMap(feature_str);
    }
-
-    // adding features
-    while (!substring.empty()) {
-//                      TRACE_ERR("Decompounding: " << substring << std::endl);
-      getNextPound(substring, subsubstring);
-
-      // no ':' -> feature value that needs to be stored
-      if ((loc = subsubstring.find_last_of(":")) != subsubstring.length()-1) {
-        featentry.add(ConvertStringToFeatureStatsType(subsubstring));
-      }
-      // sparse feature name? store as well
-      else if (subsubstring.find("_") != string::npos) {
-        std::string name = subsubstring;
-        getNextPound(substring, subsubstring);
-        featentry.addSparse( name, atof(subsubstring.c_str()) );
-        _sparse_flag = true;
-      }
-    }
-    //cerr << "number of sparse features: " << featentry.getSparse().size() << endl;
-    featdata->add(featentry,sentence_index);
+    AddFeatures(feature_str, sentence_index);
  }
-
  inp.close();
 }

+// Reports the selected write mode on stderr, then saves the feature data to
+// featfile and the score data to scorefile, passing bin through to both.
+void Data::save(const std::string &featfile, const std::string &scorefile, bool bin) {
+  const char* const mode_message = bin
+      ? "Binary write mode is selected"
+      : "Binary write mode is NOT selected";
+  cerr << mode_message << endl;
+
+  m_feature_data->save(featfile, bin);
+  m_score_data->save(scorefile, bin);
+}
+
+// Builds the space-separated dense feature map ("name_0 name_1 ... ") from
+// the feature field of an n-best entry and passes it to the feature data.
+//
+// Tokens are taken from a copy of str with getNextPound() (presumably one
+// delimiter-separated token per call -- confirm against Util.h):
+//   - a token that does not end in ':' is a value of the current feature
+//     name and produces one "<name>_<index> " entry;
+//   - a token ending in ':' that contains '_' is treated as a sparse feature
+//     name and is skipped together with the token that follows it;
+//   - any other token ending in ':' becomes the current feature name (without
+//     the ':') and restarts the index at 0.
+void Data::InitFeatureMap(const string& str) {
+  string buf = str;
+  string substr;
+  string features = "";
+  string tmp_name = "";
+  size_t tmp_index = 0;
+  char tmp[32];                         // holds only "_<index> "
+
+  while (!buf.empty()) {
+    getNextPound(buf, substr);
+
+    // tokens ending with ":" are feature names; everything else is a value
+    if (substr.find_last_of(":") != substr.length()-1) {
+      // Append the name itself directly so that a long feature name can never
+      // be truncated by the fixed-size buffer (which would also drop the
+      // index and the separating space). Only the index is formatted, and
+      // the cast keeps the argument type in step with "%lu" on platforms
+      // where size_t is not unsigned long.
+      snprintf(tmp, sizeof(tmp), "_%lu ",
+               static_cast<unsigned long>(tmp_index));
+      features.append(tmp_name);
+      features.append(tmp);
+
+      tmp_index++;
+    } else if (substr.find("_") != string::npos) {
+      // ignore sparse feature name and its value
+      getNextPound(buf, substr);
+    } else {                              // update current feature name
+      tmp_index = 0;
+      tmp_name = substr.substr(0, substr.size() - 1);
+    }
+  }
+  m_feature_data->setFeatureMap(features);
+}
+
+// Parses the feature field of one n-best entry into a FeatureStats record
+// and adds it to the feature data under sentence_index. Sets the sparse
+// flag as soon as one sparse feature is encountered.
+void Data::AddFeatures(const string& str,
+                       const string& sentence_index) {
+  FeatureStats feature_entry;
+  feature_entry.reset();
+
+  string remaining = str;
+  string token;
+  while (!remaining.empty()) {
+    getNextPound(remaining, token);
+
+    const bool ends_with_colon =
+        token.find_last_of(":") == token.length() - 1;
+    if (!ends_with_colon) {
+      // no trailing ':' -> feature value that needs to be stored
+      feature_entry.add(ConvertStringToFeatureStatsType(token));
+      continue;
+    }
+    if (token.find("_") == string::npos) {
+      // ':'-terminated token without '_': nothing is stored for it
+      continue;
+    }
+    // sparse feature name: the token that follows is read as its value
+    string name = token;
+    getNextPound(remaining, token);
+    feature_entry.addSparse(name, atof(token.c_str()));
+    m_sparse_flag = true;
+  }
+  m_feature_data->add(feature_entry, sentence_index);
+}
+
 // TODO
 void Data::mergeSparseFeatures() {
-  std::cerr << "ERROR: sparse features can only be trained with pairwise ranked optimizer (PRO), not traditional MERT\n";
+  cerr << "ERROR: sparse features can only be trained with pairwise ranked optimizer (PRO), not traditional MERT\n";
  exit(1);
 }

 void Data::createShards(size_t shard_count, float shard_size, const string& scorerconfig,
-                        std::vector<Data>& shards)
+                        vector<Data>& shards)
 {
  CHECK(shard_count);
  CHECK(shard_size >= 0);
  CHECK(shard_size <= 1);

-  size_t data_size = scoredata->size();
-  CHECK(data_size == featdata->size());
+  size_t data_size = m_score_data->size();
+  CHECK(data_size == m_feature_data->size());

  shard_size *= data_size;
+  const float coeff = static_cast<float>(data_size) / shard_count;

  for (size_t shard_id = 0; shard_id < shard_count; ++shard_id) {
    vector<size_t> shard_contents;
    if (shard_size == 0) {
      //split into roughly equal size shards
-      const size_t shard_start = floor(0.5 + shard_id * static_cast<float>(data_size) / shard_count);
-      const size_t shard_end = floor(0.5 + (shard_id + 1) * static_cast<float>(data_size) / shard_count);
+      const size_t shard_start = floor(0.5 + shard_id * coeff);
+      const size_t shard_end = floor(0.5 + (shard_id + 1) * coeff);
      for (size_t i = shard_start; i < shard_end; ++i) {
        shard_contents.push_back(i);
      }
@ -255,15 +256,15 @@ void Data::createShards(size_t shard_count, float shard_size, const string& scor
      }
    }

-    Scorer* scorer = ScorerFactory::getScorer(score_type, scorerconfig);
+    Scorer* scorer = ScorerFactory::getScorer(m_score_type, scorerconfig);

-    shards.push_back(Data(*scorer));
-    shards.back().score_type = score_type;
-    shards.back().number_of_scores = number_of_scores;
-    shards.back()._sparse_flag = _sparse_flag;
+    shards.push_back(Data(scorer));
+    shards.back().m_score_type = m_score_type;
+    shards.back().m_num_scores = m_num_scores;
+    shards.back().m_sparse_flag = m_sparse_flag;
    for (size_t i = 0; i < shard_contents.size(); ++i) {
-      shards.back().featdata->add(featdata->get(shard_contents[i]));
-      shards.back().scoredata->add(scoredata->get(shard_contents[i]));
+      shards.back().m_feature_data->add(m_feature_data->get(shard_contents[i]));
+      shards.back().m_score_data->add(m_score_data->get(shard_contents[i]));
    }
    //cerr << endl;
  }
--- a/mert/Data.h
+++ b/mert/Data.h
@ -11,11 +11,8 @@

 using namespace std;

-#include <limits>
 #include <vector>
-#include <iostream>
-
-#include<boost/shared_ptr.hpp>
+#include <boost/shared_ptr.hpp>

 #include "Util.h"
 #include "FeatureData.h"
@ -26,90 +23,70 @@ class Scorer;
 typedef boost::shared_ptr<ScoreData> ScoreDataHandle;
 typedef boost::shared_ptr<FeatureData> FeatureDataHandle;

+// NOTE: there is no copy constructor implemented, so only the
+// compiler synthesised shallow copy is available.
 class Data
 {
 private:
-  Scorer* theScorer;
-  std::string score_type;
-  size_t number_of_scores;
-  bool _sparse_flag;
+  Scorer* m_scorer;
+  std::string m_score_type;
+  size_t m_num_scores;
+  bool m_sparse_flag;
+  ScoreDataHandle m_score_data;
+  FeatureDataHandle m_feature_data;

-protected:
-  ScoreDataHandle scoredata;
-  FeatureDataHandle featdata;
+  // Helper functions for loadNBest();
+  void InitFeatureMap(const std::string& str);
+  void AddFeatures(const std::string& str,
+                   const std::string& sentence_index);

 public:
-  explicit Data(Scorer& sc);
+  explicit Data(Scorer* scorer);
  Data();

-  //Note that there is no copy constructor implemented, so only the 
-  //compiler synthesised shallow copy is available
-
-  inline void clear() {
-    scoredata->clear();
-    featdata->clear();
+  void clear() {
+    m_score_data->clear();
+    m_feature_data->clear();
  }

-  ScoreDataHandle getScoreData() {
-    return scoredata;
+  ScoreDataHandle getScoreData() { return m_score_data; }
+
+  FeatureDataHandle getFeatureData() { return m_feature_data; }
+
+  Scorer* getScorer() { return m_scorer; }
+
+  size_t NumberOfFeatures() const {
+    return m_feature_data->NumberOfFeatures();
  }

-  FeatureDataHandle getFeatureData() {
-    return featdata;
-  }
+  void NumberOfFeatures(size_t v) { m_feature_data->NumberOfFeatures(v); }

-  Scorer* getScorer() {
-    return theScorer;
-  }
+  std::string Features() const { return m_feature_data->Features(); }
+  void Features(const std::string &f) { m_feature_data->Features(f); }

-  inline size_t NumberOfFeatures() const {
-    return featdata->NumberOfFeatures();
-  }
-  inline void NumberOfFeatures(size_t v) {
-    featdata->NumberOfFeatures(v);
-  }
-  inline std::string Features() const {
-    return featdata->Features();
-  }
-  inline void Features(const std::string &f) {
-    featdata->Features(f);
-  }
-
-  inline bool hasSparseFeatures() const { return _sparse_flag; }
+  bool hasSparseFeatures() const { return m_sparse_flag; }
  void mergeSparseFeatures();

-  void loadnbest(const std::string &file);
-  
-  void load(const std::string &featfile,const std::string &scorefile) {
-    featdata->load(featfile);
-    scoredata->load(scorefile);
-    if (featdata->hasSparseFeatures())
-      _sparse_flag = true;
-  }
+  void loadNBest(const std::string &file);
+
+  void load(const std::string &featfile, const std::string &scorefile);
+
+  void save(const std::string &featfile, const std::string &scorefile, bool bin=false);

  //ADDED BY TS
-  void remove_duplicates();
+  void removeDuplicates();
  //END_ADDED

-  void save(const std::string &featfile,const std::string &scorefile, bool bin=false) {
-
-    if (bin) cerr << "Binary write mode is selected" << endl;
-    else cerr << "Binary write mode is NOT selected" << endl;
-
-    featdata->save(featfile, bin);
-    scoredata->save(scorefile, bin);
-  }
-
  inline bool existsFeatureNames() const {
-    return featdata->existsFeatureNames();
+    return m_feature_data->existsFeatureNames();
  }

  inline std::string getFeatureName(size_t idx) const {
-    return featdata->getFeatureName(idx);
+    return m_feature_data->getFeatureName(idx);
  }

  inline size_t getFeatureIndex(const std::string& name) const {
-    return featdata->getFeatureIndex(name);
+    return m_feature_data->getFeatureIndex(name);
  }

  /**
--- a/mert/DataTest.cpp
+++ b/mert/DataTest.cpp
@ -10,7 +10,7 @@
 //very basic test of sharding
 BOOST_AUTO_TEST_CASE(shard_basic) {
  boost::scoped_ptr<Scorer> scorer(ScorerFactory::getScorer("BLEU", ""));
-  Data data(*scorer);
+  Data data(scorer.get());
  FeatureArray fa1, fa2, fa3, fa4;
  ScoreArray sa1, sa2, sa3, sa4;
  fa1.setIndex("1");
--- a/mert/FeatureArray.cpp
+++ b/mert/FeatureArray.cpp
@ -6,135 +6,147 @@
 *
 */

+#include <fstream>
 #include "FeatureArray.h"
 #include "FileStream.h"
 #include "Util.h"

-
 FeatureArray::FeatureArray()
-    : idx(""), number_of_features(0), _sparse_flag(false) {}
+    : m_index(""), m_num_features(0), m_sparse_flag(false) {}

 FeatureArray::~FeatureArray() {}

-void FeatureArray::savetxt(std::ofstream& outFile)
+void FeatureArray::savetxt(ostream* os)
 {
-  outFile << FEATURES_TXT_BEGIN << " " << idx << " " << array_.size()
-          << " " << number_of_features << " " << features << std::endl;
-  for (featarray_t::iterator i = array_.begin(); i !=array_.end(); i++) {
-    i->savetxt(outFile);
-    outFile << std::endl;
+  *os << FEATURES_TXT_BEGIN << " " << m_index << " " << m_array.size()
+          << " " << m_num_features << " " << m_features << endl;
+  for (featarray_t::iterator i = m_array.begin(); i != m_array.end(); ++i) {
+    i->savetxt(os);
+    *os << endl;
  }
-  outFile << FEATURES_TXT_END << std::endl;
+  *os << FEATURES_TXT_END << endl;
 }

-void FeatureArray::savebin(std::ofstream& outFile)
+void FeatureArray::savebin(ostream* os)
 {
-  outFile << FEATURES_BIN_BEGIN << " " << idx << " " << array_.size()
-          << " " << number_of_features << " " << features << std::endl;
-  for (featarray_t::iterator i = array_.begin(); i !=array_.end(); i++)
-    i->savebin(outFile);
+  *os << FEATURES_BIN_BEGIN << " " << m_index << " " << m_array.size()
+          << " " << m_num_features << " " << m_features << endl;
+  for (featarray_t::iterator i = m_array.begin(); i != m_array.end(); ++i)
+    i->savebin(os);

-  outFile << FEATURES_BIN_END << std::endl;
+  *os << FEATURES_BIN_END << endl;
 }


-void FeatureArray::save(std::ofstream& inFile, bool bin)
+void FeatureArray::save(ostream* os, bool bin)
 {
-  if (size()>0)
-    (bin)?savebin(inFile):savetxt(inFile);
+  if (size() <= 0) return;
+  if (bin) {
+    savebin(os);
+  } else {
+    savetxt(os);
+  }
 }

-void FeatureArray::save(const std::string &file, bool bin)
+void FeatureArray::save(const string &file, bool bin)
 {
-
-  std::ofstream outFile(file.c_str(), std::ios::out); // matches a stream with a file. Opens the file
-
-  save(outFile);
-
-  outFile.close();
+  ofstream ofs(file.c_str(), ios::out);
+  if (!ofs) {
+    cerr << "Failed to open " << file << endl;
+    exit(1);
+  }
+  ostream *os = &ofs;
+  save(os, bin);
+  ofs.close();
 }

-void FeatureArray::loadbin(ifstream& inFile, size_t n)
+void FeatureArray::save(bool bin)
 {
-  FeatureStats entry(number_of_features);
+  save(&cout, bin);
+}

-  for (size_t i=0 ; i < n; i++) {
-    entry.loadbin(inFile);
+void FeatureArray::loadbin(istream* is, size_t n)
+{
+  FeatureStats entry(m_num_features);
+  for (size_t i = 0 ; i < n; i++) {
+    entry.loadbin(is);
    add(entry);
  }
 }

-void FeatureArray::loadtxt(ifstream& inFile, size_t n)
+void FeatureArray::loadtxt(istream* is, size_t n)
 {
-  FeatureStats entry(number_of_features);
+  FeatureStats entry(m_num_features);

-  for (size_t i=0 ; i < n; i++) {
-    entry.loadtxt(inFile);
+  for (size_t i = 0; i < n; i++) {
+    entry.loadtxt(is);
    add(entry);
    if (entry.getSparse().size()>0)
-      _sparse_flag = true;
+      m_sparse_flag = true;
  }
 }

-void FeatureArray::load(ifstream& inFile)
+void FeatureArray::load(istream* is)
 {
-  size_t number_of_entries=0;
-  bool binmode=false;
+  size_t number_of_entries = 0;
+  bool binmode = false;

-  std::string substring, stringBuf;
-  std::string::size_type loc;
+  string substring, stringBuf;
+  string::size_type loc;

-  std::getline(inFile, stringBuf);
-  if (!inFile.good()) {
+  getline(*is, stringBuf);
+  if (!is->good()) {
    return;
  }

  if (!stringBuf.empty()) {
    if ((loc = stringBuf.find(FEATURES_TXT_BEGIN)) == 0) {
-      binmode=false;
+      binmode = false;
    } else if ((loc = stringBuf.find(FEATURES_BIN_BEGIN)) == 0) {
-      binmode=true;
+      binmode = true;
    } else {
      TRACE_ERR("ERROR: FeatureArray::load(): Wrong header");
      return;
    }
    getNextPound(stringBuf, substring);
    getNextPound(stringBuf, substring);
-    idx = substring;
+    m_index = substring;
    getNextPound(stringBuf, substring);
    number_of_entries = atoi(substring.c_str());
    getNextPound(stringBuf, substring);
-    number_of_features = atoi(substring.c_str());
-    features = stringBuf;
+    m_num_features = atoi(substring.c_str());
+    m_features = stringBuf;
  }

-  (binmode)?loadbin(inFile, number_of_entries):loadtxt(inFile, number_of_entries);
+  if (binmode) {
+    loadbin(is, number_of_entries);
+  } else {
+    loadtxt(is, number_of_entries);
+  }

-  std::getline(inFile, stringBuf);
+  getline(*is, stringBuf);
  if (!stringBuf.empty()) {
-    if ((loc = stringBuf.find(FEATURES_TXT_END)) != 0 && (loc = stringBuf.find(FEATURES_BIN_END)) != 0) {
+    if ((loc = stringBuf.find(FEATURES_TXT_END)) != 0 &&
+        (loc = stringBuf.find(FEATURES_BIN_END)) != 0) {
      TRACE_ERR("ERROR: FeatureArray::load(): Wrong footer");
      return;
    }
  }
 }

-void FeatureArray::load(const std::string &file)
+void FeatureArray::load(const string &file)
 {
-  TRACE_ERR("loading data from " << file << std::endl);
-
-  inputfilestream inFile(file); // matches a stream with a file. Opens the file
-
-  load((ifstream&) inFile);
-
-  inFile.close();
-
+  TRACE_ERR("loading data from " << file << endl);
+  inputfilestream input_stream(file); // matches a stream with a file. Opens the file
+  istream* is = &input_stream;
+  load(is);
+  input_stream.close();
 }

 void FeatureArray::merge(FeatureArray& e)
 {
  //dummy implementation
-  for (size_t i=0; i<e.size(); i++)
+  for (size_t i = 0; i < e.size(); i++)
    add(e.get(i));
 }

@ -144,10 +156,9 @@ bool FeatureArray::check_consistency() const
  if (sz == 0)
    return true;

-  for (featarray_t::const_iterator i = array_.begin(); i != array_.end(); i++) {
+  for (featarray_t::const_iterator i = m_array.begin(); i != m_array.end(); i++) {
    if (i->size() != sz)
      return false;
  }
  return true;
 }
-
--- a/mert/FeatureArray.h
+++ b/mert/FeatureArray.h
@ -11,7 +11,6 @@

 #include <vector>
 #include <iostream>
-#include <fstream>
 #include "FeatureStats.h"

 using namespace std;
@ -26,82 +25,57 @@ class FeatureArray
 private:
  // idx to identify the utterance. It can differ from
  // the index inside the vector.
-  std::string idx;
-
-protected:
-  featarray_t array_;
-  size_t number_of_features;
-  std::string features;
-  bool _sparse_flag;
+  std::string m_index;
+  featarray_t m_array;
+  size_t m_num_features;
+  std::string m_features;
+  bool m_sparse_flag;

 public:
  FeatureArray();
  ~FeatureArray();

-  inline void clear() {
-    array_.clear();
-  }
+  void clear() { m_array.clear(); }

-  inline bool hasSparseFeatures() const {
-    return _sparse_flag;
-  }
+  bool hasSparseFeatures() const { return m_sparse_flag; }

-  inline std::string getIndex() const {
-    return idx;
-  }
-  inline void setIndex(const std::string& value) {
-    idx = value;
-  }
+  std::string getIndex() const { return m_index; }
+  void setIndex(const std::string& value) { m_index = value; }

-  inline FeatureStats& get(size_t i) {
-    return array_.at(i);
-  }
-  inline const FeatureStats& get(size_t i)const {
-    return array_.at(i);
-  }
-  void add(FeatureStats& e) {
-    array_.push_back(e);
-  }
+  FeatureStats& get(size_t i) { return m_array.at(i); }
+  const FeatureStats& get(size_t i) const { return m_array.at(i); }
+
+  void add(FeatureStats& e) { m_array.push_back(e); }

  //ADDED BY TS
  void swap(size_t i, size_t j) {
-    std::swap(array_[i],array_[j]);
+    std::swap(m_array[i], m_array[j]);
  }
-  
+
  void resize(size_t new_size) {
-    array_.resize(std::min(new_size,array_.size()));
+    m_array.resize(std::min(new_size, m_array.size()));
  }
  //END_ADDED

  void merge(FeatureArray& e);

-  inline size_t size() const {
-    return array_.size();
-  }
-  inline size_t NumberOfFeatures() const {
-    return number_of_features;
-  }
-  inline void NumberOfFeatures(size_t v) {
-    number_of_features = v;
-  }
-  inline std::string Features() const {
-    return features;
-  }
-  inline void Features(const std::string& f) {
-    features = f;
-  }
+  size_t size() const { return m_array.size(); }

-  void savetxt(ofstream& outFile);
-  void savebin(ofstream& outFile);
-  void save(ofstream& outFile, bool bin=false);
+  size_t NumberOfFeatures() const { return m_num_features; }
+  void NumberOfFeatures(size_t v) { m_num_features = v; }
+
+  std::string Features() const { return m_features; }
+  void Features(const std::string& f) { m_features = f; }
+
+  void savetxt(std::ostream* os);
+  void savebin(std::ostream* os);
+  void save(std::ostream* os, bool bin=false);
  void save(const std::string &file, bool bin=false);
-  inline void save(bool bin=false) {
-    save("/dev/stdout",bin);
-  }
+  void save(bool bin=false);

-  void loadtxt(ifstream& inFile, size_t n);
-  void loadbin(ifstream& inFile, size_t n);
-  void load(ifstream& inFile);
+  void loadtxt(std::istream* is, size_t n);
+  void loadbin(std::istream* is, size_t n);
+  void load(std::istream* is);
  void load(const std::string &file);

  bool check_consistency() const;
--- a/mert/FeatureData.cpp
+++ b/mert/FeatureData.cpp
@ -13,44 +13,45 @@
 #include "Util.h"
 #include <cstdio>

-static const float MIN_FLOAT=-1.0*numeric_limits<float>::max();
-static const float MAX_FLOAT=numeric_limits<float>::max();
+static const float MIN_FLOAT = -1.0 * numeric_limits<float>::max();
+static const float MAX_FLOAT = numeric_limits<float>::max();

 FeatureData::FeatureData()
-    : number_of_features(0),
-      _sparse_flag(false) {}
+    : m_num_features(0),
+      m_sparse_flag(false) {}

-void FeatureData::save(std::ofstream& outFile, bool bin)
+void FeatureData::save(ostream* os, bool bin)
 {
-  for (featdata_t::iterator i = array_.begin(); i !=array_.end(); i++)
-    i->save(outFile, bin);
+  for (featdata_t::iterator i = m_array.begin(); i != m_array.end(); i++)
+    i->save(os, bin);
 }

-void FeatureData::save(const std::string &file, bool bin)
+// Writes every feature array to the named file, in binary form when bin is
+// true. An empty file name is a deliberate no-op. Throws runtime_error when
+// the file cannot be opened, mirroring load(const string&), instead of
+// silently writing nothing.
+void FeatureData::save(const string &file, bool bin)
 {
   if (file.empty()) return;
-
-  TRACE_ERR("saving the array into " << file << std::endl);
-
-  std::ofstream outFile(file.c_str(), std::ios::out); // matches a stream with a file. Opens the file
-
-  save(outFile, bin);
-
-  outFile.close();
+  TRACE_ERR("saving the array into " << file << endl);
+  ofstream ofs(file.c_str(), ios::out); // matches a stream with a file. Opens the file
+  if (!ofs) {
+    throw runtime_error("Unable to open feature file: " + file);
+  }
+  ostream* os = &ofs;
+  save(os, bin);
+  ofs.close();
 }

-void FeatureData::load(ifstream& inFile)
+// Convenience overload: writes every feature array to standard output.
+void FeatureData::save(bool bin) {
+  ostream* os = &cout;
+  save(os, bin);
+}
+
+void FeatureData::load(istream* is)
 {
  FeatureArray entry;

-  while (!inFile.eof()) {
+  while (!is->eof()) {

-    if (!inFile.good()) {
-      std::cerr << "ERROR FeatureData::load inFile.good()" << std::endl;
+    if (!is->good()) {
+      cerr << "ERROR FeatureData::load inFile.good()" << endl;
    }

    entry.clear();
-    entry.load(inFile);
+    entry.load(is);

    if (entry.size() == 0)
      break;
@ -59,26 +60,23 @@ void FeatureData::load(ifstream& inFile)
      setFeatureMap(entry.Features());

    if (entry.hasSparseFeatures())
-      _sparse_flag = true;
+      m_sparse_flag = true;

    add(entry);
  }
 }


-void FeatureData::load(const std::string &file)
+void FeatureData::load(const string &file)
 {
-  TRACE_ERR("loading feature data from " << file << std::endl);
-
-  inputfilestream inFile(file); // matches a stream with a file. Opens the file
-
-  if (!inFile) {
+  TRACE_ERR("loading feature data from " << file << endl);
+  inputfilestream input_stream(file); // matches a stream with a file. Opens the file
+  if (!input_stream) {
    throw runtime_error("Unable to open feature file: " + file);
  }
-
-  load((ifstream&) inFile);
-
-  inFile.close();
+  istream* is = &input_stream;
+  load(is);
+  input_stream.close();
 }

 void FeatureData::add(FeatureArray& e)
@ -86,25 +84,25 @@ void FeatureData::add(FeatureArray& e)
  if (exists(e.getIndex())) { // array at position e.getIndex() already exists
    //enlarge array at position e.getIndex()
    size_t pos = getIndex(e.getIndex());
-    array_.at(pos).merge(e);
+    m_array.at(pos).merge(e);
  } else {
-    array_.push_back(e);
+    m_array.push_back(e);
    setIndex();
  }
 }

-void FeatureData::add(FeatureStats& e, const std::string& sent_idx)
+void FeatureData::add(FeatureStats& e, const string& sent_idx)
 {
  if (exists(sent_idx)) { // array at position e.getIndex() already exists
    //enlarge array at position e.getIndex()
    size_t pos = getIndex(sent_idx);
 //              TRACE_ERR("Inserting " << e << " in array " << sent_idx << std::endl);
-    array_.at(pos).add(e);
+    m_array.at(pos).add(e);
  } else {
 //              TRACE_ERR("Creating a new entry in the array and inserting " << e << std::endl);
    FeatureArray a;
-    a.NumberOfFeatures(number_of_features);
-    a.Features(features);
+    a.NumberOfFeatures(m_num_features);
+    a.Features(m_features);
    a.setIndex(sent_idx);
    a.add(e);
    add(a);
@ -113,10 +111,10 @@ void FeatureData::add(FeatureStats& e, const std::string& sent_idx)

 bool FeatureData::check_consistency() const
 {
-  if (array_.size() == 0)
+  if (m_array.size() == 0)
    return true;

-  for (featdata_t::const_iterator i = array_.begin(); i != array_.end(); i++)
+  for (featdata_t::const_iterator i = m_array.begin(); i != m_array.end(); i++)
    if (!i->check_consistency()) return false;

  return true;
@ -125,26 +123,26 @@ bool FeatureData::check_consistency() const
 void FeatureData::setIndex()
 {
  size_t j=0;
-  for (featdata_t::iterator i = array_.begin(); i !=array_.end(); i++) {
-    idx2arrayname_[j]=(*i).getIndex();
-    arrayname2idx_[(*i).getIndex()] = j;
+  for (featdata_t::iterator i = m_array.begin(); i !=m_array.end(); i++) {
+    m_index_to_array_name[j]=(*i).getIndex();
+    m_array_name_to_index[(*i).getIndex()] = j;
    j++;
  }
 }

-void FeatureData::setFeatureMap(const std::string& feat)
+void FeatureData::setFeatureMap(const string& feat)
 {
-  number_of_features = 0;
-  features = feat;
+  m_num_features = 0;
+  m_features = feat;

-  std::string substring, stringBuf;
-  stringBuf = features;
-  while (!stringBuf.empty()) {
-    getNextPound(stringBuf, substring);
-
-    featname2idx_[substring] = idx2featname_.size();
-    idx2featname_[idx2featname_.size()] = substring;
-    number_of_features++;
+  vector<string> buf;
+  Tokenize(feat.c_str(), ' ', &buf);
+  for (vector<string>::const_iterator it = buf.begin();
+       it != buf.end(); ++it) {
+    const size_t size = m_index_to_feature_name.size();
+    m_feature_name_to_index[*it] = size;
+    m_index_to_feature_name[size] = *it;
+    ++m_num_features;
  }
 }

@ -152,26 +150,23 @@ string FeatureData::ToString() const {
  string res;
  char buf[100];

-  snprintf(buf, sizeof(buf), "number of features: %lu, ", number_of_features);
+  snprintf(buf, sizeof(buf), "number of features: %lu, ", m_num_features);
  res.append(buf);

-  snprintf(buf, sizeof(buf), "features: ");
-  res.append(buf);
-  res.append(features);
+  res.append("features: ");
+  res.append(m_features);

-  snprintf(buf, sizeof(buf), ", sparse flag: %s, ", (_sparse_flag) ? "yes" : "no");
+  snprintf(buf, sizeof(buf), ", sparse flag: %s, ", (m_sparse_flag) ? "yes" : "no");
  res.append(buf);

-  snprintf(buf, sizeof(buf), "feature_id_map = { ");
-  res.append(buf);
-  for (map<string, size_t>::const_iterator it = featname2idx_.begin();
-       it != featname2idx_.end(); ++it) {
+  res.append("feature_id_map = { ");
+  for (map<string, size_t>::const_iterator it = m_feature_name_to_index.begin();
+       it != m_feature_name_to_index.end(); ++it) {
    snprintf(buf, sizeof(buf), "%s => %lu, ",
                  it->first.c_str(), it->second);
    res.append(buf);
  }
-  snprintf(buf, sizeof(buf), "}");
-  res.append(buf);
+  res.append("}");

  return res;
 }
--- a/mert/FeatureData.h
+++ b/mert/FeatureData.h
@ -19,109 +19,92 @@ using namespace std;
 class FeatureData
 {
 private:
-  size_t number_of_features;
-  std::string features;
-  bool _sparse_flag;
-
-  map<std::string, size_t> featname2idx_; // map from name to index of features
-  map<size_t, std::string> idx2featname_; // map from index to name of features
-
-protected:
-  featdata_t array_;
-  idx2name idx2arrayname_; // map from index to name of array
-  name2idx arrayname2idx_; // map from name to index of array
+  size_t m_num_features;
+  std::string m_features;
+  bool m_sparse_flag;
+  map<std::string, size_t> m_feature_name_to_index; // map from name to index of features
+  map<size_t, std::string> m_index_to_feature_name; // map from index to name of features
+  featdata_t m_array;
+  idx2name m_index_to_array_name; // map from index to name of array
+  name2idx m_array_name_to_index; // map from name to index of array

 public:
  FeatureData();
  ~FeatureData() {}

-  inline void clear() {
-    array_.clear();
+  void clear() { m_array.clear(); }
+
+  bool hasSparseFeatures() const { return m_sparse_flag; }
+
+  FeatureArray get(const std::string& idx) {
+    return m_array.at(getIndex(idx));
  }

-  inline bool hasSparseFeatures() const {
-    return _sparse_flag;
-  }
-  inline FeatureArray get(const std::string& idx) {
-    return array_.at(getIndex(idx));
-  }
-  inline FeatureArray& get(size_t idx) {
-    return array_.at(idx);
-  }
-  inline const FeatureArray& get(size_t idx) const {
-    return array_.at(idx);
-  }
+  FeatureArray& get(size_t idx) { return m_array.at(idx); }
+  const FeatureArray& get(size_t idx) const { return m_array.at(idx); }

  inline bool exists(const std::string& sent_idx) const {
    return exists(getIndex(sent_idx));
  }

  inline bool exists(int sent_idx) const {
-    return (sent_idx > -1 && sent_idx < static_cast<int>(array_.size())) ? true : false;
+    return (sent_idx > -1 && sent_idx < static_cast<int>(m_array.size())) ? true : false;
  }

  inline FeatureStats& get(size_t i, size_t j) {
-    return array_.at(i).get(j);
+    return m_array.at(i).get(j);
  }
-  inline const FeatureStats&  get(size_t i, size_t j) const {
-    return array_.at(i).get(j);
+
+  inline const FeatureStats& get(size_t i, size_t j) const {
+    return m_array.at(i).get(j);
  }

  void add(FeatureArray& e);
  void add(FeatureStats& e, const std::string& sent_idx);

-  inline size_t size() const {
-    return array_.size();
-  }
-  inline size_t NumberOfFeatures() const {
-    return number_of_features;
-  }
-  inline void NumberOfFeatures(size_t v) {
-    number_of_features = v;
-  }
-  inline std::string Features() const {
-    return features;
-  }
-  inline void Features(const std::string& f) {
-    features = f;
-  }
+  size_t size() const { return m_array.size(); }
+
+  size_t NumberOfFeatures() const { return m_num_features; }
+  void NumberOfFeatures(size_t v) { m_num_features = v; }
+
+  std::string Features() const { return m_features; }
+  void Features(const std::string& f) { m_features = f; }

  void save(const std::string &file, bool bin=false);
-  void save(ofstream& outFile, bool bin=false);
-  inline void save(bool bin=false) {
-    save("/dev/stdout", bin);
-  }
+  void save(std::ostream* os, bool bin=false);
+  void save(bool bin=false);

-  void load(ifstream& inFile);
+  void load(std::istream* is);
  void load(const std::string &file);

  bool check_consistency() const;
+
  void setIndex();

  inline int getIndex(const std::string& idx) const {
-    name2idx::const_iterator i = arrayname2idx_.find(idx);
-    if (i != arrayname2idx_.end())
+    name2idx::const_iterator i = m_array_name_to_index.find(idx);
+    if (i != m_array_name_to_index.end())
      return i->second;
    else
      return -1;
  }

  inline std::string getIndex(size_t idx) const {
-    idx2name::const_iterator i = idx2arrayname_.find(idx);
-    if (i != idx2arrayname_.end())
+    idx2name::const_iterator i = m_index_to_array_name.find(idx);
+    if (i != m_index_to_array_name.end())
      throw runtime_error("there is no entry at index " + idx);
    return i->second;
  }

  bool existsFeatureNames() const {
-    return (idx2featname_.size() > 0) ? true : false;
+    return (m_index_to_feature_name.size() > 0) ? true : false;
  }

  std::string getFeatureName(size_t idx) const {
-    if (idx >= idx2featname_.size())
+    if (idx >= m_index_to_feature_name.size())
      throw runtime_error("Error: you required an too big index");
-    map<size_t, std::string>::const_iterator it = idx2featname_.find(idx);
-    if (it == idx2featname_.end()) {
+    map<size_t, std::string>::const_iterator it = m_index_to_feature_name.find(idx);
+    if (it == m_index_to_feature_name.end()) {
      throw runtime_error("Error: specified id is unknown: " + idx);
    } else {
      return it->second;
@ -129,8 +112,8 @@ public:
  }

  size_t getFeatureIndex(const std::string& name) const {
+    // Maps a feature name to its index through m_feature_name_to_index;
+    // throws std::runtime_error when the name is unknown.
-    map<std::string, size_t>::const_iterator it = featname2idx_.find(name);
-    if (it == featname2idx_.end())
+    map<std::string, size_t>::const_iterator it = m_feature_name_to_index.find(name);
+    if (it == m_feature_name_to_index.end())
      throw runtime_error("Error: feature " + name + " is unknown");
    return it->second;
  }
--- a/mert/FeatureDataTest.cpp
+++ b/mert/FeatureDataTest.cpp
@ -0,0 +1,39 @@
+#include "FeatureData.h"
+
+#define BOOST_TEST_MODULE FeatureData
+#include <boost/test/unit_test.hpp>
+
+#include <cstdio>
+
+namespace {
+
+// Checks that the features "<str>_0" .. "<str>_<num_feature - 1>" are
+// registered in feature_data at consecutive indices starting at *cnt, in both
+// directions (name -> index and index -> name). *cnt is advanced by
+// num_feature so that successive calls continue where the previous one ended.
+void CheckFeatureMap(const FeatureData* feature_data,
+                     const char* str, int num_feature, int* cnt) {
+  char tmp[32];
+  for (int i = 0; i < num_feature; ++i) {
+    std::snprintf(tmp, sizeof(tmp), "%s_%d", str, i);
+    const std::string name(tmp);
+    // getFeatureIndex() returns size_t, so compare against a size_t rather
+    // than mixing signed and unsigned operands inside the Boost macro.
+    const size_t expected = static_cast<size_t>(*cnt);
+    BOOST_CHECK_EQUAL(feature_data->getFeatureIndex(name), expected);
+    // Compare names as std::string values, not as raw character pointers.
+    BOOST_CHECK_EQUAL(feature_data->getFeatureName(expected), name);
+    ++(*cnt);
+  }
+}
+
+} // namespace
+
+// setFeatureMap() must remember the raw feature string, count its entries and
+// number the feature names in the order in which they appear.
+BOOST_AUTO_TEST_CASE(set_feature_map) {
+  // Fifteen space-separated names: 7 "d", 2 "lm", 5 "tm" and 1 "w".
+  std::string str("d_0 d_1 d_2 d_3 d_4 d_5 d_6 lm_0 lm_1 tm_0 tm_1 tm_2 tm_3 tm_4 w_0 ");
+  FeatureData feature_data;
+
+  feature_data.setFeatureMap(str);
+
+  // The *_EQUAL forms print both operands when the check fails.
+  BOOST_REQUIRE_EQUAL(feature_data.Features(), str);
+  BOOST_REQUIRE_EQUAL(feature_data.NumberOfFeatures(), static_cast<size_t>(15));
+
+  int cnt = 0;
+  CheckFeatureMap(&feature_data, "d", 7, &cnt);
+  CheckFeatureMap(&feature_data, "lm", 2, &cnt);
+  CheckFeatureMap(&feature_data, "tm", 5, &cnt);
+
+  // The feature index is a size_t; avoid a signed/unsigned comparison.
+  const size_t last = static_cast<size_t>(cnt);
+  BOOST_CHECK_EQUAL(feature_data.getFeatureIndex("w_0"), last);
+  BOOST_CHECK_EQUAL(feature_data.getFeatureName(last), std::string("w_0"));
+}
--- a/mert/FeatureStats.cpp
+++ b/mert/FeatureStats.cpp
@ -8,6 +8,7 @@

 #include "FeatureStats.h"

+#include <fstream>
 #include <cmath>
 #include "Util.h"

@ -15,58 +16,58 @@ namespace {
 const int kAvailableSize = 8;
 } // namespace

-SparseVector::name2id_t SparseVector::name2id_;
-SparseVector::id2name_t SparseVector::id2name_;
+SparseVector::name2id_t SparseVector::m_name_to_id;
+SparseVector::id2name_t SparseVector::m_id_to_name;

 FeatureStatsType SparseVector::get(const string& name) const {
+  // Translates the name through the class-wide (static) name -> id table and
+  // delegates to get(size_t). A name that was never registered reads as 0.
-  name2id_t::const_iterator name2id_iter = name2id_.find(name);
-  if (name2id_iter == name2id_.end()) return 0;
+  name2id_t::const_iterator name2id_iter = m_name_to_id.find(name);
+  if (name2id_iter == m_name_to_id.end()) return 0;
  size_t id = name2id_iter->second;
  return get(id);
 }

 FeatureStatsType SparseVector::get(size_t id) const {
+  // Value stored for `id` in this vector, or 0 when it holds no such entry.
-  fvector_t::const_iterator fvector_iter = fvector_.find(id);
-  if (fvector_iter == fvector_.end()) return 0;
+  fvector_t::const_iterator fvector_iter = m_fvector.find(id);
+  if (fvector_iter == m_fvector.end()) return 0;
  return fvector_iter->second;
 }

 void SparseVector::set(const string& name, FeatureStatsType value) {
+  // Stores `value` under `name`. A name seen for the first time is appended to
+  // the static id -> name list and receives the next free id, i.e. the size of
+  // that list before the append.
-  name2id_t::const_iterator name2id_iter = name2id_.find(name);
+  name2id_t::const_iterator name2id_iter = m_name_to_id.find(name);
  size_t id = 0;
-  if (name2id_iter == name2id_.end()) {
-    id = id2name_.size();
-    id2name_.push_back(name);
-    name2id_[name] = id;
+  if (name2id_iter == m_name_to_id.end()) {
+    id = m_id_to_name.size();
+    m_id_to_name.push_back(name);
+    m_name_to_id[name] = id;
  } else {
    id = name2id_iter->second;
  }
-  fvector_[id] = value;
+  m_fvector[id] = value;
 }

 void SparseVector::write(ostream& out, const string& sep) const {
+  // Emits every stored entry as "<name><sep><value> ", skipping entries whose
+  // absolute value is below 0.00001.
+  // NOTE(review): this relies on a floating-point overload of abs() being
+  // selected for FeatureStatsType - confirm against the includes in use.
-  for (fvector_t::const_iterator i = fvector_.begin(); i != fvector_.end(); ++i) {
+  for (fvector_t::const_iterator i = m_fvector.begin(); i != m_fvector.end(); ++i) {
    if (abs(i->second) < 0.00001) continue;
-    string name = id2name_[i->first];
+    string name = m_id_to_name[i->first];
    out << name << sep << i->second << " ";
  }
 }

 void SparseVector::clear() {
+  // Drops the entries of this vector only; the static name/id tables shared
+  // by all vectors are left untouched.
-  fvector_.clear();
+  m_fvector.clear();
 }

 SparseVector& SparseVector::operator-=(const SparseVector& rhs) {
  //All the elements that have values in *this
-  for (fvector_t::iterator i = fvector_.begin(); i != fvector_.end(); ++i) {
-    fvector_[i->first] = i->second - rhs.get(i->first);
+  for (fvector_t::iterator i = m_fvector.begin(); i != m_fvector.end(); ++i) {
+    m_fvector[i->first] = i->second - rhs.get(i->first);
  }

  //Any elements in rhs, that have no value in *this
-  for (fvector_t::const_iterator i = rhs.fvector_.begin();
-      i != rhs.fvector_.end(); ++i) {
-    if (fvector_.find(i->first) == fvector_.end()) {
-      fvector_[i->first] = -(i->second);
+  for (fvector_t::const_iterator i = rhs.m_fvector.begin();
+      i != rhs.m_fvector.end(); ++i) {
+    if (m_fvector.find(i->first) == m_fvector.end()) {
+      m_fvector[i->first] = -(i->second);
    }
  }
  return *this;
@ -79,37 +80,37 @@ SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs) {
 }

 FeatureStats::FeatureStats()
+    // Starts with room for kAvailableSize values and no entries. The array
+    // size expression reads m_available_size, so that member has to be
+    // declared (and hence initialized) before m_array. The array contents are
+    // not zeroed here.
-    : available_(kAvailableSize), entries_(0),
-      array_(new FeatureStatsType[available_]) {}
+    : m_available_size(kAvailableSize), m_entries(0),
+      m_array(new FeatureStatsType[m_available_size]) {}

 FeatureStats::FeatureStats(const size_t size)
+    // Creates an object that already holds `size` entries; the body zeroes
+    // all of them.
-    : available_(size), entries_(size),
-      array_(new FeatureStatsType[available_])
+    : m_available_size(size), m_entries(size),
+      m_array(new FeatureStatsType[m_available_size])
 {
-  memset(array_, 0, GetArraySizeWithBytes());
+  memset(m_array, 0, GetArraySizeWithBytes());
 }

-FeatureStats::FeatureStats(std::string &theString)
-    : available_(0), entries_(0), array_(NULL)
+// Builds the stats by handing `theString` to set(). The object starts without
+// any storage (capacity 0, NULL array) before set() runs.
+FeatureStats::FeatureStats(string &theString)
+    : m_available_size(0), m_entries(0), m_array(NULL)
 {
  set(theString);
 }

 FeatureStats::~FeatureStats()
 {
+  // Releases the dense array, if any, and resets the pointer to NULL.
-  if (array_) {
-    delete [] array_;
-    array_ = NULL;
+  if (m_array) {
+    delete [] m_array;
+    m_array = NULL;
  }
 }

 void FeatureStats::Copy(const FeatureStats &stats)
 {
+  // Deep-copies `stats` into *this: allocates a fresh array with the same
+  // capacity, copies the entries in use (size() values) and the sparse map.
+  // The array currently held by *this is NOT released here, so a caller that
+  // already owns one must free it before calling.
-  available_ = stats.available();
-  entries_ = stats.size();
-  array_ = new FeatureStatsType[available_];
-  memcpy(array_, stats.getArray(), GetArraySizeWithBytes());
-  map_ = stats.getSparse();
+  m_available_size = stats.available();
+  m_entries = stats.size();
+  m_array = new FeatureStatsType[m_available_size];
+  memcpy(m_array, stats.getArray(), GetArraySizeWithBytes());
+  m_map = stats.getSparse();
 }

 FeatureStats::FeatureStats(const FeatureStats &stats)
@ -119,34 +120,34 @@ FeatureStats::FeatureStats(const FeatureStats &stats)

 FeatureStats& FeatureStats::operator=(const FeatureStats &stats)
 {
-  delete [] array_;
+  // Self-assignment must be a no-op: freeing m_array first would make Copy()
+  // read from the array that has just been deleted.
+  if (this == &stats) return *this;
+  // Copy() allocates a new array without freeing the old one, so release it
+  // here. Resetting the pointer keeps the destructor from deleting it a
+  // second time should the allocation inside Copy() throw.
+  delete [] m_array;
+  m_array = NULL;
  Copy(stats);
  return *this;
 }

 void FeatureStats::expand()
 {
-  available_ *= 2;
-  featstats_t t_ = new FeatureStatsType[available_];
-  memcpy(t_, array_, GetArraySizeWithBytes());
-  delete [] array_;
-  array_ = t_;
+  // Grows the dense array, keeping the entries already stored.
+  // An object built by the string constructor starts with capacity 0;
+  // doubling 0 would leave the capacity at 0 and the next add() would write
+  // past a zero-length array. Fall back to the default capacity instead.
+  if (m_available_size == 0) {
+    m_available_size = kAvailableSize;
+  } else {
+    m_available_size *= 2;
+  }
+  featstats_t t_ = new FeatureStatsType[m_available_size];
+  // m_array is NULL for an object that never owned storage; there is nothing
+  // to carry over then, and memcpy must not be handed a null source.
+  if (m_array) {
+    memcpy(t_, m_array, GetArraySizeWithBytes());
+  }
+  delete [] m_array;
+  m_array = t_;
 }

 void FeatureStats::add(FeatureStatsType v)
 {
+  // Appends `v` to the dense array, growing the storage first when it is full.
  if (isfull()) expand();
-  array_[entries_++]=v;
+  m_array[m_entries++]=v;
 }

 void FeatureStats::addSparse(const string& name, FeatureStatsType v)
 {
+  // Stores `v` under `name` in the sparse map, replacing any previous value.
-  map_.set(name,v);
+  m_map.set(name,v);
 }

-void FeatureStats::set(std::string &theString)
+void FeatureStats::set(string &theString)
 {
-  std::string substring, stringBuf;
+  string substring, stringBuf;
  reset();

  while (!theString.empty()) {
@ -163,48 +164,50 @@ void FeatureStats::set(std::string &theString)
  }
 }

-
-void FeatureStats::loadbin(std::ifstream& inFile)
+// Reads GetArraySizeWithBytes() raw bytes from `is` straight into the dense
+// array, so the entry count has to be set before this is called.
+void FeatureStats::loadbin(istream* is)
 {
-  inFile.read((char*) array_, GetArraySizeWithBytes());
+  is->read(reinterpret_cast<char*>(m_array),
+           static_cast<streamsize>(GetArraySizeWithBytes()));
 }

-void FeatureStats::loadtxt(std::ifstream& inFile)
+// Reads one line from `is` and parses it with set().
+void FeatureStats::loadtxt(istream* is)
 {
-  std::string theString;
-  std::getline(inFile, theString);
-  set(theString);
+  string line;
+  getline(*is, line);
+  set(line);
 }

-void FeatureStats::loadtxt(const std::string &file)
+// Opens `file` and loads the stats from its first line. A file that cannot be
+// opened is reported on stderr and terminates the program with status 1.
+void FeatureStats::loadtxt(const string &file)
 {
-  //    TRACE_ERR("loading the stats from " << file << std::endl);
-
-  std::ifstream inFile(file.c_str(), std::ios::in); // matches a stream with a file. Opens the file
-
-  loadtxt(inFile);
+  ifstream ifs(file.c_str(), ios::in);
+  if (!ifs) {
+    cerr << "Failed to open " << file << endl;
+    exit(1);
+  }
+  istream* is = &ifs;
+  loadtxt(is);
 }

-
-void FeatureStats::savetxt(const std::string &file)
+// Writes the text form of the stats to `file`. A file that cannot be opened
+// is reported on stderr and terminates the program with status 1, mirroring
+// loadtxt(const string&); previously the failure was silently ignored and
+// nothing was written.
+void FeatureStats::savetxt(const string &file)
 {
-//      TRACE_ERR("saving the stats into " << file << std::endl);
-
-  std::ofstream outFile(file.c_str(), std::ios::out); // matches a stream with a file. Opens the file
-
-  savetxt(outFile);
+  ofstream ofs(file.c_str(), ios::out);
+  if (!ofs) {
+    cerr << "Failed to open " << file << endl;
+    exit(1);
+  }
+  ostream* os = &ofs;
+  savetxt(os);
 }

-
-void FeatureStats::savetxt(std::ofstream& outFile)
+// Writes the text form of the stats to `os` using operator<<.
+void FeatureStats::savetxt(ostream* os)
 {
-//      TRACE_ERR("saving the stats" << std::endl);
-  outFile << *this;
+  *os << *this;
 }

-void FeatureStats::savebin(std::ofstream& outFile)
+// Writes the text form of the stats to standard output.
+void FeatureStats::savetxt()
+{
+  ostream* os = &cout;
+  savetxt(os);
+}
+
+// Writes the entries in use to `os` as GetArraySizeWithBytes() raw bytes
+// taken directly from the dense array.
+void FeatureStats::savebin(ostream* os)
 {
-  outFile.write((char*) array_, GetArraySizeWithBytes());
+  os->write(reinterpret_cast<char*>(m_array),
+            static_cast<streamsize>(GetArraySizeWithBytes()));
 }

 ostream& operator<<(ostream& o, const FeatureStats& e)
@ -230,7 +233,7 @@ bool operator==(const FeatureStats& f1, const FeatureStats& f2) {
    if (f1.get(k) != f2.get(k))
      return false;
  }
-  
+
  return true;
 }
 //END_ADDED
--- a/mert/FeatureStats.h
+++ b/mert/FeatureStats.h
@ -10,7 +10,6 @@
 #define MERT_FEATURE_STATS_H_

 #include <cstring>
-#include <fstream>
 #include <iostream>
 #include <map>
 #include <string>
@ -30,18 +29,16 @@ public:
  FeatureStatsType get(size_t id) const;
  void set(const std::string& name, FeatureStatsType value);
  void clear();
-  size_t size() const {
-    return fvector_.size();
-  }
+  // Number of entries stored in this vector.
+  size_t size() const { return m_fvector.size(); }

  void write(std::ostream& out, const std::string& sep = " ") const;

  SparseVector& operator-=(const SparseVector& rhs);

 private:
-  static name2id_t name2id_;
-  static id2name_t id2name_;
-  fvector_t fvector_;
+  static name2id_t m_name_to_id;
+  static id2name_t m_id_to_name;
+  fvector_t m_fvector;
 };

 SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
@ -49,12 +46,12 @@ SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
 class FeatureStats
 {
 private:
-  size_t available_;
-  size_t entries_;
+  size_t m_available_size;
+  size_t m_entries;

  // TODO: Use smart pointer for exceptional-safety.
-  featstats_t array_;
-  SparseVector map_;
+  featstats_t m_array;
+  SparseVector m_map;

 public:
  FeatureStats();
@ -69,64 +66,47 @@ public:

  void Copy(const FeatureStats &stats);

-  bool isfull() const {
-    return (entries_ < available_) ? 0 : 1;
-  }
+  // True when no allocated slot is free, i.e. m_entries >= m_available_size.
+  bool isfull() const { return (m_entries < m_available_size) ? 0 : 1; }
  void expand();
  void add(FeatureStatsType v);
  void addSparse(const string& name, FeatureStatsType v);

  void clear() {
+    // Zeroes the entries currently in use and empties the sparse map; the
+    // entry count itself is left unchanged.
-    memset((void*)array_, 0, GetArraySizeWithBytes());
-    map_.clear();
+    memset((void*)m_array, 0, GetArraySizeWithBytes());
+    m_map.clear();
  }

  void reset() {
+    // Forgets all dense entries and empties the sparse map. Because the entry
+    // count is zeroed first, the memset inside clear() covers zero bytes: the
+    // old values stay in memory but are no longer counted as entries.
-    entries_ = 0;
+    m_entries = 0;
    clear();
  }

-  inline FeatureStatsType get(size_t i) {
-    return array_[i];
-  }
-  inline FeatureStatsType get(size_t i)const {
-    return array_[i];
-  }
-  inline featstats_t getArray() const {
-    return array_;
-  }
-  inline const SparseVector& getSparse() const {
-    return map_;
-  }
+  // Element access into the dense array; `i` is not range-checked.
+  FeatureStatsType get(size_t i) { return m_array[i]; }
+  FeatureStatsType get(size_t i)const { return m_array[i]; }
+  // The dense array itself (no copy is made).
+  featstats_t getArray() const { return m_array; }
+
+  // Read-only access to the sparse features.
+  const SparseVector& getSparse() const { return m_map; }

  void set(std::string &theString);

-  inline size_t bytes() const {
-    return GetArraySizeWithBytes();
-  }
+  // Same value as GetArraySizeWithBytes().
+  inline size_t bytes() const { return GetArraySizeWithBytes(); }

  size_t GetArraySizeWithBytes() const {
+    // Bytes occupied by the entries in use (not by the allocated capacity).
-    return entries_ * sizeof(FeatureStatsType);
+    return m_entries * sizeof(FeatureStatsType);
  }

-  inline size_t size() const {
-    return entries_;
-  }
+  // Number of dense entries currently stored.
+  size_t size() const { return m_entries; }

-  inline size_t available() const {
-    return available_;
-  }
+  // Number of dense entries the current allocation can hold.
+  size_t available() const { return m_available_size; }

  void savetxt(const std::string &file);
-  void savetxt(ofstream& outFile);
-  void savebin(ofstream& outFile);
-  inline void savetxt() {
-    savetxt("/dev/stdout");
-  }
+  void savetxt(std::ostream* os);
+  void savebin(std::ostream* os);
+  void savetxt();

  void loadtxt(const std::string &file);
-  void loadtxt(ifstream& inFile);
-  void loadbin(ifstream& inFile);
+  void loadtxt(std::istream* is);
+  void loadbin(std::istream* is);

  /**
   * Write the whole object to a stream.
--- a/mert/FileStream.cpp
+++ b/mert/FileStream.cpp
@ -13,11 +13,11 @@ bool IsGzipFile(const std::string &filename) {
 } // namespace

 inputfilestream::inputfilestream(const std::string &filePath)
-    : std::istream(0), m_streambuf(0), is_good(false)
+    : std::istream(0), m_streambuf(0), m_is_good(false)
 {
  // check if file is readable
  std::filebuf* fb = new std::filebuf();
-  is_good = (fb->open(filePath.c_str(), std::ios::in) != NULL);
+  m_is_good = (fb->open(filePath.c_str(), std::ios::in) != NULL);

  if (IsGzipFile(filePath)) {
    fb->close();
@ -40,11 +40,11 @@ void inputfilestream::close()
 }

 outputfilestream::outputfilestream(const std::string &filePath)
-    : std::ostream(0), m_streambuf(0), is_good(false)
+    : std::ostream(0), m_streambuf(0), m_is_good(false)
 {
  // check if file is readable
  std::filebuf* fb = new std::filebuf();
-  is_good = (fb->open(filePath.c_str(), std::ios::out) != NULL);
+  m_is_good = (fb->open(filePath.c_str(), std::ios::out) != NULL);

  if (IsGzipFile(filePath)) {
    throw runtime_error("Output to a zipped file not supported!");
--- a/mert/FileStream.h
+++ b/mert/FileStream.h
@ -2,6 +2,7 @@
 #define MERT_FILE_STREAM_H_

 #include <fstream>
+#include <iostream>
 #include <streambuf>
 #include <string>

@ -9,13 +10,13 @@ class inputfilestream : public std::istream
 {
 protected:
  std::streambuf *m_streambuf;
-  bool is_good;
+  bool m_is_good;

 public:
  explicit inputfilestream(const std::string &filePath);
  virtual ~inputfilestream();

-  bool good() const { return is_good; }
+  bool good() const { return m_is_good; }
  void close();
 };

@ -23,13 +24,13 @@ class outputfilestream : public std::ostream
 {
 protected:
  std::streambuf *m_streambuf;
-  bool is_good;
+  bool m_is_good;

 public:
  explicit outputfilestream(const std::string &filePath);
  virtual ~outputfilestream();

-  bool good() const { return is_good; }
+  bool good() const { return m_is_good; }
  void close();
 };

--- a/mert/InterpolatedScorer.cpp
+++ b/mert/InterpolatedScorer.cpp
@ -1,35 +1,36 @@
-#include "ScorerFactory.h"
 #include "InterpolatedScorer.h"
+#include "ScorerFactory.h"
 #include "Util.h"

 using namespace std;

-
-InterpolatedScorer::InterpolatedScorer (const string& name, const string& config): Scorer(name,config)
+// TODO: This is too long. Consider creating a function for
+// initialization such as Init().
+InterpolatedScorer::InterpolatedScorer(const string& name, const string& config)
+    : Scorer(name,config)
 {
-
  // name would be: HAMMING,BLEU or similar
  string scorers = name;
  while (scorers.length() > 0) {
    string scorertype = "";
-    getNextPound(scorers,scorertype,",");
-    Scorer *theScorer=ScorerFactory::getScorer(scorertype,config);
-    _scorers.push_back(theScorer);
+    getNextPound(scorers, scorertype,",");
+    Scorer *scorer = ScorerFactory::getScorer(scorertype,config);
+    m_scorers.push_back(scorer);
  }
-  if (_scorers.size() == 0) {
+  if (m_scorers.size() == 0) {
    throw runtime_error("There are no scorers");
  }
-  cerr << "Number of scorers: " << _scorers.size() << endl;
+  cerr << "Number of scorers: " << m_scorers.size() << endl;

  //TODO debug this
  string wtype = getConfig("weights","");
  //Default weights set to uniform ie. if two weights 0.5 each
  //weights should add to 1
  if (wtype.length() == 0) {
-    float weight = 1.0/_scorers.size() ;
+    float weight = 1.0 / m_scorers.size() ;
    //cout << " Default weights:" << weight << endl;
-    for (size_t i = 0; i < _scorers.size(); i ++) {
-      _scorerWeights.push_back(weight);
+    for (size_t i = 0; i < m_scorers.size(); i ++) {
+      m_scorer_weights.push_back(weight);
    }
  } else {
    float tot=0;
@ -38,24 +39,24 @@ InterpolatedScorer::InterpolatedScorer (const string& name, const string& config
      string scoreweight = "";
      getNextPound(wtype,scoreweight,"+");
      float weight = atof(scoreweight.c_str());
-      _scorerWeights.push_back(weight);
+      m_scorer_weights.push_back(weight);
      tot += weight;
      //cout << " :" << weight ;
    }
    //cout << endl;
-    if (tot != float(1)) {
-      for (vector<float>::iterator it = _scorerWeights.begin(); it != _scorerWeights.end(); ++it)
-      {
+    if (tot != float(1)) { // TODO: fix this checking in terms of readability.
+      for (vector<float>::iterator it = m_scorer_weights.begin();
+           it != m_scorer_weights.end(); ++it) {
        *it /= tot;
      }
    }

-    if (_scorers.size() != _scorerWeights.size()) {
+    if (m_scorers.size() != m_scorer_weights.size()) {
      throw runtime_error("The number of weights does not equal the number of scorers!");
    }
  }
  cerr << "The weights for the interpolated scorers are: " << endl;
-  for (vector<float>::iterator it = _scorerWeights.begin(); it < _scorerWeights.end(); it++) {
+  for (vector<float>::iterator it = m_scorer_weights.begin(); it < m_scorer_weights.end(); it++) {
    cerr << *it << " " ;
  }
  cerr <<endl;
@ -65,9 +66,10 @@ void InterpolatedScorer::setScoreData(ScoreData* data)
 {
  size_t last = 0;
  m_score_data = data;
-  for (ScopedVector<Scorer>::iterator itsc = _scorers.begin(); itsc!=_scorers.end(); itsc++) {
+  for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin();
+       itsc != m_scorers.end(); ++itsc) {
    int numScoresScorer = (*itsc)->NumberOfScores();
-    ScoreData* newData =new ScoreData(**itsc);
+    ScoreData* newData =new ScoreData(*itsc);
    for (size_t i = 0; i < data->size(); i++) {
      ScoreArray scoreArray = data->get(i);
      ScoreArray newScoreArray;
@ -110,14 +112,16 @@ void InterpolatedScorer::score(const candidates_t& candidates, const diffs_t& di
 {
  //cout << "*******InterpolatedScorer::score" << endl;
  size_t scorerNum = 0;
-  for (ScopedVector<Scorer>::const_iterator itsc =  _scorers.begin(); itsc!=_scorers.end(); itsc++) {
+  for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
+       itsc != m_scorers.end(); ++itsc) {
    //int numScores = (*itsc)->NumberOfScores();
    statscores_t tscores;
    (*itsc)->score(candidates,diffs,tscores);
    size_t inc = 0;
-    for (statscores_t::iterator itstatsc =  tscores.begin(); itstatsc!=tscores.end(); itstatsc++) {
+    for (statscores_t::iterator itstatsc = tscores.begin();
+         itstatsc != tscores.end(); ++itstatsc) {
      //cout << "Scores " << (*itstatsc) << endl;
-      float weight = _scorerWeights[scorerNum];
+      float weight = m_scorer_weights[scorerNum];
      if (weight == 0) {
        stringstream msg;
        msg << "No weights for scorer" << scorerNum ;
@ -139,7 +143,8 @@ void InterpolatedScorer::score(const candidates_t& candidates, const diffs_t& di

 void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles)
 {
+  // Hands the same list of reference files to every wrapped scorer.
-  for (ScopedVector<Scorer>::iterator itsc =  _scorers.begin(); itsc!=_scorers.end(); itsc++) {
+  for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin();
+       itsc != m_scorers.end(); ++itsc) {
    (*itsc)->setReferenceFiles(referenceFiles);
  }
 }
@ -147,8 +152,9 @@ void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles)
 void InterpolatedScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
 {
  stringstream buff;
-  int i=0;
-  for (ScopedVector<Scorer>::iterator itsc =  _scorers.begin(); itsc!=_scorers.end(); itsc++) {
+  int i = 0;
+  for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin();
+       itsc != m_scorers.end(); ++itsc) {
    ScoreStats tempEntry;
    (*itsc)->prepareStats(sid, text, tempEntry);
    if (i > 0) buff <<  " ";
@ -167,16 +173,10 @@ void InterpolatedScorer::setFactors(const string& factors)
  vector<string> fsplit;
  split(factors, ',', fsplit);

-  if (fsplit.size() != _scorers.size()) throw runtime_error("Number of factor specifications does not equal number of interpolated scorers.");
-  
-  for (size_t i = 0; i < _scorers.size(); ++i)
-  {
-    _scorers[i]->setFactors(fsplit[i]);
+  if (fsplit.size() != m_scorers.size())
+    throw runtime_error("Number of factor specifications does not equal number of interpolated scorers.");
+
+  for (size_t i = 0; i < m_scorers.size(); ++i) {
+    m_scorers[i]->setFactors(fsplit[i]);
  }
 }
-
-
-
-
-
-
--- a/mert/InterpolatedScorer.h
+++ b/mert/InterpolatedScorer.h
@ -1,14 +1,6 @@
-#ifndef __INTERPOLATED_SCORER_H__
-#define __INTERPOLATED_SCORER_H__
+#ifndef MERT_INTERPOLATED_SCORER_H_
+#define MERT_INTERPOLATED_SCORER_H_

-#include <algorithm>
-#include <cmath>
-#include <iostream>
-#include <iterator>
-#include <limits>
-#include <set>
-#include <sstream>
-#include <stdexcept>
 #include <string>
 #include <vector>
 #include "Types.h"
@ -33,12 +25,13 @@ public:
  virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);

  virtual size_t NumberOfScores() const {
+    // Sum of NumberOfScores() over all wrapped scorers.
-    size_t sz=0;
-    for (ScopedVector<Scorer>::const_iterator itsc = _scorers.begin(); itsc != _scorers.end(); itsc++) {
+    size_t sz = 0;
+    for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
+         itsc != m_scorers.end(); ++itsc) {
      sz += (*itsc)->NumberOfScores();
    }
    return sz;
-  };
+  }

  virtual void setScoreData(ScoreData* data);

@ -48,13 +41,13 @@ public:
  virtual void setFactors(const string& factors);

 protected:
-  ScopedVector<Scorer> _scorers;
+  ScopedVector<Scorer> m_scorers;

  // Take the ownership of the heap-allocated the objects
  // by Scorer objects.
  ScopedVector<ScoreData> m_scorers_score_data;

-  vector<float> _scorerWeights;
+  vector<float> m_scorer_weights;
 };

-#endif //__INTERPOLATED_SCORER_H
+#endif  // MERT_INTERPOLATED_SCORER_H_
--- a/mert/Jamfile
+++ b/mert/Jamfile
@ -6,9 +6,13 @@ lib mert_lib :
 Util.cpp
 FileStream.cpp
 Timer.cpp
-ScoreStats.cpp ScoreArray.cpp ScoreData.cpp
+ScoreStats.cpp
+ScoreArray.cpp
+ScoreData.cpp
 ScoreDataIterator.cpp
-FeatureStats.cpp FeatureArray.cpp FeatureData.cpp
+FeatureStats.cpp
+FeatureArray.cpp
+FeatureData.cpp
 FeatureDataIterator.cpp
 Data.cpp
 BleuScorer.cpp
@ -18,6 +22,7 @@ PerScorer.cpp
 Scorer.cpp
 ScorerFactory.cpp
 Optimizer.cpp
+OptimizerFactory.cpp
 TER/alignmentStruct.cpp
 TER/hashMap.cpp
 TER/hashMapStringInfos.cpp
@ -32,6 +37,7 @@ TER/tools.cpp
 TerScorer.cpp
 CderScorer.cpp
 MergeScorer.cpp
+Vocabulary.cpp
 ../util//kenutil m ..//z ;

 exe mert : mert.cpp mert_lib ../moses/src//ThreadPool ;
@ -44,8 +50,15 @@ exe pro : pro.cpp mert_lib ..//boost_program_options ;

 alias programs : mert extractor evaluator pro ;

+unit-test bleu_scorer_test : BleuScorerTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test data_test : DataTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test ngram_test : NgramTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test optimizer_factory_test : OptimizerFactoryTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test reference_test : ReferenceTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test singleton_test : SingletonTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test timer_test : TimerTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test util_test : UtilTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test vocabulary_test : VocabularyTest.cpp mert_lib ..//boost_unit_test_framework ;

 install legacy : programs : <location>. ;
--- a/mert/MergeScorer.cpp
+++ b/mert/MergeScorer.cpp
@ -14,7 +14,8 @@
 using namespace TERCpp;

 MergeScorer::MergeScorer(const string& config)
-    : StatisticsBasedScorer("MERGE",config), kLENGTH(4) {}
+    // Passes the fixed scorer name "MERGE" and `config` to the base class.
+    : StatisticsBasedScorer("MERGE", config) {}
+
 MergeScorer::~MergeScorer() {}

 void MergeScorer::setReferenceFiles(const vector<string>& referenceFiles)
--- a/mert/MergeScorer.h
+++ b/mert/MergeScorer.h
@ -13,6 +13,8 @@ using namespace std;
 class PerScorer;
 class ScoreStats;

+const int kMergeScorerLength = 4;
+
 /**
 * Merge scoring.
 */
@ -23,23 +25,13 @@ public:

  virtual void setReferenceFiles(const vector<string>& referenceFiles);
  virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
-
-  virtual size_t NumberOfScores() const
-  {
-    return 0;
-  }
-
-  void whoami() const {
-    cerr << "I AM MergeScorer" << endl;
-  }
+  // This scorer reports zero score components.
+  virtual size_t NumberOfScores() const { return 0; }

 protected:
  friend class PerScorer;
  virtual float calculateScore(const vector<int>& comps) const;

 private:
-  const int kLENGTH;
-
  // no copying allowed
  MergeScorer(const MergeScorer&);
  MergeScorer& operator=(const MergeScorer&);
--- a/mert/Ngram.h
+++ b/mert/Ngram.h
@ -0,0 +1,98 @@
+#ifndef MERT_NGRAM_H_
+#define MERT_NGRAM_H_
+
+#include <vector>
+#include <map>
+#include <string>
+
+/** A simple std::map-based container of n-gram counts. It provides the
+ * typical accessors and mutators, but intentionally does not allow
+ * erasing individual elements.
+ */
+class NgramCounts {
+ public:
+  /**
+   * Strict weak ordering used to construct the n-gram map: plain
+   * lexicographical order, so when one n-gram is a prefix of the other the
+   * shorter one sorts first.
+   */
+  struct NgramComparator {
+    bool operator()(const std::vector<int>& a, const std::vector<int>& b) const {
+      // std::vector's operator< performs exactly this lexicographical
+      // comparison, so there is no need to hand-roll the loop.
+      return a < b;
+    }
+  };
+
+  typedef std::vector<int> Key;
+  typedef int Value;
+  typedef std::map<Key, Value, NgramComparator>::iterator iterator;
+  typedef std::map<Key, Value, NgramComparator>::const_iterator const_iterator;
+
+  NgramCounts() { }
+  virtual ~NgramCounts() { }
+
+  /**
+   * If the specified "ngram" is found, its count is incremented by one.
+   * If not, it is inserted with the default count.
+   */
+  void Add(const Key& ngram) {
+    // A single insert() both probes for the key and creates it when absent;
+    // result.second is false when the key was already present.
+    std::pair<iterator, bool> result =
+        m_counts.insert(std::make_pair(ngram, static_cast<Value>(kDefaultCount)));
+    if (!result.second) {
+      ++(result.first->second);
+    }
+  }
+
+  /**
+   * Return true iff the specified "ngram" is found in the container.
+   * On success the count is stored in "*v"; otherwise "*v" is left untouched.
+   */
+  bool Lookup(const Key& ngram, Value* v) const {
+    const_iterator it = m_counts.find(ngram);
+    if (it == m_counts.end()) return false;
+    *v = it->second;
+    return true;
+  }
+
+  /**
+   * Clear all elements in the container.
+   */
+  void clear() { m_counts.clear(); }
+
+  /**
+   * Return true iff the container is empty.
+   */
+  bool empty() const { return m_counts.empty(); }
+
+  /**
+   * Return the number of elements in the container.
+   */
+  std::size_t size() const { return m_counts.size(); }
+
+  std::size_t max_size() const { return m_counts.max_size(); }
+
+  // Note: This is mainly used by unit tests.
+  int get_default_count() const { return kDefaultCount; }
+
+  iterator find(const Key& ngram) { return m_counts.find(ngram); }
+  const_iterator find(const Key& ngram) const { return m_counts.find(ngram); }
+
+  // Note: like std::map::operator[], this inserts a zero count when the
+  // n-gram is not present yet.
+  Value& operator[](const Key& ngram) { return m_counts[ngram]; }
+
+  iterator begin() { return m_counts.begin(); }
+  const_iterator begin() const { return m_counts.begin(); }
+  iterator end() { return m_counts.end(); }
+  const_iterator end() const { return m_counts.end(); }
+
+ private:
+  // Count given to an n-gram on its first Add(). This is a compile-time
+  // constant rather than a const data member, because a const member would
+  // needlessly disable copy assignment of the container.
+  enum { kDefaultCount = 1 };
+  std::map<Key, Value, NgramComparator> m_counts;
+};
+
+#endif  // MERT_NGRAM_H_
--- a/mert/NgramTest.cpp
+++ b/mert/NgramTest.cpp
@ -0,0 +1,83 @@
+#include "Ngram.h"
+
+#define BOOST_TEST_MODULE MertNgram
+#include <boost/test/unit_test.hpp>
+
+// A single Add() must leave exactly one entry whose key equals the added
+// n-gram element by element and whose count is 1.
+BOOST_AUTO_TEST_CASE(ngram_basic) {
+  NgramCounts counts;
+  NgramCounts::Key key;
+  key.push_back(1);
+  key.push_back(2);
+  key.push_back(4);
+  counts.Add(key);
+
+  BOOST_REQUIRE(!counts.empty());
+  BOOST_CHECK_EQUAL(counts.size(), 1);
+
+  NgramCounts::const_iterator it = counts.find(key);
+  BOOST_CHECK(it != counts.end());
+  BOOST_CHECK_EQUAL(it->first.size(), key.size());
+  for (size_t i = 0; i < key.size(); ++i) {
+    BOOST_CHECK_EQUAL(it->first[i], key[i]);
+  }
+  BOOST_CHECK_EQUAL(it->second, 1);
+}
+
+// Add() starts a new n-gram at the default count, increments the count when
+// an equal n-gram is added again (even through a different Key object), and
+// keeps distinct n-grams in separate entries.
+BOOST_AUTO_TEST_CASE(ngram_Add) {
+  NgramCounts counts;
+  NgramCounts::Key key;
+  key.push_back(1);
+  key.push_back(2);
+  counts.Add(key);
+  BOOST_REQUIRE(!counts.empty());
+  BOOST_CHECK_EQUAL(counts[key], counts.get_default_count());
+
+  // key2 has the same contents as key, so it addresses the same entry.
+  NgramCounts::Key key2;
+  key2.push_back(1);
+  key2.push_back(2);
+  counts.Add(key2);
+  BOOST_CHECK_EQUAL(counts.size(), 1);
+  BOOST_CHECK_EQUAL(counts[key], counts.get_default_count() + 1);
+  BOOST_CHECK_EQUAL(counts[key2], counts.get_default_count() + 1);
+
+  // key3 differs from the keys above and therefore creates a second entry.
+  NgramCounts::Key key3;
+  key3.push_back(10);
+  counts.Add(key3);
+  BOOST_CHECK_EQUAL(counts.size(), 2);
+  BOOST_CHECK_EQUAL(counts[key3], counts.get_default_count());
+}
+
+// Lookup() must report a stored n-gram together with its count, report an
+// absent n-gram as not found, and find nothing once the container is cleared.
+BOOST_AUTO_TEST_CASE(ngram_lookup) {
+  NgramCounts counts;
+  NgramCounts::Key key;
+  key.push_back(1);
+  key.push_back(2);
+  key.push_back(4);
+  counts.Add(key);
+
+  {
+    NgramCounts::Value v;
+    BOOST_REQUIRE(counts.Lookup(key, &v));
+    BOOST_CHECK_EQUAL(v, 1);
+  }
+
+  // the case the key is not found.
+  {
+    NgramCounts::Key key2;
+    key2.push_back(0);
+    key2.push_back(4);
+    NgramCounts::Value v;
+    // We only check the return value;
+    // we don't check the value of "v" because it makes sense
+    // to check the value when the specified ngram is found.
+    BOOST_REQUIRE(!counts.Lookup(key2, &v));
+  }
+
+  // test after clear
+  counts.clear();
+  BOOST_CHECK(counts.empty());
+  {
+    NgramCounts::Value v;
+    BOOST_CHECK(!counts.Lookup(key, &v));
+  }
+}
--- a/mert/Optimizer.cpp
+++ b/mert/Optimizer.cpp
@ -32,36 +32,25 @@ inline float intersect(float m1, float b1, float m2, float b2)

 } // namespace

-
-void Optimizer::SetScorer(Scorer *_scorer)
+// Sets up the static Point configuration shared by all optimizers:
+//  - Pd      full parameter dimension (stored in Point::m_pdim);
+//  - i2O     indices of the parameters to optimize (Point::m_opt_indices;
+//            its size becomes Point::m_dim);
+//  - start   full initial parameter vector, which must have Pd entries;
+//  - nrandom stored in m_num_random_directions.
+// Every index in [0, Pd) that is missing from i2O is treated as fixed and its
+// start value is recorded in Point::m_fixed_weights.
+Optimizer::Optimizer(unsigned Pd, const vector<unsigned>& i2O, const vector<parameter_t>& start, unsigned int nrandom)
+    : m_scorer(NULL), m_feature_data(), m_num_random_directions(nrandom)
 {
-  scorer = _scorer;
-}
-
-void Optimizer::SetFData(FeatureDataHandle _FData)
-{
-  FData = _FData;
-}
-
-Optimizer::Optimizer(unsigned Pd, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
-    : scorer(NULL), FData(), number_of_random_directions(nrandom)
-{
-  // Warning: the init vector is a full set of parameters, of dimension pdim!
-  Point::pdim = Pd;
+  // Warning: the init vector is a full set of parameters, of dimension m_pdim!
+  Point::m_pdim = Pd;

  CHECK(start.size() == Pd);
-  Point::dim = i2O.size();
-  Point::optindices = i2O;
-  if (Point::pdim > Point::dim) {
-    for (unsigned int i = 0; i < Point::pdim; i++) {
+  Point::m_dim = i2O.size();
+  Point::m_opt_indices = i2O;
+  if (Point::m_pdim > Point::m_dim) {
+    for (unsigned int i = 0; i < Point::m_pdim; i++) {
      unsigned int j = 0;
-      while (j < Point::dim && i != i2O[j])
+      // Linear search for i among the indices to optimize.
+      while (j < Point::m_dim && i != i2O[j])
        j++;

-      // The index i wasnt found on optindices, it is a fixed index,
+      // The index i wasnt found on m_opt_indices, it is a fixed index,
      // we use the value of the start vector.
-      if (j == Point::dim)
-        Point::fixedweights[i] = start[i];
+      if (j == Point::m_dim)
+        Point::m_fixed_weights[i] = start[i];
    }
  }
 }
@ -76,7 +65,7 @@ statscore_t Optimizer::GetStatScore(const Point& param) const
  return score;
 }

-map<float,diff_t >::iterator AddThreshold(map<float,diff_t >& thresholdmap, float newt, pair<unsigned,unsigned> newdiff)
+map<float,diff_t >::iterator AddThreshold(map<float,diff_t >& thresholdmap, float newt, const pair<unsigned,unsigned>& newdiff)
 {
  map<float,diff_t>::iterator it = thresholdmap.find(newt);
  if (it != thresholdmap.end()) {
@ -112,12 +101,12 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
    //cerr << "Sentence " << S << endl;
    multimap<float, unsigned> gradient;
    vector<float> f0;
-    f0.resize(FData->get(S).size());
-    for (unsigned j = 0; j < FData->get(S).size(); j++) {
+    f0.resize(m_feature_data->get(S).size());
+    for (unsigned j = 0; j < m_feature_data->get(S).size(); j++) {
      // gradient of the feature function for this particular target sentence
-      gradient.insert(pair<float, unsigned>(direction * (FData->get(S,j)), j));
+      gradient.insert(pair<float, unsigned>(direction * (m_feature_data->get(S,j)), j));
      // compute the feature function at the origin point
-      f0[j] = origin * FData->get(S, j);
+      f0[j] = origin * m_feature_data->get(S, j);
    }
    // Now let's compute the 1best for each value of x.

@ -308,7 +297,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,

 void Optimizer::Get1bests(const Point& P, vector<unsigned>& bests) const
 {
-  CHECK(FData);
+  CHECK(m_feature_data);
  bests.clear();
  bests.resize(size());

@ -316,8 +305,8 @@ void Optimizer::Get1bests(const Point& P, vector<unsigned>& bests) const
    float bestfs = MIN_FLOAT;
    unsigned idx = 0;
    unsigned j;
-    for (j = 0; j < FData->get(i).size(); j++) {
-      float curfs = P * FData->get(i, j);
+    for (j = 0; j < m_feature_data->get(i).size(); j++) {
+      float curfs = P * m_feature_data->get(i, j);
      if (curfs > bestfs) {
        bestfs = curfs;
        idx = j;
@ -330,15 +319,15 @@ void Optimizer::Get1bests(const Point& P, vector<unsigned>& bests) const

 statscore_t Optimizer::Run(Point& P) const
 {
-  if (!FData) {
+  if (!m_feature_data) {
    cerr << "error trying to optimize without Features loaded" << endl;
    exit(2);
  }
-  if (!scorer) {
+  if (!m_scorer) {
    cerr << "error trying to optimize without a Scorer loaded" << endl;
    exit(2);
  }
-  if (scorer->getReferenceSize() != FData->size()) {
+  if (m_scorer->getReferenceSize() != m_feature_data->size()) {
    cerr << "error length mismatch between feature file and score file" << endl;
    exit(2);
  }
@ -359,13 +348,13 @@ statscore_t Optimizer::Run(Point& P) const
 }


-vector<statscore_t> Optimizer::GetIncStatScore(vector<unsigned> thefirst, vector<vector <pair<unsigned,unsigned> > > thediffs) const
+vector<statscore_t> Optimizer::GetIncStatScore(const vector<unsigned>& thefirst, const vector<vector <pair<unsigned,unsigned> > >& thediffs) const
 {
-  CHECK(scorer);
+  CHECK(m_scorer);

  vector<statscore_t> theres;

-  scorer->score(thefirst, thediffs, theres);
+  m_scorer->score(thefirst, thediffs, theres);
  return theres;
 }

@ -392,7 +381,7 @@ statscore_t SimpleOptimizer::TrueRun(Point& P) const

    Point  linebest;

-    for (unsigned int d = 0; d < Point::getdim()+number_of_random_directions; d++) {
+    for (unsigned int d = 0; d < Point::getdim() + m_num_random_directions; d++) {
      if (verboselevel() > 4) {
        //	cerr<<"minimizing along direction "<<d<<endl;
        cerr << "starting point: " << P << " => " << prevscore << endl;
@ -440,7 +429,7 @@ statscore_t RandomDirectionOptimizer::TrueRun(Point& P) const
  // do specified number of random direction optimizations
  unsigned int nrun = 0;
  unsigned int nrun_no_change = 0;
-  for (; nrun_no_change < number_of_random_directions; nrun++, nrun_no_change++)
+  for (; nrun_no_change < m_num_random_directions; nrun++, nrun_no_change++)
  {
    // choose a random direction in which to optimize
    Point direction;
@ -473,63 +462,3 @@ statscore_t RandomOptimizer::TrueRun(Point& P) const
  P.SetScore(score);
  return score;
 }
-
-//--------------------------------------
-
-vector<string> OptimizerFactory::typenames;
-
-void OptimizerFactory::SetTypeNames()
-{
-  if (typenames.empty()) {
-    typenames.resize(NOPTIMIZER);
-    typenames[POWELL]="powell";
-    typenames[RANDOM_DIRECTION]="random-direction";
-    typenames[RANDOM]="random";
-    // Add new type there
-  }
-}
-vector<string> OptimizerFactory::GetTypeNames()
-{
-  if (typenames.empty())
-    SetTypeNames();
-  return typenames;
-}
-
-OptimizerFactory::OptType OptimizerFactory::GetOType(const string& type)
-{
-  unsigned int thetype;
-  if (typenames.empty())
-    SetTypeNames();
-  for (thetype = 0; thetype < typenames.size(); thetype++)
-    if (typenames[thetype] == type)
-      break;
-  return((OptType)thetype);
-}
-
-Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim, vector<unsigned> i2o, vector<parameter_t> start, const string& type, unsigned int nrandom)
-{
-  OptType T = GetOType(type);
-  if (T == NOPTIMIZER) {
-    cerr << "Error: unknown Optimizer type " << type << endl;
-    cerr << "Known Algorithm are:" << endl;
-    unsigned int thetype;
-    for (thetype = 0; thetype < typenames.size(); thetype++)
-      cerr << typenames[thetype] << endl;
-    throw ("unknown Optimizer Type");
-  }
-
-  switch ((OptType)T) {
-    case POWELL:
-      return new SimpleOptimizer(dim, i2o, start, nrandom);
-      break;
-    case RANDOM_DIRECTION:
-      return new RandomDirectionOptimizer(dim, i2o, start, nrandom);
-      break;
-    case RANDOM:
-      return new RandomOptimizer(dim, i2o, start, nrandom);
-      break;
-    default:
-      cerr << "Error: unknown optimizer" << type << endl;
-      return NULL;
-  }
-}
--- a/mert/Optimizer.h
+++ b/mert/Optimizer.h
@ -10,8 +10,6 @@

 using namespace std;

-typedef float featurescore;
-
 class Point;

 /**
@ -20,18 +18,19 @@ class Point;
 class Optimizer
 {
 protected:
-  Scorer *scorer;      // no accessor for them only child can use them
-  FeatureDataHandle FData;  // no accessor for them only child can use them
-  unsigned int number_of_random_directions;
+  Scorer *m_scorer;      // no accessor for them only child can use them
+  FeatureDataHandle m_feature_data;  // no accessor for them only child can use them
+  unsigned int m_num_random_directions;

 public:
-  Optimizer(unsigned Pd, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom);
-  void SetScorer(Scorer *_scorer);
-  void SetFData(FeatureDataHandle _FData);
+  Optimizer(unsigned Pd, const vector<unsigned>& i2O, const vector<parameter_t>& start, unsigned int nrandom);
+
+  void SetScorer(Scorer *scorer) { m_scorer = scorer; }
+  void SetFeatureData(FeatureDataHandle feature_data) { m_feature_data = feature_data; }
  virtual ~Optimizer();

  unsigned size() const {
-    return FData ? FData->size() : 0;
+    return m_feature_data ? m_feature_data->size() : 0;
  }

  /**
@ -53,12 +52,12 @@ public:
   * Given a set of nbests, get the Statistical score.
   */
  statscore_t GetStatScore(const vector<unsigned>& nbests) const {
-    return scorer->score(nbests);
+    return m_scorer->score(nbests);
  }

  statscore_t GetStatScore(const Point& param) const;

-  vector<statscore_t> GetIncStatScore(vector<unsigned> ref, vector<vector<pair<unsigned,unsigned> > >) const;
+  vector<statscore_t> GetIncStatScore(const vector<unsigned>& ref, const vector<vector<pair<unsigned,unsigned> > >& diffs) const;

  /**
   * Get the optimal Lambda and the best score in a particular direction from a given Point.
@ -76,7 +75,7 @@ class SimpleOptimizer : public Optimizer
 private:
  const float kEPS;
 public:
-  SimpleOptimizer(unsigned dim, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
+  SimpleOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<parameter_t>& start, unsigned int nrandom)
      : Optimizer(dim, i2O, start,nrandom), kEPS(0.0001) {}
  virtual statscore_t TrueRun(Point&) const;
 };
@ -89,7 +88,7 @@ class RandomDirectionOptimizer : public Optimizer
 private:
  const float kEPS;
 public:
-  RandomDirectionOptimizer(unsigned dim, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
+  RandomDirectionOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<parameter_t>& start, unsigned int nrandom)
      : Optimizer(dim, i2O, start, nrandom), kEPS(0.0001) {}
  virtual statscore_t TrueRun(Point&) const;
 };
@ -100,36 +99,9 @@ public:
 class RandomOptimizer : public Optimizer
 {
 public:
-  RandomOptimizer(unsigned dim, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
+  RandomOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<parameter_t>& start, unsigned int nrandom)
      : Optimizer(dim, i2O, start, nrandom) {}
  virtual statscore_t TrueRun(Point&) const;
 };

-class OptimizerFactory
-{
-public:
-  static vector<string> GetTypeNames();
-  static Optimizer* BuildOptimizer(unsigned dim, vector<unsigned> tooptimize, vector<parameter_t> start, const string& type, unsigned int nrandom);
-
-private:
-  OptimizerFactory() {}
-  ~OptimizerFactory() {}
-
-  // Add new optimizer here BEFORE NOPTIMZER
-  enum OptType {
-    POWELL = 0,
-    RANDOM_DIRECTION = 1,
-    RANDOM,
-    NOPTIMIZER
-  };
-
-  // Get optimizer type.
-  static OptType GetOType(const string& type);
-
-  // Setup optimization types.
-  static void SetTypeNames();
-
-  static vector<string> typenames;
-};
-
 #endif  // OPTIMIZER_H
--- a/mert/OptimizerFactory.cpp
+++ b/mert/OptimizerFactory.cpp
@ -0,0 +1,66 @@
+#include "OptimizerFactory.h"
+#include "Optimizer.h"
+
+using namespace std;
+
+vector<string> OptimizerFactory::m_type_names;
+
+// Populate the name table once: slot i holds the command-line name of the
+// optimizer whose OptimizerType value is i. Later calls are no-ops.
+void OptimizerFactory::SetTypeNames()
+{
+  if (!m_type_names.empty()) {
+    return;  // already initialized
+  }
+
+  m_type_names.resize(NOPTIMIZER);
+  m_type_names[POWELL] = "powell";
+  m_type_names[RANDOM_DIRECTION] = "random-direction";
+  m_type_names[RANDOM] = "random";
+  // Register the names of new optimizer types here.
+}
+// Return a copy of the optimizer name table, filling it first if needed.
+vector<string> OptimizerFactory::GetTypeNames()
+{
+  // SetTypeNames() does nothing when the table is already populated, so it
+  // can be called unconditionally.
+  SetTypeNames();
+  return m_type_names;
+}
+
+// Translate an optimizer name into its OptimizerType value.
+// An unrecognized name yields NOPTIMIZER, because the scan then stops at
+// m_type_names.size(), which SetTypeNames() sets to NOPTIMIZER.
+OptimizerFactory::OptimizerType OptimizerFactory::GetOptimizerType(const string& type)
+{
+  if (m_type_names.empty()) {
+    SetTypeNames();
+  }
+
+  // Linear scan: the position of the matching name is the enum value.
+  unsigned int index = 0;
+  while (index < m_type_names.size() && m_type_names[index] != type) {
+    ++index;
+  }
+  return static_cast<OptimizerType>(index);
+}
+
+/**
+ * Create the optimizer registered under the name "type".
+ *
+ * dim, i2o, start and nrandom are forwarded unchanged to the constructor of
+ * the selected optimizer class.
+ *
+ * Returns a pointer to an object allocated with new; the caller is
+ * responsible for deleting it.
+ *
+ * If "type" is not a known optimizer name, the known names are written to
+ * cerr and a string literal (const char*) is thrown.
+ */
+Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim,
+                                            const vector<unsigned>& i2o,
+                                            const vector<parameter_t>& start,
+                                            const string& type,
+                                            unsigned int nrandom)
+{
+  const OptimizerType opt_type = GetOptimizerType(type);
+  if (opt_type == NOPTIMIZER) {
+    cerr << "Error: unknown Optimizer type " << type << endl;
+    cerr << "Known Algorithm are:" << endl;
+    for (unsigned int t = 0; t < m_type_names.size(); t++)
+      cerr << m_type_names[t] << endl;
+    throw ("unknown Optimizer Type");
+  }
+
+  switch (opt_type) {
+    case POWELL:
+      return new SimpleOptimizer(dim, i2o, start, nrandom);
+    case RANDOM_DIRECTION:
+      return new RandomDirectionOptimizer(dim, i2o, start, nrandom);
+    case RANDOM:
+      return new RandomOptimizer(dim, i2o, start, nrandom);
+    default:
+      // Only reachable if an OptimizerType value is added without a
+      // matching case above.
+      cerr << "Error: unknown optimizer " << type << endl;
+      return NULL;
+  }
+}
--- a/mert/OptimizerFactory.h
+++ b/mert/OptimizerFactory.h
@ -0,0 +1,41 @@
+#ifndef MERT_OPTIMIZER_FACTORY_H_
+#define MERT_OPTIMIZER_FACTORY_H_
+
+#include <vector>
+#include "Types.h"
+
+class Optimizer;
+
+/**
+ * Static factory that maps an optimizer name to a concrete Optimizer.
+ * All members are static; the private constructor and destructor prevent
+ * instantiation.
+ */
+class OptimizerFactory
+{
+ public:
+  // NOTE: Add new optimizer types here, BEFORE NOPTIMIZER.
+  // NOPTIMIZER is the number of real optimizer types and is also the value
+  // GetOptimizerType() returns for an unknown name.
+  enum OptimizerType {
+    POWELL = 0,
+    RANDOM_DIRECTION = 1,
+    RANDOM,
+    NOPTIMIZER
+  };
+
+  // Return a copy of the optimizer names, indexed by OptimizerType.
+  static std::vector<std::string> GetTypeNames();
+
+  // Fill the table of optimizer names; does nothing if already filled.
+  static void SetTypeNames();
+
+  // Map an optimizer name to its OptimizerType (NOPTIMIZER if unknown).
+  static OptimizerType GetOptimizerType(const std::string& type);
+
+  // Allocate the optimizer named by "type" with new; the caller owns the
+  // returned object. Throws a string literal if "type" is unknown.
+  static Optimizer* BuildOptimizer(unsigned dim,
+                                   const std::vector<unsigned>& to_optimize,
+                                   const std::vector<parameter_t>& start,
+                                   const std::string& type,
+                                   unsigned int nrandom);
+
+ private:
+  OptimizerFactory() {}
+  ~OptimizerFactory() {}
+
+  // Optimizer names; entry i is the name of OptimizerType value i.
+  static std::vector<std::string> m_type_names;
+};
+
+#endif  // MERT_OPTIMIZER_FACTORY_H_
--- a/mert/OptimizerFactoryTest.cpp
+++ b/mert/OptimizerFactoryTest.cpp
@ -0,0 +1,46 @@
+#include "OptimizerFactory.h"
+#include "Optimizer.h"
+
+#define BOOST_TEST_MODULE MertOptimizerFactory
+#include <boost/test/unit_test.hpp>
+#include <boost/scoped_ptr.hpp>
+
+namespace {
+
+// Ask the factory for an optimizer of the given type and check that a
+// non-null object comes back. The scoped_ptr deletes it on return.
+inline void CheckBuildOptimizer(unsigned dim,
+                                const vector<unsigned>& to_optimize,
+                                const vector<parameter_t>& start,
+                                const string& type,
+                                unsigned int num_random) {
+  Optimizer* const built = OptimizerFactory::BuildOptimizer(
+      dim, to_optimize, start, type, num_random);
+  const boost::scoped_ptr<Optimizer> owner(built);
+  BOOST_CHECK(built != NULL);
+}
+
+} // namespace
+
+// Each registered optimizer name must map to its own enum value.
+BOOST_AUTO_TEST_CASE(optimizer_type) {
+  const OptimizerFactory::OptimizerType powell =
+      OptimizerFactory::GetOptimizerType("powell");
+  BOOST_CHECK_EQUAL(powell, OptimizerFactory::POWELL);
+
+  const OptimizerFactory::OptimizerType random =
+      OptimizerFactory::GetOptimizerType("random");
+  BOOST_CHECK_EQUAL(random, OptimizerFactory::RANDOM);
+
+  const OptimizerFactory::OptimizerType random_direction =
+      OptimizerFactory::GetOptimizerType("random-direction");
+  BOOST_CHECK_EQUAL(random_direction, OptimizerFactory::RANDOM_DIRECTION);
+}
+
+// Every registered optimizer name must produce a non-null optimizer.
+BOOST_AUTO_TEST_CASE(optimizer_build) {
+  const unsigned dim = 3;
+
+  // Indices of the parameters to optimize. They address the dim-sized
+  // parameter vector, so each one must lie in [0, dim).
+  const unsigned kToOptimize[] = {0, 1, 2};
+  const std::vector<unsigned> to_optimize(kToOptimize, kToOptimize + dim);
+
+  // Starting point: one value per parameter (dim values in total).
+  const parameter_t kStart[] = {0.3f, 0.1f, 0.2f};
+  const std::vector<parameter_t> start(kStart, kStart + dim);
+
+  const unsigned int num_random = 1;
+
+  CheckBuildOptimizer(dim, to_optimize, start, "powell", num_random);
+  CheckBuildOptimizer(dim, to_optimize, start, "random", num_random);
+  CheckBuildOptimizer(dim, to_optimize, start, "random-direction", num_random);
+}
--- a/mert/PerScorer.h
+++ b/mert/PerScorer.h
@ -1,9 +1,7 @@
 #ifndef MERT_PER_SCORER_H_
 #define MERT_PER_SCORER_H_

-#include <iostream>
 #include <set>
-#include <sstream>
 #include <string>
 #include <vector>
 #include "Types.h"
@ -27,18 +25,9 @@ public:

  virtual void setReferenceFiles(const vector<string>& referenceFiles);
  virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
-
-  virtual size_t NumberOfScores() const {
-    // cerr << "PerScorer: 3" << endl;
-    return 3;
-  }
-
+  virtual size_t NumberOfScores() const { return 3; }
  virtual float calculateScore(const vector<int>& comps) const;

-  void whoami() const {
-    cerr << "I AM PerScorer" << std::endl;
-  }
-
 private:
  // no copying allowed
  PerScorer(const PerScorer&);
--- a/mert/Point.cpp
+++ b/mert/Point.cpp
@ -8,41 +8,41 @@

 using namespace std;

-vector<unsigned> Point::optindices;
+vector<unsigned> Point::m_opt_indices;

-unsigned Point::dim = 0;
+unsigned Point::m_dim = 0;

-map<unsigned,statscore_t> Point::fixedweights;
+map<unsigned,statscore_t> Point::m_fixed_weights;

-unsigned Point::pdim = 0;
-unsigned Point::ncall = 0;
+unsigned Point::m_pdim = 0;
+unsigned Point::m_ncall = 0;

 vector<parameter_t> Point::m_min;
 vector<parameter_t> Point::m_max;

-Point::Point() : vector<parameter_t>(dim), score_(0.0) {}
+Point::Point() : vector<parameter_t>(m_dim), m_score(0.0) {}

-//Can initialize from a vector of dim or pdim
+// Can initialize from a vector of size m_dim or m_pdim
 Point::Point(const vector<parameter_t>& init,
             const vector<parameter_t>& min,
             const vector<parameter_t>& max)
-    : vector<parameter_t>(Point::dim), score_(0.0)
+    : vector<parameter_t>(Point::m_dim), m_score(0.0)
 {
-  m_min.resize(Point::dim);
-  m_max.resize(Point::dim);
-  if(init.size()==dim) {
-    for (unsigned int i=0; i<Point::dim; i++) {
-      operator[](i)=init[i];
+  m_min.resize(Point::m_dim);
+  m_max.resize(Point::m_dim);
+  if (init.size() == m_dim) {
+    for (unsigned int i = 0; i < Point::m_dim; i++) {
+      operator[](i) = init[i];
      m_min[i] = min[i];
      m_max[i] = max[i];
    }
  } else {
-    CHECK(init.size()==pdim);
-    CHECK(optindices.size() == Point::dim);
-    for (unsigned int i=0; i<Point::dim; i++) {
-      operator[](i)=init[optindices[i]];
-      m_min[i] = min[optindices[i]];
-      m_max[i] = max[optindices[i]];
+    CHECK(init.size() == m_pdim);
+    CHECK(m_opt_indices.size() == Point::m_dim);
+    for (unsigned int i = 0; i < Point::m_dim; i++) {
+      operator[](i) = init[m_opt_indices[i]];
+      m_min[i] = min[m_opt_indices[i]];
+      m_max[i] = max[m_opt_indices[i]];
    }
  }
 }
@ -51,9 +51,9 @@ Point::~Point() {}

 void Point::Randomize()
 {
-  CHECK(m_min.size()==Point::dim);
-  CHECK(m_max.size()==Point::dim);
-  for (unsigned int i=0; i<size(); i++) {
+  CHECK(m_min.size() == Point::m_dim);
+  CHECK(m_max.size() == Point::m_dim);
+  for (unsigned int i = 0; i < size(); i++) {
    operator[](i) = m_min[i] +
                    static_cast<float>(random()) / static_cast<float>(RAND_MAX) * (m_max[i] - m_min[i]);
  }
@ -61,16 +61,17 @@ void Point::Randomize()

 double Point::operator*(const FeatureStats& F) const
 {
-  ncall++; // to track performance
-  double prod=0.0;
-  if(OptimizeAll())
+  m_ncall++; // to track performance
+  double prod = 0.0;
+  if (OptimizeAll())
    for (unsigned i=0; i<size(); i++)
-      prod+= operator[](i)*F.get(i);
+      prod += operator[](i) * F.get(i);
  else {
-    for (unsigned i=0; i<size(); i++)
-      prod+= operator[](i)*F.get(optindices[i]);
-    for(map<unsigned,float >::iterator it=fixedweights.begin(); it!=fixedweights.end(); it++)
-      prod+=it->second*F.get(it->first);
+    for (unsigned i = 0; i < size(); i++)
+      prod += operator[](i) * F.get(m_opt_indices[i]);
+    for(map<unsigned, float>::iterator it = m_fixed_weights.begin();
+        it != m_fixed_weights.end(); ++it)
+      prod += it->second * F.get(it->first);
  }
  return prod;
 }
@ -83,7 +84,7 @@ Point Point::operator+(const Point& p2) const
    Res[i] += p2[i];
  }

-  Res.score_ = numeric_limits<statscore_t>::max();
+  Res.m_score = numeric_limits<statscore_t>::max();
  return Res;
 }

@ -93,7 +94,7 @@ void Point::operator+=(const Point& p2)
  for (unsigned i = 0; i < size(); i++) {
    operator[](i) += p2[i];
  }
-  score_ = numeric_limits<statscore_t>::max();
+  m_score = numeric_limits<statscore_t>::max();
 }

 Point Point::operator*(float l) const
@ -102,14 +103,14 @@ Point Point::operator*(float l) const
  for (unsigned i = 0; i < size(); i++) {
    Res[i] *= l;
  }
-  Res.score_ = numeric_limits<statscore_t>::max();
+  Res.m_score = numeric_limits<statscore_t>::max();
  return Res;
 }

 ostream& operator<<(ostream& o, const Point& P)
 {
  vector<parameter_t> w = P.GetAllWeights();
-  for (unsigned int i = 0; i < Point::pdim; i++) {
+  for (unsigned int i = 0; i < Point::m_pdim; i++) {
    o << w[i] << " ";
  }
  return o;
@ -118,24 +119,24 @@ ostream& operator<<(ostream& o, const Point& P)
 void Point::NormalizeL2()
 {
  parameter_t norm=0.0;
-  for (unsigned int i=0; i<size(); i++)
-    norm+= operator[](i)*operator[](i);
-  if(norm!=0.0) {
-    norm=sqrt(norm);
-    for (unsigned int i=0; i<size(); i++)
-      operator[](i)/=norm;
+  for (unsigned int i = 0; i < size(); i++)
+    norm += operator[](i) * operator[](i);
+  if (norm != 0.0) {
+    norm = sqrt(norm);
+    for (unsigned int i = 0; i < size(); i++)
+      operator[](i) /= norm;
  }
 }


 void Point::NormalizeL1()
 {
-  parameter_t norm=0.0;
-  for (unsigned int i=0; i<size(); i++)
-    norm+= abs(operator[](i));
-  if(norm!=0.0) {
-    for (unsigned int i=0; i<size(); i++)
-      operator[](i)/=norm;
+  parameter_t norm = 0.0;
+  for (unsigned int i = 0; i < size(); i++)
+    norm += abs(operator[](i));
+  if (norm != 0.0) {
+    for (unsigned int i = 0; i < size(); i++)
+      operator[](i) /= norm;
  }
 }

@ -143,14 +144,16 @@ void Point::NormalizeL1()
 vector<parameter_t> Point::GetAllWeights()const
 {
  vector<parameter_t> w;
-  if(OptimizeAll()) {
-    w=*this;
+  if (OptimizeAll()) {
+    w = *this;
  } else {
-    w.resize(pdim);
-    for (unsigned int i=0; i<size(); i++)
-      w[optindices[i]]=operator[](i);
-    for(map<unsigned,float >::iterator it=fixedweights.begin(); it!=fixedweights.end(); it++)
+    w.resize(m_pdim);
+    for (unsigned int i = 0; i < size(); i++)
+      w[m_opt_indices[i]] = operator[](i);
+    for (map<unsigned, float>::iterator it = m_fixed_weights.begin();
+         it != m_fixed_weights.end(); ++it) {
      w[it->first]=it->second;
+    }
  }
  return w;
 }
--- a/mert/Point.h
+++ b/mert/Point.h
@ -1,7 +1,7 @@
 #ifndef MERT_POINT_H_
 #define MERT_POINT_H_

-#include <fstream>
+#include <ostream>
 #include <map>
 #include <vector>
 #include "Types.h"
@ -16,61 +16,55 @@ class Optimizer;
 class Point : public vector<parameter_t>
 {
  friend class Optimizer;
+
 private:
  /**
   * The indices over which we optimize.
   */
-  static vector<unsigned int> optindices;
+  static vector<unsigned int> m_opt_indices;

  /**
-   * Dimension of optindices and of the parent vector.
+   * Dimension of m_opt_indices and of the parent vector.
   */
-  static unsigned int dim;
+  static unsigned int m_dim;

  /**
   * Fixed weights in case of partial optimzation.
   */
-  static map<unsigned int,parameter_t> fixedweights;
+  static map<unsigned int,parameter_t> m_fixed_weights;

  /**
   * Total size of the parameter space; we have
-   * pdim = FixedWeight.size() + optinidices.size().
+   * m_pdim = m_fixed_weights.size() + m_opt_indices.size().
   */
-  static unsigned int pdim;
-  static unsigned int ncall;
+  static unsigned int m_pdim;
+  static unsigned int m_ncall;

  /**
-   * The limits for randomization, both vectors are of full length, pdim.
+   * The limits for randomization, both vectors are of full length, m_pdim.
   */
  static vector<parameter_t> m_min;
  static vector<parameter_t> m_max;

-  statscore_t score_;
+  statscore_t m_score;

 public:
-  static unsigned int getdim() {
-    return dim;
-  }
-  static unsigned int getpdim() {
-    return pdim;
-  }
-  static void setpdim(size_t pd) {
-    pdim = pd;
-  }
-  static void setdim(size_t d) {
-    dim = d;
-  }
+  static unsigned int getdim() { return m_dim; }
+  static void setdim(size_t d) { m_dim = d; }
+
+  static unsigned int getpdim() { return m_pdim; }
+  static void setpdim(size_t pd) { m_pdim = pd; }

  static void set_optindices(const vector<unsigned int>& indices) {
-    optindices = indices;
+    m_opt_indices = indices;
  }

  static const vector<unsigned int>& get_optindices() {
-    return optindices;
+    return m_opt_indices;
  }

  static bool OptimizeAll() {
-    return fixedweights.empty();
+    return m_fixed_weights.empty();
  }

  Point();
@ -88,7 +82,7 @@ public:
  Point operator*(float) const;

  /**
-   * Write the Whole featureweight to a stream (ie pdim float).
+   * Write the Whole featureweight to a stream (ie m_pdim float).
   */
  friend ostream& operator<<(ostream& o,const Point& P);

@ -97,16 +91,13 @@ public:
  void NormalizeL1();

  /**
-   * Return a vector of size pdim where all weights have been
+   * Return a vector of size m_pdim where all weights have been
   * put (including fixed ones).
   */
  vector<parameter_t> GetAllWeights() const;

-  statscore_t GetScore() const {
-    return score_;
-  }
-
-  void SetScore(statscore_t score) { score_ = score; }
+  statscore_t GetScore() const { return m_score; }
+  void SetScore(statscore_t score) { m_score = score; }
 };

 #endif  // MERT_POINT_H
--- a/mert/Reference.h
+++ b/mert/Reference.h
@ -0,0 +1,80 @@
+#ifndef MERT_REFERENCE_H_
+#define MERT_REFERENCE_H_
+
+#include <algorithm>
+#include <climits>
+#include <vector>
+
+#include "Ngram.h"
+
+/**
+ * Reference class represents reference translations for an output
+ * translation used in calculating BLEU score.
+ *
+ * It holds one NgramCounts object, created in the constructor and deleted
+ * in the destructor, plus the length of each reference translation.
+ * Because the counts are owned through a raw pointer, copying is disabled.
+ */
+class Reference {
+ public:
+  // for m_length
+  typedef std::vector<size_t>::iterator iterator;
+  typedef std::vector<size_t>::const_iterator const_iterator;
+
+  Reference() : m_counts(new NgramCounts) { }
+  ~Reference() { delete m_counts; }
+
+  // Access to the owned counts; the pointer stays owned by this object.
+  NgramCounts* get_counts() { return m_counts; }
+  const NgramCounts* get_counts() const { return m_counts; }
+
+  // Iteration over the stored reference lengths, in insertion order.
+  iterator begin() { return m_length.begin(); }
+  const_iterator begin() const { return m_length.begin(); }
+  iterator end() { return m_length.end(); }
+  const_iterator end() const { return m_length.end(); }
+
+  // Record the length of one more reference translation.
+  void push_back(size_t len) { m_length.push_back(len); }
+
+  size_t num_references() const { return m_length.size(); }
+
+  // Average of the stored lengths, truncated to an int.
+  int CalcAverage() const;
+  // Stored length closest to "length"; ties go to the shorter reference.
+  int CalcClosest(size_t length) const;
+  // Smallest stored length.
+  int CalcShortest() const;
+
+ private:
+  // no copying allowed: a copy would share m_counts and delete it twice.
+  Reference(const Reference&);
+  Reference& operator=(const Reference&);
+
+  NgramCounts* m_counts;
+
+  // multiple reference lengths
+  std::vector<size_t> m_length;
+};
+
+// Average of the stored reference lengths, truncated to an int.
+// Returns 0 when no reference length has been stored, instead of dividing
+// by zero.
+inline int Reference::CalcAverage() const {
+  if (m_length.empty()) {
+    return 0;
+  }
+  int total = 0;
+  for (size_t i = 0; i < m_length.size(); ++i) {
+    total += static_cast<int>(m_length[i]);
+  }
+  return static_cast<int>(
+      static_cast<float>(total) / m_length.size());
+}
+
+// Return the stored reference length whose distance to "length" is
+// smallest. If several references are equally close, the shortest of them
+// is returned. Returns 0 when no reference length has been stored.
+inline int Reference::CalcClosest(size_t length) const {
+  if (m_length.empty()) {
+    return 0;
+  }
+
+  // Work in int throughout so the differences are plain signed arithmetic.
+  const int target = static_cast<int>(length);
+  int best_diff = INT_MAX;
+  size_t closest_ref_id = 0;  // index of the closest reference translation
+
+  for (size_t i = 0; i < m_length.size(); ++i) {
+    const int ref_length = static_cast<int>(m_length[i]);
+    const int length_diff =
+        ref_length > target ? ref_length - target : target - ref_length;
+
+    const bool closer = length_diff < best_diff;
+    // Same distance as the current best: prefer the shorter reference.
+    const bool shorter_tie =
+        length_diff == best_diff &&
+        ref_length < static_cast<int>(m_length[closest_ref_id]);
+
+    if (closer || shorter_tie) {
+      best_diff = length_diff;
+      closest_ref_id = i;
+    }
+  }
+  return static_cast<int>(m_length[closest_ref_id]);
+}
+
+// Smallest stored reference length, or 0 when none has been stored
+// (min_element on an empty range returns end(), which must not be
+// dereferenced).
+inline int Reference::CalcShortest() const {
+  if (m_length.empty()) {
+    return 0;
+  }
+  return static_cast<int>(
+      *std::min_element(m_length.begin(), m_length.end()));
+}
+
+#endif  // MERT_REFERENCE_H_
--- a/mert/ReferenceTest.cpp
+++ b/mert/ReferenceTest.cpp
@ -0,0 +1,116 @@
+#include "Reference.h"
+
+#define BOOST_TEST_MODULE MertReference
+#include <boost/test/unit_test.hpp>
+
+// A default-constructed Reference must expose a non-null counts object.
+BOOST_AUTO_TEST_CASE(refernece_count) {
+  Reference ref;
+  NgramCounts* const counts = ref.get_counts();
+  BOOST_CHECK(counts != NULL);
+}
+
+// Lengths must come back from begin()/end() in the order they were pushed.
+BOOST_AUTO_TEST_CASE(refernece_length_iterator) {
+  const size_t kLengths[] = {4, 2};
+  const size_t kNumLengths = sizeof(kLengths) / sizeof(kLengths[0]);
+
+  Reference ref;
+  for (size_t i = 0; i < kNumLengths; ++i) {
+    ref.push_back(kLengths[i]);
+  }
+  BOOST_REQUIRE(ref.num_references() == kNumLengths);
+
+  Reference::iterator it = ref.begin();
+  for (size_t i = 0; i < kNumLengths; ++i, ++it) {
+    BOOST_CHECK_EQUAL(*it, kLengths[i]);
+  }
+  // After visiting every stored length the iterator must be at the end.
+  BOOST_CHECK(it == ref.end());
+}
+
+// CalcAverage() returns the mean reference length truncated to an int.
+BOOST_AUTO_TEST_CASE(refernece_length_average) {
+  // (4 + 1) / 2 = 2.5 -> 2
+  Reference far_apart;
+  far_apart.push_back(4);
+  far_apart.push_back(1);
+  BOOST_CHECK_EQUAL(2, far_apart.CalcAverage());
+
+  // (4 + 3) / 2 = 3.5 -> 3
+  Reference adjacent;
+  adjacent.push_back(4);
+  adjacent.push_back(3);
+  BOOST_CHECK_EQUAL(3, adjacent.CalcAverage());
+
+  // (4 + 3 + 4 + 5) / 4 = 4
+  Reference four_refs;
+  four_refs.push_back(4);
+  four_refs.push_back(3);
+  four_refs.push_back(4);
+  four_refs.push_back(5);
+  BOOST_CHECK_EQUAL(4, four_refs.CalcAverage());
+}
+
+// CalcClosest() picks the reference length nearest to the given length and
+// prefers the shorter reference on a tie. Each table below lists the
+// expected answer for candidate lengths 1, 2, 3, 4 and 5, in that order.
+BOOST_AUTO_TEST_CASE(refernece_length_closest) {
+  const size_t kMaxLength = 5;
+
+  {
+    Reference ref;
+    ref.push_back(4);
+    ref.push_back(1);
+    BOOST_REQUIRE(ref.num_references() == 2);
+
+    const int kExpected[] = {1, 1, 4, 4, 4};
+    for (size_t len = 1; len <= kMaxLength; ++len) {
+      BOOST_CHECK_EQUAL(kExpected[len - 1], ref.CalcClosest(len));
+    }
+  }
+
+  {
+    Reference ref;
+    ref.push_back(4);
+    ref.push_back(3);
+    BOOST_REQUIRE(ref.num_references() == 2);
+
+    const int kExpected[] = {3, 3, 3, 4, 4};
+    for (size_t len = 1; len <= kMaxLength; ++len) {
+      BOOST_CHECK_EQUAL(kExpected[len - 1], ref.CalcClosest(len));
+    }
+  }
+
+  {
+    Reference ref;
+    ref.push_back(4);
+    ref.push_back(3);
+    ref.push_back(4);
+    ref.push_back(5);
+    BOOST_REQUIRE(ref.num_references() == 4);
+
+    const int kExpected[] = {3, 3, 3, 4, 5};
+    for (size_t len = 1; len <= kMaxLength; ++len) {
+      BOOST_CHECK_EQUAL(kExpected[len - 1], ref.CalcClosest(len));
+    }
+  }
+}
+
+// CalcShortest() returns the smallest stored reference length.
+BOOST_AUTO_TEST_CASE(refernece_length_shortest) {
+  // min(4, 1) = 1
+  Reference far_apart;
+  far_apart.push_back(4);
+  far_apart.push_back(1);
+  BOOST_CHECK_EQUAL(1, far_apart.CalcShortest());
+
+  // min(4, 3) = 3
+  Reference adjacent;
+  adjacent.push_back(4);
+  adjacent.push_back(3);
+  BOOST_CHECK_EQUAL(3, adjacent.CalcShortest());
+
+  // min(4, 3, 4, 5) = 3
+  Reference four_refs;
+  four_refs.push_back(4);
+  four_refs.push_back(3);
+  four_refs.push_back(4);
+  four_refs.push_back(5);
+  BOOST_CHECK_EQUAL(3, four_refs.CalcShortest());
+}
--- a/mert/ScopedVector.h
+++ b/mert/ScopedVector.h
@ -12,39 +12,39 @@ class ScopedVector {
  ScopedVector() {}
  virtual ~ScopedVector() { reset(); }

-  bool empty() const { return vec_.empty(); }
+  bool empty() const { return m_vec.empty(); }

-  void push_back(T *e) { vec_.push_back(e); }
+  void push_back(T *e) { m_vec.push_back(e); }

  void reset() {
-    for (iterator it = vec_.begin(); it != vec_.end(); ++it) {
+    for (iterator it = m_vec.begin(); it != m_vec.end(); ++it) {
      delete *it;
    }
-    vec_.clear();
+    m_vec.clear();
  }

-  void reserve(size_t capacity) { vec_.reserve(capacity); }
-  void resize(size_t size) { vec_.resize(size); }
+  void reserve(size_t capacity) { m_vec.reserve(capacity); }
+  void resize(size_t size) { m_vec.resize(size); }

-  size_t size() const {return vec_.size(); }
+  size_t size() const {return m_vec.size(); }

-  iterator begin() { return vec_.begin(); }
-  const_iterator begin() const { return vec_.begin(); }
+  iterator begin() { return m_vec.begin(); }
+  const_iterator begin() const { return m_vec.begin(); }

-  iterator end() { return vec_.end(); }
-  const_iterator end() const { return vec_.end(); }
+  iterator end() { return m_vec.end(); }
+  const_iterator end() const { return m_vec.end(); }

-  std::vector<T*>& get() { return vec_; }
-  const std::vector<T*>& get() const { return vec_; }
+  std::vector<T*>& get() { return m_vec; }
+  const std::vector<T*>& get() const { return m_vec; }

-  std::vector<T*>* operator->() { return &vec_; }
-  const std::vector<T*>* operator->() const { return &vec_; }
+  std::vector<T*>* operator->() { return &m_vec; }
+  const std::vector<T*>* operator->() const { return &m_vec; }

-  T*& operator[](size_t i) { return vec_[i]; }
-  const T* operator[](size_t i) const { return vec_[i]; }
+  T*& operator[](size_t i) { return m_vec[i]; }
+  const T* operator[](size_t i) const { return m_vec[i]; }

 private:
-  std::vector<T*> vec_;
+  std::vector<T*> m_vec;

  // no copying allowed.
  ScopedVector<T>(const ScopedVector<T>&);
--- a/mert/ScoreArray.cpp
+++ b/mert/ScoreArray.cpp
@ -10,76 +10,85 @@
 #include "Util.h"
 #include "FileStream.h"

-
 ScoreArray::ScoreArray()
-    : number_of_scores(0), idx("") {}
+    : m_num_scores(0), m_index("") {}

-void ScoreArray::savetxt(std::ofstream& outFile, const std::string& sctype)
+void ScoreArray::savetxt(ostream* os, const string& sctype)
 {
-  outFile << SCORES_TXT_BEGIN << " " << idx << " " << array_.size()
-          << " " << number_of_scores << " " << sctype << std::endl;
-  for (scorearray_t::iterator i = array_.begin(); i !=array_.end(); i++) {
-    i->savetxt(outFile);
-    outFile << std::endl;
+  *os << SCORES_TXT_BEGIN << " " << m_index << " " << m_array.size()
+          << " " << m_num_scores << " " << sctype << endl;
+  for (scorearray_t::iterator i = m_array.begin(); i !=m_array.end(); i++) {
+    i->savetxt(os);
+    *os << endl;
  }
-  outFile << SCORES_TXT_END << std::endl;
+  *os << SCORES_TXT_END << endl;
 }

-void ScoreArray::savebin(std::ofstream& outFile, const std::string& sctype)
+void ScoreArray::savebin(ostream* os, const string& score_type)
 {
-  outFile << SCORES_BIN_BEGIN << " " << idx << " " << array_.size()
-          << " " << number_of_scores << " " << sctype << std::endl;
-  for (scorearray_t::iterator i = array_.begin(); i !=array_.end(); i++)
-    i->savebin(outFile);
-
-  outFile << SCORES_BIN_END << std::endl;
+  *os << SCORES_BIN_BEGIN << " " << m_index << " " << m_array.size()
+          << " " << m_num_scores << " " << score_type << endl;
+  for (scorearray_t::iterator i = m_array.begin();
+       i != m_array.end(); i++) {
+    i->savebin(os);
+  }
+  *os << SCORES_BIN_END << endl;
 }

-void ScoreArray::save(std::ofstream& inFile, const std::string& sctype, bool bin)
+void ScoreArray::save(ostream* os, const string& score_type, bool bin)
 {
-  if (size()>0)
-    (bin)?savebin(inFile, sctype):savetxt(inFile, sctype);
+  if (size() <= 0) return;
+  if (bin) {
+    savebin(os, score_type);
+  } else {
+    savetxt(os, score_type);
+  }
 }

-void ScoreArray::save(const std::string &file, const std::string& sctype, bool bin)
+void ScoreArray::save(const string &file, const string& score_type, bool bin)
 {
-  std::ofstream outFile(file.c_str(), std::ios::out); // matches a stream with a file. Opens the file
-
-  save(outFile, sctype, bin);
-
-  outFile.close();
+  ofstream ofs(file.c_str(), ios::out);
+  if (!ofs) {
+    cerr << "Failed to open " << file << endl;
+    exit(1);
+  }
+  ostream* os = &ofs;
+  save(os, score_type, bin);
+  ofs.close();
 }

-void ScoreArray::loadbin(ifstream& inFile, size_t n)
-{
-  ScoreStats entry(number_of_scores);
+void ScoreArray::save(const string& score_type, bool bin) {
+  save(&cout, score_type, bin);
+}

-  for (size_t i=0 ; i < n; i++) {
-    entry.loadbin(inFile);
+void ScoreArray::loadbin(istream* is, size_t n)
+{
+  ScoreStats entry(m_num_scores);
+  for (size_t i = 0; i < n; i++) {
+    entry.loadbin(is);
    add(entry);
  }
 }

-void ScoreArray::loadtxt(ifstream& inFile, size_t n)
+void ScoreArray::loadtxt(istream* is, size_t n)
 {
-  ScoreStats entry(number_of_scores);
-
-  for (size_t i=0 ; i < n; i++) {
-    entry.loadtxt(inFile);
+  ScoreStats entry(m_num_scores);
+  for (size_t i = 0; i < n; i++) {
+    entry.loadtxt(is);
    add(entry);
  }
 }

-void ScoreArray::load(ifstream& inFile)
+void ScoreArray::load(istream* is)
 {
-  size_t number_of_entries=0;
-  bool binmode=false;
+  size_t number_of_entries = 0;
+  bool binmode = false;

-  std::string substring, stringBuf;
-  std::string::size_type loc;
+  string substring, stringBuf;
+  string::size_type loc;

-  std::getline(inFile, stringBuf);
-  if (!inFile.good()) {
+  getline(*is, stringBuf);
+  if (!is->good()) {
    return;
  }

@ -94,35 +103,38 @@ void ScoreArray::load(ifstream& inFile)
    }
    getNextPound(stringBuf, substring);
    getNextPound(stringBuf, substring);
-    idx = substring;
+    m_index = substring;
    getNextPound(stringBuf, substring);
    number_of_entries = atoi(substring.c_str());
    getNextPound(stringBuf, substring);
-    number_of_scores = atoi(substring.c_str());
+    m_num_scores = atoi(substring.c_str());
    getNextPound(stringBuf, substring);
-    score_type = substring;
+    m_score_type = substring;
  }

-  (binmode)?loadbin(inFile, number_of_entries):loadtxt(inFile, number_of_entries);
+  if (binmode) {
+    loadbin(is, number_of_entries);
+  } else {
+    loadtxt(is, number_of_entries);
+  }

-  std::getline(inFile, stringBuf);
+  getline(*is, stringBuf);
  if (!stringBuf.empty()) {
-    if ((loc = stringBuf.find(SCORES_TXT_END)) != 0 && (loc = stringBuf.find(SCORES_BIN_END)) != 0) {
+    if ((loc = stringBuf.find(SCORES_TXT_END)) != 0 &&
+        (loc = stringBuf.find(SCORES_BIN_END)) != 0) {
      TRACE_ERR("ERROR: ScoreArray::load(): Wrong footer");
      return;
    }
  }
 }

-void ScoreArray::load(const std::string &file)
+void ScoreArray::load(const string &file)
 {
-  TRACE_ERR("loading data from " << file << std::endl);
-
-  inputfilestream inFile(file); // matches a stream with a file. Opens the file
-
-  load((ifstream&) inFile);
-
-  inFile.close();
+  TRACE_ERR("loading data from " << file << endl);
+  inputfilestream input_stream(file); // matches a stream with a file. Opens the file
+  istream* is = &input_stream;
+  load(is);
+  input_stream.close();
 }


@ -139,7 +151,8 @@ bool ScoreArray::check_consistency() const
  if (sz == 0)
    return true;

-  for (scorearray_t::const_iterator i = array_.begin(); i != array_.end(); ++i) {
+  for (scorearray_t::const_iterator i = m_array.begin();
+       i != m_array.end(); ++i) {
    if (i->size() != sz)
      return false;
  }
--- a/mert/ScoreArray.h
+++ b/mert/ScoreArray.h
@ -24,85 +24,62 @@ const char SCORES_BIN_END[] = "SCORES_BIN_END_0";

 class ScoreArray
 {
-protected:
-  scorearray_t array_;
-  std::string score_type;
-  size_t number_of_scores;
+ private:
+  scorearray_t m_array;
+  std::string m_score_type;
+  size_t m_num_scores;

-private:
-  // idx to identify the utterance.
+  // index to identify the utterance.
  // It can differ from the index inside the vector.
-  std::string  idx;
+  std::string  m_index;

 public:
  ScoreArray();
  ~ScoreArray() {}

-  inline void clear() {
-    array_.clear();
-  }
+  void clear() { m_array.clear(); }

-  inline std::string getIndex() const {
-    return idx;
-  }
-  inline void setIndex(const std::string& value) {
-    idx=value;
-  }
+  std::string getIndex() const { return m_index; }

-//	inline ScoreStats get(size_t i){ return array_.at(i); }
+  void setIndex(const std::string& value) { m_index = value; }

-  inline ScoreStats&  get(size_t i) {
-    return array_.at(i);
-  }
-  inline const ScoreStats&  get(size_t i)const {
-    return array_.at(i);
-  }
+  ScoreStats& get(size_t i) { return m_array.at(i); }

-  void add(const ScoreStats& e) {
-    array_.push_back(e);
-  }
+  const ScoreStats& get(size_t i) const { return m_array.at(i); }
+
+  void add(const ScoreStats& e) { m_array.push_back(e); }

  //ADDED BY TS
  void swap(size_t i, size_t j) {
-    std::swap(array_[i],array_[j]);
+    std::swap(m_array[i], m_array[j]);
  }

  void resize(size_t new_size) {
-    array_.resize(std::min(new_size,array_.size()));
+    m_array.resize(std::min(new_size, m_array.size()));
  }
  //END_ADDED

  void merge(ScoreArray& e);

-  inline std::string name() const {
-    return score_type;
-  }
+  std::string name() const { return m_score_type; }

-  inline void name(std::string &sctype) {
-    score_type = sctype;
-  }
+  void name(std::string &score_type) { m_score_type = score_type; }

-  inline size_t size() const {
-    return array_.size();
-  }
-  inline size_t NumberOfScores() const {
-    return number_of_scores;
-  }
-  inline void NumberOfScores(size_t v) {
-    number_of_scores = v;
-  }
+  size_t size() const { return m_array.size(); }

-  void savetxt(ofstream& outFile, const std::string& sctype);
-  void savebin(ofstream& outFile, const std::string& sctype);
-  void save(ofstream& outFile, const std::string& sctype, bool bin=false);
-  void save(const std::string &file, const std::string& sctype, bool bin=false);
-  inline void save(const std::string& sctype, bool bin=false) {
-    save("/dev/stdout", sctype, bin);
-  }
+  size_t NumberOfScores() const { return m_num_scores; }

-  void loadtxt(ifstream& inFile, size_t n);
-  void loadbin(ifstream& inFile, size_t n);
-  void load(ifstream& inFile);
+  void NumberOfScores(size_t v) { m_num_scores = v; }
+
+  void savetxt(std::ostream* os, const std::string& score_type);
+  void savebin(std::ostream* os, const std::string& score_type);
+  void save(std::ostream* os, const std::string& score_type, bool bin=false);
+  void save(const std::string &file, const std::string& score_type, bool bin=false);
+  void save(const std::string& score_type, bool bin=false);
+
+  void loadtxt(std::istream* is, size_t n);
+  void loadbin(std::istream* is, size_t n);
+  void load(std::istream* is);
  void load(const std::string &file);

  bool check_consistency() const;
--- a/mert/ScoreData.cpp
+++ b/mert/ScoreData.cpp
@ -7,55 +7,56 @@
 */

 #include "ScoreData.h"
+
+#include <fstream>
 #include "Scorer.h"
 #include "Util.h"
 #include "FileStream.h"

-ScoreData::ScoreData(Scorer& ptr):
-  theScorer(&ptr)
+ScoreData::ScoreData(Scorer* scorer) :
+  m_scorer(scorer)
 {
-  score_type = theScorer->getName();
+  m_score_type = m_scorer->getName();
  // This is not dangerous: we don't use the this pointer in SetScoreData.
-  theScorer->setScoreData(this);
-  number_of_scores = theScorer->NumberOfScores();
-  // TRACE_ERR("ScoreData: number_of_scores: " << number_of_scores << std::endl);
+  m_scorer->setScoreData(this);
+  m_num_scores = m_scorer->NumberOfScores();
+  // TRACE_ERR("ScoreData: m_num_scores: " << m_num_scores << std::endl);
 }

-void ScoreData::save(std::ofstream& outFile, bool bin)
+void ScoreData::save(ostream* os, bool bin)
 {
-  for (scoredata_t::iterator i = array_.begin(); i !=array_.end(); i++) {
-    i->save(outFile, score_type, bin);
+  for (scoredata_t::iterator i = m_array.begin();
+       i != m_array.end(); ++i) {
+    i->save(os, m_score_type, bin);
  }
 }

-void ScoreData::save(const std::string &file, bool bin)
+void ScoreData::save(const string &file, bool bin)
 {
  if (file.empty()) return;
-  TRACE_ERR("saving the array into " << file << std::endl);
+  TRACE_ERR("saving the array into " << file << endl);

  // matches a stream with a file. Opens the file.
-  std::ofstream outFile(file.c_str(), std::ios::out);
-
-  ScoreStats entry;
-
-  save(outFile, bin);
-
-  outFile.close();
+  ofstream ofs(file.c_str(), ios::out);
+  ostream* os = &ofs;
+  save(os, bin);
+  ofs.close();
 }

-void ScoreData::load(ifstream& inFile)
+void ScoreData::save(bool bin) {
+  save(&cout, bin);
+}
+
+void ScoreData::load(istream* is)
 {
  ScoreArray entry;

-  while (!inFile.eof()) {
-
-    if (!inFile.good()) {
-      std::cerr << "ERROR ScoreData::load inFile.good()" << std::endl;
+  while (!is->eof()) {
+    if (!is->good()) {
+      cerr << "ERROR ScoreData::load inFile.good()" << endl;
    }
-
    entry.clear();
-    entry.load(inFile);
-
+    entry.load(is);
    if (entry.size() == 0) {
      break;
    }
@ -63,63 +64,58 @@ void ScoreData::load(ifstream& inFile)
  }
 }

-
-void ScoreData::load(const std::string &file)
+void ScoreData::load(const string &file)
 {
-  TRACE_ERR("loading score data from " << file << std::endl);
-
-  inputfilestream inFile(file); // matches a stream with a file. Opens the file
-
-  if (!inFile) {
+  TRACE_ERR("loading score data from " << file << endl);
+  inputfilestream input_stream(file); // matches a stream with a file. Opens the file
+  if (!input_stream) {
    throw runtime_error("Unable to open score file: " + file);
  }
-
-  load((ifstream&) inFile);
-
-  inFile.close();
+  istream* is = &input_stream;
+  load(is);
+  input_stream.close();
 }

-
 void ScoreData::add(ScoreArray& e)
 {
  if (exists(e.getIndex())) { // array at position e.getIndex() already exists
    //enlarge array at position e.getIndex()
    size_t pos = getIndex(e.getIndex());
-    array_.at(pos).merge(e);
+    m_array.at(pos).merge(e);
  } else {
-    array_.push_back(e);
+    m_array.push_back(e);
    setIndex();
  }
 }

-void ScoreData::add(const ScoreStats& e, const std::string& sent_idx)
+void ScoreData::add(const ScoreStats& e, const string& sent_idx)
 {
  if (exists(sent_idx)) { // array at position e.getIndex() already exists
    // Enlarge array at position e.getIndex()
    size_t pos = getIndex(sent_idx);
    //          TRACE_ERR("Inserting in array " << sent_idx << std::endl);
-    array_.at(pos).add(e);
+    m_array.at(pos).add(e);
    //          TRACE_ERR("size: " << size() << " -> " << a.size() << std::endl);
  } else {
    //          TRACE_ERR("Creating a new entry in the array" << std::endl);
    ScoreArray a;
-    a.NumberOfScores(number_of_scores);
+    a.NumberOfScores(m_num_scores);
    a.add(e);
    a.setIndex(sent_idx);
-    size_t idx = array_.size();
-    array_.push_back(a);
-    idx2arrayname_[idx] = sent_idx;
-    arrayname2idx_[sent_idx]=idx;
+    size_t idx = m_array.size();
+    m_array.push_back(a);
+    m_index_to_array_name[idx] = sent_idx;
+    m_array_name_to_index[sent_idx]=idx;
    //          TRACE_ERR("size: " << size() << " -> " << a.size() << std::endl);
  }
 }

 bool ScoreData::check_consistency() const
 {
-  if (array_.size() == 0)
+  if (m_array.size() == 0)
    return true;

-  for (scoredata_t::const_iterator i = array_.begin(); i != array_.end(); ++i)
+  for (scoredata_t::const_iterator i = m_array.begin(); i != m_array.end(); ++i)
    if (!i->check_consistency()) return false;

  return true;
@ -127,10 +123,10 @@ bool ScoreData::check_consistency() const

 void ScoreData::setIndex()
 {
-  size_t j=0;
-  for (scoredata_t::iterator i = array_.begin(); i !=array_.end(); i++) {
-    idx2arrayname_[j]=i->getIndex();
-    arrayname2idx_[i->getIndex()]=j;
+  size_t j = 0;
+  for (scoredata_t::iterator i = m_array.begin(); i != m_array.end(); ++i) {
+    m_index_to_array_name[j] = i->getIndex();
+    m_array_name_to_index[i->getIndex()]=j;
    j++;
  }
 }
--- a/mert/ScoreData.h
+++ b/mert/ScoreData.h
@ -9,9 +9,8 @@
 #ifndef MERT_SCORE_DATA_H_
 #define MERT_SCORE_DATA_H_

-#include <fstream>
-#include <vector>
 #include <iostream>
+#include <vector>
 #include <stdexcept>
 #include <string>
 #include "ScoreArray.h"
@ -23,35 +22,34 @@ class Scorer;

 class ScoreData
 {
-protected:
-  scoredata_t array_;
-  idx2name idx2arrayname_; // map from index to name of array
-  name2idx arrayname2idx_; // map from name to index of array
-
 private:
  // Do not allow the user to instanciate without arguments.
  ScoreData() {}

-  Scorer* theScorer;
-  std::string score_type;
-  size_t number_of_scores;
+  scoredata_t m_array;
+  idx2name m_index_to_array_name; // map from index to name of array
+  name2idx m_array_name_to_index; // map from name to index of array
+
+  Scorer* m_scorer;
+  std::string m_score_type;
+  size_t m_num_scores;

 public:
-  ScoreData(Scorer& sc);
+  ScoreData(Scorer* scorer);
  ~ScoreData() {}

-  inline void clear() {
-    array_.clear();
-  }
+  void clear() { m_array.clear(); }

  inline ScoreArray get(const std::string& idx) {
-    return array_.at(getIndex(idx));
+    return m_array.at(getIndex(idx));
  }
+
  inline ScoreArray& get(size_t idx) {
-    return array_.at(idx);
+    return m_array.at(idx);
  }
+
  inline const ScoreArray& get(size_t idx) const {
-    return array_.at(idx);
+    return m_array.at(idx);
  }

  inline bool exists(const std::string& sent_idx) const {
@ -59,56 +57,51 @@ public:
  }

  inline bool exists(int sent_idx) const {
-    return (sent_idx > -1 && sent_idx < static_cast<int>(array_.size())) ? true : false;
+    return (sent_idx > -1 && sent_idx < static_cast<int>(m_array.size())) ? true : false;
  }

  inline ScoreStats& get(size_t i, size_t j) {
-    return array_.at(i).get(j);
-  }
-  inline const ScoreStats&  get(size_t i, size_t j) const {
-    return array_.at(i).get(j);
+    return m_array.at(i).get(j);
  }

-  inline std::string name() const {
-    return score_type;
+  inline const ScoreStats& get(size_t i, size_t j) const {
+    return m_array.at(i).get(j);
  }

-  inline std::string name(const std::string &sctype) {
-    return score_type = sctype;
+  std::string name() const { return m_score_type; }
+
+  std::string name(const std::string &score_type) {
+    return m_score_type = score_type;
  }

  void add(ScoreArray& e);
  void add(const ScoreStats& e, const std::string& sent_idx);

-  inline size_t NumberOfScores() const {
-    return number_of_scores;
-  }
-  inline size_t size() const {
-    return array_.size();
-  }
+  size_t NumberOfScores() const { return m_num_scores; }
+  size_t size() const { return m_array.size(); }

  void save(const std::string &file, bool bin=false);
-  void save(ofstream& outFile, bool bin=false);
-  inline void save(bool bin=false) {
-    save("/dev/stdout", bin);
-  }
+  void save(std::ostream* os, bool bin=false);
+  void save(bool bin=false);

-  void load(ifstream& inFile);
+  void load(std::istream* is);
  void load(const std::string &file);

  bool check_consistency() const;
+
  void setIndex();

  inline int getIndex(const std::string& idx) const {
-    name2idx::const_iterator i = arrayname2idx_.find(idx);
-    if (i != arrayname2idx_.end())
+    name2idx::const_iterator i = m_array_name_to_index.find(idx);
+    if (i != m_array_name_to_index.end())
      return i->second;
    else
      return -1;
  }
+
  inline std::string getIndex(size_t idx) const {
-    idx2name::const_iterator i = idx2arrayname_.find(idx);
-    if (i != idx2arrayname_.end())
+    idx2name::const_iterator i = m_index_to_array_name.find(idx);
+    if (i != m_index_to_array_name.end())
      throw runtime_error("there is no entry at index " + idx);
    return i->second;
  }
--- a/mert/ScoreStats.cpp
+++ b/mert/ScoreStats.cpp
@ -14,30 +14,30 @@ const int kAvailableSize = 8;
 } // namespace

 ScoreStats::ScoreStats()
-    : available_(kAvailableSize), entries_(0),
-      array_(new ScoreStatsType[available_]) {}
+    : m_available_size(kAvailableSize), m_entries(0),
+      m_array(new ScoreStatsType[m_available_size]) {}

 ScoreStats::ScoreStats(const size_t size)
-    : available_(size), entries_(size),
-      array_(new ScoreStatsType[available_])
+    : m_available_size(size), m_entries(size),
+      m_array(new ScoreStatsType[m_available_size])
 {
-  memset(array_, 0, GetArraySizeWithBytes());
+  memset(m_array, 0, GetArraySizeWithBytes());
 }

 ScoreStats::~ScoreStats()
 {
-  if (array_) {
-    delete [] array_;
-    array_ = NULL;
+  if (m_array) {
+    delete [] m_array;
+    m_array = NULL;
  }
 }

 void ScoreStats::Copy(const ScoreStats &stats)
 {
-  available_ = stats.available();
-  entries_ = stats.size();
-  array_ = new ScoreStatsType[available_];
-  memcpy(array_, stats.getArray(), GetArraySizeWithBytes());
+  m_available_size = stats.available();
+  m_entries = stats.size();
+  m_array = new ScoreStatsType[m_available_size];
+  memcpy(m_array, stats.getArray(), GetArraySizeWithBytes());
 }

 ScoreStats::ScoreStats(const ScoreStats &stats)
@ -47,27 +47,27 @@ ScoreStats::ScoreStats(const ScoreStats &stats)

 ScoreStats& ScoreStats::operator=(const ScoreStats &stats)
 {
-  delete [] array_;
+  delete [] m_array;
  Copy(stats);
  return *this;
 }

 void ScoreStats::expand()
 {
-  available_ *= 2;
-  scorestats_t buf = new ScoreStatsType[available_];
-  memcpy(buf, array_, GetArraySizeWithBytes());
-  delete [] array_;
-  array_ = buf;
+  m_available_size *= 2;
+  scorestats_t buf = new ScoreStatsType[m_available_size];
+  memcpy(buf, m_array, GetArraySizeWithBytes());
+  delete [] m_array;
+  m_array = buf;
 }

 void ScoreStats::add(ScoreStatsType v)
 {
  if (isfull()) expand();
-  array_[entries_++]=v;
+  m_array[m_entries++]=v;
 }

-void ScoreStats::set(const std::string& str)
+void ScoreStats::set(const string& str)
 {
  reset();
  vector<string> out;
@ -78,46 +78,51 @@ void ScoreStats::set(const std::string& str)
  }
 }

-void ScoreStats::loadbin(std::ifstream& inFile)
+void ScoreStats::loadbin(istream* is)
 {
-  inFile.read((char*)array_, GetArraySizeWithBytes());
+  is->read(reinterpret_cast<char*>(m_array),
+           static_cast<streamsize>(GetArraySizeWithBytes()));
 }

-void ScoreStats::loadtxt(std::ifstream& inFile)
+void ScoreStats::loadtxt(istream* is)
 {
-  std::string theString;
-  std::getline(inFile, theString);
-  set(theString);
+  string line;
+  getline(*is, line);
+  set(line);
 }

-void ScoreStats::loadtxt(const std::string &file)
+void ScoreStats::loadtxt(const string &file)
 {
-//      TRACE_ERR("loading the stats from " << file << std::endl);
-
-  std::ifstream inFile(file.c_str(), std::ios::in); // matches a stream with a file. Opens the file
-
-  loadtxt(inFile);
+  ifstream ifs(file.c_str(), ios::in); // matches a stream with a file. Opens the file
+  if (!ifs) {
+    cerr << "Failed to open " << file << endl;
+    exit(1);
+  }
+  istream* is = &ifs;
+  loadtxt(is);
 }


-void ScoreStats::savetxt(const std::string &file)
+void ScoreStats::savetxt(const string &file)
 {
-//      TRACE_ERR("saving the stats into " << file << std::endl);
-
-  std::ofstream outFile(file.c_str(), std::ios::out); // matches a stream with a file. Opens the file
-
-  savetxt(outFile);
+  ofstream ofs(file.c_str(), ios::out); // matches a stream with a file. Opens the file
+  ostream* os = &ofs;
+  savetxt(os);
 }

-
-void ScoreStats::savetxt(std::ofstream& outFile)
+void ScoreStats::savetxt(ostream* os)
 {
-  outFile << *this;
+  *os << *this;
 }

-void ScoreStats::savebin(std::ofstream& outFile)
+void ScoreStats::savetxt() {
+  savetxt(&cout);
+}
+
+void ScoreStats::savebin(ostream* os)
 {
-  outFile.write((char*)array_, GetArraySizeWithBytes());
+  os->write(reinterpret_cast<char*>(m_array),
+            static_cast<streamsize>(GetArraySizeWithBytes()));
 }

 ostream& operator<<(ostream& o, const ScoreStats& e)
--- a/mert/ScoreStats.h
+++ b/mert/ScoreStats.h
@ -22,11 +22,11 @@ using namespace std;
 class ScoreStats
 {
 private:
-  size_t available_;
-  size_t entries_;
+  size_t m_available_size;
+  size_t m_entries;

  // TODO: Use smart pointer for exceptional-safety.
-  scorestats_t array_;
+  scorestats_t m_array;

 public:
  ScoreStats();
@ -40,31 +40,23 @@ public:

  void Copy(const ScoreStats &stats);

-  bool isfull() const {
-    return (entries_ < available_) ? 0 : 1;
-  }
+  bool isfull() const { return (m_entries < m_available_size) ? 0 : 1; }

  void expand();
  void add(ScoreStatsType v);

  void clear() {
-    memset((void*)array_, 0, GetArraySizeWithBytes());
+    memset((void*)m_array, 0, GetArraySizeWithBytes());
  }

  void reset() {
-    entries_ = 0;
+    m_entries = 0;
    clear();
  }

-  inline ScoreStatsType get(size_t i) {
-    return array_[i];
-  }
-  inline ScoreStatsType get(size_t i)const {
-    return array_[i];
-  }
-  inline scorestats_t getArray() const {
-    return array_;
-  }
+  ScoreStatsType get(size_t i) { return m_array[i]; }
+  ScoreStatsType get(size_t i) const { return m_array[i]; }
+  scorestats_t getArray() const { return m_array; }

  void set(const std::string& str);

@ -76,31 +68,24 @@ public:
    }
  }

-  inline size_t bytes() const {
-    return GetArraySizeWithBytes();
-  }
+  size_t bytes() const { return GetArraySizeWithBytes(); }

  size_t GetArraySizeWithBytes() const {
-    return entries_ * sizeof(ScoreStatsType);
+    return m_entries * sizeof(ScoreStatsType);
  }

-  inline size_t size() const {
-    return entries_;
-  }
-  inline size_t available() const {
-    return available_;
-  }
+  size_t size() const { return m_entries; }
+
+  size_t available() const { return m_available_size; }

  void savetxt(const std::string &file);
-  void savetxt(ofstream& outFile);
-  void savebin(ofstream& outFile);
-  inline void savetxt() {
-    savetxt("/dev/stdout");
-  }
+  void savetxt(ostream* os);
+  void savebin(ostream* os);
+  void savetxt();

  void loadtxt(const std::string &file);
-  void loadtxt(ifstream& inFile);
-  void loadbin(ifstream& inFile);
+  void loadtxt(istream* is);
+  void loadbin(istream* is);

  /**
   * Write the whole object to a stream.
--- a/mert/Scorer.cpp
+++ b/mert/Scorer.cpp
@ -1,6 +1,9 @@
 #include "Scorer.h"
+
 #include <limits>
+#include "Vocabulary.h"
 #include "Util.h"
+#include "Singleton.h"

 namespace {

@ -34,14 +37,14 @@ inline float score_average(const statscores_t& scores, size_t start, size_t end)

 Scorer::Scorer(const string& name, const string& config)
    : m_name(name),
-      m_encoder(new Encoder),
+      m_vocab(mert::VocabularyFactory::GetVocabulary()),
      m_score_data(0),
      m_enable_preserve_case(true) {
  InitConfig(config);
 }

 Scorer::~Scorer() {
-  delete m_encoder;
+  Singleton<mert::Vocabulary>::Delete();
 }

 void Scorer::InitConfig(const string& config) {
@ -65,23 +68,6 @@ void Scorer::InitConfig(const string& config) {
  }
 }

-Scorer::Encoder::Encoder() {}
-
-Scorer::Encoder::~Encoder() {}
-
-int Scorer::Encoder::Encode(const string& token) {
-  map<string, int>::iterator it = m_vocab.find(token);
-  int encoded_token;
-  if (it == m_vocab.end()) {
-    // Add an new entry to the vocaburary.
-    encoded_token = static_cast<int>(m_vocab.size());
-    m_vocab[token] = encoded_token;
-  } else {
-    encoded_token = it->second;
-  }
-  return encoded_token;
-}
-
 void Scorer::TokenizeAndEncode(const string& line, vector<int>& encoded) {
  std::istringstream in(line);
  std::string token;
@ -92,7 +78,7 @@ void Scorer::TokenizeAndEncode(const string& line, vector<int>& encoded) {
        *it = tolower(*it);
      }
    }
-    encoded.push_back(m_encoder->Encode(token));
+    encoded.push_back(m_vocab->Encode(token));
  }
 }

@ -107,40 +93,40 @@ void Scorer::setFactors(const string& factors)
  for(vector<string>::iterator it = factors_vec.begin(); it != factors_vec.end(); ++it)
  {
    int factor = atoi(it->c_str());
-    m_factors.push_back(factor);        
+    m_factors.push_back(factor);
  }
 }

 /**
 * Take the factored sentence and return the desired factors
 */
-string Scorer::applyFactors(const string& sentence)
+string Scorer::applyFactors(const string& sentence) const
 {
  if (m_factors.size() == 0) return sentence;
-  
+
  vector<string> tokens;
  split(sentence, ' ', tokens);
- 
-  stringstream sstream; 
+
+  stringstream sstream;
  for (size_t i = 0; i < tokens.size(); ++i)
  {
-    if (tokens[i] == "") continue;   
+    if (tokens[i] == "") continue;

    vector<string> factors;
    split(tokens[i], '|', factors);

    int fsize = factors.size();
-    
-    if (i>0) sstream << " ";
-    
+
+    if (i > 0) sstream << " ";
+
    for (size_t j = 0; j < m_factors.size(); ++j)
    {
      int findex = m_factors[j];
      if (findex < 0 || findex >= fsize) throw runtime_error("Factor index is out of range.");

-      if (j>0) sstream << "|";
+      if (j > 0) sstream << "|";
      sstream << factors[findex];
-    }    
+    }
  }
  return sstream.str();
 }
--- a/mert/Scorer.h
+++ b/mert/Scorer.h
@ -13,6 +13,12 @@ using namespace std;

 class ScoreStats;

+namespace mert {
+
+class Vocabulary;
+
+} // namespace mert
+
 /**
 * Superclass of all scorers and dummy implementation.
 *
@ -105,24 +111,15 @@ class Scorer
  /**
   * Take the factored sentence and return the desired factors
   */
-  virtual string applyFactors(const string& sentece);
+  virtual string applyFactors(const string& sentece) const;
+
+  mert::Vocabulary* GetVocab() const { return m_vocab; }

 private:
-  class Encoder {
-   public:
-    Encoder();
-    virtual ~Encoder();
-    int Encode(const std::string& token);
-    void Clear() { m_vocab.clear(); }
-
-   private:
-    std::map<std::string, int> m_vocab;
-  };
-
  void InitConfig(const string& config);

  string m_name;
-  Encoder* m_encoder;
+  mert::Vocabulary* m_vocab;
  map<string, string> m_config;
  vector<int> m_factors;

@ -144,14 +141,11 @@ class Scorer

  /**
   * Tokenise line and encode.
-   * Note: We assume that all tokens are separated by single spaces.
+   * Note: We assume that all tokens are separated by whitespace.
   */
  void TokenizeAndEncode(const string& line, vector<int>& encoded);
-
-  void ClearEncoder() { m_encoder->Clear(); }
 };

-
 /**
 * Abstract base class for Scorers that work by adding statistics across all
 * outout sentences, then apply some formula, e.g., BLEU, PER.
--- a/mert/Singleton.h
+++ b/mert/Singleton.h
@ -0,0 +1,33 @@
+#ifndef MERT_SINGLETON_H_
+#define MERT_SINGLETON_H_
+
+#include <cstdlib>
+
+// thread *un*safe singleton.
+// TODO: replace this with a thread-safe singleton.
+template <typename T>
+class Singleton {
+ public:
+  static T* GetInstance() {
+    if (m_instance == NULL) {
+      m_instance = new T;
+    }
+    return m_instance;
+  }
+
+  static void Delete() {
+    if (m_instance) {
+      delete m_instance;
+      m_instance = NULL;
+    }
+  }
+
+ private:
+  Singleton();
+  static T* m_instance;
+};
+
+template <typename T>
+T* Singleton<T>::m_instance = NULL;
+
+#endif  // MERT_SINGLETON_H_
--- a/mert/SingletonTest.cpp
+++ b/mert/SingletonTest.cpp
@ -0,0 +1,27 @@
+#include "Singleton.h"
+
+#define BOOST_TEST_MODULE MertSingleton
+#include <boost/test/unit_test.hpp>
+
+namespace {
+
+static int g_count = 0;
+
+class Instance {
+ public:
+  Instance() { ++g_count; }
+  ~Instance() {}
+};
+
+} // namespace
+
+BOOST_AUTO_TEST_CASE(singleton_basic) {
+  Instance* instance1 = Singleton<Instance>::GetInstance();
+  Instance* instance2 = Singleton<Instance>::GetInstance();
+  Instance* instance3 = Singleton<Instance>::GetInstance();
+  BOOST_REQUIRE(instance1 == instance2);
+  BOOST_REQUIRE(instance2 == instance3);
+  BOOST_CHECK_EQUAL(1, g_count);
+
+  Singleton<Instance>::Delete();
+}
--- a/mert/Vocabulary.cpp
+++ b/mert/Vocabulary.cpp
@ -0,0 +1,21 @@
+#include "Vocabulary.h"
+#include "Singleton.h"
+
+namespace mert {
+namespace {
+Vocabulary* g_vocab = NULL;
+} // namespace
+
+Vocabulary* VocabularyFactory::GetVocabulary() {
+  if (g_vocab == NULL) {
+    return Singleton<Vocabulary>::GetInstance();
+  } else {
+    return g_vocab;
+  }
+}
+
+void VocabularyFactory::SetVocabulary(Vocabulary* vocab) {
+  g_vocab = vocab;
+}
+
+} // namespace mert
--- a/mert/Vocabulary.h
+++ b/mert/Vocabulary.h
@ -0,0 +1,79 @@
+#ifndef MERT_VOCABULARY_H_
+#define MERT_VOCABULARY_H_
+
+#include <map>
+#include <string>
+
+namespace mert {
+
+/**
+ * An embarrassingly simple map that handles the vocabulary used to
+ * calculate various scores such as BLEU.
+ *
+ * TODO: replace this with a more efficient data structure.
+ */
+class Vocabulary {
+ public:
+  typedef std::map<std::string, int>::iterator iterator;
+  typedef std::map<std::string, int>::const_iterator const_iterator;
+
+  Vocabulary() {}
+  virtual ~Vocabulary() {}
+
+  /** Returns the id assigned to "token"; an unseen token is added with the next free id. */
+  int Encode(const std::string& token) {
+    iterator it = m_vocab.find(token);
+    int encoded_token;
+    if (it == m_vocab.end()) {
+      // Add a new entry to the vocabulary.
+      encoded_token = static_cast<int>(m_vocab.size());
+      m_vocab[token] = encoded_token;
+    } else {
+      encoded_token = it->second;
+    }
+    return encoded_token;
+  }
+
+  /**
+   * Return true iff the specified "str" is found in the container.
+   */
+  bool Lookup(const std::string&str , int* v) const {
+    const_iterator it = m_vocab.find(str);
+    if (it == m_vocab.end()) return false;
+    *v = it->second;
+    return true;
+  }
+
+  void clear() { m_vocab.clear(); }
+
+  bool empty() const { return m_vocab.empty(); }
+
+  size_t size() const { return m_vocab.size(); }
+
+  iterator find(const std::string& str) { return m_vocab.find(str); }
+  const_iterator find(const std::string& str) const { return m_vocab.find(str); }
+
+  int& operator[](const std::string& str) { return m_vocab[str]; }
+
+  iterator begin() { return m_vocab.begin(); }
+  const_iterator begin() const { return m_vocab.begin(); }
+  iterator end() { return m_vocab.end(); }
+  const_iterator end() const { return m_vocab.end(); }
+
+ private:
+  std::map<std::string, int> m_vocab;
+};
+
+class VocabularyFactory {
+ public:
+  static Vocabulary* GetVocabulary();
+  static void SetVocabulary(Vocabulary* vocab);
+
+ private:
+  VocabularyFactory() {}
+  virtual ~VocabularyFactory() {}
+};
+
+} // namespace mert
+
+#endif  // MERT_VOCABULARY_H_
--- a/mert/VocabularyTest.cpp
+++ b/mert/VocabularyTest.cpp
@ -0,0 +1,52 @@
+#include "Vocabulary.h"
+
+#define BOOST_TEST_MODULE MertVocabulary
+#include <boost/test/unit_test.hpp>
+
+#include "Singleton.h"
+
+namespace mert {
+namespace {
+
+void TearDown() {  // Test cleanup helper.
+  Singleton<Vocabulary>::Delete();  // NOTE(review): presumably destroys the shared Vocabulary instance -- confirm in Singleton.h.
+}
+
+} // namespace
+
+BOOST_AUTO_TEST_CASE(vocab_basic) {  // Exercises Encode(), Lookup(), size() and clear() on a local Vocabulary.
+  Vocabulary vocab;
+  BOOST_REQUIRE(vocab.empty());
+  vocab.clear();  // Clearing an already empty vocabulary must be harmless.
+
+  BOOST_CHECK_EQUAL(0, vocab.Encode("hello"));  // First new token gets id 0.
+  BOOST_CHECK_EQUAL(0, vocab.Encode("hello"));  // Encoding a known token returns its existing id.
+  BOOST_CHECK_EQUAL(1, vocab.Encode("world"));  // Next new token gets the next id.
+
+  BOOST_CHECK_EQUAL(2, vocab.size());
+
+  int v;
+  BOOST_CHECK(vocab.Lookup("hello", &v));
+  BOOST_CHECK_EQUAL(0, v);
+  BOOST_CHECK(vocab.Lookup("world", &v));
+  BOOST_CHECK_EQUAL(1, v);
+
+  BOOST_CHECK(!vocab.Lookup("java", &v));  // A token that was never encoded is not found.
+
+  vocab.clear();  // After clear() previously known tokens are gone.
+  BOOST_CHECK(!vocab.Lookup("hello", &v));
+  BOOST_CHECK(!vocab.Lookup("world", &v));
+}
+
+BOOST_AUTO_TEST_CASE(vocab_factory_test) {  // Checks that the factory hands out the same non-null pointer on repeated calls.
+  Vocabulary* vocab1 = VocabularyFactory::GetVocabulary();
+  Vocabulary* vocab2 = VocabularyFactory::GetVocabulary();
+  Vocabulary* vocab3 = VocabularyFactory::GetVocabulary();
+
+  BOOST_REQUIRE(vocab1 != NULL);  // REQUIRE: the identity checks below are meaningless for a null pointer.
+  BOOST_CHECK(vocab1 == vocab2);
+  BOOST_CHECK(vocab2 == vocab3);
+
+  TearDown();  // Runs Singleton<Vocabulary>::Delete() via the helper in this file.
+}
+} // namespace mert
--- a/mert/evaluator.cpp
+++ b/mert/evaluator.cpp
@ -55,7 +55,7 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap)
    for (int i = 0; i < bootstrap; ++i)
    {
      // TODO: Use smart pointer for exceptional-safety.
-      ScoreData* scoredata = new ScoreData(*g_scorer);
+      ScoreData* scoredata = new ScoreData(g_scorer);
      for (int j = 0; j < n; ++j)
      {
        int randomIndex = random() % n;
@ -89,7 +89,7 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap)
  else
  {
    // TODO: Use smart pointer for exceptional-safety.
-    ScoreData* scoredata = new ScoreData(*g_scorer);
+    ScoreData* scoredata = new ScoreData(g_scorer);
    for (int sid = 0; sid < n; ++sid)
    {
      string str_sid = int2string(sid);
--- a/mert/extractor.cpp
+++ b/mert/extractor.cpp
@ -197,7 +197,7 @@ int main(int argc, char** argv)

    PrintUserTime("References loaded");

-    Data data(*scorer);
+    Data data(scorer.get());

    // load old data
    for (size_t i = 0; i < prevScoreDataFiles.size(); i++) {
@ -208,13 +208,13 @@ int main(int argc, char** argv)

    // computing score statistics of each nbest file
    for (size_t i = 0; i < nbestFiles.size(); i++) {
-      data.loadnbest(nbestFiles.at(i));
+      data.loadNBest(nbestFiles.at(i));
    }

    PrintUserTime("Nbest entries loaded and scored");

    //ADDED_BY_TS
-    data.remove_duplicates();
+    data.removeDuplicates();
    //END_ADDED

    data.save(option.featureDataFile, option.scoreDataFile, option.binmode);
--- a/mert/mert.cpp
+++ b/mert/mert.cpp
@ -20,6 +20,7 @@
 #include "ScoreData.h"
 #include "FeatureData.h"
 #include "Optimizer.h"
+#include "OptimizerFactory.h"
 #include "Types.h"
 #include "Timer.h"
 #include "Util.h"
@ -338,7 +339,7 @@ int main(int argc, char **argv)
      ScorerFactory::getScorer(option.scorer_type, option.scorer_config));

  //load data
-  Data data(*scorer);
+  Data data(scorer.get());

  for (size_t i = 0; i < ScoreDataFiles.size(); i++) {
    cerr<<"Loading Data from: "<< ScoreDataFiles.at(i) << " and " << FeatureDataFiles.at(i) << endl;
@ -348,7 +349,7 @@ int main(int argc, char **argv)
  scorer->setScoreData(data.getScoreData().get());

  //ADDED_BY_TS
-  data.remove_duplicates();
+  data.removeDuplicates();
  //END_ADDED

  PrintUserTime("Data loaded");
@ -434,7 +435,7 @@ int main(int argc, char **argv)
    vector<OptimizationTask*>& tasks = allTasks[i];
    Optimizer *optimizer = OptimizerFactory::BuildOptimizer(option.pdim, to_optimize, start_list[0], option.optimize_type, option.nrandom);
    optimizer->SetScorer(data_ref.getScorer());
-    optimizer->SetFData(data_ref.getFeatureData());
+    optimizer->SetFeatureData(data_ref.getFeatureData());
    // A task for each start point
    for (size_t j = 0; j < startingPoints.size(); ++j) {
      OptimizationTask* task = new OptimizationTask(optimizer, startingPoints[j]);
--- a/mert/pro.cpp
+++ b/mert/pro.cpp
@ -21,8 +21,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/


-/** 
-  * This is part of the PRO implementation. It converts the features and scores 
+/**
+  * This is part of the PRO implementation. It converts the features and scores
  * files into a form suitable for input into the megam maxent trainer.
  *
  *   For details of PRO, refer to Hopkins & May (EMNLP 2011)
@ -34,9 +34,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include <iostream>
 #include <string>
 #include <vector>
+#include <utility>

 #include <boost/program_options.hpp>

+#include "BleuScorer.h"
 #include "FeatureDataIterator.h"
 #include "ScoreDataIterator.h"

@ -46,49 +48,49 @@ namespace po = boost::program_options;

 class SampledPair {
 private:
-	pair<size_t,size_t> translation1;
-	pair<size_t,size_t> translation2;
-	float scoreDiff;
+  pair<size_t,size_t> m_translation1;
+  pair<size_t,size_t> m_translation2;
+  float m_score_diff;
+
 public:
-	SampledPair(const pair<size_t,size_t>& t1, const pair<size_t,size_t>& t2, float diff ) {
-		if (diff > 0) {
-			translation1 = t1;
-			translation2 = t2;
-			scoreDiff = diff;
-		}
-		else {
-			translation1 = t2;
-			translation2 = t1;
-			scoreDiff = -diff;
-		}			
-	}
-	float getDiff() const { return scoreDiff; }
-	const pair<size_t,size_t>& getTranslation1() const { return translation1; }
-	const pair<size_t,size_t>& getTranslation2() const { return translation2; }
+  SampledPair(const pair<size_t,size_t>& t1, const pair<size_t,size_t>& t2, float diff ) {
+    if (diff > 0) {
+      m_translation1 = t1;
+      m_translation2 = t2;
+      m_score_diff = diff;
+    } else {
+      m_translation1 = t2;
+      m_translation2 = t1;
+      m_score_diff = -diff;
+    }
+  }
+
+  float getDiff() const { return m_score_diff; }
+  const pair<size_t,size_t>& getTranslation1() const { return m_translation1; }
+  const pair<size_t,size_t>& getTranslation2() const { return m_translation2; }
 };


 static float sentenceLevelBleuPlusOne(const vector<float>& stats) {
-	float logbleu = 0.0;
-	const unsigned int bleu_order = 4;
-	for (unsigned int j=0; j<bleu_order; j++) {
-		//cerr << (stats.get(2*j)+1) << "/" << (stats.get(2*j+1)+1) << " ";
-		logbleu += log(stats[2*j]+1) - log(stats[2*j+1]+1);
-	}
-	logbleu /= bleu_order;
-	const float brevity = 1.0 - static_cast<float>(stats[(bleu_order*2)]) / stats[1];
-	if (brevity < 0.0) {
-		logbleu += brevity;
-	}
-	//cerr << brevity << " -> " << exp(logbleu) << endl;
-	return exp(logbleu);
+  float logbleu = 0.0;
+  for (unsigned int j=0; j<kBleuNgramOrder; j++) {
+    //cerr << (stats.get(2*j)+1) << "/" << (stats.get(2*j+1)+1) << " ";
+    logbleu += log(stats[2*j]+1) - log(stats[2*j+1]+1);
+  }
+  logbleu /= kBleuNgramOrder;
+  const float brevity = 1.0 - static_cast<float>(stats[(kBleuNgramOrder * 2)]) / stats[1];
+  if (brevity < 0.0) {
+    logbleu += brevity;
+  }
+  //cerr << brevity << " -> " << exp(logbleu) << endl;
+  return exp(logbleu);
 }

 static void outputSample(ostream& out, const FeatureDataItem& f1, const FeatureDataItem& f2) {
  // difference in score in regular features
-	for(unsigned int j=0; j<f1.dense.size(); j++)
-		if (abs(f1.dense[j]-f2.dense[j]) > 0.00001)
-			out << " F" << j << " " << (f1.dense[j]-f2.dense[j]);
+  for(unsigned int j=0; j<f1.dense.size(); j++)
+    if (abs(f1.dense[j]-f2.dense[j]) > 0.00001)
+      out << " F" << j << " " << (f1.dense[j]-f2.dense[j]);

  if (f1.sparse.size() || f2.sparse.size()) {
    out << " ";
@ -101,27 +103,27 @@ static void outputSample(ostream& out, const FeatureDataItem& f1, const FeatureD
  }
 }

-	
-int main(int argc, char** argv) 
+
+int main(int argc, char** argv)
 {
  bool help;
  vector<string> scoreFiles;
  vector<string> featureFiles;
  int seed;
  string outputFile;
-  //TODO: options
-	const unsigned int n_candidates = 5000; // Gamma, in Hopkins & May
-	const unsigned int n_samples = 50; // Xi, in Hopkins & May
-	const float min_diff = 0.05;
+  // TODO: Add these constants to options
+  const unsigned int n_candidates = 5000; // Gamma, in Hopkins & May
+  const unsigned int n_samples = 50; // Xi, in Hopkins & May
+  const float min_diff = 0.05;

  po::options_description desc("Allowed options");
  desc.add_options()
-    ("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
-    ("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
-    ("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
-    ("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
-    ("output-file,o", po::value<string>(&outputFile), "Output file")
-    ;
+      ("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
+      ("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
+      ("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
+      ("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
+      ("output-file,o", po::value<string>(&outputFile), "Output file")
+      ;

  po::options_description cmdline_options;
  cmdline_options.add(desc);
@ -134,7 +136,7 @@ int main(int argc, char** argv)
      cout << desc << endl;
      exit(0);
  }
-  
+
  if (vm.count("random-seed")) {
    cerr << "Initialising random seed to " << seed << endl;
    srand(seed);
@ -167,7 +169,7 @@ int main(int argc, char** argv)
    out = &cout;
  }

-  
+
  vector<FeatureDataIterator> featureDataIters;
  vector<ScoreDataIterator> scoreDataIters;
  for (size_t i = 0; i < featureFiles.size(); ++i) {
@ -179,7 +181,7 @@ int main(int argc, char** argv)
  size_t sentenceId = 0;
  while(1) {
    vector<pair<size_t,size_t> > hypotheses;
-    //TODO: de-deuping. Collect hashes of score,feature pairs and 
+    //TODO: de-duping. Collect hashes of score,feature pairs and
    //only add index if it's unique.
    if (featureDataIters[0] == FeatureDataIterator::end()) {
      break;
@ -214,7 +216,7 @@ int main(int argc, char** argv)
      size_t rand2 = rand() % n_translations;
      pair<size_t,size_t> translation2 = hypotheses[rand2];
      float bleu2 = sentenceLevelBleuPlusOne(scoreDataIters[translation2.first]->operator[](translation2.second));
-      
+
      /*
      cerr << "t(" << translation1.first << "," << translation1.second << ") = " << bleu1 <<
        " t(" << translation2.first << "," << translation2.second << ") = " <<
@ -222,7 +224,7 @@ int main(int argc, char** argv)
      */
      if (abs(bleu1-bleu2) < min_diff)
        continue;
-      
+
      samples.push_back(SampledPair(translation1, translation2, bleu1-bleu2));
      scores.push_back(1.0-abs(bleu1-bleu2));
    }
@ -261,4 +263,3 @@ int main(int argc, char** argv)
  outFile.close();

 }
-
--- a/moses/src/GlobalLexicalModel.cpp
+++ b/moses/src/GlobalLexicalModel.cpp
@ -42,7 +42,6 @@ GlobalLexicalModel::~GlobalLexicalModel()
    }
    delete iter->first; // delete output word
  }
-  // if (m_cache != NULL) delete m_cache;
 }

 void GlobalLexicalModel::LoadData(const string &filePath,
@ -153,7 +152,7 @@ float GlobalLexicalModel::ScorePhrase( const TargetPhrase& targetPhrase ) const
 float GlobalLexicalModel::GetFromCacheOrScorePhrase( const TargetPhrase& targetPhrase ) const
 {
  LexiconCache& m_cache = m_local->cache;
-  map< const TargetPhrase*, float >::const_iterator query = m_cache.find( &targetPhrase );
+  const LexiconCache::const_iterator query = m_cache.find( &targetPhrase );
  if ( query != m_cache.end() ) {
    return query->second;
  }
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@ -1034,14 +1034,13 @@ sub execute_steps {
 	    }
 	    elsif (! -e &versionize(&step_file($i)).".DONE") {
 		my $step = &versionize(&step_file($i));
-		print "\texecuting $step via ";
 		&define_step($i);
 		&write_info($i);

 		# cluster job submission
 		if ($CLUSTER && ! &is_qsub_script($i)) {
 		    $DO{$i}++;
-		    print "qsub\n";
+		    print "\texecuting $step via qsub ($active active)\n";
 		    my $qsub_args = &get_qsub_args($DO_STEP[$i]);
 		    `qsub $qsub_args -e $step.STDERR -o $step.STDOUT $step`;
 		}
@ -1050,16 +1049,13 @@ sub execute_steps {
 		elsif ($CLUSTER || $active < $MAX_ACTIVE) {
 		    $active++;
 		    $DO{$i}++;
-		    print "sh ($active active)\n";
+		    print "\texecuting $step via sh ($active active)\n";
 		    sleep(5);
 		    if (!fork) {
 		        `sh $step >$step.STDOUT 2> $step.STDERR`;
 		         exit;
 		    }
 		}
-		else {
-		    print " --- on hold\n";
-		}
 	    }
 	}

@ -1853,6 +1849,9 @@ sub define_training_create_config {
 	    $cmd .= "-lm $factor:$order:$lm_file:$type ";
    }

+    my $additional_ini = &get("TRAINING:additional-ini");
+    $cmd .= "-additional-ini '$additional_ini' " if defined($additional_ini);
+
    &create_step($step_id,$cmd);
 }

@ -2185,6 +2184,7 @@ sub define_evaluation_decode {
    my $nbest = &backoff_and_get("EVALUATION:$set:nbest");
    my $moses_parallel = &backoff_and_get("EVALUATION:$set:moses-parallel");
    my $report_segmentation = &backoff_and_get("EVALUATION:$set:report-segmentation");
+    my $analyze_search_graph = &backoff_and_get("EVALUATION:$set:analyze-search-graph");
    my $report_precision_by_coverage = &backoff_and_get("EVALUATION:$set:report-precision-by-coverage");
    my $hierarchical = &get("TRAINING:hierarchical-rule-set");
    
@ -2193,6 +2193,9 @@ sub define_evaluation_decode {
      $settings .= " -use-alignment-info -alignment-output-file $system_output.wa";
      $report_segmentation = "yes";
    }
+    if (defined($analyze_search_graph) && $analyze_search_graph eq "yes") {
+      $settings .= " -unpruned-search-graph -osg $system_output.graph";
+    }
    if (defined($report_segmentation) && $report_segmentation eq "yes") {
      if ($hierarchical) {
        $settings .= " -T $system_output.trace";
@ -2237,12 +2240,17 @@ sub define_evaluation_analysis {
 	$output,$reference,$input) = &get_output_and_input($step_id);
    my $script = &backoff_and_get("EVALUATION:$set:analysis");
    my $report_segmentation = &backoff_and_get("EVALUATION:$set:report-segmentation");
+    my $analyze_search_graph = &backoff_and_get("EVALUATION:$set:analyze-search-graph");

    my $cmd = "$script -system $output -reference $reference -input $input -dir $analysis";
    if (defined($report_segmentation) && $report_segmentation eq "yes") {
        my $segmentation_file = &get_default_file("EVALUATION",$set,"decode");
 	$cmd .= " -segmentation $segmentation_file";
    }
+    if (defined($analyze_search_graph) && $analyze_search_graph eq "yes") {
+      my $search_graph_file = &get_default_file("EVALUATION",$set,"decode");
+      $cmd .= " -search-graph $search_graph_file.graph";
+    }
    if (&get("TRAINING:hierarchical-rule-set")) {
 	$cmd .= " -hierarchical";
    }
--- a/scripts/ems/support/interpolate-lm.perl
+++ b/scripts/ems/support/interpolate-lm.perl
@ -110,7 +110,7 @@ print STDERR "\n=== BUILDING FINAL LM ===\n\n";
 sub interpolate {
  my ($name,@LM) = @_;

-  die("cannot interpolate more than 10 language models at once.")
+  die("cannot interpolate more than 10 language models at once: ",join(",",@LM))
    if scalar(@LM) > 10;

  my $tmp = tempdir(DIR=>$TEMPDIR);
--- a/scripts/ems/support/reference-from-sgm.perl
+++ b/scripts/ems/support/reference-from-sgm.perl
@ -17,12 +17,17 @@ close(ORDER);

 # get from sgm file which lines belong to which system
 my %DOC;
+my $system_from_refset = 0;
 my ($doc,$system);
 open(REF,$ref);
 while(<REF>) {
+    if (/<refset/ && /refid="([^\"]+)"/i) {
+      $system = $1;
+      $system_from_refset = 1;
+    }
    if (/<doc/i) {
-        die unless /sysid="([^\"]+)"/i;
-	$system = $1;
+        die unless /sysid="([^\"]+)"/i || $system_from_refset;
+        $system = $1 unless $system_from_refset;
        die unless /docid="([^\"]+)"/i;
        $doc = $1;
    }
--- a/scripts/ems/support/wrap-xml.perl
+++ b/scripts/ems/support/wrap-xml.perl
@ -18,8 +18,9 @@ while(<SRC>) {
    elsif (/^<\/srcset/) {
 	s/<\/srcset/<\/tstset/;
    }
-    elsif (/^<DOC/i) {
-	s/<DOC/<DOC sysid="$system"/i;
+    elsif (/^<doc/i) {
+  s/ *sysid="[^\"]+"//;
+	s/<doc/<doc sysid="$system"/i;
    }
    elsif (/<seg/) {
 	my $line = shift(@OUT);
--- a/scripts/generic/extract-parallel.perl
+++ b/scripts/generic/extract-parallel.perl
@ -101,13 +101,14 @@ if ($numParallel > 1)
  print STDERR $extractCmd;
  print STDERR $extractInvCmd;
  print STDERR $extractOrderingCmd;
-  `$extractCmd`;
-  `$extractInvCmd`;
+
+  systemCheck($extractCmd);
+  systemCheck($extractInvCmd);

  my $numStr = NumStr(0);
  if (-e "$TMPDIR/extract.$numStr.o")
  {
-    `$extractOrderingCmd`;
+    systemCheck($extractOrderingCmd);
  }
 }
 else
@ -130,6 +131,15 @@ print STDERR $cmd;

 print STDERR "Finished ".localtime() ."\n";

+sub systemCheck($)
+{
+  my $cmd = shift;  # command line to run
+  my $retVal = system($cmd);  # system() returns the wait status; 0 means the command succeeded
+  if ($retVal != 0)
+  {
+    exit(1);  # abort the whole script on the first failing command; no message is printed here
+  }
+}

 sub NumStr($)
 {
--- a/scripts/tokenizer/detokenizer.perl
+++ b/scripts/tokenizer/detokenizer.perl
@ -63,8 +63,13 @@ sub detokenize {
 	my($text) = @_;
 	chomp($text);
 	$text = " $text ";
-        $text =~ s/ \@\-\@ /-/g;
-	
+  $text =~ s/ \@\-\@ /-/g;
+  # de-escape special chars
+  $text =~ s/\&bar;/\|/g;
+  $text =~ s/\&lt;/\</g;
+  $text =~ s/\&gt;/\>/g;
+  $text =~ s/\&amp;/\&/g;
+
 	my $word;
 	my $i;
 	my @words = split(/ /,$text);
--- a/scripts/tokenizer/tokenizer.perl
+++ b/scripts/tokenizer/tokenizer.perl
@ -18,6 +18,7 @@ my $language = "en";
 my $QUIET = 0;
 my $HELP = 0;
 my $AGGRESSIVE = 0;
+my $SKIP_XML = 0;

 #my $start = [ Time::HiRes::gettimeofday( ) ];

@ -27,6 +28,7 @@ while (@ARGV) {
 	/^-l$/ && ($language = shift, next);
 	/^-q$/ && ($QUIET = 1, next);
 	/^-h$/ && ($HELP = 1, next);
+	/^-x$/ && ($SKIP_XML = 1, next);
 	/^-a$/ && ($AGGRESSIVE = 1, next);
 }

@ -50,7 +52,7 @@ if (scalar(%NONBREAKING_PREFIX) eq 0){
 }

 while(<STDIN>) {
-	if (/^<.+>$/ || /^\s*$/) {
+	if (($SKIP_XML && /^<.+>$/) || /^\s*$/) {
 		#don't try to tokenize XML/HTML tag lines
 		print $_;
 	}
@ -141,7 +143,13 @@ sub tokenize {
 		$text =~ s/DOTDOTMULTI/DOTMULTI./g;
 	}
 	$text =~ s/DOTMULTI/./g;
-	
+
+  #escape special chars
+  $text =~ s/\&/\&amp;/g;
+  $text =~ s/\|/\&bar;/g;
+  $text =~ s/\</\&lt;/g;
+  $text =~ s/\>/\&gt;/g;
+
 	#ensure final line break
 	$text .= "\n" unless $text =~ /\n$/;

--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@ -404,6 +404,9 @@ if (-e $ref_abs) {
 else {
  # if multiple file, get a full list of the files
    my $part = 0;
+    if (! -e $ref_abs."0" && -e $ref_abs.".ref0") {
+        $ref_abs .= ".ref";
+    }
    while (-e $ref_abs.$part) {
        push @references, $ref_abs.$part;
        $part++;
--- a/scripts/training/phrase-extract/extract.xcodeproj/project.pbxproj
+++ b/scripts/training/phrase-extract/extract.xcodeproj/project.pbxproj
@ -38,6 +38,10 @@
 		1E2C902D141FDED400EA06A6 /* InputFileStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EB8A211129C024C00041956 /* InputFileStream.cpp */; };
 		1E2C902E141FDF6D00EA06A6 /* tables-core.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1CE8CE4B0FC6EAA200924FEA /* tables-core.cpp */; };
 		1EB1C8321200D5C00079FCBB /* PhraseAlignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EB1C8311200D5C00079FCBB /* PhraseAlignment.cpp */; };
+		1EB29A3B1511C253005BC4BA /* InputFileStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EB8A211129C024C00041956 /* InputFileStream.cpp */; };
+		1EB29A3C1511C253005BC4BA /* InputFileStream.h in Sources */ = {isa = PBXBuildFile; fileRef = 1EB8A210129C024C00041956 /* InputFileStream.h */; };
+		1EB29A3E1511C2D9005BC4BA /* InputFileStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EB8A211129C024C00041956 /* InputFileStream.cpp */; };
+		1EB29A3F1511C2D9005BC4BA /* InputFileStream.h in Sources */ = {isa = PBXBuildFile; fileRef = 1EB8A210129C024C00041956 /* InputFileStream.h */; };
 		1EB8A212129C024C00041956 /* InputFileStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EB8A211129C024C00041956 /* InputFileStream.cpp */; };
 		1EB8A261129C04C700041956 /* InputFileStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EB8A211129C024C00041956 /* InputFileStream.cpp */; };
 		1EB8A297129C06A300041956 /* gzfilebuf.h in Sources */ = {isa = PBXBuildFile; fileRef = 1EB8A20C129C022000041956 /* gzfilebuf.h */; };
@ -354,6 +358,8 @@
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				1EB29A3B1511C253005BC4BA /* InputFileStream.cpp in Sources */,
+				1EB29A3C1511C253005BC4BA /* InputFileStream.h in Sources */,
 				1C05BA281174CF10003585B2 /* extract-rules.cpp in Sources */,
 				1C05BA251174CF03003585B2 /* Hole.h in Sources */,
 				1C05BA261174CF03003585B2 /* HoleCollection.cpp in Sources */,
@ -376,6 +382,8 @@
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				1EB29A3E1511C2D9005BC4BA /* InputFileStream.cpp in Sources */,
+				1EB29A3F1511C2D9005BC4BA /* InputFileStream.h in Sources */,
 				1C05BA381174CFAD003585B2 /* tables-core.cpp in Sources */,
 				1C05BA391174CFAD003585B2 /* tables-core.h in Sources */,
 				1C05BA351174CF98003585B2 /* AlignmentPhrase.cpp in Sources */,
@ -444,6 +452,7 @@
 				GCC_MODEL_TUNING = G5;
 				GCC_OPTIMIZATION_LEVEL = 0;
 				INSTALL_PATH = /usr/local/bin;
+				OTHER_LDFLAGS = "-lz";
 				PREBINDING = NO;
 				PRODUCT_NAME = "extract-rules";
 				SDKROOT = macosx10.6;
@ -461,6 +470,7 @@
 				GCC_MODEL_TUNING = G5;
 				INSTALL_PATH = /usr/local/bin;
 				ONLY_ACTIVE_ARCH = YES;
+				OTHER_LDFLAGS = "-lz";
 				PREBINDING = NO;
 				PRODUCT_NAME = "extract-rules";
 				SDKROOT = macosx10.6;
@ -479,6 +489,7 @@
 				GCC_MODEL_TUNING = G5;
 				GCC_OPTIMIZATION_LEVEL = 0;
 				INSTALL_PATH = /usr/local/bin;
+				OTHER_LDFLAGS = "-lz";
 				PREBINDING = NO;
 				PRODUCT_NAME = statistics;
 				SDKROOT = macosx10.6;
@ -496,6 +507,7 @@
 				GCC_MODEL_TUNING = G5;
 				INSTALL_PATH = /usr/local/bin;
 				ONLY_ACTIVE_ARCH = YES;
+				OTHER_LDFLAGS = "-lz";
 				PREBINDING = NO;
 				PRODUCT_NAME = statistics;
 				SDKROOT = macosx10.6;
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl.missing_bin_dir
@ -40,7 +40,7 @@ my($_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_
 my $debug = 0; # debug this script, do not delete any files in debug mode

 # the following line is set installation time by 'make release'.  BEWARE!
-my $BINDIR="/Users/hieuhoang/workspace/bin";
+my $BINDIR="/Users/hieuhoang/workspace/bin/";

 $_HELP = 1
    unless &GetOptions('root-dir=s' => \$_ROOT_DIR,
@ -1490,10 +1490,10 @@ sub score_phrase_phrase_extract {
 	          # sorting
 	          print STDERR "(6.".($substep++).")  sorting $direction @ ".`date`;
 	          if (-e "$extract_filename.gz") {
-		      safesystem("gunzip < $extract_filename.gz | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR > $extract_filename.sorted") or die("ERROR");
+		      safesystem("gunzip < $extract_filename.gz | LC_ALL=C $SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR > $extract_filename.sorted") or die("ERROR");
 	          }
 	          else {
-		      safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $extract_filename > $extract_filename.sorted") or die("ERROR");
+		      safesystem("LC_ALL=C $SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $extract_filename > $extract_filename.sorted") or die("ERROR");
 	          }
              }

@ -1515,7 +1515,7 @@ sub score_phrase_phrase_extract {
        # sorting inverse phrase-table-half to sync up with regular one
        if ($direction eq "e2f" && ! ($___CONTINUE && -e "$ttable_file.half.e2f.sorted")) {
          print STDERR "(6." . ($substep++) . ") sorting inverse e2f table@ ".`date`;
-          safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $ttable_file.half.e2f > $ttable_file.half.e2f.sorted") or die("ERROR");
+          safesystem("LC_ALL=C $SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $ttable_file.half.e2f > $ttable_file.half.e2f.sorted") or die("ERROR");
          if (! $debug) { safesystem("rm -f $ttable_file.half.e2f") or die("ERROR"); }
        }

@ -1570,7 +1570,7 @@ sub score_phrase_memscore {

    # The output is sorted to avoid breaking scripts that rely on the
    # sorting behaviour of the previous scoring algorithm.
-    my $cmd = "$MEMSCORE $options | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR | gzip >$ttable_file.gz";
+    my $cmd = "$MEMSCORE $options | LC_ALL=C $SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR | gzip >$ttable_file.gz";
    if (-e "$extract_file.gz") {
        $cmd = "$ZCAT $extract_file.gz | ".$cmd;
    } else {
@ -1626,10 +1626,10 @@ sub get_reordering_factored {
 sub get_reordering {
    my ($extract_file,$reo_model_path) = @_;
    if (-e "$extract_file.o.gz") {
-	safesystem("gunzip < $extract_file.o.gz | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR > $extract_file.o.sorted") or die("ERROR");
+	safesystem("gunzip < $extract_file.o.gz | LC_ALL=C $SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR > $extract_file.o.sorted") or die("ERROR");
    }
    else {
-        safesystem("LC_ALL=C sort -T $___TEMP_DIR $extract_file.o > $extract_file.o.sorted") or die("ERROR");
+        safesystem("LC_ALL=C $SORT_EXEC -T $___TEMP_DIR $extract_file.o > $extract_file.o.sorted") or die("ERROR");
    }

    my $smooth = $___REORDERING_SMOOTH;
--- a/scripts/training/wrappers/make-factor-stem.perl
+++ b/scripts/training/wrappers/make-factor-stem.perl
@ -6,8 +6,8 @@ my ($size,$in,$out) = @ARGV;

 open(IN,$in);
 open(OUT,">$out");
-binmode(IN, ":utf8");
-binmode(OUT, ":utf8");
+binmode(IN, ":UTF8");
+binmode(OUT, ":UTF8");

 while(<IN>) {
    my $first = 1;
--- a/util/file.cc
+++ b/util/file.cc
@ -42,6 +42,16 @@ int OpenReadOrThrow(const char *name) {
  return ret;
 }

+int CreateOrThrow(const char *name) {  // Creates "name" (truncating it if it exists), opens it read/write and returns the descriptor.
+  int ret;
+#if defined(_WIN32) || defined(_WIN64)
+  UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name);
+#else
+  UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name);  // requested mode: rw for owner, read-only for group and others
+#endif
+  return ret;  // reached only when the open call did not return -1
+}
+
 uint64_t SizeFile(int fd) {
 #if defined(_WIN32) || defined(_WIN64)
  __int64 ret = _filelengthi64(fd);
--- a/util/file.hh
+++ b/util/file.hh
@ -65,7 +65,10 @@ class scoped_FILE {
    std::FILE *file_;
 };

+// Open for read only.  
 int OpenReadOrThrow(const char *name);
+// Create file if it doesn't exist, truncate if it does.  Opened for reading and writing.
+int CreateOrThrow(const char *name);

 // Return value for SizeFile when it can't size properly.  
 const uint64_t kBadSize = (uint64_t)-1;
--- a/util/getopt.c
+++ b/util/getopt.c
@ -10,6 +10,7 @@ Code given out at the 1985 UNIFORUM conference in Dallas.

 #include "getopt.hh"
 #include <stdio.h>
+#include <string.h>

 #define NULL	0
 #define EOF	(-1)
@ -74,4 +75,4 @@ char	**argv, *opts;
 	return(c);
 }

-#endif  /* __GNUC__ */
+#endif  /* __GNUC__ */
--- a/util/mmap.cc
+++ b/util/mmap.cc
@ -170,20 +170,6 @@ void *MapZeroedWrite(int fd, std::size_t size) {
  return MapOrThrow(size, true, kFileFlags, false, fd, 0);
 }

-namespace {
-
-int CreateOrThrow(const char *name) {
-  int ret;
-#if defined(_WIN32) || defined(_WIN64)
-  UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name);
-#else
-  UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name);
-#endif
-  return ret;
-}
-
-} // namespace
-
 void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file) {
  file.reset(CreateOrThrow(name));
  try {