From 2a3c9fc6797c97595f5f6bda7dab0b17ce758afc Mon Sep 17 00:00:00 2001
From: Tetsuo Kiso
Date: Fri, 7 Dec 2012 08:45:47 +0900
Subject: [PATCH] Further optimization for extractor.

Fixes inefficient updating of N-gram counts.

NOTE: Using the '--binary' option (this option is not enabled by
default yet) for saving outputs leads to a significant speed-up.
---
 mert/BleuScorer.cpp | 15 +++++++++------
 mert/Ngram.h        |  9 +--------
 2 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index 8fb814390..1adbd0276 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -65,21 +65,24 @@ size_t BleuScorer::CountNgrams(const string& line, NgramCounts& counts,
   } else {
     TokenizeAndEncode(line, encoded_tokens);
   }
+  const size_t len = encoded_tokens.size();
+  vector<int> ngram;
+
   for (size_t k = 1; k <= n; ++k) {
     //ngram order longer than sentence - no point
-    if (k > encoded_tokens.size()) {
+    if (k > len) {
       continue;
     }
-    for (size_t i = 0; i < encoded_tokens.size()-k+1; ++i) {
-      vector<int> ngram;
-      ngram.reserve(encoded_tokens.size());
-      for (size_t j = i; j < i+k && j < encoded_tokens.size(); ++j) {
+    for (size_t i = 0; i < len - k + 1; ++i) {
+      ngram.clear();
+      ngram.reserve(len);
+      for (size_t j = i; j < i+k && j < len; ++j) {
         ngram.push_back(encoded_tokens[j]);
       }
       counts.Add(ngram);
     }
   }
-  return encoded_tokens.size();
+  return len;
 }
 
 void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
diff --git a/mert/Ngram.h b/mert/Ngram.h
index 811f21f27..6363c847c 100644
--- a/mert/Ngram.h
+++ b/mert/Ngram.h
@@ -45,14 +45,7 @@ class NgramCounts {
   /**
    * If the specified "ngram" is found, we add counts.
    * If not, we insert the default count in the container.
   */
-  void Add(const Key& ngram) {
-    const_iterator it = find(ngram);
-    if (it != end()) {
-      m_counts[ngram] = it->second + 1;
-    } else {
-      m_counts[ngram] = kDefaultCount;
-    }
-  }
+  inline void Add(const Key& ngram) { m_counts[ngram]++; }
 
  /**
   * Return true iff the specified "ngram" is found in the container.
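
For reference, the Ngram.h one-liner works because std::map::operator[] value-initializes a missing mapped value to zero before the increment, so an unseen n-gram ends up stored as 1 (matching the removed kDefaultCount branch, assuming that default is 1), while a seen n-gram is bumped with a single lookup instead of find() followed by a second operator[] store. Below is a minimal standalone sketch of the two update patterns, using a plain std::map<vector<int>, int> as a stand-in for the real NgramCounts container; CountMap, AddOld, and AddNew are illustrative names, not part of the patch.

#include <cassert>
#include <map>
#include <vector>

// Stand-in for NgramCounts' internal container:
// encoded token sequence -> occurrence count.
typedef std::vector<int> Key;
typedef std::map<Key, int> CountMap;

// Old pattern: find() first, then a second lookup through operator[]
// to either bump the existing count or insert the default.
void AddOld(CountMap& counts, const Key& ngram) {
  const int kDefaultCount = 1;  // assumed default, as in the removed branch
  CountMap::const_iterator it = counts.find(ngram);
  if (it != counts.end()) {
    counts[ngram] = it->second + 1;
  } else {
    counts[ngram] = kDefaultCount;
  }
}

// New pattern: operator[] inserts a value-initialized 0 on a miss,
// so one lookup both inserts and increments.
void AddNew(CountMap& counts, const Key& ngram) {
  counts[ngram]++;
}

int main() {
  CountMap a, b;
  Key bigram;
  bigram.push_back(7);
  bigram.push_back(42);

  AddOld(a, bigram);  AddOld(a, bigram);
  AddNew(b, bigram);  AddNew(b, bigram);

  // Both patterns end up with the same count.
  assert(a[bigram] == 2 && b[bigram] == 2);
  return 0;
}

The BleuScorer.cpp hunk applies the same idea to allocations: the ngram buffer is declared once per call and reused via clear()/reserve(), rather than constructing a fresh vector for every extracted n-gram.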