From ffd7af1168be694f2416597c60dbb965c1a3f70e Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Thu, 6 Feb 2014 20:15:25 +0000
Subject: [PATCH 01/23] Fixes to hypergraph and htk outputs, better cmd line
 help.

Mixing boost::filesystem paths and strings is bad (stream path.string() instead).
Fix leaks in HTK output.
Replace an assert that should be an exception.
---
 moses-cmd/Main.cpp  | 9 ++++-----
 moses/Parameter.cpp | 4 ++--
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/moses-cmd/Main.cpp b/moses-cmd/Main.cpp
index f5f52583c..eb5b5c35a 100644
--- a/moses-cmd/Main.cpp
+++ b/moses-cmd/Main.cpp
@@ -180,6 +180,7 @@ public:
       } else {
         TRACE_ERR("Cannot output HTK standard lattice for line " << m_lineNumber << " because the output file is not open or not ready for writing" << std::endl);
       }
+      delete file;
     }
 
     // Output search graph in hypergraph format for Kenneth Heafield's lazy hypergraph decoder
@@ -233,7 +234,7 @@ public:
 
         } else {
           stringstream hypergraphDirName;
-          hypergraphDirName << boost::filesystem::current_path() << "/hypergraph";
+          hypergraphDirName << boost::filesystem::current_path().string() << "/hypergraph";
           hypergraphDir = hypergraphDirName.str();
         }
       }
@@ -527,9 +528,7 @@ size_t OutputFeatureWeightsForHypergraph(size_t index, const FeatureFunction* ff
     }
     return index+numScoreComps;
   } else {
-    cerr << "Sparse features are not yet supported when outputting hypergraph format" << endl;
-    assert(false);
-    return 0;
+    UTIL_THROW2("Sparse features are not yet supported when outputting hypergraph format");
   }
 }
 
@@ -641,7 +640,7 @@ int main(int argc, char** argv)
           boost::filesystem::path nbestPath(nbestFile);
           weightsFilename << nbestPath.parent_path().filename() << "/weights";
         } else {
-          weightsFilename << boost::filesystem::current_path() << "/hypergraph/weights";
+          weightsFilename << boost::filesystem::current_path().string() << "/hypergraph/weights";
         }
       }
       boost::filesystem::path weightsFilePath(weightsFilename.str());
diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index 1b4683dc0..d4accb3fc 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -102,8 +102,8 @@ Parameter::Parameter()
   AddParam("output-search-graph", "osg", "Output connected hypotheses of search into specified filename");
   AddParam("output-search-graph-extended", "osgx", "Output connected hypotheses of search into specified filename, in extended format");
   AddParam("unpruned-search-graph", "usg", "When outputting chart search graph, do not exclude dead ends. Note: stack pruning may have eliminated some hypotheses");
-  AddParam("output-search-graph-slf", "slf", "Output connected hypotheses of search into specified directory, one file per sentence, in HTK standard lattice format (SLF)");
-  AddParam("output-search-graph-hypergraph", "Output connected hypotheses of search into specified directory, one file per sentence, in a hypergraph format (see Kenneth Heafield's lazy hypergraph decoder)");
+  AddParam("output-search-graph-slf", "slf", "Output connected hypotheses of search into specified directory, one file per sentence, in HTK standard lattice format (SLF) - the flag should be followed byy a directory name, which must exist");
+  AddParam("output-search-graph-hypergraph", "Output connected hypotheses of search into specified directory, one file per sentence, in a hypergraph format (see Kenneth Heafield's lazy hypergraph decoder). This flag is followed by 3 values: 'true (gz|txt|bz) directory-name'");
   AddParam("include-lhs-in-search-graph", "lhssg", "When outputting chart search graph, include the label of the LHS of the rule (useful when using syntax)");
 #ifdef HAVE_PROTOBUF
   AddParam("output-search-graph-pb", "pb", "Write phrase lattice to protocol buffer objects in the specified path.");

From 91d6bfe0d5205c77adaf9ebcf8d5da2d8b171862 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Thu, 10 Apr 2014 21:34:34 +0100
Subject: [PATCH 02/23] start on FF ReferenceComparison

---
 contrib/other-builds/moses/.project | 10 ++++++++++
 moses/FF/Factory.cpp                |  2 ++
 moses/FF/SetSourcePhrase.h          |  1 +
 3 files changed, 13 insertions(+)

diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index d8679eb44..c7fd19dcf 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -1251,6 +1251,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/PhrasePenalty.h</locationURI>
 		</link>
+		<link>
+			<name>FF/ReferenceComparison.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ReferenceComparison.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/ReferenceComparison.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ReferenceComparison.h</locationURI>
+		</link>
 		<link>
 			<name>FF/SetSourcePhrase.cpp</name>
 			<type>1</type>
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index c42d737cc..731a86047 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -39,6 +39,7 @@
 #include "moses/FF/HyperParameterAsWeight.h"
 #include "moses/FF/SetSourcePhrase.h"
 #include "CountNonTerms.h"
+#include "ReferenceComparison.h"
 
 #include "moses/FF/SkeletonStatelessFF.h"
 #include "moses/FF/SkeletonStatefulFF.h"
@@ -181,6 +182,7 @@ FeatureRegistry::FeatureRegistry()
   MOSES_FNAME(HyperParameterAsWeight);
   MOSES_FNAME(SetSourcePhrase);
   MOSES_FNAME(CountNonTerms);
+  MOSES_FNAME(ReferenceComparison);
 
   MOSES_FNAME(SkeletonStatelessFF);
   MOSES_FNAME(SkeletonStatefulFF);
diff --git a/moses/FF/SetSourcePhrase.h b/moses/FF/SetSourcePhrase.h
index bd07ffbd4..6b391baa4 100644
--- a/moses/FF/SetSourcePhrase.h
+++ b/moses/FF/SetSourcePhrase.h
@@ -5,6 +5,7 @@
 namespace Moses
 {
 
+// the only thing this FF does is set TargetPhrase::m_ruleSource so that other FF can use it in Evaluate(Search).
 class SetSourcePhrase : public StatelessFeatureFunction
 {
 public:

From 1686686e65e861d257de9d8dfaea3b1db1ac1081 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Thu, 10 Apr 2014 21:47:06 +0100
Subject: [PATCH 03/23] start on FF ReferenceComparison

---
 moses/FF/ReferenceComparison.cpp | 11 ++++++++
 moses/FF/ReferenceComparison.h   | 45 ++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 moses/FF/ReferenceComparison.cpp
 create mode 100644 moses/FF/ReferenceComparison.h

diff --git a/moses/FF/ReferenceComparison.cpp b/moses/FF/ReferenceComparison.cpp
new file mode 100644
index 000000000..b11d133c2
--- /dev/null
+++ b/moses/FF/ReferenceComparison.cpp
@@ -0,0 +1,11 @@
+#include "ReferenceComparison.h"
+
+namespace Moses
+{
+ReferenceComparison::ReferenceComparison(const std::string &line)
+:StatelessFeatureFunction(0, line)
+{
+}
+
+}
+
diff --git a/moses/FF/ReferenceComparison.h b/moses/FF/ReferenceComparison.h
new file mode 100644
index 000000000..d3db29ddd
--- /dev/null
+++ b/moses/FF/ReferenceComparison.h
@@ -0,0 +1,45 @@
+#pragma once
+#include <string>
+#include "StatelessFeatureFunction.h"
+
+namespace Moses
+{
+
+// the only thing this FF does is set TargetPhrase::m_ruleSource so that other FF can use it in Evaluate(Search).
+class ReferenceComparison : public StatelessFeatureFunction
+{
+public:
+	ReferenceComparison(const std::string &line);
+
+	  virtual bool IsUseable(const FactorMask &mask) const
+	  { return true; }
+
+	  virtual void Evaluate(const Phrase &source
+							, const TargetPhrase &targetPhrase
+							, ScoreComponentCollection &scoreBreakdown
+							, ScoreComponentCollection &estimatedFutureScore) const
+	  {}
+
+	  virtual void Evaluate(const InputType &input
+	                         , const InputPath &inputPath
+	                         , const TargetPhrase &targetPhrase
+	                         , ScoreComponentCollection &scoreBreakdown
+	                         , ScoreComponentCollection *estimatedFutureScore = NULL) const
+	  {}
+
+	  virtual void Evaluate(const Hypothesis& hypo,
+	                        ScoreComponentCollection* accumulator) const
+	  {}
+
+	  virtual void EvaluateChart(const ChartHypothesis &hypo,
+	                             ScoreComponentCollection* accumulator) const
+	  {}
+
+	  std::vector<float> DefaultWeights() const
+	  { return std::vector<float>(); }
+
+
+};
+
+}
+

From 9644a308587689906c7eb630c0a8a425e43dd282 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Fri, 11 Apr 2014 10:22:03 +0100
Subject: [PATCH 04/23] add FF RuleAmbiguity

---
 contrib/other-builds/moses/.project | 10 +++++
 moses/FF/Factory.cpp                |  2 +
 moses/FF/RuleAmbiguity.cpp          | 61 +++++++++++++++++++++++++++++
 moses/FF/RuleAmbiguity.h            | 44 +++++++++++++++++++++
 moses/Phrase.h                      |  8 ++++
 5 files changed, 125 insertions(+)
 create mode 100644 moses/FF/RuleAmbiguity.cpp
 create mode 100644 moses/FF/RuleAmbiguity.h

diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index c7fd19dcf..1a9939c51 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -1261,6 +1261,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ReferenceComparison.h</locationURI>
 		</link>
+		<link>
+			<name>FF/RuleAmbiguity.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/RuleAmbiguity.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/RuleAmbiguity.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/RuleAmbiguity.h</locationURI>
+		</link>
 		<link>
 			<name>FF/SetSourcePhrase.cpp</name>
 			<type>1</type>
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 731a86047..ddab3df72 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -40,6 +40,7 @@
 #include "moses/FF/SetSourcePhrase.h"
 #include "CountNonTerms.h"
 #include "ReferenceComparison.h"
+#include "RuleAmbiguity.h"
 
 #include "moses/FF/SkeletonStatelessFF.h"
 #include "moses/FF/SkeletonStatefulFF.h"
@@ -183,6 +184,7 @@ FeatureRegistry::FeatureRegistry()
   MOSES_FNAME(SetSourcePhrase);
   MOSES_FNAME(CountNonTerms);
   MOSES_FNAME(ReferenceComparison);
+  MOSES_FNAME(RuleAmbiguity);
 
   MOSES_FNAME(SkeletonStatelessFF);
   MOSES_FNAME(SkeletonStatefulFF);
diff --git a/moses/FF/RuleAmbiguity.cpp b/moses/FF/RuleAmbiguity.cpp
new file mode 100644
index 000000000..e447eee74
--- /dev/null
+++ b/moses/FF/RuleAmbiguity.cpp
@@ -0,0 +1,61 @@
+#include "RuleScope.h"
+#include "moses/StaticData.h"
+#include "moses/Word.h"
+
+namespace Moses
+{
+RuleAmbiguity::RuleAmbiguity(const std::string &line)
+:StatelessFeatureFunction(1, line)
+,m_sourceSyntax(true)
+{
+}
+
+bool IsAmbiguous(const Word &word, bool sourceSyntax)
+{
+  const Word &inputDefaultNonTerminal = StaticData::Instance().GetInputDefaultNonTerminal();
+  return word.IsNonTerminal() && (!sourceSyntax || word == inputDefaultNonTerminal);
+}
+
+void RuleAmbiguity::Evaluate(const Phrase &source
+						, const TargetPhrase &targetPhrase
+						, ScoreComponentCollection &scoreBreakdown
+						, ScoreComponentCollection &estimatedFutureScore) const
+{
+  // source can't be empty, right?
+  float score = 0;
+
+  int count = 0;
+  for (size_t i = 0; i < source.GetSize() - 0; ++i) {
+	const Word &word = source.GetWord(i);
+	bool ambiguous = IsAmbiguous(word, m_sourceSyntax);
+	if (ambiguous) {
+		++count;
+	}
+	else {
+		if (count > 0) {
+			score += count;
+		}
+		count = -1;
+	}
+  }
+
+  // 1st & last always adjacent to ambiguity
+  ++count;
+  if (count > 0) {
+	score += count;
+  }
+
+  scoreBreakdown.PlusEquals(this, score);
+}
+
+void RuleAmbiguity::SetParameter(const std::string& key, const std::string& value)
+{
+  if (key == "source-syntax") {
+	  m_sourceSyntax = Scan<bool>(value);
+  } else {
+    StatelessFeatureFunction::SetParameter(key, value);
+  }
+}
+
+}
+
diff --git a/moses/FF/RuleAmbiguity.h b/moses/FF/RuleAmbiguity.h
new file mode 100644
index 000000000..83cd0272f
--- /dev/null
+++ b/moses/FF/RuleAmbiguity.h
@@ -0,0 +1,44 @@
+#pragma once
+#include <string>
+#include "StatelessFeatureFunction.h"
+
+namespace Moses
+{
+
+// the only thing this FF does is set TargetPhrase::m_ruleSource so that other FF can use it in Evaluate(Search).
+class RuleAmbiguity : public StatelessFeatureFunction
+{
+public:
+	RuleAmbiguity(const std::string &line);
+
+	  virtual bool IsUseable(const FactorMask &mask) const
+	  { return true; }
+
+	  virtual void Evaluate(const Phrase &source
+							, const TargetPhrase &targetPhrase
+							, ScoreComponentCollection &scoreBreakdown
+							, ScoreComponentCollection &estimatedFutureScore) const;
+
+	  virtual void Evaluate(const InputType &input
+	                         , const InputPath &inputPath
+	                         , const TargetPhrase &targetPhrase
+	                         , ScoreComponentCollection &scoreBreakdown
+	                         , ScoreComponentCollection *estimatedFutureScore = NULL) const
+	  {}
+
+	  virtual void Evaluate(const Hypothesis& hypo,
+	                        ScoreComponentCollection* accumulator) const
+	  {}
+
+	  virtual void EvaluateChart(const ChartHypothesis &hypo,
+	                             ScoreComponentCollection* accumulator) const
+	  {}
+
+	  void SetParameter(const std::string& key, const std::string& value);
+
+protected:
+  bool m_sourceSyntax;
+};
+
+}
+
diff --git a/moses/Phrase.h b/moses/Phrase.h
index 1de00bfdf..55fb2bdf5 100644
--- a/moses/Phrase.h
+++ b/moses/Phrase.h
@@ -121,6 +121,14 @@ public:
     return m_words[GetSize() - 1];
   }
 
+  inline const Word &Front() const {
+    return m_words[0];
+  }
+
+  inline const Word &Back() const {
+    return m_words[GetSize() - 1];
+  }
+
   //! particular factor at a particular position
   inline const Factor *GetFactor(size_t pos, FactorType factorType) const {
     const Word &ptr = m_words[pos];

From 1e116a21aef9db3ef26f451309aa0efe4180eecf Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Fri, 11 Apr 2014 10:29:57 +0100
Subject: [PATCH 05/23] add FF RuleAmbiguity

---
 moses/FF/RuleAmbiguity.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/moses/FF/RuleAmbiguity.h b/moses/FF/RuleAmbiguity.h
index 83cd0272f..436e2fa58 100644
--- a/moses/FF/RuleAmbiguity.h
+++ b/moses/FF/RuleAmbiguity.h
@@ -5,7 +5,7 @@
 namespace Moses
 {
 
-// the only thing this FF does is set TargetPhrase::m_ruleSource so that other FF can use it in Evaluate(Search).
+// similar to Scope; however, adjacent non-terms count as 1 ambiguity, rather than 2
 class RuleAmbiguity : public StatelessFeatureFunction
 {
 public:

From 0ec2fe016990bd3f6de51316bfada5650ea2c22f Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Fri, 11 Apr 2014 10:42:38 +0100
Subject: [PATCH 06/23] add FF RuleAmbiguity

---
 moses/FF/RuleAmbiguity.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/moses/FF/RuleAmbiguity.cpp b/moses/FF/RuleAmbiguity.cpp
index e447eee74..8f8760d28 100644
--- a/moses/FF/RuleAmbiguity.cpp
+++ b/moses/FF/RuleAmbiguity.cpp
@@ -1,4 +1,4 @@
-#include "RuleScope.h"
+#include "RuleAmbiguity.h"
 #include "moses/StaticData.h"
 #include "moses/Word.h"
 

From e347020049ed4233c6ae00fbdf1ca7d5eb91519d Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Fri, 11 Apr 2014 11:03:50 +0100
Subject: [PATCH 07/23] add FF ReferenceComparison

---
 moses/FF/ReferenceComparison.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/moses/FF/ReferenceComparison.h b/moses/FF/ReferenceComparison.h
index d3db29ddd..aef7be493 100644
--- a/moses/FF/ReferenceComparison.h
+++ b/moses/FF/ReferenceComparison.h
@@ -5,7 +5,8 @@
 namespace Moses
 {
 
-// the only thing this FF does is set TargetPhrase::m_ruleSource so that other FF can use it in Evaluate(Search).
+// Count how many hypotheses are in each stack, compare score with reference hypo
+// NOT threadsafe.
 class ReferenceComparison : public StatelessFeatureFunction
 {
 public:
@@ -38,6 +39,7 @@ public:
 	  std::vector<float> DefaultWeights() const
 	  { return std::vector<float>(); }
 
+protected:
 
 };
 

From 46cef770b75e4a17693ab1d03b96337b4de838f4 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Sat, 12 Apr 2014 17:20:34 +0200
Subject: [PATCH 08/23] add header allowOrigin *

---
 contrib/server/mosesserver.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/contrib/server/mosesserver.cpp b/contrib/server/mosesserver.cpp
index 25e2cb0ed..80eab8f20 100644
--- a/contrib/server/mosesserver.cpp
+++ b/contrib/server/mosesserver.cpp
@@ -512,7 +512,7 @@ int main(int argc, char** argv)
   xmlrpc_limit_set(XMLRPC_XML_SIZE_LIMIT_ID, 512*1024*1024);
 
   xmlrpc_c::registry myRegistry;
-
+  
   xmlrpc_c::methodPtr const translator(new Translator);
   xmlrpc_c::methodPtr const updater(new Updater);
   xmlrpc_c::methodPtr const optimizer(new Optimizer);
@@ -522,9 +522,11 @@ int main(int argc, char** argv)
   myRegistry.addMethod("optimize", optimizer);
 
   xmlrpc_c::serverAbyss myAbyssServer(
-    myRegistry,
-    port,              // TCP port on which to listen
-    logfile
+    xmlrpc_c::serverAbyss::constrOpt()
+    .registryP(&myRegistry)        // non-owning: myRegistry is a local object
+    .portNumber(port)              // TCP port on which to listen
+    .logFileName(logfile)
+    .allowOrigin("*")
   );
 
   cerr << "Listening on port " << port << endl;

From 66d0fe81e295dfd976941b2e1112c65d7903c8b0 Mon Sep 17 00:00:00 2001
From: Phil Williams <philip.williams@mac.com>
Date: Tue, 15 Apr 2014 17:34:21 +0100
Subject: [PATCH 09/23] moses_chart: add ChartKBestExtractor (not enabled yet)

Implements algorithm 3 from Huang and Chiang (2005)
---
 moses/ChartHypothesis.cpp     |  16 +++
 moses/ChartHypothesis.h       |   4 +
 moses/ChartKBestExtractor.cpp | 258 ++++++++++++++++++++++++++++++++++
 moses/ChartKBestExtractor.h   | 125 ++++++++++++++++
 4 files changed, 403 insertions(+)
 create mode 100644 moses/ChartKBestExtractor.cpp
 create mode 100644 moses/ChartKBestExtractor.h

diff --git a/moses/ChartHypothesis.cpp b/moses/ChartHypothesis.cpp
index 01eb49ccc..212a28d23 100644
--- a/moses/ChartHypothesis.cpp
+++ b/moses/ChartHypothesis.cpp
@@ -66,6 +66,22 @@ ChartHypothesis::ChartHypothesis(const ChartTranslationOptions &transOpt,
   }
 }
 
+// Intended to be used by ChartKBestExtractor only.  This creates a mock
+// ChartHypothesis for use by the extractor's top-level target vertex.
+ChartHypothesis::ChartHypothesis(const ChartHypothesis &pred,
+                                 const ChartKBestExtractor & /*unused*/)
+  :m_currSourceWordsRange(pred.m_currSourceWordsRange)
+  ,m_scoreBreakdown(pred.m_scoreBreakdown)
+  ,m_totalScore(pred.m_totalScore)
+  ,m_arcList(NULL)
+  ,m_winningHypo(NULL)
+  ,m_manager(pred.m_manager)
+  ,m_id(pred.m_manager.GetNextHypoId())
+{
+  // One predecessor, which is an existing top-level ChartHypothesis.
+  m_prevHypos.push_back(&pred);
+}
+
 ChartHypothesis::~ChartHypothesis()
 {
   // delete feature function states
diff --git a/moses/ChartHypothesis.h b/moses/ChartHypothesis.h
index 6e2facb9c..532d757c9 100644
--- a/moses/ChartHypothesis.h
+++ b/moses/ChartHypothesis.h
@@ -31,6 +31,7 @@
 namespace Moses
 {
 
+class ChartKBestExtractor;
 class ChartHypothesis;
 class ChartManager;
 class RuleCubeItem;
@@ -74,6 +75,9 @@ protected:
   //! not implemented
   ChartHypothesis(const ChartHypothesis &copy);
 
+  //! only used by ChartKBestExtractor
+  ChartHypothesis(const ChartHypothesis &, const ChartKBestExtractor &);
+
 public:
 #ifdef USE_HYPO_POOL
   void *operator new(size_t /* num_bytes */) {
diff --git a/moses/ChartKBestExtractor.cpp b/moses/ChartKBestExtractor.cpp
new file mode 100644
index 000000000..60c066191
--- /dev/null
+++ b/moses/ChartKBestExtractor.cpp
@@ -0,0 +1,258 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2014 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "ChartKBestExtractor.h"
+
+#include "ChartHypothesis.h"
+#include "ScoreComponentCollection.h"
+#include "StaticData.h"
+
+#include <boost/scoped_ptr.hpp>
+
+#include <vector>
+
+namespace Moses
+{
+
+// Extract up to k derivations into kBestList; topHypos[0] must be the best.
+void ChartKBestExtractor::Extract(
+    const std::vector<const ChartHypothesis*> &topHypos, std::size_t k,
+    KBestVec &kBestList)
+{
+  typedef std::vector<const ChartHypothesis*> HypoVec;
+
+  kBestList.clear();
+  if (topHypos.empty()) {
+    return;
+  }
+
+  // Create a new top-level ChartHypothesis that has the best hypothesis as its
+  // predecessor.  This is the search hypergraph's target vertex.
+  HypoVec::const_iterator iter = topHypos.begin();
+  boost::scoped_ptr<ChartHypothesis> supremeHypo(
+    new ChartHypothesis(**iter, *this));
+
+  // Do the same for each alternative top-level hypothesis, but add the new
+  // ChartHypothesis objects as arcs from supremeHypo, as if they had been
+  // recombined.
+  float prevScore = (*iter)->GetTotalScore();
+  for (++iter; iter != topHypos.end(); ++iter) {
+    // The first item in topHypos must be the best: no alternative may beat it.
+    UTIL_THROW_IF2((*iter)->GetTotalScore() > prevScore,
+                   "top-level vertices are not correctly sorted");
+    // Note: there's no need for a smart pointer here: supremeHypo will take
+    // ownership of altHypo.
+    ChartHypothesis *altHypo = new ChartHypothesis(**iter, *this);
+    supremeHypo->AddArc(altHypo);
+  }
+
+  // Create the target vertex corresponding to supremeHypo then generate
+  // its k-best list.
+  boost::shared_ptr<Vertex> top = FindOrCreateVertex(*supremeHypo);
+  LazyKthBest(*top, k, k);
+
+  // Copy the k-best list from the target vertex, but drop the top edge from
+  // each derivation.
+  kBestList.reserve(top->kBestList.size());
+  for (KBestVec::const_iterator p = top->kBestList.begin();
+       p != top->kBestList.end(); ++p) {
+    const Derivation &d = **p;
+    assert(d.edge.tail.size() == 1);  // d should have exactly one predecessor.
+    assert(d.backPointers.size() == 1);
+    std::size_t i = d.backPointers[0];
+    boost::shared_ptr<Derivation> pred = d.edge.tail[0]->kBestList[i];
+    kBestList.push_back(pred);
+  }
+}
+
+// Generate the target-side yield of the derivation d.
+Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
+{
+  FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
+
+  Phrase ret(ARRAY_SIZE_INCR);
+
+  const ChartHypothesis &hypo = d.edge.head->hypothesis;
+  const TargetPhrase &phrase = hypo.GetCurrTargetPhrase();
+  const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+    phrase.GetAlignNonTerm().GetNonTermIndexMap();
+  for (std::size_t pos = 0; pos < phrase.GetSize(); ++pos) {
+    const Word &word = phrase.GetWord(pos);
+    if (word.IsNonTerminal()) {
+      std::size_t nonTermInd = nonTermIndexMap[pos];
+      const Derivation &subderivation =
+        *d.edge.tail[nonTermInd]->kBestList[d.backPointers[nonTermInd]];
+      Phrase subPhrase = GetOutputPhrase(subderivation);
+      ret.Append(subPhrase);
+    } else {
+      ret.AddWord(word);
+      if (placeholderFactor == NOT_FOUND) {
+        continue;
+      }
+      std::set<std::size_t> sourcePosSet =
+        phrase.GetAlignTerm().GetAlignmentsForTarget(pos);
+      if (sourcePosSet.size() == 1) {
+        const std::vector<const Word*> *ruleSourceFromInputPath =
+          hypo.GetTranslationOption().GetSourceRuleFromInputPath();
+        UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
+                       "Source Words in of the rules hasn't been filled out");
+        std::size_t sourcePos = *sourcePosSet.begin();
+        const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
+        UTIL_THROW_IF2(sourceWord == NULL,
+                       "Null source word at position " << sourcePos);
+        const Factor *factor = sourceWord->GetFactor(placeholderFactor);
+        if (factor) {
+          ret.Back()[0] = factor;
+        }
+      }
+    }
+  }
+
+  return ret;
+}
+
+// Create an unweighted hyperarc corresponding to the given ChartHypothesis.
+ChartKBestExtractor::UnweightedHyperarc ChartKBestExtractor::CreateEdge(
+    const ChartHypothesis &h)
+{
+  UnweightedHyperarc edge;
+  edge.head = FindOrCreateVertex(h);
+  const std::vector<const ChartHypothesis*> &prevHypos = h.GetPrevHypos();
+  edge.tail.resize(prevHypos.size());
+  for (std::size_t i = 0; i < prevHypos.size(); ++i) {
+    const ChartHypothesis *prevHypo = prevHypos[i];
+    edge.tail[i] = FindOrCreateVertex(*prevHypo);
+  }
+  return edge;
+}
+
+void ChartKBestExtractor::GetCandidates(Vertex &v, std::size_t k)
+{
+  // Create a derivation for v's best incoming edge.
+  UnweightedHyperarc bestEdge = CreateEdge(v.hypothesis);
+  boost::shared_ptr<Derivation> d(new Derivation(bestEdge));
+  v.candidates.push(d);
+  v.seen.insert(d);
+  // Create derivations for the rest of v's incoming edges.
+  const ChartArcList *arcList = v.hypothesis.GetArcList();
+  if (arcList) {
+    for (std::size_t i = 0; i < arcList->size(); ++i) {
+      const ChartHypothesis &recombinedHypo = *(*arcList)[i];
+      UnweightedHyperarc edge = CreateEdge(recombinedHypo);
+      boost::shared_ptr<Derivation> d(new Derivation(edge));
+      v.candidates.push(d);
+      v.seen.insert(d);
+    }
+  }
+}
+
+// Look for the vertex corresponding to a given ChartHypothesis, creating
+// a new one if necessary.
+boost::shared_ptr<ChartKBestExtractor::Vertex>
+ChartKBestExtractor::FindOrCreateVertex(const ChartHypothesis &h)
+{
+  VertexMap::value_type element(&h, boost::shared_ptr<Vertex>());
+  std::pair<VertexMap::iterator, bool> p = m_vertexMap.insert(element);
+  boost::shared_ptr<Vertex> &sp = p.first->second;
+  if (!p.second) {
+    return sp;  // Vertex was already in m_vertexMap.
+  }
+  sp.reset(new Vertex(h));
+  return sp;
+}
+
+void ChartKBestExtractor::LazyKthBest(Vertex &v, std::size_t k,
+                                      std::size_t globalK)
+{
+  // If this is the first visit to vertex v then initialize the priority queue.
+  if (v.visited == false) {
+    GetCandidates(v, globalK);
+    v.visited = true;
+  }
+  // Add derivations to the k-best list until it contains k or there are none
+  // left to add.
+  while (v.kBestList.size() < k) {
+    if (!v.kBestList.empty()) {
+      // Update the priority queue by adding the successors of the last
+      // derivation (unless they've been seen before).
+      const Derivation &d = *v.kBestList.back();
+      LazyNext(v, d, globalK);
+    }
+    // Check if there are any derivations left in the queue.
+    if (v.candidates.empty()) {
+      break;
+    }
+    // Get the next best derivation and delete it from the queue.
+    boost::shared_ptr<Derivation> d = v.candidates.top();
+    v.candidates.pop();
+    // Add it to the k-best list.
+    v.kBestList.push_back(d);
+  }
+}
+
+void ChartKBestExtractor::LazyNext(Vertex &v, const Derivation &d,
+                                   std::size_t globalK)
+{
+  // Create the neighbours of Derivation d.
+  for (std::size_t i = 0; i < d.backPointers.size(); ++i) {
+    Vertex &predVertex = *d.edge.tail[i];
+    // Ensure that predVertex's k-best list contains enough derivations.
+    std::size_t k = d.backPointers[i] + 2;
+    LazyKthBest(predVertex, k, globalK);
+    if (predVertex.kBestList.size() < k) {
+      // predVertex's derivations have been exhausted.
+      continue;
+    }
+    // Create the neighbour.
+    boost::shared_ptr<Derivation> next(new Derivation(d, i));
+    // Check if it has been created before.
+    std::pair<Vertex::DerivationSet::iterator, bool> p = v.seen.insert(next);
+    if (p.second) {
+      v.candidates.push(next);  // Haven't previously seen it.
+    }
+  }
+}
+
+// Construct a Derivation corresponding to a ChartHypothesis.
+ChartKBestExtractor::Derivation::Derivation(const UnweightedHyperarc &e)
+{
+  edge = e;
+  backPointers.resize(edge.tail.size(), 0);
+  scoreBreakdown = edge.head->hypothesis.GetScoreBreakdown();
+  score = edge.head->hypothesis.GetTotalScore();
+}
+
+// Construct a Derivation that neighbours an existing Derivation.
+ChartKBestExtractor::Derivation::Derivation(const Derivation &d, std::size_t i)
+{
+  edge.head = d.edge.head;
+  edge.tail = d.edge.tail;
+  backPointers = d.backPointers;
+  std::size_t j = ++backPointers[i];
+  scoreBreakdown = d.scoreBreakdown;
+  // Deduct the score of the old subderivation.
+  const Derivation &oldSubderivation = *(edge.tail[i]->kBestList[j-1]);
+  scoreBreakdown.MinusEquals(oldSubderivation.scoreBreakdown);
+  // Add the score of the new subderivation.
+  const Derivation &newSubderivation = *(edge.tail[i]->kBestList[j]);
+  scoreBreakdown.PlusEquals(newSubderivation.scoreBreakdown);
+  score = scoreBreakdown.GetWeightedScore();
+}
+
+}  // namespace Moses
diff --git a/moses/ChartKBestExtractor.h b/moses/ChartKBestExtractor.h
new file mode 100644
index 000000000..07df7eacb
--- /dev/null
+++ b/moses/ChartKBestExtractor.h
@@ -0,0 +1,125 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2014 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+
+#include "ChartHypothesis.h"
+#include "ScoreComponentCollection.h"
+
+#include <boost/unordered_set.hpp>
+
+#include <queue>
+#include <vector>
+
+namespace Moses
+{
+
+// k-best list extractor that implements algorithm 3 from this paper:
+//
+//  Liang Huang and David Chiang
+//  "Better k-best parsing"
+//  In Proceedings of IWPT 2005
+//
+class ChartKBestExtractor
+{
+public:
+  struct Vertex;
+
+  struct UnweightedHyperarc {
+    boost::shared_ptr<Vertex> head;
+    std::vector<boost::shared_ptr<Vertex> > tail;
+  };
+
+  struct Derivation {
+    Derivation(const UnweightedHyperarc &);
+    Derivation(const Derivation &, std::size_t);
+
+    UnweightedHyperarc edge;
+    std::vector<std::size_t> backPointers;
+    ScoreComponentCollection scoreBreakdown;
+    float score;
+  };
+
+  struct DerivationOrderer {
+    bool operator()(const boost::shared_ptr<Derivation> &d1,
+                    const boost::shared_ptr<Derivation> &d2) const {
+      return d1->score < d2->score;
+    }
+  };
+
+  struct DerivationHasher {
+    std::size_t operator()(const boost::shared_ptr<Derivation> &d) const {
+      std::size_t seed = 0;
+      boost::hash_combine(seed, d->edge.head);
+      boost::hash_combine(seed, d->edge.tail);
+      boost::hash_combine(seed, d->backPointers);
+      return seed;
+    }
+  };
+
+  struct DerivationEqualityPred {
+    bool operator()(const boost::shared_ptr<Derivation> &d1,
+                    const boost::shared_ptr<Derivation> &d2) const {
+      return d1->edge.head == d2->edge.head &&
+             d1->edge.tail == d2->edge.tail &&
+             d1->backPointers == d2->backPointers;
+    }
+  };
+
+  struct Vertex {
+    typedef std::priority_queue<boost::shared_ptr<Derivation>,
+                                std::vector<boost::shared_ptr<Derivation> >,
+                                DerivationOrderer> DerivationQueue;
+
+    typedef boost::unordered_set<boost::shared_ptr<Derivation>,
+                                 DerivationHasher,
+                                 DerivationEqualityPred> DerivationSet;
+
+    Vertex(const ChartHypothesis &h) : hypothesis(h), visited(false) {}
+
+    const ChartHypothesis &hypothesis;
+    std::vector<boost::shared_ptr<Derivation> > kBestList;
+    DerivationQueue candidates;
+    DerivationSet seen;
+    bool visited;
+  };
+
+  typedef std::vector<boost::shared_ptr<Derivation> > KBestVec;
+
+  // Extract the k-best list from the search hypergraph given the full, sorted
+  // list of top-level vertices.
+  void Extract(const std::vector<const ChartHypothesis*> &topHypos,
+               std::size_t k, KBestVec &);
+
+  static Phrase GetOutputPhrase(const Derivation &);
+
+private:
+  typedef boost::unordered_map<const ChartHypothesis *,
+                               boost::shared_ptr<Vertex> > VertexMap;
+
+  UnweightedHyperarc CreateEdge(const ChartHypothesis &);
+  boost::shared_ptr<Vertex> FindOrCreateVertex(const ChartHypothesis &);
+  void GetCandidates(Vertex &, std::size_t);
+  void LazyKthBest(Vertex &, std::size_t, std::size_t);
+  void LazyNext(Vertex &, const Derivation &, std::size_t);
+
+  VertexMap m_vertexMap;
+};
+
+}  // namespace Moses

From 5e3e50d4ec922c119a972387bfc1c2fe3c0ca9fb Mon Sep 17 00:00:00 2001
From: Nadir Durrani <nadir@hel.inf.ed.ac.uk>
Date: Wed, 16 Apr 2014 17:28:49 +0100
Subject: [PATCH 10/23] In-Decoding Transliteration Module

---
 .../in-decoding-transliteration.pl            | 230 ++++++++++++++++++
 scripts/ems/experiment.meta                   |  19 +-
 scripts/ems/experiment.perl                   |  15 +-
 scripts/training/train-model.perl             |  17 +-
 4 files changed, 270 insertions(+), 11 deletions(-)
 create mode 100755 scripts/Transliteration/in-decoding-transliteration.pl

diff --git a/scripts/Transliteration/in-decoding-transliteration.pl b/scripts/Transliteration/in-decoding-transliteration.pl
new file mode 100755
index 000000000..e4f0503a8
--- /dev/null
+++ b/scripts/Transliteration/in-decoding-transliteration.pl
@@ -0,0 +1,230 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use utf8;
+use File::Basename;
+use Getopt::Long "GetOptions";
+use FindBin qw($RealBin);
+use Scalar::Util qw(looks_like_number);
+use IO::Handle;
+binmode(STDIN,  ':utf8');
+binmode(STDOUT, ':utf8');
+binmode(STDERR, ':utf8');
+
+my $___FACTOR_DELIMITER = "|";
+my $OUT_FILE = "/tmp/transliteration-phrase-table.$$";
+
+my ($MOSES_SRC_DIR,$TRANSLIT_MODEL,$OOV_FILE, $OOV_FILE_NAME, $EXTERNAL_BIN_DIR, $LM_FILE, $INPUT_EXTENSION, $OUTPUT_EXTENSION);
+die("ERROR: wrong syntax when invoking postDecodingTransliteration.perl")
+    unless &GetOptions('moses-src-dir=s' => \$MOSES_SRC_DIR,
+			'external-bin-dir=s' => \$EXTERNAL_BIN_DIR,
+			'transliteration-model-dir=s' => \$TRANSLIT_MODEL,
+			'input-extension=s' => \$INPUT_EXTENSION,
+			'output-extension=s' => \$OUTPUT_EXTENSION,
+			'transliteration-file=s' => \$OOV_FILE,
+			'out-file=s' => \$OUT_FILE);
+
+# check if the files are in place
+die("ERROR: you need to define --moses-src-dir --external-bin-dir, --transliteration-model-dir, --transliteration-file, --input-extension, and --output-extension")
+    unless (defined($MOSES_SRC_DIR) &&
+            defined($TRANSLIT_MODEL) &&
+            defined($OOV_FILE) &&
+	     defined($INPUT_EXTENSION)&&	
+	     defined($OUTPUT_EXTENSION)&&	
+	     defined($EXTERNAL_BIN_DIR));
+
+die("ERROR: could not find Transliteration Model '$TRANSLIT_MODEL'")
+    unless -e $TRANSLIT_MODEL;
+die("ERROR: could not find Transliteration file '$OOV_FILE'")
+    unless -e $OOV_FILE;
+
+$OOV_FILE_NAME = basename ($OOV_FILE);
+# Work inside <model>/evaluation; -p keeps reruns on the same model dir quiet.
+`mkdir -p $TRANSLIT_MODEL/evaluation`;
+`cp $OOV_FILE $TRANSLIT_MODEL/evaluation/`;
+my $translitFile = $TRANSLIT_MODEL . "/evaluation/" . $OOV_FILE_NAME;
+
+print "Preparing for Transliteration\n";
+prepare_for_transliteration ($OOV_FILE, $translitFile);
+print "Run Transliteration\n";
+run_transliteration ($MOSES_SRC_DIR , $EXTERNAL_BIN_DIR , $TRANSLIT_MODEL , $OOV_FILE_NAME);
+print "Pick Best Transliteration\n";
+form_corpus ($translitFile , $translitFile.".op.nBest" , $OUT_FILE);
+
+
+################### Read the UNK word file and prepare for Transliteration ###############################
+
+sub prepare_for_transliteration
+{
+	# Collect the distinct words of $testFile and write them to $translitFile,
+	# one per line, each spelled out as space-separated characters.
+	#
+	#   $testFile     - input; tokens are separated by single spaces and only
+	#                   the part before the first $___FACTOR_DELIMITER is used
+	#   $translitFile - output; overwritten
+	#
+	# Tokens containing a digit, '.' or ',' are reported and left out.
+	# Dies if either file cannot be opened or the output cannot be written.
+	my ($testFile, $translitFile) = @_;
+	my %UNK;
+
+	open(my $in, "<:encoding(UTF-8)", $testFile) or die "Can't open $testFile: $!\n";
+
+	while (my $line = <$in>)
+	{
+		chomp $line;
+
+		foreach my $word (split(/ /, $line))
+		{
+			my ($surface) = split(/\Q$___FACTOR_DELIMITER/, $word);
+
+			# Repeated spaces (or a leading delimiter) yield an empty surface
+			# form; there is nothing to transliterate for it.
+			next unless defined $surface && $surface ne "";
+
+			if ($surface =~ /[0-9.,]/)
+			{
+				print "Not transliterating $surface \n";
+			}
+			else
+			{
+				$UNK{$surface} = 1;
+			}
+		}
+	}
+	close($in);
+
+	open(my $out, ">:encoding(UTF-8)", $translitFile) or die "Can't open $translitFile: $!\n";
+
+	# Hash order differs between runs; sort to get a reproducible file.
+	print {$out} join(' ', split('', $_)) . "\n" foreach sort keys %UNK;
+
+	# Buffered write errors are only reported by close, so check it.
+	close($out) or die "Can't write $translitFile: $!\n";
+}
+
+################### Run Transliteration Module to Obtain Transliterations ###############################
+
+sub run_transliteration
+{
+	# Build a filtered decoder configuration for the transliteration model in
+	# $TRANSLIT_MODEL and decode $TRANSLIT_MODEL/evaluation/$eval_file with it;
+	# the 100-best list ends up in "$eval_file.op.nBest" next to the input.
+	# A failing external command is reported with a warning instead of passing
+	# unnoticed (standard output of the commands is still discarded).
+	my ($MOSES_SRC, $EXTERNAL_BIN_DIR, $TRANSLIT_MODEL, $eval_file) = @_;
+
+	# Empty placeholder, handed to train-model.perl as -config and as LM file.
+	`touch $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini`;
+	print "Filter Table\n";
+	`$MOSES_SRC/scripts/training/train-model.perl -mgiza -mgiza-cpus 10 -dont-zip -first-step 9 -external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION -e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 -score-options '--KneserNey' -phrase-translation-table $TRANSLIT_MODEL/model/phrase-table -config $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini -lm 0:3:$TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini:8`;
+	warn "WARNING: train-model.perl failed (wait status $?)\n" if $?;
+	`$MOSES_SRC/scripts/training/filter-model-given-input.pl $TRANSLIT_MODEL/evaluation/$eval_file.filtered $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini $TRANSLIT_MODEL/evaluation/$eval_file  -Binarizer "$MOSES_SRC/bin/processPhraseTable"`;
+	warn "WARNING: filter-model-given-input.pl failed (wait status $?)\n" if $?;
+	`rm  $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini`;
+
+	print "Apply Filter\n";
+	`$MOSES_SRC/scripts/ems/support/substitute-filtered-tables-and-weights.perl $TRANSLIT_MODEL/evaluation/$eval_file.filtered/moses.ini $TRANSLIT_MODEL/model/moses.ini $TRANSLIT_MODEL/tuning/moses.tuned.ini $TRANSLIT_MODEL/evaluation/$eval_file.filtered.ini`;
+	warn "WARNING: substitute-filtered-tables-and-weights.perl failed (wait status $?)\n" if $?;
+	`$MOSES_SRC/bin/moses -search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 -threads 16 -drop-unknown -distortion-limit 0 -n-best-list $TRANSLIT_MODEL/evaluation/$eval_file.op.nBest 100 distinct -f $TRANSLIT_MODEL/evaluation/$eval_file.filtered.ini < $TRANSLIT_MODEL/evaluation/$eval_file > $TRANSLIT_MODEL/evaluation/$eval_file.op`;
+	warn "WARNING: moses failed (wait status $?)\n" if $?;
+
+}
+
+################### Read the output of Transliteration Model and Form Corpus ###############################
+
+
+sub form_corpus
+{
+	# Convert the decoder's n-best list into a phrase table and gzip it.
+	#
+	#   $inp_file    - decoder input: one word per line, written as
+	#                  space-separated characters
+	#   $testFile    - n-best list produced for $inp_file; tokens are
+	#                  separated by single spaces, sections by "|||"
+	#   $phraseTable - output path, compressed with gzip at the end
+	#
+	# Each n-best entry with a non-empty target side becomes one line
+	#   <source word> ||| <target word> ||| <scores> ||| 0-0 ||| 0 0 0
+	# where both words have their separating spaces removed.
+	#
+	# Dies if a file cannot be opened or the table cannot be written.
+
+	my ($inp_file, $testFile, $phraseTable) = @_;
+	my @UNK;	# source words, indexed by 0-based input line number
+
+	open(my $src_fh, "<:encoding(UTF-8)", $inp_file) or die "Can't open $inp_file: $!\n";
+	open(my $pt_fh, ">:encoding(UTF-8)", $phraseTable) or die "Can't open $phraseTable: $!\n";
+
+	while (my $line = <$src_fh>)
+	{
+		chomp $line;
+		# Undo the character splitting to recover the word itself.
+		push(@UNK, join('', split(/ /, $line)));
+	}
+	close($src_fh);
+
+	open(my $nbest_fh, "<:encoding(UTF-8)", $testFile) or die "Can't open $testFile: $!\n";
+
+	while (my $line = <$nbest_fh>)
+	{
+		chomp $line;
+		my @words = split(/ /, $line);
+
+		# Field 0 is the sentence id and the target side starts at field 2.
+		# Lines that are too short or have no numeric id are skipped; the
+		# scans below are also bounded by the field count, so a malformed
+		# line (missing "|||") cannot make them run forever.
+		next unless @words > 2 && $words[0] =~ /^\d+$/;
+		my $sNum = $words[0];
+
+		my $i = 2;
+		my $thisStr = "";
+		my $features = "";
+
+		# Target side: concatenate the tokens up to the next "|||".
+		while ($i < @words && $words[$i] ne "|||")
+		{
+			$thisStr = $thisStr . $words[$i];
+			$i++;
+		}
+
+		$i++;
+
+		# Score section: skip the penalty, distortion and LM labels together
+		# with the token that follows each of them, and exponentiate every
+		# other numeric token.
+		while ($i < @words && $words[$i] ne "|||")
+		{
+			if ($words[$i] =~ /Penalty0/ || $words[$i] eq "Distortion0=" || $words[$i] eq "LM0=" ){
+				$i++;
+			}
+			elsif (looks_like_number($words[$i])){
+				$features = $features . " " . exp($words[$i]);
+			}
+
+			$i++;
+		}
+
+		# The source word is looked up by the sentence id itself, so entries
+		# stay aligned with the input even when some input line has no entry
+		# in the n-best list (assumes ids are the 0-based input line numbers
+		# -- TODO confirm against the decoder's n-best format).
+		next if $thisStr eq "";
+		next unless defined $UNK[$sNum];
+
+		print {$pt_fh} "$UNK[$sNum] ||| $thisStr ||| $features ||| 0-0 ||| 0 0 0\n";
+	}
+	close($nbest_fh);
+
+	# Write errors on a buffered handle are only reported by close.
+	close($pt_fh) or die "Can't write $phraseTable: $!\n";
+
+	# gzip does not replace an existing "$phraseTable.gz" unless forced.
+	`gzip $phraseTable`;
+	warn "WARNING: gzip $phraseTable failed (wait status $?)\n" if $?;
+
+}
+
+
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index e2b21019d..83d597aa0 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -533,6 +533,13 @@ build-transliteration-model
        ignore-unless: transliteration-module
        rerun-on-change: transliteration-module training-options script giza-settings
 	default-name: model/Transliteration
+build-translit-table
+       in: transliteration-model
+       out: transliteration-table
+       ignore-unless: in-decoding-transliteration
+       rerun-on-change: in-decoding-transliteration transliteration-module
+	default-name: model/transliteration-phrase-table
+	template: $moses-script-dir/Transliteration/in-decoding-transliteration.pl --moses-src-dir $moses-src-dir --external-bin-dir $external-bin-dir --transliteration-model-dir IN --input-extension $input-extension --output-extension $output-extension --transliteration-file $transliteration-file --out-file OUT
 extract-phrases
 	in: corpus-mml-postfilter=OR=word-alignment scored-corpus
 	out: extracted-phrases
@@ -601,7 +608,7 @@ build-sparse
         default-name: model/sparse-features
 	template: $moses-script-dir/ems/support/build-sparse-features.perl IN $input-extension $output-extension OUT "$sparse-features"
 create-config
-	in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table transliteration-model generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm 
+	in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table transliteration-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm 
 	out: config
 	ignore-if: use-hiero
 	rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini
@@ -863,7 +870,7 @@ split-reference-devtest
 	multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
 	template: $output-splitter -model IN1.$output-extension < IN > OUT
 filter
-	in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains
+	in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains TRAINING:transliteration-table
 	out: filtered-dir
 	default-name: tuning/filtered
 	rerun-on-change: filter-settings ttable-binarizer
@@ -989,8 +996,8 @@ split-input
 	pass-unless: input-splitter
 	template: $input-splitter -model IN1.$input-extension < IN > OUT
 filter
-	in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains
-	out: filtered-dir
+	in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains TRAINING:transliteration-table
+	out: filtered-dir 
 	default-name: evaluation/filtered
 	rerun-on-change: filter-settings report-precision-by-coverage ttable-binarizer
 	pass-if: TRAINING:binarize-all
@@ -1027,11 +1034,11 @@ remove-markup
 	pass-unless: report-segmentation
 	template: $moses-script-dir/ems/support/remove-segmentation-markup.perl < IN > OUT
 post-decoding-transliteration
-	in: cleaned-output system-output TRAINING:transliteration-model LM:binlm
+	in: cleaned-output system-output TRAINING:transliteration-model
 	out: transliterated-output
 	default-name: evaluation/transliterated
 	pass-unless: TRAINING:post-decoding-transliteration
-	template: $moses-script-dir/Transliteration/post-decoding-transliteration.pl --moses-src-dir $moses-src-dir --external-bin-dir $external-bin-dir --transliteration-model-dir IN2 --input-extension $input-extension --output-extension $output-extension --language-model IN3 --output-file IN0 --oov-file IN1.oov
+	template: $moses-script-dir/Transliteration/post-decoding-transliteration.pl --moses-src-dir $moses-src-dir --external-bin-dir $external-bin-dir --transliteration-model-dir IN2 --input-extension $input-extension --output-extension $output-extension --language-model $TRAINING:language-model-file --output-file IN0 --oov-file IN1.oov
 recase-output
 	in: transliterated-output RECASING:recase-config
 	out: recased-output
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index 761c7a694..f6a7e4db3 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -2233,11 +2233,15 @@ sub get_config_tables {
 sub define_training_create_config {
     my ($step_id) = @_;
 
-    my ($config,$reordering_table,$phrase_translation_table,$translit_model,$generation_table,$sparse_lexical_features,$domains,$osm, @LM)
+    my ($config,$reordering_table,$phrase_translation_table,$transliteration_pt,$generation_table,$sparse_lexical_features,$domains,$osm, @LM)
 			= &get_output_and_input($step_id);
 
     my $cmd = &get_config_tables($config,$reordering_table,$phrase_translation_table,$generation_table,$domains);
 
+    if($transliteration_pt){
+	 $cmd .= "-transliteration-phrase-table $transliteration_pt ";
+    }	
+
     if($osm){
       
       my $osm_settings = &get("TRAINING:operation-sequence-model-settings"); 
@@ -2623,7 +2627,7 @@ sub define_tuningevaluation_filter {
     my $tuning_flag = !defined($set);
     my $hierarchical = &get("TRAINING:hierarchical-rule-set");
 
-    my ($filter_dir,$input,$phrase_translation_table,$reordering_table,$domains) = &get_output_and_input($step_id);
+    my ($filter_dir,$input,$phrase_translation_table,$reordering_table,$domains,$transliteration_table) = &get_output_and_input($step_id);
 
     my $binarizer;
     $binarizer = &backoff_and_get("EVALUATION:$set:ttable-binarizer") unless $tuning_flag;
@@ -2683,7 +2687,14 @@ sub define_tuningevaluation_filter {
       
       $cmd .= &get_config_tables($config,$reordering_table,$phrase_translation_table,undef,$domains);
 
+	if (&get("TRAINING:in-decoding-transliteration")) {
+
+		$cmd .= "-transliteration-phrase-table $dir/model/transliteration-phrase-table.$VERSION ";
+	}	
+
+
       $cmd .= "-lm 0:3:$config:8\n"; # dummy kenlm 3-gram model on factor 0
+
     }
 
     # filter command
diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl
index 3764ab0c2..46a7e1fe6 100755
--- a/scripts/training/train-model.perl
+++ b/scripts/training/train-model.perl
@@ -31,7 +31,7 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_
    $_DECODING_GRAPH_BACKOFF,
    $_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE,
    @_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS,
-   $_DONT_ZIP,  $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, $_OSM, $_OSM_FACTORS, $_POST_DECODING_TRANSLIT,
+   $_DONT_ZIP,  $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, $_OSM, $_OSM_FACTORS, $_POST_DECODING_TRANSLIT, $_TRANSLITERATION_PHRASE_TABLE,
    $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_GHKM_TREE_FRAGMENTS,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS,
    $_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2, $_UNKNOWN_WORD_SOFT_MATCHES_FILE,
    $_OMIT_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
@@ -122,7 +122,8 @@ $_HELP = 1
 		       'config=s' => \$_CONFIG,
 		       'osm-model=s' => \$_OSM,
 			'osm-setting=s' => \$_OSM_FACTORS,
-			'post-decoding-translit=s' => \$_POST_DECODING_TRANSLIT,		
+			'post-decoding-translit=s' => \$_POST_DECODING_TRANSLIT,
+			'transliteration-phrase-table=s' => \$_TRANSLITERATION_PHRASE_TABLE,		
 		       'max-lexical-reordering' => \$_MAX_LEXICAL_REORDERING,
 		       'do-steps=s' => \$_DO_STEPS,
 		       'memscore:s' => \$_MEMSCORE,
@@ -1879,6 +1880,8 @@ sub create_ini {
      $path++;
    }
    print INI "1 T 1\n" if $_GLUE_GRAMMAR;
+  
+   print INI "1 T 1\n" if $_TRANSLITERATION_PHRASE_TABLE;	
 
    if (defined($_DECODING_GRAPH_BACKOFF)) {
      $_DECODING_GRAPH_BACKOFF =~ s/\s+/ /g;
@@ -1962,6 +1965,13 @@ sub create_ini {
      exit 1 if $i < $stepsused{"T"}; # fatal to define less
    }
 
+   if ($_TRANSLITERATION_PHRASE_TABLE){
+		
+     $feature_spec .= "PhraseDictionaryMemory name=TranslationModel$i table-limit=100 num-features=4 path=$_TRANSLITERATION_PHRASE_TABLE input-factor=0 output-factor=0\n";
+     $weight_spec .= "TranslationModel$i= 0.2 0.2 0.2 0.2\n";
+     $i++;	
+   }  	
+
    # glue grammar
    if ($_GLUE_GRAMMAR) {
      &full_path(\$___GLUE_GRAMMAR_FILE);
@@ -2069,8 +2079,9 @@ sub create_ini {
 	
     my $lm_oov_prob = 0.1;
 	
-    if ($_POST_DECODING_TRANSLIT){
+    if ($_POST_DECODING_TRANSLIT || $_TRANSLITERATION_PHRASE_TABLE){
 	$lm_oov_prob = -100.0;
+	$_LMODEL_OOV_FEATURE = "yes";
     } 	   
  
     $feature_spec .= "$type_name name=LM$i factor=$f path=$fn order=$o\n";

From d90aaf101839f172994142a14cd6c5908a5f962e Mon Sep 17 00:00:00 2001
From: Nadir Durrani <nadir@hel.inf.ed.ac.uk>
Date: Wed, 16 Apr 2014 17:40:49 +0100
Subject: [PATCH 11/23] Fix script name in in-decoding-transliteration.pl usage error

---
 scripts/Transliteration/in-decoding-transliteration.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/Transliteration/in-decoding-transliteration.pl b/scripts/Transliteration/in-decoding-transliteration.pl
index e4f0503a8..237aec587 100755
--- a/scripts/Transliteration/in-decoding-transliteration.pl
+++ b/scripts/Transliteration/in-decoding-transliteration.pl
@@ -16,7 +16,7 @@ my $___FACTOR_DELIMITER = "|";
 my $OUT_FILE = "/tmp/transliteration-phrase-table.$$";
 
 my ($MOSES_SRC_DIR,$TRANSLIT_MODEL,$OOV_FILE, $OOV_FILE_NAME, $EXTERNAL_BIN_DIR, $LM_FILE, $INPUT_EXTENSION, $OUTPUT_EXTENSION);
-die("ERROR: wrong syntax when invoking postDecodingTransliteration.perl")
+die("ERROR: wrong syntax when invoking in-decoding-transliteration.perl")
     unless &GetOptions('moses-src-dir=s' => \$MOSES_SRC_DIR,
 			'external-bin-dir=s' => \$EXTERNAL_BIN_DIR,
 			'transliteration-model-dir=s' => \$TRANSLIT_MODEL,

From a4d32a2b090be41969b651fcad2df34fd824cbae Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieu@hoang.co.uk>
Date: Thu, 17 Apr 2014 20:04:02 +0100
Subject: [PATCH 12/23] minor compile errors in ChartKBestExtractor

---
 contrib/other-builds/moses/.project | 10 ++++++++++
 moses/ChartHypothesis.h             |  6 +++---
 moses/ChartKBestExtractor.cpp       |  2 +-
 moses/ChartKBestExtractor.h         |  1 +
 4 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index 1a9939c51..cf311ed9c 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -166,6 +166,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/ChartHypothesisCollection.h</locationURI>
 		</link>
+		<link>
+			<name>ChartKBestExtractor.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/ChartKBestExtractor.cpp</locationURI>
+		</link>
+		<link>
+			<name>ChartKBestExtractor.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/ChartKBestExtractor.h</locationURI>
+		</link>
 		<link>
 			<name>ChartManager.cpp</name>
 			<type>1</type>
diff --git a/moses/ChartHypothesis.h b/moses/ChartHypothesis.h
index 532d757c9..150b53fd0 100644
--- a/moses/ChartHypothesis.h
+++ b/moses/ChartHypothesis.h
@@ -75,9 +75,6 @@ protected:
   //! not implemented
   ChartHypothesis(const ChartHypothesis &copy);
 
-  //! only used by ChartKBestExtractor
-  ChartHypothesis(const ChartHypothesis &, const ChartKBestExtractor &);
-
 public:
 #ifdef USE_HYPO_POOL
   void *operator new(size_t /* num_bytes */) {
@@ -96,6 +93,9 @@ public:
   }
 #endif
 
+  //! only used by ChartKBestExtractor
+  ChartHypothesis(const ChartHypothesis &, const ChartKBestExtractor &);
+
   ChartHypothesis(const ChartTranslationOptions &, const RuleCubeItem &item,
                   ChartManager &manager);
 
diff --git a/moses/ChartKBestExtractor.cpp b/moses/ChartKBestExtractor.cpp
index 60c066191..05f8920c5 100644
--- a/moses/ChartKBestExtractor.cpp
+++ b/moses/ChartKBestExtractor.cpp
@@ -73,7 +73,7 @@ void ChartKBestExtractor::Extract(
   for (KBestVec::const_iterator p = top->kBestList.begin();
        p != top->kBestList.end(); ++p) {
     const Derivation &d = **p;
-    assert(d.edge->tail.size() == 1);  // d should have exactly one predecessor.
+    assert(d.edge.tail.size() == 1);  // d should have exactly one predecessor.
     assert(d.backPointers.size() == 1);
     std::size_t i = d.backPointers[0];
     boost::shared_ptr<Derivation> pred = d.edge.tail[0]->kBestList[i];
diff --git a/moses/ChartKBestExtractor.h b/moses/ChartKBestExtractor.h
index 07df7eacb..66430ec1e 100644
--- a/moses/ChartKBestExtractor.h
+++ b/moses/ChartKBestExtractor.h
@@ -19,6 +19,7 @@
 
 #pragma once
 
+#include <cassert>
 #include "ChartHypothesis.h"
 #include "ScoreComponentCollection.h"
 

From e22b68e2fde3d0bc7b345cf9b395de05353de2f0 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieu@hoang.co.uk>
Date: Thu, 17 Apr 2014 20:15:06 +0100
Subject: [PATCH 13/23] roll back change in mosesserver. Doesn't work with
 xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04

---
 contrib/server/mosesserver.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/contrib/server/mosesserver.cpp b/contrib/server/mosesserver.cpp
index 80eab8f20..03b02ef41 100644
--- a/contrib/server/mosesserver.cpp
+++ b/contrib/server/mosesserver.cpp
@@ -521,6 +521,12 @@ int main(int argc, char** argv)
   myRegistry.addMethod("updater", updater);
   myRegistry.addMethod("optimize", optimizer);
 
+   xmlrpc_c::serverAbyss myAbyssServer(
+					myRegistry,
+					port,              // TCP port on which to listen
+					logfile
+					);
+  /* doesn't work with xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04
   xmlrpc_c::serverAbyss myAbyssServer(
     xmlrpc_c::serverAbyss::constrOpt()
     .registryPtr(&myRegistry)
@@ -528,6 +534,7 @@ int main(int argc, char** argv)
     .logFileName(logfile)
     .allowOrigin("*")
   );
+  */
 
   cerr << "Listening on port " << port << endl;
   if (isSerial) {

From 00a2bd395aa0f8e5b9d36dfce3651ecf55cbf234 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Thu, 17 Apr 2014 21:22:30 +0100
Subject: [PATCH 14/23] word alignment from server, thanks to Jyotesh Choudhari

---
 contrib/server/Jamfile         |  2 +-
 contrib/server/mosesserver.cpp | 36 +++++++++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/contrib/server/Jamfile b/contrib/server/Jamfile
index 445c07ae8..6e641a2f7 100644
--- a/contrib/server/Jamfile
+++ b/contrib/server/Jamfile
@@ -35,7 +35,7 @@ if $(build-moses-server) = true
   xmlrpc-linkflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --libs" ] ;
   xmlrpc-cxxflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --cflags" ] ;
 
-  exe mosesserver : mosesserver.cpp ../../moses//moses ../../OnDiskPt//OnDiskPt : <linkflags>$(xmlrpc-linkflags) <cxxflags>$(xmlrpc-cxxflags) ;
+  exe mosesserver : mosesserver.cpp ../../moses//moses ../../moses-cmd/IOWrapper.cpp ../../OnDiskPt//OnDiskPt : <linkflags>$(xmlrpc-linkflags) <cxxflags>$(xmlrpc-cxxflags) ;
 } else {
   alias mosesserver ;
 }
diff --git a/contrib/server/mosesserver.cpp b/contrib/server/mosesserver.cpp
index 03b02ef41..105f09c13 100644
--- a/contrib/server/mosesserver.cpp
+++ b/contrib/server/mosesserver.cpp
@@ -12,6 +12,7 @@
 #include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
 #include "moses/TreeInput.h"
 #include "moses/LM/ORLM.h"
+#include "moses-cmd/IOWrapper.h"
 
 #ifdef WITH_THREADS
 #include <boost/thread.hpp>
@@ -22,6 +23,7 @@
 #include <xmlrpc-c/server_abyss.hpp>
 
 using namespace Moses;
+using namespace MosesCmd;
 using namespace std;
 
 typedef std::map<std::string, xmlrpc_c::value> params_t;
@@ -215,6 +217,8 @@ public:
     cerr << "Input: " << source << endl;
     si = params.find("align");
     bool addAlignInfo = (si != params.end());
+    si = params.find("word-align");
+    bool addWordAlignInfo = (si != params.end());
     si = params.find("sg");
     bool addGraphInfo = (si != params.end());
     si = params.find("topt");
@@ -278,6 +282,20 @@ public:
         if (addAlignInfo) {
           retData.insert(pair<string, xmlrpc_c::value>("align", xmlrpc_c::value_array(alignInfo)));
         }
+        if (addWordAlignInfo) {
+          stringstream wordAlignment;
+          OutputAlignment(wordAlignment, hypo);
+          vector<xmlrpc_c::value> alignments;
+          string alignmentPair;
+          while (wordAlignment >> alignmentPair) {
+          	int pos = alignmentPair.find('-');
+          	map<string, xmlrpc_c::value> wordAlignInfo;
+          	wordAlignInfo["source-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(0, pos).c_str()));
+          	wordAlignInfo["target-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(pos + 1).c_str()));
+          	alignments.push_back(xmlrpc_c::value_struct(wordAlignInfo));
+          }
+          retData.insert(pair<string, xmlrpc_c::value_array>("word-align", alignments));
+        }
 
         if(addGraphInfo) {
           insertGraphInfo(manager,retData);
@@ -415,9 +433,25 @@ public:
       }
       nBestXMLItem["hyp"] = xmlrpc_c::value_string(out.str());
 
-      if (addAlignmentInfo)
+      if (addAlignmentInfo) {
         nBestXMLItem["align"] = xmlrpc_c::value_array(alignInfo);
 
+        if ((int)edges.size() > 0) {
+          stringstream wordAlignment;
+          OutputAlignment(wordAlignment, edges[0]);
+          vector<xmlrpc_c::value> alignments;
+          string alignmentPair;
+          while (wordAlignment >> alignmentPair) {
+          	int pos = alignmentPair.find('-');
+          	map<string, xmlrpc_c::value> wordAlignInfo;
+          	wordAlignInfo["source-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(0, pos).c_str()));
+          	wordAlignInfo["target-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(pos + 1).c_str()));
+          	alignments.push_back(xmlrpc_c::value_struct(wordAlignInfo));
+          }
+          nBestXMLItem["word-align"] = xmlrpc_c::value_array(alignments);
+        }
+      }
+
       // weighted score
       nBestXMLItem["totalScore"] = xmlrpc_c::value_double(path.GetTotalScore());
       nBestXml.push_back(xmlrpc_c::value_struct(nBestXMLItem));

From 568685cb66287dc0af72315df5095567a1854853 Mon Sep 17 00:00:00 2001
From: Phil Williams <philip.williams@mac.com>
Date: Sat, 19 Apr 2014 10:29:41 +0100
Subject: [PATCH 15/23] ChartKBestExtractor: fix memory leak, clean-up code

---
 moses/ChartKBestExtractor.cpp | 154 +++++++++++++++++++---------------
 moses/ChartKBestExtractor.h   |  69 +++++++--------
 2 files changed, 124 insertions(+), 99 deletions(-)

diff --git a/moses/ChartKBestExtractor.cpp b/moses/ChartKBestExtractor.cpp
index 05f8920c5..72a894ba7 100644
--- a/moses/ChartKBestExtractor.cpp
+++ b/moses/ChartKBestExtractor.cpp
@@ -32,52 +32,48 @@ namespace Moses
 
 // Extract the k-best list from the search graph.
 void ChartKBestExtractor::Extract(
-    const std::vector<const ChartHypothesis*> &topHypos, std::size_t k,
+    const std::vector<const ChartHypothesis*> &topLevelHypos, std::size_t k,
     KBestVec &kBestList)
 {
-  typedef std::vector<const ChartHypothesis*> HypoVec;
-
   kBestList.clear();
-  if (topHypos.empty()) {
+  if (topLevelHypos.empty()) {
     return;
   }
 
-  // Create a new top-level ChartHypothesis that has the best hypothesis as its
-  // predecessor.  This is the search hypergraph's target vertex.
-  HypoVec::const_iterator iter = topHypos.begin();
+  // Create a new ChartHypothesis object, supremeHypo, that has the best
+  // top-level hypothesis as its predecessor and has the same score.
+  std::vector<const ChartHypothesis*>::const_iterator p = topLevelHypos.begin();
+  const ChartHypothesis &bestTopLevelHypo = **p;
   boost::scoped_ptr<ChartHypothesis> supremeHypo(
-    new ChartHypothesis(**iter, *this));
+      new ChartHypothesis(bestTopLevelHypo, *this));
 
   // Do the same for each alternative top-level hypothesis, but add the new
   // ChartHypothesis objects as arcs from supremeHypo, as if they had been
   // recombined.
-  float prevScore = (*iter)->GetTotalScore();
-  for (++iter; iter != topHypos.end(); ++iter) {
-    // Check that the first item in topHypos really was the best.
-    UTIL_THROW_IF2((*iter)->GetTotalScore() <= prevScore,
-                   "top-level vertices are not correctly sorted");
+  for (++p; p != topLevelHypos.end(); ++p) {
+    // Check that the first item in topLevelHypos really was the best.
+    UTIL_THROW_IF2((*p)->GetTotalScore() <= bestTopLevelHypo.GetTotalScore(),
+                   "top-level hypotheses are not correctly sorted");
     // Note: there's no need for a smart pointer here: supremeHypo will take
     // ownership of altHypo.
-    ChartHypothesis *altHypo = new ChartHypothesis(**iter, *this);
+    ChartHypothesis *altHypo = new ChartHypothesis(**p, *this);
     supremeHypo->AddArc(altHypo);
   }
 
-  // Create the target vertex corresponding to supremeHypo then generate
-  // it's k-best list.
-  boost::shared_ptr<Vertex> top = FindOrCreateVertex(*supremeHypo);
-  LazyKthBest(*top, k, k);
+  // Create the target vertex then lazily fill its k-best list.
+  boost::shared_ptr<Vertex> targetVertex = FindOrCreateVertex(*supremeHypo);
+  LazyKthBest(*targetVertex, k, k);
 
   // Copy the k-best list from the target vertex, but drop the top edge from
   // each derivation.
-  kBestList.reserve(top->kBestList.size());
-  for (KBestVec::const_iterator p = top->kBestList.begin();
-       p != top->kBestList.end(); ++p) {
-    const Derivation &d = **p;
-    assert(d.edge.tail.size() == 1);  // d should have exactly one predecessor.
-    assert(d.backPointers.size() == 1);
-    std::size_t i = d.backPointers[0];
-    boost::shared_ptr<Derivation> pred = d.edge.tail[0]->kBestList[i];
-    kBestList.push_back(pred);
+  kBestList.reserve(targetVertex->kBestList.size());
+  for (std::vector<boost::weak_ptr<Derivation> >::const_iterator
+        q = targetVertex->kBestList.begin();
+        q != targetVertex->kBestList.end(); ++q) {
+    const boost::shared_ptr<Derivation> d(*q);
+    assert(d);
+    assert(d->subderivations.size() == 1);
+    kBestList.push_back(d->subderivations[0]);
   }
 }
 
@@ -96,8 +92,7 @@ Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
     const Word &word = phrase.GetWord(pos);
     if (word.IsNonTerminal()) {
       std::size_t nonTermInd = nonTermIndexMap[pos];
-      const Derivation &subderivation =
-        *d.edge.tail[nonTermInd]->kBestList[d.backPointers[nonTermInd]];
+      const Derivation &subderivation = *d.subderivations[nonTermInd];
       Phrase subPhrase = GetOutputPhrase(subderivation);
       ret.Append(subPhrase);
     } else {
@@ -142,26 +137,6 @@ ChartKBestExtractor::UnweightedHyperarc ChartKBestExtractor::CreateEdge(
   return edge;
 }
 
-void ChartKBestExtractor::GetCandidates(Vertex &v, std::size_t k)
-{
-  // Create a derivation for v's best incoming edge.
-  UnweightedHyperarc bestEdge = CreateEdge(v.hypothesis);
-  boost::shared_ptr<Derivation> d(new Derivation(bestEdge));
-  v.candidates.push(d);
-  v.seen.insert(d);
-  // Create derivations for the rest of v's incoming edges.
-  const ChartArcList *arcList = v.hypothesis.GetArcList();
-  if (arcList) {
-    for (std::size_t i = 0; i < arcList->size(); ++i) {
-      const ChartHypothesis &recombinedHypo = *(*arcList)[i];
-      UnweightedHyperarc edge = CreateEdge(recombinedHypo);
-      boost::shared_ptr<Derivation> d(new Derivation(edge));
-      v.candidates.push(d);
-      v.seen.insert(d);
-    }
-  }
-}
-
 // Look for the vertex corresponding to a given ChartHypothesis, creating
 // a new one if necessary.
 boost::shared_ptr<ChartKBestExtractor::Vertex>
@@ -174,14 +149,51 @@ ChartKBestExtractor::FindOrCreateVertex(const ChartHypothesis &h)
     return sp;  // Vertex was already in m_vertexMap.
   }
   sp.reset(new Vertex(h));
+  // Create the 1-best derivation and add it to the vertex's kBestList.
+  UnweightedHyperarc bestEdge;
+  bestEdge.head = sp;
+  const std::vector<const ChartHypothesis*> &prevHypos = h.GetPrevHypos();
+  bestEdge.tail.resize(prevHypos.size());
+  for (std::size_t i = 0; i < prevHypos.size(); ++i) {
+    const ChartHypothesis *prevHypo = prevHypos[i];
+    bestEdge.tail[i] = FindOrCreateVertex(*prevHypo);
+  }
+  boost::shared_ptr<Derivation> bestDerivation(new Derivation(bestEdge));
+  std::pair<DerivationSet::iterator, bool> q =
+    m_derivations.insert(bestDerivation);
+  assert(q.second);
+  sp->kBestList.push_back(bestDerivation);
   return sp;
 }
 
+// Create the 1-best derivation for each edge in BS(v) (except the best one)
+// and add it to v's candidate queue.
+void ChartKBestExtractor::GetCandidates(Vertex &v, std::size_t k)
+{
+  // Create derivations for all of v's incoming edges except the best.  This
+  // means everything in v.hypothesis.GetArcList() and not the edge defined
+  // by v.hypothesis itself.  The 1-best derivation for that edge will already
+  // have been created.
+  const ChartArcList *arcList = v.hypothesis.GetArcList();
+  if (arcList) {
+    for (std::size_t i = 0; i < arcList->size(); ++i) {
+      const ChartHypothesis &recombinedHypo = *(*arcList)[i];
+      boost::shared_ptr<Vertex> w = FindOrCreateVertex(recombinedHypo);
+      assert(w->kBestList.size() == 1);
+      v.candidates.push(w->kBestList[0]);
+    }
+  }
+}
+
+// Lazily fill v's k-best list.
 void ChartKBestExtractor::LazyKthBest(Vertex &v, std::size_t k,
                                       std::size_t globalK)
 {
   // If this is the first visit to vertex v then initialize the priority queue.
   if (v.visited == false) {
+    // The 1-best derivation should already be in v's k-best list.
+    assert(v.kBestList.size() == 1);
+    // Initialize v's priority queue.
     GetCandidates(v, globalK);
     v.visited = true;
   }
@@ -191,49 +203,57 @@ void ChartKBestExtractor::LazyKthBest(Vertex &v, std::size_t k,
     if (!v.kBestList.empty()) {
       // Update the priority queue by adding the successors of the last
       // derivation (unless they've been seen before).
-      const Derivation &d = *v.kBestList.back();
-      LazyNext(v, d, globalK);
+      boost::shared_ptr<Derivation> d(v.kBestList.back());
+      LazyNext(v, *d, globalK);
     }
     // Check if there are any derivations left in the queue.
     if (v.candidates.empty()) {
       break;
     }
     // Get the next best derivation and delete it from the queue.
-    boost::shared_ptr<Derivation> d = v.candidates.top();
+    boost::weak_ptr<Derivation> d = v.candidates.top();
     v.candidates.pop();
     // Add it to the k-best list.
     v.kBestList.push_back(d);
   }
 }
 
+// Create the neighbours of Derivation d and add them to v's candidate queue.
 void ChartKBestExtractor::LazyNext(Vertex &v, const Derivation &d,
                                    std::size_t globalK)
 {
-  // Create the neighbours of Derivation d.
-  for (std::size_t i = 0; i < d.backPointers.size(); ++i) {
-    Vertex &predVertex = *d.edge.tail[i];
-    // Ensure that predVertex's k-best list contains enough derivations.
+  for (std::size_t i = 0; i < d.edge.tail.size(); ++i) {
+    Vertex &pred = *d.edge.tail[i];
+    // Ensure that pred's k-best list contains enough derivations.
     std::size_t k = d.backPointers[i] + 2;
-    LazyKthBest(predVertex, k, globalK);
-    if (predVertex.kBestList.size() < k) {
-      // predVertex's derivations have been exhausted.
+    LazyKthBest(pred, k, globalK);
+    if (pred.kBestList.size() < k) {
+      // pred's derivations have been exhausted.
       continue;
     }
     // Create the neighbour.
     boost::shared_ptr<Derivation> next(new Derivation(d, i));
     // Check if it has been created before.
-    std::pair<Vertex::DerivationSet::iterator, bool> p = v.seen.insert(next);
+    std::pair<DerivationSet::iterator, bool> p = m_derivations.insert(next);
     if (p.second) {
       v.candidates.push(next);  // Haven't previously seen it.
     }
   }
 }
 
-// Construct a Derivation corresponding to a ChartHypothesis.
+// Construct the 1-best Derivation that ends at edge e.
 ChartKBestExtractor::Derivation::Derivation(const UnweightedHyperarc &e)
 {
   edge = e;
-  backPointers.resize(edge.tail.size(), 0);
+  std::size_t arity = edge.tail.size();
+  backPointers.resize(arity, 0);
+  subderivations.reserve(arity);
+  for (std::size_t i = 0; i < arity; ++i) {
+    const Vertex &pred = *edge.tail[i];
+    assert(pred.kBestList.size() == 1);
+    boost::shared_ptr<Derivation> sub(pred.kBestList[0]);
+    subderivations.push_back(sub);
+  }
   scoreBreakdown = edge.head->hypothesis.GetScoreBreakdown();
   score = edge.head->hypothesis.GetTotalScore();
 }
@@ -244,14 +264,16 @@ ChartKBestExtractor::Derivation::Derivation(const Derivation &d, std::size_t i)
   edge.head = d.edge.head;
   edge.tail = d.edge.tail;
   backPointers = d.backPointers;
+  subderivations = d.subderivations;
   std::size_t j = ++backPointers[i];
   scoreBreakdown = d.scoreBreakdown;
   // Deduct the score of the old subderivation.
-  const Derivation &oldSubderivation = *(edge.tail[i]->kBestList[j-1]);
-  scoreBreakdown.MinusEquals(oldSubderivation.scoreBreakdown);
+  scoreBreakdown.MinusEquals(subderivations[i]->scoreBreakdown);
+  // Update the subderivation pointer.
+  boost::shared_ptr<Derivation> newSub(edge.tail[i]->kBestList[j]);
+  subderivations[i] = newSub;
   // Add the score of the new subderivation.
-  const Derivation &newSubderivation = *(edge.tail[i]->kBestList[j]);
-  scoreBreakdown.PlusEquals(newSubderivation.scoreBreakdown);
+  scoreBreakdown.PlusEquals(subderivations[i]->scoreBreakdown);
   score = scoreBreakdown.GetWeightedScore();
 }
 
diff --git a/moses/ChartKBestExtractor.h b/moses/ChartKBestExtractor.h
index 66430ec1e..05b016d50 100644
--- a/moses/ChartKBestExtractor.h
+++ b/moses/ChartKBestExtractor.h
@@ -24,6 +24,7 @@
 #include "ScoreComponentCollection.h"
 
 #include <boost/unordered_set.hpp>
+#include <boost/weak_ptr.hpp>
 
 #include <queue>
 #include <vector>
@@ -53,17 +54,46 @@ public:
 
     UnweightedHyperarc edge;
     std::vector<std::size_t> backPointers;
+    std::vector<boost::shared_ptr<Derivation> > subderivations;
     ScoreComponentCollection scoreBreakdown;
     float score;
   };
 
   struct DerivationOrderer {
-    bool operator()(const boost::shared_ptr<Derivation> &d1,
-                    const boost::shared_ptr<Derivation> &d2) const {
-      return d1->score < d2->score;
+    bool operator()(const boost::weak_ptr<Derivation> &d1,
+                    const boost::weak_ptr<Derivation> &d2) const {
+      boost::shared_ptr<Derivation> s1(d1);
+      boost::shared_ptr<Derivation> s2(d2);
+      return s1->score < s2->score;
     }
   };
 
+  struct Vertex {
+    typedef std::priority_queue<boost::weak_ptr<Derivation>,
+                                std::vector<boost::weak_ptr<Derivation> >,
+                                DerivationOrderer> DerivationQueue;
+
+    Vertex(const ChartHypothesis &h) : hypothesis(h), visited(false) {}
+
+    const ChartHypothesis &hypothesis;
+    std::vector<boost::weak_ptr<Derivation> > kBestList;
+    DerivationQueue candidates;
+    bool visited;
+  };
+
+  typedef std::vector<boost::shared_ptr<Derivation> > KBestVec;
+
+  // Extract the k-best list from the search hypergraph given the full, sorted
+  // list of top-level vertices.
+  void Extract(const std::vector<const ChartHypothesis*> &topHypos,
+               std::size_t k, KBestVec &);
+
+  static Phrase GetOutputPhrase(const Derivation &);
+
+private:
+  typedef boost::unordered_map<const ChartHypothesis *,
+                               boost::shared_ptr<Vertex> > VertexMap;
+
   struct DerivationHasher {
     std::size_t operator()(const boost::shared_ptr<Derivation> &d) const {
       std::size_t seed = 0;
@@ -83,36 +113,8 @@ public:
     }
   };
 
-  struct Vertex {
-    typedef std::priority_queue<boost::shared_ptr<Derivation>,
-                                std::vector<boost::shared_ptr<Derivation> >,
-                                DerivationOrderer> DerivationQueue;
-
-    typedef boost::unordered_set<boost::shared_ptr<Derivation>,
-                                 DerivationHasher,
-                                 DerivationEqualityPred> DerivationSet;
-
-    Vertex(const ChartHypothesis &h) : hypothesis(h), visited(false) {}
-
-    const ChartHypothesis &hypothesis;
-    std::vector<boost::shared_ptr<Derivation> > kBestList;
-    DerivationQueue candidates;
-    DerivationSet seen;
-    bool visited;
-  };
-
-  typedef std::vector<boost::shared_ptr<Derivation> > KBestVec;
-
-  // Extract the k-best list from the search hypergraph given the full, sorted
-  // list of top-level vertices.
-  void Extract(const std::vector<const ChartHypothesis*> &topHypos,
-               std::size_t k, KBestVec &);
-
-  static Phrase GetOutputPhrase(const Derivation &);
-
-private:
-  typedef boost::unordered_map<const ChartHypothesis *,
-                               boost::shared_ptr<Vertex> > VertexMap;
+  typedef boost::unordered_set<boost::shared_ptr<Derivation>, DerivationHasher,
+                               DerivationEqualityPred> DerivationSet;
 
   UnweightedHyperarc CreateEdge(const ChartHypothesis &);
   boost::shared_ptr<Vertex> FindOrCreateVertex(const ChartHypothesis &);
@@ -121,6 +123,7 @@ private:
   void LazyNext(Vertex &, const Derivation &, std::size_t);
 
   VertexMap m_vertexMap;
+  DerivationSet m_derivations;
 };
 
 }  // namespace Moses

From 76a4609cff1b5bc9a5581e83dcddaf21d696682d Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Sun, 20 Apr 2014 23:30:09 +0100
Subject: [PATCH 16/23] add script to create pt with only certain scores

---
 scripts/other/delete-scores.perl | 61 ++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100755 scripts/other/delete-scores.perl

diff --git a/scripts/other/delete-scores.perl b/scripts/other/delete-scores.perl
new file mode 100755
index 000000000..442173026
--- /dev/null
+++ b/scripts/other/delete-scores.perl
@@ -0,0 +1,61 @@
+#!/usr/bin/perl 
+
+use strict;
+use Getopt::Long "GetOptions";
+
+binmode(STDIN, ":utf8");
+binmode(STDOUT, ":utf8");
+
+sub trim($);
+sub DeleteScore;
+
+# Comma-separated, zero-based indices of the scores to keep, in output order.
+my $keepScoresStr;
+GetOptions(
+  "keep-scores=s" => \$keepScoresStr
+) or exit(1);
+# Without the option every score would silently be deleted, so fail loudly.
+defined($keepScoresStr) or die("Usage: $0 --keep-scores=IDX[,IDX...]\n");
+
+my @keepScores = split(/,/, $keepScoresStr);
+
+#MAIN LOOP
+while (my $line = <STDIN>) {
+  chomp($line);
+
+  # Split on every single '|', so "a ||| b" yields ("a ", "", "", " b") and
+  # the field rewritten below is $toks[6].  The limit of -1 keeps trailing
+  # empty fields; without it a line-final "|||" would be lost on output.
+  my @toks = split(/\|/, $line, -1);
+
+  $toks[6] = DeleteScore($toks[6], \@keepScores);
+
+  # output: re-join the fields with the '|' separators removed by split
+  print join("|", @toks), "\n";
+}
+
+######################
+# Perl trim function to remove whitespace from the start and end of the string
+sub trim($) {
+  my ($text) = @_;
+  # One pass: delete a whitespace run anchored at either end of the string.
+  $text =~ s/^\s+|\s+$//g;
+  return $text;
+}
+
+# Return the score field $_[0] cut down to the scores whose zero-based indices
+# are listed in the array ref $_[1], formatted as " s1 s2 ... sn ".
+sub DeleteScore
+{
+  my ($string, $keepScoresRef) = @_;
+  # split(' ') splits on runs of whitespace, so doubled spaces or tabs
+  # between scores cannot shift the indices (split(/ /) made empty tokens).
+  my @toks = split(' ', trim($string));
+
+  my $kept = " ";
+  foreach my $ind (@$keepScoresRef) {
+    die("Score index $ind out of range in '$string'\n") if $ind > $#toks;
+    $kept .= $toks[$ind] . " ";
+  }
+  return $kept;
+}

From b308bd56579a6791ccd8d0d326acb0e0b74d1b20 Mon Sep 17 00:00:00 2001
From: Phil Williams <philip.williams@mac.com>
Date: Mon, 21 Apr 2014 10:19:00 +0100
Subject: [PATCH 17/23] moses_chart: enable ChartKBestExtractor.

---
 moses-chart-cmd/IOWrapper.cpp | 133 +++++++++++++++++++++++++++++++++-
 moses-chart-cmd/IOWrapper.h   |   5 +-
 moses-chart-cmd/Main.cpp      |   4 +-
 moses/ChartHypothesis.h       |   7 +-
 moses/ChartKBestExtractor.cpp |  17 ++---
 moses/ChartManager.cpp        |  60 +++++++++++++++
 moses/ChartManager.h          |   2 +
 7 files changed, 212 insertions(+), 16 deletions(-)

diff --git a/moses-chart-cmd/IOWrapper.cpp b/moses-chart-cmd/IOWrapper.cpp
index 56c166422..81612ed1f 100644
--- a/moses-chart-cmd/IOWrapper.cpp
+++ b/moses-chart-cmd/IOWrapper.cpp
@@ -553,7 +553,7 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
 
 //DIMw
 void IOWrapper::OutputDetailedAllTranslationReport(
-  const ChartTrellisPathList &nBestList,
+  const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
   const ChartManager &manager,
   const Sentence &sentence,
   long translationId)
@@ -793,6 +793,58 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, long tran
   m_nBestOutputCollector->Write(translationId, out.str());
 }
 
+// Write a k-best list extracted by ChartKBestExtractor to the n-best output
+// collector: one "id ||| surface ||| feature scores ||| total score" line per
+// derivation, optionally followed by " ||| " and the word alignment points.
+void IOWrapper::OutputNBestList(const ChartKBestExtractor::KBestVec &nBestList,
+                                long translationId)
+{
+  assert(m_nBestOutputCollector);  // checked before its first dereference
+  std::ostringstream out;
+
+  if (m_nBestOutputCollector->OutputIsCout()) {
+    // Set precision only if we're writing the n-best list to cout.  This is to
+    // preserve existing behaviour, but should probably be done either way.
+    IOWrapper::FixPrecision(out);
+  }
+
+  const bool includeWordAlignment =
+      StaticData::Instance().PrintAlignmentInfoInNbest();
+
+  for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
+       p != nBestList.end(); ++p) {
+    const ChartKBestExtractor::Derivation &derivation = **p;
+
+    // get the derivation's target-side yield
+    Phrase outputPhrase = ChartKBestExtractor::GetOutputPhrase(derivation);
+
+    // delete <s> and </s>
+    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
+        "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+    outputPhrase.RemoveWord(0);
+    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
+
+    // print the translation ID, surface factors, and scores
+    out << translationId << " ||| ";
+    OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
+    out << " ||| ";
+    OutputAllFeatureScores(derivation.scoreBreakdown, out);
+    out << " ||| " << derivation.score;
+
+    // optionally, print word alignments
+    if (includeWordAlignment) {
+      out << " ||| ";
+      Alignments align;
+      OutputAlignmentNBest(align, derivation, 0);
+      for (Alignments::const_iterator q = align.begin(); q != align.end(); ++q) {
+        out << q->first << "-" << q->second << " ";
+      }
+    }
+    out << std::endl;
+  }
+  m_nBestOutputCollector->Write(translationId, out.str());
+}
+
 void IOWrapper::OutputNBestList(const std::vector<search::Applied> &nbest, long translationId)
 {
   std::ostringstream out;
@@ -927,6 +979,85 @@ size_t IOWrapper::OutputAlignmentNBest(Alignments &retAlign, const Moses::ChartT
   return totalTargetSize;
 }
 
+// Collect the word alignment points of a k-best derivation into retAlign.
+// Recurses into the subderivation of each non-terminal in the target phrase;
+// startTarget is the output position at which this derivation's yield starts.
+// Returns the number of target words in the derivation's yield.
+size_t IOWrapper::OutputAlignmentNBest(
+    Alignments &retAlign,
+    const Moses::ChartKBestExtractor::Derivation &derivation,
+    size_t startTarget)
+{
+  const ChartHypothesis &hypo = derivation.edge.head->hypothesis;
+  const TargetPhrase &tp = hypo.GetCurrTargetPhrase();
+
+  size_t totalTargetSize = 0;
+  size_t startSource = hypo.GetCurrSourceRange().GetStartPos();
+  size_t thisSourceSize = CalcSourceSize(&hypo);
+
+  // position of each terminal word in translation rule, irrespective of alignment
+  // if non-term, number is undefined
+  vector<size_t> sourceOffsets(thisSourceSize, 0);
+  vector<size_t> targetOffsets(tp.GetSize(), 0);
+
+  const AlignmentInfo &aiNonTerm = tp.GetAlignNonTerm();
+  vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
+  const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
+
+  UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(), "Error");
+
+  for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
+    if (tp.GetWord(targetPos).IsNonTerminal()) {
+      UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
+      size_t sourceInd = targetPos2SourceInd[targetPos];
+      UTIL_THROW_IF2(sourceInd >= sourceInd2pos.size(), "Non-terminal index out of range");
+      size_t sourcePos = sourceInd2pos[sourceInd];
+
+      const Moses::ChartKBestExtractor::Derivation &subderivation =
+        *derivation.subderivations[sourceInd];
+
+      // calc source size
+      size_t sourceSize = subderivation.edge.head->hypothesis.GetCurrSourceRange().GetNumWordsCovered();
+      sourceOffsets[sourcePos] = sourceSize;
+
+      // calc target size.
+      // Recursively look thru child hypos
+      size_t currStartTarget = startTarget + totalTargetSize;
+      size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
+                                               currStartTarget);
+      targetOffsets[targetPos] = targetSize;
+
+      totalTargetSize += targetSize;
+    } else {
+      ++totalTargetSize;
+    }
+  }
+
+  // convert position within translation rule to absolute position within
+  // source sentence / output sentence
+  ShiftOffsets(sourceOffsets, startSource);
+  ShiftOffsets(targetOffsets, startTarget);
+
+  // get alignments from this hypo
+  const AlignmentInfo &aiTerm = tp.GetAlignTerm();
+
+  // add to output arg, offsetting by source & target
+  AlignmentInfo::const_iterator iter;
+  for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
+    const std::pair<size_t,size_t> &align = *iter;
+    size_t relSource = align.first;
+    size_t relTarget = align.second;
+    size_t absSource = sourceOffsets[relSource];
+    size_t absTarget = targetOffsets[relTarget];
+
+    pair<size_t, size_t> alignPoint(absSource, absTarget);
+    pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
+    UTIL_THROW_IF2(!ret.second, "Error");
+  }
+
+  return totalTargetSize;
+}
+
 void IOWrapper::OutputAlignment(size_t translationId , const Moses::ChartHypothesis *hypo)
 {
   ostringstream out;
diff --git a/moses-chart-cmd/IOWrapper.h b/moses-chart-cmd/IOWrapper.h
index 9e09ef00f..bd8264eb6 100644
--- a/moses-chart-cmd/IOWrapper.h
+++ b/moses-chart-cmd/IOWrapper.h
@@ -40,6 +40,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "moses/TypeDef.h"
 #include "moses/Sentence.h"
 #include "moses/FactorTypeSet.h"
+#include "moses/ChartKBestExtractor.h"
 #include "moses/ChartTrellisPathList.h"
 #include "moses/OutputCollector.h"
 #include "moses/ChartHypothesis.h"
@@ -90,6 +91,7 @@ protected:
 
   typedef std::set< std::pair<size_t, size_t>  > Alignments;
   size_t OutputAlignmentNBest(Alignments &retAlign, const Moses::ChartTrellisNode &node, size_t startTarget);
+  std::size_t OutputAlignmentNBest(Alignments &retAlign, const Moses::ChartKBestExtractor::Derivation &derivation, std::size_t startTarget);
   size_t OutputAlignment(Alignments &retAlign, const Moses::ChartHypothesis *hypo, size_t startTarget);
   void OutputAlignment(std::vector< std::set<size_t> > &retAlignmentsS2T, const Moses::AlignmentInfo &ai);
   void OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
@@ -129,12 +131,13 @@ public:
   void OutputBestHypo(const std::vector<const Moses::Factor*>&  mbrBestHypo, long translationId);
   void OutputBestNone(long translationId);
   void OutputNBestList(const Moses::ChartTrellisPathList &nBestList, long translationId);
+  void OutputNBestList(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, long translationId);
   void OutputNBestList(const std::vector<search::Applied> &nbest, long translationId);
   void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
   void OutputDetailedTranslationReport(const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
   void OutputDetailedTreeFragmentsTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
   void OutputDetailedTreeFragmentsTranslationReport(const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
-  void OutputDetailedAllTranslationReport(const Moses::ChartTrellisPathList &nBestList, const Moses::ChartManager &manager, const Moses::Sentence &sentence, long translationId);
+  void OutputDetailedAllTranslationReport(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, const Moses::ChartManager &manager, const Moses::Sentence &sentence, long translationId);
   void Backtrack(const Moses::ChartHypothesis *hypo);
 
   void ResetTranslationId();
diff --git a/moses-chart-cmd/Main.cpp b/moses-chart-cmd/Main.cpp
index f2baff0fa..fd82b5692 100644
--- a/moses-chart-cmd/Main.cpp
+++ b/moses-chart-cmd/Main.cpp
@@ -151,7 +151,7 @@ public:
     if (staticData.IsDetailedAllTranslationReportingEnabled()) {
       const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
       size_t nBestSize = staticData.GetNBestSize();
-      ChartTrellisPathList nBestList;
+      std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
       manager.CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
       m_ioWrapper.OutputDetailedAllTranslationReport(nBestList, manager, sentence, translationId);
     }
@@ -160,7 +160,7 @@ public:
     size_t nBestSize = staticData.GetNBestSize();
     if (nBestSize > 0) {
       VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
-      ChartTrellisPathList nBestList;
+      std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
       manager.CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
       m_ioWrapper.OutputNBestList(nBestList, translationId);
       IFVERBOSE(2) {
diff --git a/moses/ChartHypothesis.h b/moses/ChartHypothesis.h
index 150b53fd0..12050e764 100644
--- a/moses/ChartHypothesis.h
+++ b/moses/ChartHypothesis.h
@@ -45,6 +45,7 @@ typedef std::vector<ChartHypothesis*> ChartArcList;
 class ChartHypothesis
 {
   friend std::ostream& operator<<(std::ostream&, const ChartHypothesis&);
+  friend class ChartKBestExtractor;
 
 protected:
 #ifdef USE_HYPO_POOL
@@ -75,6 +76,9 @@ protected:
   //! not implemented
   ChartHypothesis(const ChartHypothesis &copy);
 
+  //! only used by ChartKBestExtractor
+  ChartHypothesis(const ChartHypothesis &, const ChartKBestExtractor &);
+
 public:
 #ifdef USE_HYPO_POOL
   void *operator new(size_t /* num_bytes */) {
@@ -93,9 +97,6 @@ public:
   }
 #endif
 
-  //! only used by ChartKBestExtractor
-  ChartHypothesis(const ChartHypothesis &, const ChartKBestExtractor &);
-
   ChartHypothesis(const ChartTranslationOptions &, const RuleCubeItem &item,
                   ChartManager &manager);
 
diff --git a/moses/ChartKBestExtractor.cpp b/moses/ChartKBestExtractor.cpp
index 72a894ba7..3a16198fc 100644
--- a/moses/ChartKBestExtractor.cpp
+++ b/moses/ChartKBestExtractor.cpp
@@ -200,21 +200,20 @@ void ChartKBestExtractor::LazyKthBest(Vertex &v, std::size_t k,
   // Add derivations to the k-best list until it contains k or there are none
   // left to add.
   while (v.kBestList.size() < k) {
-    if (!v.kBestList.empty()) {
-      // Update the priority queue by adding the successors of the last
-      // derivation (unless they've been seen before).
-      boost::shared_ptr<Derivation> d(v.kBestList.back());
-      LazyNext(v, *d, globalK);
-    }
+    assert(!v.kBestList.empty());
+    // Update the priority queue by adding the successors of the last
+    // derivation (unless they've been seen before).
+    boost::shared_ptr<Derivation> d(v.kBestList.back());
+    LazyNext(v, *d, globalK);
     // Check if there are any derivations left in the queue.
     if (v.candidates.empty()) {
       break;
     }
     // Get the next best derivation and delete it from the queue.
-    boost::weak_ptr<Derivation> d = v.candidates.top();
+    boost::weak_ptr<Derivation> next = v.candidates.top();
     v.candidates.pop();
     // Add it to the k-best list.
-    v.kBestList.push_back(d);
+    v.kBestList.push_back(next);
   }
 }
 
@@ -250,7 +249,7 @@ ChartKBestExtractor::Derivation::Derivation(const UnweightedHyperarc &e)
   subderivations.reserve(arity);
   for (std::size_t i = 0; i < arity; ++i) {
     const Vertex &pred = *edge.tail[i];
-    assert(pred.kBestList.size() == 1);
+    assert(pred.kBestList.size() >= 1);
     boost::shared_ptr<Derivation> sub(pred.kBestList[0]);
     subderivations.push_back(sub);
   }
diff --git a/moses/ChartManager.cpp b/moses/ChartManager.cpp
index 0e303390e..7162099d4 100644
--- a/moses/ChartManager.cpp
+++ b/moses/ChartManager.cpp
@@ -23,6 +23,7 @@
 #include "ChartManager.h"
 #include "ChartCell.h"
 #include "ChartHypothesis.h"
+#include "ChartKBestExtractor.h"
 #include "ChartTranslationOptions.h"
 #include "ChartTrellisDetourQueue.h"
 #include "ChartTrellisNode.h"
@@ -261,6 +262,65 @@ void ChartManager::CalcNBest(size_t count, ChartTrellisPathList &ret,bool onlyDi
   }
 }
 
+/** Calculate the n-best paths through the output hypergraph.
+ * Return the list of paths in nBestList
+ * \param n how many paths to return
+ * \param nBestList return argument
+ * \param onlyDistinct whether to check for distinct output sentence or not (default - don't check, just return top n-paths)
+ */
+void ChartManager::CalcNBest(
+    std::size_t n,
+    std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList,
+    bool onlyDistinct) const
+{
+  nBestList.clear();
+  if (n == 0 || m_source.GetSize() == 0) {
+    return;
+  }
+
+  // Get the list of top-level hypotheses, sorted by score.
+  WordsRange range(0, m_source.GetSize()-1);
+  const ChartCell &lastCell = m_hypoStackColl.Get(range);
+  boost::scoped_ptr<const std::vector<const ChartHypothesis*> > topLevelHypos(
+      lastCell.GetAllSortedHypotheses());
+  if (!topLevelHypos) {
+    return;
+  }
+
+  ChartKBestExtractor extractor;
+
+  if (!onlyDistinct) {
+    // Return the n-best list as is, including duplicate translations.
+    extractor.Extract(*topLevelHypos, n, nBestList);
+    return;
+  }
+
+  // Determine how many derivations to extract.  If the n-best list is
+  // restricted to distinct translations then this limit should be bigger
+  // than n.  The n-best factor determines how much bigger the limit should be,
+  // with 0 being 'unlimited.'  This actually sets a large-ish limit in case
+  // too many translations are identical.
+  const StaticData &staticData = StaticData::Instance();
+  const std::size_t nBestFactor = staticData.GetNBestFactor();
+  std::size_t numDerivations = (nBestFactor == 0) ? n*1000 : n*nBestFactor;
+
+  // Extract the derivations.
+  ChartKBestExtractor::KBestVec bigList;
+  bigList.reserve(numDerivations);
+  extractor.Extract(*topLevelHypos, numDerivations, bigList);
+
+  // Copy derivations into nBestList, skipping ones with repeated translations.
+  std::set<Phrase> distinct;
+  for (ChartKBestExtractor::KBestVec::const_iterator p = bigList.begin();
+       p != bigList.end(); ++p) {
+    boost::shared_ptr<ChartKBestExtractor::Derivation> derivation = *p;
+    Phrase translation = ChartKBestExtractor::GetOutputPhrase(*derivation);
+    if (distinct.insert(translation).second) {
+      nBestList.push_back(derivation);
+    }
+  }
+}
+
 void ChartManager::GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const
 {
   size_t size = m_source.GetSize();
diff --git a/moses/ChartManager.h b/moses/ChartManager.h
index 6beffc45e..27914e207 100644
--- a/moses/ChartManager.h
+++ b/moses/ChartManager.h
@@ -30,6 +30,7 @@
 #include "SentenceStats.h"
 #include "ChartTranslationOptionList.h"
 #include "ChartParser.h"
+#include "ChartKBestExtractor.h"
 
 #include <boost/shared_ptr.hpp>
 
@@ -71,6 +72,7 @@ public:
   void AddXmlChartOptions();
   const ChartHypothesis *GetBestHypothesis() const;
   void CalcNBest(size_t count, ChartTrellisPathList &ret, bool onlyDistinct=0) const;
+  void CalcNBest(size_t n, std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList, bool onlyDistinct=false) const;
 
   void GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const;
   void FindReachableHypotheses( const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable ) const; /* auxilliary function for GetSearchGraph */

From ff8ac92be18cfb96e71669ad525fc830cdd14800 Mon Sep 17 00:00:00 2001
From: Phil Williams <philip.williams@mac.com>
Date: Mon, 21 Apr 2014 14:46:22 +0100
Subject: [PATCH 18/23] moses_chart: oops, don't output more distinct
 translations than requested

---
 moses/ChartManager.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/moses/ChartManager.cpp b/moses/ChartManager.cpp
index 7162099d4..623968dfc 100644
--- a/moses/ChartManager.cpp
+++ b/moses/ChartManager.cpp
@@ -312,7 +312,7 @@ void ChartManager::CalcNBest(
   // Copy derivations into nBestList, skipping ones with repeated translations.
   std::set<Phrase> distinct;
   for (ChartKBestExtractor::KBestVec::const_iterator p = bigList.begin();
-       p != bigList.end(); ++p) {
+       nBestList.size() < n && p != bigList.end(); ++p) {
     boost::shared_ptr<ChartKBestExtractor::Derivation> derivation = *p;
     Phrase translation = ChartKBestExtractor::GetOutputPhrase(*derivation);
     if (distinct.insert(translation).second) {

From 00505ba048d119f54e57e179ef40989a38143c12 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <s0565741@odin.inf.ed.ac.uk>
Date: Wed, 23 Apr 2014 11:36:25 +0100
Subject: [PATCH 19/23] minor leak

---
 moses/ChartTranslationOptionList.cpp                | 10 ++++++++--
 moses/TranslationModel/RuleTable/LoaderStandard.cpp |  5 ++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/moses/ChartTranslationOptionList.cpp b/moses/ChartTranslationOptionList.cpp
index e83fbac79..b86312b9d 100644
--- a/moses/ChartTranslationOptionList.cpp
+++ b/moses/ChartTranslationOptionList.cpp
@@ -163,8 +163,14 @@ float ChartTranslationOptionList::GetBestScore(const ChartCellLabel *chartCell)
 {
     const HypoList *stack = chartCell->GetStack().cube;
     assert(stack);
-    assert(!stack->empty());
-    const ChartHypothesis &bestHypo = **(stack->begin());
+    //assert(!stack->empty());
+    if (stack->empty()) {
+    	return 0;
+    }
+    else {
+		const ChartHypothesis &bestHypo = **(stack->begin());
+		return bestHypo.GetTotalScore();
+    }    const ChartHypothesis &bestHypo = **(stack->begin());
     return bestHypo.GetTotalScore();
 }
 
diff --git a/moses/TranslationModel/RuleTable/LoaderStandard.cpp b/moses/TranslationModel/RuleTable/LoaderStandard.cpp
index 9d2e3fa20..47f7378d1 100644
--- a/moses/TranslationModel/RuleTable/LoaderStandard.cpp
+++ b/moses/TranslationModel/RuleTable/LoaderStandard.cpp
@@ -218,7 +218,7 @@ bool RuleTableLoaderStandard::Load(FormatType format
     // parse source & find pt node
 
     // constituent labels
-    Word *sourceLHS;
+    Word *sourceLHS = NULL;
     Word *targetLHS;
 
     // create target phrase obj
@@ -251,6 +251,9 @@ bool RuleTableLoaderStandard::Load(FormatType format
     TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS);
     phraseColl.Add(targetPhrase);
 
+    // sourceLHS is not implemented correctly in the in-memory pt; just delete it for now
+    delete sourceLHS;
+
     count++;
   }
 

From 4ee4e07c1bf2f1fdf56fadf67a606b8945989978 Mon Sep 17 00:00:00 2001
From: phikoehn <pkoehn@inf.ed.ac.uk>
Date: Wed, 23 Apr 2014 13:50:08 +0100
Subject: [PATCH 20/23] minor ems fixes

---
 scripts/ems/web/analysis.php      | 3 ++-
 scripts/ems/web/analysis_diff.php | 3 ++-
 scripts/ems/web/index.php         | 3 ++-
 scripts/ems/web/overview.php      | 3 ++-
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/scripts/ems/web/analysis.php b/scripts/ems/web/analysis.php
index 726e30fbd..a64d5977f 100644
--- a/scripts/ems/web/analysis.php
+++ b/scripts/ems/web/analysis.php
@@ -436,7 +436,8 @@ function ngram_summary() {
   $score_line = "";
   for($i=0;$i<count($each_score);$i++) {
     if (preg_match('/([\d\(\)\.\s]+) (BLEU[\-c]*)/',$each_score[$i],$match) ||
-        preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match)) {
+        preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match) ||
+        preg_match('/([\d\(\)\.\s]+) (METEOR[\-c]*)/',$each_score[$i],$match)) {
       $header .= "<td>$match[2]</td>";
       $score_line .= "<td>$match[1]</td>";
     }
diff --git a/scripts/ems/web/analysis_diff.php b/scripts/ems/web/analysis_diff.php
index 51c8e50ef..50c8c90df 100644
--- a/scripts/ems/web/analysis_diff.php
+++ b/scripts/ems/web/analysis_diff.php
@@ -683,7 +683,8 @@ function ngram_summary_diff() {
     $each_score = explode(" ; ",$experiment[$idx?$id2:$id]->result[$set]);
     for($i=0;$i<count($each_score);$i++) {
       if (preg_match('/([\d\(\)\.\s]+) (BLEU[\-c]*)/',$each_score[$i],$match) ||
-          preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match)) {
+          preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match) ||
+          preg_match('/([\d\(\)\.\s]+) (METEOR[\-c]*)/',$each_score[$i],$match)) {
 	  $score[$match[2]][$idx] = $match[1];
       }
     }
diff --git a/scripts/ems/web/index.php b/scripts/ems/web/index.php
index 099c9078f..800f7426f 100644
--- a/scripts/ems/web/index.php
+++ b/scripts/ems/web/index.php
@@ -22,7 +22,8 @@ function head($title) {
 <body><h2>'.$title."</h2>\n";
 }
 
-if (array_key_exists("setup",$_POST) || array_key_exists("setup",$_GET)) {
+if (array_key_exists("setStepStatus",$_GET)) { set_step_status($_GET["setStepStatus"]); }
+else if (array_key_exists("setup",$_POST) || array_key_exists("setup",$_GET)) {
   load_experiment_info();
   load_comment();
 
diff --git a/scripts/ems/web/overview.php b/scripts/ems/web/overview.php
index c49e74be6..e56ed6f08 100644
--- a/scripts/ems/web/overview.php
+++ b/scripts/ems/web/overview.php
@@ -295,7 +295,8 @@ function output_score($id,$info) {
     $each_score = explode(" ; ",$score);
     for($i=0;$i<count($each_score);$i++) {
       if (preg_match('/([\d\(\)\.\s]+) (BLEU[\-c]*)/',$each_score[$i],$match) ||
-          preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match)) {
+          preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match) ||
+          preg_match('/([\d\(\)\.\s]+) (METEOR[\-c]*)/',$each_score[$i],$match)) {
         if ($i>0) { print "<BR>"; }
 	$opened_a_tag = 0;
         if ($set != "avg") { 

From 6a9eb6c848f12711b0a7ac2994fe06bf6491fd23 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <s0565741@odin.inf.ed.ac.uk>
Date: Wed, 23 Apr 2014 15:12:27 +0100
Subject: [PATCH 21/23] minor leak showing for lex reordering. Just refactor

---
 moses/PrefixTreeMap.cpp | 30 ++++++++++++++++++++++--------
 moses/PrefixTreeMap.h   | 14 ++++----------
 2 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/moses/PrefixTreeMap.cpp b/moses/PrefixTreeMap.cpp
index c8edce726..ee7565d8b 100644
--- a/moses/PrefixTreeMap.cpp
+++ b/moses/PrefixTreeMap.cpp
@@ -5,6 +5,8 @@
 #include <boost/thread.hpp>
 #endif
 
+using namespace std;
+
 namespace Moses
 {
 void GenericCandidate::readBin(FILE* f)
@@ -62,6 +64,17 @@ void Candidates::readBin(FILE* f)
 
 const LabelId PrefixTreeMap::MagicWord = std::numeric_limits<LabelId>::max() - 1;
 
+// Destructor: closes m_FileSrc / m_FileTgt when set, then calls FreeMemory().
+PrefixTreeMap::~PrefixTreeMap() {
+  if(m_FileSrc) {
+    fClose(m_FileSrc);
+  }
+  if(m_FileTgt) {
+    fClose(m_FileTgt);
+  }
+  FreeMemory();
+}
+
 
 void PrefixTreeMap::FreeMemory()
 {
@@ -75,20 +88,21 @@ void PrefixTreeMap::FreeMemory()
   m_PtrPool.reset();
 }
 
-static WordVoc* ReadVoc(const std::string& filename)
+WordVoc &ReadVoc(std::map<std::string,WordVoc> &vocs, const std::string& filename)
 {
-  static std::map<std::string,WordVoc*> vocs;
 #ifdef WITH_THREADS
   boost::mutex mutex;
   boost::mutex::scoped_lock lock(mutex);
 #endif
-  std::map<std::string,WordVoc*>::iterator vi = vocs.find(filename);
+  std::map<std::string,WordVoc>::iterator vi = vocs.find(filename);
   if (vi == vocs.end()) {
-    WordVoc* voc = new WordVoc();
-    voc->Read(filename);
-    vocs[filename] = voc;
+    WordVoc &voc = vocs[filename];
+    voc.Read(filename);
+    return voc;
+  }
+  else {
+    return vi->second;
   }
-  return vocs[filename];
 }
 
 int PrefixTreeMap::Read(const std::string& fileNameStem, int numVocs)
@@ -133,7 +147,7 @@ int PrefixTreeMap::Read(const std::string& fileNameStem, int numVocs)
     sprintf(num, "%d", i);
     //m_Voc[i] = new WordVoc();
     //m_Voc[i]->Read(ifv + num);
-    m_Voc[i] = ReadVoc(ifv + num);
+    m_Voc[i] = &ReadVoc(m_vocs, ifv + num);
   }
 
   TRACE_ERR("binary file loaded, default OFF_T: "<< PTF::getDefault()<<"\n");
diff --git a/moses/PrefixTreeMap.h b/moses/PrefixTreeMap.h
index 06066878d..d6262ca65 100644
--- a/moses/PrefixTreeMap.h
+++ b/moses/PrefixTreeMap.h
@@ -99,18 +99,11 @@ public:
   PrefixTreeMap() : m_FileSrc(0), m_FileTgt(0) {
     PTF::setDefault(InvalidOffT);
   }
-  ~PrefixTreeMap() {
-    if(m_FileSrc) {
-      fClose(m_FileSrc);
-    }
-    if(m_FileTgt) {
-      fClose(m_FileTgt);
-    }
-    FreeMemory();
-  }
+  ~PrefixTreeMap();
+
 public:
   static const LabelId MagicWord;
-public:
+
   void FreeMemory();
 
   int Read(const std::string& fileNameStem, int numVocs = -1);
@@ -135,6 +128,7 @@ private:
 
   std::vector<WordVoc*> m_Voc;
   ObjectPool<PPimp>     m_PtrPool;
+  std::map<std::string,WordVoc> m_vocs;
 };
 
 }

From 2c14b506b450bcdcbb903158137a51185808211f Mon Sep 17 00:00:00 2001
From: Hieu Hoang <s0565741@odin.inf.ed.ac.uk>
Date: Wed, 23 Apr 2014 16:11:09 +0100
Subject: [PATCH 22/23] merge problem

---
 moses/ChartTranslationOptionList.cpp | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/moses/ChartTranslationOptionList.cpp b/moses/ChartTranslationOptionList.cpp
index b86312b9d..89955dcee 100644
--- a/moses/ChartTranslationOptionList.cpp
+++ b/moses/ChartTranslationOptionList.cpp
@@ -163,15 +163,9 @@ float ChartTranslationOptionList::GetBestScore(const ChartCellLabel *chartCell)
 {
     const HypoList *stack = chartCell->GetStack().cube;
     assert(stack);
-    //assert(!stack->empty());
-    if (stack->empty()) {
-    	return 0;
-    }
-    else {
-		const ChartHypothesis &bestHypo = **(stack->begin());
-		return bestHypo.GetTotalScore();
-    }    const ChartHypothesis &bestHypo = **(stack->begin());
-    return bestHypo.GetTotalScore();
+    assert(!stack->empty());
+	const ChartHypothesis &bestHypo = **(stack->begin());
+	return bestHypo.GetTotalScore();
 }
 
 void ChartTranslationOptionList::Evaluate(const InputType &input, const InputPath &inputPath)

From d7380d6d9e0c5f88eb0ee4973f49e4c4e3900b9f Mon Sep 17 00:00:00 2001
From: Hieu Hoang <s0565741@odin.inf.ed.ac.uk>
Date: Wed, 23 Apr 2014 17:11:25 +0100
Subject: [PATCH 23/23] don't add label to m_targetLabelSet if no hypotheses.
 Assert error in parsing

---
 moses/ChartCell.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/moses/ChartCell.cpp b/moses/ChartCell.cpp
index 6603139f6..125efd204 100644
--- a/moses/ChartCell.cpp
+++ b/moses/ChartCell.cpp
@@ -114,8 +114,11 @@ void ChartCell::SortHypotheses()
   MapType::iterator iter;
   for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
     ChartHypothesisCollection &coll = iter->second;
-    coll.SortHypotheses();
-    m_targetLabelSet.AddConstituent(iter->first, &coll.GetSortedHypotheses());
+
+    if (coll.GetSize()) {
+      coll.SortHypotheses();
+      m_targetLabelSet.AddConstituent(iter->first, &coll.GetSortedHypotheses());
+    }
   }
 }