Imported version 3493 from local repo.

Includes merges from trunk up to 3842.


git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/samplerank@4045 1f5c12ca-751b-0410-a591-d2e778427230
bhaddow 2011-06-28 14:55:53 +00:00
parent ac13074816
commit 465442340a
211 changed files with 21866 additions and 488 deletions

.gitignore

@@ -1,33 +1,38 @@
*.[oa]
*.o
*~
Makefile
Makefile.in
aclocal.m4
autom4te.cache/
autom4te.cache
config.h
config.log
config.status
configure
mert/.deps/
mert/Makefile
mert/Makefile.in
mert/extractor
mert/mert
misc/.deps/
misc/.deps
misc/Makefile
misc/Makefile.in
misc/processLexicalTable
misc/processPhraseTable
misc/queryLexicalTable
moses-cmd/src/.deps/
moses-cmd/src/.deps
moses-cmd/src/Makefile
moses-cmd/src/Makefile.in
moses-cmd/src/moses
moses/src/.deps/
moses/src/.deps
moses/src/Makefile
moses/src/Makefile.in
moses/src/libmoses.a
stamp-h1
josiah/josiah
josiah/.deps
josiah/m1
scripts/Makefile
moses-release.tar.gz
release/
scripts-20090213-0027/
scripts/training/cmert-0.5/mert
scripts/training/mbr/mbr
scripts/training/phrase-extract/extract
scripts/training/phrase-extract/score
scripts/training/symal/symal
stamp-h1

Makefile.am

@@ -11,4 +11,4 @@ endif
if WITH_SERVER
SERVER = server
endif
-SUBDIRS = kenlm moses/src moses-chart/src OnDiskPt/src moses-cmd/src misc moses-chart-cmd/src CreateOnDisk/src $(MERT) $(SERVER)
+SUBDIRS = kenlm moses/src moses-chart/src OnDiskPt/src moses-cmd/src misc moses-chart-cmd/src CreateOnDisk/src josiah $(MERT) $(SERVER)

config.h.in

@@ -1,10 +1,19 @@
/* config.h.in. Generated from configure.in by autoheader. */
/* define if the Boost library is available */
/* Defined if the requested minimum BOOST version is satisfied */
#undef HAVE_BOOST
/* define if the Boost::Thread library is available */
#undef HAVE_BOOST_THREAD
/* Define to 1 if you have <boost/archive/text_oarchive.hpp> */
#undef HAVE_BOOST_ARCHIVE_TEXT_OARCHIVE_HPP
/* Define to 1 if you have <boost/mpi/communicator.hpp> */
#undef HAVE_BOOST_MPI_COMMUNICATOR_HPP
/* Define to 1 if you have <boost/program_options.hpp> */
#undef HAVE_BOOST_PROGRAM_OPTIONS_HPP
/* Define to 1 if you have <boost/thread.hpp> */
#undef HAVE_BOOST_THREAD_HPP
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
@@ -61,6 +70,9 @@
*/
#undef LT_OBJDIR
/* Define if compiling with MPI. */
#undef MPI_ENABLED
/* Name of package */
#undef PACKAGE

configure.in

@@ -14,7 +14,29 @@ AC_PROG_LIBTOOL
# Shared libraries are disabled by default
#LT_INIT([disable-shared])
AX_XMLRPC_C
BOOST_REQUIRE([1.36.0])
BOOST_PROGRAM_OPTIONS
ac_have_mpi=no
AC_ARG_WITH(mpi,
AC_HELP_STRING([--with-mpi],
[Force compilation with MPI]),
[ if test $withval != no ; then
ac_have_mpi=yes
fi ] )
if test $ac_have_mpi = yes ; then
AC_PATH_PROG(CXX, mpic++, none)
if test $CXX = none ; then
AC_MSG_ERROR([Cannot locate MPI compiler drivers])
fi
BOOST_MPI
BOOST_SERIALIZATION
AC_DEFINE(MPI_ENABLED,1,[Define if compiling with MPI.])
CPPFLAGS="$CPPFLAGS -DMPI_ENABLED"
fi
#AX_XMLRPC_C
AC_ARG_WITH(protobuf,
[AC_HELP_STRING([--with-protobuf=PATH], [(optional) path to Google protobuf])],
@@ -249,6 +271,6 @@ fi
LIBS="$LIBS -lz"
-AC_CONFIG_FILES(Makefile OnDiskPt/src/Makefile moses/src/Makefile moses-chart/src/Makefile moses-cmd/src/Makefile moses-chart-cmd/src/Makefile misc/Makefile mert/Makefile server/Makefile CreateOnDisk/src/Makefile kenlm/Makefile)
+AC_CONFIG_FILES(Makefile OnDiskPt/src/Makefile moses/src/Makefile moses-chart/src/Makefile moses-cmd/src/Makefile moses-chart-cmd/src/Makefile misc/Makefile josiah/Makefile mert/Makefile server/Makefile CreateOnDisk/src/Makefile kenlm/Makefile)
AC_OUTPUT()

josiah/AnnealingSchedule.cpp Normal file

@@ -0,0 +1,36 @@
#include "AnnealingSchedule.h"
#include "StaticData.h"
using namespace Moses;
using namespace std;
namespace Josiah {
AnnealingSchedule::~AnnealingSchedule() {}
LinearAnnealingSchedule::LinearAnnealingSchedule(int len, float max_temp) :
AnnealingSchedule(len), starting_temp(max_temp) {
VERBOSE(2, "Created LinearAnnealingSchedule:\n len=" << len << ", starting temp=" << max_temp << endl);
}
float LinearAnnealingSchedule::GetTemperatureAtTime(int time) const {
const float temp = max(1.0f, (starting_temp -
(static_cast<float>(time) * (starting_temp - 0.5f)) / static_cast<float>(GetLength())));
VERBOSE(3, "Time " << time << ": temp=" << temp << endl);
return temp;
}
ExponentialAnnealingSchedule::ExponentialAnnealingSchedule(float start_temp, float stop_temp, float floor_temp, float ratio) :
AnnealingSchedule(0), m_startTemp(start_temp), m_stopTemp(stop_temp), m_floorTemp(floor_temp), m_ratio(ratio) {
VERBOSE(2, "Created ExponentialAnnealingSchedule:\n starting temp=" << start_temp << ", stopping temp=" << stop_temp << ", floor temp=" << m_floorTemp << ", ratio: " << ratio << endl);
}
float ExponentialAnnealingSchedule::GetTemperatureAtTime(int time) const {
float curTemp = m_startTemp * pow(m_ratio, time);
if (curTemp < m_stopTemp)
return m_floorTemp;
return curTemp;
}
}

josiah/AnnealingSchedule.h Normal file

@@ -0,0 +1,39 @@
#pragma once
namespace Josiah {
class AnnealingSchedule {
public:
AnnealingSchedule(int length) : m_len(length) {}
virtual ~AnnealingSchedule();
inline int GetLength() const { return m_len; }
virtual float GetTemperatureAtTime(int time) const = 0;
private:
int m_len;
};
// cools linearly
class LinearAnnealingSchedule : public AnnealingSchedule {
public:
LinearAnnealingSchedule(int len, float max_temp);
virtual float GetTemperatureAtTime(int time) const;
private:
float starting_temp;
};
// cools exponentially
class ExponentialAnnealingSchedule : public AnnealingSchedule {
public:
ExponentialAnnealingSchedule(float start_temp, float stop_temp, float floor_temp, float ratio);
virtual float GetTemperatureAtTime(int time) const;
float GetFloorTemp() {return m_floorTemp;}
void SetFloorTemp(float f) { m_floorTemp = f;}
private:
float m_startTemp;
float m_stopTemp;
float m_floorTemp;
float m_ratio;
};
};
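
To make the cooling behaviour concrete, here is a small standalone sketch (illustrative only, not part of the commit; it assumes the header above and linking against a Moses build for the VERBOSE macro) that prints both schedules over a ten-step burn-in:

#include <iostream>
#include "AnnealingSchedule.h"

int main() {
  //Linear: starts at max_temp and cools towards 1.0 over `len` steps.
  Josiah::LinearAnnealingSchedule linear(10, 4.0f);
  //Exponential: temperature is start * ratio^time, dropping to the
  //floor temperature once it falls below the stopping temperature.
  Josiah::ExponentialAnnealingSchedule expo(4.0f, 0.01f, 0.0f, 0.5f);
  for (int t = 0; t <= 10; ++t) {
    std::cout << t << "\tlinear=" << linear.GetTemperatureAtTime(t)
              << "\texponential=" << expo.GetTemperatureAtTime(t) << "\n";
  }
  return 0;
}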

josiah/Bleu.cpp Normal file

@@ -0,0 +1,312 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Bleu.h"
using namespace Moses;
using namespace std;
namespace Josiah {
/**
* Extract the ngrams in the given sentence, up to the BLEU_ORDER,
* clipping using the existing ngrams as necessary. */
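//Note: across repeated calls, the stored count for each ngram is the
//maximum seen in any single reference (BLEU's reference-count clipping);
//e.g. if one reference contains "the" twice and another three times,
//the clipped count for "the" ends up as 3.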
static void ExtractNGrams(const Translation& sentence, NGramMap& ngrams) {
NGramMap newNgrams;
for (size_t start = 0; start < sentence.size(); ++start) {
Translation ngram;
for (size_t length = 1; length <= BLEU_ORDER; ++length) {
size_t position = start + length-1;
if (position < sentence.size()) {
ngram.push_back(sentence[position]);
++newNgrams[ngram];
} else {
break;
}
}
}
//clipping
for (NGramMap::const_iterator i = newNgrams.begin(); i != newNgrams.end(); ++i) {
Translation ngram = i->first;
if (ngrams[i->first] < i->second) {
ngrams[i->first] = i->second;
}
}
}
static BleuStats ExtractStats(const NGramMap& ref, const NGramMap& hyp) {
/*
cerr << "ref ngrams" << endl;
for (NGramMap::const_iterator ref_iter = ref.begin(); ref_iter != ref.end(); ++ref_iter) {
const Translation& ngram = ref_iter->first;
size_t count = ref_iter->second;
for (size_t i = 0; i < ngram.size(); ++i) {
cerr << *(ngram[i]) << " ";
}
cerr << count << endl;
}*/
BleuStats stats;
for (NGramMap::const_iterator hyp_iter = hyp.begin(); hyp_iter != hyp.end(); ++hyp_iter) {
const Translation& ngram = hyp_iter->first;
size_t count = hyp_iter->second;
size_t order = ngram.size();
stats.total(order, stats.total(order) + count);
NGramMap::const_iterator ref_iter = ref.find(ngram);
if (ref_iter != ref.end()) {
size_t matches = min(count, ref_iter->second);
stats.tp(order,stats.tp(order) + matches);
}
}
return stats;
}
Bleu::Bleu() : m_smoothingWeight(0) {
for (size_t i = 1; i <= BLEU_ORDER; ++i) {
m_smoothingStats.tp(i, BLEU_SMOOTHING);
m_smoothingStats.total(i, BLEU_SMOOTHING);
}
}
void Bleu::SetSmoothingWeight(float smoothingWeight) {
m_smoothingWeight = smoothingWeight;
}
float Bleu::GetSmoothingWeight() const {
return m_smoothingWeight;
}
GainFunctionHandle Bleu::GetGainFunction(const std::vector<size_t>& sentenceIds) {
return GainFunctionHandle(new BleuFunction(*this,sentenceIds));
}
void Bleu::AddReferences(const std::vector<Translation>& refs, const Translation& source) {
if (m_referenceLengths.size()) {
assert(m_referenceLengths[0].size() == refs.size());
}
m_sourceLengths.push_back(source.size());
m_referenceLengths.push_back(vector<size_t>());
m_referenceStats.push_back(NGramMap());
for (size_t i = 0; i < refs.size(); ++i) {
m_referenceLengths.back().push_back(refs[i].size());
ExtractNGrams(refs[i],m_referenceStats.back());
}
}
const NGramMap& Bleu::GetReferenceStats(size_t sentenceId) const {
return m_referenceStats.at(sentenceId);
}
const vector<size_t>& Bleu::GetReferenceLengths(size_t sentenceId) const {
return m_referenceLengths.at(sentenceId);
}
float Bleu::GetAverageReferenceLength(size_t sentenceId) const {
const vector<size_t>& lengths = GetReferenceLengths(sentenceId);
float total = 0.0f;
for (size_t i = 0; i < lengths.size(); ++i) {
total += lengths[i];
}
return total/lengths.size();
}
size_t Bleu::GetSourceLength(size_t sentenceId) const {
return m_sourceLengths.at(sentenceId);
}
const BleuStats& Bleu::GetSmoothingStats() const {
return m_smoothingStats;
}
void Bleu::AddSmoothingStats(const BleuStats& stats) {
//Chiang's update rule.
if (m_smoothingWeight) {
m_smoothingStats += stats;
m_smoothingStats *= m_smoothingWeight;
}
}
BleuFunction::BleuFunction(Bleu& bleu, const vector<size_t>& sentenceIds):
m_stats(bleu),m_sentenceIds(sentenceIds), m_smoothingStatsCount(0), m_cachedStats(sentenceIds.size())
{}
float BleuFunction::Evaluate(const std::vector<Translation>& hypotheses) const {
assert(hypotheses.size() == m_sentenceIds.size());
BleuStats totalStats;
for (size_t i = 0; i < hypotheses.size(); ++i) {
if (m_cachedStats[i].first != hypotheses[i]) {
//don't have this sentence cached
NGramMap hypNgrams;
ExtractNGrams(hypotheses[i], hypNgrams);
const NGramMap& refNgrams = m_stats.GetReferenceStats(m_sentenceIds[i]);
m_cachedStats[i] = pair<Translation,BleuStats>
(hypotheses[i],ExtractStats(refNgrams,hypNgrams));
//cerr << "SID " << m_sentenceIds[i] << " " << m_cachedStats[i].second << endl;
}
totalStats += m_cachedStats[i].second;
float src_len = m_stats.GetSourceLength(m_sentenceIds[i]);
float hyp_len = hypotheses[i].size();
const vector<size_t>& ref_lens = m_stats.GetReferenceLengths(m_sentenceIds[i]);
//closest length
float ref_len = ref_lens[0];
for (size_t j = 1; j < ref_lens.size(); ++j) {
if (abs(ref_len - hyp_len) > abs(ref_lens[j] - hyp_len)) {
ref_len = ref_lens[j];
}
}
totalStats.ref_len(totalStats.ref_len() + ref_len);
totalStats.hyp_len(totalStats.hyp_len() + hyp_len);
totalStats.src_len(totalStats.src_len() + src_len);
//cerr << totalStats << endl;
}
float log_bleu = 0;
const BleuStats& smoothing = m_stats.GetSmoothingStats();
for (size_t i = 1; i <= BLEU_ORDER; ++i) {
log_bleu = log_bleu + log(totalStats.tp(i) + smoothing.tp(i)) -
log(totalStats.total(i) + smoothing.total(i));
}
log_bleu /= BLEU_ORDER;
float ref_len = totalStats.ref_len() + smoothing.ref_len();
float hyp_len = totalStats.hyp_len() + smoothing.hyp_len();
float bp = 0;
if (hyp_len < ref_len) {
bp = 1 - ref_len / hyp_len;
}
log_bleu += bp;
//cerr << totalStats << endl;
//cerr << "bleu before scale: " << exp(log_bleu);
if (m_stats.GetSmoothingWeight()) {
//cerr << "smoothing " << smoothing << endl;
//cerr << "lb " << log_bleu;
//doing approx doc bleu
log_bleu += log(totalStats.src_len() + smoothing.src_len()) - log(hypotheses.size());
//cerr << " " << log_bleu << endl;
} else {
log_bleu += log(100);
}
//cerr << " After " << exp(log_bleu) << endl;
//cerr << totalStats << " " << exp(log_bleu) << endl;
return exp(log_bleu);
}
void BleuFunction::AddSmoothingStats(size_t sentenceId, const Translation& hypothesis) {
//Only calculating stats for one sentence
sentenceId = m_sentenceIds[sentenceId];
NGramMap hypNgrams;
ExtractNGrams(hypothesis,hypNgrams);
const NGramMap& refNgrams = m_stats.GetReferenceStats(sentenceId); //reference, to avoid copying the map
BleuStats smoothStats = ExtractStats(refNgrams,hypNgrams);
smoothStats.src_len(m_stats.GetSourceLength(sentenceId));
smoothStats.hyp_len(hypothesis.size());
smoothStats.ref_len(m_stats.GetAverageReferenceLength(sentenceId));
m_smoothingStats += smoothStats;
++m_smoothingStatsCount;
}
void BleuFunction::UpdateSmoothingStats() {
m_smoothingStats /= m_smoothingStatsCount;
m_stats.AddSmoothingStats(m_smoothingStats);
m_smoothingStatsCount = 0;
m_smoothingStats.clear();
}
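//m_data layout (2*BLEU_ORDER+3 floats): for each order n in 1..BLEU_ORDER,
//tp(n) lives at index 2n-2 and total(n) at index 2n-1; the final three
//slots hold src_len, ref_len and hyp_len, as the accessors below encode.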
BleuStats::BleuStats() :
m_data(BLEU_ORDER*2+3) {}
void BleuStats::clear() {
m_data = valarray<float>(BLEU_ORDER*2+3);
}
float BleuStats::tp(size_t order) const {
return m_data[order*2-2];
}
void BleuStats::tp(size_t order, float val) {
m_data[order*2-2] = val;
}
float BleuStats::total(size_t order) const {
return m_data[order*2-1];
}
void BleuStats::total(size_t order, float val) {
m_data[order*2-1] = val;
}
float BleuStats::src_len() const {
return m_data[BLEU_ORDER*2];
}
void BleuStats::src_len(float val) {
m_data[BLEU_ORDER*2] = val;
}
float BleuStats::ref_len() const {
return m_data[BLEU_ORDER*2+1];
}
void BleuStats::ref_len(float val) {
m_data[BLEU_ORDER*2+1] = val;
}
float BleuStats::hyp_len() const {
return m_data[BLEU_ORDER*2+2];
}
void BleuStats::hyp_len(float val) {
m_data[BLEU_ORDER*2+2] = val;
}
void BleuStats::operator+=(const BleuStats& rhs) {
m_data += rhs.m_data;
}
void BleuStats::operator*=(float scalar) {
m_data *= scalar;
}
void BleuStats::operator/=(float scalar) {
m_data /= scalar;
}
void BleuStats::write(ostream& out) const {
out << "{";
for (size_t i = 0; i < m_data.size(); ++i) {
out << m_data[i];
if (i < m_data.size()-1) out << ",";
}
out << "}";
}
ostream& operator<<(ostream& out, const BleuStats& stats) {
stats.write(out);
return out;
}
}

josiah/Bleu.h Normal file

@@ -0,0 +1,117 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <map>
#include <valarray>
#include <vector>
#include <boost/unordered_map.hpp>
#include "Gain.h"
#define BLEU_ORDER 4
#define BLEU_SMOOTHING 0.01
namespace Josiah {
typedef boost::unordered_map<std::vector<const Moses::Factor*>, size_t> NGramMap;
class BleuStats {
public:
BleuStats();
void clear();
float tp(size_t order) const;
void tp(size_t order, float val);
float total(size_t order) const;
void total(size_t order, float val);
float src_len() const;
void src_len(float val);
float ref_len() const;
void ref_len(float val);
float hyp_len() const;
void hyp_len(float val);
void operator+=(const BleuStats& rhs);
void operator*=(float scalar);
void operator/=(float scalar);
void write(std::ostream& out) const;
private:
std::valarray<float> m_data;
};
class Bleu : public Gain {
public:
Bleu();
virtual GainFunctionHandle GetGainFunction(const std::vector<size_t>& sentenceIds);
virtual void AddReferences(const std::vector<Translation>& refs, const Translation& source);
virtual float GetAverageReferenceLength(size_t sentenceId) const;
const NGramMap& GetReferenceStats(size_t sentenceId) const;
const std::vector<size_t>& GetReferenceLengths(size_t sentenceId) const;
size_t GetSourceLength(size_t sentenceId) const;
/** Get the overall smoothing stats */
const BleuStats& GetSmoothingStats() const;
/** Update the overall smoothing stats with those collected for the current sentence */
void AddSmoothingStats(const BleuStats& stats);
/** The decay constant for Chiang smoothing. Zero indicates no smoothing */
void SetSmoothingWeight(float smoothingWeight);
float GetSmoothingWeight() const;
private:
std::vector<NGramMap> m_referenceStats;
std::vector<std::vector<size_t> > m_referenceLengths;
std::vector<size_t> m_sourceLengths;
BleuStats m_smoothingStats;
float m_smoothingWeight;
};
class BleuFunction : public GainFunction {
public:
BleuFunction(Bleu& bleu, const std::vector<size_t>& sentenceIds);
virtual float Evaluate(const std::vector<Translation>& hypotheses) const;
/** Add the stats for this hypothesis to the smoothing stats being collected */
virtual void AddSmoothingStats(size_t sentenceId, const Translation& hypothesis);
/** Inform the GainFunction that we've finished with this sentence, and it can now
update the parent's stats */
virtual void UpdateSmoothingStats();
private:
Bleu& m_stats;
std::vector<size_t> m_sentenceIds;
//smoothing stats collected for this batch
BleuStats m_smoothingStats;
size_t m_smoothingStatsCount;
mutable std::vector<std::pair<Translation, BleuStats> > m_cachedStats;
};
std::ostream& operator<<(std::ostream& out, const BleuStats& stats);
}
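
As a rough usage sketch (illustrative, not part of the commit): references are registered once per sentence, then a gain function handle scores hypotheses for a batch of sentence ids. The toTranslation helper below is hypothetical; in the real code Translations (vectors of Moses Factor pointers) come from the decoder, and GainFunctionHandle (from Gain.h) is assumed to behave like a shared pointer.

#include <string>
#include <vector>
#include "Bleu.h"

//Hypothetical helper: tokenise a string into Moses factors.
Josiah::Translation toTranslation(const std::string& s);

void bleuExample() {
  Josiah::Bleu bleu;
  std::vector<Josiah::Translation> refs;
  refs.push_back(toTranslation("the cat sat on the mat"));
  bleu.AddReferences(refs, toTranslation("die katze sass auf der matte"));

  //Gain function over a batch containing just sentence 0.
  Josiah::GainFunctionHandle gf =
      bleu.GetGainFunction(std::vector<size_t>(1, 0));
  std::vector<Josiah::Translation> hyps;
  hyps.push_back(toTranslation("the cat sat on a mat"));
  //Smoothed BLEU; scaled by 100 when no smoothing weight is set.
  float gain = gf->Evaluate(hyps);
  (void)gain;
}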

josiah/CorpusSampler.cpp Normal file

@@ -0,0 +1,286 @@
#include "CorpusSampler.h"
#include "Decoder.h"
#include "Hypothesis.h"
#include "GibbsOperator.h"
#ifdef MPI_ENABLED
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
#include <boost/serialization/vector.hpp>
namespace mpi = boost::mpi;
#endif
using namespace std;
namespace Josiah {
void CorpusSamplerCollector::collect(Sample& s) {
//nothing to do
}
//Resample based on derivation distribution
void CorpusSamplerCollector::resample(int sent) {
std::map<const Derivation*,double> m_p, m_resampled_p;
m_derivationCollector.getDistribution(m_p); //fetch the distribution
//copy it to a vector, will be easier for further processing
vector<const Derivation*> derivations;
vector<double> scores;
for (map<const Derivation*,double>::iterator it = m_p.begin(); it != m_p.end(); ++it) {
derivations.push_back(it->first);
scores.push_back(it->second);
}
//Printing out distribution
IFVERBOSE(2) {
for (size_t i = 0; i < derivations.size();++i) {
cerr << *derivations[i] << " has score " << scores[i] <<endl;
}
}
double sum = scores[0];
for (size_t i = 1; i < scores.size(); ++i) {
sum = log_sum(sum,scores[i]);
}
transform(scores.begin(),scores.end(),scores.begin(),bind2nd(minus<double>(),sum));
//now sample from this
for (int j = 0; j < m_samples; ++j) {
//random number between 0 and 1
double random = RandomNumberGenerator::instance().next();//(double)rand() / RAND_MAX;
random = log(random);
//now figure out which sample
size_t position = 1;
sum = scores[0];
for (; position < scores.size() && sum < random; ++position) {
sum = log_sum(sum,scores[position]);
}
size_t chosen = position-1;
MPI_VERBOSE(2, "Chosen derivation " << chosen << endl)
//Store chosen derivation's feature values and length
const Derivation* chosenDeriv = derivations[chosen];
m_resampled_p[chosenDeriv] += 1.0/m_samples;
MPI_VERBOSE(2, "Chosen deriv " << *chosenDeriv << endl)
MPI_VERBOSE(2, "Chosen deriv size" << chosenDeriv->getTargetSentenceSize() << endl)
m_featureVectors.at(j) += chosenDeriv->getFeatureValues();
MPI_VERBOSE(2, "Feature vector : " << m_featureVectors.at(j) << endl)
m_lengths[j] += chosenDeriv->getTargetSentenceSize();
MPI_VERBOSE(2, "Lengths : " << m_lengths.at(j) << endl)
//Store chosen derivation's gain sufficient stats
SufficientStats *stats = new BleuSufficientStats(4);
std::vector<const Factor*> yield;
chosenDeriv->getTargetFactors(yield);
g[sent]->GetSufficientStats(yield, stats);
m_sufficientStats[j] += *(static_cast<BleuSufficientStats*>(stats));
MPI_VERBOSE(2, "Stats : " << m_sufficientStats.at(j) << endl)
delete stats;
}
IFVERBOSE(2) {
cerr << "After resampling, distribution is : " << endl;
for (map<const Derivation*,double>::iterator it = m_resampled_p.begin(); it != m_resampled_p.end(); ++it) {
cerr << *(it->first) << "has score " << it->second << endl;
}
}
setRegularisation(m_p);
setRegularisationGradientFactor(m_p);
//Now reset the derivation collector
m_derivationCollector.reset();
m_numSents++;
}
#ifdef MPI_ENABLED
template<class T>
struct VectorPlus {
vector<T> operator()(const vector<T>& lhs, const vector<T>& rhs) const {
assert(lhs.size() == rhs.size());
vector<T> sum;
sum.reserve(lhs.size()); //reserve, not pre-size: the loop below appends
for (size_t i = 0; i < lhs.size(); ++i) sum.push_back(lhs[i] + rhs[i]);
return sum;
}
};
void CorpusSamplerCollector::AggregateSamples(int rank) {
AggregateSuffStats(rank);
}
void CorpusSamplerCollector::AggregateSuffStats(int rank) {
/*what do we need to store?
1. Feature Vectors
2. Lengths
3. Bleu Stats
*/
vector <int> lengths (m_lengths.size());
FVector featsVecs, recFeatsVecs;
FVector suffStats, recSuffStats;
int numSents;
mpi::communicator world;
//Reduce length
mpi::reduce(world,m_lengths,lengths,VectorPlus<int>(),0);
// if (MPI_SUCCESS != MPI_Reduce(const_cast<int*>(&m_lengths[0]), &lengths[0], m_lengths.size(), MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
//Reduce numSents
// if (MPI_SUCCESS != MPI_Reduce(&m_numSents, &numSents, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
mpi::reduce(world,m_numSents,numSents,std::plus<int>(),0);
//Reduce feature vectors and sufficient stats
mpi::reduce(world,m_featureVectors,m_featureVectors,VectorPlus<FVector>(),0);
mpi::reduce(world,m_sufficientStats,m_sufficientStats,VectorPlus<BleuSufficientStats>(),0);
//MPI can't handle vector of vectors, so first concatenate elements together
/*
//Concatenate feature vectors
for (size_t i = 0; i < m_featureVectors.size(); ++i) {
for (size_t j = 0; j < m_featureVectors[i].size(); ++j) {
featsVecs.push_back(m_featureVectors[i][j]);
}
}
//Concatenate sufficient stats
for (size_t i = 0; i < m_sufficientStats.size(); ++i) {
vector < float > bleuStats = m_sufficientStats[i].data();
for (size_t j = 0; j < bleuStats.size(); ++j) {
suffStats.push_back(bleuStats[j]);
}
}
if (rank == 0) {
recFeatsVecs.resize(featsVecs.size());
recSuffStats.resize(suffStats.size());
}
//Reduce FVs and SStats
if (MPI_SUCCESS != MPI_Reduce(const_cast<float*>(&featsVecs[0]), &recFeatsVecs[0], featsVecs.size(), MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Reduce(const_cast<float*>(&suffStats[0]), &recSuffStats[0], suffStats.size(), MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
//Unpack FVs and SStats,
if (rank == 0 ) {
//FVs
size_t numFeats = recFeatsVecs.size() / m_featureVectors.size();
m_featureVectors.clear();
for (size_t i = 0; i < recFeatsVecs.size(); i += numFeats) {
vector<float> features(recFeatsVecs.begin() + i, recFeatsVecs.begin() + i + numFeats);
ScoreComponentCollection feats(features);
m_featureVectors.push_back(feats);
}
//Suff Stats
size_t sizeStats = recSuffStats.size() / m_sufficientStats.size();
m_sufficientStats.clear();
for (size_t i = 0; i < recSuffStats.size(); i += sizeStats) {
vector<float> _stats(recSuffStats.begin() + i, recSuffStats.begin() + i + sizeStats);
BleuSufficientStats stats(_stats);
m_sufficientStats.push_back(stats);
}
*/
//Transfer lengths back
if (rank == 0) {
m_lengths = lengths;
m_numSents = numSents;
}
}
#endif
float CorpusSamplerCollector::UpdateGradient(FVector* gradient,FValue *exp_len, FValue *unreg_exp_gain) {
FVector feature_expectations = getFeatureExpectations();
MPI_VERBOSE(1,"FEXP: " << feature_expectations << endl)
//gradient computation
FVector grad;
FValue exp_gain = 0;
FValue gain = 0.0;
for (size_t i = 0; i < m_featureVectors.size() ; ++i) {
FVector fv = m_featureVectors[i];
MPI_VERBOSE(2,"FV: " << fv)
gain = SentenceBLEU::CalcBleu(m_sufficientStats[i], false);
fv -= feature_expectations;
MPI_VERBOSE(2,"DIFF: " << fv)
fv *= gain;
MPI_VERBOSE(2,"GAIN: " << gain << endl);
exp_gain += gain;
grad += fv;
MPI_VERBOSE(2,"grad: " << grad << endl);
}
grad /= m_featureVectors.size();
exp_gain /= m_featureVectors.size();
cerr << "Gradient without reg " << grad << endl;
FVector regularizationGrad = getRegularisationGradientFactor();
regularizationGrad /= GetNumSents();
grad += regularizationGrad;
cerr << "Exp gain without reg term : " << exp_gain << endl;
*unreg_exp_gain = exp_gain;
exp_gain += getRegularisation()/GetNumSents();
cerr << "Exp gain with reg term: " << exp_gain << endl;
*gradient += grad;
MPI_VERBOSE(1,"Gradient: " << grad << endl)
cerr << "Gradient: " << grad << endl;
//expected length
if (exp_len) {
*exp_len = 0;
for (size_t j = 0; j < m_sufficientStats.size(); ++j) {
*exp_len += m_sufficientStats[j].hyp_len;
}
}
return exp_gain;
}
FVector CorpusSamplerCollector::getFeatureExpectations() const {
FVector sum;
for (size_t i = 0; i < m_featureVectors.size(); ++i) {
sum += m_featureVectors[i];
}
return sum;
}
void CorpusSamplerCollector::reset() {
m_featureVectors.clear(); m_featureVectors.resize(m_samples);
m_lengths.clear(); m_lengths.resize(m_samples);
m_sufficientStats.clear(); m_sufficientStats.resize(m_samples);
m_numSents = 0;
}
float CorpusSamplerCollector::getReferenceLength() {
float refLen(0.0);
for (size_t j = 0; j < m_sufficientStats.size(); ++j) {
refLen += m_sufficientStats[j].ref_len;
}
return refLen;
}
}
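
The core of resample() above is drawing indices from a distribution whose scores are kept as logs: normalise by the log-sum, take the log of a uniform draw, and walk the running log-sum until it crosses that threshold. A self-contained sketch of the same technique (log_sum is defined inline here; the original uses Josiah's helper and RandomNumberGenerator):

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <vector>

//Numerically stable log(exp(a) + exp(b)).
static double log_sum(double a, double b) {
  double m = std::max(a, b);
  return m + std::log(std::exp(a - m) + std::exp(b - m));
}

//Draw an index given normalised log-probabilities, mirroring the
//running-sum walk in CorpusSamplerCollector::resample().
static size_t sampleIndex(const std::vector<double>& logProbs) {
  double r = std::log((double)std::rand() / RAND_MAX); //log of a uniform draw
  double sum = logProbs[0];
  size_t position = 1;
  for (; position < logProbs.size() && sum < r; ++position) {
    sum = log_sum(sum, logProbs[position]);
  }
  return position - 1;
}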

josiah/CorpusSampler.h Normal file

@@ -0,0 +1,62 @@
#pragma once
#include <map>
#include <utility>
#include "MpiDebug.h"
#include "FeatureVector.h"
#include "GibblerExpectedLossTraining.h"
#include "GibblerMaxDerivDecoder.h"
#include "Phrase.h"
#include "Sampler.h"
#ifdef MPI_ENABLED
#include <mpi.h>
#endif
using namespace Moses;
namespace Josiah {
class Sampler;
class Derivation;
class CorpusSamplerCollector : public ExpectedLossCollector {
public:
CorpusSamplerCollector(int samples, Sampler &sampler): ExpectedLossCollector(),
m_samples(samples), m_numSents(0) {
sampler.AddCollector(&m_derivationCollector);
m_featureVectors.resize(m_samples);
m_lengths.resize(m_samples);
m_sufficientStats.resize(m_samples);
}
virtual ~CorpusSamplerCollector() {}
virtual void collect(Sample& sample);
virtual void resample(int);
virtual FValue UpdateGradient(FVector* gradient, FValue* exp_len, FValue* unreg_exp_gain);
#ifdef MPI_ENABLED
virtual void AggregateSamples(int);
#endif
virtual void reset();
float getReferenceLength();
virtual void setRegularisationGradientFactor(std::map<const Derivation*,double>& m_p) {}
virtual void setRegularisation(std::map<const Derivation*,double>& m_p) {}
virtual FVector getRegularisationGradientFactor() {return FVector();}
virtual FValue getRegularisation() {return 0.0;}
private:
std::vector<FVector> m_featureVectors;
std::vector <int> m_lengths;
std::vector <BleuSufficientStats> m_sufficientStats;
DerivationCollector m_derivationCollector;
const int m_samples;
FVector getFeatureExpectations() const;
int m_numSents;
int GetNumSents() { return m_numSents;}
protected:
void AggregateSuffStats(int);
};
}

josiah/CorpusSamplerAnnealed.cpp Normal file

@@ -0,0 +1,81 @@
#include "CorpusSamplerAnnealed.h"
#include "Hypothesis.h"
#include "Derivation.h"
#ifdef MPI_ENABLED
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
namespace mpi=boost::mpi;
#endif
using namespace std;
namespace Josiah {
FVector CorpusSamplerAnnealedCollector::getExpectedFeatureValue(std::map<const Derivation*,double>& m_p) {
FVector expFV;
for (std::map<const Derivation*,double>::const_iterator it = m_p.begin(); it != m_p.end(); ++it) {
const Derivation* deriv = it->first;
expFV += (deriv->getFeatureValues() * (it->second));
}
return expFV;
}
void CorpusSamplerAnnealedCollector::setRegularisationGradientFactor(std::map<const Derivation*,double>& m_p) {
double temperature = GetTemperature();
FVector expFV = getExpectedFeatureValue(m_p);
//cerr << "Expected FV " << expFV << endl;
float entropy_factor;
for (std::map<const Derivation*,double>::const_iterator it = m_p.begin(); it != m_p.end(); ++it) {
entropy_factor = -temperature * it->second * (log (it->second)+1);
//cerr << "Entropy factor " << entropy_factor << endl;
FVector fv = it->first->getFeatureValues();
fv -= expFV;
fv *= entropy_factor;
m_gradient += fv;
}
//cerr << "Gradient regularization " << m_gradient << endl;
}
void CorpusSamplerAnnealedCollector::setRegularisation(std::map<const Derivation*,double>& m_p) {
float entropy(0.0);
for (std::map<const Derivation*,double>::const_iterator it = m_p.begin(); it != m_p.end(); ++it) {
entropy -= it->second*log(it->second);
}
m_regularisation += GetTemperature() * entropy;
}
#ifdef MPI_ENABLED
void CorpusSamplerAnnealedCollector::AggregateSamples(int rank) {
AggregateRegularisationStats(rank);
AggregateSuffStats(rank);
}
void CorpusSamplerAnnealedCollector::AggregateRegularisationStats(int rank) {
FVector regularizationGrad;
float regularizationFactor;
FVector recvRegGrad;
//Reduce regularization
float reg = getRegularisation();
MPI_VERBOSE(1, "Regualarization for rank " << rank << " = " << reg << endl);
mpi::communicator world;
mpi::reduce(world,reg,regularizationFactor,std::plus<float>(),0);
//if (MPI_SUCCESS != MPI_Reduce(&reg, &regularizationFactor, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
//Reduce regularization gradient
MPI_VERBOSE(1, "Regualarization grad for rank " << rank << " = " << getRegularisationGradientFactor() << endl);
mpi::reduce(world,getRegularisationGradientFactor(), m_gradient ,FVectorPlus(),0);
//if (MPI_SUCCESS != MPI_Reduce(const_cast<float*>(&regGrad[0]), &recvRegGrad[0], regGrad.size(), MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (rank == 0 ) {
m_regularisation = regularizationFactor;
}
MPI_VERBOSE(1, "After agg, Regualarization for rank " << rank << " = " << m_regularisation << endl);
MPI_VERBOSE(1, "After agg, Regualarization grad for rank " << rank << " = " << m_gradient << endl);
}
#endif
}
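
For reference, setRegularisation and setRegularisationGradientFactor above implement an entropy regulariser on the derivation distribution p (a sketch of the algebra, assuming p is the sampled distribution over derivations d with feature vectors f(d)): the objective gains the term

R = T \, H(p) = -T \sum_d p(d) \log p(d)

and its gradient contribution, as accumulated in the loop over m_p, is

\nabla R = -T \sum_d p(d) \, (\log p(d) + 1) \, \bigl(f(d) - \mathbb{E}_p[f]\bigr)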

josiah/CorpusSamplerAnnealed.h Normal file

@@ -0,0 +1,56 @@
#pragma once
#include <map>
#include <utility>
#include <ext/hash_map>
#include "FeatureVector.h"
#include "CorpusSampler.h"
#include "Phrase.h"
#ifdef MPI_ENABLED
#include <mpi.h>
#endif
using namespace Moses;
namespace Josiah {
class Derivation;
class GainFunction;
class CorpusSamplerCollector;
class CorpusSamplerAnnealedCollector : public CorpusSamplerCollector {
public:
CorpusSamplerAnnealedCollector(int samples, Sampler &sampler)
: CorpusSamplerCollector(samples, sampler), m_regularisation(0.0) {
}
float GetTemperature() { return m_temp;}
void SetTemperature(float temp) {m_temp = temp;}
virtual FVector getRegularisationGradientFactor() {
return m_gradient;
}
virtual float getRegularisation() {
return m_regularisation;
}
virtual void reset() {
CorpusSamplerCollector::reset();
m_regularisation = 0.0;
m_gradient.clear();
}
virtual void setRegularisationGradientFactor(std::map<const Derivation*,double>& m_p);
virtual void setRegularisation(std::map<const Derivation*,double>& m_p);
#ifdef MPI_ENABLED
virtual void AggregateSamples(int rank);
#endif
private:
FValue m_temp, m_regularisation;
FVector m_gradient;
FVector getExpectedFeatureValue(std::map<const Derivation*,double>& m_p);
#ifdef MPI_ENABLED
void AggregateRegularisationStats(int rank);
#endif
};
}

josiah/CorpusTrainer.cpp Normal file

@@ -0,0 +1,486 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include <functional>
#include <iostream>
#include <iomanip>
#include <fstream>
#ifdef MPI_ENABLED
#include <mpi.h>
#endif
#include <boost/program_options.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string.hpp>
#include "AnnealingSchedule.h"
#include "Decoder.h"
#include "Derivation.h"
#include "Gibbler.h"
#include "InputSource.h"
#include "TrainingSource.h"
#include "FeatureVector.h"
#include "GibbsOperator.h"
#include "SentenceBleu.h"
#include "GainFunction.h"
#include "CorpusSampler.h"
#include "CorpusSamplerAnnealed.h"
#include "GibblerMaxTransDecoder.h"
#include "MpiDebug.h"
#include "StaticData.h"
#include "Optimizer.h"
#include "Selector.h"
#include "TranslationDelta.h"
#include "Utils.h"
using namespace std;
using namespace Josiah;
using namespace Moses;
using boost::lexical_cast;
using boost::bad_lexical_cast;
using boost::split;
using boost::is_any_of;
namespace po = boost::program_options;
/**
* Main for Josiah - the Gibbs sampler for Moses.
**/
int main(int argc, char** argv) {
int rank = 0, size = 1;
#ifdef MPI_ENABLED
MPI_Init(&argc,&argv);
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&size);
cerr << "MPI rank: " << rank << endl;
cerr << "MPI size: " << size << endl;
#endif
GibbsTimer timer;
size_t iterations;
unsigned int topn;
int debug;
int mpidebug;
string mpidebugfile;
string feature_file;
int burning_its;
int mbr_size;
string inputfile;
string outputfile;
string mosesini;
bool decode;
bool translate;
bool translation_distro;
bool derivation_distro;
bool help;
bool expected_cbleu;
unsigned training_batch_size;
bool mbr_decoding;
bool do_timing;
int max_training_iterations;
int num_samples;
uint32_t seed;
int lineno;
bool randomize;
FValue scalefactor;
FValue eta;
FValue mu;
string weightfile;
vector<string> ref_files;
int periodic_decode;
bool collect_dbyt;
bool output_max_change;
bool anneal;
unsigned int reheatings;
float max_temp;
float prior_variance;
float prior_mean;
string prev_gradient_file;
bool expected_cbleu_da;
float start_temp_expda;
float stop_temp_expda;
float floor_temp_expda;
float anneal_ratio_da;
float gamma;
bool use_metanormalized_egd;
int optimizerFreq;
float brev_penalty_scaling_factor;
bool hack_bp_denum;
int weight_dump_freq;
string weight_dump_stem;
int init_iteration_number;
bool greedy, fixedTemp;
float fixed_temperature;
vector<string> ngramorders;
size_t lag;
float flip_prob, merge_split_prob, retrans_prob;
float log_base_factor;
po::options_description desc("Allowed options");
desc.add_options()
("help",po::value( &help )->zero_tokens()->default_value(false), "Print this help message and exit")
("config,f",po::value<string>(&mosesini),"Moses ini file")
("verbosity,v", po::value<int>(&debug)->default_value(0), "Verbosity level")
("mpi-debug", po::value<int>(&MpiDebug::verbosity)->default_value(0), "Verbosity level for debugging messages used in mpi.")
("mpi-debug-file", po::value<string>(&mpidebugfile), "Debug file stem for use by mpi processes")
("random-seed,e", po::value<uint32_t>(&seed), "Random seed")
("timing,m", po::value(&do_timing)->zero_tokens()->default_value(false), "Display timing information.")
("max-samples", po::value<size_t>(&iterations)->default_value(5),
"How many samples to gather initially (before resampling step)")
("samples,s", po::value<int>(&num_samples)->default_value(5), "Number of samples used for training")
("burn-in,b", po::value<int>(&burning_its)->default_value(1), "Duration (in sampling iterations) of burn-in period")
("scale-factor,c", po::value<float>(&scalefactor)->default_value(1.0), "Scale factor for model weights.")
("input-file,i",po::value<string>(&inputfile),"Input file containing tokenised source")
("output-file-prefix,o",po::value<string>(&outputfile),"Output file prefix for translations, MBR output, etc")
("nbest-drv,n",po::value<unsigned int>(&topn)->default_value(0),"Write the top n derivations to stdout")
("weights,w",po::value<string>(&weightfile),"Weight file")
("decode-derivation,d",po::value( &decode)->zero_tokens()->default_value(false),"Write the most likely derivation to stdout")
("decode-translation,t",po::value(&translate)->zero_tokens()->default_value(false),"Write the most likely translation to stdout")
("distro-derivation", po::value(&derivation_distro)->zero_tokens()->default_value(false), "Print derivation probability distribution")
("distro-translation", po::value(&translation_distro)->zero_tokens()->default_value(false), "Print translation probability distribution")
("periodic-derivation,p",po::value(&periodic_decode)->default_value(0), "Periodically write the max derivation to stderr")
("max-change", po::value(&output_max_change)->zero_tokens()->default_value(false), "Whenever the max deriv or max trans changes, write it to stderr")
("collect-dbyt",po::value(&collect_dbyt)->zero_tokens()->default_value(false), "Collect derivations per translation")
("line-number,L", po::value(&lineno)->default_value(0), "Starting reference/line number")
("randomize-batches,R", po::value(&randomize)->zero_tokens()->default_value(false), "Randomize training batches")
("gaussian-prior-variance", po::value<float>(&prior_variance)->default_value(0.0f), "Gaussian prior variance (0 for no prior)")
("gaussian-prior-mean,P", po::value<float>(&prior_mean), "Gaussian prior mean")
("expected-bleu-training,T", po::value(&expected_cbleu)->zero_tokens()->default_value(false), "Train to maximize expected corpus BLEU")
("max-training-iterations,M", po::value(&max_training_iterations)->default_value(30), "Maximum training iterations")
("training-batch-size,S", po::value(&training_batch_size)->default_value(0), "Batch size to use during xpected bleu training, 0 = full corpus")
("reheatings", po::value<unsigned int>(&reheatings)->default_value(1), "Number of times to reheat the sampler")
("anneal,a", po::value(&anneal)->default_value(false)->zero_tokens(), "Use annealing during the burn in period")
("max-temp", po::value<float>(&max_temp)->default_value(4.0), "Annealing maximum temperature")
("eta", po::value<FValue>(&eta)->default_value(0.0), "Default learning rate for SGD/EGD")
("prev-gradient", po::value<string>(&prev_gradient_file), "File containing previous gradient for restarting SGD/EGD")
("mu", po::value<FValue>(&mu)->default_value(1.0f), "Metalearning rate for EGD")
("gamma", po::value<FValue>(&gamma)->default_value(0.9f), "Smoothing parameter for Metanormalized EGD ")
("ref,r", po::value<vector<string> >(&ref_files), "Reference translation files for training")
("extra-feature-config,X", po::value<string>(&feature_file), "Configuration file for extra (non-Moses) features")
("use-metanormalized-egd,N", po::value(&use_metanormalized_egd)->zero_tokens()->default_value(false), "Use metanormalized EGD")
("expected-bleu-deterministic-annealing-training,D", po::value(&expected_cbleu_da)->zero_tokens()->default_value(false), "Train to maximize expected corpus BLEU using deterministic annealing")
("optimizer-freq", po::value<int>(&optimizerFreq)->default_value(1),"Number of optimization to perform at given temperature")
("initial-det-anneal-temp", po::value<float>(&start_temp_expda)->default_value(1000.0f), "Initial deterministic annealing entropy temperature")
("final-det-anneal-temp", po::value<float>(&stop_temp_expda)->default_value(0.001f), "Final deterministic annealing entropy temperature")
("floor-temp", po::value<float>(&floor_temp_expda)->default_value(0.0f), "Floor temperature for det annealing")
("det-annealing-ratio,A", po::value<float>(&anneal_ratio_da)->default_value(0.5f), "Deterministc annealing ratio")
("hack-bp-denum,H", po::value(&hack_bp_denum)->default_value(false), "Use a predefined scalar as denum in BP computation")
("bp-scale,B", po::value<float>(&brev_penalty_scaling_factor)->default_value(1.0f), "Scaling factor for sent level brevity penalty for BLEU - default is 1.0")
("weight-dump-freq", po::value<int>(&weight_dump_freq)->default_value(0), "Frequency to dump weight files during training")
("weight-dump-stem", po::value<string>(&weight_dump_stem)->default_value("weights"), "Stem of filename to use for dumping weights")
("init-iteration-number", po::value<int>(&init_iteration_number)->default_value(0), "First training iteration will be one after this (useful for restarting)")
("greedy", po::value(&greedy)->zero_tokens()->default_value(false), "Greedy sample acceptor")
("fixed-temp-accept", po::value(&fixedTemp)->zero_tokens()->default_value(false), "Fixed temperature sample acceptor")
("fixed-temperature", po::value<float>(&fixed_temperature)->default_value(1.0f), "Temperature for fixed temp sample acceptor")
("lag", po::value<size_t>(&lag)->default_value(10), "Lag between collecting samples")
("flip-prob", po::value<float>(&flip_prob)->default_value(0.6f), "Probability of applying flip operator during random scan")
("merge-split-prob", po::value<float>(&merge_split_prob)->default_value(0.2f), "Probability of applying merge-split operator during random scan")
("retrans-prob", po::value<float>(&retrans_prob)->default_value(0.2f), "Probability of applying retrans operator during random scan")
("log-base-factor", po::value<float>(&log_base_factor)->default_value(1.0f), "Scaling factor for log probabilities in translation and language models");
po::options_description cmdline_options;
cmdline_options.add(desc);
po::variables_map vm;
po::store(po::command_line_parser(argc,argv).
options(cmdline_options).run(), vm);
po::notify(vm);
if (help) {
std::cout << "Usage: " + string(argv[0]) + " -f mosesini-file [options]" << std::endl;
std::cout << desc << std::endl;
return 0;
}
if (weightfile.empty()) {
std::cerr << "Setting all feature weights to zero" << std::endl;
WeightManager::init();
} else {
std::cerr << "Loading feature weights from " << weightfile << std::endl;
WeightManager::init(weightfile);
}
if (expected_cbleu && expected_cbleu_da) {
std::cerr << "Incorrect usage: Cannot do both expected bleu training and expected bleu deterministic annealing training" << std::endl;
return 0;
}
float opProb = flip_prob + merge_split_prob + retrans_prob;
if (fabs(1.0 - opProb) > 0.00001) {
std::cerr << "Incorrect usage: specified operator probs should sum up to 1" << std::endl;
return 0;
}
if (translation_distro) translate = true;
if (derivation_distro) decode = true;
if (mosesini.empty()) {
cerr << "Error: No moses ini file specified" << endl;
return 1;
}
if (mpidebugfile.length()) {
MpiDebug::init(mpidebugfile,rank);
}
cerr << "optimizer freq " << optimizerFreq << endl;
assert(optimizerFreq != 0);
if (do_timing) {
timer.on();
}
if (log_base_factor != 1.0) {
cerr << "Setting log base factor to " << log_base_factor << endl;
SetLogBaseFactor(log_base_factor);
}
//set up moses
initMoses(mosesini,debug);
auto_ptr<Decoder> decoder(new RandomDecoder());
feature_vector extra_features;
configure_features_from_file(feature_file, extra_features);
std::cerr << "Using " << extra_features.size() << " features" << std::endl;
//scale model weights
vector<float> weights = StaticData::Instance().GetAllWeights();
transform(weights.begin(),weights.end(),weights.begin(),bind2nd(multiplies<float>(),scalefactor));
const_cast<StaticData&>(StaticData::Instance()).SetAllWeights(weights);
VERBOSE(1,"Scaled weights by factor of " << scalefactor << endl);
if (vm.count("random-seed")) {
RandomNumberGenerator::instance().setSeed(seed + rank);
}
GainFunctionVector g;
if (ref_files.size() > 0) LoadReferences(ref_files, inputfile, &g, brev_penalty_scaling_factor, hack_bp_denum);
ostream* out = &cout;
if (!outputfile.empty()) {
ostringstream os;
os << setfill('0');
os << outputfile << '.' << setw(3) << rank << "_of_" << size;
VERBOSE(1, "Writing output to: " << os.str() << endl);
out = new ofstream(os.str().c_str());
}
auto_ptr<istream> in;
auto_ptr<InputSource> input;
auto_ptr<Optimizer> optimizer;
FVector etaVector(eta);
FVector prev_gradient;
if (!prev_gradient_file.empty()) {
prev_gradient.load(prev_gradient_file);
}
if (use_metanormalized_egd) {
optimizer.reset(new MetaNormalizedExponentiatedGradientDescent(
etaVector,
mu,
0.1f, // minimal step scaling factor
gamma,
max_training_iterations,
prev_gradient));
} else {
optimizer.reset(new ExponentiatedGradientDescent(
etaVector,
mu,
0.1f, // minimal step scaling factor
max_training_iterations,
prev_gradient));
}
if (optimizer.get()) {
optimizer->SetIteration(init_iteration_number);
}
if (prior_variance != 0.0f) {
assert(prior_variance > 0);
std::cerr << "Using Gaussian prior: \\sigma^2=" << prior_variance << " \\mu=" << prior_mean << endl;
optimizer->SetUseGaussianPrior(prior_mean, prior_variance);
}
ExpectedBleuTrainer* trainer = NULL;
vector<string> input_lines;
ifstream infiles(inputfile.c_str());
assert (infiles);
while(infiles) {
string line;
getline(infiles, line);
if (line.empty() && infiles.eof()) break;
assert(!line.empty());
input_lines.push_back(line);
}
VERBOSE(1, "Loaded " << input_lines.size() << " lines in training mode" << endl);
if (!training_batch_size || training_batch_size > input_lines.size())
training_batch_size = input_lines.size();
VERBOSE(1, "Batch size: " << training_batch_size << endl);
trainer = new ExpectedBleuTrainer(rank, size, training_batch_size, &input_lines, seed, randomize, optimizer.get(), weight_dump_freq, weight_dump_stem);
input.reset(trainer);
auto_ptr<SamplingSelector> selector(new SamplingSelector());
auto_ptr<AnnealingSchedule> annealingSchedule;
if (anneal) {
annealingSchedule.reset(new LinearAnnealingSchedule(burning_its, max_temp));
selector->SetAnnealingSchedule(annealingSchedule.get());
}
auto_ptr<AnnealingSchedule> detAnnealingSchedule;
if (expected_cbleu_da) {
detAnnealingSchedule.reset(new ExponentialAnnealingSchedule(start_temp_expda, stop_temp_expda, floor_temp_expda, anneal_ratio_da));
}
auto_ptr<CorpusSamplerCollector> elCollector;
Sampler sampler;
//configure the sampler
sampler.SetSelector(selector.get());
VERBOSE(2,"Reheatings: " << reheatings << endl);
sampler.SetReheatings(reheatings);
sampler.SetLag(lag); //thinning factor for sample collection
MergeSplitOperator mso(merge_split_prob);
FlipOperator fo(flip_prob);
TranslationSwapOperator tso(retrans_prob);
sampler.AddOperator(&mso);
sampler.AddOperator(&tso);
sampler.AddOperator(&fo);
//Acceptor
if (greedy || fixed_temperature == 0) {
assert(!"greedy not supported");
}
else if (fixedTemp){
assert(!"fixed temp not supported");
}
sampler.SetIterations(iterations);
sampler.SetBurnIn(burning_its);
if (expected_cbleu) {
elCollector.reset(new CorpusSamplerCollector(num_samples, sampler));
sampler.AddCollector(elCollector.get());
}
else if (expected_cbleu_da) {
elCollector.reset(new CorpusSamplerAnnealedCollector(num_samples, sampler));
sampler.AddCollector(elCollector.get());
}
timer.check("Processing input file");
int sentCtr = 0;
while (input->HasMore()) {
string line;
input->GetSentence(&line, &lineno);
if (line.empty()) {
if (!input->HasMore()) continue;
assert(!"I don't like empty lines");
}
elCollector->addGainFunction(&(g[lineno]));
//Set the annealing temperature
if (expected_cbleu_da) {
int it = optimizer->GetIteration() / optimizerFreq ;
float temp = detAnnealingSchedule->GetTemperatureAtTime(it);
CorpusSamplerAnnealedCollector* annealedELCollector = static_cast<CorpusSamplerAnnealedCollector*>(elCollector.get());
annealedELCollector->SetTemperature(temp);
cerr << "Annealing temperature " << annealedELCollector->GetTemperature() << endl;
}
Hypothesis* hypothesis;
TranslationOptionCollection* toc;
timer.check("Running decoder");
std::vector<Word> source;
decoder->decode(line,hypothesis,toc,source);
timer.check("Running sampler");
sampler.Run(hypothesis,toc,source,extra_features);
timer.check("Outputting results");
//Now resample
elCollector->resample(sentCtr);
//cerr << "curr " << trainer->GetCurr() << ", end " << trainer->GetCurrEnd() << endl;
if (trainer && trainer->GetCurr() == trainer->GetCurrEnd()) {//Now need to aggregate the feature vectors and bleu stats
#ifdef MPI_ENABLED
elCollector->AggregateSamples(rank);
#endif
FVector gradient;
float exp_trans_len = 0;
float unreg_exp_gain = 0;
float exp_gain = 0;
#ifdef MPI_ENABLED
if (rank == 0) {
exp_gain = elCollector->UpdateGradient(&gradient, &exp_trans_len, &unreg_exp_gain);
}
#else
exp_gain = elCollector->UpdateGradient(&gradient, &exp_trans_len, &unreg_exp_gain);
#endif
if (trainer)
trainer->IncorporateCorpusGradient(
exp_trans_len,
elCollector->getReferenceLength(),
exp_gain,
unreg_exp_gain,
gradient,
decoder.get());
elCollector->reset();
}
++lineno;
++sentCtr;
}
#ifdef MPI_ENABLED
MPI_Finalize();
#endif
(*out) << flush;
if (!outputfile.empty())
delete out;
return 0;
}

josiah/Decoder.cpp Normal file

@@ -0,0 +1,191 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include "Decoder.h"
#include "DummyScoreProducers.h"
#include "Manager.h"
#include "PhraseFeature.h"
#include "TranslationSystem.h"
#include "TrellisPathCollection.h"
#include "TrellisPath.h"
using namespace std;
using namespace Moses;
namespace Josiah {
/**
* Allocates a char* and copies string into it.
**/
static char* strToChar(const string& s) {
char* c = new char[s.size()+1];
strcpy(c,s.c_str());
return c;
}
void initMoses(const string& inifile, int debuglevel, const vector<string>& extraArgs) {
static int BASE_ARGC = 6;
Parameter* params = new Parameter();
char ** mosesargv = new char*[BASE_ARGC + extraArgs.size()];
mosesargv[0] = strToChar("-f");
mosesargv[1] = strToChar(inifile);
mosesargv[2] = strToChar("-max-trans-opt-per-coverage");
mosesargv[3] = strToChar("0");
mosesargv[4] = strToChar("-v");
stringstream dbgin;
dbgin << debuglevel;
mosesargv[5] = strToChar(dbgin.str());
for (size_t i = 0; i < extraArgs.size(); ++i) {
mosesargv[BASE_ARGC+i] = strToChar(extraArgs[i]);
}
params->LoadParam(BASE_ARGC + extraArgs.size(),mosesargv);
StaticData::LoadDataStatic(params);
for (int i = 0; i < BASE_ARGC + (int)extraArgs.size(); ++i) { //free every strToChar allocation, including extraArgs
delete[] mosesargv[i];
}
delete[] mosesargv;
}
void setMosesWeights(const FVector& currentWeights) {
PhraseFeature::updateWeights(currentWeights);
StaticData& staticData =
const_cast<StaticData&>(StaticData::Instance());
TranslationSystem& system =
const_cast<TranslationSystem&>(staticData.GetTranslationSystem(
TranslationSystem::DEFAULT));
ScoreComponentCollection mosesWeights = staticData.GetAllWeights();
for (LMList::const_iterator i = system.GetLanguageModels().begin();
i != system.GetLanguageModels().end(); ++i) {
LanguageModel* lm = const_cast<LanguageModel*>(*i);
float lmWeight = currentWeights[lm->GetScoreProducerDescription()];
//lm->SetWeight(lmWeight);
mosesWeights.Assign(lm,lmWeight);
}
const ScoreProducer* wp = system.GetWordPenaltyProducer();
const string wpName = wp->GetScoreProducerDescription();
//staticData.SetWeightWordPenalty(currentWeights[wpName]);
mosesWeights.Assign(wp,currentWeights[wpName]);
const ScoreProducer* dp = system.GetDistortionProducer();
string distName = dp->GetScoreProducerDescription();
//staticData.SetWeightDistortion(currentWeights[distName]);
mosesWeights.Assign(dp, currentWeights[distName]);
staticData.SetAllWeights(mosesWeights);
}
struct TOptCompare {
bool operator()(const TranslationOption* lhs, const TranslationOption* rhs) {
return lhs->GetFutureScore() > rhs->GetFutureScore();
}
};
static const TargetPhrase& emptyTarget() {
static TargetPhrase* tp = new TargetPhrase(Input);
return *tp;
}
//Ensures that cleanup is not run the first time around
bool TranslationHypothesis::m_cleanup = false;
TranslationHypothesis::TranslationHypothesis(const string& source)
{
const StaticData &staticData = StaticData::Instance();
const TranslationSystem& system =
staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
//clean up previous sentence
if (m_cleanup) {
system.CleanUpAfterSentenceProcessing();
} else {
m_cleanup = true;
}
//the sentence
Sentence sentence(Input);
stringstream in(source + "\n");
const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
sentence.Read(in,inputFactorOrder);
for (size_t i=0; i<sentence.GetSize(); ++i){ m_words.push_back(sentence.GetWord(i)); }
//translation options
m_manager.reset(new Manager(sentence, Normal, &system));
m_manager->ResetSentenceStats(sentence);
system.InitializeBeforeSentenceProcessing(sentence);
m_toc.reset(sentence.CreateTranslationOptionCollection(&system));
//const vector <DecodeGraph*>
// &decodeStepVL = staticData.GetDecodeGraphs();
m_toc->CreateTranslationOptions();
//sort the options
size_t maxPhraseSize = staticData.GetMaxPhraseLength();
for (size_t start = 0; start < m_words.size(); ++start) {
for (size_t end = start; end < start + maxPhraseSize && end < m_words.size(); ++end) {
TranslationOptionList& options =
m_toc->GetTranslationOptionList(start,end);
sort(options.begin(), options.end(), TOptCompare());
/*
while (options.size() > ttableLimit) {
size_t pos = options.size() - 1;
delete options.Get(pos);
options.Remove(pos);
}*/
}
}
//hypothesis
m_hypothesis.reset(Hypothesis::Create(*m_manager,sentence, emptyTarget()));
for (size_t i = 0; i < m_words.size(); ++i) {
m_allHypos.push_back(m_hypothesis);
WordsRange segment(i,i);
const TranslationOptionList& options =
m_toc->GetTranslationOptionList(segment);
/*
cerr << "Options for " << *(options.Get(0)->GetSourcePhrase()) << endl;
for (size_t j = 0; j < options.size(); ++j) {
cerr << *(options.Get(j)) << endl;
}*/
assert(options.size());
m_hypothesis.reset(
Hypothesis::Create(*m_hypothesis, *(options.Get(0)), NULL));
}
}
TranslationOptionCollection* TranslationHypothesis::getToc() const {
return m_toc.get();
}
Hypothesis* TranslationHypothesis::getHypothesis() const {
return m_hypothesis.get();
}
const vector<Word>& TranslationHypothesis::getWords() const {
return m_words;
}
}

josiah/Decoder.h Normal file

@@ -0,0 +1,81 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <iostream>
#include <cstring>
#include <sstream>
#include <boost/shared_ptr.hpp>
#include "FeatureVector.h"
#include "Hypothesis.h"
#include "Parameter.h"
#include "Sentence.h"
#include "SearchNormal.h"
#include "StaticData.h"
#include "TrellisPathList.h"
#include "TranslationOptionCollectionText.h"
//
// Wrapper functions and objects for the decoder.
//
namespace Josiah {
typedef std::vector<const Moses::Factor*> Translation;
typedef boost::shared_ptr<Moses::Hypothesis> HypothesisHandle;
typedef boost::shared_ptr<Moses::Manager> ManagerHandle;
typedef boost::shared_ptr<Moses::TranslationOptionCollection> TOCHandle;
typedef std::vector<HypothesisHandle> HypothesisVector;
/**
* Initialise moses (including StaticData) using the given ini file and
* debug level, passing through any other command line arguments.
**/
void initMoses(const std::string& inifile, int debuglevel, const std::vector<std::string>& = std::vector<std::string>());
/** Update all the core moses weights */
void setMosesWeights(const Moses::FVector& weights);
/**
* Generates random translation hypotheses.
**/
class TranslationHypothesis {
public:
TranslationHypothesis(const std::string& source);
Moses::TranslationOptionCollection* getToc() const;
Moses::Hypothesis* getHypothesis() const;
//source sentence
const std::vector<Moses::Word>& getWords() const;
private:
static bool m_cleanup;
HypothesisHandle m_hypothesis;
TOCHandle m_toc;
HypothesisVector m_allHypos;
ManagerHandle m_manager;
std::vector<Moses::Word> m_words;
};
} //namespace
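A minimal usage sketch of the wrapper API above. The ini file name and source sentence are placeholders, and initMoses() must run once before any TranslationHypothesis is constructed, since the constructor relies on StaticData:
//Hedged usage sketch; "moses.ini" and the source sentence are illustrative only.
#include <iostream>
#include "Decoder.h"

int main() {
  //one-off setup: loads models and populates StaticData
  Josiah::initMoses("moses.ini", /*debuglevel*/ 0);
  //builds translation options and an initial hypothesis chain for one sentence
  Josiah::TranslationHypothesis hypo("das ist ein kleines haus");
  std::cerr << "source length: " << hypo.getWords().size() << std::endl;
  //the raw hypothesis and option collection are what the sampler operates on
  return hypo.getHypothesis() ? 0 : 1;
}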

josiah/Dependency.cpp Normal file
@@ -0,0 +1,441 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Dependency.h"
#include "Derivation.h"
using namespace Moses;
using namespace std;
namespace Josiah {
static void addChildren(vector<set<size_t> >& tree, size_t parent, set<size_t>& children) {
for (set<size_t>::const_iterator i = tree[parent].begin(); i!= tree[parent].end(); ++i) {
children.insert(*i);
addChildren(tree,*i,children);
}
}
DependencyTree::DependencyTree(const vector<Word>& words, FactorType parentFactor) {
vector<set<size_t> > tree(words.size()); // map parents to their immediate children
int root = -1;
for (size_t child = 0; child < words.size(); ++child) {
int parent = atoi(words[child][parentFactor]->GetString().c_str());
if (parent < 0) {
root = child;
} else {
tree[(size_t)parent].insert(child);
}
m_parents.push_back(parent);
}
m_spans.resize(words.size());
for (size_t i = 0; i < m_parents.size(); ++i) {
addChildren(tree,i,m_spans[i]);
m_spans[i].insert(i); // the head covers itself
}
}
static string ToString(const DependencyTree& t)
{
ostringstream os;
for (size_t i = 0; i < t.getLength(); ++i) {
os << i << "->" << t.getParent(i) << ", ";
}
return os.str();
}
ostream& operator<<(ostream& out, const DependencyTree& t)
{
out << ToString(t);
return out;
}
/** Parent of this index, -1 if root*/
int DependencyTree::getParent(size_t index) const {
return m_parents[index];
}
/** Does the parent word cover the child word? */
bool DependencyTree::covers(size_t parent, size_t descendent) const {
return m_spans[parent].count(descendent);
}
float CherrySyntacticCohesionFeatureFunction::computeScore() {
float interruptionCount = 0.0;
Hypothesis *prev = const_cast<Hypothesis*>(const_cast<Sample&>(getSample()).GetTargetTail()->GetNextHypo()); //first hypo in tgt order
for (Hypothesis* h = const_cast<Hypothesis*>(prev->GetNextHypo()); h; h = const_cast<Hypothesis*>(h->GetNextHypo())) {
Context context = { &(prev->GetCurrSourceWordsRange()), &(h->GetCurrSourceWordsRange()), &(prev->GetCurrTargetWordsRange()), &(h->GetCurrTargetWordsRange()) };
interruptionCount += getInterruptions(prev->GetCurrSourceWordsRange(), &(h->GetTranslationOption()), h->GetCurrTargetWordsRange(), context);
prev = h;
}
VERBOSE(2,"In compute score, interr cnt = " << interruptionCount << endl);
return interruptionCount;
}
/** Score due to one segment */
//NB : Target Segment is the old one
float CherrySyntacticCohesionFeatureFunction::getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap) {
const Hypothesis* prevTgt = gap.leftHypo;
if (!prevTgt->GetPrevHypo()) { //dummy hyp at start of sent, no cohesion violation
return 0.0;
}
Context context = { &(prevTgt->GetCurrSourceWordsRange()), &(option->GetSourceWordsRange()), &(prevTgt->GetCurrTargetWordsRange()), &(gap.segment) };
float interruptionCnt = getInterruptions(prevTgt->GetCurrSourceWordsRange(), option, gap.segment, context);
VERBOSE(2, "In single upd, int cnt " << interruptionCnt << endl);
return interruptionCnt;
}
/** Score due to flip */
float CherrySyntacticCohesionFeatureFunction::getFlipUpdateScore(
const TranslationOption* leftTgtOption, const TranslationOption* rightTgtOption,
const TargetGap& leftGap, const TargetGap& rightGap) {
float interruptionCnt = 0.0;
//Let's sort out the order of the segments
WordsRange* leftTgtSegment = const_cast<WordsRange*> (&leftGap.segment);
WordsRange* rightTgtSegment = const_cast<WordsRange*> (&rightGap.segment);
assert(*leftTgtSegment < *rightTgtSegment); //should already be in target order!
const Hypothesis* leftTgtHypPred = leftGap.leftHypo;
const Hypothesis* rightTgtHypSucc = rightGap.rightHypo;
Context context = { &(leftTgtOption->GetSourceWordsRange()), &(rightTgtOption->GetSourceWordsRange()), leftTgtSegment, rightTgtSegment };
//Left tgt option and its predecessor
if (leftTgtHypPred && leftTgtHypPred->GetPrevHypo()) {
interruptionCnt += getInterruptions(leftTgtHypPred->GetCurrSourceWordsRange(), leftTgtOption, *leftTgtSegment, context);
}
//Right tgt option and its successor
if (rightTgtHypSucc) {
interruptionCnt += getInterruptions(rightTgtOption->GetSourceWordsRange() ,&(rightTgtHypSucc->GetTranslationOption()), rightTgtHypSucc->GetCurrTargetWordsRange(), context);
}
//Are the options contiguous on the target side?
bool contiguous = (leftTgtSegment->GetEndPos() + 1 == rightTgtSegment->GetStartPos()) ;
if (contiguous) {
interruptionCnt += getInterruptions(leftTgtOption->GetSourceWordsRange(), rightTgtOption, *rightTgtSegment, context);
}
else {
//Left tgt option and its successor
const Hypothesis* leftTgtSuccessorHyp = leftGap.rightHypo;
if (leftTgtSuccessorHyp) {
interruptionCnt += getInterruptions(leftTgtOption->GetSourceWordsRange(), &(leftTgtSuccessorHyp->GetTranslationOption()), leftTgtSuccessorHyp->GetCurrTargetWordsRange(), context );
}
//Right tgt option and its predecessor
const Hypothesis* rightTgtPredecessorHyp = rightGap.leftHypo;
if (rightTgtPredecessorHyp) {
interruptionCnt += getInterruptions(rightTgtPredecessorHyp->GetCurrSourceWordsRange(), rightTgtOption, *rightTgtSegment, context);
}
//Everything in between
if (leftTgtSuccessorHyp != rightTgtPredecessorHyp) {
TranslationOption *prevOption = const_cast<TranslationOption*>(&(leftTgtSuccessorHyp->GetTranslationOption()));
for (Hypothesis *hyp = const_cast<Hypothesis*>(leftTgtSuccessorHyp->GetNextHypo()); ; hyp = const_cast<Hypothesis*>(hyp->GetNextHypo())) {
if (hyp) {
interruptionCnt += getInterruptions(prevOption->GetSourceWordsRange(), &(hyp->GetTranslationOption()), hyp->GetCurrTargetWordsRange(), context);
prevOption = const_cast<TranslationOption*>(&(hyp->GetTranslationOption()));
}
if (hyp == rightGap.leftHypo) {
break;
}
}
}
}
VERBOSE (2, "In flip, interr cnt = " << interruptionCnt << endl);
return interruptionCnt;
}
/** Score due to two segments **/
float CherrySyntacticCohesionFeatureFunction::getContiguousPairedUpdateScore(
const TranslationOption* leftTgtOption,const TranslationOption* rightTgtOption,
const TargetGap& gap) {
float interruptionCnt = 0.0;
const Hypothesis* leftTgtHypPred = gap.leftHypo;
const Hypothesis* rightTgtHypSucc = gap.rightHypo;
Context context = { &(leftTgtOption->GetSourceWordsRange()), &(rightTgtOption->GetSourceWordsRange()), &(gap.segment), &(gap.segment) };
//Left tgt option and its predecessor
if (gap.segment.GetStartPos() > 0) {
if (leftTgtHypPred) {
interruptionCnt += getInterruptions(leftTgtHypPred->GetCurrSourceWordsRange(), leftTgtOption, gap.segment, context);
}
}
//Right tgt option and its successor
if (rightTgtHypSucc) {
interruptionCnt += getInterruptions(rightTgtOption->GetSourceWordsRange() ,&(rightTgtHypSucc->GetTranslationOption()), rightTgtHypSucc->GetCurrTargetWordsRange(), context);
}
interruptionCnt += getInterruptions(leftTgtOption->GetSourceWordsRange(), rightTgtOption, gap.segment, context);
VERBOSE(2, "In paired update, interr cnt = " << interruptionCnt << endl);
return interruptionCnt;
}
float CherrySyntacticCohesionFeatureFunction::getDiscontiguousPairedUpdateScore(
const TranslationOption* leftTgtOption,const TranslationOption* rightTgtOption,
const TargetGap& leftGap, const TargetGap& rightGap) {
float interruptionCnt = 0.0;
WordsRange* leftTgtSegment = const_cast<WordsRange*> (&leftGap.segment);
WordsRange* rightTgtSegment = const_cast<WordsRange*> (&rightGap.segment);
assert(*leftTgtSegment < *rightTgtSegment); //should already be in target order!
const Hypothesis* leftTgtHypPred = leftGap.leftHypo;
const Hypothesis* leftTgtSuccessorHyp = leftGap.rightHypo;
const Hypothesis* rightTgtPredecessorHyp = rightGap.leftHypo;
const Hypothesis* rightTgtHypSucc = rightGap.rightHypo;
Context context = { &(leftTgtOption->GetSourceWordsRange()), &(rightTgtOption->GetSourceWordsRange()), leftTgtSegment, rightTgtSegment };
//Left tgt option and its predecessor
if (leftTgtSegment->GetStartPos() > 0) {
if (leftTgtHypPred) {
interruptionCnt += getInterruptions(leftTgtHypPred->GetCurrSourceWordsRange(), leftTgtOption, *leftTgtSegment, context);
}
}
//Right tgt option and its successor
if (rightTgtHypSucc) {
interruptionCnt += getInterruptions(rightTgtOption->GetSourceWordsRange() ,&(rightTgtHypSucc->GetTranslationOption()), rightTgtHypSucc->GetCurrTargetWordsRange(), context);
}
//Left tgt option and its successor
if (leftTgtSuccessorHyp) {
interruptionCnt += getInterruptions(leftTgtOption->GetSourceWordsRange(), &(leftTgtSuccessorHyp->GetTranslationOption()), leftTgtSuccessorHyp->GetCurrTargetWordsRange(), context);
}
//Right tgt option and its predecessor
if (rightTgtPredecessorHyp) {
interruptionCnt += getInterruptions(rightTgtPredecessorHyp->GetCurrSourceWordsRange(), rightTgtOption, *rightTgtSegment, context);
}
//Everything in between
if (leftTgtSuccessorHyp != rightTgtPredecessorHyp) {
TranslationOption *prevOption = const_cast<TranslationOption*>(&(leftTgtSuccessorHyp->GetTranslationOption()));
for (Hypothesis *hyp = const_cast<Hypothesis*>(leftTgtSuccessorHyp->GetNextHypo()); ; hyp = const_cast<Hypothesis*>(hyp->GetNextHypo())) {
if (hyp) {
interruptionCnt += getInterruptions(prevOption->GetSourceWordsRange(), &(hyp->GetTranslationOption()), hyp->GetCurrTargetWordsRange(), context);
prevOption = const_cast<TranslationOption*>(&(hyp->GetTranslationOption()));
}
if (hyp == rightGap.leftHypo) {
break;
}
}
}
VERBOSE (2, "In paired update, interr cnt = " << interruptionCnt << endl);
return interruptionCnt;
}
/**Helper method */
float CherrySyntacticCohesionFeatureFunction::getInterruptions(const WordsRange& prevSourceRange, const TranslationOption *option, const WordsRange& targetSegment, const Context& context) {
float interruptionCnt = 0.0;
size_t f_L = prevSourceRange.GetStartPos();
size_t f_R = prevSourceRange.GetEndPos();
interruptionCnt = getInterruptionCount(option, targetSegment, f_L, context);
if (interruptionCnt == 0 && f_L != f_R)
interruptionCnt = getInterruptionCount(option, targetSegment, f_R, context);
return interruptionCnt;
}
float CherrySyntacticCohesionFeatureFunction::getInterruptionCount(const TranslationOption *option, const WordsRange& targetSegment, size_t f, const Context& context) {
size_t r_prime = f;
size_t r = NOT_FOUND;
while (notAllWordsCoveredByTree(option, r_prime)) {
r = r_prime;
r_prime = m_sourceTree->getParent(r_prime);
}
if (r == NOT_FOUND)
return 0.0;
const set<size_t> & children = m_sourceTree->getChildren(r);
for (set<size_t>::const_iterator it = children.begin(); it != children.end(); ++it) {
size_t child = *it;
const WordsRange* otherSegment;
if (context.leftSrcRange->covers(child)) {
otherSegment = context.leftTgtRange ;
}
else if (context.rightSrcRange->covers(child)) {
otherSegment = context.rightTgtRange ;
}
else {
Hypothesis* hyp = const_cast<Sample&>(getSample()).GetHypAtSourceIndex(child);
otherSegment = &(hyp->GetCurrTargetWordsRange());
}
if (isInterrupting(*otherSegment, targetSegment)) {
return 1.0;
}
}
return 0.0;
}
bool CherrySyntacticCohesionFeatureFunction::isInterrupting(const WordsRange& otherSegment, const WordsRange& targetSegment) {
return otherSegment > targetSegment;
}
bool CherrySyntacticCohesionFeatureFunction::notAllWordsCoveredByTree(const TranslationOption* option, size_t parent) {
for (size_t s = option->GetStartPos(); s <= option->GetEndPos(); ++s) {
if (!m_sourceTree->covers(parent, s))
return true;
}
return false;
}
//new sample
DependencyDistortionFeatureFunction::DependencyDistortionFeatureFunction(const Sample& sample, Moses::FactorType parentFactor) :
DependencyFeatureFunction(sample,"DependencyDistortion",parentFactor) {
size_t sourceSize = getSample().GetSourceSize();
size_matrix_t::extent_gen extents;
m_distances.resize(extents[sourceSize][sourceSize]);
//Use Floyd-Warshall to compute all the distances
//Initialise with the (undirected) tree structure
for (size_t i = 0; i < sourceSize; ++i) {
size_t iparent = (size_t)m_sourceTree->getParent(i);
for (size_t j = 0; j < sourceSize; ++j) {
size_t jparent = (size_t)m_sourceTree->getParent(j);
if (i == j) {
m_distances[i][j] = 0;
} else if (iparent == j || jparent == i) {
m_distances[i][j] = 1;
} else {
m_distances[i][j] = sourceSize*2; //no path - infinity
}
}
}
//run algorithm
for (size_t k = 0; k < sourceSize; ++k) {
for (size_t i = 0; i < sourceSize; ++i) {
for (size_t j = 0; j < sourceSize; ++j) {
m_distances[i][j] = min(m_distances[i][j], m_distances[i][k] + m_distances[k][j]);
}
}
}
/*for (size_t i = 0; i < sourceSize; ++i) {
for (size_t j = 0; j < sourceSize; ++j) {
cerr << "p[" << i << "][" << j << "] = " << m_distances[i][j] << " ";
}
cerr << endl;
}*/
}
size_t DependencyDistortionFeatureFunction::getDistortionDistance(const WordsRange& leftRange, const WordsRange& rightRange) {
size_t leftSourcePos = leftRange.GetEndPos();
size_t rightSourcePos = rightRange.GetStartPos();
return m_distances[leftSourcePos][rightSourcePos] - 1;
}
/** Compute full score of a sample from scratch **/
float DependencyDistortionFeatureFunction::computeScore() {
//
// The score for each pair of adjacent target phrases is the tree distance between the corresponding source words at the phrase boundaries
//
float score = 0;
const Hypothesis* currHypo = getSample().GetTargetTail();
while ((currHypo = (currHypo->GetNextHypo()))) {
const Hypothesis* nextHypo = currHypo->GetNextHypo();
if (nextHypo) {
score += getDistortionDistance(currHypo->GetCurrSourceWordsRange(),nextHypo->GetCurrSourceWordsRange());
}
}
return score;
}
/** Score due to one segment */
float DependencyDistortionFeatureFunction::getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap) {
float score = 0;
if (gap.leftHypo->GetPrevHypo()) {
score += getDistortionDistance(gap.leftHypo->GetCurrSourceWordsRange(), option->GetSourceWordsRange());
}
if (gap.rightHypo) {
score += getDistortionDistance(option->GetSourceWordsRange(), gap.rightHypo->GetCurrSourceWordsRange());
}
return score;
}
/** Score due to two segments **/
float DependencyDistortionFeatureFunction::getContiguousPairedUpdateScore
(const TranslationOption* leftOption, const TranslationOption* rightOption, const TargetGap& gap) {
float score = 0;
if (gap.leftHypo->GetPrevHypo()) {
score += getDistortionDistance(gap.leftHypo->GetCurrSourceWordsRange(), leftOption->GetSourceWordsRange());
}
score += getDistortionDistance(leftOption->GetSourceWordsRange(), rightOption->GetSourceWordsRange());
if (gap.rightHypo) {
score += getDistortionDistance(rightOption->GetSourceWordsRange(), gap.rightHypo->GetCurrSourceWordsRange());
}
return score;
}
float DependencyDistortionFeatureFunction::getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) {
return getSingleUpdateScore(leftOption,leftGap) + getSingleUpdateScore(rightOption,rightGap);
}
/** Score due to flip */
float DependencyDistortionFeatureFunction::getFlipUpdateScore(
const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) {
float score = 0;
if (leftGap.leftHypo->GetPrevHypo()) {
score += getDistortionDistance(leftGap.leftHypo->GetCurrSourceWordsRange(), leftOption->GetSourceWordsRange());
}
bool contiguous = (leftGap.segment.GetEndPos() + 1 == rightGap.segment.GetStartPos());
if (contiguous) {
score += getDistortionDistance(leftOption->GetSourceWordsRange(), rightOption->GetSourceWordsRange());
} else {
score += getDistortionDistance(leftOption->GetSourceWordsRange(),leftGap.rightHypo->GetCurrSourceWordsRange());
score += getDistortionDistance(rightGap.leftHypo->GetCurrSourceWordsRange(), rightOption->GetSourceWordsRange());
}
if (rightGap.rightHypo) {
score += getDistortionDistance(rightOption->GetSourceWordsRange(), rightGap.rightHypo->GetCurrSourceWordsRange());
}
return score;
}
}
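For reference, the Floyd-Warshall pass in the DependencyDistortionFeatureFunction constructor above can be exercised in isolation. A self-contained sketch (std C++ only; the toy parent array is illustrative):
#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  //toy dependency tree as a parent array; -1 marks the root: 0->1, 2->1, 3->2
  int p[] = {1, -1, 1, 2};
  std::vector<int> parent(p, p + 4);
  size_t n = parent.size();
  size_t inf = 2 * n; //stands in for "no path yet", as in the constructor above
  std::vector<std::vector<size_t> > d(n, std::vector<size_t>(n, inf));
  for (size_t i = 0; i < n; ++i) {
    d[i][i] = 0;
    if (parent[i] >= 0) { d[i][parent[i]] = 1; d[parent[i]][i] = 1; }
  }
  for (size_t k = 0; k < n; ++k)
    for (size_t i = 0; i < n; ++i)
      for (size_t j = 0; j < n; ++j)
        d[i][j] = std::min(d[i][j], d[i][k] + d[k][j]);
  std::cout << "distance(0,3) = " << d[0][3] << std::endl; //prints 3: 0->1->2->3
  return 0;
}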

josiah/Dependency.h Normal file
@@ -0,0 +1,177 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <climits>
#include <set>
#include <vector>
#include <boost/multi_array.hpp>
#include "Factor.h"
#include "Gibbler.h"
#include "FeatureFunction.h"
namespace Josiah {
class DependencyTree {
public:
DependencyTree(const std::vector<Word>& words, Moses::FactorType parentFactor);
/** Parent of this index, -1 if root*/
int getParent(size_t index) const;
/** Does the parent word cover the child word? */
bool covers(size_t parent, size_t child) const;
/** length of sentence */
size_t getLength() const {return m_parents.size();}
const std::set<size_t> & getChildren(size_t parent) const { return m_spans[parent];}
private:
std::vector<int> m_parents;
std::vector<std::set<size_t> > m_spans;
};
std::ostream& operator<<(std::ostream& out, const DependencyTree& t);
class DependencyFeatureFunction: public SingleValuedFeatureFunction {
public:
DependencyFeatureFunction(const Sample& sample, const std::string& name, Moses::FactorType parentFactor):
SingleValuedFeatureFunction(sample,name), m_parentFactor(parentFactor)
{
m_sourceTree.reset(new DependencyTree(sample.GetSourceWords(), m_parentFactor));
//cerr << "New Tree: " << *(m_sourceTree.get()) << endl;
    //for (size_t parent = 0; parent < m_sourceTree->getLength(); ++parent) {
    //  for (size_t child = 0; child < m_sourceTree->getLength(); ++child) {
    //    cerr << "parent " << parent << " child " << child << " covers " << m_sourceTree->covers(parent,child) << endl;
    //  }
    //}
updateTarget();
}
protected:
std::auto_ptr<DependencyTree> m_sourceTree;
Moses::FactorType m_parentFactor; //which factor is the parent index?
};
/**
* Feature based on Colin Cherry's Soft Syntactic Constraint (ACL 2008).
**/
class CherrySyntacticCohesionFeatureFunction : public DependencyFeatureFunction {
public:
CherrySyntacticCohesionFeatureFunction(const Sample& sample,Moses::FactorType parentFactor):
DependencyFeatureFunction(sample,"Cherry",parentFactor) {}
/** Compute full score of a sample from scratch **/
virtual float computeScore();
/** Score due to one segment */
virtual float getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap);
/** Score due to two segments **/
virtual float getContiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& gap);
virtual float getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
/** Score due to flip */
virtual float getFlipUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
virtual ~CherrySyntacticCohesionFeatureFunction() {}
private:
struct Context {
const WordsRange* leftSrcRange, *rightSrcRange;
const WordsRange* leftTgtRange, *rightTgtRange;
} ;
float getInterruptions(const WordsRange& prevSourceRange, const TranslationOption *option, const WordsRange& targetSegment, const Context &);
float getInterruptionCount(const TranslationOption* option, const WordsRange& targetSegment, size_t f, const Context &);
bool notAllWordsCoveredByTree(const TranslationOption* option, size_t parent);
bool isInterrupting(const WordsRange& otherSegment, const WordsRange& targetSegment);
};
class CherrySyntacticCohesionFeature : public Feature {
public:
CherrySyntacticCohesionFeature(Moses::FactorType parentFactor) :
m_parentFactor(parentFactor) {}
virtual FeatureFunctionHandle getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new CherrySyntacticCohesionFeatureFunction(sample, m_parentFactor));
}
private:
Moses::FactorType m_parentFactor;
};
/**
* Feature which measures distortion using distance in the dependency tree.
**/
class DependencyDistortionFeatureFunction : public DependencyFeatureFunction {
public:
DependencyDistortionFeatureFunction(const Sample& sample,Moses::FactorType parentFactor);
/** Compute full score of a sample from scratch **/
virtual float computeScore();
/** Score due to one segment */
virtual float getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap);
/** Score due to two segments **/
virtual float getContiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& gap);
virtual float getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
/** Score due to flip */
virtual float getFlipUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
virtual ~DependencyDistortionFeatureFunction() {}
private:
//the distance in the dependency tree between any given pair of source words
typedef boost::multi_array<size_t, 2> size_matrix_t;
size_matrix_t m_distances;
/** Compute dependency distortion between two target adjacent source-ranges */
size_t getDistortionDistance(const WordsRange& leftRange, const WordsRange& rightRange);
};
class DependencyDistortionFeature : public Feature {
public:
DependencyDistortionFeature(Moses::FactorType parentFactor) :
m_parentFactor(parentFactor) {}
FeatureFunctionHandle getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new DependencyDistortionFeatureFunction(sample, m_parentFactor));
}
private:
Moses::FactorType m_parentFactor;
};
}
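The span (descendant) computation behind covers() and getChildren() can likewise be checked standalone. A self-contained sketch mirroring addChildren() from Dependency.cpp, on the same toy tree as before:
#include <iostream>
#include <set>
#include <vector>

static void addChildren(const std::vector<std::set<size_t> >& tree,
                        size_t parent, std::set<size_t>& children) {
  for (std::set<size_t>::const_iterator i = tree[parent].begin();
       i != tree[parent].end(); ++i) {
    children.insert(*i);
    addChildren(tree, *i, children); //recurse to pick up grandchildren etc.
  }
}

int main() {
  int p[] = {1, -1, 1, 2}; //0->1, 2->1, 3->2; word 1 is the root
  std::vector<int> parents(p, p + 4);
  std::vector<std::set<size_t> > tree(parents.size());
  for (size_t c = 0; c < parents.size(); ++c)
    if (parents[c] >= 0) tree[parents[c]].insert(c);
  std::vector<std::set<size_t> > spans(parents.size());
  for (size_t i = 0; i < parents.size(); ++i) {
    addChildren(tree, i, spans[i]);
    spans[i].insert(i); //the head covers itself
  }
  std::cout << "span of the root has " << spans[1].size() << " words" << std::endl; //4
  return 0;
}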

josiah/Derivation.cpp Normal file
@@ -0,0 +1,96 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Derivation.h"
#include "Gibbler.h"
#include "DummyScoreProducers.h"
#include "WeightManager.h"
using namespace std;
using namespace Moses;
namespace Josiah {
bool Derivation::PhraseAlignment::operator<(const PhraseAlignment& other) const {
if (_sourceSegment < other._sourceSegment) return true;
if (other._sourceSegment < _sourceSegment) return false;
return _target < other._target;
}
Derivation::Derivation(const Sample& sample) {
m_featureValues = sample.GetFeatureValues();
const Hypothesis* currHypo = sample.GetTargetTail();
while ((currHypo = (currHypo->GetNextHypo()))) {
TargetPhrase targetPhrase = currHypo->GetTargetPhrase();
m_alignments.push_back(
PhraseAlignment(currHypo->GetCurrSourceWordsRange(), Phrase(targetPhrase)));
}
const FVector& weights = WeightManager::instance().get();
m_score = inner_product(m_featureValues, weights);
}
//FIXME: This may not be the most efficient way of mapping derivations, but will do for now
bool Derivation::operator <(const Derivation & other) const {
bool result = m_alignments < other.m_alignments;
return result;
}
void Derivation::getTargetFactors(std::vector<const Factor*>& sentence) const {
for (vector<PhraseAlignment>::const_iterator i = m_alignments.begin(); i != m_alignments.end(); ++i) {
const Phrase& targetPhrase = i->_target;
for (size_t j = 0; j < targetPhrase.GetSize(); ++j) {
sentence.push_back(targetPhrase.GetFactor(j,0));
}
}
}
int Derivation::getTargetSentenceSize() const { //convenience: builds the target sentence and counts its words
std::vector<std::string> words;
getTargetSentence(words);
return words.size();
}
void Derivation::getTargetSentence(std::vector<std::string>& targetWords ) const {
for (vector<PhraseAlignment>::const_iterator i = m_alignments.begin(); i != m_alignments.end(); ++i) {
const Phrase& targetPhrase = i->_target;
for (size_t j = 0; j < targetPhrase.GetSize(); ++j) {
targetWords.push_back(targetPhrase.GetWord(j).GetFactor(0)->GetString());
}
}
}
ostream& operator<<(ostream& out, const Derivation& d) {
out << "Target: << ";
for (size_t i = 0; i < d.m_alignments.size(); ++i) {
out << d.m_alignments[i]._target;
out << d.m_alignments[i]._sourceSegment << " ";
}
out << ">> Feature values: ";
out << d.m_featureValues;
out << " Score: ";
out << d.m_score;
return out;
}
} //namespace

josiah/Derivation.h Normal file
@@ -0,0 +1,88 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <algorithm>
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "FeatureVector.h"
#include "WordsRange.h"
#include "Phrase.h"
#include "Factor.h"
using namespace Moses;
namespace Josiah {
class Sample;
/**
* Represents a derivation, i.e. a way of getting from e to f.
**/
class Derivation {
public:
Derivation(const Sample& sample);
void getTargetSentence(std::vector<std::string>&) const;
int getTargetSentenceSize() const;
const FVector& getFeatureValues() const {return m_featureValues;}
float getScore() const {return m_score;}
void getTargetFactors(std::vector<const Factor*>& sentence) const;
bool operator<(const Derivation& other) const;
struct PhraseAlignment {
//since these are stored in target order, there is no need to retain the target segment
WordsRange _sourceSegment;
Phrase _target;
PhraseAlignment(const WordsRange& sourceSegment,const Phrase& target)
: _sourceSegment(sourceSegment),_target(target) {}
bool operator<(const PhraseAlignment& other) const;
};
friend std::ostream& operator<<(std::ostream&, const Derivation&);
friend struct DerivationProbLessThan;
private:
std::vector<PhraseAlignment> m_alignments; //in target order
FVector m_featureValues;
FValue m_score;
//std::vector<std::string> m_targetWords;
};
struct DerivationLessThan {
bool operator()(const Derivation& d1, const Derivation& d2) {
return d1 < d2;
}
};
typedef std::pair<const Derivation*,float> DerivationProbability;
std::ostream& operator<<(std::ostream&, const Derivation&);
} //namespace
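The ordering operators above exist so derivations can key associative containers when the sampler estimates a distribution over derivations. A hedged sketch (collectSample is a hypothetical helper; constructing a Sample needs a full sampler and is omitted):
#include <map>
#include "Derivation.h"

namespace Josiah {
//count how often each distinct derivation is visited during sampling
void collectSample(const Sample& sample,
                   std::map<Derivation, size_t, DerivationLessThan>& counts) {
  ++counts[Derivation(sample)];
}
}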

josiah/DiscriminativeLMFeature.cpp Normal file
@@ -0,0 +1,173 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <fstream>
#include "DiscriminativeLMFeature.h"
using namespace Moses;
using namespace std;
namespace Josiah {
const string DiscriminativeLMBigramFeature::ROOTNAME = "dlmb";
DiscriminativeLMBigramFeature::DiscriminativeLMBigramFeature
(FactorType factorId,const std::string& vocabFile) : m_factorId(factorId) {
if (!vocabFile.empty()) {
ifstream in(vocabFile.c_str());
assert(in);
string line;
while (getline(in,line)) {
m_vocab.insert(line);
}
m_vocab.insert(EOS_);
m_vocab.insert(BOS_);
}
//create BOS and EOS
FactorCollection& factorCollection = FactorCollection::Instance();
const Factor* bosFactor = factorCollection.AddFactor(Input,m_factorId,BOS_);
BOS.SetFactor(m_factorId,bosFactor);
const Factor* eosFactor = factorCollection.AddFactor(Input,m_factorId,EOS_);
EOS.SetFactor(m_factorId,eosFactor);
}
FeatureFunctionHandle DiscriminativeLMBigramFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new DiscriminativeLMBigramFeatureFunction(sample,*this));
}
const Word& DiscriminativeLMBigramFeature::bos() const {
return BOS;
}
const Word& DiscriminativeLMBigramFeature::eos() const {
return EOS;
}
const std::set<std::string>& DiscriminativeLMBigramFeature::vocab() const {
return m_vocab;
}
Moses::FactorType DiscriminativeLMBigramFeature::factorId() const {
return m_factorId;
}
DiscriminativeLMBigramFeatureFunction::DiscriminativeLMBigramFeatureFunction
(const Sample& sample, const DiscriminativeLMBigramFeature& parent):
FeatureFunction(sample), m_parent(parent)
{}
void DiscriminativeLMBigramFeatureFunction::updateTarget() {
m_targetWords = getSample().GetTargetWords();
}
void DiscriminativeLMBigramFeatureFunction::scoreBigram(const Word& word1, const Word& word2, FVector& scores) {
const string& text1 = word1[m_parent.factorId()]->GetString();
if (!m_parent.vocab().empty() && m_parent.vocab().find(text1) == m_parent.vocab().end()) {
return;
}
const string& text2 = word2[m_parent.factorId()]->GetString();
if (!m_parent.vocab().empty() && m_parent.vocab().find(text2) == m_parent.vocab().end()) {
return;
}
FName name(m_parent.ROOTNAME, text1 + ":" + text2);
++scores[name];
}
/** Assign the total score of this feature on the current hypo */
void DiscriminativeLMBigramFeatureFunction::assignScore(FVector& scores)
{
for (size_t i = 0; i + 1 < m_targetWords.size(); ++i) { //i+1 guard avoids size_t underflow on an empty target
scoreBigram(m_targetWords[i],m_targetWords[i+1],scores);
}
}
void DiscriminativeLMBigramFeatureFunction::doUpdate(const Phrase& gapPhrase, const TargetGap& gap, FVector& scores)
{
if (gap.leftHypo->GetPrevHypo()) {
//left edge
const TargetPhrase& leftPhrase = gap.leftHypo->GetTargetPhrase();
scoreBigram(leftPhrase.GetWord(leftPhrase.GetSize()-1), gapPhrase.GetWord(0),scores);
} else {
scoreBigram(m_parent.bos(), gapPhrase.GetWord(0),scores);
}
//gap phrase
size_t i = 0;
for (; i < gapPhrase.GetSize()-1; ++i) {
scoreBigram(gapPhrase.GetWord(i), gapPhrase.GetWord(i+1),scores);
}
//right edge
if (gap.rightHypo) {
scoreBigram(gapPhrase.GetWord(i),gap.rightHypo->GetTargetPhrase().GetWord(0), scores);
} else {
scoreBigram(gapPhrase.GetWord(i),m_parent.eos(),scores);
}
}
/** Score due to one segment */
void DiscriminativeLMBigramFeatureFunction::doSingleUpdate
(const TranslationOption* option, const TargetGap& gap, FVector& scores)
{
doUpdate(option->GetTargetPhrase(),gap, scores);
}
/** Score due to two segments. The left and right refer to the target positions.**/
void DiscriminativeLMBigramFeatureFunction::doContiguousPairedUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores)
{
Phrase gapPhrase(leftOption->GetTargetPhrase());
gapPhrase.Append(rightOption->GetTargetPhrase());
doUpdate(gapPhrase,gap,scores);
}
void DiscriminativeLMBigramFeatureFunction::doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
doUpdate(leftOption->GetTargetPhrase(), leftGap, scores);
doUpdate(rightOption->GetTargetPhrase(), rightGap, scores);
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void DiscriminativeLMBigramFeatureFunction::doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
if (leftGap.segment.GetEndPos()+1 == rightGap.segment.GetStartPos()) {
//contiguous
Phrase gapPhrase(leftOption->GetTargetPhrase());
gapPhrase.Append(rightOption->GetTargetPhrase());
TargetGap gap(leftGap.leftHypo, rightGap.rightHypo,
WordsRange(leftGap.segment.GetStartPos(), rightGap.segment.GetEndPos()));
doUpdate(gapPhrase,gap,scores);
} else {
//discontiguous
doUpdate(leftOption->GetTargetPhrase(), leftGap,scores);
doUpdate(rightOption->GetTargetPhrase(), rightGap,scores);
}
}
}
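The extraction done by scoreBigram()/assignScore() above reduces to counting in-vocabulary target bigrams. A self-contained sketch (std C++ only; the underscore in the printed feature name is illustrative, since the real spelling is decided by FName):
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  std::set<std::string> vocab;
  vocab.insert("<s>"); vocab.insert("the"); vocab.insert("cat"); vocab.insert("</s>");
  const char* w[] = {"<s>", "the", "cat", "sat", "</s>"};
  std::vector<std::string> target(w, w + 5);
  std::map<std::string, float> scores;
  for (size_t i = 0; i + 1 < target.size(); ++i) {
    //a bigram fires only if both words are in the (restricted) vocabulary
    if (vocab.count(target[i]) && vocab.count(target[i + 1]))
      ++scores["dlmb_" + target[i] + ":" + target[i + 1]];
  }
  for (std::map<std::string, float>::const_iterator it = scores.begin();
       it != scores.end(); ++it)
    std::cout << it->first << " = " << it->second << std::endl;
  return 0; //prints dlmb_<s>:the and dlmb_the:cat, each with value 1
}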

josiah/DiscriminativeLMFeature.h Normal file
@@ -0,0 +1,90 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include "FeatureFunction.h"
#include "Gibbler.h"
namespace Josiah {
/**
* Feature based on target bigrams.
**/
class DiscriminativeLMBigramFeature : public Feature {
public:
DiscriminativeLMBigramFeature(Moses::FactorType factorId, const std::string& vocabFile="");
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
const Word& bos() const;
const Word& eos() const;
const std::set<std::string>& vocab() const;
Moses::FactorType factorId() const;
static const std::string ROOTNAME;
private:
std::set<std::string> m_vocab;
Moses::FactorType m_factorId;
Word BOS;
Word EOS;
};
class DiscriminativeLMBigramFeatureFunction : public FeatureFunction {
public:
DiscriminativeLMBigramFeatureFunction(const Sample& sample, const DiscriminativeLMBigramFeature& parent);
/** Update the target words.*/
virtual void updateTarget();
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
/** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) ;
private:
void scoreBigram(const Word& word1, const Word& word2, FVector& scores);
/** Score change due to filling in the gapPhrase in the gap.*/
void doUpdate(const Phrase& gapPhrase, const TargetGap& gap, FVector& scores);
std::vector<Word> m_targetWords;
const DiscriminativeLMBigramFeature m_parent;
};
}

josiah/DistortionPenaltyFeature.cpp Normal file
@@ -0,0 +1,76 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "DistortionPenaltyFeature.h"
#include "DummyScoreProducers.h"
#include "Derivation.h"
#include "Gibbler.h"
#include "GibbsOperator.h"
namespace Josiah {
FeatureFunctionHandle DistortionPenaltyFeature::getFunction( const Sample& sample ) const {
return FeatureFunctionHandle(new DistortionPenaltyFeatureFunction(sample));
}
FValue DistortionPenaltyFeatureFunction::computeScore() {
FValue distortion = 0;
//cerr << Derivation(*m_sample) << endl;
const Hypothesis* currHypo = getSample().GetTargetTail(); //target tail
//step through in target order
int lastSrcEnd = -1;
while ((currHypo = (currHypo->GetNextHypo()))) {
int srcStart = currHypo->GetCurrSourceWordsRange().GetStartPos();
distortion -= abs(srcStart - (lastSrcEnd+1));
lastSrcEnd = currHypo->GetCurrSourceWordsRange().GetEndPos();
}
//cerr << "distortion " << distortion << endl;
return distortion;
}
FValue DistortionPenaltyFeatureFunction::getFlipUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap)
{
FValue distortion;
const Hypothesis* leftTgtNextHypo = leftGap.rightHypo;
const Hypothesis* rightTgtPrevHypo = rightGap.leftHypo;
//if the segments are contiguous and we're swapping, then these hypos have to be swapped so
//that they're in the order they'd appear in the proposed target
if (leftGap.segment.GetEndPos()+1 == rightGap.segment.GetStartPos()) {
if (leftTgtNextHypo->GetCurrSourceWordsRange() != rightOption->GetSourceWordsRange()) {
const Hypothesis* tmp = leftTgtNextHypo;
leftTgtNextHypo = rightTgtPrevHypo;
rightTgtPrevHypo = tmp;
}
}
CheckValidReordering(leftOption->GetSourceWordsRange(), rightOption->GetSourceWordsRange(),
leftGap.leftHypo, leftTgtNextHypo,
rightTgtPrevHypo, rightGap.rightHypo, distortion);
//cerr << leftOption->GetSourceWordsRange() << " " << rightOption->GetSourceWordsRange() << " " << distortion << endl;
//cerr << "lg.rh" << leftTgtNextHypo->GetCurrSourceWordsRange() << " rg.lh" << rightTgtPrevHypo->GetCurrSourceWordsRange() << endl;
return distortion;
}
}
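computeScore() above is the classic distortion penalty, negated and accumulated in target order. A self-contained sketch on illustrative source ranges:
#include <cstdlib>
#include <iostream>

int main() {
  //(start,end) source ranges of the hypothesis chain, in target order
  int starts[] = {0, 4, 2};
  int ends[]   = {1, 4, 3};
  int distortion = 0, lastSrcEnd = -1;
  for (size_t i = 0; i < 3; ++i) {
    distortion -= std::abs(starts[i] - (lastSrcEnd + 1));
    lastSrcEnd = ends[i];
  }
  std::cout << "distortion = " << distortion << std::endl; //-(0 + 2 + 3) = -5
  return 0;
}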

josiah/DistortionPenaltyFeature.h Normal file
@@ -0,0 +1,65 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include "FeatureFunction.h"
namespace Josiah {
class DistortionPenaltyFeature : public Feature {
public:
virtual FeatureFunctionHandle getFunction( const Sample& sample ) const;
};
class DistortionPenaltyFeatureFunction : public SingleValuedFeatureFunction {
public:
DistortionPenaltyFeatureFunction(const Sample& sample)
: SingleValuedFeatureFunction(sample,"Distortion") {}
protected:
virtual FValue computeScore();
/** Score due to one segment */
virtual FValue getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap) {return 0;}
/** Score due to two segments. The left and right refer to the target positions.**/
virtual FValue getContiguousPairedUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap){return 0;}
virtual FValue getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) {return 0;}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual FValue getFlipUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
};
}

josiah/FV.cpp Normal file
@@ -0,0 +1,102 @@
/*
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <fstream>
#include <iostream>
#include <boost/archive/text_oarchive.hpp>
#include "FeatureVector.h"
using namespace Josiah;
using namespace std;
int main() {
FVector fv;
FName g3("L", "1");
FName g4("L", "2");
FName t1("T", "1");
FName p2("P", "2");
fv[g3] = 2.0;
fv[g4] = 1.3;
cerr << "fv=" << fv << endl;
FVector fv2;
fv2[g3] = 1.5;
fv2[t1] = 3.0;
FVector fvsum = fv + fv2;
FVector fvdiff = fv - fv2;
FVector fvprod = fv * 1.4;
FVector fvdiv = fv / 4.0;
cerr << "fv2=" << fv2 << endl;
cerr << "fvsum=" << fvsum << endl;
cerr << "fvdiff=" << fvdiff << endl;
cerr << "fvprod=" << fvprod << endl;
cerr << "fvdiv=" << fvdiv << endl;
cerr << "fv.fvprod=" << inner_product(fv,fvprod) << endl;
cerr << "fvprod.fv=" << inner_product(fvprod,fv) << endl;
cerr << "fv * fv2 = " << (fv*fv2) << endl;
cerr << "fv / fv2 = " << (fv/fv2) << endl;
FVector fvp2 = fv + 2.0;
cerr << "fv + 2 = " << fvp2 << endl;
//cerr << "(fv+2)[" << g3 << "] = " << fvp2[g3] << " (fv+2)[" << g4 << "] = " << fvp2[g4] << " (fv+2)[" << t1 << "] = " << fvp2[t1] << endl;
FVector fv2m1 = fv2 - 1.0;
cerr << "(fv + 2) + (fv2 -1) = " << (fvp2 + fv2m1) << endl;
cerr << "(fv + 2) - (fv2 -1) = " << (fvp2 - fv2m1) << endl;
cerr << "(fv + 2) * (fv2 -1) = " << (fvp2 * fv2m1) << endl;
cerr << "(fv + 2) / (fv2 -1) = " << (fvp2 / fv2m1) << endl;
cerr << "max((fv + 2),(fv2 -1)) = " << fvmax(fvp2,fv2m1) << endl;
cerr << "(fv + 2) + (fv2) = " << (fvp2 + fv2) << endl;
cerr << "(fv + 2) - (fv2) = " << (fvp2 - fv2) << endl;
cerr << "(fv + 2) * (fv2) = " << (fvp2 * fv2) << endl;
cerr << "fv2 / (fv + 2) = " << (fv2 / fvp2) << endl;
cerr << "max((fv + 2),(fv2)) = " << fvmax(fv2,fvp2) << endl;
//fv2[g4] = 3.1; //error
cerr << "fv2 . (fv + 2) = " << inner_product(fv2,fv+2) << endl;
cerr << "(fv2-1) . (fv) = " << inner_product(fv2-1,fv) << endl;
cerr << "(fv -1)[p2] = " << (fv -1)[p2] << endl;
cerr << "fvp2 = " << fvp2 << endl;
cerr << "++fvp2[g3] = " << ++fvp2[g3] << endl;
//cerr << "fvp2[g3] = " << ++fvp2[g3] << endl;
cerr << "fvp2 = " << fvp2 << endl;
fvp2[p2] += 5;
cerr << "fvp2 = " << fvp2 << endl;
FVector loaded;
loaded.load("weights.txt");
cerr << "loaded=" << loaded << endl;
return 0;
}

josiah/FV_mpi.cpp Normal file
@@ -0,0 +1,102 @@
#include <cassert>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#ifdef MPI_ENABLED
#include <boost/mpi/environment.hpp>
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
#include <boost/unordered_map.hpp>
#include <boost/serialization/access.hpp>
#include <boost/serialization/map.hpp>
namespace mpi = boost::mpi;
#endif
#include "FeatureVector.h"
using namespace Josiah;
using namespace std;
//typedef boost::unordered_map<string,float> nvmap;
typedef map<string,float> nvmap;
struct Data {
string a;
float b;
};
namespace boost { namespace serialization {
template<class Archive>
void serialize(Archive& ar, Data& d, const unsigned int) {
ar & d.a;
ar & d.b;
}
}
}
int main(int argc, char* argv[])
{
#ifdef MPI_ENABLED
mpi::environment env(argc, argv);
mpi::communicator world;
//cerr << world.rank() << endl;
float rank = world.rank();
ostringstream ostr;
ostr << rank;
string filename = "mpi.log" + ostr.str();
ofstream log(filename.c_str());
assert(log);
log << "MPI rank: " << rank << endl;
log << "MPI size: " << world.size() << endl;
FVector fv;
FName fn_r("R", ostr.str());
FName fn_s("S", ostr.str());
FName fn_t("T", ostr.str());
FName fn_i ("I", "1");
FName fn_j ("J", "1");
FName fn_k ("K", "1");
fv[fn_r] = 1.2;
fv[fn_s] = 2.1;
fv[fn_t] = -1.2;
fv[fn_i] = 1/(1 + rank);
fv[fn_j] = 2;
fv[fn_k] = 23.1;
log << "FV: " << fv << endl;
FVector sum;
//mpi::broadcast(world,fv,0);
mpi::reduce(world,fv,sum,FVectorPlus(),0);
/*
float sent = 1 / (float)(world.rank() + 3);
log << "sent: " << sent << endl;
float rcvd;
mpi::reduce(world,sent,rcvd,std::plus<float>(), 0);
if (world.rank() == 0) cerr << "Received " << rcvd << endl;
*/
if (rank == 0) {
cerr << "Sum: " << sum << endl;
}
log.close();
#endif
return 0;
}

josiah/FeatureFunction.h Normal file
@@ -0,0 +1,174 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <boost/shared_ptr.hpp>
#include "FeatureVector.h"
#include "Hypothesis.h"
#include "TranslationOption.h"
using namespace Moses;
namespace Josiah {
class Sample;
/**
* Represents a gap in the target sentence. During score computation, this is used to specify where the proposed
* TranslationOptions are to be placed. The left and right hypotheses are to the left and to the right in target order.
* Note that the leftHypo could be the null hypo (if at start) and the rightHypo could be a null pointer (if at end).
**/
struct TargetGap {
TargetGap(const Hypothesis* lh, const Hypothesis* rh, const WordsRange& s) :
leftHypo(lh), rightHypo(rh), segment(s) {
//check that they're in target order.
assert(!lh->GetPrevHypo() || lh->GetCurrTargetWordsRange() < s);
assert(!rh || s < rh->GetCurrTargetWordsRange());
}
const Hypothesis* leftHypo;
const Hypothesis* rightHypo;
WordsRange segment;
};
/** Abstract base class for Gibbler feature functions.
* 1. When a new Sample() object is created to begin sampling on a new sentence:
* - constructor - passing in the sample
* - updateTarget() - to indicate to the FeatureFunction that the target words have changed
* - assignScore() - to tell the FeatureFunction to set its initial score.
* 2. When scoring possible transitions.
* - doXXX() - to calculate the score deltas.
* 3. When performing a transition.
* - updateTarget() - called with new target words. For paired updates, this is called twice, and after the first
* call the feature_vector (in the sample) will be inconsistent with the target words
**/
class FeatureFunction {
public:
/** Initialise with new sample */
FeatureFunction(const Sample& sample): m_sample(sample) {}
/** Update the target words.*/
virtual void updateTarget(){/*do nothing*/}
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores) = 0;
/** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores) = 0;
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores) = 0;
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) = 0;
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) = 0;
virtual ~FeatureFunction() {}
protected:
const Sample& getSample() const {return m_sample;}
private:
const Sample& m_sample;
};
typedef boost::shared_ptr<FeatureFunction> FeatureFunctionHandle;
typedef std::vector<FeatureFunctionHandle> FeatureFunctionVector;
/**
* Base class for Gibbler Features.
* Feature methods are called as follows:
* 1. When Gibbler starts up, and initialises feature functions:
* - constructor
* 2. When a new Sample() object is created:
* - getFunction() - this creates a FeatureFunction object.
**/
class Feature {
public:
Feature() {}
virtual FeatureFunctionHandle getFunction(const Sample& sample) const = 0;
virtual ~Feature() {}
};
/**
* A feature function with a single value
**/
class SingleValuedFeatureFunction: public FeatureFunction {
public:
SingleValuedFeatureFunction(const Sample& sample, const std::string& name) :
FeatureFunction(sample), m_name(name) {}
virtual void assignScore(FVector& scores)
{scores[m_name] = computeScore();}
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores)
{scores[m_name] = getSingleUpdateScore(option,gap);}
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores)
{scores[m_name] = getContiguousPairedUpdateScore(leftOption,rightOption,gap);}
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{scores[m_name] = getDiscontiguousPairedUpdateScore(leftOption,rightOption,leftGap,rightGap);}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{scores[m_name] = getFlipUpdateScore(leftOption,rightOption,leftGap,rightGap);}
/**
* Actual feature functions need to implement these methods.
**/
protected:
virtual FValue computeScore() = 0;
/** Score due to one segment */
virtual FValue getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap) = 0;
/** Score due to two segments. The left and right refer to the target positions.**/
virtual FValue getContiguousPairedUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap) = 0;
virtual FValue getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) = 0;
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual FValue getFlipUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) = 0;
virtual ~SingleValuedFeatureFunction() {}
private:
FName m_name;
};
typedef boost::shared_ptr<Feature> FeatureHandle;
typedef std::vector<FeatureHandle> FeatureVector;
} //namespace
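A hedged sketch of a SingleValuedFeatureFunction subclass following the lifecycle documented above: a hypothetical target-length feature, not part of this commit, using only API visible in these files:
#include "FeatureFunction.h"
#include "Gibbler.h"

namespace Josiah {
class TargetLengthFeatureFunction : public SingleValuedFeatureFunction {
public:
  TargetLengthFeatureFunction(const Sample& sample) :
    SingleValuedFeatureFunction(sample, "TargetLength") {}
protected:
  //full recomputation, called via assignScore() when sampling starts
  virtual FValue computeScore() {
    return getSample().GetTargetWords().size();
  }
  //score of the segment filled by a single proposed option
  virtual FValue getSingleUpdateScore(const TranslationOption* option,
                                      const TargetGap&) {
    return option->GetTargetPhrase().GetSize();
  }
  virtual FValue getContiguousPairedUpdateScore(const TranslationOption* left,
      const TranslationOption* right, const TargetGap&) {
    return left->GetTargetPhrase().GetSize() + right->GetTargetPhrase().GetSize();
  }
  virtual FValue getDiscontiguousPairedUpdateScore(const TranslationOption* left,
      const TranslationOption* right, const TargetGap&, const TargetGap&) {
    return left->GetTargetPhrase().GetSize() + right->GetTargetPhrase().GetSize();
  }
  //a flip reorders the two phrases without changing their combined length
  virtual FValue getFlipUpdateScore(const TranslationOption* left,
      const TranslationOption* right, const TargetGap&, const TargetGap&) {
    return left->GetTargetPhrase().GetSize() + right->GetTargetPhrase().GetSize();
  }
};
}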

josiah/Gain.cpp Normal file
@@ -0,0 +1,94 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <fstream>
#include "FactorCollection.h"
#include "Util.h"
#include "Gain.h"
using namespace Moses;
using namespace std;
namespace Josiah {
void TextToTranslation(const string& text, Translation& words) {
vector<string> tokens = Tokenize(text);
words.clear();
FactorCollection& factorCollection = FactorCollection::Instance();
for (size_t i = 0; i < tokens.size(); ++i) {
const Factor* factor = factorCollection.AddFactor(Input, 0, tokens[i]);
words.push_back(factor);
}
}
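//Example (editor's note): TextToTranslation("guten tag", words) leaves
//'words' holding two Factor pointers, one per whitespace-separated token,
//interned in the global FactorCollection.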
void Gain::LoadReferences(const vector<string>& refFiles,
const string& sourceFile) {
assert(refFiles.size());
vector<boost::shared_ptr<ifstream> > refIns(refFiles.size());
for (size_t i = 0; i < refFiles.size(); ++i) {
refIns[i].reset(new ifstream());
refIns[i]->open(refFiles[i].c_str());
assert(refIns[i]->good());
}
ifstream srcIn(sourceFile.c_str());
assert(srcIn);
size_t count = 0;
while(srcIn.good()) {
string line;
getline(srcIn,line);
if (line.empty()) continue;
Translation source;
TextToTranslation(line,source);
vector<Translation> refs(refFiles.size());
for (size_t i = 0; i < refFiles.size(); ++i) {
getline(*refIns[i],line);
assert(refIns[i]->good());
TextToTranslation(line,refs[i]);
}
AddReferences(refs,source);
++count;
}
//check that we have reached the end of all the reference files
for (size_t i = 0; i < refFiles.size(); ++i) {
string line;
getline(*refIns[i],line);
assert(line.empty());
}
VERBOSE(1, "Loaded " << count << " references" << endl);
}
GainFunctionHandle Gain::GetGainFunction(size_t sentenceId) {
vector<size_t> sentenceIds;
sentenceIds.push_back(sentenceId);
return GetGainFunction(sentenceIds);
}
float GainFunction::Evaluate(const Translation& hypothesis) const {
vector<Translation> hyps;
hyps.push_back(hypothesis);
return Evaluate(hyps);
}
}

66
josiah/Gain.h Normal file
View File

@ -0,0 +1,66 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <boost/shared_ptr.hpp>
#include "Decoder.h"
namespace Josiah {
class GainFunction;
typedef boost::shared_ptr<GainFunction> GainFunctionHandle;
void TextToTranslation(const std::string& text, Translation& words);
/**
* Factory for gain functions.
**/
class Gain {
public:
/** Load the reference files */
void LoadReferences(const std::vector<std::string>& refFilenames,
const std::string& sourceFile);
/** Get the function to calculate the gain on these sentences */
virtual GainFunctionHandle GetGainFunction(const std::vector<size_t>& sentenceIds) = 0;
/** Add the set of references for a specific sentence */
virtual void AddReferences(const std::vector<Translation>& refs, const Translation& source) = 0;
/** Convenience method for a single sentence */
GainFunctionHandle GetGainFunction(size_t sentenceId);
virtual float GetAverageReferenceLength(size_t sentenceId) const = 0;
virtual ~Gain() {}
};
class GainFunction {
public:
/** Calculate Gain for set of hypotheses */
virtual float Evaluate(const std::vector<Translation>& hypotheses) const = 0;
/** Add the stats for this hypothesis to the smoothing stats being collected */
virtual void AddSmoothingStats(size_t sentenceId, const Translation& hypothesis) {}
/** Inform the GainFunction that we've finished with this sentence, and it can now
update the parent's stats */
virtual void UpdateSmoothingStats() {}
/** Shortcut for evaluating just one sentence */
float Evaluate(const Translation& hypothesis) const;
virtual ~GainFunction() {}
};
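//Usage sketch (editor's example, not part of this commit). A concrete Gain
//subclass - 'BleuGain' is an invented name - would typically be driven like:
//
//  BleuGain gain;
//  gain.LoadReferences(refFiles, sourceFile);
//  GainFunctionHandle gf = gain.GetGainFunction(sentenceId);
//  float score = gf->Evaluate(hypothesis); //single-sentence shortcut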
}

441
josiah/Gibbler.cpp Normal file
View File

@ -0,0 +1,441 @@
#include "Gibbler.h"
#include "Derivation.h"
#include "Hypothesis.h"
#include "TranslationOptionCollection.h"
#include "GibblerMaxTransDecoder.h"
#include "StaticData.h"
#include "AnnealingSchedule.h"
#include "GibbsOperator.h"
using namespace std;
namespace Josiah {
Sample::Sample(Hypothesis* target_head, const std::vector<Word>& source, const FeatureVector& features, bool doRaoBlackwell) :
m_sourceWords(source), m_doRaoBlackwell(doRaoBlackwell), m_updates(0) {
std::map<int, Hypothesis*> source_order;
this->target_head = target_head;
Hypothesis* next = NULL;
for (Hypothesis* h = target_head; h; h = const_cast<Hypothesis*>(h->GetPrevHypo())) {
size_t startPos = h->GetCurrSourceWordsRange().GetStartPos();
SetSourceIndexedHyps(h);
if (h->GetPrevHypo()){
source_order[startPos] = h;
}
else {
source_order[-1] = h;
}
this->target_tail = h;
h->SetNextHypo(next);
next = h;
}
std::map<int, Hypothesis*>::const_iterator source_it = source_order.begin();
Hypothesis* prev = NULL;
this->source_tail = source_it->second;
for (; source_it != source_order.end(); source_it++) {
Hypothesis *h = source_it->second;
h->SetSourcePrevHypo(prev);
if (prev != NULL)
prev->SetSourceNextHypo(h);
this->source_head = h;
prev = h;
}
this->source_head->SetSourceNextHypo(NULL);
this->target_head->SetNextHypo(NULL);
this->source_tail->SetSourcePrevHypo(NULL);
this->target_tail->SetPrevHypo(NULL);
for (FeatureVector::const_iterator i=features.begin(); i!=features.end(); ++i){
// tell the feature that we have a new sample
m_featureFunctions.push_back((*i)->getFunction(*this));
}
UpdateTargetWords();
for (FeatureFunctionVector::const_iterator i=m_featureFunctions.begin(); i!=m_featureFunctions.end(); ++i){
(*i)->assignScore(feature_values);
}
}
Sample::~Sample() {
RemoveAllInColl(cachedSampledHyps);
}
Hypothesis* Sample::CreateHypothesis(Hypothesis& prevTarget, const TranslationOption& option) {
UpdateCoverageVector(prevTarget, option);
Hypothesis* hypo = Hypothesis::Create(prevTarget, option, NULL);
prevTarget.SetNextHypo(hypo);
cachedSampledHyps.insert(hypo);
SetSourceIndexedHyps(hypo);
//SetTgtIndexedHyps(hypo);
return hypo;
}
void Sample::UpdateTargetWords() {
m_targetWords.clear();
const Hypothesis* currHypo = GetTargetTail(); //target tail
IFVERBOSE(2) {
VERBOSE(2,"Sentence: ");
}
//we're now at the dummy hypo at the start of the sentence
while ((currHypo = (currHypo->GetNextHypo()))) {
const TargetPhrase& targetPhrase = currHypo->GetCurrTargetPhrase();
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
m_targetWords.push_back(targetPhrase.GetWord(i));
IFVERBOSE(2) {
VERBOSE(2,targetPhrase.GetWord(i) << " ");
}
}
IFVERBOSE(2) {
if (currHypo->GetCurrTargetPhrase().GetSize() > 0) {
VERBOSE(2, "|" << currHypo->GetCurrSourceWordsRange().GetStartPos()
<< "-" << currHypo->GetCurrSourceWordsRange().GetEndPos() << "| ");
}
}
}
IFVERBOSE(2) {
VERBOSE(2,endl);
}
IFVERBOSE(2) {
VERBOSE(2,"FVs: " << feature_values << endl);
}
//Inform the extra features that the target words have changed
for (FeatureFunctionVector::const_iterator i=m_featureFunctions.begin(); i!=m_featureFunctions.end(); ++i){
(*i)->updateTarget();
}
}
Hypothesis* Sample::GetHypAtSourceIndex(size_t i) {
std::map<size_t, Hypothesis*>::iterator it = sourceIndexedHyps.find(i);
if (it == sourceIndexedHyps.end())
return NULL;
return it->second;
}
void Sample::SetSourceIndexedHyps(Hypothesis* h) {
size_t startPos = h->GetCurrSourceWordsRange().GetStartPos();
size_t endPos = h->GetCurrSourceWordsRange().GetEndPos();
if (startPos + 1 == 0 ) { //startPos == (size_t)-1: the initial (dummy) hypothesis
sourceIndexedHyps[startPos] = h;
return;
}
for (size_t i = startPos; i <= endPos; i++) {
sourceIndexedHyps[i] = h;
}
}
void Sample::SetTgtNextHypo(Hypothesis* newHyp, Hypothesis* currNextHypo) {
if (newHyp) {
newHyp->SetNextHypo(currNextHypo);
}
if (currNextHypo) {
currNextHypo->SetPrevHypo(newHyp);
}
}
void Sample::SetSrcPrevHypo(Hypothesis* newHyp, Hypothesis* srcPrevHypo) {
if (newHyp) {
newHyp->SetSourcePrevHypo(srcPrevHypo);
}
if (srcPrevHypo) {
srcPrevHypo->SetSourceNextHypo(newHyp);
}
}
void Sample::FlipNodes(const TranslationOption& leftTgtOption, const TranslationOption& rightTgtOption, Hypothesis* prevTgtHypo, Hypothesis* nextTgtHypo, const FVector& deltaFV) {
bool tgtSideContiguous = false;
Hypothesis *oldRightHypo = GetHypAtSourceIndex(leftTgtOption.GetSourceWordsRange().GetStartPos()); //this one used to be on the right
Hypothesis *oldLeftHypo = GetHypAtSourceIndex(rightTgtOption.GetSourceWordsRange().GetStartPos());//this one used to be on the left
//create the new leftmost target hypothesis
Hypothesis *newLeftHypo = CreateHypothesis(*prevTgtHypo, leftTgtOption);
//are the options contiguous on the target side?
Hypothesis *tgtSidePredecessor = const_cast<Hypothesis*>(oldRightHypo->GetPrevHypo()); //find its target side predecessor
//If the flip is contiguous on the target side, then the predecessor is the flipped one
if (tgtSidePredecessor->GetCurrSourceWordsRange() == rightTgtOption.GetSourceWordsRange()) {
tgtSidePredecessor = newLeftHypo;
tgtSideContiguous = true;
}
//update the target side sample pointers now
if (!tgtSideContiguous) {
Hypothesis *leftHypoTgtSideSuccessor = const_cast<Hypothesis*>(oldLeftHypo->GetNextHypo());
SetTgtNextHypo(newLeftHypo, leftHypoTgtSideSuccessor);
}
//update the target word ranges of the ones in between
if (!tgtSideContiguous) {
size_t startTgtPos = newLeftHypo->GetCurrTargetWordsRange().GetEndPos();
for (Hypothesis *h = const_cast<Hypothesis*>(oldLeftHypo->GetNextHypo()); h != oldRightHypo ; h = const_cast<Hypothesis*>(h->GetNextHypo())) {
WordsRange& range = h->GetCurrTargetWordsRange();
size_t size = range.GetNumWordsCovered();
range.SetStartPos(startTgtPos+1);
range.SetEndPos(startTgtPos+size);
startTgtPos += size;
}
}
//now create the one that goes on the right
Hypothesis *newRightHypo = CreateHypothesis(*tgtSidePredecessor, rightTgtOption);
SetTgtNextHypo(newRightHypo, nextTgtHypo);
//update the source side sample pointers now
Hypothesis* newLeftSourcePrevHypo = GetHypAtSourceIndex(newLeftHypo->GetCurrSourceWordsRange().GetStartPos() - 1 );
Hypothesis* newLeftSourceNextHypo = GetHypAtSourceIndex(newLeftHypo->GetCurrSourceWordsRange().GetEndPos() + 1 );
SetSrcPrevHypo(newLeftHypo, newLeftSourcePrevHypo);
SetSrcPrevHypo(newLeftSourceNextHypo, newLeftHypo);
Hypothesis* newRightSourcePrevHypo = GetHypAtSourceIndex(newRightHypo->GetCurrSourceWordsRange().GetStartPos() - 1 );
Hypothesis* newRightSourceNextHypo = GetHypAtSourceIndex(newRightHypo->GetCurrSourceWordsRange().GetEndPos() + 1 );
SetSrcPrevHypo(newRightHypo, newRightSourcePrevHypo);
SetSrcPrevHypo(newRightSourceNextHypo, newRightHypo);
UpdateHead(oldRightHypo, newLeftHypo, source_head);
UpdateHead(oldLeftHypo, newRightHypo, source_head);
UpdateHead(oldRightHypo, newRightHypo, target_head);
UpdateHead(oldLeftHypo, newRightHypo, target_head);
UpdateFeatureValues(deltaFV);
UpdateTargetWords();
DeleteFromCache(oldRightHypo);
DeleteFromCache(oldLeftHypo);
//Sanity check
IFVERBOSE(4) {
float totalDistortion(0.0);
for (Hypothesis* h = target_tail; h; h = const_cast<Hypothesis*>(h->GetNextHypo())) {
Hypothesis *next = const_cast<Hypothesis*>(h->GetNextHypo());
if (next) {
totalDistortion += ComputeDistortionDistance(h->GetCurrSourceWordsRange(), next->GetCurrSourceWordsRange());
}
else {
break;
}
}
VERBOSE(4, "Total distortion for this sample " << totalDistortion << endl);
}
}
float Sample::ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current)
{
int dist = 0;
if (prev.GetNumWordsCovered() == 0) {
dist = current.GetStartPos();
} else {
dist = (int)prev.GetEndPos() - (int)current.GetStartPos() + 1 ;
}
return - (float) abs(dist);
}
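//Worked example (editor's note): if the previous phrase covers source words
//[2,4] and the current phrase covers [5,6], dist = 4 - 5 + 1 = 0, so the
//score is 0 (a monotone step). Jumping back to [0,1] gives dist = 4 - 0 + 1 = 5,
//i.e. a score of -5; larger jumps are penalised more heavily.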
void Sample::ChangeTarget(const TranslationOption& option, const FVector& deltaFV) {
size_t optionStartPos = option.GetSourceWordsRange().GetStartPos();
Hypothesis *currHyp = GetHypAtSourceIndex(optionStartPos);
Hypothesis& prevHyp = *(const_cast<Hypothesis*>(currHyp->GetPrevHypo()));
Hypothesis *newHyp = CreateHypothesis(prevHyp, option);
SetTgtNextHypo(newHyp, const_cast<Hypothesis*>(currHyp->GetNextHypo()));
UpdateHead(currHyp, newHyp, target_head);
SetSrcPrevHypo(newHyp, const_cast<Hypothesis*>(currHyp->GetSourcePrevHypo()));
SetSrcPrevHypo(const_cast<Hypothesis*>(currHyp->GetSourceNextHypo()), newHyp);
UpdateHead(currHyp, newHyp, source_head);
//Update target word ranges
int tgtSizeChange = static_cast<int> (option.GetTargetPhrase().GetSize()) - static_cast<int> (currHyp->GetTargetPhrase().GetSize());
if (tgtSizeChange != 0) {
UpdateTargetWordRange(newHyp, tgtSizeChange);
}
DeleteFromCache(currHyp);
UpdateFeatureValues(deltaFV);
UpdateTargetWords();
}
void Sample::MergeTarget(const TranslationOption& option, const FVector& deltaFV) {
size_t optionStartPos = option.GetSourceWordsRange().GetStartPos();
size_t optionEndPos = option.GetSourceWordsRange().GetEndPos();
Hypothesis *currStartHyp = GetHypAtSourceIndex(optionStartPos);
Hypothesis *currEndHyp = GetHypAtSourceIndex(optionEndPos);
assert(currStartHyp != currEndHyp);
Hypothesis* prevHyp = NULL;
Hypothesis* newHyp = NULL;
if (currStartHyp->GetCurrTargetWordsRange() < currEndHyp->GetCurrTargetWordsRange()) {
prevHyp = const_cast<Hypothesis*> (currStartHyp->GetPrevHypo());
newHyp = CreateHypothesis(*prevHyp, option);
//Set the target ptrs
SetTgtNextHypo(newHyp, const_cast<Hypothesis*>(currEndHyp->GetNextHypo()));
UpdateHead(currEndHyp, newHyp, target_head);
}
else {
prevHyp = const_cast<Hypothesis*> (currEndHyp->GetPrevHypo());
newHyp = CreateHypothesis(*prevHyp, option);
SetTgtNextHypo(newHyp, const_cast<Hypothesis*>(currStartHyp->GetNextHypo()));
UpdateHead(currStartHyp, newHyp, target_head);
}
//Set the source ptrs
SetSrcPrevHypo(newHyp, const_cast<Hypothesis*>(currStartHyp->GetSourcePrevHypo()));
SetSrcPrevHypo(const_cast<Hypothesis*>(currEndHyp->GetSourceNextHypo()), newHyp);
UpdateHead(currEndHyp, newHyp, source_head);
//Update target word ranges
int newTgtSize = option.GetTargetPhrase().GetSize();
int prevTgtSize = currStartHyp->GetTargetPhrase().GetSize() + currEndHyp->GetTargetPhrase().GetSize();
int tgtSizeChange = newTgtSize - prevTgtSize;
if (tgtSizeChange != 0) {
UpdateTargetWordRange(newHyp, tgtSizeChange);
}
DeleteFromCache(currStartHyp);
DeleteFromCache(currEndHyp);
UpdateFeatureValues(deltaFV);
UpdateTargetWords();
}
void Sample::SplitTarget(const TranslationOption& leftTgtOption, const TranslationOption& rightTgtOption, const FVector& deltaFV) {
size_t optionStartPos = leftTgtOption.GetSourceWordsRange().GetStartPos();
Hypothesis *currHyp = GetHypAtSourceIndex(optionStartPos);
Hypothesis& prevHyp = *(const_cast<Hypothesis*>(currHyp->GetPrevHypo()));
Hypothesis *newLeftHyp = CreateHypothesis(prevHyp, leftTgtOption);
Hypothesis *newRightHyp = CreateHypothesis(*newLeftHyp, rightTgtOption);
//Update tgt ptrs
SetTgtNextHypo(newRightHyp, const_cast<Hypothesis*>(currHyp->GetNextHypo()));
UpdateHead(currHyp, newRightHyp, target_head);
//Update src ptrs
assert (newLeftHyp->GetCurrSourceWordsRange() < newRightHyp->GetCurrSourceWordsRange()); //monotone
SetSrcPrevHypo(newLeftHyp, const_cast<Hypothesis*>(currHyp->GetSourcePrevHypo()));
SetSrcPrevHypo(newRightHyp, newLeftHyp);
SetSrcPrevHypo(const_cast<Hypothesis*>(currHyp->GetSourceNextHypo()), newRightHyp);
UpdateHead(currHyp, newRightHyp, source_head);
//Update target word ranges
int prevTgtSize = currHyp->GetTargetPhrase().GetSize();
int newTgtSize = newLeftHyp->GetTargetPhrase().GetSize() + newRightHyp->GetTargetPhrase().GetSize();
int tgtSizeChange = newTgtSize - prevTgtSize;
if (tgtSizeChange != 0) {
UpdateTargetWordRange(newRightHyp, tgtSizeChange);
}
DeleteFromCache(currHyp);
UpdateFeatureValues(deltaFV);
UpdateTargetWords();
}
void Sample::UpdateHead(Hypothesis* currHyp, Hypothesis* newHyp, Hypothesis *&head) {
if (head == currHyp)
head = newHyp;
}
void Sample::UpdateTargetWordRange(Hypothesis* hyp, int tgtSizeChange) {
Hypothesis* nextHyp = const_cast<Hypothesis*>(hyp->GetNextHypo());
if (!nextHyp)
return;
for (Hypothesis* h = nextHyp; h; h = const_cast<Hypothesis*>(h->GetNextHypo())){
WordsRange& range = h->GetCurrTargetWordsRange();
range.SetStartPos(range.GetStartPos()+tgtSizeChange);
range.SetEndPos(range.GetEndPos()+tgtSizeChange);
}
}
void Sample::UpdateFeatureValues(const FVector& deltaFV) {
feature_values +=deltaFV;
}
void Sample::CheckFeatureConsistency() const {
FVector expected;
for (FeatureFunctionVector::const_iterator i=m_featureFunctions.begin(); i!=m_featureFunctions.end(); ++i){
(*i)->assignScore(expected);
}
if (expected != feature_values) {
VERBOSE(1, "Expected: " << expected << endl);
VERBOSE(1, "Actual: " << feature_values << endl);
ostringstream msg;
msg << "Score mismatch: e-a = " << (expected-feature_values);
throw runtime_error(msg.str());
}
}
//update the bitmap of the predecessor
void Sample::UpdateCoverageVector(Hypothesis& hyp, const TranslationOption& option) {
size_t startPos = option.GetSourceWordsRange().GetStartPos();
size_t endPos = option.GetSourceWordsRange().GetEndPos();
WordsBitmap & wordBitmap = hyp.GetWordsBitmap();
wordBitmap.SetValue(startPos, endPos, false);
}
void Sample::DeleteFromCache(Hypothesis *hyp) {
set<Hypothesis*>::iterator it = cachedSampledHyps.find(hyp); //use the set's own O(log n) find rather than std::find
if (it != cachedSampledHyps.end()){
delete *it;
cachedSampledHyps.erase(it);
}
}
bool Sample::DoRaoBlackwell() const {
return m_doRaoBlackwell;
}
void Sample::AddConditionalFeatureValues( const FVector & fv ) {
m_conditionalFeatureValues += fv;
++m_updates;
}
const FVector Sample::GetConditionalFeatureValues( ) const {
if (m_doRaoBlackwell) {
FVector fv(m_conditionalFeatureValues);
fv /= m_updates;
return fv;
} else {
return GetFeatureValues();
}
}
void Sample::ResetConditionalFeatureValues(){
m_updates = 0;
m_conditionalFeatureValues.clear();
}
}

113
josiah/Gibbler.h Normal file
View File

@ -0,0 +1,113 @@
#pragma once
#include <map>
#include <set>
#include <vector>
#include "FeatureFunction.h"
#include "FeatureVector.h"
namespace Moses {
class Hypothesis;
class TranslationOptionCollection;
class TranslationOption;
class Word;
}
using namespace Moses;
namespace Josiah {
class AnnealingSchedule;
class GibbsOperator;
class Sampler;
class OnlineLearner;
class SampleAcceptor;
class Sample {
private:
std::vector<Word> m_targetWords;
const std::vector<Word>& m_sourceWords;
Hypothesis* target_head;
Hypothesis* target_tail;
Hypothesis* source_head;
Hypothesis* source_tail;
FVector feature_values;
FeatureFunctionVector m_featureFunctions;
std::set<Hypothesis*> cachedSampledHyps;
std::map<size_t, Hypothesis*> sourceIndexedHyps;
//Used for conditional estimation (aka Rao-Blackwellisation)
bool m_doRaoBlackwell;
FVector m_conditionalFeatureValues;
size_t m_updates;
void SetSourceIndexedHyps(Hypothesis* h);
void UpdateFeatureValues(const FVector& deltaFV);
void UpdateTargetWordRange(Hypothesis* hyp, int tgtSizeChange);
void UpdateHead(Hypothesis* currHyp, Hypothesis* newHyp, Hypothesis *&head);
void UpdateCoverageVector(Hypothesis& hyp, const TranslationOption& option) ;
Hypothesis* CreateHypothesis( Hypothesis& prevTarget, const TranslationOption& option);
void SetTgtNextHypo(Hypothesis* newHyp, Hypothesis* currNextHypo);
void SetSrcPrevHypo(Hypothesis* newHyp, Hypothesis* srcPrevHypo);
void UpdateTargetWords();
void DeleteFromCache(Hypothesis *hyp);
float ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current) ;
public:
Sample(Hypothesis* target_head, const std::vector<Word>& source, const FeatureVector& features, bool raoBlackwell);
~Sample();
int GetSourceSize() const { return m_sourceWords.size(); }
Hypothesis* GetHypAtSourceIndex(size_t ) ;
const Hypothesis* GetSampleHypothesis() const {
return target_head;
}
const Hypothesis* GetTargetTail() const {
return target_tail;
}
const FVector& GetFeatureValues() const {
return feature_values;
}
const FeatureFunctionVector& GetFeatureFunctions() const {
return m_featureFunctions;
}
/** Check that the feature values are correct */
void CheckFeatureConsistency() const;
void FlipNodes(size_t x, size_t y, const FVector& deltaFV) ;
void FlipNodes(const TranslationOption& , const TranslationOption&, Hypothesis* , Hypothesis* , const FVector& deltaFV);
void ChangeTarget(const TranslationOption& option, const FVector& deltaFV);
void MergeTarget(const TranslationOption& option, const FVector& deltaFV);
void SplitTarget(const TranslationOption& leftTgtOption, const TranslationOption& rightTgtOption, const FVector& deltaFV);
/** Words in the current target */
const std::vector<Word>& GetTargetWords() const { return m_targetWords; }
const std::vector<Word>& GetSourceWords() const { return m_sourceWords; }
int GetTargetLength() { return m_targetWords.size(); }
//Used for conditional estimation (aka Rao-Blackwellisation)
bool DoRaoBlackwell() const;
void AddConditionalFeatureValues(const FVector& fv);
void ResetConditionalFeatureValues();
const FVector GetConditionalFeatureValues() const;
friend class Sampler;
friend class GibbsOperator;
};
typedef boost::shared_ptr<Sample> SampleHandle;
typedef std::vector<SampleHandle> SampleVector;
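//Lifecycle sketch (editor's example): a Sample wraps the decoder's hypothesis
//chain and is mutated in place by the Gibbs operators. All names below are
//declared in this header; the flow itself is illustrative.
//
//  Sample sample(targetHead, sourceWords, features, false /*raoBlackwell*/);
//  sample.ChangeTarget(option, deltaFV);  //apply an accepted delta
//  sample.CheckFeatureConsistency();      //debug: recomputed vs cached scores
//  const std::vector<Word>& output = sample.GetTargetWords();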
}

30
josiah/GibblerAnnealedExpectedLossTrainer.cpp Normal file
View File

@ -0,0 +1,30 @@
#include "GibblerAnnealedExpectedLossTrainer.h"
#include "Hypothesis.h"
#include "Derivation.h"
using namespace std;
namespace Josiah {
float GibblerAnnealedExpectedLossCollector::UpdateGradient(FVector* gradient, FValue* exp_len, FValue *unreg_exp_gain) {
//the distribution is fetched here so that it only has to be done once during gradient calculation
m_p.clear();
m_derivationCollector.getDistribution(m_p);
return ExpectedLossCollector::UpdateGradient(gradient,exp_len, unreg_exp_gain);
}
float GibblerAnnealedExpectedLossCollector::getRegularisationGradientFactor(size_t i) {
FValue temperature = GetTemperature();
const Derivation* d = m_derivationCollector.getSample(i);
FValue prob = m_p[d];
return -temperature * log (N()*prob) ;
}
float GibblerAnnealedExpectedLossCollector::getRegularisation() {
return GetTemperature() * m_derivationCollector.getEntropy();
}
}

46
josiah/GibblerAnnealedExpectedLossTrainer.h Normal file
View File

@ -0,0 +1,46 @@
#pragma once
#include <map>
#include <utility>
#include <ext/hash_map>
#include "ScoreComponentCollection.h"
#include "Derivation.h"
#include "GibblerExpectedLossTraining.h"
#include "Phrase.h"
#include "Sampler.h"
#include "GibblerMaxDerivDecoder.h"
using namespace Moses;
namespace Josiah {
class GainFunction;
class GibblerAnnealedExpectedLossCollector : public ExpectedLossCollector {
public:
GibblerAnnealedExpectedLossCollector(const GainFunctionHandle& gain, Sampler& sampler)
: ExpectedLossCollector(gain) {
sampler.AddCollector(&m_derivationCollector);
}
FValue ComputeEntropy();
FValue GetTemperature() { return m_temp;}
void SetTemperature(FValue temp) {m_temp = temp;}
virtual FValue UpdateGradient(FVector* gradient, FValue* exp_len, FValue* unreg_exp_gain);
virtual FValue getRegularisationGradientFactor(size_t i);
virtual FValue getRegularisation();
private:
float m_temp;
DerivationCollector m_derivationCollector;
//cache the distribution during gradient calculation
std::map<const Derivation*,double> m_p;
};
}

112
josiah/GibblerExpectedLossTraining.cpp Normal file
View File

@ -0,0 +1,112 @@
#include <ext/algorithm>
#include "GibblerExpectedLossTraining.h"
#include "Hypothesis.h"
#include "WeightManager.h"
using namespace std;
using namespace __gnu_cxx;
namespace Josiah {
void ExpectedLossCollector::collect(Sample& s) {
const Hypothesis* h = s.GetSampleHypothesis();
vector<const Factor*> trans;
h->GetTranslation(&trans, 0);
const FValue gain = m_gainFunction->Evaluate(trans);
m_lengths.push_back(trans.size());
const FVector& fvs = s.GetFeatureValues();
const FVector& rbFvs = s.GetConditionalFeatureValues();
VERBOSE(2, gain << "\tFeatures=" << fvs << endl);
VERBOSE(2, gain << "\tRao-Blackwellised features=" << rbFvs << endl);
//VERBOSE(0, "Collected : Target " << s << ", gain " << gain << "\tFeatures=" << s.GetFeatureValues() << endl);
m_gains.push_back(gain);
// m_samples.push_back(Derivation(s));
m_featureVectors.push_back(fvs);
m_rbFeatureVectors.push_back(rbFvs);
MPI_VERBOSE(2,"Sample: " << Derivation(s) << endl)
}
float ExpectedLossCollector::UpdateGradient(FVector* gradient,FValue *exp_len, FValue *unreg_exp_gain) {
FVector feature_expectations = getFeatureExpectations();
MPI_VERBOSE(1,"FEXP: " << feature_expectations << endl)
const FVector& weights = WeightManager::instance().get();
FValue exp_score = inner_product(feature_expectations, weights);
//gradient computation
FVector grad;
FValue exp_gain = 0;
for (size_t i = 0; i < N(); ++i) {
FVector fv = m_featureVectors[i];
MPI_VERBOSE(2,"FV: " << fv)
const FValue gain = m_gains[i];
fv -= feature_expectations;
MPI_VERBOSE(2,"DIFF: " << fv)
fv *= (gain + getRegularisationGradientFactor(i));
MPI_VERBOSE(2,"GAIN: " << gain << " RF: " << getRegularisationGradientFactor(i) << endl);
exp_gain += gain/N();
fv /= N();
MPI_VERBOSE(2,"WEIGHTED: " << fv << endl)
grad += fv;
MPI_VERBOSE(2,"grad: " << grad << endl)
}
cerr << "Exp gain without reg term : " << exp_gain << endl;
*unreg_exp_gain = exp_gain;
exp_gain += getRegularisation();
cerr << "Exp gain with reg term: " << exp_gain << endl;
(*gradient) += grad;
MPI_VERBOSE(1,"Gradient: " << grad << endl)
cerr << "Gradient: " << grad << endl;
//expected length
if (exp_len) {
*exp_len = 0;
for (size_t i = 0; i < N(); ++i) {
*exp_len += m_lengths[i];
}
*exp_len /= N();
}
return exp_gain;
}
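//Editor's note: the loop above is a Monte-Carlo estimate of the gradient of
//the expected gain,
//  grad = (1/N) * sum_i (g_i + r_i) * (f_i - fbar)
//where f_i is the i-th sampled feature vector, fbar the feature expectation,
//g_i the gain, and r_i the regularisation factor (zero in this base class).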
double ExpectedLossCollector::getExpectedGain() const {
double exp_gain = 0;
for (size_t i = 0; i < N(); ++i) {
exp_gain += m_gains[i];
}
exp_gain /= N();
return exp_gain;
}
FVector ExpectedLossCollector::getFeatureExpectations() const {
FVector sum;
for (size_t i = 0; i < m_featureVectors.size(); ++i) {
sum += m_featureVectors[i];
}
sum /= m_featureVectors.size();
return sum;
}
}

48
josiah/GibblerExpectedLossTraining.h Normal file
View File

@ -0,0 +1,48 @@
#pragma once
#include <map>
#include <utility>
#include "Derivation.h"
#include "FeatureVector.h"
#include "Gain.h"
#include "Gibbler.h"
#include "MpiDebug.h"
#include "StaticData.h"
#include "SampleCollector.h"
using namespace Moses;
namespace Josiah {
//class Derivation;
class ExpectedLossCollector : public SampleCollector {
public:
ExpectedLossCollector(const GainFunctionHandle& gainFunction): m_gainFunction(gainFunction) {}
//ExpectedLossCollector() {}
virtual ~ExpectedLossCollector() {}
virtual void collect(Sample& sample);
// returns the expected gain and expected sentence length
virtual float UpdateGradient(FVector* gradient, FValue* exp_len, FValue* unreg_gain);
virtual FVector getFeatureExpectations() const;
double getExpectedGain() const;
protected:
/** Hooks for adding, e.g., entropy regularisation. The first is added into the gradient, the second to the objective.*/
virtual FValue getRegularisationGradientFactor(size_t i) {return 0;}
virtual FValue getRegularisation() {return 0;}
virtual bool ComputeScaleGradient() {return false;}
const GainFunctionHandle m_gainFunction; //held by value: a reference member bound to the constructor argument could dangle
std::vector<FVector> m_featureVectors;
std::vector<FVector> m_rbFeatureVectors; // Rao-Blackwellised feature vectors
std::vector<FValue> m_gains;
std::vector<size_t> m_lengths;
// std::vector<Derivation> m_samples;
};
}

87
josiah/GibblerMaxDerivDecoder.cpp Normal file
View File

@ -0,0 +1,87 @@
#include "GibblerMaxDerivDecoder.h"
#include "StaticData.h"
#include "MpiDebug.h"
#include <iomanip>
using namespace Moses;
using namespace std;
namespace Josiah {
void DerivationCollector::outputDerivationProbability(const DerivationProbability& dp,size_t n, std::ostream& out) {
out << std::setprecision(8) << dp.second << " " << dp.second*n <<" " << *(dp.first);
}
void DerivationCollector::reset() {
MaxCollector<Derivation>::reset();
m_derivByTrans.clear();
}
void DerivationCollector::collect(Sample& sample) {
collectSample(Derivation(sample));
IFVERBOSE(1) {
VERBOSE(1,"Collected: " << Derivation(sample) << endl);
}
if (m_collectDerivByTrans) {
//derivations per translation
Derivation d(sample);
ostringstream os;
vector<string> sentence;
d.getTargetSentence(sentence);
copy(sentence.begin(),sentence.end(),ostream_iterator<string>(os," "));
m_derivByTrans[os.str()].insert(d);
}
size_t n = N() + 1;
if (m_pd > 0 && n > 0 && n%m_pd == 0) {
pair<const Derivation*,float> max = getMax();
if (max.first) {
MPI_VERBOSE(0, "MaxDeriv(" << n << "): " << std::setprecision(8) << max.second << " " << max.second*n <<" " << *(max.first) << endl)
MPI_VERBOSE(0, "DerivEntropy(" << n << "): " << getEntropy() << endl)
}
}
}
void DerivationCollector::outputDerivationsByTranslation(ostream& out) {
out << "Derivations per translation" << endl;
multimap<size_t,string,greater<size_t> > sortedCounts;
for (map<string, set<Derivation> >::const_iterator i = m_derivByTrans.begin();
i != m_derivByTrans.end(); ++i) {
sortedCounts.insert(pair<size_t,string>(i->second.size(),i->first));
}
for (multimap<size_t,string, greater<size_t> >::const_iterator i = sortedCounts.begin(); i != sortedCounts.end(); ++i) {
out << "COUNT: " << i->first << " TRANS:" << i->second << endl;
if (i->first > 1) {
for (set<Derivation>::const_iterator j = m_derivByTrans[i->second].begin();
j != m_derivByTrans[i->second].end(); ++j) {
out << *j << endl;
}
}
}
}
/**argmax and max*/
std::pair<const Derivation*,float> DerivationCollector::getMAP() const {
const Derivation* argmax = NULL;
float max = -10000;
map<const Derivation*,double> p;
getDistribution(p);
for (map<const Derivation*,double>::const_iterator pi = p.begin(); pi != p.end(); ++pi) {
float score = pi->first->getScore();
if (score > max) {
max = score;
argmax = pi->first;
}
}
return pair<const Derivation*,float>(argmax,max);
}
}

30
josiah/GibblerMaxDerivDecoder.h Normal file
View File

@ -0,0 +1,30 @@
#pragma once
#include <functional>
#include <string>
#include <vector>
#include <set>
#include "Derivation.h"
#include "GibblerMaxTransDecoder.h"
namespace Josiah {
class DerivationCollector: public virtual MaxCollector<Derivation> {
public:
DerivationCollector(): MaxCollector<Derivation>("Deriv"), m_pd(0) ,m_collectDerivByTrans(false) {}
void collect(Sample& sample);
/** Write max periodically to stderr */
void setPeriodicDecode(int pd) {m_pd = pd;}
void setCollectDerivationsByTranslation(bool dbyt) {m_collectDerivByTrans = dbyt;}
void outputDerivationsByTranslation(std::ostream& out);
void outputDerivationProbability(const DerivationProbability& dp,size_t n, std::ostream& out);
void reset();
virtual ~DerivationCollector(){}
std::pair<const Derivation*,float> getMAP() const;
private:
std::map<std::string,std::set<Derivation> > m_derivByTrans;
int m_pd;
bool m_collectDerivByTrans;
};
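//Usage sketch (editor's example): periodic max-derivation reporting during
//sampling; 'sampler' is assumed to be an already configured Josiah sampler.
//
//  DerivationCollector collector;
//  collector.setPeriodicDecode(100); //report the argmax every 100 samples
//  sampler.AddCollector(&collector);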
}

289
josiah/GibblerMaxTransDecoder.cpp Normal file
View File

@ -0,0 +1,289 @@
#include "GibblerMaxTransDecoder.h"
#include "Derivation.h"
#include "StaticData.h"
#include "Gibbler.h"
#include <sstream>
#include <map>
#include <ext/algorithm>
using namespace __gnu_cxx;
using namespace std;
namespace Josiah
{
template<class M>
void MaxCollector<M>::reset()
{
m_samples.clear();
m_sampleList.clear();
SampleCollector::reset();
}
template<class M>
void MaxCollector<M>::getDistribution(map<const M*,double>& p) const
{
double pevent = 1.0/N();
for (typename map<M,vector<size_t> >::const_iterator i = m_samples.begin(); i != m_samples.end(); ++i) {
const M* sample = &(i->first);
p[sample] = i->second.size()*pevent;
}
IFVERBOSE(2) {
float total = 0;
VERBOSE(2, "Distribution: ");
//sort it
multimap<double, const M*> sortedp;
for (typename map<const M*,double>::const_iterator pi = p.begin(); pi != p.end(); ++pi) {
sortedp.insert(make_pair(pi->second,pi->first));
total += pi->second;
}
for (typename multimap<double, const M*>::reverse_iterator spi = sortedp.rbegin(); spi != sortedp.rend(); ++spi) {
VERBOSE(2, spi->second << "{ " << *(spi->second) << " }: " << spi->first << " " << endl;);
}
VERBOSE(2, endl << "Total = " << total << endl);
}
}
template<class M>
void MaxCollector<M>::printDistribution(ostream& out) const
{
map<const M*, double> p;
getDistribution(p);
//sort it
multimap<double, const M*> sortedp;
for (typename map<const M*,double>::const_iterator pi = p.begin(); pi != p.end(); ++pi) {
sortedp.insert(make_pair(pi->second,pi->first));
}
for (typename multimap<double, const M*>::reverse_iterator spi = sortedp.rbegin(); spi != sortedp.rend(); ++spi) {
out << *(spi->second) << "|||" << spi->first << endl;
}
}
template<class M>
float MaxCollector<M>::getEntropy() const
{
map<const M*, double> p;
getDistribution(p);
float entropy = 0;
//cerr << "Entropy: ";
for (typename map<const M*,double>::const_iterator pi = p.begin(); pi != p.end(); ++pi) {
//cerr << pi->second << " ";
entropy -= pi->second*log(pi->second);
}
//cerr << endl;
//cerr << "Entropy : " << entropy << endl;
return entropy;
}
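//Editor's note: getEntropy() computes H = -sum_d p(d) * log p(d) over the
//empirical distribution of collected samples; with natural logs it is
//maximised, at log(#distinct samples), when the distribution is uniform.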
template<class M>
void MaxCollector<M>::collectSample( const M &m)
{
m_samples[m].push_back(N());
typename map<M,vector<size_t> >::const_iterator i = m_samples.find(m);
m_sampleList.push_back(&(i->first));
if (m_outputMaxChange) {
pair<const M*,float> max = getMax();
if (max.first != m_max) {
m_max = max.first;
cerr << "NewMax" << m_name << "(" << N() << ") ";
cerr << *m_max;
cerr << endl;
}
}
}
template<class M>
const M* MaxCollector<M>::getSample(size_t index) const
{
return m_sampleList.at(index);
}
template<class M>
pair<const M*,float> MaxCollector<M>::getMax() const
{
const M* argmax = NULL;
float max = 0;
map<const M*,double> p;
getDistribution(p);
for (typename map<const M*,double>::const_iterator pi = p.begin(); pi != p.end(); ++pi) {
if (pi->second > max) {
max = pi->second;
argmax = pi->first;
}
}
return pair<const M*,float>(argmax,max);
}
template<class M>
struct ProbGreaterThan : public std::binary_function<const pair<const M*,float>&,const pair<const M*,float>&,bool>{
bool operator()(const pair<const M*,float>& d1, const pair<const M*,float>& d2) const {
return d1.second > d2.second;
}
};
template<class M>
void MaxCollector<M>::getNbest(vector<pair<const M*, float> >& nbest, size_t n) const
{
map<const M*,double> p;
getDistribution(p);
nbest.assign(p.begin(),p.end());
ProbGreaterThan<M> comparator;
stable_sort(nbest.begin(),nbest.end(),comparator);
if (n > 0) {
while (nbest.size() > n) {
nbest.pop_back();
}
}
}
template class MaxCollector<Josiah::Derivation>;
template class MaxCollector<Josiah::Translation>;
string ToString(const Translation& ws)
{
ostringstream os;
for (Translation::const_iterator i = ws.begin(); i != ws.end(); ++i)
os << (*i)->GetString() << " ";
return os.str();
}
ostream& operator<<(ostream& out, const Translation& ws)
{
out << ToString(ws);
return out;
}
void GibblerMaxTransDecoder::collect(Sample& sample)
{
const Hypothesis* h = sample.GetSampleHypothesis();
vector<const Factor*> trans;
h->GetTranslation(&trans, 0);
collectSample(trans);
}
pair<const Translation*,float> GibblerMaxTransDecoder::getMbr(size_t mbrSize, size_t topNsize) const {
//Posterior probs computed using the whole evidence set
//MBR decoding outer loop using configurable size
/* vector<pair<const Translation*, float> > topNTranslations;
getNbest(topNTranslations,topNsize);
GainFunctionVector g;
vector<pair<const Translation*, float> >::iterator it;
for (it = topNTranslations.begin(); it != topNTranslations.end(); ++it) {
VERBOSE(1, "translation: " << ToString(*it->first) << " " << (it->second) << endl);
g.push_back(new SentenceBLEU(4,*it->first)); //Calc the sufficient statistics for the translation
}
//Main MBR computation done here
float bleu(0.0), weightedLoss(0.0), weightedLossCumul(0.0), minMBRLoss(100000);
vector<float> mbrLoss;
int minMBRLossIdx(-1);
mbrSize = min(mbrSize, topNTranslations.size());
VERBOSE(1, "MBR SIZE " << mbrSize << ", all Translations Size " << topNTranslations.size() << endl);
//Outer loop using only the top #mbrSize samples
for(size_t i = 0; i < mbrSize; ++i) {
weightedLossCumul = 0.0;
const GainFunction& gf = g[i];
VERBOSE(2, "Reference " << ToString(*topNTranslations[i].first) << endl);
for(size_t j = 0; j < topNTranslations.size(); ++j) {//Inner loop using all samples
if (static_cast<size_t>(i) != j) {
bleu = gf.ComputeGain(g[j]);
VERBOSE(2, "Hypothesis " << ToString(*topNTranslations[j].first) << endl);
weightedLoss = (1- bleu) * topNTranslations[j].second;
VERBOSE(2, "Bleu " << bleu << ", prob " << topNTranslations[j].second << ", weightedLoss : " << weightedLoss << endl);
weightedLossCumul += weightedLoss;
if (weightedLossCumul > minMBRLoss)
break;
}
}
VERBOSE(2, "Bayes risk for cand " << i << " " << weightedLossCumul << endl);
if (weightedLossCumul < minMBRLoss){
minMBRLoss = weightedLossCumul;
minMBRLossIdx = i;
}
}
VERBOSE(2, "Minimum Bayes risk cand is " << minMBRLossIdx << " with risk " << minMBRLoss << endl);
return topNTranslations[minMBRLossIdx]; */
assert(!"Not yet implemented with new gain function");
}
size_t GibblerMaxTransDecoder::getMbr(const vector<pair<Translation,float> >& translations, size_t topNsize) const {
//Posterior probs computed using the whole evidence set
//MBR decoding outer loop using configurable size
/* vector<pair<const Translation*, float> > topNTranslations;
getNbest(topNTranslations,topNsize);
GainFunctionVector gEvidenceSet;
vector<pair<const Translation*, float> >::iterator it;
for (it = topNTranslations.begin(); it != topNTranslations.end(); ++it) {
VERBOSE(1, "Evidence translation: " << ToString(*it->first) << " " << (it->second) << endl);
gEvidenceSet.push_back(new SentenceBLEU(4,*it->first)); //Calc the sufficient statistics for the translation
}
GainFunctionVector gHypothesisSet;
vector<pair<Translation, float> >::const_iterator itt;
for (itt = translations.begin(); itt != translations.end(); ++itt) {
VERBOSE(1, "Hypothesis translation: " << ToString(itt->first) << " " << (itt->second) << endl);
gHypothesisSet.push_back(new SentenceBLEU(4,itt->first)); //Calc the sufficient statistics for the translation
}
//Main MBR computation done here
float bleu(0.0), weightedLoss(0.0), weightedLossCumul(0.0), minMBRLoss(100000);
vector<float> mbrLoss;
int minMBRLossIdx(-1);
size_t mbrSize = translations.size();
VERBOSE(1, "MBR SIZE " << mbrSize << ", all Translations Size " << topNTranslations.size() << endl);
//Outer loop using only the top #mbrSize samples
for(size_t i = 0; i < mbrSize; ++i) {
weightedLossCumul = 0.0;
const GainFunction& gf = gHypothesisSet[i];
VERBOSE(1, "Reference " << ToString(translations[i].first) << " : [" << translations[i].second << "]" << endl);
for(size_t j = 0; j < topNTranslations.size(); ++j) {//Inner loop using all samples
//if (static_cast<size_t>(i) != j) {
bleu = gf.ComputeGain(gEvidenceSet[j]);
VERBOSE(1, "Hypothesis " << ToString(*topNTranslations[j].first) << endl);
weightedLoss = (1- bleu) * topNTranslations[j].second;
VERBOSE(1, "Bleu " << bleu << ", prob " << topNTranslations[j].second << ", weightedLoss : " << weightedLoss << endl);
weightedLossCumul += weightedLoss;
if (weightedLossCumul > minMBRLoss)
break;
//}
}
VERBOSE(1, "Bayes risk for cand " << i << " " << weightedLossCumul << endl);
if (weightedLossCumul < minMBRLoss){
VERBOSE(1, "New best MBR sol: " << ToString(translations[i].first) << " " << weightedLossCumul << endl);
minMBRLoss = weightedLossCumul;
minMBRLossIdx = i;
}
}
VERBOSE(2, "Minimum Bayes risk cand is " << minMBRLossIdx << " with risk " << minMBRLoss << endl);
return minMBRLossIdx; */
assert(!"Not yet implemented with new gain function");
}
}

79
josiah/GibblerMaxTransDecoder.h Normal file
View File

@ -0,0 +1,79 @@
#pragma once
#include <algorithm>
#include <vector>
#include <utility>
#include <map>
#include "ScoreComponentCollection.h"
#include "Phrase.h"
#include "SampleCollector.h"
namespace Moses {
class Factor;
}
using namespace Moses;
namespace Josiah {
typedef std::vector<const Moses::Factor*> Translation;
std::ostream& operator<<(std::ostream& out, const Translation& ws);
/**
* Collector that looks for a max (eg translation, derivation).
**/
template <class M>
class MaxCollector : public virtual SampleCollector {
public:
MaxCollector<M>(const std::string& name) : m_name(name), m_outputMaxChange(false), m_max(NULL) {} //initialise m_max: collectSample() compares the new argmax against it
/** Should be called to report that an example of M was found in the sample*/
void collectSample(const M&);
/**argmax and max*/
virtual std::pair<const M*,float> getMax() const;
/** n-best list. Set n=0 to get all translations*/
void getNbest(std::vector<std::pair<const M*, float> >& nbest, size_t n) const;
/**Estimate of the probability distribution */
void getDistribution(std::map<const M*,double>& p) const;
/**Print the probability distribution to a file*/
void printDistribution(std::ostream& out) const;
/** Output the max whenever it changes */
void setOutputMaxChange(bool outputMaxChange){m_outputMaxChange = outputMaxChange;}
/** The sample at a given index.*/
const M* getSample(size_t index) const;
float getEntropy() const;
void reset();
virtual ~MaxCollector<M>(){}
private:
//maps the sample to the indices at which it was found.
std::map<M,std::vector<size_t> > m_samples;
//maps indices to samples
std::vector<const M*> m_sampleList;
//used for debug messages
std::string m_name;
//output when max changes?
bool m_outputMaxChange;
const M* m_max;
};
std::string ToString(const Translation& ws);
class GibblerMaxTransDecoder : public virtual MaxCollector<Translation> {
public:
GibblerMaxTransDecoder() : MaxCollector<Translation>("Trans") {}
virtual void collect(Sample& sample);
/** Do mbr decoding */
std::pair<const Translation*,float> getMbr(size_t mbrSize, size_t topNsize = 0) const;
/** Do mbr decoding */
size_t getMbr(const std::vector<std::pair<Translation, float> > & translations, size_t topNsize = 0) const;
virtual ~GibblerMaxTransDecoder(){}
private:
};
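//Usage sketch (editor's example): collectors are registered with a Sampler
//and queried after sampling; 'sampler' is assumed to be already configured.
//
//  GibblerMaxTransDecoder collector;
//  sampler.AddCollector(&collector);
//  //... run sampling ...
//  std::pair<const Translation*,float> best = collector.getMax();
//  std::cerr << ToString(*best.first) << " p=" << best.second << std::endl;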
}

419
josiah/GibbsOperator.cpp Normal file
View File

@ -0,0 +1,419 @@
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Hypothesis.h"
#include "TranslationOptionCollection.h"
#include "Utils.h"
#include "WordsRange.h"
#include "GibbsOperator.h"
#include "Selector.h"
using namespace std;
using namespace Moses;
namespace Josiah {
static float ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current)
{
int dist = 0;
if (prev.GetNumWordsCovered() == 0) {
dist = current.GetStartPos();
} else {
dist = (int)prev.GetEndPos() - (int)current.GetStartPos() + 1 ;
}
//cerr << "Computing dist " << prev << " " << current << " " << -abs(dist) << endl;
return - (float) abs(dist);
}
GibbsOperator::~GibbsOperator() {}
PrunedTranslationOptionList::PrunedTranslationOptionList(
const TranslationOptionCollection& toc,
const WordsRange& segment,
size_t count) :
m_options(toc.GetTranslationOptionList(segment)),
m_count(count)
{ }
TranslationOptionList::const_iterator
PrunedTranslationOptionList::begin() const {
return m_options.begin();
}
TranslationOptionList::const_iterator
PrunedTranslationOptionList::end() const {
if (!m_count || m_count > m_options.size()) {
return m_options.end();
} else {
return m_options.begin() + m_count;
}
}
/*
if (sample.DoRaoBlackwell()) {
FVector fv(sample.GetFeatureValues());
fv -= noChangeDelta->getScores();
//Add FV(d)*p(d) for each delta.
vector<double> scores;
//m_acceptor->getNormalisedScores(deltas,scores);
//scores now contain the normalised logprobs
assert(scores.size() == deltas.size());
for (size_t i = 0; i < deltas.size(); ++i) {
if (scores[i] < -30) continue; //floor
FVector deltaFv = deltas[i]->getScores();
deltaFv *= exp(scores[i]);
fv +=deltaFv;
}
//cout << "Rao-Blackwellised fv: " << fv << endl;
sample.AddConditionalFeatureValues(fv);
} */
void MergeSplitOperator::propose(Sample& sample, const TranslationOptionCollection& toc,
TDeltaVector& deltas, TDeltaHandle& noChangeDelta)
{
size_t sourceSize = sample.GetSourceSize();
if (sourceSize == 1) return;
size_t splitIndex = RandomNumberGenerator::instance().
getRandomIndexFromZeroToN(sourceSize-1) + 1;
//NB splitIndex n refers to the position between word n-1 and word n. Words are zero indexed
VERBOSE(3,"Sampling at source index " << splitIndex << endl);
Hypothesis* hypothesis = sample.GetHypAtSourceIndex(splitIndex);
auto_ptr<TargetGap> gap;
auto_ptr<TargetGap> leftGap;
auto_ptr<TargetGap> rightGap;
//find out which source and target segments this split-merge operator should consider
//if we're at the left edge of a segment, then we're on a split
if (hypothesis->GetCurrSourceWordsRange().GetStartPos() == splitIndex) {
VERBOSE(3, "Existing split" << endl);
WordsRange rightSourceSegment = hypothesis->GetCurrSourceWordsRange();
WordsRange rightTargetSegment = hypothesis->GetCurrTargetWordsRange();
const Hypothesis* prev = hypothesis->GetSourcePrevHypo();
assert(prev);
assert(prev->GetSourcePrevHypo()); //must be a valid hypo
WordsRange leftSourceSegment = prev->GetCurrSourceWordsRange();
WordsRange leftTargetSegment = prev->GetCurrTargetWordsRange();
if (leftTargetSegment.GetEndPos() + 1 == rightTargetSegment.GetStartPos()) {
//contiguous on target side.
//In this case source and target order are the same
//Add MergeDeltas
WordsRange sourceSegment(leftSourceSegment.GetStartPos(), rightSourceSegment.GetEndPos());
WordsRange targetSegment(leftTargetSegment.GetStartPos(), rightTargetSegment.GetEndPos());
gap.reset( new TargetGap(prev->GetPrevHypo(), hypothesis->GetNextHypo(), targetSegment));
VERBOSE(3, "Creating merge deltas for merging source segments " << leftSourceSegment << " with " <<
rightSourceSegment << " and target segments " << leftTargetSegment << " with " << rightTargetSegment << endl);
PrunedTranslationOptionList options(toc, sourceSegment, m_toptionLimit);
for (TranslationOptionList::const_iterator i = options.begin(); i != options.end(); ++i) {
TDeltaHandle delta(new MergeDelta(sample,*i,*(gap.get())));
deltas.push_back(delta);
}
}
//make sure that the 'left' and 'right' refer to the target order
auto_ptr<PrunedTranslationOptionList> leftOptions;
auto_ptr<PrunedTranslationOptionList> rightOptions;
if (leftTargetSegment < rightTargetSegment) {
//source and target order same
leftOptions.reset(new PrunedTranslationOptionList(toc,leftSourceSegment,m_toptionLimit));
rightOptions.reset(new PrunedTranslationOptionList(toc,rightSourceSegment,m_toptionLimit));
leftGap.reset(new TargetGap(prev->GetPrevHypo(), prev->GetNextHypo(), prev->GetCurrTargetWordsRange()));
rightGap.reset(new TargetGap(hypothesis->GetPrevHypo(), hypothesis->GetNextHypo(),
hypothesis->GetCurrTargetWordsRange()));
noChangeDelta.reset(new PairedTranslationUpdateDelta(sample,&(prev->GetTranslationOption())
,&(hypothesis->GetTranslationOption()),*leftGap, *rightGap));
} else {
//target in opposite order to source
leftOptions.reset(new PrunedTranslationOptionList(toc,rightSourceSegment,m_toptionLimit));
rightOptions.reset(new PrunedTranslationOptionList(toc,leftSourceSegment,m_toptionLimit));
leftGap.reset(new TargetGap(hypothesis->GetPrevHypo(), hypothesis->GetNextHypo(),
hypothesis->GetCurrTargetWordsRange()));
rightGap.reset(new TargetGap(prev->GetPrevHypo(), prev->GetNextHypo(), prev->GetCurrTargetWordsRange()));
noChangeDelta.reset(new PairedTranslationUpdateDelta(sample,&(hypothesis->GetTranslationOption())
,&(prev->GetTranslationOption()),*leftGap, *rightGap));
}
//Add PairedTranslationUpdateDeltas
for (TranslationOptionList::const_iterator ri = rightOptions->begin(); ri != rightOptions->end(); ++ri) {
for (TranslationOptionList::const_iterator li = leftOptions->begin(); li != leftOptions->end(); ++li) {
TDeltaHandle delta(new PairedTranslationUpdateDelta(sample,*li, *ri, *leftGap, *rightGap));
deltas.push_back(delta);
}
}
//cerr << "Added " << ds << " deltas" << endl;
} else {
VERBOSE(3, "No existing split" << endl);
WordsRange sourceSegment = hypothesis->GetCurrSourceWordsRange();
gap.reset( new TargetGap(hypothesis->GetPrevHypo(), hypothesis->GetNextHypo(), hypothesis->GetCurrTargetWordsRange()));
noChangeDelta.reset(new TranslationUpdateDelta(sample,&(hypothesis->GetTranslationOption()),*(gap.get())));
//Add TranslationUpdateDeltas
PrunedTranslationOptionList options(toc,sourceSegment,m_toptionLimit);
//cerr << "Got " << options.size() << " options for " << sourceSegment << endl;
VERBOSE(3, "Creating simple deltas for source segment " << sourceSegment << " and target segment " <<gap.get()->segment
<< endl);
for (TranslationOptionList::const_iterator i = options.begin(); i != options.end(); ++i) {
TDeltaHandle delta(new TranslationUpdateDelta(sample,*i,*(gap.get())));
deltas.push_back(delta);
}
//cerr << "Added " << ds << " deltas" << endl;
//Add SplitDeltas
VERBOSE(3, "Adding deltas to split " << sourceSegment << " at " << splitIndex << endl);
//Note no reordering in split
WordsRange leftSourceSegment(sourceSegment.GetStartPos(),splitIndex-1);
WordsRange rightSourceSegment(splitIndex,sourceSegment.GetEndPos());
PrunedTranslationOptionList leftOptions(toc,leftSourceSegment,m_toptionLimit);
PrunedTranslationOptionList rightOptions(toc,rightSourceSegment,m_toptionLimit);
for (TranslationOptionList::const_iterator ri = rightOptions.begin(); ri != rightOptions.end(); ++ri) {
for (TranslationOptionList::const_iterator li = leftOptions.begin(); li != leftOptions.end(); ++li) {
TDeltaHandle delta(new SplitDelta(sample, *li, *ri, *(gap.get())));
deltas.push_back(delta);
}
}
}
}
void TranslationSwapOperator::propose(Sample& sample, const TranslationOptionCollection& toc,
TDeltaVector& deltas, TDeltaHandle& noChangeDelta) {
size_t curPos = RandomNumberGenerator::instance().getRandomIndexFromZeroToN(sample.GetSourceSize());
const Hypothesis* currHypo = sample.GetHypAtSourceIndex(curPos);
TargetGap gap(currHypo->GetPrevHypo(), currHypo->GetNextHypo(), currHypo->GetCurrTargetWordsRange());
const WordsRange& sourceSegment = currHypo->GetCurrSourceWordsRange();
VERBOSE(3, "Considering source segment " << sourceSegment << " and target segment " << gap.segment << endl);
const TranslationOption* noChangeOption = &(currHypo->GetTranslationOption());
noChangeDelta.reset(new TranslationUpdateDelta(sample,noChangeOption,gap));
//const TranslationOptionList& options = toc.GetTranslationOptionList(sourceSegment);
PrunedTranslationOptionList options(toc,sourceSegment,m_toptionLimit);
for (TranslationOptionList::const_iterator i = options.begin(); i != options.end(); ++i) {
TDeltaHandle delta(new TranslationUpdateDelta(sample,*i,gap));
deltas.push_back(delta);
}
}
void FlipOperator::propose(Sample& sample, const TranslationOptionCollection& toc,
TDeltaVector& deltas, TDeltaHandle& noChangeDelta)
{
VERBOSE(2, "Running an iteration of the flip operator" << endl);
CollectAllSplitPoints(sample);
if (m_splitPoints.size() < 2) {
return;
}
size_t i = RandomNumberGenerator::instance().getRandomIndexFromZeroToN(GetSplitPoints().size());
size_t j = i;
while (i == j) {
j = RandomNumberGenerator::instance().getRandomIndexFromZeroToN(GetSplitPoints().size());
}
if (i < j) {
VERBOSE(2, "Forward Flipping phrases at pos " << m_splitPoints[i] << " and " << m_splitPoints[j] << endl);
} else {
VERBOSE(2, "Backward Flipping phrases at pos " << m_splitPoints[i] << " and " << m_splitPoints[j] << endl);
}
Hypothesis* hypothesis = sample.GetHypAtSourceIndex(m_splitPoints[i]);
WordsRange thisSourceSegment = hypothesis->GetCurrSourceWordsRange();
WordsRange thisTargetSegment = hypothesis->GetCurrTargetWordsRange();
Hypothesis* followingHyp = sample.GetHypAtSourceIndex(m_splitPoints[j]);
//would this be a valid reordering?
WordsRange followingSourceSegment = followingHyp->GetCurrSourceWordsRange();
WordsRange followingTargetSegment = followingHyp->GetCurrTargetWordsRange();
if (thisTargetSegment < followingTargetSegment ) {
//source and target order are the same
bool contiguous = (thisTargetSegment.GetEndPos() + 1 == followingTargetSegment.GetStartPos());
/*contiguous on the target side, so flipping would make this a swap;
would this be a valid reordering if we flipped?*/
float totalDistortion = 0;
Hypothesis *newLeftNextHypo, *newRightPrevHypo;
if (contiguous) {
newLeftNextHypo = hypothesis;
newRightPrevHypo = followingHyp;
}
else {
newLeftNextHypo = const_cast<Hypothesis*>(hypothesis->GetNextHypo());
newRightPrevHypo = const_cast<Hypothesis*>(followingHyp->GetPrevHypo());
}
bool isValidSwap = CheckValidReordering(followingHyp->GetCurrSourceWordsRange(), hypothesis->GetCurrSourceWordsRange(), hypothesis->GetPrevHypo(), newLeftNextHypo, newRightPrevHypo, followingHyp->GetNextHypo(), totalDistortion);
if (isValidSwap) {//yes
TargetGap leftGap(hypothesis->GetPrevHypo(), hypothesis->GetNextHypo(), thisTargetSegment);
TargetGap rightGap(followingHyp->GetPrevHypo(), followingHyp->GetNextHypo(), followingTargetSegment);
TDeltaHandle delta(new FlipDelta(sample, &(followingHyp->GetTranslationOption()),
&(hypothesis->GetTranslationOption()),
leftGap, rightGap));
deltas.push_back(delta);
CheckValidReordering(hypothesis->GetCurrSourceWordsRange(), followingHyp->GetCurrSourceWordsRange(), hypothesis->GetPrevHypo(), hypothesis->GetNextHypo(), followingHyp->GetPrevHypo(), followingHyp->GetNextHypo(), totalDistortion);
noChangeDelta.reset(new FlipDelta(sample, &(hypothesis->GetTranslationOption()),
&(followingHyp->GetTranslationOption()), leftGap, rightGap));
deltas.push_back(noChangeDelta);
}
}
else {
//swapped on target side, flipping would make this monotone
bool contiguous = (thisTargetSegment.GetStartPos() == followingTargetSegment.GetEndPos() + 1);
float totalDistortion = 0;
Hypothesis *newLeftNextHypo, *newRightPrevHypo;
if (contiguous) {
newLeftNextHypo = followingHyp;
newRightPrevHypo = hypothesis;
}
else {
newLeftNextHypo = const_cast<Hypothesis*>(followingHyp->GetNextHypo());
newRightPrevHypo = const_cast<Hypothesis*>(hypothesis->GetPrevHypo());
}
bool isValidSwap = CheckValidReordering(hypothesis->GetCurrSourceWordsRange(), followingHyp->GetCurrSourceWordsRange(), followingHyp->GetPrevHypo(), newLeftNextHypo, newRightPrevHypo, hypothesis->GetNextHypo(), totalDistortion);
if (isValidSwap) {//yes
TargetGap leftGap(followingHyp->GetPrevHypo(), followingHyp->GetNextHypo(), followingTargetSegment);
TargetGap rightGap(hypothesis->GetPrevHypo(), hypothesis->GetNextHypo(), thisTargetSegment);
TDeltaHandle delta(new FlipDelta(sample, &(hypothesis->GetTranslationOption()),
&(followingHyp->GetTranslationOption()), leftGap, rightGap));
deltas.push_back(delta);
CheckValidReordering(followingHyp->GetCurrSourceWordsRange(),hypothesis->GetCurrSourceWordsRange(), followingHyp->GetPrevHypo(), followingHyp->GetNextHypo(), hypothesis->GetPrevHypo(), hypothesis->GetNextHypo(), totalDistortion);
noChangeDelta.reset(new FlipDelta(sample,&(followingHyp->GetTranslationOption()),
&(hypothesis->GetTranslationOption()), leftGap, rightGap));
deltas.push_back(noChangeDelta);
}
}
}
bool CheckValidReordering(const WordsRange& leftSourceSegment, const WordsRange& rightSourceSegment, const Hypothesis* leftTgtPrevHypo, const Hypothesis* leftTgtNextHypo, const Hypothesis* rightTgtPrevHypo, const Hypothesis* rightTgtNextHypo, float & totalDistortion){
totalDistortion = 0;
//linear distortion
//const DistortionScoreProducer *dsp = StaticData::Instance().GetDistortionScoreProducer();
//Calculate distortion for leftmost target
//who is proposed new leftmost's predecessor?
// Hypothesis *leftPrevHypo = const_cast<Hypothesis*>(rightTgtHypo->GetPrevHypo());
float distortionScore = 0.0;
if (leftTgtPrevHypo) {
distortionScore = ComputeDistortionDistance(
leftTgtPrevHypo->GetCurrSourceWordsRange(),
leftSourceSegment
);
if (abs(distortionScore) > StaticData::Instance().GetMaxDistortion()) {
return false;
}
totalDistortion += distortionScore;
}
if (leftTgtNextHypo) {
//Calculate distortion from leftmost target to right target
distortionScore = ComputeDistortionDistance(
leftSourceSegment,
leftTgtNextHypo->GetCurrSourceWordsRange()
);
if (abs(distortionScore) > StaticData::Instance().GetMaxDistortion()) {
return false;
}
totalDistortion += distortionScore;
}
//Calculate distortion from rightmost target to its successor
//Hypothesis *rightNextHypo = const_cast<Hypothesis*> (leftTgtHypo->GetNextHypo());
if (rightTgtPrevHypo && rightTgtPrevHypo->GetCurrSourceWordsRange() != leftSourceSegment) {
distortionScore = ComputeDistortionDistance(
rightTgtPrevHypo->GetCurrSourceWordsRange(),
rightSourceSegment
);
if (abs(distortionScore) > StaticData::Instance().GetMaxDistortion()) {
return false;
}
totalDistortion += distortionScore;
}
if (rightTgtNextHypo) {
//Calculate distortion from leftmost target to right target
distortionScore = ComputeDistortionDistance(
rightSourceSegment,
rightTgtNextHypo->GetCurrSourceWordsRange()
);
if (abs(distortionScore) > StaticData::Instance().GetMaxDistortion()) {
return false;
}
totalDistortion += distortionScore;
}
return true;
}
void FlipOperator::CollectAllSplitPoints(Sample& sample) {
m_splitPoints.clear();
size_t sourceSize = sample.GetSourceSize();
for (size_t splitIndex = 0; splitIndex < sourceSize; ++splitIndex) {
Hypothesis* hypothesis = sample.GetHypAtSourceIndex(splitIndex);
if (hypothesis->GetCurrSourceWordsRange().GetEndPos() == splitIndex) {
m_splitPoints.push_back(splitIndex);
}
}
}
}//namespace
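
CheckValidReordering above vets a proposed flip by recomputing the distortion at each junction the swap disturbs, rejecting the move as soon as a single jump exceeds the decoder's distortion limit. A minimal self-contained sketch of that per-jump test; the distance convention (negative absolute jump length) is an assumption mirroring the usual Moses helper, which is defined elsewhere:

#include <cmath>

// Sketch only: assumed convention for the distance between a segment
// ending at prevEndPos and a segment starting at nextStartPos.
static float DistortionDistanceSketch(int prevEndPos, int nextStartPos) {
  return -std::fabs(static_cast<float>(nextStartPos - prevEndPos - 1));
}

// Mirrors the early-return pattern in CheckValidReordering.
static bool JumpWithinLimit(float distortionScore, float maxDistortion) {
  return std::fabs(distortionScore) <= maxDistortion;
}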

134
josiah/GibbsOperator.h Normal file
View File

@ -0,0 +1,134 @@
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include "FeatureVector.h"
#include "Gibbler.h"
#include "TranslationDelta.h"
#include "TypeDef.h"
namespace Moses {
class Hypothesis;
class TranslationOptionCollection;
class WordsRange;
}
using namespace Moses;
namespace Josiah {
/**
* Used to extract the top-n translation options.
**/
class PrunedTranslationOptionList {
public:
PrunedTranslationOptionList(
const Moses::TranslationOptionCollection& toc,
const Moses::WordsRange& segment,
size_t count);
TranslationOptionList::const_iterator begin() const;
TranslationOptionList::const_iterator end() const;
private:
const TranslationOptionList& m_options;
size_t m_count;
};
/** Abstract base class for Gibbs operators **/
class GibbsOperator {
public:
GibbsOperator(const std::string& name, float prob) : m_name(name), m_prob(prob) {}
/** Proposes a set of possible changes to the current sample, and a delta signifying 'noChange'. */
virtual void propose(Sample& sample, const TranslationOptionCollection& toc,
TDeltaVector& deltas, TDeltaHandle& noChangeDelta) = 0;
/** The name of this operator */
const std::string& name() const {return m_name;}
/** The weight given to this operator in random scanning */
float GetScanProb() const {return m_prob;}
virtual ~GibbsOperator();
protected:
std::string m_name;
float m_prob; // the probability of sampling this operator
};
/**
* Operator that keeps ordering constant: it visits each (internal) source word boundary,
* merges or splits the segment(s) at that boundary, and updates the translation.
**/
class MergeSplitOperator : public virtual GibbsOperator {
public:
MergeSplitOperator(float scanProb = 0.333,size_t toptionLimit=20) :
GibbsOperator("merge-split", scanProb),
m_toptionLimit(toptionLimit) {}
virtual ~MergeSplitOperator() {}
virtual void propose(Sample& sample, const TranslationOptionCollection& toc,
TDeltaVector& deltas, TDeltaHandle& noChangeDelta);
private:
size_t m_toptionLimit;
};
/**
* Operator which may update any translation option, but may not change segmentation or ordering.
**/
class TranslationSwapOperator : public virtual GibbsOperator {
public:
TranslationSwapOperator(float scanProb = 0.333, size_t toptionLimit = 0) :
GibbsOperator("translation-swap", scanProb),
m_toptionLimit(toptionLimit) {}
virtual ~TranslationSwapOperator() {}
virtual void propose(Sample& sample, const TranslationOptionCollection& toc,
TDeltaVector& deltas, TDeltaHandle& noChangeDelta);
private:
size_t m_toptionLimit;
};
/**
* Operator which performs local reordering, provided that both the source and target segments are contiguous
* and that the swap does not violate the model's reordering constraints.
**/
class FlipOperator : public virtual GibbsOperator {
public:
FlipOperator(float scanProb = 0.333) : GibbsOperator("flip", scanProb) {}
virtual ~FlipOperator() {}
virtual void propose(Sample& sample, const TranslationOptionCollection& toc,
TDeltaVector& deltas, TDeltaHandle& noChangeDelta);
const std::vector<size_t> & GetSplitPoints() {
return m_splitPoints;
}
private:
void CollectAllSplitPoints(Sample& sample);
std::vector<size_t> m_splitPoints;
};
bool CheckValidReordering(const WordsRange& leftSourceSegment, const WordsRange& rightSourceSegment, const Hypothesis* leftTgtPrevHypo, const Hypothesis* leftTgtNextHypo, const Hypothesis* rightTgtPrevHypo, const Hypothesis* rightTgtNextHypo, float & totalDistortion);
}
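
GetScanProb() above is the weight a random-scan sampler uses when choosing which operator to apply at each step. A hedged sketch of that roulette-wheel selection (the real loop lives in the Sampler, which is not part of this excerpt); it assumes the scan probabilities sum to one, which Josiah.cpp validates at startup:

#include <cstdlib>
#include <vector>

struct OpSketch { float scanProb; };  // stand-in for GibbsOperator*

// Roulette-wheel choice over operator scan probabilities (sketch).
size_t PickOperatorSketch(const std::vector<OpSketch>& ops) {
  float r = static_cast<float>(std::rand()) / RAND_MAX;
  float cum = 0.0f;
  for (size_t i = 0; i < ops.size(); ++i) {
    cum += ops[i].scanProb;
    if (r <= cum) return i;
  }
  return ops.size() - 1;  // guard against floating-point shortfall
}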

61
josiah/Hildreth.cpp Normal file
View File

@ -0,0 +1,61 @@
#include <iostream>
#include <sstream>
#include <vector>
#include <boost/program_options.hpp>
#include "FeatureVector.h"
#include "OnlineLearner.h"
namespace po = boost::program_options;
using namespace Josiah;
using namespace std;
int main(int argc, char** argv) {
vector<float> avec;
float b;
float C;
bool help;
po::options_description desc("Allowed options");
desc.add_options()
("help",po::value( &help )->zero_tokens()->default_value(false), "Print this help message and exit")
("a", po::value<vector<float> >(&avec), "Constraint vector")
("b", po::value<float>(&b), "Constraint scalar")
("c", po::value<float>(&C)->default_value(0.0f), "slack");
po::options_description cmdline_options;
cmdline_options.add(desc);
po::variables_map vm;
po::store(po::command_line_parser(argc,argv).
options(cmdline_options).run(), vm);
po::notify(vm);
if (help) {
cout << "Usage: " + string(argv[0]) + " -f mosesini-file [options]" << endl;
cout << desc << endl;
return 0;
}
FVector a;
for (size_t i = 0; i < avec.size(); ++i) {
ostringstream name;
name << i;
a[name.str()] = avec[i];
}
vector<FVector> as;
as.push_back(a);
vector<float> bs;
bs.push_back(b);
vector<float> alpha;
if (C) {
alpha = hildreth(as,bs,C);
} else {
alpha = hildreth(as,bs);
}
cout << alpha[0] << endl;
}
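
For the single constraint this driver exercises, Hildreth's procedure has a closed form: the dual variable is alpha = b / ||a||^2, clipped below at 0 and, when a slack cap C is given, above at C. A hedged sketch of that special case (an assumption about the behaviour of the general hildreth() declared in OnlineLearner.h, which handles multiple constraints iteratively):

#include <algorithm>
#include <vector>

// One-constraint special case only; C == 0 means no slack cap,
// matching the driver's default above.
float HildrethSingleSketch(const std::vector<float>& a, float b, float C) {
  float norm2 = 0.0f;
  for (size_t i = 0; i < a.size(); ++i) norm2 += a[i] * a[i];
  if (norm2 == 0.0f) return 0.0f;
  float alpha = std::max(0.0f, b / norm2);
  return C > 0.0f ? std::min(alpha, C) : alpha;
}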

55
josiah/InputSource.cpp Normal file
View File

@ -0,0 +1,55 @@
#include "InputSource.h"
using namespace std;
namespace Josiah {
InputSource::~InputSource() {}
StreamInputSource::StreamInputSource(std::istream& is) : in(is) {
}
bool StreamInputSource::HasMore() const {
return (in);
}
void StreamInputSource::GetSentence(std::string* sentence, int* lineno) {
(void) lineno;
std::getline(in, *sentence);
}
BatchedFileInputSource::BatchedFileInputSource(
const string& filename, int rank, int size): m_next(0) {
ifstream in(filename.c_str());
if (!in) {
throw runtime_error("Failed to open input file: " + filename);
}
vector<string> lines;
string line;
while(getline(in,line)) {
lines.push_back(line);
}
float batchSize = (float)lines.size()/size;
cerr << "Batch size: " << batchSize << endl;
size_t start = (size_t)(rank*batchSize+0.5);
size_t end = (size_t)((rank+1)*batchSize+0.5);
m_lines.resize(end-start);
copy(lines.begin()+start,lines.begin()+end,m_lines.begin());
cerr << "batch start: " << start << " batch end: " << end << endl;
}
bool BatchedFileInputSource::HasMore() const {
return m_next < m_lines.size();
}
void BatchedFileInputSource::GetSentence(string* sentence, int* lineno) {
*lineno = m_next;
*sentence = m_lines[m_next++];
}
}
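
BatchedFileInputSource rounds rank * batchSize to the nearest integer at both ends of the slice, so consecutive ranks tile the corpus without gaps or overlaps. For example, 10 lines over 3 ranks gives batchSize 3.33 and slices [0,3), [3,7), [7,10); a standalone check of that arithmetic:

#include <cstdio>

int main() {
  const int lines = 10, size = 3;
  const float batchSize = (float)lines / size;
  for (int rank = 0; rank < size; ++rank) {
    size_t start = (size_t)(rank * batchSize + 0.5);
    size_t end = (size_t)((rank + 1) * batchSize + 0.5);
    std::printf("rank %d: [%zu,%zu)\n", rank, start, end);  // [0,3) [3,7) [7,10)
  }
  return 0;
}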

42
josiah/InputSource.h Normal file
View File

@ -0,0 +1,42 @@
#pragma once
#include <stdexcept>
#include <string>
#include <vector>
#include <iostream>
#include <fstream>
namespace Josiah {
struct InputSource {
virtual bool HasMore() const = 0;
virtual void GetSentence(std::string* sentence, int* lineno) = 0;
virtual ~InputSource();
};
struct StreamInputSource : public InputSource {
std::istream& in;
StreamInputSource(std::istream& is);
virtual bool HasMore() const;
virtual void GetSentence(std::string* sentence, int* lineno);
};
/**
* Splits a file into batches.
**/
class BatchedFileInputSource : public InputSource {
public:
BatchedFileInputSource(
const std::string& filename, int rank, int size);
virtual bool HasMore() const;
virtual void GetSentence(std::string* sentence, int* lineno);
private:
std::vector<std::string> m_lines;
size_t m_next;
};
}
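
A consumer drains any InputSource with the HasMore()/GetSentence() pair. Note the asymmetry above: StreamInputSource ignores the lineno out-parameter, while BatchedFileInputSource sets it to the index within the batch. A minimal usage sketch against this interface:

#include <iostream>
#include <string>
#include "InputSource.h"  // the header above

void DrainSketch(Josiah::InputSource& src) {
  std::string sentence;
  int lineno = 0;
  while (src.HasMore()) {
    src.GetSentence(&sentence, &lineno);  // lineno only set by the batched source
    std::cout << lineno << ": " << sentence << "\n";
  }
}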

566
josiah/Josiah.cpp Normal file
View File

@ -0,0 +1,566 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include <functional>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <iterator>
#include <memory>
#ifdef MPI_ENABLED
#include <mpi.h>
#endif
#include <boost/program_options.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string.hpp>
#include "AnnealingSchedule.h"
#include "Bleu.h"
#include "Decoder.h"
#include "Derivation.h"
#include "FeatureVector.h"
#include "Gibbler.h"
#include "InputSource.h"
#include "TrainingSource.h"
#include "GibbsOperator.h"
#include "Gain.h"
#include "GibblerExpectedLossTraining.h"
#include "GibblerAnnealedExpectedLossTrainer.h"
#include "GibblerMaxTransDecoder.h"
#include "MpiDebug.h"
#include "Selector.h"
#include "StaticData.h"
#include "Optimizer.h"
#include "Utils.h"
#include "WeightManager.h"
using namespace std;
using namespace Josiah;
using namespace Moses;
using boost::lexical_cast;
using boost::bad_lexical_cast;
using boost::split;
using boost::is_any_of;
namespace po = boost::program_options;
/**
* Main for Josiah - the Gibbs sampler for Moses.
**/
int main(int argc, char** argv) {
int rank = 0, size = 1;
#ifdef MPI_ENABLED
MPI_Init(&argc,&argv);
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&size);
cerr << "MPI rank: " << rank << endl;
cerr << "MPI size: " << size << endl;
#endif
GibbsTimer timer;
size_t iterations;
unsigned int topn;
int debug;
int mpidebug;
string mpidebugfile;
string feature_file;
int burning_its;
int mbr_size, topNsize;
string inputfile;
string outputfile;
string mosesini;
bool decode;
bool translate;
bool translation_distro;
bool derivation_distro;
bool help;
bool expected_sbleu;
bool expected_sbleu_da;
bool output_expected_sbleu;
unsigned training_batch_size;
bool mbr_decoding;
bool do_timing;
int max_training_iterations;
uint32_t seed;
int lineno;
bool randomize;
FValue scalefactor;
FValue eta;
FValue mu;
string weightfile;
vector<string> ref_files;
int periodic_decode;
bool collect_dbyt;
bool output_max_change;
bool anneal;
unsigned int reheatings;
float max_temp;
FValue prior_variance;
FValue prior_mean;
string prev_gradient_file;
float start_temp_expda;
float stop_temp_expda;
float floor_temp_expda;
float anneal_ratio_da;
float gamma;
bool use_metanormalized_egd;
int optimizerFreq;
int weight_dump_freq;
string weight_dump_stem;
int init_iteration_number;
bool greedy, fixedTemp;
float fixed_temperature;
bool mapdecode;
vector<string> ngramorders;
bool raoBlackwell;
bool use_moses_kbesthyposet;
bool print_moseskbest;
bool randomScan;
size_t lag;
float flip_prob, merge_split_prob, retrans_prob;
bool calc_exact_posterior, filter_by_posterior;
float evidenceSetShrinkFactor;
bool randomShrink;
float log_base_factor;
bool checkFeatures;
po::options_description desc("Allowed options");
desc.add_options()
("help",po::value( &help )->zero_tokens()->default_value(false), "Print this help message and exit")
("config,f",po::value<string>(&mosesini),"Moses ini file")
("verbosity,v", po::value<int>(&debug)->default_value(0), "Verbosity level")
("mpi-debug-level", po::value<int>(&MpiDebug::verbosity)->default_value(0), "Verbosity level for debugging messages used in mpi.")
("mpi-debug-file", po::value<string>(&mpidebugfile), "Debug file stem for use by mpi processes")
("random-seed,e", po::value<uint32_t>(&seed), "Random seed")
("timing,m", po::value(&do_timing)->zero_tokens()->default_value(false), "Display timing information.")
("iterations,s", po::value<size_t>(&iterations)->default_value(10),
"Number of sampler iterations")
("burn-in,b", po::value<int>(&burning_its)->default_value(1), "Duration (in sampling iterations) of burn-in period")
("scale-factor,c", po::value<FValue>(&scalefactor)->default_value(1.0), "Scale factor for model weights.")
("input-file,i",po::value<string>(&inputfile),"Input file containing tokenised source")
("output-file-prefix,o",po::value<string>(&outputfile),"Output file prefix for translations, MBR output, etc")
("nbest-drv,n",po::value<unsigned int>(&topn)->default_value(0),"Write the top n derivations to stdout")
("weights,w",po::value<string>(&weightfile),"Weight file")
("decode-derivation,d",po::value( &decode)->zero_tokens()->default_value(false),"Write the most likely derivation to stdout")
("decode-translation,t",po::value(&translate)->zero_tokens()->default_value(false),"Write the most likely translation to stdout")
("distro-derivation", po::value(&derivation_distro)->zero_tokens()->default_value(false), "Print derivation probability distribution")
("distro-translation", po::value(&translation_distro)->zero_tokens()->default_value(false), "Print translation probability distribution")
("periodic-derivation,p",po::value(&periodic_decode)->default_value(0), "Periodically write the max derivation to stderr")
("max-change", po::value(&output_max_change)->zero_tokens()->default_value(false), "Whenever the max deriv or max trans changes, write it to stderr")
("collect-dbyt",po::value(&collect_dbyt)->zero_tokens()->default_value(false), "Collect derivations per translation")
("line-number,L", po::value(&lineno)->default_value(0), "Starting reference/line number")
("randomize-batches,R", po::value(&randomize)->zero_tokens()->default_value(false), "Randomize training batches")
("gaussian-prior-variance", po::value<FValue>(&prior_variance)->default_value(0.0f), "Gaussian prior variance (0 for no prior)")
("gaussian-prior-mean,P", po::value<FValue>(&prior_mean)->default_value(0.0f), "Gaussian prior mean")
("expected-bleu-training,T", po::value(&expected_sbleu)->zero_tokens()->default_value(false), "Train to maximize expected sentence BLEU")
("output-expected-sbleu", po::value(&output_expected_sbleu)->zero_tokens()->default_value(false), "Output expected bleu and feature expectations at end of sampling")
("max-training-iterations,M", po::value(&max_training_iterations)->default_value(30), "Maximum training iterations")
("training-batch-size,S", po::value(&training_batch_size)->default_value(0), "Batch size to use during xpected bleu training, 0 = full corpus")
("reheatings", po::value<unsigned int>(&reheatings)->default_value(1), "Number of times to reheat the sampler")
("anneal,a", po::value(&anneal)->default_value(false)->zero_tokens(), "Use annealing during the burn in period")
("max-temp", po::value<float>(&max_temp)->default_value(4.0), "Annealing maximum temperature")
("eta", po::value<FValue>(&eta), "Default learning rate for SGD/EGD")
("prev-gradient", po::value<string>(&prev_gradient_file), "File containing previous gradient for restarting SGD/EGD")
("mu", po::value<float>(&mu)->default_value(1.0f), "Metalearning rate for EGD")
("gamma", po::value<float>(&gamma)->default_value(0.9f), "Smoothing parameter for Metanormalized EGD ")
("mbr-size", po::value<int>(&mbr_size)->default_value(200),"Number of samples to use for MBR decoding")
("mbr", po::value(&mbr_decoding)->zero_tokens()->default_value(false), "Minimum Bayes Risk Decoding")
("topn-size", po::value<int>(&topNsize)->default_value(0),"Number of samples to use for inner loop of MBR decoding")
("ref,r", po::value<vector<string> >(&ref_files), "Reference translation files for training")
("extra-feature-config,X", po::value<string>(&feature_file), "Configuration file for extra (non-Moses) features")
("check-features", po::value<bool>(&checkFeatures)->zero_tokens()->default_value(false), "Check features for consistency after every update")
("use-metanormalized-egd,N", po::value(&use_metanormalized_egd)->zero_tokens()->default_value(false), "Use metanormalized EGD")
("expected-bleu-deterministic-annealing-training,D", po::value(&expected_sbleu_da)->zero_tokens()->default_value(false), "Train to maximize expected sentence BLEU using deterministic annealing")
("optimizer-freq", po::value<int>(&optimizerFreq)->default_value(1),"Number of optimization to perform at given temperature")
("initial-det-anneal-temp", po::value<float>(&start_temp_expda)->default_value(1000.0f), "Initial deterministic annealing entropy temperature")
("final-det-anneal-temp", po::value<float>(&stop_temp_expda)->default_value(0.001f), "Final deterministic annealing entropy temperature")
("floor-temp", po::value<float>(&floor_temp_expda)->default_value(0.0f), "Floor temperature for det annealing")
("det-annealing-ratio,A", po::value<float>(&anneal_ratio_da)->default_value(0.5f), "Deterministc annealing ratio")
("weight-dump-freq", po::value<int>(&weight_dump_freq)->default_value(0), "Frequency to dump weight files during training")
("weight-dump-stem", po::value<string>(&weight_dump_stem)->default_value("weights"), "Stem of filename to use for dumping weights")
("init-iteration-number", po::value<int>(&init_iteration_number)->default_value(0), "First training iteration will be one after this (useful for restarting)")
("greedy", po::value(&greedy)->zero_tokens()->default_value(false), "Greedy sample acceptor")
("fixed-temp-accept", po::value(&fixedTemp)->zero_tokens()->default_value(false), "Fixed temperature sample acceptor")
("fixed-temperature", po::value<float>(&fixed_temperature)->default_value(1.0f), "Temperature for fixed temp sample acceptor")
("rao-blackwell", po::value(&raoBlackwell)->zero_tokens()->default_value(false), "Do Rao-Blackwellisation (aka conditional estimation")
("mapdecode", po::value(&mapdecode)->zero_tokens()->default_value(false), "MAP decoding")
("mh.ngramorders", po::value< vector <string> >(&ngramorders), "Indicate LMs and ngram orders to be used during MH/Gibbs")
("use-moses-kbesthyposet", po::value(&use_moses_kbesthyposet)->zero_tokens()->default_value(false), "Use Moses to generate kbest hypothesis set")
("print-moseskbest", po::value(&print_moseskbest)->zero_tokens()->default_value(false), "Print Moses kbest")
("lag", po::value<size_t>(&lag)->default_value(10), "Lag between collecting samples")
("flip-prob", po::value<float>(&flip_prob)->default_value(0.6f), "Probability of applying flip operator during random scan")
("merge-split-prob", po::value<float>(&merge_split_prob)->default_value(0.2f), "Probability of applying merge-split operator during random scan")
("retrans-prob", po::value<float>(&retrans_prob)->default_value(0.2f), "Probability of applying retrans operator during random scan")
("calc-exact-post", po::value(&calc_exact_posterior)->zero_tokens()->default_value(false), "Calculate exact posterior")
("filter-exact-post", po::value(&filter_by_posterior)->zero_tokens()->default_value(false), "Filter sample set using exact posterior")
("evidence-shrink", po::value<float>(&evidenceSetShrinkFactor)->default_value(0.9f), "Evidence set shrink factor for MBR decoding")
("random-shrink", po::value(&randomShrink)->zero_tokens()->default_value(false), "Shrink evidence set randomly, otherwise shrink by discarding low probability elements")
("log-base-factor", po::value<float>(&log_base_factor)->default_value(1.0f), "Scaling factor for log probabilities in translation and language models");
po::options_description cmdline_options;
cmdline_options.add(desc);
po::variables_map vm;
po::store(po::command_line_parser(argc,argv).
options(cmdline_options).run(), vm);
po::notify(vm);
if (help) {
std::cout << "Usage: " + string(argv[0]) + " -f mosesini-file [options]" << std::endl;
std::cout << desc << std::endl;
return 0;
}
if (weightfile.empty()) {
std::cerr << "Setting all feature weights to zero" << std::endl;
WeightManager::init();
} else {
std::cerr << "Loading feature weights from " << weightfile << std::endl;
WeightManager::init(weightfile);
}
if (expected_sbleu && expected_sbleu_da) {
std::cerr << "Incorrect usage: Cannot do both expected bleu training and expected bleu deterministic annealing training" << std::endl;
return 0;
}
float opProb = flip_prob + merge_split_prob + retrans_prob;
if (fabs(1.0 - opProb) > 0.00001) {
std::cerr << "Incorrect usage: specified operator probs should sum up to 1" << std::endl;
return 0;
}
if (translation_distro) translate = true;
if (derivation_distro) decode = true;
if (mosesini.empty()) {
cerr << "Error: No moses ini file specified" << endl;
return 1;
}
if (mpidebugfile.length()) {
MpiDebug::init(mpidebugfile,rank);
}
cerr << "optimizer freq " << optimizerFreq << endl;
assert(optimizerFreq != 0);
if (do_timing) {
timer.on();
}
if (log_base_factor != 1.0) {
//cerr << "Setting log base factor to " << log_base_factor << endl;
cerr << "setting log base factor disabled" << endl;
exit(1);
//SetLogBaseFactor(log_base_factor);
}
//set up moses
initMoses(mosesini,debug);
FeatureVector features;
FVector coreWeights;
configure_features_from_file(feature_file, features, false, coreWeights);
std::cerr << "Using " << features.size() << " features" << std::endl;
//scale model weights
WeightManager::instance().scale(scalefactor);
VERBOSE(1,"Scaled weights by factor of " << scalefactor << endl);
if (vm.count("random-seed")) {
RandomNumberGenerator::instance().setSeed(seed + rank);
}
auto_ptr<Gain> gain;
ostream* out = &cout;
if (!outputfile.empty()) {
ostringstream os;
os << setfill('0');
os << outputfile << '.' << setw(3) << rank << "_of_" << size;
VERBOSE(1, "Writing output to: " << os.str() << endl);
out = new ofstream(os.str().c_str());
}
auto_ptr<istream> in;
auto_ptr<InputSource> input;
auto_ptr<Optimizer> optimizer;
FVector etaVector(eta);
FVector prev_gradient;
if (!prev_gradient_file.empty()) {
prev_gradient.load(prev_gradient_file);
}
if (use_metanormalized_egd) {
optimizer.reset(new MetaNormalizedExponentiatedGradientDescent(
etaVector,
mu,
0.1, // minimal step scaling factor
gamma,
max_training_iterations,
prev_gradient));
} else {
optimizer.reset(new ExponentiatedGradientDescent(
etaVector,
mu,
0.1f, // minimal step scaling factor
max_training_iterations,
prev_gradient));
}
if (optimizer.get()) {
optimizer->SetIteration(init_iteration_number);
}
if (prior_variance != 0.0f) {
assert(prior_variance > 0);
std::cerr << "Using Gaussian prior: \\sigma^2=" << prior_variance << " \\mu=" << prior_mean << endl;
optimizer->SetUseGaussianPrior(prior_mean, prior_variance);
}
ExpectedBleuTrainer* trainer = NULL;
if (expected_sbleu || expected_sbleu_da) {
gain.reset(new Bleu());
gain->LoadReferences(ref_files,inputfile);
vector<string> input_lines;
ifstream infiles(inputfile.c_str());
assert (infiles);
while(infiles) {
string line;
getline(infiles, line);
if (line.empty() && infiles.eof()) break;
assert(!line.empty());
input_lines.push_back(line);
}
VERBOSE(1, "Loaded " << input_lines.size() << " lines in training mode" << endl);
if (!training_batch_size || training_batch_size > input_lines.size())
training_batch_size = input_lines.size();
VERBOSE(1, "Batch size: " << training_batch_size << endl);
trainer = new ExpectedBleuTrainer(rank, size, training_batch_size, &input_lines, seed, randomize, optimizer.get(),
weight_dump_freq, weight_dump_stem);
input.reset(trainer);
} else {
if (inputfile.size()) {
input.reset(new BatchedFileInputSource(inputfile,rank,size));
} else {
input.reset(new StreamInputSource(cin));
}
}
auto_ptr<SamplingSelector> selector(new SamplingSelector());
auto_ptr<AnnealingSchedule> annealingSchedule;
if (anneal) {
annealingSchedule.reset(new LinearAnnealingSchedule(burning_its, max_temp));
selector->SetAnnealingSchedule(annealingSchedule.get());
}
auto_ptr<AnnealingSchedule> detAnnealingSchedule;
if (expected_sbleu_da) {
detAnnealingSchedule.reset(new ExponentialAnnealingSchedule(start_temp_expda, stop_temp_expda, floor_temp_expda, anneal_ratio_da));
}
timer.check("Processing input file");
while (input->HasMore()) {
string line;
input->GetSentence(&line, &lineno);
cerr << "line : " << line << endl;
if (line.empty()) {
if (!input->HasMore()) continue;
assert(!"I don't like empty lines");
}
//configure the sampler
Sampler sampler;
sampler.SetSelector(selector.get());
sampler.SetCheckFeatures(checkFeatures);
VERBOSE(2,"Reheatings: " << reheatings << endl);
sampler.SetReheatings(reheatings);
sampler.SetLag(lag); //thinning factor for sample collection
auto_ptr<DerivationCollector> derivationCollector;
auto_ptr<ExpectedLossCollector> elCollector;
auto_ptr<GibblerMaxTransDecoder> transCollector;
if (expected_sbleu || output_expected_sbleu) {
elCollector.reset(new ExpectedLossCollector(gain->GetGainFunction(lineno)));
sampler.AddCollector(elCollector.get());
}
else if (expected_sbleu_da) {
elCollector.reset(new GibblerAnnealedExpectedLossCollector(gain->GetGainFunction(lineno), sampler));
sampler.AddCollector(elCollector.get());
//Set the annealing temperature
int it = optimizer->GetIteration() / optimizerFreq ;
float temp = detAnnealingSchedule->GetTemperatureAtTime(it);
GibblerAnnealedExpectedLossCollector* annealedELCollector = static_cast<GibblerAnnealedExpectedLossCollector*>(elCollector.get());
annealedELCollector->SetTemperature(temp);
cerr << "Annealing temperature " << annealedELCollector->GetTemperature() << endl;
}
if (mapdecode || decode || topn > 0 || periodic_decode > 0) {
DerivationCollector* collector = new DerivationCollector();
collector->setPeriodicDecode(periodic_decode);
collector->setCollectDerivationsByTranslation(collect_dbyt);
collector->setOutputMaxChange(output_max_change);
derivationCollector.reset(collector);
sampler.AddCollector(derivationCollector.get());
}
if (translate || mbr_decoding) {
transCollector.reset(new GibblerMaxTransDecoder());
transCollector->setOutputMaxChange(output_max_change);
sampler.AddCollector(transCollector.get() );
}
MergeSplitOperator mso(merge_split_prob);
FlipOperator fo(flip_prob);
TranslationSwapOperator tso(retrans_prob);
sampler.AddOperator(&mso);
sampler.AddOperator(&tso);
sampler.AddOperator(&fo);
if (greedy || fixed_temperature == 0) {
assert(!"greedy not supported");
}
else if (fixedTemp){
assert(!"fixed temp not supported");
}
sampler.SetIterations(iterations);
sampler.SetBurnIn(burning_its);
timer.check("Running decoder");
vector<TranslationHypothesis> translations;
translations.push_back(TranslationHypothesis(line));
timer.check("Running sampler");
sampler.Run(translations,features, raoBlackwell);
timer.check("Outputting results");
if (expected_sbleu || expected_sbleu_da) {
FVector gradient;
FValue exp_trans_len = 0;
FValue unreg_exp_gain = 0;
const float exp_gain = elCollector->UpdateGradient(&gradient, &exp_trans_len, &unreg_exp_gain);
(*out) << '(' << lineno << ") Expected sentence BLEU: " << exp_gain
<< " \tExpected length: " << exp_trans_len << endl;
if (trainer)
trainer->IncorporateGradient(
exp_trans_len,
gain->GetAverageReferenceLength(lineno),
exp_gain,
unreg_exp_gain,
gradient);
}
if (output_expected_sbleu) {
(*out) << "ESBLEU: " << lineno << " " << elCollector->getExpectedGain() << endl;
(*out) << "EFVs: " << lineno;
FVector scores = elCollector->getFeatureExpectations();
(*out) << scores << endl;
}
if (derivationCollector.get()) {
cerr << "DerivEntropy " << derivationCollector->getEntropy() << endl;
vector<pair<const Derivation*, float> > nbest;
derivationCollector->getNbest(nbest,max(topn,1u));
for (size_t i = 0; i < topn && i < nbest.size() ; ++i) {
//const Derivation d = *(nbest[i].first);
cerr << "NBEST: " << lineno << " ";
derivationCollector->outputDerivationProbability(nbest[i],derivationCollector->N(),cerr);
cerr << endl;
}
if (mapdecode) {
pair<const Derivation*, float> map_soln = derivationCollector->getMAP();
vector<string> sentence;
map_soln.first->getTargetSentence(sentence);
VERBOSE(1, "MAP Soln, model score [" << map_soln.second << "]" << endl)
copy(sentence.begin(),sentence.end(),ostream_iterator<string>(*out," "));
(*out) << endl << flush;
}
if (decode) {
pair<const Derivation*, float> max = derivationCollector->getMax();
vector<string> sentence;
max.first->getTargetSentence(sentence);
VERBOSE(1, "sample Soln, model score [" << max.first->getScore() << "]" << endl)
copy(sentence.begin(),sentence.end(),ostream_iterator<string>(*out," "));
(*out) << endl << flush;
}
if (collect_dbyt) {
derivationCollector->outputDerivationsByTranslation(std::cerr);
}
if (derivation_distro) {
std::cout << "BEGIN: derivation probability distribution" << std::endl;
derivationCollector->printDistribution(std::cout);
std::cout << "END: derivation probability distribution" << std::endl;
}
}
if (translate) {
cerr << "TransEntropy " << transCollector->getEntropy() << endl;
pair<const Translation*,float> maxtrans = transCollector->getMax();
(*out) << *maxtrans.first;
(*out) << endl << flush;
if (translation_distro) {
std::cout << "BEGIN: translation probability distribution" << std::endl;
transCollector->printDistribution(std::cout);
std::cout << "END: translation probability distribution" << std::endl;
}
}
if (mbr_decoding) {
pair<const Translation*,float> maxtrans;
// use samples as hyp set
maxtrans = transCollector->getMbr(mbr_size, topNsize);
(*out) << *maxtrans.first;
(*out) << endl << flush;
}
++lineno;
}
#ifdef MPI_ENABLED
MPI_Finalize();
#endif
(*out) << flush;
if (!outputfile.empty())
delete out;
return 0;
}
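
The --mbr branch above decodes by choosing, from the collected samples, the translation with the highest total gain against the rest of the sample set. A hedged sketch of that decision rule; the real implementation (GibblerMaxTransDecoder::getMbr) additionally supports the topn-size inner loop and the evidence-set shrinking configured above:

#include <cstddef>
#include <vector>

// gain is any pairwise similarity, e.g. sentence BLEU (sketch).
template <class Hyp, class GainFn>
std::size_t MbrIndexSketch(const std::vector<Hyp>& samples, GainFn gain) {
  std::size_t best = 0;
  double bestScore = -1e300;
  for (std::size_t i = 0; i < samples.size(); ++i) {
    double total = 0.0;
    for (std::size_t j = 0; j < samples.size(); ++j)
      if (i != j) total += gain(samples[i], samples[j]);
    if (total > bestScore) { bestScore = total; best = i; }
  }
  return best;
}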

476
josiah/LanguageModelFeature.cpp Normal file
View File

@ -0,0 +1,476 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "LanguageModelFeature.h"
#include <memory>
#include <vector>
#include "Gibbler.h"
using namespace Moses;
using namespace std;
namespace Josiah {
LanguageModelFeature::LanguageModelFeature(const Moses::LanguageModel* lmodel) :
m_lmodel(lmodel) {}
FeatureFunctionHandle LanguageModelFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new LanguageModelFeatureFunction(sample, m_lmodel));
}
LanguageModelFeatureFunction::LanguageModelFeatureFunction
(const Sample& sample, const LanguageModel* lmodel):
SingleValuedFeatureFunction(sample,lmodel->GetScoreProducerDescription()),
m_lmodel(lmodel) {}
/** Compute total score for sentence */
FValue LanguageModelFeatureFunction::computeScore() {
FValue score = 0;
size_t order = m_lmodel->GetNGramOrder();
vector<const Word*> lmcontext;
const vector<Word>& target = getSample().GetTargetWords();
lmcontext.reserve(target.size() + 2*(order-1));
for (size_t i = 0; i < order-1; ++i) {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceStartArray()));
}
for (size_t i = 0; i < target.size(); ++i) {
lmcontext.push_back(&(target[i]));
}
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceEndArray()));
vector<const Word*> ngram(order);
for (size_t ngramstart = 0; ngramstart < lmcontext.size() - (order -1); ++ngramstart) {
size_t ngramCtr = 0;
for (size_t j = ngramstart; j < ngramstart+order; ++j) {
ngram[ngramCtr++] = lmcontext[j];
}
score += GetValue(ngram);
}
return score;
}
/** Score due to one segment */
FValue LanguageModelFeatureFunction::getSingleUpdateScore(const Moses::TranslationOption* option, const TargetGap& gap) {
size_t order = m_lmodel->GetNGramOrder();
const TargetPhrase& targetPhrase = option->GetTargetPhrase();
vector<const Word*> lmcontext;
lmcontext.reserve(targetPhrase.GetSize() + 2*(order-1));
int start = gap.segment.GetStartPos() - (order-1);
//fill in the pre-context
for (size_t i = 0; i < order-1; ++i) {
if (start+(int)i < 0) {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceStartArray()));
} else {
lmcontext.push_back(&(getSample().GetTargetWords()[i+start]));
}
}
size_t startOption = lmcontext.size();
//fill in the target phrase
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
lmcontext.push_back(&(targetPhrase.GetWord(i)));
}
size_t endOption = lmcontext.size();
//fill in the postcontext
for (size_t i = 0; i < order-1; ++i) {
size_t targetPos = i + gap.segment.GetEndPos() + 1;
if (targetPos >= getSample().GetTargetWords().size()) {
if (targetPos == getSample().GetTargetWords().size()) {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceEndArray()));
}
} else {
lmcontext.push_back(&(getSample().GetTargetWords()[targetPos]));
}
}
//debug
IFVERBOSE(3) {
VERBOSE(3,"Segment: " << gap.segment << " phrase: " << option->GetTargetPhrase() << endl);
VERBOSE(3,"LM context ");
for (size_t j = 0; j < lmcontext.size(); ++j) {
VERBOSE(3,*(lmcontext[j]) << " ");
}
VERBOSE(3,endl);
}
//score lm
FValue lmscore = 0;
vector<const Word*> ngram(order);
bool useOptionCachedLMScore = false;
size_t ngramCtr;
for (size_t ngramstart = 0; ngramstart < lmcontext.size() - (order -1); ++ngramstart) {
if (ngramstart >= startOption && ngramstart + order - 1 < endOption) {
useOptionCachedLMScore = true;
}
else {
ngramCtr = 0;
for (size_t j = ngramstart; j < ngramstart+order; ++j) {
ngram[ngramCtr++] = lmcontext[j];
}
lmscore += GetValue(ngram);
}
}
if (useOptionCachedLMScore) {
const ScoreComponentCollection& sc = option->GetScoreBreakdown();
lmscore += sc.GetScoreForProducer(m_lmodel);
}
VERBOSE(2,"Language model score: " << lmscore << endl);
return lmscore;
}
/** Score due to two segments. The left and right refer to the target positions.**/
FValue LanguageModelFeatureFunction::getContiguousPairedUpdateScore(const TranslationOption* leftOption,
const TranslationOption* rightOption, const TargetGap& gap) {
//Create the whole segment
const WordsRange& targetSegment = gap.segment;
//create the phrase
size_t lsize = leftOption->GetTargetPhrase().GetSize();
size_t rsize = rightOption->GetTargetPhrase().GetSize();
vector<const Word*> targetPhrase(lsize+rsize);
size_t i = 0;
for (size_t j = 0; j < lsize; ++j, ++i) {
targetPhrase[i] = &(leftOption->GetTargetPhrase().GetWord(j));
}
for (size_t j = 0; j < rsize; ++j, ++i) {
targetPhrase[i] = &(rightOption->GetTargetPhrase().GetWord(j));
}
//set the indices for start and end positions
size_t leftStartPos(0);
size_t leftEndPos(leftOption->GetTargetPhrase().GetSize());
size_t rightStartPos(leftEndPos);
size_t rightEndPos(targetPhrase.size());
size_t order = m_lmodel->GetNGramOrder();
vector<const Word*> lmcontext;
lmcontext.reserve(targetPhrase.size() + 2*(order-1));
int start = targetSegment.GetStartPos() - (order-1);
//fill in the pre-context
for (size_t i = 0; i < order-1; ++i) {
if (start+(int)i < 0) {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceStartArray()));
} else {
lmcontext.push_back(&(getSample().GetTargetWords()[i+start]));
}
}
//Offset the indices by pre-context size
leftStartPos += lmcontext.size();
leftEndPos += lmcontext.size();
rightStartPos += lmcontext.size();
rightEndPos += lmcontext.size();
//fill in the target phrase
for (size_t i = 0; i < targetPhrase.size(); ++i) {
lmcontext.push_back(targetPhrase[i]);
}
//fill in the postcontext
for (size_t i = 0; i < order-1; ++i) {
size_t targetPos = i + targetSegment.GetEndPos() + 1;
if (targetPos >= getSample().GetTargetWords().size()) {
if (targetPos == getSample().GetTargetWords().size()) {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceEndArray()));
}
} else {
lmcontext.push_back(&(getSample().GetTargetWords()[targetPos]));
}
}
//debug
IFVERBOSE(3) {
VERBOSE(3,"Segment: " << targetSegment << /*" phrase: " << targetPhrase << */endl);
VERBOSE(3,"LM context ");
for (size_t j = 0; j < lmcontext.size(); ++j) {
VERBOSE(3,*(lmcontext[j]) << " ");
}
VERBOSE(3,endl);
}
//score lm
FValue lmscore = 0;
vector<const Word*> ngram(order);
bool useLeftOptionCacheLM(false), useRightOptionCacheLM(false) ;
size_t ngramCtr;
for (size_t ngramstart = 0; ngramstart < lmcontext.size() - (order -1); ++ngramstart) {
if (ngramstart >= leftStartPos && ngramstart + order - 1 < leftEndPos) {
useLeftOptionCacheLM = true;
}
else if (ngramstart >= rightStartPos && ngramstart + order - 1 < rightEndPos) {
useRightOptionCacheLM = true;
}
else {
ngramCtr = 0;
for (size_t j = ngramstart; j < ngramstart+order; ++j) {
ngram[ngramCtr++] = lmcontext[j];
}
lmscore += GetValue(ngram);
}
}
if (useLeftOptionCacheLM) {
const ScoreComponentCollection & sc = leftOption->GetScoreBreakdown();
lmscore += sc.GetScoreForProducer(m_lmodel);
}
if (useRightOptionCacheLM) {
const ScoreComponentCollection & sc = rightOption->GetScoreBreakdown();
lmscore += sc.GetScoreForProducer(m_lmodel);
}
VERBOSE(2,"Language model score: " << lmscore << endl);
return lmscore;
}
FValue LanguageModelFeatureFunction::getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap)
{
const Phrase& leftTgtPhrase = leftOption->GetTargetPhrase();
const Phrase& rightTgtPhrase = rightOption->GetTargetPhrase();
size_t order = m_lmodel->GetNGramOrder();
vector<const Word*> lmcontext;
lmcontext.reserve(max(leftTgtPhrase.GetSize(), rightTgtPhrase.GetSize()) + 2*(order-1));
int start = leftGap.segment.GetStartPos() - (order-1);
//fill in the pre-context
for (size_t i = 0; i < order-1; ++i) {
if (start+(int)i < 0) {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceStartArray()));
} else {
lmcontext.push_back(&(getSample().GetTargetWords()[i+start]));
}
}
size_t leftStartPos(lmcontext.size()); // to track option's cached LM Score
//fill in the target phrase
for (size_t i = 0; i < leftTgtPhrase.GetSize(); ++i) {
lmcontext.push_back(&(leftTgtPhrase.GetWord(i)));
}
// to track option's cached LM Score
size_t leftEndPos(lmcontext.size());
size_t rightStartPos(0), rightEndPos(0);
//fill in the postcontext needed for leftmost phrase
//First get words from phrases in between, then from right phrase, then words past right phrase, then end of sentence
size_t gapSize = rightGap.segment.GetStartPos() - leftGap.segment.GetEndPos() - 1;
size_t leftSegmentEndPos = leftGap.segment.GetEndPos();
for (size_t i = 0; i < order - 1; i++) {
int rightOffset = i - gapSize;
if (rightOffset < 0) {
lmcontext.push_back(&(getSample().GetTargetWords()[leftSegmentEndPos + i + 1]));
}
else if (rightOffset < (int)rightTgtPhrase.GetSize() ) {
if (rightOffset == 0) {
rightStartPos = lmcontext.size();
}
lmcontext.push_back(&(rightTgtPhrase.GetWord(rightOffset)));
rightEndPos = lmcontext.size();
}
else if (rightOffset - rightTgtPhrase.GetSize() + rightGap.segment.GetEndPos() + 1 < getSample().GetTargetWords().size() ) {
lmcontext.push_back(&(getSample().GetTargetWords()[(rightOffset - rightTgtPhrase.GetSize() + rightGap.segment.GetEndPos() + 1)]));
}
else {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceEndArray()));
break;
}
}
VERBOSE(3,"Left LM Context : ");
for (size_t i = 0; i < lmcontext.size(); i++) {
VERBOSE(3,*lmcontext[i] << " ");
}
VERBOSE(3, endl);
//score lm
FValue lmscore = 0;
vector<const Word*> ngram(order);
size_t ngramCtr;
bool useLeftOptionCacheLM(false), useRightOptionCacheLM(false) ;
for (size_t ngramstart = 0; ngramstart < lmcontext.size() - (order -1); ++ngramstart) {
if (ngramstart >= leftStartPos && ngramstart + order - 1 < leftEndPos) {
useLeftOptionCacheLM = true;
VERBOSE(3, "In flip, Left LM Context, Using cached option LM score for left Option: " << leftOption->GetTargetPhrase() << endl;)
}
else if (ngramstart >= rightStartPos && ngramstart + order - 1 < rightEndPos) {
useRightOptionCacheLM = true;
VERBOSE(3, "In flip, Left LM Context, Using cached option LM score for right Option: " << rightOption->GetTargetPhrase() << endl;)
}
else {
ngramCtr =0;
for (size_t j = ngramstart; j < ngramstart+order; ++j) {
ngram[ngramCtr++] =lmcontext[j];
}
lmscore += GetValue(ngram);
}
}
if (useLeftOptionCacheLM) {
const ScoreComponentCollection & sc = leftOption->GetScoreBreakdown();
lmscore += sc.GetScoreForProducer(m_lmodel);
}
VERBOSE(2,"Left option Language model score: " << lmscore << endl);
//Now for the right target phrase
lmcontext.clear();
//Reset the indices
leftStartPos = 0;
leftEndPos = 0;
rightStartPos = 0;
rightEndPos = 0;
//Fill in the pre-context
size_t i = 0;
if (order <= gapSize) { //no risk of ngram overlaps with left phrase post context
i = order -1;
}
else {//how far back can we go
i = gapSize;
}
size_t leftOffset = gapSize + leftTgtPhrase.GetSize();
for ( ; i > 0 ; --i) {
if (i > leftOffset + leftGap.segment.GetStartPos()) {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceStartArray()));
}
else if (i > leftOffset) {
lmcontext.push_back(&(getSample().GetTargetWords()[leftOffset - i + leftGap.segment.GetStartPos() ]));
}
else if ( i > gapSize) {
if (i - gapSize == 1){
leftStartPos = lmcontext.size();
}
lmcontext.push_back(&(leftTgtPhrase.GetWord(leftOffset - i)));
leftEndPos = lmcontext.size();
}
else {
lmcontext.push_back(&(getSample().GetTargetWords()[leftGap.segment.GetEndPos() + gapSize - i + 1 ]));
}
}
//Fill in right target phrase
rightStartPos = lmcontext.size();
//fill in the target phrase
for (size_t i = 0; i < rightTgtPhrase.GetSize(); ++i) {
lmcontext.push_back(&(rightTgtPhrase.GetWord(i)));
}
rightEndPos = lmcontext.size();
//Fill in post context
for (size_t i = 0; i < order-1; ++i) {
if ( i + rightGap.segment.GetEndPos() + 1 < getSample().GetTargetWords().size() ) {
lmcontext.push_back(&(getSample().GetTargetWords()[i + rightGap.segment.GetEndPos() + 1]));
}
else {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceEndArray()));
break;
}
}
VERBOSE(3,"Right LM Context : ");
for (size_t i = 0; i < lmcontext.size(); i++) {
VERBOSE(3,*lmcontext[i] << " ");
}
VERBOSE(3, endl);
useRightOptionCacheLM = false;
if ((int) lmcontext.size() - (int) (order -1) < 0 ) {//The left LM context completely subsumes the right LM Context, we're done
VERBOSE(2,"Language model score: " << lmscore << endl);
return lmscore;
}
size_t maxNgram = lmcontext.size() - (order -1);
for (size_t ngramstart = 0; ngramstart < maxNgram; ++ngramstart) {
if (ngramstart >= leftStartPos && ngramstart + order - 1 < leftEndPos) {
useLeftOptionCacheLM = true;
VERBOSE(3, "In flip, Right LM Context, Using cached option LM score for left Option: " << leftOption->GetTargetPhrase() << endl;)
}
//else-if: ngrams wholly inside the left option are covered by its cached score
else if (ngramstart >= rightStartPos && ngramstart + order - 1 < rightEndPos) {
useRightOptionCacheLM = true;
VERBOSE(3, "In flip, Right LM Context, Using cached option LM score for right Option: " << rightOption->GetTargetPhrase() << endl;)
}
else {
ngramCtr = 0;
for (size_t j = ngramstart; j < ngramstart+order; ++j) {
ngram[ngramCtr++] = lmcontext[j];
}
lmscore += GetValue(ngram);
}
}
if (useRightOptionCacheLM) {
const ScoreComponentCollection & sc = rightOption->GetScoreBreakdown();
lmscore += sc.GetScoreForProducer(m_lmodel);
}
VERBOSE(2,"Language model score: " << lmscore << endl);
return lmscore;
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
FValue LanguageModelFeatureFunction::getFlipUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap)
{
bool contiguous = (leftGap.segment.GetEndPos() + 1 == rightGap.segment.GetStartPos()) ;
if (contiguous) {
WordsRange segment(leftGap.segment.GetStartPos(), rightGap.segment.GetEndPos());
TargetGap gap(leftGap.leftHypo, rightGap.rightHypo, segment);
return getContiguousPairedUpdateScore(leftOption, rightOption, gap);
} else {
return getDiscontiguousPairedUpdateScore(leftOption, rightOption, leftGap, rightGap);
}
}
float LanguageModelFeatureFunction::GetValue(const std::vector<const Word*>& context) {
auto_ptr<FFState> state(m_lmodel->GetImplementation()->NewState());
return m_lmodel->GetImplementation()->GetValueForgotState(context,*state.get());
}
}
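
computeScore above pads the target with order-1 sentence-start markers and a single end marker, then scores every sliding window of length order. With order 3 and target "a b c", the scored windows are <s> <s> a, <s> a b, a b c and b c </s>. A sketch of the same enumeration with strings standing in for Moses Word pointers:

#include <cstddef>
#include <string>
#include <vector>

std::vector<std::vector<std::string> > NgramWindowsSketch(
    const std::vector<std::string>& target, std::size_t order) {
  std::vector<std::string> ctx;
  for (std::size_t i = 0; i + 1 < order; ++i) ctx.push_back("<s>");
  ctx.insert(ctx.end(), target.begin(), target.end());
  ctx.push_back("</s>");
  std::vector<std::vector<std::string> > windows;  // one entry per scored ngram
  for (std::size_t s = 0; s + order <= ctx.size(); ++s)
    windows.push_back(std::vector<std::string>(
        ctx.begin() + s, ctx.begin() + s + order));
  return windows;
}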

67
josiah/LanguageModelFeature.h Normal file
View File

@ -0,0 +1,67 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include "LanguageModel.h"
#include "FeatureFunction.h"
namespace Josiah {
class LanguageModelFeature : public Feature {
public:
LanguageModelFeature(const Moses::LanguageModel* lmodel);
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
private:
const Moses::LanguageModel* m_lmodel;
};
class LanguageModelFeatureFunction : public SingleValuedFeatureFunction {
public:
LanguageModelFeatureFunction(const Sample& sample, const Moses::LanguageModel* lmodel);
virtual FValue computeScore();
/** Score due to one segment */
virtual FValue getSingleUpdateScore(const Moses::TranslationOption* option, const TargetGap& gap);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual FValue getContiguousPairedUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& gap);
virtual FValue getDiscontiguousPairedUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual FValue getFlipUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
private:
float GetValue(const std::vector<const Word*>& context);
const Moses::LanguageModel* m_lmodel;
};
}

164
josiah/LexicalReorderingFeature.cpp Normal file
View File

@ -0,0 +1,164 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <sstream>
#include <string>
#include "LexicalReorderingFeature.h"
#include "ScoreComponentCollection.h"
using namespace Moses;
using namespace std;
namespace Josiah {
LexicalReorderingFeature::LexicalReorderingFeature
(Moses::LexicalReordering* lexReorder,size_t index) :
m_mosesLexReorder(lexReorder),
m_index(index) {
size_t featureCount = m_mosesLexReorder->GetNumScoreComponents();
const string& root = "LexicalReordering";
for (size_t i = 1; i <= featureCount; ++i) {
ostringstream namestream;
if (index > 0) {
namestream << index << "-";
}
namestream << i;
m_featureNames.push_back(FName(root,namestream.str()));
}
}
FeatureFunctionHandle LexicalReorderingFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(
new LexicalReorderingFeatureFunction(sample,m_featureNames,m_mosesLexReorder));
}
LexicalReorderingFeatureFunction::LexicalReorderingFeatureFunction
(const Sample& sample, std::vector<FName> featureNames,
LexicalReordering* lexReorder):
FeatureFunction(sample),
m_featureNames(featureNames),
m_mosesLexReorder(lexReorder) {
}
/** Assign the total score of this feature on the current hypo */
void LexicalReorderingFeatureFunction::assignScore(FVector& scores) {
/*
const Hypothesis * currHypo = getSample().GetTargetTail();
const FFState* state = m_mosesLexReorder->EmptyHypothesisState(currHypo->GetInput());
ScoreComponentCollection accumulator;
cerr << *currHypo << endl;
while ((currHypo = (currHypo->GetNextHypo()))) {
state = m_mosesLexReorder->Evaluate(*currHypo,state,&accumulator);
cerr << "AS: " << accumulator << endl;
}*/
vector<float> mosesScores = m_accumulator.GetScoresForProducer(m_mosesLexReorder);
for (size_t i = 0; i < m_featureNames.size(); ++i) {
scores[m_featureNames[i]] = mosesScores[i];
}
}
/** Update the previous state map.*/
void LexicalReorderingFeatureFunction::updateTarget() {
m_prevStates.clear();
m_accumulator.ZeroAll();
const Hypothesis * currHypo = getSample().GetTargetTail();
LRStateHandle prevState(dynamic_cast<const LexicalReorderingState*>(m_mosesLexReorder->EmptyHypothesisState(currHypo->GetInput())));
while ((currHypo = (currHypo->GetNextHypo()))) {
LRStateHandle currState(dynamic_cast<const LexicalReorderingState*>(m_mosesLexReorder->Evaluate(*currHypo,prevState.get(),&m_accumulator)));
for (size_t i = 0; i < currHypo->GetCurrTargetWordsRange().GetNumWordsCovered(); ++i) {
m_prevStates.push_back(prevState);
}
prevState = currState;
}
}
void LexicalReorderingFeatureFunction::addScore
(vector<float>& accumulator, FVector& scores) {
for (size_t i = 0; i < accumulator.size(); ++i) {
scores[m_featureNames[i]] += accumulator[i];
accumulator[i] = 0;
}
}
/** Score due to one segment */
void LexicalReorderingFeatureFunction::doSingleUpdate
(const TranslationOption* option, const TargetGap& gap, FVector& scores) {
vector<float> accumulator(m_mosesLexReorder->GetNumScoreComponents(),0);
//The previous state of the (new) current hypo.
LRStateHandle prevState = m_prevStates[gap.segment.GetStartPos()];
//Evaluate the score of inserting this hypo, and get the prev state
//for the next hypo.
prevState.reset(prevState->Expand(*option,accumulator));
addScore(accumulator,scores);
//if there's a hypo on the right, then evaluate it.
if (gap.rightHypo) {
prevState.reset(prevState->Expand(gap.rightHypo->GetTranslationOption(),accumulator));
addScore(accumulator,scores);
}
}
/** Score due to two segments. The left and right refer to the target positions.**/
void LexicalReorderingFeatureFunction::doContiguousPairedUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores) {
vector<float> accumulator(m_mosesLexReorder->GetNumScoreComponents(),0);
//The previous state of the (new) current hypo.
LRStateHandle prevState(m_prevStates[gap.segment.GetStartPos()]);
//Evaluate the hypos in the gap
prevState.reset(prevState->Expand(*leftOption,accumulator));
addScore(accumulator,scores);
prevState.reset(prevState->Expand(*rightOption,accumulator));
addScore(accumulator,scores);
//if there's a hypo on the right, then evaluate it.
if (gap.rightHypo) {
prevState.reset(prevState->Expand(gap.rightHypo->GetTranslationOption(),accumulator));
addScore(accumulator,scores);
}
}
void LexicalReorderingFeatureFunction::doDiscontiguousPairedUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
doSingleUpdate(leftOption,leftGap, scores);
doSingleUpdate(rightOption,rightGap, scores);
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void LexicalReorderingFeatureFunction::doFlipUpdate(
const TranslationOption* leftOption,
const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
if (leftGap.segment.GetEndPos() + 1 == rightGap.segment.GetStartPos()) {
TargetGap gap(leftGap.leftHypo,rightGap.rightHypo,
WordsRange(leftGap.segment.GetStartPos(),rightGap.segment.GetEndPos()));
doContiguousPairedUpdate(leftOption,rightOption,gap,scores);
} else {
doDiscontiguousPairedUpdate(leftOption,rightOption,leftGap,rightGap,scores);
}
}
}
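
The m_prevStates bookkeeping in updateTarget above duplicates each hypothesis's previous reordering state once per target word that hypothesis covers, so doSingleUpdate can fetch a restart state directly via gap.segment.GetStartPos(). A sketch of that expansion, with an int standing in for LRStateHandle:

#include <cstddef>
#include <utility>
#include <vector>

typedef int StateSketch;  // stand-in for LRStateHandle

// hypos[i] = (state reached after hypothesis i, target words covered by i)
std::vector<StateSketch> BuildPrevStatesSketch(
    const std::vector<std::pair<StateSketch, std::size_t> >& hypos) {
  std::vector<StateSketch> prevStates;
  StateSketch prev = 0;  // assumption: 0 plays the empty-hypothesis state
  for (std::size_t i = 0; i < hypos.size(); ++i) {
    for (std::size_t w = 0; w < hypos[i].second; ++w)
      prevStates.push_back(prev);
    prev = hypos[i].first;
  }
  return prevStates;
}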

87
josiah/LexicalReorderingFeature.h Normal file
View File

@ -0,0 +1,87 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <map>
#include <boost/shared_ptr.hpp>
#include "FeatureFunction.h"
#include "Gibbler.h"
#include "LexicalReordering.h"
namespace Josiah {
typedef boost::shared_ptr<const Moses::LexicalReorderingState> LRStateHandle;
/** Wraps Moses lexical reordering */
class LexicalReorderingFeature : public Feature {
public:
LexicalReorderingFeature(Moses::LexicalReordering* lexReorder,size_t index);
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
private:
Moses::LexicalReordering* m_mosesLexReorder;
size_t m_index;
std::vector<FName> m_featureNames;
size_t m_beginIndex;
};
class LexicalReorderingFeatureFunction : public FeatureFunction {
public:
LexicalReorderingFeatureFunction
(const Sample&, std::vector<FName> featureNames,
Moses::LexicalReordering* lexReorder);
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
virtual void updateTarget();
/** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
private:
void addScore(std::vector<float>& accumulator, FVector& scores);
std::vector<FName> m_featureNames;
Moses::LexicalReordering* m_mosesLexReorder;
//typedef std::map<Moses::WordsRange, const Moses::Hypothesis*> CurrentHypos_t;
//typedef std::map<Moses::WordsRange, const Moses::FFState*> PreviousStates_t;
//CurrentHypos_t m_currentHypos;
//PreviousStates_t m_previousStates;
//maps the word index to the previous state involved in score calculation
std::vector<LRStateHandle> m_prevStates;
ScoreComponentCollection m_accumulator;
};
}

78
josiah/Makefile.am Normal file

@@ -0,0 +1,78 @@
lib_LIBRARIES = libjosiah.a
bin_PROGRAMS = josiah samplerank unittest truncate
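# libjosiah.a bundles the sampler core; the four programs above link it
# together with libmoses and the Boost libraries listed below.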
libjosiah_a_SOURCES = \
AnnealingSchedule.cpp \
Bleu.cpp \
Derivation.cpp \
Decoder.cpp \
Dependency.cpp \
DiscriminativeLMFeature.cpp \
DistortionPenaltyFeature.cpp \
Gain.cpp \
Gibbler.cpp \
GibblerMaxDerivDecoder.cpp \
GibblerMaxTransDecoder.cpp \
GibbsOperator.cpp \
InputSource.cpp \
LanguageModelFeature.cpp \
LexicalReorderingFeature.cpp \
MetaFeature.cpp \
MpiDebug.cpp \
Optimizer.cpp \
ParenthesisFeature.cpp \
PhraseBoundaryFeature.cpp \
PhraseFeature.cpp \
PhrasePairFeature.cpp \
PosProjectionFeature.cpp \
Pos.cpp \
ReorderingFeature.cpp \
SampleCollector.cpp \
Sampler.cpp \
Selector.cpp \
SourceToTargetRatio.cpp \
StatelessFeature.cpp \
TrainingSource.cpp \
TranslationDelta.cpp \
Utils.cpp \
WeightManager.cpp \
WordPenaltyFeature.cpp
josiah_SOURCES = \
GibblerAnnealedExpectedLossTrainer.cpp \
GibblerExpectedLossTraining.cpp \
Josiah.cpp
samplerank_SOURCES = \
SampleRank.cpp \
SampleRankSelector.cpp \
OnlineLearner.cpp \
OnlineTrainingCorpus.cpp
unittest_SOURCES = \
OnlineTrainingCorpus.cpp \
Test.cpp \
TestBleu.cpp \
TestOnlineTrainingCorpus.cpp
truncate_SOURCES = \
Truncate.cpp
AM_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS)
josiah_LDADD = -L$(top_srcdir)/josiah -ljosiah $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt @KENLM_LDFLAGS@ $(BOOST_LDFLAGS) $(BOOST_SERIALIZATION_LIBS) $(BOOST_PROGRAM_OPTIONS_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIBS) $(BOOST_MPI_LIBS)
josiah_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.la $(top_srcdir)/josiah/libjosiah.a
samplerank_LDADD = -L$(top_srcdir)/josiah -ljosiah $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt @KENLM_LDFLAGS@ $(BOOST_LDFLAGS) $(BOOST_SERIALIZATION_LIBS) $(BOOST_PROGRAM_OPTIONS_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIBS) $(BOOST_MPI_LIBS)
samplerank_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.la $(top_srcdir)/josiah/libjosiah.a
unittest_CPPFLAGS = $(AM_CPPFLAGS) -DBOOST_TEST_DYN_LINK
unittest_LDADD = -L$(top_srcdir)/josiah -ljosiah $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt @KENLM_LDFLAGS@ $(BOOST_LDFLAGS) $(BOOST_SERIALIZATION_LIBS) $(BOOST_PROGRAM_OPTIONS_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIBS) $(BOOST_MPI_LIBS) -lboost_unit_test_framework
unittest_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.la $(top_srcdir)/josiah/libjosiah.a
truncate_LDADD = -L$(top_srcdir)/josiah -ljosiah $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt @KENLM_LDFLAGS@ $(BOOST_LDFLAGS) $(BOOST_SERIALIZATION_LIBS) $(BOOST_PROGRAM_OPTIONS_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIBS)
truncate_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.la $(top_srcdir)/josiah/libjosiah.a

109
josiah/MetaFeature.cpp Normal file

@@ -0,0 +1,109 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "MetaFeature.h"
using namespace std;
namespace Josiah {
MetaFeature::MetaFeature(const FVector& weights, const FeatureVector& features) :
m_weights(weights),
m_features(features){}
FeatureFunctionHandle MetaFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new MetaFeatureFunction(sample,*this));
}
FeatureFunctionVector MetaFeature::getFeatureFunctions(const Sample& sample) const {
FeatureFunctionVector ffv;
for (FeatureVector::const_iterator i = m_features.begin(); i != m_features.end(); ++i) {
ffv.push_back((*i)->getFunction(sample));
}
return ffv;
}
const FVector& MetaFeature::getWeights() const {
return m_weights;
}
MetaFeatureFunction::MetaFeatureFunction(const Sample& sample, const MetaFeature& parent)
: SingleValuedFeatureFunction(sample,"core"),
m_parent(parent),
m_featureFunctions(parent.getFeatureFunctions(sample))
{}
FValue MetaFeatureFunction::computeScore() {
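//Gather each component feature's score into one vector, then collapse
//it to a single value with the meta-feature's fixed internal weights.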
FVector scores;
for (FeatureFunctionVector::const_iterator i = m_featureFunctions.begin(); i != m_featureFunctions.end(); ++i) {
(*i)->assignScore(scores);
}
return scores.inner_product(m_parent.getWeights());
}
/** Score due to one segment */
FValue MetaFeatureFunction::getSingleUpdateScore(const Moses::TranslationOption* option, const TargetGap& gap) {
FVector scores;
for (FeatureFunctionVector::const_iterator i = m_featureFunctions.begin(); i != m_featureFunctions.end(); ++i) {
(*i)->doSingleUpdate(option,gap,scores);
}
return scores.inner_product(m_parent.getWeights());
}
/** Score due to two segments. The left and right refer to the target positions.**/
FValue MetaFeatureFunction::getContiguousPairedUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& gap)
{
FVector scores;
for (FeatureFunctionVector::const_iterator i = m_featureFunctions.begin(); i != m_featureFunctions.end(); ++i) {
(*i)->doContiguousPairedUpdate(leftOption,rightOption,gap,scores);
}
return scores.inner_product(m_parent.getWeights());
}
FValue MetaFeatureFunction::getDiscontiguousPairedUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap)
{
FVector scores;
for (FeatureFunctionVector::const_iterator i = m_featureFunctions.begin(); i != m_featureFunctions.end(); ++i) {
(*i)->doDiscontiguousPairedUpdate(leftOption,rightOption,leftGap,rightGap,scores);
}
return scores.inner_product(m_parent.getWeights());
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
FValue MetaFeatureFunction::getFlipUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap)
{
FVector scores;
for (FeatureFunctionVector::const_iterator i = m_featureFunctions.begin(); i != m_featureFunctions.end(); ++i) {
(*i)->doFlipUpdate(leftOption,rightOption,leftGap,rightGap,scores);
}
return scores.inner_product(m_parent.getWeights());
}
}

73
josiah/MetaFeature.h Normal file

@@ -0,0 +1,73 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include "FeatureFunction.h"
namespace Josiah {
/**
* A feature which consists of a collection of other features.
**/
class MetaFeature : public Feature {
public:
MetaFeature(const FVector& weights, const FeatureVector& features);
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
const std::string& getName() const;
FeatureFunctionVector getFeatureFunctions(const Sample& sample) const;
const FVector& getWeights() const;
private:
FVector m_weights;
FeatureVector m_features;
};
class MetaFeatureFunction : public SingleValuedFeatureFunction {
public:
MetaFeatureFunction(const Sample& sample, const MetaFeature& parent);
virtual FValue computeScore();
/** Score due to one segment */
virtual FValue getSingleUpdateScore(const Moses::TranslationOption* option, const TargetGap& gap);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual FValue getContiguousPairedUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& gap);
virtual FValue getDiscontiguousPairedUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual FValue getFlipUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
private:
const MetaFeature& m_parent;
FeatureFunctionVector m_featureFunctions;
};
}

41
josiah/MpiDebug.cpp Normal file

@@ -0,0 +1,41 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "MpiDebug.h"
using namespace std;
namespace Josiah {
int MpiDebug::verbosity = 0;
ofstream MpiDebug::out;
void MpiDebug::init(const string& fstem, int rank) {
ostringstream fname;
fname << fstem;
fname << ".";
fname << rank;
out.open(fname.str().c_str());
if (!out.good()) {
std::cerr << "Warn: unable to open mpi debug file" << fname.str() << endl;
}
}
}

41
josiah/MpiDebug.h Normal file

@@ -0,0 +1,41 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <fstream>
#include <iostream>
#include <sstream>
namespace Josiah {
struct MpiDebug {
static int verbosity;
static std::ofstream out;
static void init(const std::string& fstem, int rank);
};
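//MPI_VERBOSE(level,str) writes str to this rank's debug stream whenever
//MpiDebug::verbosity >= level; in non-MPI builds it expands to nothing.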
#ifdef MPI_ENABLED
#define MPI_VERBOSE(level,str) {if (Josiah::MpiDebug::verbosity >= level) { Josiah::MpiDebug::out << str;} }
#else
#define MPI_VERBOSE(level,str)
#endif
}

86
josiah/Ngram.cpp Normal file

@@ -0,0 +1,86 @@
// $Id: $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Ngram.h"
using namespace Josiah;
using namespace std;
Josiah::NgramCollector::NgramCollector(size_t order) : m_order(order) {
FactorCollection &factorCollection = FactorCollection::Instance();
m_start = factorCollection.AddFactor(Output, 0, BOS_);
m_end = factorCollection.AddFactor(Output, 1, EOS_);
}
string Josiah::NgramCollector::ToString(const vector<const Factor*>& ws) const {
ostringstream os;
for (vector<const Factor*>::const_iterator i = ws.begin(); i != ws.end(); ++i)
os << (*i)->GetString() << " ";
return os.str();
}
void Josiah::NgramCollector::collect(Sample& sample) {
const Hypothesis* h = sample.GetSampleHypothesis();
vector<const Factor*> trans;
h->GetTranslation(&trans, 0);
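//Slide a window of m_order words over the translation, padding with the
//start marker before position 0 and the end marker past the last word,
//and record every resulting ngram.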
for (int ngramstart = -((int)m_order-1); ngramstart < (int)trans.size(); ++ngramstart) {
vector<const Factor*> ngram(m_order);
for (int i = ngramstart; i < ngramstart+(int)m_order; ++i) {
if (i < 0) {
ngram[i-ngramstart] = m_start;
} else if (i >= (int)trans.size()) {
ngram[i-ngramstart] = m_end;
} else {
ngram[i-ngramstart] = trans[i];
}
}
m_counts.insert(ngram);
m_ngrams.insert(ngram);
}
}
void Josiah::NgramCollector::dump( std::ostream & out ) const {
//maps n-1-gram prefixes to suffix-count map
map<vector<const Factor*>, multimap<size_t, const Factor*, greater<size_t> > >sortedCounts;
for (set<vector<const Factor*> >::const_iterator i = m_ngrams.begin(); i != m_ngrams.end(); ++i) {
vector<const Factor*> prefix(m_order-1);
copy(i->begin(),i->end()-1,prefix.begin());
const Factor* suffix = i->at(i->size()-1);
cerr << "ngram: " << ToString(*i) << " prefix: " << ToString(prefix) << " suffix: " << suffix->GetString() << endl;
sortedCounts[prefix].insert(pair<size_t,const Factor* >(m_counts.count(*i),suffix));
}
for (map<vector<const Factor*>, multimap<size_t, const Factor*, greater<size_t> > >::const_iterator i = sortedCounts.begin();
i != sortedCounts.end(); ++i) {
for (multimap<size_t, const Factor*, greater<size_t> >::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
out << j->first << " ";
out << ToString(i->first);
out << j->second->GetString() << endl;
}
}
}

459
josiah/OnlineLearner.cpp Normal file

@@ -0,0 +1,459 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifdef MPI_ENABLED
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
#endif
#include "OnlineLearner.h"
#include "Utils.h"
#ifdef MPI_ENABLED
namespace mpi = boost::mpi;
#endif
using namespace std;
namespace Josiah {
PerceptronLearner::PerceptronLearner() :
m_learningRate(1.0) {}
void PerceptronLearner::setLearningRate(float learningRate) {
m_learningRate = learningRate;
}
void PerceptronLearner::doUpdate(
const FVector& currentFV,
const FVector& targetFV,
const FVector&,
float currentGain,
float targetGain,
float,
FVector& weights)
{
//Do update if target gain is better than curr gain
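//(a standard structured-perceptron step: move the weights toward the
//target's feature vector and away from the current one, scaled by the
//learning rate)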
if (targetGain > currentGain) {
weights -= m_learningRate * currentFV;
weights += m_learningRate * targetFV;
}
}
MiraLearner::MiraLearner() :
m_slack(0),
m_marginScale(1),
m_fixMargin(false),
m_margin(1),
m_useSlackRescaling(false) {}
void MiraLearner::setSlack(float slack) {
m_slack = slack;
}
void MiraLearner::setMarginScale(float marginScale) {
m_marginScale = marginScale;
}
void MiraLearner::setFixMargin(bool fixMargin) {
m_fixMargin = fixMargin;
}
void MiraLearner::setMargin(float margin) {
m_margin = margin;
}
void MiraLearner::setUseSlackRescaling(bool useSlackRescaling) {
m_useSlackRescaling = useSlackRescaling;
}
void MiraLearner::setScaleLossByTargetGain(bool scaleLossByTargetGain) {
m_scaleLossByTargetGain = scaleLossByTargetGain;
}
void MiraLearner::doUpdate(
const FVector& currFV,
const FVector& targetFV,
const FVector& ,
float currGain,
float targetGain,
float ,
FVector& weights)
{
FValue currScore = currFV.inner_product(weights);
FValue targetScore = targetFV.inner_product(weights);
VERBOSE(1,"currGain: " << currGain << " targetGain " << targetGain << endl);
IFVERBOSE(1) {
cerr << "target deriv has (scaled) gain " << m_marginScale * targetGain << " , fv : " << targetFV << " [ " << targetScore << " ]" << endl;
cerr << "curr deriv has (scaled) gain " << m_marginScale * currGain << " , fv : " << currFV << " [ " << currScore << " ]" << endl;
}
float loss = m_marginScale * (targetGain - currGain);
if (m_scaleLossByTargetGain) {
loss = loss*targetGain;
}
float margin;
if (m_fixMargin || m_useSlackRescaling) {
margin = m_margin;
} else {
margin = loss;
}
//constraint is a.x >= b, where x is the change in weight vector
float b = margin - (targetScore - currScore);
if (b <= 0) {
VERBOSE(1, "MiraLearner: alpha = " << 0 << endl);
return;
}
FVector a = targetFV - currFV;
float norma = inner_product(a,a);
if (norma == 0) {
VERBOSE(1, "MiraLearner: alpha = " << 0 << endl);
return;
}
//Update is min(C , b / ||a||^2) a
//where C is slack
//See Crammer et al.'s passive-aggressive paper for the solution of a similar problem.
float alpha = b / norma;
float slack = m_slack;
if (m_useSlackRescaling) {
slack = slack * loss;
}
VERBOSE(1, "MiraLearner: b = " << b << " norma = " << norma <<
" unclipped alpha = " << alpha << endl);
if (slack && alpha > slack) {
alpha = slack;
}
VERBOSE(1, "MiraLearner: alpha = " << alpha << endl);
weights += alpha * a;
}
void MiraPlusLearner::doUpdate(
const FVector& currFV,
const FVector& targetFV,
const FVector& optimalFV,
float currGain,
float targetGain,
float optimalGain,
FVector& weights)
{
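//Mira+ enforces up to three margin constraints: target vs. current,
//and (when the optimal derivation's gain beats the target's) optimal
//vs. target and optimal vs. current; all violated constraints are
//passed to hildreth() as one QP.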
FValue currScore = currFV.inner_product(weights);
FValue targetScore = targetFV.inner_product(weights);
FValue optimalGainScore = optimalFV.inner_product(weights);
VERBOSE(1,"currGain: " << currGain << " targetGain " << targetGain << " optimalGain " << optimalGain << endl);
cerr << "currGain: " << currGain << " targetGain " << targetGain << " optimalGain " << optimalGain << endl;
cerr << "currScore: " << currScore << " targetScore " << targetScore << " optimalScore " << optimalGainScore << endl;
IFVERBOSE(1) {
cerr << "Optimal deriv has (scaled) gain " << m_marginScale * optimalGain << " , fv : " << optimalFV << " [ " << optimalGainScore << " ]" << endl;
cerr << "target deriv has (scaled) gain " << m_marginScale * targetGain << " , fv : " << targetFV << " [ " << targetScore << " ]" << endl;
cerr << "curr deriv has (scaled) gain " << m_marginScale * currGain << " , fv : " << currFV << " [ " << currScore << " ]" << endl;
}
vector<float> b;
vector<FVector> distance;
float tgtcurrmargin = m_marginScale * (targetGain - currGain);
float opttgtmargin = m_marginScale * (optimalGain - targetGain);
float optcurrmargin = m_marginScale * (optimalGain - currGain);
if (m_fixMargin) {
tgtcurrmargin = m_margin;
opttgtmargin = m_margin;
optcurrmargin = m_margin;
}
//Score of target - Score of curr >= 1
b.push_back(tgtcurrmargin - (targetScore - currScore));
distance.push_back(targetFV);
distance.back() -= currFV;
VERBOSE(2, cerr << "b[0] = " << b[0] << " distance[0] = " << distance[0] << endl);
if (optimalGain > targetGain) {
//Score of optimal - Score of Target > 1
b.push_back(opttgtmargin - (optimalGainScore - targetScore));
distance.push_back(optimalFV);
distance.back() -= targetFV;
VERBOSE(2, cerr << "b[1] = " << b[1] << " distance[1] = " << distance[1] << endl);
//Score of optimal - Score of curr > 1
b.push_back(optcurrmargin - (optimalGainScore - currScore));
distance.push_back(optimalFV);
distance.back() -= currFV;
VERBOSE(2, cerr << "b[2] = " << b[2] << " distance[2] = " << distance[2] << endl);
}
/*
cerr << "b " << b[0];
if (b.size() > 1) {
cerr << " " << b[1] << " " << b[2];
}
cerr << endl;
*/
vector<float> alpha;
if (m_slack == -1)
alpha = hildreth(distance,b);
else
alpha = hildreth(distance,b, m_slack);
FVector update;
for (size_t k = 0; k < alpha.size(); k++) {
IFVERBOSE(1) {
cerr << "alpha " << alpha[k] << endl;
cerr << "dist " << distance[k] << endl;
}
distance[k] *= alpha[k];
update += distance[k];
}
cerr << "alpha " << alpha[0];
if (alpha.size() > 1) {
cerr << " " << alpha[1] << " " << alpha[2];
}
cerr << endl;
cerr << update << endl;
weights += update;
IFVERBOSE(1) {
cerr << "Mira++ updated weights to " << weights << endl;
}
IFVERBOSE(1) {
//Sanity check
currScore = currFV.inner_product(weights);
targetScore = targetFV.inner_product(weights);
optimalGainScore = optimalFV.inner_product(weights);
cerr << "Updated Current Weights : " << weights << endl;
cerr << "Target score - curr score " << targetScore - currScore << endl;
cerr << "margin * (Target gain - curr gain) " << m_marginScale * (targetGain - currGain) << endl;
cerr << "Optimal score - target score " << optimalGainScore - targetScore << endl;
cerr << "margin * (Optimal gain - target gain) " << m_marginScale * (optimalGain - targetGain) << endl;
cerr << "Optimal score - curr score " << optimalGainScore - currScore << endl;
cerr << "margin * (Optimal gain - curr gain) " << m_marginScale * (optimalGain - currGain) << endl;
}
}
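//Hildreth's algorithm: solves the QP
//  min 0.5*||x||^2   s.t.  a[i].x >= b[i] for all i
//by coordinate ascent on the dual, repeatedly correcting the most
//violated KKT condition; the solution is x = sum_i alpha[i]*a[i].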
vector<FValue> hildreth (const vector<FVector>& a, const vector<FValue>& b) {
size_t i;
int max_iter = 10000;
float eps = 0.00000001;
float zero = 0.000000000001;
vector<FValue> alpha ( b.size() );
vector<FValue> F ( b.size() );
vector<FValue> kkt ( b.size() );
float max_kkt = -1e100;
size_t K = b.size();
float A[K][K];
bool is_computed[K];
for ( i = 0; i < K; i++ )
{
A[i][i] = a[i].inner_product( a[i]);
is_computed[i] = false;
}
int max_kkt_i = -1;
for ( i = 0; i < b.size(); i++ )
{
F[i] = b[i];
kkt[i] = F[i];
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
}
int iter = 0;
FValue diff_alpha;
FValue try_alpha;
FValue add_alpha;
while ( max_kkt >= eps && iter < max_iter )
{
diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
try_alpha = alpha[max_kkt_i] + diff_alpha;
add_alpha = 0.0;
if ( try_alpha < 0.0 )
add_alpha = -1.0 * alpha[max_kkt_i];
else
add_alpha = diff_alpha;
alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;
if ( !is_computed[max_kkt_i] )
{
for ( i = 0; i < K; i++ )
{
A[i][max_kkt_i] = a[i].inner_product(a[max_kkt_i] ); // for version 1
//A[i][max_kkt_i] = 0; // for version 1
is_computed[max_kkt_i] = true;
}
}
for ( i = 0; i < F.size(); i++ )
{
F[i] -= add_alpha * A[i][max_kkt_i];
kkt[i] = F[i];
if ( alpha[i] > zero )
kkt[i] = std::fabs(F[i]);
}
max_kkt = -1e100;
max_kkt_i = -1;
for ( i = 0; i < F.size(); i++ )
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
iter++;
}
return alpha;
}
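//As above, but with the box constraint 0 <= alpha[i] <= C, giving the
//slack-penalized (soft-margin) variant used when a slack value is set.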
vector<FValue> hildreth (const vector<FVector>& a, const vector<FValue>& b, FValue C) {
size_t i;
int max_iter = 10000;
FValue eps = 0.00000001;
FValue zero = 0.000000000001;
vector<FValue> alpha ( b.size() );
vector<FValue> F ( b.size() );
vector<FValue> kkt ( b.size() );
float max_kkt = -1e100;
size_t K = b.size();
float A[K][K];
bool is_computed[K];
for ( i = 0; i < K; i++ )
{
A[i][i] = a[i].inner_product( a[i]);
is_computed[i] = false;
}
int max_kkt_i = -1;
for ( i = 0; i < b.size(); i++ )
{
F[i] = b[i];
kkt[i] = F[i];
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
}
int iter = 0;
FValue diff_alpha;
FValue try_alpha;
FValue add_alpha;
while ( max_kkt >= eps && iter < max_iter )
{
diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
try_alpha = alpha[max_kkt_i] + diff_alpha;
add_alpha = 0.0;
if ( try_alpha < 0.0 )
add_alpha = -1.0 * alpha[max_kkt_i];
else if (try_alpha > C)
add_alpha = C - alpha[max_kkt_i];
else
add_alpha = diff_alpha;
alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;
if ( !is_computed[max_kkt_i] )
{
for ( i = 0; i < K; i++ )
{
A[i][max_kkt_i] = a[i].inner_product(a[max_kkt_i] ); // for version 1
//A[i][max_kkt_i] = 0; // for version 1
is_computed[max_kkt_i] = true;
}
}
for ( i = 0; i < F.size(); i++ )
{
F[i] -= add_alpha * A[i][max_kkt_i];
kkt[i] = F[i];
if (alpha[i] > C - zero)
kkt[i]=-kkt[i];
else if (alpha[i] > zero)
kkt[i] = std::fabs(F[i]);
}
max_kkt = -1e100;
max_kkt_i = -1;
for ( i = 0; i < F.size(); i++ )
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
iter++;
}
return alpha;
}
}

118
josiah/OnlineLearner.h Normal file

@@ -0,0 +1,118 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <iostream>
#include <vector>
#ifdef MPI_ENABLED
#include <mpi.h>
#endif
#include <boost/shared_ptr.hpp>
#include "FeatureVector.h"
namespace Josiah {
std::vector<Moses::FValue> hildreth ( const std::vector<Moses::FVector>& a, const std::vector<Moses::FValue>& b );
std::vector<Moses::FValue> hildreth ( const std::vector<Moses::FVector>& a, const std::vector<Moses::FValue>& b, Moses::FValue );
class OnlineLearner {
public:
void virtual doUpdate(
const Moses::FVector& currentFV,
const Moses::FVector& targetFV,
const Moses::FVector& optimalFV,
float currentGain,
float targetGain,
float optimalGain,
Moses::FVector& weights) = 0;
virtual bool usesOptimalSolution() {return false;}
virtual ~OnlineLearner() {}
protected:
};
class PerceptronLearner : public OnlineLearner {
public:
PerceptronLearner();
void setLearningRate(float learningRate);
void virtual doUpdate(
const Moses::FVector& currentFV,
const Moses::FVector& targetFV,
const Moses::FVector& optimalFV,
float currentGain,
float targetGain,
float optimalGain,
Moses::FVector& weights);
private:
float m_learningRate;
};
class MiraLearner : public OnlineLearner {
public:
MiraLearner();
void setSlack(float slack);
void setMarginScale(float marginScale);
void setFixMargin(bool fixMargin);
void setMargin(float margin);
/** For incorporating gain, an alternative to margin rescaling */
void setUseSlackRescaling(bool useSlackRescaling);
void setScaleLossByTargetGain(bool scaleLossByTargetGain);
void virtual doUpdate(
const Moses::FVector& currentFV,
const Moses::FVector& targetFV,
const Moses::FVector& optimalFV,
float currentGain,
float targetGain,
float optimalGain,
Moses::FVector& weights);
protected:
float m_slack;
float m_marginScale;
bool m_fixMargin;
float m_margin;
bool m_useSlackRescaling;
bool m_scaleLossByTargetGain;
};
class MiraPlusLearner : public MiraLearner {
public:
void virtual doUpdate(
const Moses::FVector& currentFV,
const Moses::FVector& targetFV,
const Moses::FVector& optimalFV,
float currentGain,
float targetGain,
float optimalGain,
Moses::FVector& weights);
virtual bool usesOptimalSolution() {return true;}
};
typedef boost::shared_ptr<OnlineLearner> OnlineLearnerHandle;
}

493
josiah/OnlineLearnerOld.cpp Normal file

@@ -0,0 +1,493 @@
/*
* OnlineLearner.cpp
* josiah
*
* Created by Abhishek Arun on 26/06/2009.
* Copyright 2009 __MyCompanyName__. All rights reserved.
*
*/
#ifdef MPI_ENABLED
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
#include "MpiDebug.h"
#endif
#include "OnlineLearner.h"
#include "TranslationDelta.h"
#include "StaticData.h"
#include "Sampler.h"
#include "Gibbler.h"
#include "WeightManager.h"
#ifdef MPI_ENABLED
namespace mpi = boost::mpi;
#endif
namespace Josiah {
FVector OnlineLearner::GetAveragedWeights() {
return m_cumulWeights / m_iteration;
}
void OnlineLearner::UpdateCumul() {
m_cumulWeights += GetCurrWeights();
m_iteration++;
}
vector<FValue> OnlineLearner::hildreth (const vector<FVector>& a, const vector<FValue>& b) {
size_t i;
int max_iter = 10000;
float eps = 0.00000001;
float zero = 0.000000000001;
vector<FValue> alpha ( b.size() );
vector<FValue> F ( b.size() );
vector<FValue> kkt ( b.size() );
float max_kkt = -1e100;
size_t K = b.size();
float A[K][K];
bool is_computed[K];
for ( i = 0; i < K; i++ )
{
A[i][i] = a[i].inner_product( a[i]);
is_computed[i] = false;
}
int max_kkt_i = -1;
for ( i = 0; i < b.size(); i++ )
{
F[i] = b[i];
kkt[i] = F[i];
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
}
int iter = 0;
FValue diff_alpha;
FValue try_alpha;
FValue add_alpha;
while ( max_kkt >= eps && iter < max_iter )
{
diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
try_alpha = alpha[max_kkt_i] + diff_alpha;
add_alpha = 0.0;
if ( try_alpha < 0.0 )
add_alpha = -1.0 * alpha[max_kkt_i];
else
add_alpha = diff_alpha;
alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;
if ( !is_computed[max_kkt_i] )
{
for ( i = 0; i < K; i++ )
{
A[i][max_kkt_i] = a[i].inner_product(a[max_kkt_i] ); // for version 1
//A[i][max_kkt_i] = 0; // for version 1
is_computed[max_kkt_i] = true;
}
}
for ( i = 0; i < F.size(); i++ )
{
F[i] -= add_alpha * A[i][max_kkt_i];
kkt[i] = F[i];
if ( alpha[i] > zero )
kkt[i] = std::fabs(F[i]);
}
max_kkt = -1e100;
max_kkt_i = -1;
for ( i = 0; i < F.size(); i++ )
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
iter++;
}
return alpha;
}
vector<float> OnlineLearner::hildreth (const vector<FVector>& a, const vector<FValue>& b, FValue C) {
size_t i;
int max_iter = 10000;
FValue eps = 0.00000001;
FValue zero = 0.000000000001;
vector<FValue> alpha ( b.size() );
vector<FValue> F ( b.size() );
vector<FValue> kkt ( b.size() );
float max_kkt = -1e100;
size_t K = b.size();
float A[K][K];
bool is_computed[K];
for ( i = 0; i < K; i++ )
{
A[i][i] = a[i].inner_product( a[i]);
is_computed[i] = false;
}
int max_kkt_i = -1;
for ( i = 0; i < b.size(); i++ )
{
F[i] = b[i];
kkt[i] = F[i];
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
}
int iter = 0;
FValue diff_alpha;
FValue try_alpha;
FValue add_alpha;
while ( max_kkt >= eps && iter < max_iter )
{
diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
try_alpha = alpha[max_kkt_i] + diff_alpha;
add_alpha = 0.0;
if ( try_alpha < 0.0 )
add_alpha = -1.0 * alpha[max_kkt_i];
else if (try_alpha > C)
add_alpha = C - alpha[max_kkt_i];
else
add_alpha = diff_alpha;
alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;
if ( !is_computed[max_kkt_i] )
{
for ( i = 0; i < K; i++ )
{
A[i][max_kkt_i] = a[i].inner_product(a[max_kkt_i] ); // for version 1
//A[i][max_kkt_i] = 0; // for version 1
is_computed[max_kkt_i] = true;
}
}
for ( i = 0; i < F.size(); i++ )
{
F[i] -= add_alpha * A[i][max_kkt_i];
kkt[i] = F[i];
if (alpha[i] > C - zero)
kkt[i]=-kkt[i];
else if (alpha[i] > zero)
kkt[i] = std::fabs(F[i]);
}
max_kkt = -1e100;
max_kkt_i = -1;
for ( i = 0; i < F.size(); i++ )
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
iter++;
}
return alpha;
}
void PerceptronLearner::doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf) {
//Do update if target gain is better than curr gain
if (target->GetGain(gf) > curr->GetGain(gf)) {
GetCurrWeights() -= curr->getScores();
GetCurrWeights() += target->getScores();
m_numUpdates++;
}
UpdateCumul();
}
void CWLearner::doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf) {
//we consider the following binary classification task: does the target jump have a higher gain than the curr jump?
//the score for the input features (could also be calculated by m_features * current weights)
float scoreDiff = target->getScore() - curr->getScore();
VERBOSE(1, "ScoreDiff: " << scoreDiff << endl)
//what is the actual gain of target vs current (the gold gain)
float gainDiff = target->GetGain(gf) - curr->GetGain(gf);
VERBOSE(1, "GainDiff: " << gainDiff << endl)
//the gold 1/-1 label
float y = gainDiff > 0 ? 1.0 : -1.0;
VERBOSE(1, "Label: " << y << endl)
//the mean of margin for this task is y * score
float marginMean = y * scoreDiff;
VERBOSE(1, "marginMean: " << marginMean << endl)
//only update at error
if (marginMean < 0) {
//the input feature vector to this task is (f(target) - f(curr))
m_features.clear();
m_features += target->getScores();
m_features -= curr->getScores();
VERBOSE(1, "feature delta: " << m_features << endl)
//the variance is based on the input features
float marginVariance = calculateMarginVariance(m_features);
VERBOSE(1, "marginVariance: " << marginVariance << endl)
//get the kkt multiplier
float alpha = kkt(marginMean,marginVariance);
VERBOSE(1, "alpha: " << alpha << endl)
//update the mean parameters
updateMean(alpha, y);
VERBOSE(1, "new weights: " << GetCurrWeights() << endl)
//update the variance parameters
updateVariance(alpha);
VERBOSE(1, "new variance: " << m_currSigmaDiag << endl)
//remember that we made an update
m_numUpdates++;
}
UpdateCumul();
}
void MiraLearner::doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf) {
vector<float> b;
float scoreDiff = target->getScore() - curr->getScore();
float gainDiff = target->GetGain(gf) - curr->GetGain(gf); //%BLEU
//Scale the margin
gainDiff *= m_marginScaleFactor;
//Or set it to a fixed value
if (m_fixMargin) {
gainDiff = m_margin;
}
bool doMira = false;
if (scoreDiff < gainDiff) { //MIRA Constraints not satisfied, run MIRA
doMira = true;
b.push_back( gainDiff - scoreDiff);
vector<FVector> distance;
FVector dist(target->getScores());
dist -= curr->getScores();
distance.push_back(dist);
vector<float> alpha;
if (m_slack == -1)
alpha = hildreth(distance,b);
else
alpha = hildreth(distance,b, m_slack);
FVector update;
for (size_t k = 0; k < alpha.size(); k++) {
IFVERBOSE(1) {
cerr << "alpha " << alpha[k] << endl;
}
FVector dist = distance[k];
dist *= alpha[k];
update += dist;
m_numUpdates++;
}
//Normalize
VERBOSE(1, "Update wv " << update << endl);
if (m_normalizer)
m_normalizer->Normalize(update);
VERBOSE(1,"After norm, Update wv " << update << endl);
}
else {
IFVERBOSE(1) {
cerr << "Not doing updates cos constraints already satisified" << endl;
cerr << "Target score" << target->getScore() << ", curr score " << curr->getScore() << endl;
cerr << "Target (scaled) gain" << target->GetGain(gf) * m_marginScaleFactor << ", curr gain " << curr->GetGain(gf) * m_marginScaleFactor << endl;
}
}
UpdateCumul();
IFVERBOSE(1) {
if (doMira) {
//Sanity check
curr->updateWeightedScore();
target->updateWeightedScore();
cerr << "Target score - curr score " << target->getScore() - curr->getScore() << endl;
cerr << "Target scaled gain - curr scaled gain " << ((target->GetGain(gf) - curr->GetGain(gf)) * m_marginScaleFactor ) << endl;
}
}
}
/*This Update enforces 3 constraints :
1. Score of target - Score of curr >= 1
2. Score of optimal - Score of Target > 1
3. Score of optimal - Score of curr > 1
*/
void MiraPlusLearner::doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf) {
FVector currFV = curr->getScores();
currFV -= noChangeDelta->getScores();
currFV += curr->getSample().GetFeatureValues();
FValue currScore = currFV.inner_product(WeightManager::instance().get());
FVector targetFV = target->getScores();
targetFV -= noChangeDelta->getScores();
targetFV += target->getSample().GetFeatureValues();
FValue targetScore = targetFV.inner_product(WeightManager::instance().get());
FValue optimalGainScore = optimalFV.inner_product(WeightManager::instance().get());
float targetGain = target->GetGain(gf);
float currGain = curr->GetGain(gf);
IFVERBOSE(1) {
cerr << "Optimal deriv has (scaled) gain " << m_marginScaleFactor * optimalGain << " , fv : " << optimalFV << " [ " << optimalGainScore << " ]" << endl;
cerr << "target deriv has (scaled) gain " << m_marginScaleFactor * targetGain << " , fv : " << targetFV << " [ " << targetScore << " ]" << endl;
cerr << "curr deriv has (scaled) gain " << m_marginScaleFactor * currGain << " , fv : " << currFV << " [ " << currScore << " ]" << endl;
}
vector<float> b;
vector<FVector> distance;
float tgtcurrmargin = m_marginScaleFactor * (targetGain - currGain);
float opttgtmargin = m_marginScaleFactor * (optimalGain - targetGain);
float optcurrmargin = m_marginScaleFactor * (optimalGain - currGain);
if (m_fixMargin) {
tgtcurrmargin = m_margin;
opttgtmargin = m_margin;
optcurrmargin = m_margin;
}
//Score of target - Score of curr >= 1
b.push_back(tgtcurrmargin - (targetScore - currScore));
distance.push_back(targetFV);
distance.back() -= currFV;
VERBOSE(2, cerr << "b[0] = " << b[0] << " distance[0] = " << distance[0] << endl);
if (optimalGain > targetGain) {
//Score of optimal - Score of Target > 1
b.push_back(opttgtmargin - (optimalGainScore - targetScore));
distance.push_back(optimalFV);
distance.back() -= targetFV;
VERBOSE(2, cerr << "b[1] = " << b[1] << " distance[1] = " << distance[1] << endl);
//Score of optimal - Score of curr > 1
b.push_back(optcurrmargin - (optimalGainScore - currScore));
distance.push_back(optimalFV);
distance.back() -= currFV;
VERBOSE(2, cerr << "b[2] = " << b[2] << " distance[2] = " << distance[2] << endl);
}
vector<float> alpha;
if (m_slack == -1)
alpha = hildreth(distance,b);
else
alpha = hildreth(distance,b, m_slack);
FVector update;
for (size_t k = 0; k < alpha.size(); k++) {
IFVERBOSE(1) {
cerr << "alpha " << alpha[k] << endl;
cerr << "dist " << distance[k] << endl;
}
distance[k] *= alpha[k];
update += distance[k];
}
//Normalize
VERBOSE(2, "Before normalise: " << update << endl);
if (m_normalizer)
m_normalizer->Normalize(update);
VERBOSE(2, "After normalise: " << update << endl);
GetCurrWeights() += update;
IFVERBOSE(1) {
cerr << "Mira++ updated weights to " << GetCurrWeights() << endl;
}
m_numUpdates++;
UpdateCumul();
IFVERBOSE(1) {
//Sanity check
currScore = currFV.inner_product(WeightManager::instance().get());
targetScore = targetFV.inner_product(WeightManager::instance().get());
optimalGainScore = optimalFV.inner_product(WeightManager::instance().get());
cerr << "Updated Current Weights : " << GetCurrWeights() << endl;
cerr << "Target score - curr score " << targetScore - currScore << endl;
cerr << "margin * (Target gain - curr gain) " << m_marginScaleFactor * (targetGain - currGain) << endl;
cerr << "Optimal score - target score " << optimalGainScore - targetScore << endl;
cerr << "margin * (Optimal gain - target gain) " << m_marginScaleFactor * (optimalGain - targetGain) << endl;
cerr << "Optimal score - curr score " << optimalGainScore - currScore << endl;
cerr << "margin * (Optimal gain - curr gain) " << m_marginScaleFactor * (optimalGain - currGain) << endl;
}
}
}

174
josiah/OnlineLearnerOld.h Normal file

@@ -0,0 +1,174 @@
#pragma once
#include <cmath>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <vector>
#ifdef MPI_ENABLED
#include <mpi.h>
#endif
#include "FeatureVector.h"
#include "Gain.h"
#include "TranslationDelta.h"
#include "WeightManager.h"
namespace Josiah {
class Sampler;
class WeightNormalizer;
class OnlineLearner {
public :
OnlineLearner(const FVector& initWeights, const std::string& name) :
m_name(name), m_iteration(0) {} //, m_averaging(true)
virtual void doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf) = 0;
void UpdateCumul() ;
FVector& GetCurrWeights() {return WeightManager::instance().get();}
FVector GetAveragedWeights() ;
virtual ~OnlineLearner() {}
virtual void reset() {}
virtual size_t GetNumUpdates() = 0;
const std::string & GetName() {return m_name;}
protected:
//bool m_averaging;
FVector m_cumulWeights;
std::string m_name;
size_t m_iteration;
std::vector<FValue> hildreth ( const std::vector<FVector>& a, const std::vector<FValue>& b );
std::vector<FValue> hildreth ( const std::vector<FVector>& a, const std::vector<FValue>& b, FValue );
};
class PerceptronLearner : public OnlineLearner {
public :
PerceptronLearner(const FVector& initWeights, const std::string& name, FValue learning_rate = 1.0) : OnlineLearner(initWeights, name), m_learning_rate(learning_rate), m_numUpdates() {}
virtual void doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf);
virtual ~PerceptronLearner() {}
virtual void reset() {m_numUpdates = 0;}
virtual size_t GetNumUpdates() { return m_numUpdates;}
private:
FValue m_learning_rate;
size_t m_numUpdates;
};
class CWLearner : public OnlineLearner {
public :
CWLearner(const FVector& initWeights, const std::string& name, FValue confidence = 1.644854f, FValue initialVariance = 1.0f) :
OnlineLearner(initWeights, name), m_features(initWeights),m_confidence(confidence), m_epsilon(0.0000001f),m_numUpdates(){
m_currSigmaDiag += initialVariance;
}
virtual void doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf);
virtual ~CWLearner() {}
virtual void reset() {m_numUpdates = 0;}
virtual size_t GetNumUpdates() { return m_numUpdates;}
private:
FVector m_currSigmaDiag;
FVector m_features;
FValue m_confidence;
FValue m_learning_rate;
FValue m_epsilon;
size_t m_numUpdates;
bool sign(FValue value) { return value > 0.0; }
FValue kkt(FValue marginMean, FValue marginVariance) {
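//Closed-form Lagrange multiplier of the confidence-weighted update
//(cf. Dredze et al. 2008): zero when the margin mean already exceeds
//confidence * variance, otherwise the positive root of the CW quadratic.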
if (marginMean >= m_confidence * marginVariance) return 0.0;
//margin variance approximately == 0 ?
if (marginVariance < 0.0 + m_epsilon && marginVariance > 0.0 - m_epsilon) return 0.0;
FValue v = 1.0 + 2.0 * m_confidence * marginMean;
FValue lambda = (-v + sqrt(v * v - 8.0 * m_confidence * (marginMean - m_confidence * marginVariance))) / (4.0 * m_confidence * marginVariance);
return lambda > 0.0 ? lambda : 0.0;
}
FValue calculateMarginVariance(const FVector& features) {
return (features*features*m_currSigmaDiag).sum();
}
void updateMean(FValue alpha, FValue y) {
WeightManager::instance().get() += alpha*y*m_currSigmaDiag*m_features;
}
void updateVariance(FValue alpha) {
m_currSigmaDiag = 1.0 / (1.0 / m_currSigmaDiag + (2.0 * alpha * m_confidence * m_features * m_features));
}
};
class MiraLearner : public OnlineLearner {
public :
MiraLearner(const FVector& initWeights, const std::string& name, bool fixMargin, FValue margin, FValue slack, FValue scale_margin = 1.0, WeightNormalizer* wn = NULL) : OnlineLearner(initWeights, name), m_numUpdates(), m_fixMargin(fixMargin), m_margin(margin), m_slack(slack), m_marginScaleFactor(scale_margin), m_normalizer(wn) {}
virtual void doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf);
virtual ~MiraLearner() {}
virtual void reset() {m_numUpdates = 0;}
virtual size_t GetNumUpdates() { return m_numUpdates;}
void SetNormalizer(WeightNormalizer* normalizer) {m_normalizer = normalizer;}
protected:
size_t m_numUpdates;
bool m_fixMargin;
FValue m_margin;
FValue m_slack;
FValue m_marginScaleFactor;
WeightNormalizer* m_normalizer;
};
class MiraPlusLearner : public MiraLearner {
public :
MiraPlusLearner(const FVector& initWeights, const std::string& name, bool fixMargin, FValue margin, FValue slack, FValue scale_margin = 1.0, WeightNormalizer* wn = NULL) : MiraLearner(initWeights, name, fixMargin, margin, slack, scale_margin, wn) {}
virtual void doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf);
virtual ~MiraPlusLearner() {}
};
class WeightNormalizer {
public :
WeightNormalizer(FValue norm) {m_norm = norm;}
virtual ~WeightNormalizer() {}
virtual void Normalize(FVector& ) = 0;
protected :
FValue m_norm;
};
class L1Normalizer : public WeightNormalizer {
public:
L1Normalizer (FValue norm) : WeightNormalizer(norm) {}
virtual ~L1Normalizer() {}
virtual void Normalize(FVector& weights) {
weights *= (m_norm / weights.l1norm());
}
};
class L2Normalizer : public WeightNormalizer {
public:
L2Normalizer (FValue norm) : WeightNormalizer(norm) {}
virtual ~L2Normalizer() {}
virtual void Normalize(FVector& weights) {
weights *= (m_norm / weights.l2norm());
}
};
}


@@ -0,0 +1,158 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <fstream>
#include <stdexcept>
#ifdef MPI_ENABLED
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
namespace mpi=boost::mpi;
#endif
#include "OnlineTrainingCorpus.h"
#include "Selector.h"
#include "Utils.h"
using namespace Moses;
using namespace std;
namespace Josiah {
OnlineTrainingCorpus::OnlineTrainingCorpus (
const std::string& sourceFile,
size_t batchLines,
size_t epochLines,
size_t maxLines,
int mpiSize,
int mpiRank):
m_batchLines(batchLines),
m_epochLines(epochLines),
m_maxLines(maxLines),
m_mpiSize(mpiSize),
m_mpiRank(mpiRank),
m_totalLines(0)
{
if (batchLines && epochLines % batchLines != 0) {
throw runtime_error("Size of epoch should be divisible by batch size");
}
if (maxLines % epochLines != 0) {
throw runtime_error("Maximum lines should be divisible by epoch size");
}
if (batchLines > 1) {
if (epochLines % mpiSize != 0) {
throw runtime_error("When using batching, shards should be of equal size");
}
size_t shardLines = epochLines / mpiSize;
if (shardLines % batchLines != 0) {
throw runtime_error("Shard size should be divisible by batch size");
}
}
//Load the source file
ifstream in(sourceFile.c_str());
if (!in) {
throw runtime_error("Unable to open: " + sourceFile);
}
string line;
while (getline(in,line)) {
m_lines.push_back(line);
}
//Line ids
for (size_t i = 0; i < m_lines.size(); ++i) {
m_lineIds.push_back(i);
}
m_lineIdIndex = 0;
RandomIndex rand;
random_shuffle(m_lineIds.begin(),m_lineIds.end(),rand);
InitEpoch();
}
void OnlineTrainingCorpus::InitEpoch() {
//sentence ids in this epoch
vector<size_t> epoch;
if (m_mpiRank == 0) {
while (epoch.size() < m_epochLines) {
epoch.push_back(m_lineIds[m_lineIdIndex]);
++m_lineIdIndex;
if (m_lineIdIndex >= m_lineIds.size()) m_lineIdIndex = 0;
}
}
//split into shards
m_shard.clear();
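//Rank 0 drew this epoch's line ids above; broadcast them to all ranks,
//then take this rank's contiguous slice as its shard (with batching
//disabled, every rank keeps the whole epoch).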
#ifdef MPI_ENABLED
mpi::communicator world;
mpi::broadcast(world,epoch,0);
#endif
if (m_batchLines) {
float shard_size = m_epochLines / (float)m_mpiSize;
VERBOSE(1, "Shard size: " << shard_size << endl);
size_t shard_start = (size_t)(shard_size *m_mpiRank);
size_t shard_end = (size_t)(shard_size * (m_mpiRank+1));
if (m_mpiRank == m_mpiSize-1) shard_end = m_epochLines;
VERBOSE(1, "Rank: " << m_mpiRank << " Shard start: " << shard_start << " Shard end: " << shard_end << endl);
for (size_t i = shard_start; i < shard_end; ++i) {
m_shard.push_back(epoch[i]);
}
} else {
//each core gets whole epoch as a shard
m_shard.insert(m_shard.begin(),epoch.begin(),epoch.end());
VERBOSE(1,"Shard contains whole epoch" << endl);
}
}
/** Next batch of sentences. Flags indicate whether we should mix or dump at the end of this batch*/
void OnlineTrainingCorpus::GetNextBatch(
std::vector<std::string>* lines,
std::vector<std::size_t>* lineNumbers,
bool* shouldMix)
{
lines->clear();
lineNumbers->clear();
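//Pop line ids off the back of the shard until the batch is full (with
//batching disabled, a batch is a whole epoch); an exhausted shard
//triggers a mix and the start of a new epoch.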
while (lines->size() < m_batchLines ||
(m_batchLines == 0 && lines->size() < m_epochLines)) {
lineNumbers->push_back(m_shard.back());
lines->push_back(m_lines[m_shard.back()]);
m_shard.pop_back();
VERBOSE(1,"Add to batch: " << lineNumbers->back() << " rank: " << m_mpiRank << endl);
}
if (m_shard.empty()) {
*shouldMix = true;
InitEpoch();
} else {
*shouldMix = false;
}
VERBOSE(1, "Mix?: " << *shouldMix << " rank: " << m_mpiRank << endl);
if (m_batchLines) {
m_totalLines += (m_batchLines*m_mpiSize);
} else {
m_totalLines += m_epochLines;
}
VERBOSE(1,"Total lines: " << m_totalLines << " rank: " << m_mpiRank << endl);
}
bool OnlineTrainingCorpus::HasMore() const {
return m_totalLines < m_maxLines;
}
}


@@ -0,0 +1,69 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <string>
#include <vector>
namespace Josiah {
/**
* Used to manage the training corpus - batching, sharding, etc.
**/
class OnlineTrainingCorpus {
public:
OnlineTrainingCorpus(
const std::string& sourceFile,
size_t batchLines, //Size of batches
size_t epochLines, //Total lines in each epoch. These will be split into shards
size_t maxLines, //Total lines to be processed
int mpiSize,
int mpiRank);
//NB: maxLines must be divisible by epochLines
// epochLines must be divisible by batchLines.
/** Next batch of sentences. Flags indicate whether we should mix or dump at the end of this batch*/
void GetNextBatch(std::vector<std::string>* lines,
std::vector<std::size_t>* lineNumbers,
bool* shouldMix);
bool HasMore() const;
private:
void InitEpoch();
size_t m_batchLines;
size_t m_epochLines;
size_t m_maxLines;
int m_mpiSize;
int m_mpiRank;
size_t m_totalLines;
std::vector<std::string> m_lines;
std::vector<size_t> m_lineIds;
std::vector<size_t> m_shard;
size_t m_lineIdIndex;
};
}

101
josiah/Optimizer.cpp Normal file

@@ -0,0 +1,101 @@
#include "Optimizer.h"
#include <iostream>
using namespace Moses;
using namespace std;
namespace Josiah {
Optimizer::~Optimizer() {}
void Optimizer::Optimize(
FValue f,
const FVector x,
const FVector& gr,
FVector* new_x
) {
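//Bookkeeping wrapper: counts the iteration, optionally adjusts the
//gradient by the Gaussian prior, delegates to OptimizeImpl, and flags
//convergence once max_iterations_ is reached.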
++iteration_;
FVector gradient = gr;
if (use_gaussian_prior_) {
gradient -= mean_;
gradient /= variance_;
}
cerr << "OPTIMIZER ITERATION #" << iteration_ << endl;
cerr << " CURR VALUES: " << x << endl;
cerr << " GRADIENT: " << gr << endl;
if (use_gaussian_prior_)
cerr << "P-GRADIENT: " << gradient << endl;
OptimizeImpl(f, x, gradient, new_x);
cerr << "NEW VALUES: " << *new_x << endl;
if (HasConverged()) {
cerr << "OPTIMIZER CONVERGED IN " << iteration_ << " ITERATIONS.\n";
} else if (GetIteration() >= max_iterations_) {
cerr << "OPTIMIZER REACHED MAX ITERATIONS. STOPPING.\n";
SetHasConverged();
}
}
void DumbStochasticGradientDescent::OptimizeImpl(
FValue f,
const FVector& x,
const FVector& gradient,
FVector* new_x) {
FVector g = gradient;
g *= eta_;
*new_x = x;
*new_x += g;
}
void ExponentiatedGradientDescent::OptimizeImpl(
FValue,
const FVector& x,
const FVector& gradient,
FVector* new_x) {
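//Gain adaptation: each eta_i grows when successive gradients agree in
//sign (g_i * prev_g_i > 0) and shrinks otherwise, floored by
//min_multiplier_; the step is then new_x = x + eta .* gradient.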
//for (unsigned i = 0; i < eta_.size(); ++i) {
//eta_[i] = eta_[i] * max(min_multiplier_, 1.0f + mu_ * gradient[i] * (eta_[i] * prev_g_[i]));
eta_ *= fvmax(min_multiplier_, 1.0 + mu_ * gradient * eta_ * prev_g_);
//}
cerr << "ETA: " << eta_ << endl;
*new_x = gradient;
*new_x *= eta_;
*new_x += x;
cerr << "New x: " << *new_x << endl;
prev_g_ = gradient;
}
void MetaNormalizedExponentiatedGradientDescent::OptimizeImpl(
FValue,
const FVector& x,
const FVector& gradient,
FVector* new_x) {
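//Meta-normalization: keep an exponentially decayed average v of the
//squared gradient and divide the gain-adaptation term by it, making the
//eta update insensitive to the gradient's overall scale.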
cerr << "Curr x: " << x << endl;
//for (unsigned i = 0; i < v_.size(); ++i) {
//v_[i] = gamma_ * v_[i] + ((1 - gamma_) * gradient[i] * gradient[i]);
v_ = gamma_ * v_ + ((1-gamma_) * gradient * gradient);
//}
//for (unsigned i = 0; i < eta_.size(); ++i) {
//eta_[i] = eta_[i] * max(min_multiplier_, 1.0f + ((mu_ * gradient[i] * prev_g_[i])/ v_[i]));
eta_ = eta_ * fvmax(min_multiplier_, 1 + ((mu_ * gradient * prev_g_) / v_));
//}
cerr << "ETA: " << eta_ << endl;
*new_x = gradient;
*new_x *= eta_;
cerr << "Gradient * ETA: " << *new_x << endl;
*new_x += x;
cerr << "New x: " << *new_x << endl;
prev_g_ = gradient;
}
}

135
josiah/Optimizer.h Normal file

@@ -0,0 +1,135 @@
#pragma once
#include <vector>
#include "FeatureVector.h"
namespace Josiah {
struct Optimizer {
Optimizer(int max_iterations)
: iteration_(0),
converged_(false),
max_iterations_(max_iterations),
use_gaussian_prior_(false) {}
virtual ~Optimizer();
void SetUseGaussianPrior(const Moses::FValue mean,
const Moses::FValue variance) {
use_gaussian_prior_ = true;
mean_ = mean;
variance_ = variance;
}
void Optimize(
Moses::FValue f, // if known
const Moses::FVector x, // not ref! don't change!
const Moses::FVector& gradient,
Moses::FVector* new_x);
bool HasConverged() const {
return converged_;
}
int GetIteration() const {
return iteration_;
}
void SetIteration(int iteration) {
iteration_ = iteration;
}
protected:
virtual void OptimizeImpl(
Moses::FValue f, // if known
const Moses::FVector& x,
const Moses::FVector& gradient,
Moses::FVector* new_x) = 0;
void SetHasConverged(bool converged = true) {
converged_ = converged;
}
private:
int iteration_;
bool converged_;
int max_iterations_;
bool use_gaussian_prior_;
Moses::FValue mean_; // for gaussian prior
Moses::FValue variance_; // for gaussian prior
};
class DumbStochasticGradientDescent : public Optimizer {
public:
DumbStochasticGradientDescent(Moses::FValue eta, int max_iters) :
Optimizer(max_iters), eta_(eta) {}
virtual void OptimizeImpl(
Moses::FValue f,
const Moses::FVector& x,
const Moses::FVector& gradient,
Moses::FVector* new_x);
private:
Moses::FValue eta_;
};
// see N. Schraudolph (1999) Local Gain Adaptation in Stochastic Gradient
// Descent, Technical Report IDSIA-09-99, p. 2.
// No, this isn't stochastic metadescent, but EGD is described there too
class ExponentiatedGradientDescent : public Optimizer {
public:
ExponentiatedGradientDescent(const Moses::FVector& eta,
Moses::FValue mu, Moses::FValue min_multiplier, int max_iters, const Moses::FVector& prev_gradient) :
Optimizer(max_iters), eta_(eta), mu_(mu), min_multiplier_(min_multiplier), prev_g_(prev_gradient) {
//std::cerr << "Eta : " << eta_ << std::endl;
//std::cerr << "Prev gradient : " << prev_g_ << std::endl;
}
void SetPreviousGradient(const Moses::FVector& prev_g) { prev_g_ = prev_g;}
void SetEta(const Moses::FVector& eta) { eta_ = eta;}
virtual void OptimizeImpl(
Moses::FValue,
const Moses::FVector& x,
const Moses::FVector& gradient,
Moses::FVector* new_x);
protected:
Moses::FVector eta_;
const Moses::FValue mu_;
const Moses::FValue min_multiplier_;
Moses::FVector prev_g_;
};
class MetaNormalizedExponentiatedGradientDescent : public ExponentiatedGradientDescent {
public:
MetaNormalizedExponentiatedGradientDescent(const Moses::FVector& eta,
Moses::FValue mu, Moses::FValue min_multiplier, Moses::FValue gamma, int max_iters, const Moses::FVector& prev_gradient) :
ExponentiatedGradientDescent(eta, mu, min_multiplier, max_iters, prev_gradient), v_(eta), gamma_(gamma) {
std::cerr << " MetaNormalizedExponentiatedGradientDescent, gamma : " << gamma << std::endl;
}
virtual void OptimizeImpl(
Moses::FValue f,
const Moses::FVector& x,
const Moses::FVector& gradient,
Moses::FVector* new_x);
private:
Moses::FVector v_;
Moses::FValue gamma_;
};
}
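
In scalar form, the Gaussian prior used by Optimize() regularises the ascent direction towards the prior mean: the gradient of log N(x | mean, variance) with respect to x is -(x - mean)/variance. A one-weight sketch of that adjustment (assuming ascent on log-likelihood plus log-prior):

#include <cassert>

//Sketch: regularised ascent direction for one weight under a Gaussian prior.
double regularisedGradient(double g, double x, double mean, double variance) {
  assert(variance > 0.0);
  return g - (x - mean) / variance;
}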

390
josiah/ParenthesisFeature.cpp Normal file
View File

@ -0,0 +1,390 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include <cassert>
#include <iterator>
#include "ParenthesisFeature.h"
using namespace std;
namespace Josiah {
ParenthesisFeature::ParenthesisFeature(const std::string lefts, const std::string rights) :
m_lefts(lefts), m_rights(rights) {}
FeatureFunctionHandle ParenthesisFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new ParenthesisFeatureFunction(sample, m_lefts, m_rights));
}
void ParenthesisCounts::count(vector<Word>::const_iterator begin, vector<Word>::const_iterator end,
const string& lefts, const string& rights)
{
size_t segmentSize = end - begin;
for (size_t i = 0; i < m_ll.size(); ++i) {
m_ll[i].resize(segmentSize);
m_lr[i].resize(segmentSize);
m_rl[i].resize(segmentSize);
m_rr[i].resize(segmentSize);
m_leftPositions[i].clear();
m_rightPositions[i].clear();
}
//iterate right to left
reverse_iterator<vector<Word>::const_iterator> ri(end);
reverse_iterator<vector<Word>::const_iterator> rend(begin);
size_t wpos = segmentSize;
for ( ;ri != rend; ++ri) {
--wpos;
const string& text = ri->operator[](0)->GetString();
size_t lid = string::npos;
size_t rid = string::npos;
if (text.size() == 1) {
lid = lefts.find(text);
rid = rights.find(text);
}
for (size_t pid = 0; pid < m_ll.size(); ++pid) {
size_t curr_lr = m_lr[pid].size() > wpos+1 ? m_lr[pid][wpos+1] : 0;
size_t curr_rr = m_rr[pid].size() > wpos+1 ? m_rr[pid][wpos+1] : 0;
if (pid == lid) {
//found a left parenthesis
++curr_lr;
m_leftPositions[pid].push_back(wpos);
}
if (pid == rid) {
//found a right parenthesis
++curr_rr;
m_rightPositions[pid].push_back(wpos);
}
m_lr[pid][wpos] = curr_lr;
m_rr[pid][wpos] = curr_rr;
}
}
assert(wpos == 0);
//iterate left to right
vector<Word>::const_iterator i = begin;
for (; i != end; ++i) {
const string& text = i->operator[](0)->GetString();
size_t lid = string::npos;
size_t rid = string::npos;
if (text.size() == 1) {
lid = lefts.find(text);
rid = rights.find(text);
}
for (size_t pid = 0; pid < m_ll.size(); ++pid) {
size_t curr_ll = wpos > 0 ? m_ll[pid][wpos-1] : 0;
size_t curr_rl = wpos > 0 ? m_rl[pid][wpos-1] : 0;
if (pid == lid) ++curr_ll;
if (pid == rid) ++curr_rl;
m_ll[pid][wpos] = curr_ll;
m_rl[pid][wpos] = curr_rl;
}
++wpos;
}
}
void ParenthesisFeatureFunction::updateTarget() {
const vector<Word>& words = getSample().GetTargetWords();
m_counts.count(words.begin(),words.end(),m_lefts,m_rights);
}
void ParenthesisFeatureFunction::assignScore(FVector& scores) {
//count number of mismatches of each type
getViolations(m_counts,scores);
}
struct WordsRangeCovers {
WordsRangeCovers(const WordsRange& range) : m_range(range) {}
bool operator() (size_t pos) {return m_range.covers(pos);}
const WordsRange& m_range;
};
void ParenthesisFeatureFunction::getViolations(const ParenthesisCounts& counts, FVector& violations,
const ParenthesisCounts* outsideCounts, const WordsRange* segment)
{
for (size_t pid = 0; pid < m_numValues; ++pid) {
//left violations to left of segment
if (outsideCounts) {
size_t leftsInSegment = count_if(outsideCounts->leftPositions()[pid].begin(),
outsideCounts->leftPositions()[pid].end(),
WordsRangeCovers(*segment));
size_t rightsInSegment = count_if(outsideCounts->rightPositions()[pid].begin(),
outsideCounts->rightPositions()[pid].end(),
WordsRangeCovers(*segment));
for (size_t i = 0; i < outsideCounts->leftPositions()[pid].size(); ++i) {
size_t ppos = outsideCounts->leftPositions()[pid][i];
if (ppos >= segment->GetStartPos()) continue;
size_t lr = outsideCounts->lr(pid,ppos);
size_t rr = outsideCounts->rr(pid,ppos);
//cerr << "lr: " << lr << " rr: " << rr << " ";
//account for contents of segment
lr += counts.leftPositions()[pid].size() - leftsInSegment;
rr += counts.rightPositions()[pid].size() - rightsInSegment;
//cerr << "lr: " << lr << " rr: " << rr << " rpos " << counts.rightPositions().size() << " ris: " << rightsInSegment << endl;
if (lr > rr) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
}
//right violations to left of segment
//Ignore since the new segment cannot change these
//left violations inside the segment
for (size_t i = 0; i < counts.leftPositions()[pid].size(); ++i) {
size_t ppos = counts.leftPositions()[pid][i];
size_t lr = counts.lr(pid,ppos);
size_t rr = counts.rr(pid,ppos);
if (outsideCounts) {
if (segment->GetEndPos()+1 < outsideCounts->segmentLength()) {
lr += outsideCounts->lr(pid,segment->GetEndPos()+1);
rr += outsideCounts->rr(pid,segment->GetEndPos()+1);
}
}
if (lr > rr) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
//right violations in the segment
for (size_t i = 0; i < counts.rightPositions()[pid].size(); ++i) {
size_t ppos = counts.rightPositions()[pid][i];
size_t rl = counts.rl(pid,ppos);
size_t ll = counts.ll(pid,ppos);
if (outsideCounts) {
if (segment->GetStartPos() > 0) {
rl += outsideCounts->rl(pid,segment->GetStartPos()-1);
ll += outsideCounts->ll(pid,segment->GetStartPos()-1);
}
}
if (rl > ll) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
//left violations to right of segment
//Ignore since the new segment cannot change these
//right violations to right of segment
if (outsideCounts) {
size_t leftsInSegment = count_if(outsideCounts->leftPositions()[pid].begin(),
outsideCounts->leftPositions()[pid].end(),
WordsRangeCovers(*segment));
size_t rightsInSegment = count_if(outsideCounts->rightPositions()[pid].begin(),
outsideCounts->rightPositions()[pid].end(),
WordsRangeCovers(*segment));
for (size_t i = 0; i < outsideCounts->rightPositions()[pid].size(); ++i) {
size_t ppos = outsideCounts->rightPositions()[pid][i];
if (ppos <= segment->GetEndPos()) continue;
size_t rl = outsideCounts->rl(pid,ppos);
size_t ll = outsideCounts->ll(pid,ppos);
//account for segment
rl += counts.rightPositions()[pid].size() - rightsInSegment;
ll += counts.leftPositions()[pid].size() - leftsInSegment;
if (rl > ll) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
}
}
}
void ParenthesisFeatureFunction::getViolations(const ParenthesisCounts& leftSegmentCounts, const ParenthesisCounts& rightSegmentCounts,
const WordsRange& leftSegment, const WordsRange& rightSegment,
const ParenthesisCounts& outsideCounts, FVector& violations)
{
for (size_t pid = 0; pid < m_numValues; ++pid) {
//count the existing parentheses in the left and right segments
size_t leftsInLeftSegment = count_if(outsideCounts.leftPositions()[pid].begin(),
outsideCounts.leftPositions()[pid].end(),
WordsRangeCovers(leftSegment));
size_t leftsInRightSegment = count_if(outsideCounts.leftPositions()[pid].begin(),
outsideCounts.leftPositions()[pid].end(),
WordsRangeCovers(rightSegment));
size_t rightsInLeftSegment = count_if(outsideCounts.rightPositions()[pid].begin(),
outsideCounts.rightPositions()[pid].end(),
WordsRangeCovers(leftSegment));
size_t rightsInRightSegment = count_if(outsideCounts.rightPositions()[pid].begin(),
outsideCounts.rightPositions()[pid].end(),
WordsRangeCovers(rightSegment));
//check left parentheses in left segment
for (size_t i = 0; i < leftSegmentCounts.leftPositions()[pid].size(); ++i) {
size_t ppos = leftSegmentCounts.leftPositions()[pid][i];
size_t lr = leftSegmentCounts.lr(pid,ppos);
size_t rr = leftSegmentCounts.rr(pid,ppos);
if (leftSegment.GetEndPos()+1 < outsideCounts.segmentLength()) {
lr += outsideCounts.lr(pid,leftSegment.GetEndPos()+1);
rr += outsideCounts.rr(pid,leftSegment.GetEndPos()+1);
}
//account for right segment
lr += (rightSegmentCounts.leftPositions()[pid].size() - leftsInRightSegment);
rr += (rightSegmentCounts.rightPositions()[pid].size() - rightsInRightSegment);
if (lr > rr) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
//check right parentheses in left segment
for (size_t i = 0; i < leftSegmentCounts.rightPositions()[pid].size(); ++i) {
size_t ppos = leftSegmentCounts.rightPositions()[pid][i];
size_t rl = leftSegmentCounts.rl(pid,ppos);
size_t ll = leftSegmentCounts.ll(pid,ppos);
if (leftSegment.GetStartPos() > 0) {
rl += outsideCounts.rl(pid, leftSegment.GetStartPos()-1);
ll += outsideCounts.ll(pid, leftSegment.GetStartPos()-1);
}
if (rl > ll) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
//check left parentheses in right segment
for (size_t i = 0; i < rightSegmentCounts.leftPositions()[pid].size(); ++i) {
size_t ppos = rightSegmentCounts.leftPositions()[pid][i];
size_t lr = rightSegmentCounts.lr(pid,ppos);
size_t rr = rightSegmentCounts.rr(pid,ppos);
if (rightSegment.GetEndPos()+1 < outsideCounts.segmentLength()) {
lr += outsideCounts.lr(pid, rightSegment.GetEndPos()+1);
rr += outsideCounts.rr(pid, rightSegment.GetEndPos()+1);
}
if (lr > rr) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
//check for right parentheses in right segment
for (size_t i = 0; i < rightSegmentCounts.rightPositions()[pid].size(); ++i) {
size_t ppos = rightSegmentCounts.rightPositions()[pid][i];
size_t rl = rightSegmentCounts.rl(pid,ppos);
size_t ll = rightSegmentCounts.ll(pid,ppos);
if (rightSegment.GetStartPos() > 0) {
rl += outsideCounts.rl(pid,rightSegment.GetStartPos()-1);
ll += outsideCounts.ll(pid, rightSegment.GetStartPos()-1);
}
//account for left segment
rl += (leftSegmentCounts.rightPositions()[pid].size() - rightsInLeftSegment);
ll += (leftSegmentCounts.leftPositions()[pid].size() - leftsInLeftSegment);
if (rl > ll) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
//check for left parentheses outside of both segments
for (size_t i = 0; i < outsideCounts.leftPositions()[pid].size(); ++i) {
size_t ppos = outsideCounts.leftPositions()[pid][i];
//ignore if parenthesis is in the left segment, right segment, or to the right of the right segment
if (ppos >= rightSegment.GetStartPos() || leftSegment.covers(ppos)) continue;
size_t lr = outsideCounts.lr(pid,ppos);
size_t rr = outsideCounts.rr(pid,ppos);
if (ppos < rightSegment.GetStartPos()) {
//account for right segment
lr += (rightSegmentCounts.leftPositions()[pid].size() - leftsInRightSegment);
rr += (rightSegmentCounts.rightPositions()[pid].size() - rightsInRightSegment);
}
if (ppos < leftSegment.GetStartPos()) {
//account for left segment
lr += (leftSegmentCounts.leftPositions()[pid].size() - leftsInLeftSegment);
rr += (leftSegmentCounts.rightPositions()[pid].size() - rightsInLeftSegment);
}
if (lr > rr) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
//check for right parentheses outside of both segments
for (size_t i = 0; i < outsideCounts.rightPositions()[pid].size(); ++i) {
size_t ppos = outsideCounts.rightPositions()[pid][i];
//ignore if parenthesis is in the right segment, left segment, or to the left of left segment
if (ppos <= leftSegment.GetEndPos() || rightSegment.covers(ppos)) continue;
size_t rl = outsideCounts.rl(pid,ppos);
size_t ll = outsideCounts.ll(pid,ppos);
if (ppos > leftSegment.GetEndPos()) {
//account for the left segment
rl += (leftSegmentCounts.rightPositions()[pid].size() - rightsInLeftSegment);
ll += (leftSegmentCounts.leftPositions()[pid].size() - leftsInLeftSegment);
}
if (ppos > rightSegment.GetEndPos()) {
//account for right segment
rl += (rightSegmentCounts.rightPositions()[pid].size() - rightsInRightSegment);
ll += (rightSegmentCounts.leftPositions()[pid].size() - leftsInRightSegment);
}
if (rl > ll) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
}
}
void ParenthesisFeatureFunction::scoreUpdate(const Phrase& phrase, const WordsRange& segment, FVector& scores) {
m_leftSegmentCounts.count(phrase.begin(), phrase.end(), m_lefts, m_rights);
getViolations(m_leftSegmentCounts,scores,&m_counts,&segment);
}
void ParenthesisFeatureFunction::doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores) {
scoreUpdate(option->GetTargetPhrase(),gap.segment,scores);
}
void ParenthesisFeatureFunction::doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores)
{
Phrase phrase(leftOption->GetTargetPhrase());
phrase.Append(rightOption->GetTargetPhrase());
scoreUpdate(phrase,gap.segment,scores);
}
void ParenthesisFeatureFunction::doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
const Phrase& leftPhrase = leftOption->GetTargetPhrase();
m_leftSegmentCounts.count(leftPhrase.begin(),leftPhrase.end(),m_lefts,m_rights);
const Phrase& rightPhrase = rightOption->GetTargetPhrase();
m_rightSegmentCounts.count(rightPhrase.begin(),rightPhrase.end(),m_lefts,m_rights);
getViolations(m_leftSegmentCounts,m_rightSegmentCounts,leftGap.segment,rightGap.segment,m_counts,scores);
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void ParenthesisFeatureFunction::doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
if (leftGap.segment.GetEndPos()+1 == rightGap.segment.GetStartPos()) {
TargetGap gap(leftGap.leftHypo, rightGap.rightHypo,WordsRange(leftGap.segment.GetStartPos(), rightGap.segment.GetEndPos()));
doContiguousPairedUpdate(leftOption,rightOption,gap,scores);
} else {
doDiscontiguousPairedUpdate(leftOption,rightOption,leftGap,rightGap,scores);
}
}
}

139
josiah/ParenthesisFeature.h Normal file
View File

@ -0,0 +1,139 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include "FeatureFunction.h"
#include "Gibbler.h"
namespace Josiah {
/**
* Store the various counts at each position.
**/
class ParenthesisCounts {
public:
ParenthesisCounts(size_t numValues):
m_ll(numValues), m_rl(numValues), m_lr(numValues), m_rr(numValues)
,m_leftPositions(numValues),m_rightPositions(numValues) {}
//getters
size_t numValues() const {return m_ll.size();}
size_t segmentLength() const {return m_ll[0].size();}
size_t ll(size_t pid, size_t position) const {return m_ll[pid][position];}
size_t rl(size_t pid, size_t position) const {return m_rl[pid][position];}
size_t lr(size_t pid, size_t position) const {return m_lr[pid][position];}
size_t rr(size_t pid, size_t position) const {return m_rr[pid][position];}
const std::vector<std::vector<size_t> >& leftPositions() const {return m_leftPositions;}
const std::vector<std::vector<size_t> >& rightPositions() const {return m_rightPositions;}
//Initialise counts
void count(std::vector<Word>::const_iterator begin, std::vector<Word>::const_iterator end,
const std::string& lefts, const std::string& rights);
private:
std::vector<std::vector<size_t> > m_ll; //left brackets to left
std::vector<std::vector<size_t> > m_rl; //right brackets to left
std::vector<std::vector<size_t> > m_lr; //left brackets to right
std::vector<std::vector<size_t> > m_rr; //right brackets to right
std::vector<std::vector<size_t> > m_leftPositions; //positions of left parentheses
std::vector<std::vector<size_t> > m_rightPositions; //positions of right parentheses
};
/**
 * Feature that checks for matching of brackets and similar constructions.
**/
class ParenthesisFeature : public Feature {
public:
ParenthesisFeature(const std::string lefts, const std::string rights);
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
private:
std::string m_lefts,m_rights;
};
class ParenthesisFeatureFunction: public FeatureFunction {
public:
ParenthesisFeatureFunction(const Sample& sample,const std::string lefts, const std::string rights) :
FeatureFunction(sample),
m_numValues(lefts.size()), m_lefts(lefts), m_rights(rights),
m_counts(m_numValues), m_leftSegmentCounts(m_numValues), m_rightSegmentCounts(m_numValues) {
for (size_t i = 0; i < lefts.size(); ++i) {
m_names.push_back(FName("par",lefts.substr(i,1)));
}
}
/** Update the target words.*/
virtual void updateTarget();
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
/** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) ;
virtual ~ParenthesisFeatureFunction() {}
private:
/** Violations from a segment, with optional outside counts. If outside counts and segment are missing, then
the counts are assumed to cover the entire target sentence. */
void getViolations(const ParenthesisCounts& counts, FVector& violations,
const ParenthesisCounts* outsideCounts=NULL, const WordsRange* segment=NULL);
/** Violations from a pair of segments, with outside counts */
void getViolations(const ParenthesisCounts& leftSegmentCounts, const ParenthesisCounts& rightSegmentCounts,
const WordsRange& leftSegment, const WordsRange& rightSegment,
const ParenthesisCounts& outsideCounts, FVector& scores);
void scoreUpdate(const Moses::Phrase& phrase, const Moses::WordsRange& segment, FVector& scores);
size_t m_numValues;
//left and right parenthesis characters
std::string m_lefts;
std::string m_rights;
//Counts for current target
ParenthesisCounts m_counts;
//counts for current segments
ParenthesisCounts m_leftSegmentCounts;
ParenthesisCounts m_rightSegmentCounts;
std::vector<FName> m_names;
};
}
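
The running counts above let each bracket be tested in constant time: a left bracket is a violation if more left than right brackets occur from its position rightwards, and symmetrically for right brackets. A self-contained sketch on a plain string with a single bracket pair (no Moses types):

#include <cstddef>
#include <string>
#include <vector>

//Count unmatched '(' and ')' using precomputed running counts,
//mirroring the ll/rl/lr/rr arrays of ParenthesisCounts.
std::size_t countViolations(const std::string& s) {
  const std::size_t n = s.size();
  std::vector<std::size_t> lr(n + 1, 0), rr(n + 1, 0); //counts from i to the end
  for (std::size_t i = n; i-- > 0; ) {
    lr[i] = lr[i + 1] + (s[i] == '(' ? 1 : 0);
    rr[i] = rr[i + 1] + (s[i] == ')' ? 1 : 0);
  }
  std::vector<std::size_t> ll(n + 1, 0), rl(n + 1, 0); //counts from the start up to i-1
  for (std::size_t i = 0; i < n; ++i) {
    ll[i + 1] = ll[i] + (s[i] == '(' ? 1 : 0);
    rl[i + 1] = rl[i] + (s[i] == ')' ? 1 : 0);
  }
  std::size_t violations = 0;
  for (std::size_t p = 0; p < n; ++p) {
    if (s[p] == '(' && lr[p] > rr[p]) ++violations; //unmatched left
    if (s[p] == ')' && rl[p + 1] > ll[p + 1]) ++violations; //unmatched right
  }
  return violations;
}

For example, countViolations("(()") returns 1 (one unmatched left bracket) and countViolations("()") returns 0.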

235
josiah/PhraseBoundaryFeature.cpp Normal file
View File

@ -0,0 +1,235 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Gibbler.h"
#include "PhraseBoundaryFeature.h"
using namespace Moses;
using namespace std;
namespace Josiah {
const string PhraseBoundaryFeature::STEM = "pb";
const string PhraseBoundaryFeature::SEP = ":";
const string PhraseBoundaryFeature::SOURCE = "src";
const string PhraseBoundaryFeature::TARGET = "tgt";
const string PhraseBoundaryFeature::BOS = "<s>";
const string PhraseBoundaryFeature::EOS = "</s>";
PhraseBoundaryFeature::PhraseBoundaryFeature(
const FactorList& sourceFactors,
const FactorList& targetFactors,
const vector<string>& sourceVocabFiles,
const vector<string>& targetVocabFiles) :
m_sourceFactors(sourceFactors), m_targetFactors(targetFactors)
{
assert(sourceFactors.size() == sourceVocabFiles.size());
assert(targetFactors.size() == targetVocabFiles.size());
m_sourceVocabs.resize(sourceVocabFiles.size());
for (size_t i = 0; i < sourceVocabFiles.size(); ++i) {
loadVocab(sourceVocabFiles[i],m_sourceVocabs[i]);
}
m_targetVocabs.resize(targetVocabFiles.size());
for (size_t i = 0; i < targetVocabFiles.size(); ++i) {
loadVocab(targetVocabFiles[i],m_targetVocabs[i]);
}
}
FeatureFunctionHandle PhraseBoundaryFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new PhraseBoundaryFeatureFunction(sample,*this));
}
void PhraseBoundaryFeature::addSourceFeatures(
const Word* leftWord, const Word* rightWord, FVector& scores) const {
addFeatures(leftWord,rightWord,m_sourceFactors,SOURCE, m_sourceVocabs,scores);
}
void PhraseBoundaryFeature::addTargetFeatures(
const Word* leftWord, const Word* rightWord, FVector& scores) const {
addFeatures(leftWord,rightWord,m_targetFactors,TARGET, m_targetVocabs,scores);
}
void PhraseBoundaryFeature::addFeatures(
const Word* leftWord, const Word* rightWord,
const FactorList& factors, const string& side,
const vector<set<string> >& vocabs, FVector& scores) const
{
for (size_t i = 0; i < factors.size(); ++i) {
ostringstream name;
name << side << SEP;
name << factors[i];
name << SEP;
if (leftWord) {
const string& leftWordText = leftWord->GetFactor(factors[i])->GetString();
if (vocabs[i].size() != 0 &&
vocabs[i].find(leftWordText) == vocabs[i].end()) {
continue;
}
name << leftWordText;
} else {
name << BOS;
}
name << SEP;
if (rightWord) {
const string& rightWordText = rightWord->GetFactor(factors[i])->GetString();
if (vocabs[i].size() != 0 &&
vocabs[i].find(rightWordText) == vocabs[i].end()) {
continue;
}
name << rightWordText;
} else {
name << EOS;
}
FName fName(STEM,name.str());
++scores[fName];
}
}
PhraseBoundaryFeatureFunction::PhraseBoundaryFeatureFunction
(const Sample& sample, const PhraseBoundaryFeature& parent) :
FeatureFunction(sample), m_parent(parent) {}
/** Update the target words.*/
void PhraseBoundaryFeatureFunction::updateTarget() {}
/** Assign the total score of this feature on the current hypo */
void PhraseBoundaryFeatureFunction::assignScore(FVector& scores) {
const Hypothesis* currHypo = getSample().GetTargetTail();
scoreOptions(NULL,&(currHypo->GetNextHypo()->GetTranslationOption()),scores);
while ((currHypo = (currHypo->GetNextHypo()))) {
const TranslationOption* leftOption =
&(currHypo->GetTranslationOption());
const TranslationOption* rightOption = NULL;
if (currHypo->GetNextHypo()) {
rightOption = &(currHypo->GetNextHypo()->GetTranslationOption());
}
scoreOptions(leftOption,rightOption,scores);
}
}
/** Score due to one segment */
void PhraseBoundaryFeatureFunction::doSingleUpdate(
const TranslationOption* option, const TargetGap& gap, FVector& scores) {
const TranslationOption* leftOption = NULL;
if (gap.leftHypo->GetPrevHypo()) {
leftOption = &(gap.leftHypo->GetTranslationOption());
}
const TranslationOption* rightOption = option;
scoreOptions(leftOption,rightOption,scores);
leftOption = option;
if (gap.rightHypo) {
rightOption = &(gap.rightHypo->GetTranslationOption());
} else {
rightOption = NULL;
}
scoreOptions(leftOption,rightOption,scores);
}
/** Score due to two segments. The left and right refer to the target positions.**/
void PhraseBoundaryFeatureFunction::doContiguousPairedUpdate(
const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores)
{
const TranslationOption* prevOption = NULL;
if (gap.leftHypo->GetPrevHypo()) {
prevOption = &(gap.leftHypo->GetTranslationOption());
}
const TranslationOption* nextOption = NULL;
if (gap.rightHypo) {
nextOption = &(gap.rightHypo->GetTranslationOption());
}
scoreOptions(prevOption,leftOption,scores);
scoreOptions(leftOption,rightOption,scores);
scoreOptions(rightOption,nextOption,scores);
}
void PhraseBoundaryFeatureFunction::doDiscontiguousPairedUpdate(
const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
doSingleUpdate(leftOption,leftGap,scores);
doSingleUpdate(rightOption,rightGap,scores);
}
/** Score due to flip. Again, left and right refer to order on the
<emph>target</emph> side. */
void PhraseBoundaryFeatureFunction::doFlipUpdate(
const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
if (leftGap.segment.GetEndPos()+1 == rightGap.segment.GetStartPos()) {
//contiguous
TargetGap gap(leftGap.leftHypo, rightGap.rightHypo,
WordsRange(leftGap.segment.GetStartPos(),
rightGap.segment.GetEndPos()));
doContiguousPairedUpdate(leftOption,rightOption,gap,scores);
} else {
//discontiguous
doDiscontiguousPairedUpdate(leftOption,rightOption,
leftGap,rightGap,scores);
}
}
static const Word* getBeginWord(const Phrase& phrase) {
return &(*phrase.begin());
}
static const Word* getEndWord(const Phrase& phrase) {
return &(phrase.GetWord(phrase.GetSize()-1));
}
void PhraseBoundaryFeatureFunction::scoreOptions(
const TranslationOption* leftOption, const TranslationOption* rightOption,
FVector& scores)
{
//source
const Word* leftSourceWord = NULL;
const Word* rightSourceWord = NULL;
if (leftOption) {
leftSourceWord = getEndWord(*(leftOption->GetSourcePhrase()));
}
if (rightOption) {
rightSourceWord = getBeginWord(*(rightOption->GetSourcePhrase()));
}
m_parent.addSourceFeatures(leftSourceWord,rightSourceWord,scores);
//target
const Word* leftTargetWord = NULL;
const Word* rightTargetWord = NULL;
if (leftOption) {
leftTargetWord = getEndWord(leftOption->GetTargetPhrase());
}
if (rightOption) {
rightTargetWord = getBeginWord(rightOption->GetTargetPhrase());
}
m_parent.addTargetFeatures(leftTargetWord,rightTargetWord,scores);
}
void PhraseBoundaryFeature::loadVocab(
const std::string& filename, std::set<std::string>& vocab) {
if (filename.empty()) return;
ifstream in(filename.c_str());
assert(in);
string line;
while(getline(in,line)) {
vocab.insert(line);
}
}
}
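
The vocabulary files act as a gate in addFeatures(): an empty vocabulary means no restriction, otherwise a boundary feature only fires when the word is listed. The same test in isolation (a sketch, not the class API):

#include <set>
#include <string>

//Sketch of the vocabulary gate: an empty set means unrestricted.
bool inVocab(const std::set<std::string>& vocab, const std::string& word) {
  return vocab.empty() || vocab.count(word) > 0;
}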

113
josiah/PhraseBoundaryFeature.h Normal file
View File

@ -0,0 +1,113 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <set>
#include <vector>
#include "FeatureFunction.h"
namespace Josiah {
/**
* Bigrams at phrase boundaries.
**/
class PhraseBoundaryFeature : public Feature {
public:
PhraseBoundaryFeature(
const Moses::FactorList& sourceFactors,
const Moses::FactorList& targetFactors,
const std::vector<std::string>& sourceVocabFiles,
const std::vector<std::string>& targetVocabFiles);
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
/** If either word is null, then eos or bos is assumed */
void addSourceFeatures(
const Moses::Word* leftWord, const Moses::Word* rightWord,
FVector& scores) const;
void addTargetFeatures(
const Moses::Word* leftWord, const Moses::Word* rightWord,
FVector& scores) const;
private:
void addFeatures(
const Moses::Word* leftWord, const Moses::Word* rightWord,
const Moses::FactorList& factors, const std::string& side,
const std::vector<std::set<std::string> >& vocabs,
FVector& scores) const;
void loadVocab(const std::string& filename, std::set<std::string>& vocab);
static const std::string SEP;
static const std::string STEM;
static const std::string SOURCE;
static const std::string TARGET;
static const std::string BOS;
static const std::string EOS;
Moses::FactorList m_sourceFactors;
Moses::FactorList m_targetFactors;
std::vector<std::set<std::string> > m_sourceVocabs;
std::vector<std::set<std::string> > m_targetVocabs;
};
class PhraseBoundaryFeatureFunction : public FeatureFunction {
public:
PhraseBoundaryFeatureFunction(const Sample& sample, const PhraseBoundaryFeature& parent);
/** Update the target words.*/
virtual void updateTarget();
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
/** Score due to one segment */
virtual void doSingleUpdate(
const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(
const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate(
const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the
<emph>target</emph> side. */
virtual void doFlipUpdate(
const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) ;
private:
void scoreOptions(
const TranslationOption* leftOption, const TranslationOption* rightOption,
FVector& scores);
const PhraseBoundaryFeature& m_parent;
};
}
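
Each boundary bigram becomes a sparse feature whose name joins side, factor id and the two adjacent words with SEP (":"), substituting BOS/EOS markers at the sentence edges. A sketch of the naming scheme (the stem "pb" is added by FName in the real code):

#include <sstream>
#include <string>

//Sketch: assemble a phrase-boundary feature name; null words mark
//sentence boundaries.
std::string boundaryFeatureName(const std::string& side, int factor,
                                const std::string* left,
                                const std::string* right) {
  std::ostringstream name;
  name << side << ":" << factor << ":"
       << (left ? *left : "<s>") << ":"
       << (right ? *right : "</s>");
  return name.str();
}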

127
josiah/PhraseFeature.cpp Normal file
View File

@ -0,0 +1,127 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "PhraseFeature.h"
#include <sstream>
#include "Gibbler.h"
#include "StaticData.h"
#include "WeightManager.h"
using namespace std;
using namespace Moses;
namespace Josiah {
set<PhraseFeature*> PhraseFeature::s_phraseFeatures;
PhraseFeature::PhraseFeature
(PhraseDictionaryFeature* dictionary, size_t index) : m_phraseDictionary(dictionary) {
//pre-calculate the feature names
const string& root = m_phraseDictionary->GetScoreProducerDescription();
size_t featureCount = m_phraseDictionary->GetNumScoreComponents();
for (size_t i = 1; i <= featureCount; ++i) {
ostringstream namestream;
if (index > 0) {
namestream << index << "-";
}
namestream << i;
m_featureNames.push_back(FName(root,namestream.str()));
}
s_phraseFeatures.insert(this);
}
void PhraseFeature::updateWeights(const FVector& weights) {
for (set<PhraseFeature*>::iterator i = s_phraseFeatures.begin();
i != s_phraseFeatures.end(); ++i) {
PhraseFeature* pf = *i;
vector<float> newWeights(pf->m_featureNames.size());
for (size_t j = 0; j < pf->m_featureNames.size(); ++j) {
FValue weight = weights[pf->m_featureNames[j]];
newWeights[j] = weight;
}
ScoreComponentCollection mosesWeights = StaticData::Instance().GetAllWeights();
mosesWeights.Assign(pf->m_phraseDictionary,newWeights);
(const_cast<StaticData&>(StaticData::Instance()))
.SetAllWeights(mosesWeights);
//pf->m_phraseDictionary->GetFeature()->SetWeightTransModel(newWeights);
}
}
FeatureFunctionHandle PhraseFeature::getFunction(const Sample& sample ) const {
return FeatureFunctionHandle
(new PhraseFeatureFunction(sample,m_phraseDictionary,m_featureNames));
}
PhraseFeatureFunction::PhraseFeatureFunction(const Sample& sample, Moses::PhraseDictionaryFeature* phraseDictionary, std::vector<FName> featureNames) :
FeatureFunction(sample),
m_featureNames(featureNames),
m_phraseDictionary(phraseDictionary) {}
/** Assign the total score of this feature on the current hypo */
void PhraseFeatureFunction::assignScore(FVector& scores) {
for (size_t i = 0; i < m_featureNames.size(); ++i) {
scores[m_featureNames[i]] = 0;
}
const Hypothesis* currHypo = getSample().GetTargetTail();
while ((currHypo = (currHypo->GetNextHypo()))) {
assign(&(currHypo->GetTranslationOption()), scores);
}
}
/** Score due to one segment */
void PhraseFeatureFunction::doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores) {
assign(option,scores);
}
/** Score due to two segments. The left and right refer to the target positions.**/
void PhraseFeatureFunction::doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores) {
assign(leftOption,scores);
assign(rightOption,scores);
}
void PhraseFeatureFunction::doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
assign(leftOption,scores);
assign(rightOption,scores);
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void PhraseFeatureFunction::doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
assign(leftOption,scores);
assign(rightOption,scores);
}
/** Add the phrase features into the feature vector */
void PhraseFeatureFunction::assign(const TranslationOption* option, FVector& scores) {
const ScoreComponentCollection& breakdown = option->GetScoreBreakdown();
vector<float> mosesScores= breakdown.GetScoresForProducer(m_phraseDictionary);
for (size_t i = 0; i < m_featureNames.size(); ++i) {
scores[m_featureNames[i]] += mosesScores[i];
}
}
}
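
updateWeights() above bridges the sampler's sparse weight vector and Moses's dense per-producer weights by looking up each pre-computed feature name. The same pattern on standard-library types (illustrative only):

#include <cstddef>
#include <map>
#include <string>
#include <vector>

//Sketch: extract a dense weight vector for one producer from a sparse
//name->weight map, defaulting absent features to 0.
std::vector<float> denseWeights(const std::map<std::string, float>& sparse,
                                const std::vector<std::string>& names) {
  std::vector<float> dense(names.size(), 0.0f);
  for (std::size_t i = 0; i < names.size(); ++i) {
    std::map<std::string, float>::const_iterator it = sparse.find(names[i]);
    if (it != sparse.end()) dense[i] = it->second;
  }
  return dense;
}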

76
josiah/PhraseFeature.h Normal file
View File

@ -0,0 +1,76 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <set>
#include <vector>
#include "FeatureFunction.h"
#include "PhraseDictionary.h"
namespace Josiah {
/** The Moses phrase features. */
class PhraseFeature : public Feature {
public:
PhraseFeature(Moses::PhraseDictionaryFeature* dictionary, size_t index);
virtual FeatureFunctionHandle getFunction(const Sample& sample ) const;
/** Inform all phrase features that the weights have been updated so
that the new weights can be passed to moses */
static void updateWeights(const FVector& weights);
private:
static std::set<PhraseFeature*> s_phraseFeatures;
Moses::PhraseDictionaryFeature* m_phraseDictionary;
std::vector<FName> m_featureNames;
};
class PhraseFeatureFunction : public FeatureFunction {
public:
PhraseFeatureFunction(const Sample& sample, Moses::PhraseDictionaryFeature* phraseDictionary, std::vector<FName> featureNames);
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
/** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
private:
void assign(const TranslationOption* option, FVector& scores);
std::vector<FName> m_featureNames;
Moses::PhraseDictionaryFeature* m_phraseDictionary;
};
}

78
josiah/PhrasePairFeature.cpp Normal file
View File

@ -0,0 +1,78 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <sstream>
#include "AlignmentInfo.h"
#include "Gibbler.h"
#include "PhrasePairFeature.h"
using namespace Moses;
using namespace std;
namespace Josiah {
const std::string PhrasePairFeature::PREFIX = "pp";
PhrasePairFeature::PhrasePairFeature
(Moses::FactorType sourceFactorId, Moses::FactorType targetFactorId)
: m_sourceFactorId(sourceFactorId), m_targetFactorId(targetFactorId) {}
const Moses::Factor* PhrasePairFeature::getSourceFactor
(const Moses::Word& word) const {
return word[m_sourceFactorId];
}
const Moses::Factor* PhrasePairFeature::getTargetFactor
(const Moses::Word& word) const {
return word[m_targetFactorId];
}
void PhrasePairFeature::assign
(const TranslationOption* option, FVector& scores) const {
const TargetPhrase& target = option->GetTargetPhrase();
const Phrase* source = option->GetSourcePhrase();
const AlignmentInfo& align = target.GetAlignmentInfo();
// cerr << source->GetStringRep(vector<FactorType>(1));
// cerr << "|" << target.GetStringRep(vector<FactorType>(1));
for (AlignmentInfo::const_iterator i = align.begin(); i != align.end(); ++i) {
const Factor* sourceFactor =
getSourceFactor(source->GetWord(i->first));
const Factor* targetFactor =
getTargetFactor(target.GetWord(i->second));
ostringstream namestr;
namestr << sourceFactor->GetString();
namestr << ":";
namestr << targetFactor->GetString();
FName name(PhrasePairFeature::PREFIX,namestr.str());
++scores[name];
// cerr << " " << name;
}
// cerr << endl;
}
}

50
josiah/PhrasePairFeature.h Normal file
View File

@ -0,0 +1,50 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include "StatelessFeature.h"
namespace Josiah {
/**
* The 'phrase pair' features of Watanabe et al. These are formed by
* pairing aligned words (or other factors) between the source and target
* side of a phrase pair.
**/
class PhrasePairFeature : public StatelessFeature {
public:
static const std::string PREFIX;
PhrasePairFeature(Moses::FactorType sourceFactorId,
Moses::FactorType targetFactorId);
const Moses::Factor* getSourceFactor(const Moses::Word& word) const;
const Moses::Factor* getTargetFactor(const Moses::Word& word) const;
/** Scores due to this translation option */
virtual void assign
(const Moses::TranslationOption* option, FVector& scores) const;
private:
Moses::FactorType m_sourceFactorId;
Moses::FactorType m_targetFactorId;
};
}
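
The Watanabe-style phrase-pair features fire once per aligned word pair inside a phrase pair. A standalone sketch with plain tokens and (source, target) alignment points (illustrative; the stem "pp" is omitted):

#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>

//Sketch: count "src:tgt" features for each alignment point, as in
//PhrasePairFeature::assign.
void addPhrasePairFeatures(const std::vector<std::string>& src,
                           const std::vector<std::string>& tgt,
                           const std::vector<std::pair<int, int> >& align,
                           std::map<std::string, int>& counts) {
  for (std::size_t k = 0; k < align.size(); ++k) {
    const std::string name = src[align[k].first] + ":" + tgt[align[k].second];
    ++counts[name]; //each aligned pair increments its feature
  }
}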

175
josiah/Pos.cpp Normal file
View File

@ -0,0 +1,175 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Pos.h"
using namespace std;
using namespace Moses;
namespace Josiah {
static string ToString(const TagSequence& ws)
{
ostringstream os;
for (TagSequence::const_iterator i = ws.begin(); i != ws.end(); ++i)
os << (*i)->GetString() << ",";
return os.str();
}
static ostream& operator<<(ostream& out, const TagSequence& ws)
{
out << ToString(ws);
return out;
}
template<class P>
static void getPosTags(const P& words, TagSequence& tags, FactorType factorType) {
for (typename P::const_iterator i = words.begin(); i != words.end(); ++i) {
tags.push_back(i->operator[](factorType));
/*cerr << "F0: " << *(i->operator[](0)) << endl;
const Factor* f1 = i->operator[](1);
if (f1) {
cerr << "F1: " << *f1 << endl;
} else {
cerr << "F1: " << "missing" << endl;
}*/
}
}
static void getSegmentWords(const vector<Word>& words, const WordsRange& segment, vector<Word>& segmentWords) {
for (size_t i = segment.GetStartPos(); i <= segment.GetEndPos(); ++i) {
segmentWords.push_back(words[i]);
}
}
float Josiah::PosFeatureFunction::computeScore() {
m_sourceTags.clear();
TagSequence targetTags;
getPosTags(getSample().GetSourceWords(), m_sourceTags, m_sourceFactorType);
getPosTags(getSample().GetTargetWords(), targetTags, m_targetFactorType);
//cerr << "Source " << m_sourceTags << endl;
//cerr << "Target " << targetTags << endl;
return computeScore(m_sourceTags, targetTags);
}
float Josiah::PosFeatureFunction::getSingleUpdateScore(const Moses::TranslationOption* option, const TargetGap& gap)
{
const WordsRange& sourceSegment = option->GetSourceWordsRange();
TagSequence newTargetTags;
getPosTags(option->GetTargetPhrase(), newTargetTags, m_targetFactorType);
return getSingleUpdateScore(sourceSegment,gap.segment, newTargetTags);
}
float Josiah::PosFeatureFunction::getContiguousPairedUpdateScore
(const TranslationOption * leftOption, const TranslationOption * rightOption, const TargetGap& gap)
{
//just treat this as one segment
WordsRange sourceSegment(min(leftOption->GetStartPos(),rightOption->GetStartPos()),
max(leftOption->GetEndPos(), rightOption->GetEndPos()));
TagSequence newTargetTags;
Phrase targetPhrase = leftOption->GetTargetPhrase();
targetPhrase.Append(rightOption->GetTargetPhrase());
getPosTags(targetPhrase, newTargetTags, m_targetFactorType);
return getSingleUpdateScore(sourceSegment, gap.segment, newTargetTags);
}
float Josiah::PosFeatureFunction::getDiscontiguousPairedUpdateScore
(const TranslationOption * leftOption, const TranslationOption * rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) {
//treat as two gaps
return getSingleUpdateScore(leftOption,leftGap) + getSingleUpdateScore(rightOption,rightGap);
}
float Josiah::PosFeatureFunction::getFlipUpdateScore(const TranslationOption * leftOption, const TranslationOption * rightOption,
const TargetGap& leftGap, const TargetGap& rightGap)
{
pair<WordsRange,WordsRange> sourceSegments(leftOption->GetSourceWordsRange(), rightOption->GetSourceWordsRange());
pair<WordsRange,WordsRange> targetSegments(leftGap.segment, rightGap.segment);
return getFlipUpdateScore(sourceSegments, targetSegments);
}
void Josiah::PosFeatureFunction::getCurrentTargetTags(TagSequence& tags) const
{
getPosTags(getSample().GetTargetWords(), tags, m_targetFactorType);
}
bool Josiah::SourceVerbPredicate::operator ( )( const Factor * tag )
{
const string& tagString = tag->GetString();
//This works for TreeTagger de
return !tagString.empty() && tagString[0] == 'V';
}
bool Josiah::TargetVerbPredicate::operator ( )( const Factor * tag )
{
//This is for lopar en
const string& tagString = tag->GetString();
//cerr << tagString << " " << (tagString.length() > 1 && !tagString.compare(0,2,"md")) << endl;
return (!tagString.empty() && tagString[0] == 'v') || (tagString.length() > 1 && !tagString.compare(0,2,"md"));
}
VerbDifferenceFeature::VerbDifferenceFeature(FactorType sourceFactorType, FactorType targetFactorType) :
m_sourceFactorType(sourceFactorType),
m_targetFactorType(targetFactorType)
{}
FeatureFunctionHandle VerbDifferenceFeature::getFunction( const Sample& sample ) const {
return FeatureFunctionHandle(new VerbDifferenceFeatureFunction(sample, m_sourceFactorType, m_targetFactorType));
}
float Josiah::VerbDifferenceFeatureFunction::computeScore( const TagSequence & sourceTags, const TagSequence & targetTags ) const
{
SourceVerbPredicate svp;
int sourceVerbs = (int)count_if(sourceTags.begin(), sourceTags.end(), svp);
TargetVerbPredicate tvp;
int targetVerbs = (int)count_if(targetTags.begin(), targetTags.end(), tvp);
//cerr << "ComputeScore: source " << sourceVerbs << " target: " << targetVerbs << endl;
return targetVerbs - sourceVerbs;
}
float Josiah::VerbDifferenceFeatureFunction::getSingleUpdateScore(
const Moses::WordsRange& sourceSegment, const Moses::WordsRange& targetSegment,
const TagSequence& newTargetTags) const
{
TargetVerbPredicate tvp;
int targetVerbs = (int)count_if(newTargetTags.begin(), newTargetTags.end(), tvp);
//cerr << "SingleUpdate: new " << newTargetVerbs << " old " << oldTargetVerbs << endl;
return targetVerbs;
}
}

135
josiah/Pos.h Normal file
View File

@ -0,0 +1,135 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <algorithm>
#include <iostream>
#include "TypeDef.h"
#include "FeatureFunction.h"
#include "Gibbler.h"
namespace Josiah {
typedef std::vector<const Moses::Factor*> TagSequence;
class Sample;
/**
* Abstract base class for feature functions which use source/target pos tags.
**/
class PosFeatureFunction : public SingleValuedFeatureFunction {
public:
PosFeatureFunction(const Sample& sample,
const std::string& name, Moses::FactorType sourceFactorType, Moses::FactorType targetFactorType)
: SingleValuedFeatureFunction(sample,name), m_sourceFactorType(sourceFactorType), m_targetFactorType(targetFactorType) {
//assert(targetFactorType < StaticData::Instance().GetMaxNumFactors(Output));
//assert(sourceFactorType < StaticData::Instance().GetMaxNumFactors(Input));
}
//These methods must be implemented by a subclass
/** Full score of sample*/
virtual float computeScore(const TagSequence& sourceTags, const TagSequence& targetTags) const = 0;
/**Change in score when updating one segment*/
virtual float getSingleUpdateScore(const Moses::WordsRange& sourceSegment, const Moses::WordsRange& targetSegment,
const TagSequence& newTargetTags) const = 0;
/**Change in score when flipping two segments. Note that both pairs are in target order */
virtual float getFlipUpdateScore(const std::pair<Moses::WordsRange,Moses::WordsRange>& sourceSegments,
const std::pair<Moses::WordsRange,Moses::WordsRange>& targetSegments) const = 0;
/** All tags */
void getCurrentTargetTags(TagSequence& tags) const;
/** Compute full score of a sample from scratch **/
virtual float computeScore();
/** Change in score when updating one segment */
virtual float getSingleUpdateScore(const Moses::TranslationOption* option, const TargetGap& gap);
/** Change in score when updating two segments **/
virtual float getContiguousPairedUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap);
virtual float getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
/** Change in score when flipping */
virtual float getFlipUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
virtual ~PosFeatureFunction() {}
protected:
const TagSequence& getSourceTags() {return m_sourceTags;}
private:
TagSequence m_sourceTags;
FactorType m_sourceFactorType;
FactorType m_targetFactorType;
};
//FIXME: These should be configurable because they will change for different tag sets.
struct SourceVerbPredicate {
bool operator()(const Factor* tag);
};
struct TargetVerbPredicate {
bool operator()(const Factor* tag);
};
/**
* Feature which counts the difference between the verb counts on each side (target-source).
**/
class VerbDifferenceFeature : public Feature {
public:
VerbDifferenceFeature(FactorType sourceFactorType, FactorType targetFactorType);
virtual FeatureFunctionHandle getFunction( const Sample& sample ) const;
private:
FactorType m_sourceFactorType;
FactorType m_targetFactorType;
};
class VerbDifferenceFeatureFunction: public PosFeatureFunction {
public:
VerbDifferenceFeatureFunction(const Sample& sample, FactorType sourceFactorType, FactorType targetFactorType) :
PosFeatureFunction(sample, "VerbDifference", sourceFactorType, targetFactorType) {}
virtual float computeScore(const TagSequence& sourceTags, const TagSequence& targetTags) const;
virtual float getSingleUpdateScore (
const Moses::WordsRange& sourceSegment, const Moses::WordsRange& targetSegment,
const TagSequence& newTargetTags) const;
virtual float getFlipUpdateScore(const std::pair<Moses::WordsRange,Moses::WordsRange>& sourceSegments,
const std::pair<Moses::WordsRange,Moses::WordsRange>& targetSegments) const
{return 0;} //flipping can't change the verb difference
virtual ~VerbDifferenceFeatureFunction() {}
private:
};
}
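
VerbDifference reduces to two count_if calls: count the tags matching a verb predicate on each side and score the difference. A toy with plain string tags (the predicates stand in for the TreeTagger/LOPAR conventions hard-coded above):

#include <algorithm>
#include <string>
#include <vector>

static bool isSourceVerb(const std::string& t) { return !t.empty() && t[0] == 'V'; }
static bool isTargetVerb(const std::string& t) {
  return (!t.empty() && t[0] == 'v') || (t.length() > 1 && t.compare(0, 2, "md") == 0);
}

//Sketch: target verb count minus source verb count.
int verbDifference(const std::vector<std::string>& srcTags,
                   const std::vector<std::string>& tgtTags) {
  int s = (int)std::count_if(srcTags.begin(), srcTags.end(), isSourceVerb);
  int t = (int)std::count_if(tgtTags.begin(), tgtTags.end(), isTargetVerb);
  return t - s;
}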

156
josiah/PosProjectionFeature.cpp Normal file
View File

@ -0,0 +1,156 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "PosProjectionFeature.h"
using namespace std;
using namespace Moses;
namespace Josiah {
void PosProjectionFeatureFunction::updateTarget() {
m_tagProjection.clear();
for (const Hypothesis* currHypo = getSample().GetTargetTail()->GetNextHypo();
currHypo != NULL; currHypo = currHypo->GetNextHypo()) {
const Phrase* sourcePhrase = currHypo->GetSourcePhrase();
for (Phrase::const_iterator i = sourcePhrase->begin();
i != sourcePhrase->end(); ++i) {
m_tagProjection.push_back(i->GetFactor(m_sourceFactorType));
//cerr << " " << i->GetFactor(0)->GetString();
}
}
}
PosProjectionBigramFeature::PosProjectionBigramFeature(Moses::FactorType sourceFactorType,const std::string& tags):
m_sourceFactorType(sourceFactorType)
{
if (tags != "*") {
vector<string> tagList = Tokenize(tags, ",");
copy(tagList.begin(), tagList.end(), inserter(m_tags, m_tags.end()));
VERBOSE(1, "PosProjectionBigramFeature configured with " << m_tags.size() << " tags" << endl);
} else {
VERBOSE(1, "PosProjectionBigramFeature will consider all tags" << endl);
}
}
FeatureFunctionHandle PosProjectionBigramFeature::getFunction( const Sample& sample ) const {
return FeatureFunctionHandle(new PosProjectionBigramFeatureFunction(sample, m_sourceFactorType, m_tags));
}
const string PosProjectionBigramFeatureFunction::ROOT = "ppf";
PosProjectionBigramFeatureFunction::PosProjectionBigramFeatureFunction
(const Sample& sample, Moses::FactorType sourceFactorType,const set<string>& tags) :
PosProjectionFeatureFunction(sample, sourceFactorType), m_tags(tags){}
void PosProjectionBigramFeatureFunction::countBigrams
(const TagSequence& tagSequence, FVector& counts) {
//cerr << "Tag bigrams ";
if (tagSequence.size() < 2) return; //guard: tagIter+1 below is invalid for sequences shorter than a bigram
for (TagSequence::const_iterator tagIter = tagSequence.begin();
tagIter+1 != tagSequence.end(); ++tagIter) {
const string& currTag = (*tagIter)->GetString();
if (m_tags.size() && m_tags.find(currTag) == m_tags.end()) continue;
const string& nextTag = (*(tagIter+1))->GetString();
if (m_tags.size() && m_tags.find(nextTag) == m_tags.end()) continue;
FName name(ROOT, currTag + ":" + nextTag);
++counts[name];
}
//cerr << endl;
}
/** Assign the total score of this feature on the current hypo */
void PosProjectionBigramFeatureFunction::assignScore(FVector& scores) {
countBigrams(getCurrentTagProjection(),scores);
}
/** Score due to one segment */
void PosProjectionBigramFeatureFunction::doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores)
{
//no change
}
/** Score due to two segments. The left and right refer to the target positions.**/
void PosProjectionBigramFeatureFunction::doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores)
{
//no change
}
void PosProjectionBigramFeatureFunction::doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
//no change
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void PosProjectionBigramFeatureFunction::doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
bool contiguous = (leftGap.segment.GetEndPos()+1 == rightGap.segment.GetStartPos());
//changes the projection, so recalculate
TagSequence tagProjection;
if (leftGap.leftHypo->GetPrevHypo()) {
//include word to left of gap
const Phrase* leftPhrase = leftGap.leftHypo->GetSourcePhrase();
tagProjection.push_back(leftPhrase->GetWord(leftPhrase->GetSize()-1).GetFactor(sourceFactorType()));
}
//include words to go in left gap
for (Phrase::const_iterator i = leftOption->GetSourcePhrase()->begin();
i != leftOption->GetSourcePhrase()->end(); ++i) {
tagProjection.push_back(i->GetFactor(sourceFactorType()));
}
if (contiguous) {
//include words to go in right gap
for (Phrase::const_iterator i = rightOption->GetSourcePhrase()->begin();
i != rightOption->GetSourcePhrase()->end(); ++i) {
tagProjection.push_back(i->GetFactor(sourceFactorType()));
}
//and word to right of gap
if (rightGap.rightHypo) {
const Phrase* rightPhrase = rightGap.rightHypo->GetSourcePhrase();
tagProjection.push_back(rightPhrase->GetWord(0).GetFactor(sourceFactorType()));
}
} else {
//word to right of left gap
const Phrase* rightPhrase = leftGap.rightHypo->GetSourcePhrase();
tagProjection.push_back(rightPhrase->GetWord(0).GetFactor(sourceFactorType()));
}
countBigrams(tagProjection,scores);
if (!contiguous) {
//right gap
tagProjection.clear();
//word to the left
const Phrase* leftPhrase = rightGap.leftHypo->GetSourcePhrase();
tagProjection.push_back(leftPhrase->GetWord(leftPhrase->GetSize()-1).GetFactor(sourceFactorType()));
//words to go in right gap
for (Phrase::const_iterator i = rightOption->GetSourcePhrase()->begin();
i != rightOption->GetSourcePhrase()->end(); ++i) {
tagProjection.push_back(i->GetFactor(sourceFactorType()));
}
if (rightGap.rightHypo) {
const Phrase* rightPhrase = rightGap.rightHypo->GetSourcePhrase();
tagProjection.push_back(rightPhrase->GetWord(0).GetFactor(sourceFactorType()));
}
countBigrams(tagProjection,scores);
}
}
}
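Editor's note: to make countBigrams concrete, here is a hedged, self-contained mirror of its logic on plain strings (the function name is illustrative, and the exact rendering of the FName keys rooted at "ppf" may differ):
#include <map>
#include <set>
#include <string>
#include <vector>
//For tags [DT, NN, VB] and an empty filter this counts the bigrams DT:NN and NN:VB.
std::map<std::string, int> countTagBigrams(const std::vector<std::string>& tags,
                                           const std::set<std::string>& keep) {
  std::map<std::string, int> counts;
  for (size_t i = 0; i + 1 < tags.size(); ++i) { //index form is safe on empty or singleton input
    if (!keep.empty() && keep.find(tags[i]) == keep.end()) continue;
    if (!keep.empty() && keep.find(tags[i + 1]) == keep.end()) continue;
    ++counts[tags[i] + ":" + tags[i + 1]]; //feature key, rooted at "ppf" via FName above
  }
  return counts;
}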

94
josiah/PosProjectionFeature.h Normal file
View File

@@ -0,0 +1,94 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include "FeatureFunction.h"
#include "Gibbler.h"
#include "Pos.h"
namespace Josiah {
/**
* Features derived from projection of source pos tags onto target.
**/
class PosProjectionFeatureFunction: public FeatureFunction {
public:
PosProjectionFeatureFunction(const Sample& sample, Moses::FactorType sourceFactorType)
: FeatureFunction(sample),
m_sourceFactorType(sourceFactorType) {}
/** Update the target words.*/
virtual void updateTarget();
virtual ~PosProjectionFeatureFunction() {}
protected:
/** All projected tags */
const TagSequence& getCurrentTagProjection() const
{ return m_tagProjection;}
Moses::FactorType sourceFactorType() const {return m_sourceFactorType;}
private:
Moses::FactorType m_sourceFactorType;
TagSequence m_tagProjection;
};
class PosProjectionBigramFeature : public Feature {
public:
PosProjectionBigramFeature(Moses::FactorType sourceFactorType,const std::string& tags);
virtual FeatureFunctionHandle getFunction( const Sample& sample ) const;
private:
Moses::FactorType m_sourceFactorType;
std::set<std::string> m_tags; //tags to be considered - empty means consider all tags
};
class PosProjectionBigramFeatureFunction : public PosProjectionFeatureFunction {
public:
PosProjectionBigramFeatureFunction(const Sample& sample, Moses::FactorType sourceFactorType,const std::set<std::string>& tags);
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
/** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) ;
private:
static const std::string ROOT;
//Count the bigrams in the given tag sequence
void countBigrams(const TagSequence& tagSequence, FVector& counts);
const std::set<std::string>& m_tags; //tags to be considered - empty means consider all tags
};
}

292
josiah/ReorderingFeature.cpp Normal file
View File

@@ -0,0 +1,292 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <stdexcept>
#include <fstream>
#include <sstream>
#include <boost/lexical_cast.hpp>
#include "ReorderingFeature.h"
#include "Gibbler.h"
#include "Util.h"
using namespace Moses;
using namespace std;
using boost::lexical_cast;
namespace Josiah {
string ReorderingFeatureTemplate::BOS = "<s>";
ReorderingFeature::ReorderingFeature(const vector<string>& msd,
const std::vector<std::string>& msdVocab)
{
const static string SOURCE = "source";
const static string TARGET = "target";
const static string PREV = "prev";
const static string CURR = "curr";
for (vector<string>::const_iterator i = msdVocab.begin(); i != msdVocab.end();
++i) {
vector<string> msdVocabConfig = Tokenize(*i,":");
if (msdVocabConfig.size() != 3) {
ostringstream errmsg;
errmsg << "msdvocab configuration '" << *i << "' has incorrect format";
throw runtime_error(errmsg.str());
}
size_t factorId = lexical_cast<size_t>(msdVocabConfig[0]);
bool source = true;
if (msdVocabConfig[1] == TARGET) {
source = false;
} else if (msdVocabConfig[1] != SOURCE) {
throw runtime_error("msd vocab config has invalid source/target identifier");
}
string filename = msdVocabConfig[2];
vocab_t* vocab = NULL;
if (source) {
vocab = &(m_sourceVocabs[factorId]);
} else {
vocab = &(m_targetVocabs[factorId]);
}
loadVocab(filename,vocab);
}
for (vector<string>::const_iterator i = msd.begin(); i != msd.end(); ++i) {
vector<string> msdConfig = Tokenize(*i,":");
if (msdConfig.size() != 4) {
ostringstream errmsg;
errmsg << "msd configuration '" << *i << "' has incorrect format";
throw runtime_error(errmsg.str());
}
size_t factorId = lexical_cast<size_t>(msdConfig[1]);
bool source = true;
if (msdConfig[2] == TARGET) {
source = false;
} else if (msdConfig[2] != SOURCE) {
throw runtime_error("msd config has invalid source/target identifier");
}
bool curr = true;
if (msdConfig[3] == PREV) {
curr = false;
} else if (msdConfig[3] != CURR) {
throw runtime_error("msd config has invalid curr/prev identifier");
}
if (msdConfig[0] == "edge") {
m_templates.push_back(new EdgeReorderingFeatureTemplate(factorId,source,curr));
} else {
ostringstream errmsg;
errmsg << "Unknown msd feature type '" << msdConfig[0] << "'" << endl;
throw runtime_error(errmsg.str());
}
//set vocabulary, if necessary
vocab_t* vocab = NULL;
if (source) {
if (m_sourceVocabs.find(factorId) != m_sourceVocabs.end()) {
vocab = &(m_sourceVocabs[factorId]);
}
} else {
if (m_targetVocabs.find(factorId) != m_targetVocabs.end()) {
vocab = &(m_targetVocabs[factorId]);
}
}
m_templates.back()->setVocab(vocab);
}
}
FeatureFunctionHandle ReorderingFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new ReorderingFeatureFunction(sample, *this));
}
const std::vector<ReorderingFeatureTemplate*>& ReorderingFeature::getTemplates() const {
return m_templates;
}
void ReorderingFeature::loadVocab(string filename, vocab_t* vocab) {
VERBOSE(1, "Loading vocabulary for reordering feature from " << filename << endl);
vocab->clear();
ifstream in(filename.c_str());
if (!in) {
ostringstream errmsg;
errmsg << "Unable to load vocabulary from " << filename;
throw runtime_error(errmsg.str());
}
string line;
while (getline(in,line)) {
vocab->insert(line);
}
}
bool ReorderingFeatureTemplate::checkVocab(const std::string& word) const {
if (!m_vocab) return true;
return m_vocab->find(word) != m_vocab->end();
}
ReorderingFeatureFunction::ReorderingFeatureFunction(const Sample& sample, const ReorderingFeature& parent)
: FeatureFunction(sample), m_parent(parent)
{}
/** Assign features for the following options, assuming they are contiguous on the target side */
void ReorderingFeatureFunction::assign(const TranslationOption* prevOption, const TranslationOption* currOption, FVector& scores) {
for (vector<ReorderingFeatureTemplate*>::const_iterator i = m_parent.getTemplates().begin();
i != m_parent.getTemplates().end(); ++i) {
(*i)->assign(prevOption,currOption,getMsd(prevOption, currOption), scores);
}
}
const string& ReorderingFeatureFunction::getMsd(const TranslationOption* prevOption, const TranslationOption* currOption) {
int prevStart = -1;
int prevEnd = -1;
if (prevOption) {
prevStart = prevOption->GetSourceWordsRange().GetStartPos();
prevEnd = prevOption->GetSourceWordsRange().GetEndPos();
}
int currStart = currOption->GetSourceWordsRange().GetStartPos();
int currEnd = currOption->GetSourceWordsRange().GetEndPos();
static string monotone = "msd:m";
static string swap = "msd:s";
static string discontinuous = "msd:d";
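//e.g. if prev covers source words [2,4]: a curr starting at 5 is monotone,
//a curr ending at 1 is swap, and any other configuration is discontinuous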
if (prevEnd + 1 == currStart) {
return monotone;
} else if (currEnd + 1 == prevStart) {
return swap;
} else {
return discontinuous;
}
}
void EdgeReorderingFeatureTemplate::assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption,
const std::string& prefix, FVector& scores)
{
static const string sourcePrev = "s:p:";
static const string sourceCurr = "s:c:";
static const string targetPrev = "t:p:";
static const string targetCurr = "t:c:";
const Word* edge = NULL;
const string* position = NULL;
if (m_source && m_curr) {
edge = &(currOption->GetSourcePhrase()->GetWord(0));
position = &sourceCurr;
} else if (m_source && !m_curr) {
if (prevOption) {
const Phrase* sourcePhrase = prevOption->GetSourcePhrase();
edge = &(sourcePhrase->GetWord(sourcePhrase->GetSize()-1));
}
position = &sourcePrev;
} else if (!m_source && m_curr) {
edge = &(currOption->GetTargetPhrase().GetWord(0));
position = &targetCurr;
} else {
if (prevOption) {
const Phrase& targetPhrase = prevOption->GetTargetPhrase();
edge = &(targetPhrase.GetWord(targetPhrase.GetSize()-1));
}
position = &targetPrev;
}
ostringstream namestr;
namestr << *position;
namestr << m_factor;
namestr << ":";
if (edge) {
const string& word = edge->GetFactor(m_factor)->GetString();
if (!checkVocab(word)) return;
namestr << word;
} else {
namestr << BOS;
}
FName name(prefix,namestr.str());
++scores[name];
}
/** Assign the total score of this feature on the current hypo */
void ReorderingFeatureFunction::assignScore(FVector& scores)
{
const Hypothesis* currHypo = getSample().GetTargetTail();
const TranslationOption* prevOption = NULL;
while ((currHypo = (currHypo->GetNextHypo()))) {
const TranslationOption* currOption = &(currHypo->GetTranslationOption());
assign(prevOption,currOption,scores);
prevOption = currOption;
}
}
/** Score due to one segment */
void ReorderingFeatureFunction::doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores)
{
if (gap.leftHypo) {
assign(&(gap.leftHypo->GetTranslationOption()), option, scores);
}
if (gap.rightHypo) {
assign(option,&(gap.rightHypo->GetTranslationOption()), scores);
}
}
/** Score due to two segments. The left and right refer to the target positions.**/
void ReorderingFeatureFunction::doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores)
{
if (gap.leftHypo) {
assign(&(gap.leftHypo->GetTranslationOption()), leftOption,scores);
}
assign(leftOption,rightOption,scores);
if (gap.rightHypo) {
assign(rightOption, &(gap.rightHypo->GetTranslationOption()), scores);
}
}
void ReorderingFeatureFunction::doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
if (leftGap.leftHypo) {
assign(&(leftGap.leftHypo->GetTranslationOption()),leftOption,scores);
}
assign(leftOption, &(leftGap.rightHypo->GetTranslationOption()), scores);
assign(&(rightGap.leftHypo->GetTranslationOption()),rightOption,scores);
if (rightGap.rightHypo) {
assign(rightOption, &(rightGap.rightHypo->GetTranslationOption()),scores);
}
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void ReorderingFeatureFunction::doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
if (leftGap.segment.GetEndPos()+1 == rightGap.segment.GetStartPos()) {
TargetGap gap(leftGap.leftHypo, rightGap.rightHypo, WordsRange(leftGap.segment.GetStartPos(),rightGap.segment.GetEndPos()));
doContiguousPairedUpdate(leftOption,rightOption,gap,scores);
} else {
doDiscontiguousPairedUpdate(leftOption,rightOption,leftGap,rightGap,scores);
}
}
}

137
josiah/ReorderingFeature.h Normal file
View File

@@ -0,0 +1,137 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <boost/unordered_set.hpp>
#include "FeatureFunction.h"
namespace Josiah {
typedef boost::unordered_set<std::string> vocab_t;
/**
* Used to define different types of reordering features.
**/
class ReorderingFeatureTemplate {
public:
ReorderingFeatureTemplate(): m_vocab(NULL) {}
static std::string BOS;
virtual void assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption,
const std::string& prefix, FVector& scores) = 0;
void setVocab(vocab_t* vocab) {m_vocab = vocab;}
bool checkVocab(const std::string& word) const ;
virtual ~ReorderingFeatureTemplate() {}
private:
vocab_t* m_vocab;
};
class EdgeReorderingFeatureTemplate : public ReorderingFeatureTemplate {
public:
EdgeReorderingFeatureTemplate(size_t factor, bool source, bool curr) : m_factor(factor), m_source(source), m_curr(curr) {}
virtual void assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption,
const std::string& prefix, FVector& scores);
private:
size_t m_factor;
bool m_source; //source or target?
bool m_curr; //curr or prev?
};
/**
* Features related to the ordering between segments.
**/
class ReorderingFeature : public Feature {
public:
/**
* The msd vector will indicate which types of msd features are to be included. Each element is made
* up of four parts, separated by colons. The fields are:
* type: The type of feature (currently only edge is supported)
* factor_id: An integer representing the factor
* source/target: One of two possible values indicating whether the
* source or target words are used.
* prev/curr: Indicates whether the feature uses the previous or
* current segment
*
* The msdVocab configuration items specify a vocabulary file for
* the source or target of a given factor. The format of these config
* strings is factor_id:source/target:filename
*
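* Example (editor's illustration): the msd entry "edge:1:target:prev"
* creates edge features over factor 1, keyed on the last target word of
* the previous segment; the msdVocab entry "1:target:tags.txt" would
* restrict those features to the entries of tags.txt.
*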
*/
ReorderingFeature(const std::vector<std::string>& msd,
const std::vector<std::string>& msdVocab);
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
const std::vector<ReorderingFeatureTemplate*>& getTemplates() const;
private:
std::vector<ReorderingFeatureTemplate*> m_templates;
std::map<size_t,vocab_t> m_sourceVocabs;
std::map<size_t,vocab_t > m_targetVocabs;
void loadVocab(std::string filename, vocab_t* vocab);
};
class ReorderingFeatureFunction : public FeatureFunction {
public:
ReorderingFeatureFunction(const Sample& sample, const ReorderingFeature& parent);
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
/** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
private:
const ReorderingFeature& m_parent;
/** Assign features for the following two options, assuming they are contiguous on the target side */
void assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption, FVector& scores);
/** Monotone, swapped or discontinuous? The segments are assumed to have contiguous translations on the target side. */
const std::string& getMsd(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption);
};
}
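Editor's note: a hedged usage sketch of the configuration format documented above (paths and values are illustrative, not from the commit):
#include <string>
#include <vector>
void exampleReorderingSetup() {
  std::vector<std::string> msd, msdVocab;
  //edge feature on factor 0, keyed on the first source word of the current segment
  msd.push_back("edge:0:source:curr");
  //restrict source factor 0 to the words listed in vocab.txt (illustrative path)
  msdVocab.push_back("0:source:vocab.txt");
  Josiah::ReorderingFeature reorder(msd, msdVocab); //throws if vocab.txt cannot be opened
}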

19
josiah/SampleCollector.cpp Normal file
View File

@@ -0,0 +1,19 @@
#include "SampleCollector.h"
#include "Gibbler.h"
#include "GibbsOperator.h"
using namespace std;
namespace Josiah {
void PrintSampleCollector::collect(Sample& sample) {
cout << "Sampled hypothesis: \"";
sample.GetSampleHypothesis()->ToStream(cout);
cout << "\"" << " " << "Feature values: " << sample.GetFeatureValues() << endl;
}
void SampleCollector::addSample( Sample & sample) {
collect(sample);
++m_n;
}
}

41
josiah/SampleCollector.h Normal file
View File

@@ -0,0 +1,41 @@
#pragma once
#include <vector>
namespace Josiah {
class Sample;
/**
* Used by the operators to collect samples, for example to count ngrams, or just to print
* them out.
**/
class SampleCollector {
public:
SampleCollector(): m_n(0) {}
virtual void addSample(Sample& sample);
/** Number of samples */
size_t N() const {return m_n;}
virtual ~SampleCollector() {}
void reset() {
m_n = 0;
}
void SetN(size_t n) { m_n = n;}
protected:
/** The actual collection.*/
virtual void collect(Sample& sample) = 0;
private:
size_t m_n;
};
class PrintSampleCollector : public virtual SampleCollector {
public:
virtual void collect(Sample& sample);
virtual ~PrintSampleCollector() {}
};
}
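Editor's note: a hedged sketch of the intended subclassing pattern, counting samples whose target side reaches a minimum length (GetTargetWords is the Sample accessor used elsewhere in this change; the class itself is illustrative):
namespace Josiah {
class LongSampleCounter : public SampleCollector {
public:
  explicit LongSampleCounter(size_t minWords) : m_minWords(minWords), m_count(0) {}
  size_t count() const { return m_count; }
protected:
  //collect() is the only member a subclass must override
  virtual void collect(Sample& sample) {
    if (sample.GetTargetWords().size() >= m_minWords) ++m_count;
  }
private:
  size_t m_minWords;
  size_t m_count;
};
}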

431
josiah/SampleRank.cpp Normal file
View File

@@ -0,0 +1,431 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include <functional>
#include <iostream>
#include <iomanip>
#include <fstream>
#ifdef MPI_ENABLED
#include <mpi.h>
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
namespace mpi=boost::mpi;
#endif
#include <boost/program_options.hpp>
#include <boost/algorithm/string.hpp>
#include "Bleu.h"
#include "Decoder.h"
#include "GibbsOperator.h"
#include "Gibbler.h"
#include "InputSource.h"
#include "MpiDebug.h"
#include "OnlineLearner.h"
#include "OnlineTrainingCorpus.h"
#include "PhraseFeature.h"
#include "Sampler.h"
#include "SampleRankSelector.h"
#include "Utils.h"
using namespace std;
using namespace Josiah;
using namespace Moses;
using boost::is_any_of;
namespace po = boost::program_options;
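/** Average the current weight vector across all MPI processes and broadcast
 *  the result, so that every process continues from the same mixed weights. */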
static void MixWeights(size_t size, size_t rank) {
#ifdef MPI_ENABLED
FVector avgWeights;
FVector& currWeights = WeightManager::instance().get();
MPI_VERBOSE(1, "Before mixing, current weights " << currWeights << endl);
mpi::communicator world;
mpi::reduce(world,currWeights,avgWeights, FVectorPlus(),0);
if (rank == 0) {
avgWeights /= size;
}
mpi::broadcast(world,avgWeights,0);
WeightManager::instance().get() = avgWeights;
MPI_VERBOSE(1, "After mixing, current weights: " << avgWeights << endl);
#endif
}
int main(int argc, char** argv) {
int rank = 0, size = 1;
#ifdef MPI_ENABLED
MPI_Init(&argc,&argv);
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&size);
cerr << "MPI rank: " << rank << endl;
cerr << "MPI size: " << size << endl;
#endif
size_t iterations;
string feature_file;
int debug;
int mpidebug;
string mpidebugfile;
int burning_its;
string inputfile;
string mosesini;
bool help;
uint32_t seed;
string weightfile;
vector<string> ref_files;
size_t batchLines;
size_t epochLines;
size_t epochs;
string weight_dump_stem;
size_t weight_dump_batches;
size_t weight_dump_samples;
bool weight_dump_current;
size_t lag;
string learnerName;
bool chiang_target;
bool always_update;
bool update_target;
float cwInitialVariance, cwConfidence;
float perceptron_lr;
float fixed_temperature;
float fixed_temperature_scaling;
bool slack_rescaling, scale_loss_by_target_gain;
vector<float> burnin_anneal;
bool closestBestNeighbour;
bool approxDocBleu;
float approxDocBleuDecay;
bool fix_margin;
float margin, slack;
float tolerance;
bool ignoreUWP;
bool disableUWP;
bool l1Normalise, l2Normalise;
float norm, scale_margin;
float flip_prob, merge_split_prob, retrans_prob;
size_t merge_split_toptions, retrans_toptions;
bool enable_trans_options_cache;
bool use_alignment_info;
po::options_description desc("Allowed options");
desc.add_options()
("help",po::value( &help )->zero_tokens()->default_value(false), "Print this help message and exit")
("config,f",po::value<string>(&mosesini),"Moses ini file")
("verbosity,v", po::value<int>(&debug)->default_value(0), "Verbosity level")
("mpi-debug-level", po::value<int>(&MpiDebug::verbosity)->default_value(0), "Verbosity level for debugging messages used in mpi.")
("mpi-debug-file", po::value<string>(&mpidebugfile), "Debug file stem for use by mpi processes")
("random-seed,e", po::value<uint32_t>(&seed), "Random seed")
("iterations,s", po::value<size_t>(&iterations)->default_value(10),
"Number of sampler iterations")
("burn-in,b", po::value<int>(&burning_its)->default_value(1), "Duration (in sampling iterations) of burn-in period")
("input-file,i",po::value<string>(&inputfile),"Input file containing tokenised source")
("weights,w",po::value<string>(&weightfile),"Weight file")
("ref,r", po::value<vector<string> >(&ref_files), "Reference translation files for training")
("extra-feature-config,X", po::value<string>(&feature_file), "Configuration file for extra (non-Moses) features")
("batch-lines", po::value<size_t>(&batchLines)->default_value(1), "Number of lines in each training batch")
("epoch-lines", po::value<size_t>(&epochLines)->default_value(1000), "Number of lines in each epoch")
("epochs", po::value<size_t>(&epochs)->default_value(1), "Number of training epochs")
("weight-dump-stem", po::value<string>(&weight_dump_stem)->default_value(""), "Stem of filename to use for dumping weights - leave empty for no dumping")
("weight-dump-batches", po::value<size_t>(&weight_dump_batches)->default_value(0), "Number of batches to process before dumping weights")
("weight-dump-samples", po::value<size_t>(&weight_dump_samples)->default_value(0), "Number of samples to process before dumping weights")
("weight-dump-current",po::value<bool>(&weight_dump_current)->zero_tokens()->default_value(false), "Dump the current weights, instead of the averaged weights")
("lag", po::value<size_t>(&lag)->default_value(1), "How often to collect weight updates for the average weights.")
("learner", po::value(&learnerName)->default_value("perceptron"), "Use this online learner")
("always-update", po::value<bool>(&always_update)->zero_tokens()->default_value(false),
"Always call the update, even if ranking is correct")
("update-target", po::value<bool>(&update_target)->zero_tokens()->default_value(false),
"Update towards target, not chosen")
("chiang-target", po::value<bool>(&chiang_target)->zero_tokens()->default_value(false),
"Use Chiang's gain+score to choose the target")
("cw-initial-variance", po::value<float>(&cwInitialVariance)->default_value(1.0f), "Initial variance for CW Learning")
("cw-confidence", po::value<float>(&cwConfidence)->default_value(1.644854f), "Initial confidence value for CW Learning, use value in probit([0.5,1.0])")
("perc-lr", po::value<float>(&perceptron_lr)->default_value(1.0f), "Perceptron learning rate")
("use-slack-rescaling", po::value<bool>(&slack_rescaling)->zero_tokens()->default_value(false), "Use slack rescaling in mira (default is margin rescaling)")
("scale-loss-by-target-gain", po::value<bool>(&scale_loss_by_target_gain)->zero_tokens()->default_value(false), "Scale the loss by the target gain")
("fixed-temperature", po::value<float>(&fixed_temperature)->default_value(1.0f), "Temperature for fixed temp sample acceptor")
("scale-fixed-temperature", po::value<float>(&fixed_temperature_scaling)->default_value(1.0f), "Scaling applied to fixed temperature at the end of an epoch")
("burnin-anneal", po::value<vector<float> >(&burnin_anneal)->multitoken(), "Specify (start stop floor ratio) for burnin annealing")
("closest-best-neighbour", po::value(&closestBestNeighbour)->zero_tokens()->default_value(false), "Closest best neighbour")
("use-approx-doc-bleu", po::value(&approxDocBleu)->zero_tokens()->default_value(false), "Compute approx doc bleu as gain")
("approx-doc-bleu-decay", po::value<float>(&approxDocBleuDecay)->default_value(0.9), "Decay factor for approx doc bleu")
("fix-margin", po::value(&fix_margin)->zero_tokens()->default_value(false), "Do MIRA update with a specified margin")
("margin", po::value<float>(&margin)->default_value(1.0f), "Margin size")
("slack", po::value<float>(&slack)->default_value(-1.0f), "Slack")
("tolerance", po::value<float>(&tolerance)->default_value(0.0f), "Difference between chosen bleu and target bleu must be greater than this to force a weight update")
("ignore-uwp", po::value<bool>(&ignoreUWP)->zero_tokens()->default_value(false), "Ignore unknown word penalty weight when training")
("disable-uwp", po::value<bool>(&disableUWP)->zero_tokens()->default_value(false), "Disable the unknown word penalty weight when training")
("l1normalise", po::value<bool>(&l1Normalise)->zero_tokens()->default_value(false), "L1normalise weight vector during MIRA samplerank training")
("l2normalise", po::value<bool>(&l2Normalise)->zero_tokens()->default_value(false), "L2normalise weight vector during MIRA samplerank training")
("norm", po::value<float>(&norm)->default_value(1.0f), "Normalise weight vector to this value")
("margin-scale", po::value<float>(&scale_margin)->default_value(1.0f), "Scale margin by this factor")
("enable-trans-options-cache", po::value<bool>(&enable_trans_options_cache)->zero_tokens()->default_value(false), "Enable the translation options cache")
("flip-prob", po::value<float>(&flip_prob)->default_value(0.6f), "Probability of applying flip operator during random scan")
("merge-split-prob", po::value<float>(&merge_split_prob)->default_value(0.2f), "Probability of applying merge-split operator during random scan")
("retrans-prob", po::value<float>(&retrans_prob)->default_value(0.2f), "Probability of applying retrans operator during random scan")
("merge-split-toptions", po::value<size_t>(&merge_split_toptions)->default_value(20), "Maximum number of translation options for merge-split")
("retrans-toptions", po::value<size_t>(&retrans_toptions)->default_value(20), "Maximum number of translation options for retrans")
("use-alignment-info",po::value<bool>(&use_alignment_info)->zero_tokens()->default_value(false), "Load the alignment info from the phrase table")
;
po::options_description cmdline_options;
cmdline_options.add(desc);
po::variables_map vm;
po::store(po::command_line_parser(argc,argv).
options(cmdline_options).run(), vm);
po::notify(vm);
if (help) {
std::cout << "Usage: " + string(argv[0]) + " -f mosesini-file [options]" << std::endl;
std::cout << desc << std::endl;
return 0;
}
if (weightfile.empty()) {
std::cerr << "Setting all feature weights to zero" << std::endl;
WeightManager::init();
} else {
std::cerr << "Loading feature weights from " << weightfile << std::endl;
WeightManager::init(weightfile);
}
if (mosesini.empty()) {
cerr << "Error: No moses ini file specified" << endl;
return 1;
}
if (mpidebugfile.length()) {
MpiDebug::init(mpidebugfile,rank);
}
float opProb = flip_prob + merge_split_prob + retrans_prob;
if (fabs(1.0 - opProb) > 0.00001) {
std::cerr << "Incorrect usage: specified operator probs should sum up to 1" << std::endl;
return 0;
}
if (burnin_anneal.size() && burnin_anneal.size() != 4) {
cerr << "Error: --burnin-anneal requires 4 arguments" << endl;
return 1;
}
if (weight_dump_stem.size() && !weight_dump_samples && !weight_dump_batches) {
cerr << "Error: If weight_dump_stem is set then must specify either " << endl;
cerr << " --weight_dump_samples or --weight_dump_batches" << endl;
return 1;
}
if (weight_dump_samples && weight_dump_batches) {
cerr << "Error: Must specify either --weight-dump-samples or --weight-dump-batches" << endl;
return 1;
}
//set up moses
vector<string> extraArgs;
extraArgs.push_back("-ttable-limit");
size_t ttableLimit = max(merge_split_toptions, retrans_toptions);
ostringstream ttableLimitConfig;
ttableLimitConfig << ttableLimit;
extraArgs.push_back(ttableLimitConfig.str());
if (!enable_trans_options_cache) {
extraArgs.push_back("-persistent-cache-size");
extraArgs.push_back("0");
}
if (use_alignment_info) {
extraArgs.push_back("-use-alignment-info");
}
initMoses(mosesini,debug,extraArgs);
FeatureVector features;
FVector coreWeights;
configure_features_from_file(feature_file, features,disableUWP,coreWeights);
std::cerr << "Using " << features.size() << " features" << std::endl;
if (vm.count("random-seed")) {
RandomNumberGenerator::instance().setSeed(seed + rank);
}
auto_ptr<Bleu> bleu(new Bleu());
if (approxDocBleu) {
bleu->SetSmoothingWeight(approxDocBleuDecay);
}
auto_ptr<Gain> gain(bleu);
gain->LoadReferences(ref_files,inputfile);
Sampler sampler;
sampler.SetLag(1); //thinning factor for sample collection
//configure the sampler
MergeSplitOperator mso(merge_split_prob,merge_split_toptions);
FlipOperator fo(flip_prob);
TranslationSwapOperator tso(retrans_prob,retrans_toptions);
sampler.AddOperator(&mso);
sampler.AddOperator(&tso);
sampler.AddOperator(&fo);
//Target Assigner
TargetAssignerHandle tgtAssigner;
if (closestBestNeighbour) {
tgtAssigner.reset(new ClosestBestNeighbourTgtAssigner());
} else if (chiang_target) {
tgtAssigner.reset(new ChiangBestNeighbourTgtAssigner());
}
else {
tgtAssigner.reset(new BestNeighbourTgtAssigner());
}
//Add the learner
OnlineLearnerHandle onlineLearner;
if (learnerName == "perceptron") {
boost::shared_ptr<PerceptronLearner> perceptron(new PerceptronLearner());
perceptron->setLearningRate(perceptron_lr);
onlineLearner = perceptron;
} else if (learnerName == "mira+") {
boost::shared_ptr<MiraPlusLearner> mp(new MiraPlusLearner());
mp->setSlack(slack);
mp->setMarginScale(scale_margin);
mp->setFixMargin(fix_margin);
mp->setMargin(margin);
onlineLearner = mp;
} else if (learnerName == "mira") {
boost::shared_ptr<MiraLearner> m(new MiraLearner());
m->setSlack(slack);
m->setMarginScale(scale_margin);
m->setFixMargin(fix_margin);
m->setMargin(margin);
m->setUseSlackRescaling(slack_rescaling);
m->setScaleLossByTargetGain(scale_loss_by_target_gain);
onlineLearner = m;
} else {
throw runtime_error("Unknown learner: " + learnerName);
}
sampler.SetIterations(iterations);
sampler.SetBurnIn(burning_its);
OnlineTrainingCorpus trainingCorpus
(inputfile,
batchLines,
epochLines,
epochs*epochLines,
size,
rank);
bool byBatch = false;
size_t weightDumpFrequency = weight_dump_samples;
if (!weightDumpFrequency) {
weightDumpFrequency = weight_dump_batches;
byBatch = true;
}
WeightCollectorHandle weightCollector(
new WeightCollector(weightDumpFrequency,byBatch,weight_dump_stem,size,rank));
weightCollector->SetL1Normalise(l1Normalise);
weightCollector->SetDumpCurrent(weight_dump_current);
weightCollector->SetLag(lag);
while (trainingCorpus.HasMore()) {
vector<string> lines;
vector<size_t> lineNumbers;
bool shouldMix;
trainingCorpus.GetNextBatch(&lines,&lineNumbers, &shouldMix);
//Makes sure that t-options get sorted by the appropriate weights
FVector currentWeights;
if (coreWeights != FVector()) {
currentWeights = coreWeights;
} else if (weightCollector->getBatchCount()) {
currentWeights = weightCollector->getAverageWeights();
} else {
currentWeights = WeightManager::instance().get();
}
setMosesWeights(currentWeights);
//Generate random hypotheses
vector<TranslationHypothesis> translations;
for (size_t i = 0; i < lines.size(); ++i) {
translations.push_back(TranslationHypothesis(lines[i]));
cerr << "Source sentence: " << lines[i] << endl;
cerr << "Seed hypothesis: " << *(translations.back().getHypothesis()) << endl;
}
//The selector for this sentence
GainFunctionHandle gf = gain->GetGainFunction(lineNumbers);
SampleRankSelector selector(gf, onlineLearner, tgtAssigner, weightCollector);
selector.SetTemperature(fixed_temperature);
//burnin annealing
auto_ptr<AnnealingSchedule> annealer;
if (burnin_anneal.size()) {
annealer.reset(new ExponentialAnnealingSchedule
(burnin_anneal[0],burnin_anneal[1], burnin_anneal[2], burnin_anneal[3]));
} else {
//fixed temp
annealer.reset(new ExponentialAnnealingSchedule
(fixed_temperature,fixed_temperature,fixed_temperature,1));
}
selector.SetBurninAnnealer(annealer.get());
selector.SetIgnoreUnknownWordPenalty(ignoreUWP);
selector.SetTolerance(tolerance);
selector.SetAlwaysUpdate(always_update);
selector.SetUpdateTarget(update_target);
sampler.SetSelector(&selector);
sampler.Run(translations,features);
//cerr << "Performed " << onlineLearner->GetNumUpdates() << " updates for this sentence" << endl;
if (size == 1) {
cerr << "Batch count: " << weightCollector->getBatchCount() << endl;
cerr << "Curr Weights : " << WeightManager::instance().get() << endl;
cerr << "Average Weights : " << weightCollector->getAverageWeights() << endl;
} else {
MPI_VERBOSE(1,"Batch count: " << weightCollector->getBatchCount() << endl);
MPI_VERBOSE(1,"Current Weights : " << WeightManager::instance().get() << endl);
MPI_VERBOSE(1,"Average Weights : " << weightCollector->getAverageWeights() << endl);
}
//PhraseFeature::updateWeights(WeightManager::instance().get());
if (approxDocBleu) {
//This sends the smoothing stats from gf to gain, and resets gf's smoothing stats
gf->UpdateSmoothingStats();
}
if (shouldMix) {
MixWeights(size,rank);
fixed_temperature *= fixed_temperature_scaling;
VERBOSE(1,"Fixed temperature scaled by " << fixed_temperature_scaling << " to " << fixed_temperature << endl);
}
weightCollector->endBatch();
}
#ifdef MPI_ENABLED
MPI_Finalize();
#endif
return 0;
}

402
josiah/SampleRankSelector.cpp Normal file
View File

@@ -0,0 +1,402 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "SampleRankSelector.h"
#ifdef MPI_ENABLED
#include <mpi.h>
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
namespace mpi=boost::mpi;
#endif
#include "Gibbler.h"
#include "MpiDebug.h"
using namespace Moses;
using namespace std;
namespace Josiah {
DeltaGain::DeltaGain(const GainFunctionHandle& gainFunction,const SampleVector& samples, size_t sampleId) :
m_gainFunction(gainFunction),
m_samples(samples),
m_sampleId(sampleId) {}
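//On the first call, cache one translation per sample in the batch: the delta's
//new sentence for this sample, and the current target words for the others.
//Later calls only rebuild this sample's entry before re-evaluating the gain.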
float DeltaGain::operator()(const TDeltaHandle& delta) {
if (m_translations.size() == 0) {
for (size_t i = 0; i < m_samples.size(); ++i) {
Translation translation;
if (i == m_sampleId) {
delta->getNewSentence(translation);
} else {
const vector<Word>& targetWords = m_samples[i]->GetTargetWords();
translation.reserve(targetWords.size());
for (size_t j = 0; j < targetWords.size(); ++j) {
translation.push_back(targetWords[j].GetFactor(0));
}
}
m_translations.push_back(translation);
}
} else {
m_translations[m_sampleId].clear();
delta->getNewSentence(m_translations[m_sampleId]);
}
return m_gainFunction->Evaluate(m_translations);
}
SampleRankSelector::SampleRankSelector(
const GainFunctionHandle& gainFunction,
const OnlineLearnerHandle& onlineLearner,
const TargetAssignerHandle& assigner,
const WeightCollectorHandle& weightCollector) :
m_gainFunction(gainFunction),
m_onlineLearner(onlineLearner),
m_assigner(assigner),
m_weightCollector(weightCollector),
m_burnin(false),
m_ignoreUnknownWordPenalty(false),
m_tolerance(0.0),
m_alwaysUpdate(false),
m_updateTarget(false)
{
m_unknownWordPenaltyName = StaticData::Instance().GetTranslationSystem
(TranslationSystem::DEFAULT).GetUnknownWordPenaltyProducer()
->GetScoreProducerDescription();
}
void SampleRankSelector::SetSamples(const SampleVector& samples) {
m_samples = samples;
m_optimalGain.clear();
m_optimalGain.resize(samples.size());
m_optimalGainSolutionScores.clear();
m_optimalGainSolutionScores.resize(samples.size());
}
void SampleRankSelector::SetIgnoreUnknownWordPenalty(bool ignore) {
m_ignoreUnknownWordPenalty = ignore;
}
void WeightCollector::SetL1Normalise(bool l1normalise) {
m_l1normalise = l1normalise;
}
void SampleRankSelector::SetAlwaysUpdate(bool alwaysUpdate) {
m_alwaysUpdate = alwaysUpdate;
}
void SampleRankSelector::SetUpdateTarget(bool updateTarget) {
m_updateTarget = updateTarget;
}
TDeltaHandle SampleRankSelector::Select(
size_t sampleId,
const TDeltaVector& deltas,
const TDeltaHandle& noChangeDelta,
size_t iteration)
{
//choose by sampling.
if (m_burnin) {
//DeltaGain gain(m_gainFunction,m_samples,sampleId);
TDeltaHandle chosenDelta = m_burninSamplingSelector.Select(sampleId,deltas,noChangeDelta,iteration);
//cerr << "BURN " << gain(chosenDelta) << endl;
return chosenDelta;
}
TDeltaHandle chosenDelta = m_samplingSelector.Select(sampleId,deltas,noChangeDelta,iteration);
DeltaGain gain(m_gainFunction,m_samples,sampleId);
float chosenGain = gain(chosenDelta);
float chosenScore = chosenDelta->getScore();
//oracle
int target = m_assigner->getTarget(deltas, chosenDelta, gain);
if (target == -1) return chosenDelta;
//Only update if necessary, because it can be expensive
if (m_onlineLearner->usesOptimalSolution()) {
UpdateGainOptimalSol(deltas, noChangeDelta, sampleId, gain, target);
}
float targetScore = deltas[target]->getScore();
float targetGain = gain(deltas[target]);
// cerr << "CS " << chosenScore << " TS " << targetScore <<
// " CG " << chosenGain << " TG " << targetGain << endl;
//FVector oldWeights = WeightManager::instance().get();
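//SampleRank update rule: learn only when model score and gain disagree on the
//ranking of chosen vs target (beyond the tolerance), or when updates are forced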
if (m_alwaysUpdate ||
(chosenScore > targetScore && chosenGain+m_tolerance < targetGain) ||
(chosenScore < targetScore && chosenGain-m_tolerance > targetGain) ) {
FVector chosenScores = chosenDelta->getSample().GetFeatureValues() - noChangeDelta->getScores() + chosenDelta->getScores();
FVector targetScores = chosenDelta->getSample().GetFeatureValues() - noChangeDelta->getScores() + deltas[target]->getScores();
if (m_ignoreUnknownWordPenalty) {
chosenScores[m_unknownWordPenaltyName] = 0;
targetScores[m_unknownWordPenaltyName] = 0;
}
m_onlineLearner->doUpdate(chosenScores,
targetScores,
m_optimalGainSolutionScores[sampleId],
chosenGain,
targetGain,
m_optimalGain[sampleId],
WeightManager::instance().get());
}
//cerr << "WEIGHTS: " << WeightManager::instance().get() << endl;
//cerr << "BLEU: " << chosenGain << endl;
m_weightCollector->updateWeights();
//cerr << "WDIFF " << (WeightManager::instance().get() - oldWeights).l1norm() <<endl;
//For approx doc bleu
const Hypothesis* h = chosenDelta->getSample().GetSampleHypothesis();
vector<const Factor*> trans;
h->GetTranslation(&trans, 0);
m_gainFunction->AddSmoothingStats(sampleId, trans);
if (m_updateTarget) {
return deltas[target];
} else {
return chosenDelta;
}
}
void SampleRankSelector::UpdateGainOptimalSol(
const TDeltaVector& deltas,
const TDeltaHandle& noChangeDelta,
size_t sampleId,
DeltaGain& gain,
int target)
{
if (m_assigner->m_name != "Best") {
//need to find the best solution, since we were
//not using the best neighbour target assigner
BestNeighbourTgtAssigner tgtAssigner;
target = tgtAssigner.getTarget(deltas, noChangeDelta, gain);
}
if (target == -1) return;
float targetGain = gain(deltas[target]);
if (targetGain > m_optimalGain[sampleId]) {
m_optimalGain[sampleId] = targetGain;
m_optimalGainSolutionScores[sampleId] = deltas[target]->getSample().GetFeatureValues();
m_optimalGainSolutionScores[sampleId] += deltas[target]->getScores();
m_optimalGainSolutionScores[sampleId] -= noChangeDelta->getScores();
if (m_ignoreUnknownWordPenalty) {
m_optimalGainSolutionScores[sampleId][m_unknownWordPenaltyName] = 0;
}
VERBOSE(1,"New optimal gain " << m_optimalGain[sampleId] << endl);
}
}
void SampleRankSelector::SetTemperature(float temp) {
m_samplingSelector.SetTemperature(temp);
}
void SampleRankSelector::SetBurninAnnealer(AnnealingSchedule* schedule) {
m_burninSamplingSelector.SetAnnealingSchedule(schedule);
}
void SampleRankSelector::BeginBurnin() {
m_burnin = true;
}
void SampleRankSelector::EndBurnin() {
m_burnin = false;
}
void SampleRankSelector::SetTolerance(float tolerance) {
m_tolerance = tolerance;
}
int BestNeighbourTgtAssigner::getTarget(const TDeltaVector& deltas, const TDeltaHandle& noChangeDelta,
DeltaGain& gf) {
//Only do best neighbour for the moment
float bestGain = -1;
int bestGainIndex = -1;
for (TDeltaVector::const_iterator i = deltas.begin(); i != deltas.end(); ++i) {
float gain = gf(*i);
if (gain > bestGain) {
bestGain = gain;
bestGainIndex = i - deltas.begin();
}
}
IFVERBOSE(2) {
if (bestGainIndex > -1) {
cerr << "best nbr has score " << deltas[bestGainIndex]->getScore() << " and gain " << bestGain << endl;
//cerr << "No change has score " << noChangeDelta->getScore() << " and gain " << noChangeDelta->getGain() << endl;
}
}
return bestGainIndex;
}
int ClosestBestNeighbourTgtAssigner::getTarget(const TDeltaVector& deltas, const TDeltaHandle& chosenDelta,
DeltaGain& gf) {
//Among neighbours that improve on the chosen gain, pick the one whose score is closest to the chosen delta's
float minScoreDiff = 10e10;
int closestBestNbr = -1;
float chosenGain = gf(chosenDelta);
float chosenScore = chosenDelta->getScore();
for (TDeltaVector::const_iterator i = deltas.begin(); i != deltas.end(); ++i) {
if (gf(*i) > chosenGain ) {
float scoreDiff = chosenScore - (*i)->getScore();
if (scoreDiff < minScoreDiff) {
minScoreDiff = scoreDiff;
closestBestNbr = i - deltas.begin();
}
}
}
return closestBestNbr;
}
int ChiangBestNeighbourTgtAssigner::getTarget
(const TDeltaVector& deltas, const TDeltaHandle& noChangeDelta, DeltaGain& gf) {
float bestGain = -1e10;
int bestGainIndex = -1;
for (TDeltaVector::const_iterator i = deltas.begin(); i != deltas.end(); ++i) {
float gain = gf(*i) + (*i)->getScore();
if (gain > bestGain) {
bestGain = gain;
bestGainIndex = i - deltas.begin();
}
}
IFVERBOSE(2) {
if (bestGainIndex > -1) {
cerr << "best nbr has score " << deltas[bestGainIndex]->getScore() << " and gain " << bestGain << endl;
}
}
return bestGainIndex;
}
static void DumpWeights(const string& weightDumpStem,
FVector averagedWeights, size_t size, size_t rank) {
static size_t epoch = 0;
#ifdef MPI_ENABLED
MPI_VERBOSE(1, "Before averaging, this node's average weights: " << averagedWeights << endl);
mpi::communicator world;
FVector totalWeights;
mpi::reduce(world, averagedWeights, totalWeights, FVectorPlus(),0);
#endif
if (rank == 0) {
#ifdef MPI_ENABLED
averagedWeights = totalWeights / size;
#endif
MPI_VERBOSE(1, "After averaging, average weights: " << averagedWeights << endl);
ostringstream filename;
filename << weightDumpStem << "_" << epoch;
VERBOSE(1, "Dumping weights for epoch " << epoch << " to " << filename.str() << endl);
averagedWeights.save(filename.str());
}
++epoch;
}
WeightCollector::WeightCollector(size_t frequency, bool byBatch,
const std::string& weightDumpStem, size_t size, size_t rank):
m_frequency(frequency),
m_byBatch(byBatch),
m_weightDumpStem(weightDumpStem),
m_updates(0),m_allUpdates(0),m_batches(0), m_size(size), m_rank(rank),
m_l1normalise(false),m_lag(1), m_dumpCurrent(false)
{
if (m_frequency == 0) m_weightDumpStem = "";
if (m_weightDumpStem.size()) {
if (byBatch) {
cerr << "Weight dumping by batch, frequency = " << frequency << endl;
} else {
cerr << "Weight dumping by sample, frequency = " << frequency << endl;
}
} else {
cerr << "No weight dumping " << endl;
}
m_unknownWordPenaltyName = StaticData::Instance().GetTranslationSystem(
TranslationSystem::DEFAULT).GetUnknownWordPenaltyProducer()
->GetScoreProducerDescription();
}
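//With lag L, only every L-th call contributes a weight vector to the running
//average, e.g. a lag of 10 keeps one update in ten.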
void WeightCollector::updateWeights() {
++m_allUpdates;
if (m_allUpdates % m_lag) return;
++m_updates;
FVector weights = WeightManager::instance().get();
VERBOSE(1,"CURR_WEIGHTS " << weights << endl);
if (m_l1normalise) {
float uwp = weights[m_unknownWordPenaltyName];
// if (m_ignoreUnknownWordPenalty) {
weights[m_unknownWordPenaltyName] = 0;
// }
weights /= weights.l1norm();
// if (m_ignoreUnknownWordPenalty) {
weights[m_unknownWordPenaltyName] = uwp;
// }
}
m_totalWeights += weights;
IFVERBOSE(1) {
VERBOSE(1,"AVE_WEIGHTS " << getAverageWeights() << endl);
}
// cerr << "WT " << (WeightManager::instance().get())[FName("PhraseModel_1")] << " ";
// cerr << getAverageWeights() << endl;
if (m_weightDumpStem.length() && !m_byBatch && m_updates % m_frequency == 0) {
if (m_dumpCurrent) {
DumpWeights(m_weightDumpStem, WeightManager::instance().get(), m_size, m_rank);
} else {
DumpWeights(m_weightDumpStem, getAverageWeights(), m_size, m_rank);
}
}
}
void WeightCollector::endBatch() {
++m_batches;
// cerr << "Batch " << m_batches << " rank " << m_rank << endl;
if (m_weightDumpStem.length() && m_byBatch && m_batches % m_frequency == 0) {
DumpWeights(m_weightDumpStem, getAverageWeights(), m_size, m_rank);
}
}
FVector WeightCollector::getAverageWeights() {
assert(m_updates);
return m_totalWeights/m_updates;
}
size_t WeightCollector::getBatchCount() {
return m_batches;
}
void WeightCollector::SetLag(size_t lag) {
if (lag == 0) lag = 1;
m_lag = lag;
}
void WeightCollector::SetDumpCurrent(bool dumpCurrent) {
m_dumpCurrent = dumpCurrent;
}
}

192
josiah/SampleRankSelector.h Normal file
View File

@@ -0,0 +1,192 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <boost/shared_ptr.hpp>
#include "Utils.h"
#include "Gain.h"
#include "OnlineLearner.h"
#include "Sampler.h"
#include "Selector.h"
namespace Josiah {
/** Calculates the gain due to a delta.
**/
class DeltaGain {
public:
DeltaGain(const GainFunctionHandle& gainFunction, const SampleVector& samples, size_t sampleId);
float operator()(const TDeltaHandle& delta) ;
private:
const GainFunctionHandle& m_gainFunction;
const SampleVector& m_samples;
size_t m_sampleId;
//cached list of translations
std::vector<Translation> m_translations;
};
/**
* Used to choose the oracle translation hypothesis.
**/
class TargetAssigner {
public:
TargetAssigner(const std::string& name) : m_name(name) {}
virtual ~TargetAssigner(){}
virtual int getTarget(const TDeltaVector& deltas, const TDeltaHandle& noChangeDelta,
DeltaGain& gf) = 0;
std::string m_name;
};
class BestNeighbourTgtAssigner : public TargetAssigner {
public:
BestNeighbourTgtAssigner() : TargetAssigner("Best") {}
virtual ~BestNeighbourTgtAssigner(){}
virtual int getTarget(const TDeltaVector& deltas, const TDeltaHandle& noChangeDelta,
DeltaGain& gf);
};
class ClosestBestNeighbourTgtAssigner : public TargetAssigner {
public:
ClosestBestNeighbourTgtAssigner(): TargetAssigner("CBN") {}
virtual ~ClosestBestNeighbourTgtAssigner(){}
virtual int getTarget(const TDeltaVector& deltas, const TDeltaHandle& noChangeDelta,
DeltaGain& gf);
};
class ChiangBestNeighbourTgtAssigner : public TargetAssigner {
public:
ChiangBestNeighbourTgtAssigner(): TargetAssigner("Chiang"){}
virtual ~ChiangBestNeighbourTgtAssigner(){}
virtual int getTarget(const TDeltaVector& deltas, const TDeltaHandle& noChangeDelta,
DeltaGain& gf);
};
typedef boost::shared_ptr<TargetAssigner> TargetAssignerHandle;
/**
* In charge of collecting the weights and writing them to file.
**/
class WeightCollector {
public:
/**
* If frequency is non-zero, then dump weights to file. Count
* by samples, or by batch.
**/
WeightCollector(size_t frequency, bool byBatch,
const std::string& weightDumpStem, size_t size, size_t rank);
void updateWeights();
void endBatch();
FVector getAverageWeights();
size_t getBatchCount();
void SetL1Normalise(bool l1normalise);
void SetLag(size_t lag);
void SetDumpCurrent(bool dumpCurrent);
private:
size_t m_frequency;
bool m_byBatch;
std::string m_weightDumpStem;
FVector m_totalWeights;
size_t m_updates;
size_t m_allUpdates;
size_t m_batches;
size_t m_size;
size_t m_rank;
bool m_l1normalise;
std::string m_unknownWordPenaltyName;
size_t m_lag;
//dump current weights instead of average
bool m_dumpCurrent;
};
typedef boost::shared_ptr<WeightCollector> WeightCollectorHandle;
/**
* Implements the SampleRank algorithm: given the proposed list of deltas it chooses one,
* updates the weights accordingly, and returns the delta to apply.
**/
class SampleRankSelector : public DeltaSelector {
public:
SampleRankSelector(const GainFunctionHandle& gainFunction,
const OnlineLearnerHandle& onlineLearner,
const TargetAssignerHandle& assigner,
const WeightCollectorHandle& weightCollector);
/** Body of SampleRank algorithm */
virtual TDeltaHandle Select(size_t sampleId,
const TDeltaVector& deltas,
const TDeltaHandle& noChangeDelta,
size_t iteration);
virtual void BeginBurnin();
virtual void EndBurnin();
virtual void SetSamples(const SampleVector& samples);
void SetTemperature(float temp);
void SetBurninAnnealer(AnnealingSchedule* schedule);
void SetIgnoreUnknownWordPenalty(bool ignore);
void SetTolerance(float tolerance);
void SetAlwaysUpdate(bool alwaysUpdate);
void SetUpdateTarget(bool updateTarget);
virtual ~SampleRankSelector() {}
private:
void UpdateGainOptimalSol(
const TDeltaVector& deltas,
const TDeltaHandle& noChangeDelta,
size_t sampleId,
DeltaGain& gain,
int target);
const GainFunctionHandle m_gainFunction;
const OnlineLearnerHandle m_onlineLearner;
const TargetAssignerHandle m_assigner;
const WeightCollectorHandle m_weightCollector;
bool m_burnin;
SamplingSelector m_samplingSelector;
SamplingSelector m_burninSamplingSelector;
//feature values for optimal gain solution
std::vector<FVector> m_optimalGainSolutionScores;
//gain of optimal gain solution
std::vector<FValue> m_optimalGain;
//The current batch of samples
SampleVector m_samples;
bool m_ignoreUnknownWordPenalty;
std::string m_unknownWordPenaltyName;
//difference between the chosen hypothesis's BLEU and the target's BLEU
//must exceed this tolerance to trigger a weight update
float m_tolerance;
//always call the updater, even if the ranking is correct. I think
//this is what Aron Culotta does.
bool m_alwaysUpdate;
//Jump to the target instead of the chosen
bool m_updateTarget;
};
}

129
josiah/Sampler.cpp Normal file

@ -0,0 +1,129 @@
#include "Sampler.h"
#include "GibbsOperator.h"
#include "Hypothesis.h"
#include "TranslationOptionCollection.h"
#include "Gibbler.h"
#include "SampleCollector.h"
using namespace std;
namespace Josiah {
void Sampler::AddOperator(GibbsOperator* o) {
m_operators.push_back(o);
}
GibbsOperator* Sampler::SampleNextOperator(const std::vector<GibbsOperator*>& operators) {
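//Roulette-wheel selection: draw u uniformly from [0,1) and walk the
//operators' cumulative scan probabilities until the running sum passes u.
//Assumes the scan probabilities sum to one and the operator list is non-empty.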
double random = RandomNumberGenerator::instance().next();
size_t position = 1;
double sum = operators[0]->GetScanProb();
for (; position < operators.size() && sum < random; ++position) {
sum += operators[position]->GetScanProb();
}
return operators[position-1];
}
void Sampler::Run(const vector<TranslationHypothesis>& translations, const FeatureVector& features, bool raoBlackwell) {
SampleVector samples;
for (size_t i = 0; i < translations.size(); ++i) {
samples.push_back(SampleHandle(new Sample(translations[i].getHypothesis(),
translations[i].getWords(), features, raoBlackwell)));
}
m_selector->SetSamples(samples);
map<GibbsOperator*, size_t> samplesPerOperator; // to keep track of number of samples per operator
for (size_t k = 0; k < m_reheatings; ++k) {
if (m_burninIts) {
m_selector->BeginBurnin();
//do some burn-in
size_t allSamples = 0;
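//Only every m_lag-th proposal counts as a burn-in iteration;
//m_lag must be at least 1 for the modulo test below to be defined.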
for (size_t its = 0; its < m_burninIts; ++allSamples) {
VERBOSE(2,"Gibbs burnin iteration: " << its << endl);
doSample(samples,translations,its);
if (allSamples % m_lag == 0) //increment now
++its;
}
m_selector->EndBurnin();
}
//Sample now
size_t samplesCollected = 0;
size_t allSamples = 0;
while(samplesCollected < m_iterations) {
VERBOSE(2,"Gibbs sampling iteration: " << allSamples << "Collected: " << samplesCollected << endl);
GibbsOperator* currOperator = doSample(samples,translations,samplesCollected);
if (currOperator) {
++samplesPerOperator[currOperator];
++allSamples;
if (allSamples % m_lag == 0) {//collect and increment now
collectSample(*samples[0]);
++samplesCollected;
}
}
}
VERBOSE(1,"Sampled " << allSamples << ", collected " << samplesCollected << endl);
IFVERBOSE(1) {
for (map<GibbsOperator*, size_t>::const_iterator it = samplesPerOperator.begin(); it != samplesPerOperator.end(); ++it) {
cerr << "Sampled operator " << (it->first)->name() << ": " << it->second << " times." << endl;
}
}
}
}
void Sampler::collectSample(Sample& sample) {
for (size_t j = 0; j < m_collectors.size(); ++j) {
m_collectors[j]->addSample(sample);
}
sample.ResetConditionalFeatureValues(); //for Rao-Blackwellisation
}
GibbsOperator* Sampler::doSample(const SampleVector& samples,
const vector<TranslationHypothesis>& translations,
size_t iteration) {
//choose an operator, and sample
GibbsOperator* currOperator = SampleNextOperator(m_operators);
TDeltaHandle noChangeDelta;
TDeltaVector deltas;
size_t sampleIndex = RandomNumberGenerator::instance().getRandomIndexFromZeroToN(samples.size());
currOperator->propose(*samples[sampleIndex],*(translations[sampleIndex].getToc()),deltas,noChangeDelta);
VERBOSE(2,"Created " << deltas.size() << " delta(s) with operator " << currOperator->name() << endl);
if (deltas.size()) {
TDeltaHandle selectedDelta = m_selector->Select(sampleIndex, deltas,noChangeDelta, iteration);
if (selectedDelta.get() != noChangeDelta.get()) {
selectedDelta->apply(*noChangeDelta);
if (m_checkFeatures) {
samples[sampleIndex]->CheckFeatureConsistency();
}
}
return currOperator;
} else {
return NULL;
}
//cerr << "Sampled sentence " << sampleIndex << " and updated to " << endl;
//for (size_t i = 0 ; i < samples.size(); ++i) {
/* const vector<Word>& words = samples[sampleIndex]->GetTargetWords();
for (size_t j = 0; j < words.size(); ++j) {
cerr << words[j];
}
cerr << endl;
cerr << "FV " << samples[sampleIndex]->GetFeatureValues() << endl;
cerr << "Iteration " << iteration << " Score " << inner_product(samples[sampleIndex]->GetFeatureValues(),
WeightManager::instance().get()) << endl; */
//}
}
}

70
josiah/Sampler.h Normal file

@ -0,0 +1,70 @@
#pragma once
#include <vector>
#include <boost/shared_ptr.hpp>
#include "Word.h"
#include "AnnealingSchedule.h"
#include "Decoder.h"
#include "FeatureFunction.h"
#include "Gibbler.h"
#include "Selector.h"
namespace Moses {
class Hypothesis;
class TranslationOptionCollection;
class Word;
}
using namespace Moses;
namespace Josiah {
class SampleCollector;
class GibbsOperator;
#define SAMPLEMAX 1000000
class Sampler {
private:
std::vector<SampleCollector*> m_collectors;
std::vector<GibbsOperator*> m_operators;
DeltaSelector* m_selector;
size_t m_iterations;
size_t m_burninIts;
size_t m_reheatings;
const AnnealingSchedule* m_as;
size_t m_lag;
bool m_checkFeatures;
void collectSample(Sample& sample);
GibbsOperator* SampleNextOperator(const std::vector<GibbsOperator*>& );
GibbsOperator* doSample(const SampleVector& samples,
const std::vector<TranslationHypothesis>& translations,
size_t iteration);
public:
Sampler(): m_selector(NULL), m_iterations(10), m_burninIts(0), m_reheatings(1),
m_as(NULL), m_lag(1), m_checkFeatures(false) {} //a lag of 0 would make the modulo tests in Run() undefined
void Run(const std::vector<TranslationHypothesis>& translations,
const FeatureVector& features,
bool raoBlackwell = false) ;
void AddOperator(GibbsOperator* o);
void AddCollector(SampleCollector* c) {m_collectors.push_back(c);}
void SetSelector(DeltaSelector* selector) {m_selector = selector;}
void SetIterations(size_t iterations) {m_iterations = iterations;}
void SetReheatings(size_t r) {m_reheatings = r;}
void SetLag(size_t l) {m_lag = l;}
void SetBurnIn(size_t burnin_its) {m_burninIts = burnin_its;}
void SetCheckFeatures(bool checkFeatures) {m_checkFeatures = checkFeatures;}
};
}

134
josiah/Selector.cpp Normal file

@ -0,0 +1,134 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Selector.h"
#include <fstream>
#include "StaticData.h"
#include "Util.h"
using namespace Moses;
using namespace std;
namespace Josiah {
static void getScores(const TDeltaVector& deltas, vector<double>& scores) {
for (TDeltaVector::const_iterator i = deltas.begin(); i != deltas.end(); ++i) {
scores.push_back((*i)->getScore());
}
}
static void normalize(vector<double>& scores) {
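//Scores are unnormalised log-probabilities: normalise in place by
//subtracting their log-sum-exp, so that exp(scores) sums to one.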
double sum = scores[0];
for (size_t i = 1; i < scores.size(); ++i) {
sum = log_sum(sum,scores[i]);
}
transform(scores.begin(),scores.end(),scores.begin(),bind2nd(minus<double>(),sum));
}
static void getNormalisedScores(const TDeltaVector& deltas, vector<double>& scores, float temp) {
getScores(deltas, scores);
IFVERBOSE(2) {
cerr << "Before annealing, scores are :";
copy(scores.begin(),scores.end(),ostream_iterator<double>(cerr," "));
cerr << endl;
}
//do annealing
transform(scores.begin(),scores.end(),scores.begin(),bind2nd(multiplies<double>(), 1.0/temp));
IFVERBOSE(2) {
cerr << "After annealing, scores are :";
copy(scores.begin(),scores.end(),ostream_iterator<double>(cerr," "));
cerr << endl;
}
normalize(scores);
}
static size_t getSample(const vector<double>& scores, double random) {
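//Inverse-CDF sampling in log space: 'random' is log(u) for u ~ U(0,1);
//accumulate the log-probabilities with log_sum until the running total exceeds it.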
size_t position = 1;
double sum = scores[0];
for (; position < scores.size() && sum < random; ++position) {
sum = log_sum(sum,scores[position]);
}
size_t chosen = position-1;
VERBOSE(3,"The chosen sample is " << chosen << endl);
return chosen;
}
SamplingSelector::SamplingSelector() :
m_annealingSchedule(NULL), m_temperature(1) {}
void SamplingSelector::SetAnnealingSchedule(AnnealingSchedule* annealingSchedule) {
m_annealingSchedule = annealingSchedule;
}
void SamplingSelector::SetTemperature(float temperature) {
assert(temperature != 0);
m_temperature = temperature;
m_annealingSchedule = NULL;
}
TDeltaHandle SamplingSelector::Select(size_t, const TDeltaVector& deltas, const TDeltaHandle&, size_t iteration)
{
float T = m_temperature;
if (m_annealingSchedule) {
T = m_annealingSchedule->GetTemperatureAtTime(iteration);
}
vector<double> scores;
getNormalisedScores(deltas,scores,T);
double random = log(RandomNumberGenerator::instance().next());
size_t chosen = getSample(scores, random);
/*
cerr << "deltas: " << endl;
for (size_t i = 0; i < deltas.size(); ++i) {
cerr << scores[i] << endl;
}
cerr << "chosen " << chosen << endl;
cerr << random << endl;
*/
return deltas[chosen];
}
RandomNumberGenerator::RandomNumberGenerator() :m_dist(0,1), m_generator(), m_random(m_generator,m_dist) {
uint32_t seed;
std::ifstream r("/dev/urandom");
if (r) {
r.read((char*)&seed,sizeof(uint32_t));
}
if (r.fail() || !r) {
std::cerr << "Warning: could not read from /dev/urandom. Seeding from clock" << std::endl;
seed = time(NULL);
}
std::cerr << "Seeding random number sequence to " << seed << endl;
m_generator.seed(seed);
}
RandomNumberGenerator RandomNumberGenerator::s_instance;
}

124
josiah/Selector.h Normal file

@ -0,0 +1,124 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
/**
* Strategies used to select samples proposed by the Gibbs operators.
**/
#include <vector>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real.hpp>
#include <boost/random/variate_generator.hpp>
#include "AnnealingSchedule.h"
#include "Gibbler.h"
#include "TranslationDelta.h"
namespace Josiah {
/**
* Abstract base class for sample selection strategy.
**/
class DeltaSelector {
public:
virtual TDeltaHandle Select( size_t sampleId, //index of the sample within the batch
const TDeltaVector& deltas,
const TDeltaHandle& noChangeDelta,
size_t iteration) = 0;
virtual void BeginBurnin() {}
virtual void EndBurnin() {}
virtual void SetSamples(const SampleVector& samples) {}
virtual ~DeltaSelector() {}
};
/**
* Selector that samples the delta by converting the scores to probabilities.
**/
class SamplingSelector : public DeltaSelector {
public:
SamplingSelector();
virtual TDeltaHandle Select( size_t sampleId,
const TDeltaVector& deltas,
const TDeltaHandle& noChangeDelta,
size_t iteration);
void SetAnnealingSchedule(AnnealingSchedule* annealingSchedule);
void SetTemperature(float temperature);
private:
//Note that the annealingSchedule overrides the temperature
AnnealingSchedule* m_annealingSchedule;
float m_temperature;
};
typedef boost::mt19937 base_generator_type;
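/**
* Numerically stable computation of log(exp(log_a) + exp(log_b)),
* factoring out the larger argument so the exponential cannot overflow.
**/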
template<class T>
T log_sum (T log_a, T log_b)
{
T v;
if (log_a < log_b) {
v = log_b+log ( 1 + exp ( log_a-log_b ));
} else {
v = log_a+log ( 1 + exp ( log_b-log_a ));
}
return ( v );
}
/**
* Wraps the random number generation and enables seeding.
**/
class RandomNumberGenerator {
//mersenne twister - and why not?
public:
static RandomNumberGenerator& instance() {return s_instance;}
double next() {return m_random();}
void setSeed(uint32_t seed){
m_generator.seed(seed);
std::cerr << "Setting random seed to " << seed << std::endl;
}
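//Returns a uniformly distributed index in [0, n); next() lies in [0,1).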
size_t getRandomIndexFromZeroToN(size_t n) {
return (size_t)(next()*n);
}
private:
static RandomNumberGenerator s_instance;
RandomNumberGenerator();
boost::uniform_real<> m_dist;
base_generator_type m_generator;
boost::variate_generator<base_generator_type&, boost::uniform_real<> > m_random;
};
struct RandomIndex {
ptrdiff_t operator() (ptrdiff_t max) {
return static_cast<ptrdiff_t>(RandomNumberGenerator::instance().getRandomIndexFromZeroToN(max));
}
};
}

61
josiah/SourceToTargetRatio.cpp Normal file

@ -0,0 +1,61 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "SourceToTargetRatio.h"
using namespace std;
using namespace Moses;
namespace Josiah {
FeatureFunctionHandle SourceToTargetRatioFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new SourceToTargetRatioFeatureFunction(sample));
}
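/** The score is 1 - |source|/|target|: zero when the lengths match, negative
when the hypothesis is shorter than the source, positive when it is longer. **/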
FValue Josiah::SourceToTargetRatioFeatureFunction::computeScore() {
return 1.0 - ((float) m_src_len /(float) getSample().GetTargetWords().size());
}
/** Score due to one segment */
FValue Josiah::SourceToTargetRatioFeatureFunction::getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap) {
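//New target length = current length + the option's phrase length,
//minus the words currently occupying the gap.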
return 1.0 - ((float) m_src_len / (float) (getSample().GetTargetWords().size() + option->GetTargetPhrase().GetSize() - gap.segment.GetNumWordsCovered()));
}
/** Score due to two segments **/
FValue Josiah::SourceToTargetRatioFeatureFunction::getContiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,const TargetGap& gap)
{
return 1.0 - ((float) m_src_len /(float) (getSample().GetTargetWords().size()
+ leftOption->GetTargetPhrase().GetSize()
+ rightOption->GetTargetPhrase().GetSize() - gap.segment.GetNumWordsCovered()));
}
FValue Josiah::SourceToTargetRatioFeatureFunction::getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap)
{
return 1.0 - ((float) m_src_len /(float) (getSample().GetTargetWords().size() + leftOption->GetTargetPhrase().GetSize()
+ rightOption->GetTargetPhrase().GetSize() -
(leftGap.segment.GetNumWordsCovered() + rightGap.segment.GetNumWordsCovered() )) ) ;
}
/** Score due to flip */
FValue Josiah::SourceToTargetRatioFeatureFunction::getFlipUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) {
return computeScore();
}
}

66
josiah/SourceToTargetRatio.h Normal file

@ -0,0 +1,66 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <algorithm>
#include <iostream>
#include "TypeDef.h"
#include "FeatureFunction.h"
#include "Gibbler.h"
namespace Josiah {
class Sample;
class SourceToTargetRatioFeature : public Feature {
public:
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
};
class SourceToTargetRatioFeatureFunction: public SingleValuedFeatureFunction {
public:
SourceToTargetRatioFeatureFunction(const Sample& sample) : SingleValuedFeatureFunction(sample,"SourceToTargetRatio")
{ m_src_len = sample.GetSourceSize();}
virtual FValue computeScore();
/** Score due to one segment */
virtual FValue getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap) ;
/** Score due to two segments **/
virtual FValue getContiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& gap) ;
virtual FValue getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
/** Score due to flip */
virtual FValue getFlipUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) ;
virtual ~SourceToTargetRatioFeatureFunction() {}
private:
size_t m_src_len;
};
}

106
josiah/StatelessFeature.cpp Normal file

@ -0,0 +1,106 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <sstream>
#include "Gibbler.h"
#include "StatelessFeature.h"
using namespace Moses;
using namespace std;
namespace Josiah {
FeatureFunctionHandle StatelessFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new StatelessFeatureFunction(sample,this));
}
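//Exposes each of the wrapped Moses feature's score components as a separate
//named Josiah feature, indexed "0", "1", ... under the producer's description.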
StatelessFeatureAdaptor::StatelessFeatureAdaptor(
const MosesFeatureHandle& mosesFeature):
m_mosesFeature(mosesFeature)
{
assert(!mosesFeature->ComputeValueInTranslationOption());
for (size_t i = 0; i < mosesFeature->GetNumScoreComponents(); ++i) {
ostringstream id;
id << i;
m_featureNames.push_back(FName(mosesFeature->GetScoreProducerDescription(),id.str()));
}
}
void StatelessFeatureAdaptor::assign
(const Moses::TranslationOption* toption, FVector& scores) const {
ScoreComponentCollection scc;
m_mosesFeature->Evaluate(toption->GetTargetPhrase(),&scc);
vector<float> mosesScores = scc.GetScoresForProducer(m_mosesFeature.get());
for (size_t i = 0; i < m_featureNames.size(); ++i) {
scores[m_featureNames[i]] = mosesScores.at(i);
}
}
StatelessFeatureFunction::StatelessFeatureFunction
(const Sample& sample, const StatelessFeature* parent):
FeatureFunction(sample), m_parent(parent) {}
void StatelessFeatureFunction::assignScore(FVector& scores) {
const Hypothesis* currHypo = getSample().GetTargetTail();
while ((currHypo = (currHypo->GetNextHypo()))) {
m_parent->assign(&(currHypo->GetTranslationOption()), scores);
}
}
/** Score due to one segment */
void StatelessFeatureFunction::doSingleUpdate
(const TranslationOption* option, const TargetGap& gap, FVector& scores) {
m_parent->assign(option,scores);
}
/** Score due to two segments. The left and right refer to the target positions.**/
void StatelessFeatureFunction::doContiguousPairedUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores) {
m_parent->assign(leftOption,scores);
m_parent->assign(rightOption,scores);
}
void StatelessFeatureFunction::doDiscontiguousPairedUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
m_parent->assign(leftOption,scores);
m_parent->assign(rightOption,scores);
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void StatelessFeatureFunction::doFlipUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
//do nothing
}
}

92
josiah/StatelessFeature.h Normal file

@ -0,0 +1,92 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <string>
#include <boost/shared_ptr.hpp>
#include "../moses/src/FeatureFunction.h"
#include "FeatureFunction.h"
namespace Josiah {
typedef boost::shared_ptr<Moses::StatelessFeatureFunction> MosesFeatureHandle;
/**
* Stateless Gibbler feature
**/
class StatelessFeature : public Feature {
public:
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
/** Scores due to this translation option */
virtual void assign
(const Moses::TranslationOption* option, FVector& scores) const = 0;
};
/**
* Wraps a Moses stateless feature to give a gibbler feature.
**/
class StatelessFeatureAdaptor : public StatelessFeature {
public:
StatelessFeatureAdaptor(
const MosesFeatureHandle& mosesFeature);
virtual void assign(const Moses::TranslationOption* toption, FVector& scores) const;
private:
MosesFeatureHandle m_mosesFeature;
std::vector<FName> m_featureNames;
};
class StatelessFeatureFunction : public FeatureFunction {
public:
StatelessFeatureFunction
(const Sample& sample, const StatelessFeature* parent);
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
/** Score due to one segment */
virtual void doSingleUpdate
(const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
private:
const StatelessFeature* m_parent;
};
}

25
josiah/Test.cpp Normal file

@ -0,0 +1,25 @@
/**********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
//Supplies the main for the josiah test module
#define BOOST_TEST_MODULE josiah
#include <boost/test/unit_test.hpp>

479
josiah/TestBleu.cpp Normal file

@ -0,0 +1,479 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <boost/test/unit_test.hpp>
#include "FactorCollection.h"
#include "Util.h"
#include "Bleu.h"
using namespace Josiah;
using namespace Moses;
using namespace std;
BOOST_AUTO_TEST_SUITE(bleu)
static void checkNgram(const string& ngram, size_t count, const NGramMap& ngrams) {
Translation t;
TextToTranslation(ngram,t);
NGramMap::const_iterator i = ngrams.find(t);
size_t actualCount = 0;
if (i != ngrams.end()) {
actualCount = i->second;
}
BOOST_CHECK_MESSAGE(actualCount == count,ngram);
}
BOOST_AUTO_TEST_CASE(ref_stats_single) {
Translation ref,src;
TextToTranslation("give me the statistics on this sentence , give me",ref);
TextToTranslation("the source is not really important",src);
vector<Translation> refs;
refs.push_back(ref);
Bleu bleu;
bleu.AddReferences(refs,src);
NGramMap ngrams = bleu.GetReferenceStats(0);
BOOST_CHECK_EQUAL(ngrams.size(),(size_t)31);
checkNgram("give", 2, ngrams);
checkNgram("me", 2, ngrams);
checkNgram("the", 1, ngrams);
checkNgram("statistics", 1, ngrams);
checkNgram("on", 1, ngrams);
checkNgram("this", 1, ngrams);
checkNgram("sentence", 1, ngrams);
checkNgram(",", 1, ngrams);
checkNgram("give me", 2, ngrams);
checkNgram("me the", 1, ngrams);
checkNgram("the statistics", 1, ngrams);
checkNgram("statistics on", 1, ngrams);
checkNgram("on this", 1, ngrams);
checkNgram("this sentence", 1, ngrams);
checkNgram("sentence ,", 1, ngrams);
checkNgram(", give", 1, ngrams);
checkNgram("give me the", 1, ngrams);
checkNgram("me the statistics", 1, ngrams);
checkNgram("the statistics on", 1, ngrams);
checkNgram("statistics on this", 1, ngrams);
checkNgram("on this sentence", 1, ngrams);
checkNgram("sentence , give", 1, ngrams);
checkNgram(", give me", 1, ngrams);
checkNgram("give me the statistics", 1, ngrams);
checkNgram("me the statistics on", 1, ngrams);
checkNgram("the statistics on this", 1, ngrams);
checkNgram("statistics on this sentence", 1, ngrams);
checkNgram("on this sentence ,", 1, ngrams);
checkNgram("this sentence , give", 1, ngrams);
checkNgram("sentence , give me", 1, ngrams);
}
BOOST_AUTO_TEST_CASE(ref_stats_multi) {
Translation ref1,ref2,ref3,src;
TextToTranslation("what is this saying ?", ref1);
TextToTranslation("what saying is this ? ?", ref2);
TextToTranslation("what is this is this ?", ref3);
TextToTranslation("not important", src);
vector<Translation> refs;
refs.push_back(ref1);
refs.push_back(ref2);
refs.push_back(ref3);
Bleu bleu;
bleu.AddReferences(refs,src);
NGramMap ngrams = bleu.GetReferenceStats(0);
BOOST_CHECK_EQUAL(ngrams.size(),(size_t)31);
checkNgram("what", 1, ngrams);
checkNgram("is", 2, ngrams);
checkNgram("this", 2, ngrams);
checkNgram("saying", 1, ngrams);
checkNgram("?", 2, ngrams);
checkNgram("what is", 1, ngrams);
checkNgram("is this", 2, ngrams);
checkNgram("this saying", 1, ngrams);
checkNgram("saying ?", 1, ngrams);
checkNgram("what saying", 1, ngrams);
checkNgram("saying is", 1, ngrams);
checkNgram("this ?", 1, ngrams);
checkNgram("? ? ", 1, ngrams);
checkNgram("this is", 1, ngrams);
checkNgram("what is this", 1, ngrams);
checkNgram("is this saying", 1, ngrams);
checkNgram("this saying ?", 1, ngrams);
checkNgram("what saying is", 1, ngrams);
checkNgram("saying is this", 1, ngrams);
checkNgram("is this ?", 1, ngrams);
checkNgram("this ? ?", 1, ngrams);
checkNgram("is this is", 1, ngrams);
checkNgram("this is this", 1, ngrams);
checkNgram("what is this saying", 1, ngrams);
checkNgram("is this saying ?", 1, ngrams);
checkNgram("what saying is this", 1, ngrams);
checkNgram("saying is this ?", 1, ngrams);
checkNgram("is this ? ?", 1, ngrams);
checkNgram("what is this is", 1, ngrams);
checkNgram("is this is this", 1, ngrams);
checkNgram("this is this ?", 1, ngrams);
}
BOOST_AUTO_TEST_CASE(ref_length_single) {
Translation ref,src;
TextToTranslation("give me the statistics on this sentence , give me",ref);
TextToTranslation("the source is not really important",src);
vector<Translation> refs;
refs.push_back(ref);
Bleu bleu;
bleu.AddReferences(refs,src);
vector<size_t> actual = bleu.GetReferenceLengths(0);
vector<size_t> expected;
expected.push_back(10);
BOOST_CHECK_EQUAL_COLLECTIONS(actual.begin(),actual.end(),expected.begin(),expected.end());
}
BOOST_AUTO_TEST_CASE(ref_length_multi) {
Translation ref1,ref2,ref3,src;
TextToTranslation("what is this saying ?", ref1);
TextToTranslation("what saying is this ? ?", ref2);
TextToTranslation("what is this is this ?", ref3);
TextToTranslation("not important", src);
vector<Translation> refs;
refs.push_back(ref1);
refs.push_back(ref2);
refs.push_back(ref3);
Bleu bleu;
bleu.AddReferences(refs,src);
vector<size_t> actual = bleu.GetReferenceLengths(0);
size_t expected[] = {5,6,6};
BOOST_CHECK_EQUAL_COLLECTIONS(actual.begin(),actual.end(),expected, expected+3);
}
BOOST_AUTO_TEST_CASE(multi_sentence) {
Translation ref1,ref2,src;
TextToTranslation("fee fye fo fum",ref1);
TextToTranslation("hee ha haw", ref2);
TextToTranslation("ra ra ra", src);
Bleu bleu;
vector<Translation> refs;
refs.push_back(ref1);
bleu.AddReferences(refs,src);
refs.clear();
refs.push_back(ref2);
bleu.AddReferences(refs,src);
NGramMap ngrams = bleu.GetReferenceStats(0);
checkNgram("fee", 1, ngrams);
checkNgram("fye", 1, ngrams);
checkNgram("fo", 1, ngrams);
checkNgram("fum", 1, ngrams);
checkNgram("fee fye", 1, ngrams);
checkNgram("fye fo", 1, ngrams);
checkNgram("fo fum", 1, ngrams);
checkNgram("fee fye fo", 1, ngrams);
checkNgram("fye fo fum", 1, ngrams);
checkNgram("fee fye fo fum", 1, ngrams);
BOOST_CHECK_EQUAL(ngrams.size(),(size_t)10);
vector<size_t> actual = bleu.GetReferenceLengths(0);
size_t expected[] = {4};
BOOST_CHECK_EQUAL_COLLECTIONS(actual.begin(),actual.end(),expected,expected+1);
ngrams = bleu.GetReferenceStats(1);
checkNgram("hee", 1, ngrams);
checkNgram("ha", 1, ngrams);
checkNgram("haw", 1, ngrams);
checkNgram("hee ha", 1, ngrams);
checkNgram("ha haw", 1, ngrams);
checkNgram("hee ha haw", 1, ngrams);
BOOST_CHECK_EQUAL(ngrams.size(),(size_t)6);
actual = bleu.GetReferenceLengths(1);
expected[0] = 3;
BOOST_CHECK_EQUAL_COLLECTIONS(actual.begin(),actual.end(),expected,expected+1);
}
BOOST_AUTO_TEST_CASE(source_length) {
Translation ref, src1, src2;
TextToTranslation("ref whatever", ref);
TextToTranslation("the first source sentence", src1);
TextToTranslation("the second source sentence a bit longer than the first",src2);
vector<Translation> refs;
refs.push_back(ref);
Bleu bleu;
bleu.AddReferences(refs,src1);
bleu.AddReferences(refs,src2);
BOOST_CHECK_EQUAL(bleu.GetSourceLength(0), (size_t)4);
BOOST_CHECK_EQUAL(bleu.GetSourceLength(1), (size_t)10);
}
BOOST_AUTO_TEST_CASE(evaluate_1ref_1hyp_nobp) {
Translation ref, src, hyp;
TextToTranslation("this is the correct one , this one",ref);
TextToTranslation("is this is the one , this one", hyp);
TextToTranslation("whatever",src);
vector<Translation> refs;
refs.push_back(ref);
Bleu bleu;
bleu.AddReferences(refs,src);
vector<size_t> sentenceIds;
sentenceIds.push_back(0);
GainFunctionHandle gf = bleu.GetGainFunction(sentenceIds);
float actual = gf->Evaluate(hyp);
float sm = BLEU_SMOOTHING;
// precisions: 7/8, 5/7, 3/6 and 1/5
float log_expected = log(7+sm) - log(8+sm) + log(5+sm) - log(7+sm) + log(3+sm) - log(6+sm) + log(1+sm) - log(5+sm);
log_expected /= BLEU_ORDER;
BOOST_CHECK_CLOSE(actual,100*exp(log_expected),0.001);
}
BOOST_AUTO_TEST_CASE(evaluate_1ref_3src_nobp) {
Translation ref0,ref1,ref2,hyp0,hyp1,hyp2,src;
TextToTranslation("the first ref", ref0);
TextToTranslation("the second ref 2", ref1);
TextToTranslation("another ref",ref2);
TextToTranslation("the first guessed ref", hyp0);
TextToTranslation("this is the second hypothesis", hyp1);
TextToTranslation("another ref hyp",hyp2);
TextToTranslation("whatever",src);
vector<Translation> refs(1);
Bleu bleu;
refs[0] = ref0;
bleu.AddReferences(refs,src);
refs[0] = ref1;
bleu.AddReferences(refs,src);
refs[0] = ref2;
bleu.AddReferences(refs,src);
vector<size_t> sentenceIds;
sentenceIds.push_back(0);
sentenceIds.push_back(1);
sentenceIds.push_back(2);
GainFunctionHandle gf = bleu.GetGainFunction(sentenceIds);
vector<Translation> hyps;
hyps.push_back(hyp0);
hyps.push_back(hyp1);
hyps.push_back(hyp2);
float actual = gf->Evaluate(hyps);
float sm = BLEU_SMOOTHING;
//precision 7/12, 3/9, 0/6 and 0/3
float log_expected = log(7+sm) - log(12+sm) + log(3+sm) - log(9+sm) + log(0+sm) - log(6+sm) + log(0+sm) - log(3+sm);
log_expected /= BLEU_ORDER;
BOOST_CHECK_EQUAL(actual, 100*exp(log_expected));
}
BOOST_AUTO_TEST_CASE(evaluate_1ref_1src_bp) {
Translation ref, src, hyp;
TextToTranslation("this is the correct one , this one",ref);
TextToTranslation("this is the short one", hyp);
TextToTranslation("whatever",src);
vector<Translation> refs;
refs.push_back(ref);
Bleu bleu;
bleu.AddReferences(refs,src);
vector<size_t> sentenceIds;
sentenceIds.push_back(0);
GainFunctionHandle gf = bleu.GetGainFunction(sentenceIds);
float actual = gf->Evaluate(hyp);
float sm = BLEU_SMOOTHING;
//precision: 4/5, 2/4, 1/3, 0/2
float log_expected = log(4+sm) - log(5+sm) + log(2+sm) - log(4+sm) + log(1+sm) - log(3+sm) + log(0+sm) - log(2+sm);
log_expected /= BLEU_ORDER;
log_expected += (1 - 8.0/5.0);
BOOST_CHECK_EQUAL(actual, 100*exp(log_expected));
}
BOOST_AUTO_TEST_CASE(evaluate_caching) {
Translation ref, src, hyp1, hyp2, hyp3;
TextToTranslation("this is the reference sentence",ref);
TextToTranslation("this the reference sentence .", hyp1);
TextToTranslation("the reference sentence , what ?", hyp2);
TextToTranslation("the reference phrase . where ?", hyp3);
TextToTranslation("whatever", src);
Bleu bleu;
vector<Translation> refs1;
refs1.push_back(ref);
bleu.AddReferences(refs1,src);
vector<Translation> refs2;
refs2.push_back(ref);
bleu.AddReferences(refs2,src);
vector<size_t> sentenceIds;
sentenceIds.push_back(0);
sentenceIds.push_back(1);
GainFunctionHandle gf = bleu.GetGainFunction(sentenceIds);
vector<Translation> hyps1;
hyps1.push_back(hyp1);
hyps1.push_back(hyp2);
float actual = gf->Evaluate(hyps1);
float sm = BLEU_SMOOTHING;
//precision 7/11, 4/9, 2/7, 0/5
float log_expected = log(7+sm) - log(11+sm) + log(4+sm) - log(9+sm) + log(2+sm) - log(7+sm) + log(0+sm) - log(5+sm);
log_expected /= BLEU_ORDER;
BOOST_CHECK_CLOSE(actual, 100*exp(log_expected),0.01);
vector<Translation> hyps2;
hyps2.push_back(hyp1);
hyps2.push_back(hyp3);
actual = gf->Evaluate(hyps2);
//precision 6/11, 3/9, 1/7, 0/5
log_expected = log(6+sm) - log(11+sm) + log(3+sm) - log(9+sm) + log(1+sm) - log(7+sm) + log(0+sm) - log(5+sm);
log_expected /= BLEU_ORDER;
BOOST_CHECK_EQUAL(actual, 100*exp(log_expected));
}
BOOST_AUTO_TEST_CASE(evaluate_3ref_1src_bp) {
Translation ref1, ref2, ref3, src, hyp;
TextToTranslation("what is this saying ?", ref1);
TextToTranslation("what saying is this ? ?", ref2);
TextToTranslation("what is this is this ?", ref3);
TextToTranslation("is this is what", hyp);
TextToTranslation("whatever", src);
vector<Translation> refs;
refs.push_back(ref1);
refs.push_back(ref2);
refs.push_back(ref3);
Bleu bleu;
bleu.AddReferences(refs,src);
vector<size_t> sentenceIds;
sentenceIds.push_back(0);
GainFunctionHandle gf = bleu.GetGainFunction(sentenceIds);
float actual = gf->Evaluate(hyp);
float sm = BLEU_SMOOTHING;
//precision 4/4, 2/3, 1/2, 0/1
float log_expected = log(4+sm) - log(4+sm) + log(2+sm) - log(3+sm) + log(1+sm) - log(2+sm) + log(0+sm) - log(1+sm);
log_expected /= BLEU_ORDER;
//closest
log_expected += (1-5.0/4.0);
BOOST_CHECK_EQUAL(actual,100*exp(log_expected));
}
BOOST_AUTO_TEST_CASE(update_smoothing) {
Bleu bleu;
float sw = 0.5;
bleu.SetSmoothingWeight(sw);
BleuStats stats = bleu.GetSmoothingStats();
for (size_t order = 1; order < BLEU_ORDER; ++order) {
BOOST_CHECK_CLOSE(stats.tp(order), BLEU_SMOOTHING, 0.001);
BOOST_CHECK_CLOSE(stats.total(order), BLEU_SMOOTHING, 0.001);
}
Translation src,ref,hyp1,hyp2;
TextToTranslation("whatever it says", src);
TextToTranslation("this is the reference sentence ok ?", ref);
TextToTranslation("is this the reference sentence ok !", hyp1);
TextToTranslation("is this the hypothesis sentence ok !", hyp2);
vector<Translation> refs;
refs.push_back(ref);
bleu.AddReferences(refs,src);
vector<size_t> sentenceIds;
sentenceIds.push_back(0);
GainFunctionHandle gf = bleu.GetGainFunction(sentenceIds);
gf->AddSmoothingStats(0,hyp1);
gf->AddSmoothingStats(0,hyp2);
gf->UpdateSmoothingStats();
//should be the average of the two hypotheses
stats = bleu.GetSmoothingStats();
BOOST_CHECK_CLOSE(stats.tp(1), (5.5+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.total(1), (7+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.tp(2), (2+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.total(2), (6+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.tp(3), (1+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.total(3), (5+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.tp(4), (0.5+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.total(4), (4+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.src_len(), 3*sw, 0.001);
BOOST_CHECK_CLOSE(stats.ref_len(), 7*sw, 0.001);
BOOST_CHECK_CLOSE(stats.hyp_len(), 7*sw, 0.001);
gf->AddSmoothingStats(0,hyp1);
gf->UpdateSmoothingStats();
//previous stats should get downweighted
stats = bleu.GetSmoothingStats();
BOOST_CHECK_CLOSE(stats.tp(1), sw*(6+sw*(5.5+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.total(1), sw*(7+sw*(7+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.tp(2), sw*(3+sw*(2+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.total(2), sw*(6+sw*(6+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.tp(3), sw*(2+sw*(1+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.total(3), sw*(5+sw*(5+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.tp(4), sw*(1+sw*(0.5+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.total(4), sw*(4+sw*(4+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.src_len(), sw*(3+sw*(3)), 0.001);
BOOST_CHECK_CLOSE(stats.ref_len(), sw*(7+sw*(7)), 0.001);
BOOST_CHECK_CLOSE(stats.hyp_len(), sw*(7+sw*(7)), 0.001);
}
BOOST_AUTO_TEST_CASE(evaluate_smoothing) {
Bleu bleu;
float sw = 0.5;
bleu.SetSmoothingWeight(sw);
Translation src,ref,hyp1,hyp2;
TextToTranslation("whatever it says", src);
TextToTranslation("this is the reference sentence ok ?", ref);
TextToTranslation("is this the reference sentence ok !", hyp1);
TextToTranslation("is this the hypothesis sentence ok !", hyp2);
vector<Translation> refs;
refs.push_back(ref);
bleu.AddReferences(refs,src);
vector<size_t> sentenceIds;
sentenceIds.push_back(0);
GainFunctionHandle gf = bleu.GetGainFunction(sentenceIds);
float actual = gf->Evaluate(hyp1);
//should just be standard smoothed bleu
float sm = BLEU_SMOOTHING;
//precision 6/7, 3/6, 2/5, 1/4
float log_expected = log(6+sm) - log(7+sm) + log(3+sm) - log(6+sm) + log(2+sm) - log(5+sm) + log(1+sm) - log(4+sm);
log_expected /= BLEU_ORDER;
log_expected += log(3); //multiply by source length
BOOST_CHECK_CLOSE(actual,exp(log_expected),0.001);
gf->AddSmoothingStats(0,hyp1);
gf->UpdateSmoothingStats();
//now should get smoothed bleu
actual = gf->Evaluate(hyp2);
//precision 5/7 1/6 0/5 0/4
log_expected = log(5+sw*(6+sm)) - log(7+sw*(7+sm)) + log(1+sw*(3+sm)) - log(6+sw*(6+sm)) + log(0+sw*(2+sm))
- log(5 + sw*(5+sm)) + log(0 + sw*(1+sm)) - log(4+sw*(4+sm));
//cerr << log(5+sw*(6+sm)) << " " << log(7+sw*(7+sm)) << " " << log(1+sw*(3+sm)) << " " <<
// log(6+sw*(6+sm)) << " " << log(0+sw*(2+sm)) << " " << log(5 + sw*(5+sm)) << " " <<
// log(0 + sw*(1+sm)) << " " << log(4+sw*(4+sm)) << endl;
log_expected /= BLEU_ORDER;
log_expected += log(3 + sw*3); // weight bleu by (O_f + |f|)
BOOST_CHECK_CLOSE(actual,exp(log_expected),0.001);
}
BOOST_AUTO_TEST_SUITE_END()

159
josiah/TestOnlineTrainingCorpus.cpp Normal file

@ -0,0 +1,159 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <set>
#include <stdexcept>
#include <boost/test/unit_test.hpp>
#include "OnlineTrainingCorpus.h"
using namespace Josiah;
using namespace std;
BOOST_AUTO_TEST_SUITE(online_training_corpus)
class SourceFixture {
public:
SourceFixture() {
sourceFile = string(tmpnam(NULL));
size_t sourceSize = 20;
string line = "one two three four five six seven eight nine ten";
ofstream sourceHandle(sourceFile.c_str());
for (size_t i = 0; i < sourceSize; ++i) {
sourceHandle << line << " " << i << endl;
}
}
~SourceFixture() {
BOOST_CHECK(!remove(sourceFile.c_str()));
}
string sourceFile;
};
BOOST_FIXTURE_TEST_CASE(ctor_validate_lines, SourceFixture) {
//args are batch, epoch, max, mpi_size, mpi_rank
OnlineTrainingCorpus(sourceFile,1,10,50,1,0);
//epoch size not divisible by batch size
BOOST_CHECK_THROW(OnlineTrainingCorpus(sourceFile,3,10,50,1,0), runtime_error);
//max not divisible by shard
BOOST_CHECK_THROW(OnlineTrainingCorpus(sourceFile,2,10,52,1,0), runtime_error);
//for mpi, shard size should be divisible by batch size
// This example should give a shard size of 5, which is not divisible
// by the batch size of 2
BOOST_CHECK_THROW(OnlineTrainingCorpus(sourceFile, 2, 10, 50 , 2, 0), runtime_error);
}
BOOST_FIXTURE_TEST_CASE(batch_single_core, SourceFixture) {
//args are batch, epoch, max, mpi_size, mpi_rank
OnlineTrainingCorpus corpus(sourceFile,4,20,120,1,0);
size_t batchCount = 0;
size_t lineCount = 0;
multiset<size_t> linesSeen;
while(corpus.HasMore()) {
vector<string> lines;
vector<size_t> lineNumbers;
bool shouldMix;
corpus.GetNextBatch(&lines, &lineNumbers, &shouldMix);
BOOST_CHECK_EQUAL(lines.size(), (size_t)4);
BOOST_CHECK_EQUAL(lineNumbers.size(), (size_t)4);
++batchCount;
lineCount += lines.size();
linesSeen.insert(lineNumbers.begin(), lineNumbers.end());
BOOST_CHECK_EQUAL(shouldMix, lineCount % 20 == 0);
//cerr << "lineCount " << lineCount << " " << shouldDump << endl;
}
BOOST_CHECK_EQUAL(lineCount, (size_t)120);
BOOST_CHECK_EQUAL(batchCount, (size_t)30);
//Each sentence should appear exactly 6 times
for (multiset<size_t>::iterator i = linesSeen.begin(); i != linesSeen.end(); ++i) {
BOOST_CHECK_EQUAL((size_t)linesSeen.count(*i), (size_t)6);
}
}
BOOST_FIXTURE_TEST_CASE(batch_multi_core, SourceFixture) {
//args are batch, epoch, max, mpi_size, mpi_rank
OnlineTrainingCorpus corpus(sourceFile,4,60,120,3,0);
multiset<size_t> linesSeen;
while(corpus.HasMore()) {
vector<string> lines;
vector<size_t> lineNumbers;
bool shouldMix;
corpus.GetNextBatch(&lines, &lineNumbers, &shouldMix);
linesSeen.insert(lineNumbers.begin(), lineNumbers.end());
}
BOOST_CHECK_EQUAL(linesSeen.size(),(size_t)40);
//Each sentence should appear exactly twice
for (multiset<size_t>::iterator i = linesSeen.begin(); i != linesSeen.end(); ++i) {
BOOST_CHECK_EQUAL((size_t)linesSeen.count(*i), (size_t)2);
}
}
BOOST_FIXTURE_TEST_CASE(batch_zero, SourceFixture) {
//set batch size to 0, no mpi
//batch should be whole epoch
OnlineTrainingCorpus corpus(sourceFile,0,20,60,1,0);
//each line should be seen 3 times, and should mix after every batch
multiset<size_t> linesSeen;
while(corpus.HasMore()) {
vector<string> lines;
vector<size_t> lineNumbers;
bool shouldMix;
corpus.GetNextBatch(&lines,&lineNumbers,&shouldMix);
BOOST_CHECK(shouldMix);
linesSeen.insert(lineNumbers.begin(), lineNumbers.end());
}
BOOST_CHECK_EQUAL(linesSeen.size(), (size_t)60);
for (multiset<size_t>::iterator i = linesSeen.begin(); i != linesSeen.end(); ++i) {
BOOST_CHECK_EQUAL((size_t)linesSeen.count(*i), (size_t)3);
}
//try again, with mpi
OnlineTrainingCorpus corpus2(sourceFile,0,20,80,3,0);
//Each line should be seen 4 times, mix after every batch
linesSeen.clear();
while(corpus2.HasMore()) {
vector<string> lines;
vector<size_t> lineNumbers;
bool shouldMix;
corpus2.GetNextBatch(&lines,&lineNumbers,&shouldMix);
BOOST_CHECK(shouldMix);
linesSeen.insert(lineNumbers.begin(), lineNumbers.end());
}
BOOST_CHECK_EQUAL(linesSeen.size(), (size_t)80);
for (multiset<size_t>::iterator i = linesSeen.begin(); i != linesSeen.end(); ++i) {
BOOST_CHECK_EQUAL((size_t)linesSeen.count(*i), (size_t)4);
}
}
BOOST_AUTO_TEST_SUITE_END()

245
josiah/TrainingSource.cpp Normal file

@ -0,0 +1,245 @@
#include "TrainingSource.h"
#ifdef MPI_ENABLED
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
#include "MpiDebug.h"
#endif
#include <cassert>
#include "Optimizer.h"
#include "Decoder.h"
#include "WeightManager.h"
using namespace std;
using namespace Moses;
#ifdef MPI_ENABLED
namespace mpi=boost::mpi;
#endif
namespace Josiah {
ExpectedBleuTrainer::ExpectedBleuTrainer(
int r,
int s,
int bsize,
vector<string>* sents,
unsigned int rseed,
bool randomize,
Optimizer* o,
int wt_dump_freq,
std::string wt_dump_stem)
: rank(r),
size(s),
batch_size(bsize),
corpus(),
keep_going(true),
order(batch_size),
rng(rseed),
dist(0, sents->size() - 1),
draw(rng, dist),
randomize_batches(randomize),
optimizer(o),
total_ref_len(),
total_exp_len(),
total_exp_gain(),
total_unreg_exp_gain(),
weight_dump_freq(wt_dump_freq),
weight_dump_stem(wt_dump_stem){
if (rank >= batch_size) keep_going = false;
corpus.swap(*sents);
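//Shard the batch evenly across MPI processes: this rank handles
//[cur_start, cur_end), and the last rank absorbs any remainder.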
int esize = min(batch_size, size);
//cerr << "esize: " << esize << endl;
int sents_per_batch = batch_size / esize;
cur = cur_start = sents_per_batch * rank;
//cerr << "sents_per_batch: " << sents_per_batch << endl;
cur_end = min((int)corpus.size(), sents_per_batch * (rank + 1));
if (rank == size - 1) cur_end = batch_size;
cerr << rank << "/" << size << ": cur_start=" << cur_start << " cur_end=" << cur_end << endl;
assert(cur_end >= cur_start);
tlc = 0;
ReserveNextBatch();
}
void ExpectedBleuTrainer::ReserveNextBatch() {
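//Rank 0 draws (or enumerates) the line order for the next batch and
//broadcasts it, so that all ranks see the same assignment.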
if (rank == 0) {
if (randomize_batches) {
for (unsigned int i = 0; i < order.size(); ++i, ++tlc)
order[i] = draw();
} else {
for (unsigned int i = 0; i < order.size(); ++i, ++tlc)
order[i] = tlc % corpus.size();
}
}
#ifdef MPI_ENABLED
mpi::broadcast(mpi::communicator(), order, 0);
// if (MPI_SUCCESS != MPI_Bcast(&order[0], order.size(), MPI_INT, 0, MPI_COMM_WORLD))
// MPI_Abort(MPI_COMM_WORLD,1);
#endif
}
bool ExpectedBleuTrainer::HasMore() const {
return keep_going && (cur < cur_end);
}
void ExpectedBleuTrainer::GetSentence(string* sentence, int* lineno) {
assert(static_cast<unsigned int>(cur) < order.size());
if (lineno) *lineno = order[cur];
*sentence = corpus[order[cur++]];
}
void ExpectedBleuTrainer::IncorporateGradient(
const FValue trans_len,
const FValue ref_len,
const FValue exp_gain,
const FValue unreg_exp_gain,
const FVector& grad) {
gradient += grad;
total_exp_gain += exp_gain;
total_unreg_exp_gain += unreg_exp_gain;
total_ref_len += ref_len;
total_exp_len += trans_len;
if (cur == cur_end) {
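//End of this rank's shard: reduce the gradients and statistics onto rank 0,
//take an optimizer step there, then broadcast the updated weights to all ranks.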
FVector& weights = WeightManager::instance().get();
FValue tg = 0, trl = 0, tel = 0, tgunreg = 0;
#ifdef MPI_ENABLED
FVector sum_gradient;
mpi::communicator world;
MPI_VERBOSE(1,"Reducing gradient, gradient = " << gradient << " rank = " << rank << endl);
mpi::reduce(world, gradient, sum_gradient, FVectorPlus(),0);
if (rank == 0) MPI_VERBOSE(1, "Reduced gradient = " << sum_gradient << endl);
mpi::reduce(world, total_exp_gain, tg, std::plus<float>(),0);
mpi::reduce(world, total_unreg_exp_gain, tgunreg, std::plus<float>(),0);
mpi::reduce(world, total_ref_len, trl, std::plus<float>(),0);
mpi::reduce(world, total_exp_len, tel, std::plus<float>(),0);
/* if (MPI_SUCCESS != MPI_Reduce(const_cast<float*>(&gradient.data()[0]), &rcv_grad[0], w.size(), MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Reduce(&total_exp_gain, &tg, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Reduce(&total_unreg_exp_gain, &tgunreg, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Reduce(&total_ref_len, &trl, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Reduce(&total_exp_len, &tel, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
*/
if (rank == 0) {
gradient = sum_gradient;
}
#else
//rcv_grad = gradient.data();
tg = total_exp_gain;
tgunreg = total_unreg_exp_gain;
trl = total_ref_len;
tel = total_exp_len;
#endif
if (rank == 0) {
tg /= batch_size;
tgunreg /= batch_size;
gradient /= batch_size;
cerr << "TOTAL EXPECTED GAIN: " << tg << " (batch size = " << batch_size << ")\n";
cerr << "TOTAL UNREGULARIZED EXPECTED GAIN: " << tgunreg << " (batch size = " << batch_size << ")\n";
cerr << "EXPECTED LENGTH / REF LENGTH: " << tel << '/' << trl << " (" << (tel / trl) << ")\n";
optimizer->Optimize(tg, weights, gradient, &weights);
if (optimizer->HasConverged()) keep_going = false;
}
int iteration = optimizer->GetIteration();
#ifdef MPI_ENABLED
int kg = keep_going;
mpi::broadcast(world,weights,0);
mpi::broadcast(world,kg,0);
mpi::broadcast(world,iteration,0);
ReserveNextBatch();
world.barrier();
/* if (MPI_SUCCESS != MPI_Bcast(const_cast<float*>(&weights.data()[0]), weights.data().size(), MPI_FLOAT, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Bcast(&kg, 1, MPI_INT, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Bcast(&iteration, 1, MPI_INT, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
ReserveNextBatch();
if (MPI_SUCCESS != MPI_Barrier(MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
*/
keep_going = kg;
optimizer->SetIteration(iteration);
#endif
cur = cur_start;
gradient.clear();
total_exp_gain = 0;
total_unreg_exp_gain = 0;
total_exp_len = 0;
total_ref_len = 0;
if (weight_dump_freq > 0 && rank == 0 && iteration > 0 && (iteration % weight_dump_freq) == 0) {
stringstream s;
s << weight_dump_stem;
s << "_";
s << iteration;
string weight_file = s.str();
cerr << "Dumping weights to " << weight_file << endl;
WeightManager::instance().dump(weight_file);
}
}
}
void ExpectedBleuTrainer::IncorporateCorpusGradient(
const FValue trans_len,
const FValue ref_len,
const FValue exp_gain,
const FValue unreg_exp_gain,
const FVector& grad) {
if (cur == cur_end) {
FVector& weights = WeightManager::instance().get();
if (rank == 0) {
cerr << "TOTAL EXPECTED GAIN: " << exp_gain << " (batch size = " << batch_size << ")\n";
cerr << "TOTAL UNREGULARIZED EXPECTED GAIN: " << unreg_exp_gain << " (batch size = " << batch_size << ")\n";
cerr << "EXPECTED LENGTH / REF LENGTH: " << trans_len << '/' << ref_len << " (" << (trans_len / ref_len) << ")\n";
optimizer->Optimize(exp_gain, weights, grad, &weights);
if (optimizer->HasConverged()) keep_going = false;
}
int kg = keep_going;
int iteration = optimizer->GetIteration();
#ifdef MPI_ENABLED
mpi::communicator world;
mpi::broadcast(world, weights,0);
mpi::broadcast(world,kg,0);
mpi::broadcast(world,iteration,0);
ReserveNextBatch();
world.barrier();
/*
if (MPI_SUCCESS != MPI_Bcast(const_cast<float*>(&weights.data()[0]), weights.data().size(), MPI_FLOAT, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Bcast(&kg, 1, MPI_INT, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Bcast(&iteration, 1, MPI_INT, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
ReserveNextBatch();
if (MPI_SUCCESS != MPI_Barrier(MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
*/
keep_going = kg;
optimizer->SetIteration(iteration);
#endif
cur = cur_start;
if (weight_dump_freq > 0 && rank == 0 && iteration > 0 && (iteration % weight_dump_freq) == 0) {
stringstream s;
s << weight_dump_stem;
s << "_";
s << iteration;
string weight_file = s.str();
WeightManager::instance().dump(weight_file);
}
}
}
}

74
josiah/TrainingSource.h Normal file

@ -0,0 +1,74 @@
#pragma once
#include <string>
#include <vector>
#include <boost/random/variate_generator.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_smallint.hpp>
#include "FeatureVector.h"
#include "InputSource.h"
namespace Josiah {
class Decoder;
class Optimizer;
class ExpectedBleuTrainer : public InputSource {
public:
ExpectedBleuTrainer(
int r, // MPI rank, or 0 if not MPI
int s, // MPI size, or 1 if not MPI
int bsize, // batch size
std::vector<std::string>* sents, // development corpus
unsigned int rseed,
bool randomize,
Optimizer* o,
int wt_dump_freq,
std::string wt_dump_stem);
void ReserveNextBatch();
virtual bool HasMore() const;
virtual void GetSentence(std::string* sentence, int* lineno);
void IncorporateGradient(
const Moses::FValue trans_len,
const Moses::FValue ref_len,
const Moses::FValue exp_gain,
const Moses::FValue unreg_exp_gain,
const Moses::FVector& grad);
void IncorporateCorpusGradient(
const Moses::FValue trans_len,
const Moses::FValue ref_len,
const Moses::FValue exp_gain,
const Moses::FValue unreg_exp_gain,
const Moses::FVector& grad);
int GetCurr() { return cur;}
int GetCurrEnd() { return cur_end;}
private:
int rank, size, batch_size;
int cur, cur_start;
int cur_end;
std::vector<std::string> corpus;
bool keep_going;
Moses::FVector gradient;
std::vector<int> order;
boost::mt19937 rng;
boost::uniform_smallint<int> dist;
boost::variate_generator<boost::mt19937, boost::uniform_smallint<int> > draw;
bool randomize_batches;
Optimizer* optimizer;
Moses::FValue total_ref_len;
Moses::FValue total_exp_len;
Moses::FValue total_exp_gain;
Moses::FValue total_unreg_exp_gain;
int tlc;
int weight_dump_freq;
std::string weight_dump_stem;
};
}
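To make the interface concrete, here is a minimal single-process driver sketch. It is illustrative only: it assumes rank 0 of size 1 (no MPI), a caller-supplied Optimizer, and a hypothetical estimateGradient() helper standing in for the sampler's expected-BLEU computation.

#include <string>
#include <vector>
#include "TrainingSource.h"
namespace Josiah {
// Hypothetical helper: in the real system the sampler estimates these
// statistics; it is only declared here so the sketch is self-contained.
void estimateGradient(const std::string& sentence,
                      Moses::FValue* transLen, Moses::FValue* refLen,
                      Moses::FValue* expGain, Moses::FValue* unregExpGain,
                      Moses::FVector* grad);
void trainOnCorpus(std::vector<std::string>* corpus, Optimizer* optimizer) {
  ExpectedBleuTrainer trainer(/*rank*/ 0, /*size*/ 1, /*batch size*/ 8,
                              corpus, /*rseed*/ 42, /*randomize*/ true,
                              optimizer, /*weight dump freq*/ 10,
                              /*weight dump stem*/ "weights");
  while (trainer.HasMore()) {
    std::string sentence;
    int lineno;
    trainer.GetSentence(&sentence, &lineno);
    Moses::FValue transLen, refLen, expGain, unregExpGain;
    Moses::FVector grad;
    estimateGradient(sentence, &transLen, &refLen, &expGain, &unregExpGain, &grad);
    // Once a whole batch has been incorporated, the trainer runs the
    // optimizer, resets its accumulators, and (optionally) dumps weights.
    trainer.IncorporateGradient(transLen, refLen, expGain, unregExpGain, grad);
  }
}
} // namespace Josiah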

303
josiah/TranslationDelta.cpp Normal file
View File

@ -0,0 +1,303 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <boost/lambda/lambda.hpp>
#include "TranslationDelta.h"
#include "Derivation.h"
#include "FeatureFunction.h"
#include "Gibbler.h"
#include "ScoreComponentCollection.h"
#include "DummyScoreProducers.h"
#include "GibbsOperator.h"
using namespace std;
namespace Josiah {
void TranslationDelta::getNewSentenceSingle(const TranslationOption* option, const WordsRange& targetSegment, vector<const Factor*>& newSentence) const{
const Phrase& targetPhrase = option->GetTargetPhrase();
size_t start = targetSegment.GetStartPos();
for (size_t i = 0; i < start; ++i) {
const Factor* factor = getSample().GetTargetWords()[i][0];
newSentence.push_back(factor);
}
//fill in the target phrase
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
newSentence.push_back(targetPhrase.GetWord(i)[0]);
}
//fill in the end of the sentence
size_t end = targetSegment.GetEndPos() + 1;
for (size_t i = end; i < getSample().GetTargetWords().size(); ++i) {
newSentence.push_back(getSample().GetTargetWords()[i][0]);
}
}
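// Worked example (illustrative): with current target words "a b c d", a
// target segment covering positions [1,2], and an option whose target
// phrase is "X Y Z", the new sentence is "a X Y Z d" - the words before
// the segment, then the replacement phrase, then the words after it.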
void TranslationDelta::initScoresSingleUpdate(const Sample& s, const TranslationOption* option, const TargetGap& gap) {
//reordering scores don't change here, so they are not recomputed
// extra features
for (FeatureFunctionVector::const_iterator i=s.GetFeatureFunctions().begin(); i<s.GetFeatureFunctions().end(); ++i) {
(*i)->doSingleUpdate(option,gap,m_scores);
}
updateWeightedScore();
VERBOSE(2, "Single Update: Scores " << m_scores << endl);
VERBOSE(2,"Single Update: Total score is " << m_score << endl);
}
//Note that left and right refer to the target order.
void TranslationDelta::initScoresContiguousPairedUpdate(const Sample& s, const TranslationOption* leftOption,
const TranslationOption* rightOption, const TargetGap& gap) {
//reordering scores don't change here, so they are not recomputed
for (FeatureFunctionVector::const_iterator i=s.GetFeatureFunctions().begin(); i<s.GetFeatureFunctions().end(); ++i) {
(*i)->doContiguousPairedUpdate(leftOption,rightOption,gap,m_scores);
}
}
void TranslationDelta::initScoresDiscontiguousPairedUpdate(const Sample& s, const TranslationOption* leftOption,
const TranslationOption* rightOption, const TargetGap& leftGap,
const TargetGap& rightGap)
{
for (FeatureFunctionVector::const_iterator i=s.GetFeatureFunctions().begin(); i<s.GetFeatureFunctions().end(); ++i) {
(*i)->doDiscontiguousPairedUpdate(leftOption, rightOption, leftGap, rightGap,m_scores);
}
}
void TranslationDelta::getNewSentenceContiguousPaired(const TranslationOption* leftOption, const TranslationOption* rightOption, const WordsRange* leftSegment, const WordsRange* rightTargetSegment, vector<const Factor*>& newSentence) const{
//Create the segment
WordsRange targetSegment = *leftSegment;
targetSegment.SetEndPos(rightTargetSegment->GetEndPos());
//create the phrase
Phrase targetPhrase(leftOption->GetTargetPhrase());
targetPhrase.Append(rightOption->GetTargetPhrase());
size_t start = targetSegment.GetStartPos();
for (size_t i = 0; i < start; ++i) {
newSentence.push_back(getSample().GetTargetWords()[i][0]);
}
//fill in the target phrase
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
newSentence.push_back(targetPhrase.GetWord(i)[0]);
}
//fill in the end of the sentence
size_t end = targetSegment.GetEndPos() + 1;
for (size_t i = end; i < getSample().GetTargetWords().size(); ++i) {
newSentence.push_back(getSample().GetTargetWords()[i][0]);
}
}
void TranslationDelta::getNewSentenceDiscontiguousPaired(const TranslationOption* leftOption, const TranslationOption* rightOption, const WordsRange* leftSegment, const WordsRange* rightSegment, vector<const Factor*>& newSentence) const{
const Phrase& leftTgtPhrase = leftOption->GetTargetPhrase();
const Phrase& rightTgtPhrase = rightOption->GetTargetPhrase();
VERBOSE(2, "Sample : " << Josiah::Derivation(m_sample) << endl);
VERBOSE(2, *leftSegment << " " << *rightSegment << endl);
size_t start = leftSegment->GetStartPos();
for (size_t i = 0; i < start; ++i) {
newSentence.push_back(getSample().GetTargetWords()[i][0]);
}
//fill in the left target phrase
for (size_t i = 0; i < leftTgtPhrase.GetSize(); ++i) {
newSentence.push_back(leftTgtPhrase.GetWord(i)[0]);
}
for (size_t i = leftSegment->GetEndPos()+ 1 ; i < rightSegment->GetStartPos(); ++i) {
newSentence.push_back(getSample().GetTargetWords()[i][0]);
}
//fill in the right target phrase
for (size_t i = 0; i < rightTgtPhrase.GetSize(); ++i) {
newSentence.push_back(rightTgtPhrase.GetWord(i)[0]);
}
//fill in the end of the sentence
size_t end = rightSegment->GetEndPos() + 1;
for (size_t i = end; i < getSample().GetTargetWords().size(); ++i) {
newSentence.push_back(getSample().GetTargetWords()[i][0]);
}
}
void TranslationDelta::getNewSentencePaired(const TranslationOption* leftOption, const TranslationOption* rightOption, const WordsRange& leftTargetSegment, const WordsRange& rightTargetSegment, vector<const Factor*>& newSentence) const{
const WordsRange* leftSegment = &leftTargetSegment;
const WordsRange* rightSegment = &rightTargetSegment;
if (rightTargetSegment < leftTargetSegment) {
//swap so that leftSegment always precedes rightSegment in target order
leftSegment = &rightTargetSegment;
rightSegment = &leftTargetSegment;
}
bool contiguous = (leftSegment->GetEndPos() + 1 == rightSegment->GetStartPos());
if (contiguous)
getNewSentenceContiguousPaired(leftOption, rightOption, leftSegment, rightSegment, newSentence);
else
getNewSentenceDiscontiguousPaired(leftOption, rightOption, leftSegment, rightSegment, newSentence);
}
void TranslationDelta::updateWeightedScore() {
//weight the scores
m_score = inner_product(m_scores, WeightManager::instance().get());
VERBOSE(2, "Scores " << m_scores << endl);
VERBOSE(2,"Total score is " << m_score << endl);
}
TranslationUpdateDelta::TranslationUpdateDelta(Sample& sample, const TranslationOption* option ,const TargetGap& gap) :
TranslationDelta(sample), m_option(option), m_gap(gap) {
initScoresSingleUpdate(m_sample, m_option,m_gap);
}
void TranslationUpdateDelta::apply(const TranslationDelta& noChangeDelta) {
VERBOSE(3, "Applying Translation Update Delta" << endl);
m_scores -= noChangeDelta.getScores();
getSample().ChangeTarget(*m_option,m_scores);
}
void TranslationUpdateDelta::getNewSentence(vector<const Factor*>& newSentence) const{
getNewSentenceSingle(m_option, m_gap.segment, newSentence);
}
MergeDelta::MergeDelta(Sample& sample, const TranslationOption* option, const TargetGap& gap) :
TranslationDelta(sample), m_option(option), m_gap(gap) {
initScoresSingleUpdate(m_sample, m_option,m_gap);
}
void MergeDelta::apply(const TranslationDelta& noChangeDelta) {
VERBOSE(3, "Applying MergeDelta" << endl);
m_scores -= noChangeDelta.getScores();
getSample().MergeTarget(*m_option,m_scores);
}
void MergeDelta::getNewSentence(vector<const Factor*>& newSentence) const {
getNewSentenceSingle(m_option, m_gap.segment, newSentence);
}
PairedTranslationUpdateDelta::PairedTranslationUpdateDelta(Sample& sample,
const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) : TranslationDelta(sample), m_leftOption(leftOption),
m_rightOption(rightOption), m_leftGap(leftGap), m_rightGap(rightGap) {
VERBOSE(2, "Left Target phrase: " << m_leftOption->GetTargetPhrase() << endl);
VERBOSE(2, "Right Target phrase: " << m_rightOption->GetTargetPhrase() << endl);
VERBOSE(2, "Left Target segment: " << m_leftGap.segment << endl);
VERBOSE(2, "Right Target segment: " << m_rightGap.segment << endl);
assert(m_leftGap.segment < m_rightGap.segment);
if (m_leftGap.segment.GetEndPos() + 1 == m_rightGap.segment.GetStartPos()) {
TargetGap gap(m_leftGap.leftHypo, m_rightGap.rightHypo,
WordsRange(m_leftGap.segment.GetStartPos(), m_rightGap.segment.GetEndPos()));
initScoresContiguousPairedUpdate(m_sample, m_leftOption,m_rightOption, gap);
} else {
initScoresDiscontiguousPairedUpdate(m_sample,m_leftOption,m_rightOption,m_leftGap,m_rightGap);
}
updateWeightedScore();
VERBOSE(2, "Left Target segment: " << m_leftGap.segment << endl);
VERBOSE(2, "Right Target segment: " << m_rightGap.segment << endl);
}
void PairedTranslationUpdateDelta::apply(const TranslationDelta& noChangeDelta) {
VERBOSE(3, "Applying Paired Translation Update Delta" << endl);
m_scores -= noChangeDelta.getScores();
getSample().ChangeTarget(*m_leftOption,m_scores);
FVector emptyScores;
getSample().ChangeTarget(*m_rightOption,emptyScores);
}
void PairedTranslationUpdateDelta::getNewSentence(vector<const Factor*>& newSentence) const {
getNewSentencePaired(m_leftOption, m_rightOption, m_leftGap.segment, m_rightGap.segment, newSentence);
}
SplitDelta::SplitDelta(Sample& sample, const TranslationOption* leftOption,
const TranslationOption* rightOption, const TargetGap& gap) : TranslationDelta(sample),
m_leftOption(leftOption), m_rightOption(rightOption), m_gap(gap){
VERBOSE(2, "Target phrase: " << m_leftOption->GetTargetPhrase() << " " << m_rightOption->GetTargetPhrase() << endl);
VERBOSE(2, "Target segment: " << m_gap.segment << endl);
initScoresContiguousPairedUpdate(m_sample, m_leftOption, m_rightOption, m_gap);
updateWeightedScore();
}
void SplitDelta::apply(const TranslationDelta& noChangeDelta) {
m_scores -= noChangeDelta.getScores();
getSample().SplitTarget(*m_leftOption,*m_rightOption,m_scores);
}
void SplitDelta::getNewSentence(vector<const Factor*>& newSentence) const {
getNewSentenceContiguousPaired(m_leftOption, m_rightOption, &(m_gap.segment), &(m_gap.segment), newSentence);
}
void FlipDelta::apply(const TranslationDelta& noChangeDelta) {
VERBOSE(3, "Applying Flip Delta" << endl);
m_scores -= noChangeDelta.getScores();
//cerr << "m_prevTgtHypo: " << *m_prevTgtHypo << endl;
//cerr << "m_nextTgtHypo: " << *m_nextTgtHypo << endl;
getSample().FlipNodes(*m_leftTgtOption, *m_rightTgtOption, m_prevTgtHypo, m_nextTgtHypo, m_scores);
}
FlipDelta::FlipDelta(Sample& sample,
const TranslationOption* leftTgtOption ,const TranslationOption* rightTgtOption,
const TargetGap& leftGap, const TargetGap& rightGap) :
TranslationDelta(sample),
m_leftTgtOption(leftTgtOption), m_rightTgtOption(rightTgtOption), m_leftGap(leftGap), m_rightGap(rightGap),
m_prevTgtHypo(const_cast<Hypothesis*> (leftGap.leftHypo)), m_nextTgtHypo(const_cast<Hypothesis*> (rightGap.rightHypo))
{
for (FeatureFunctionVector::const_iterator i=sample.GetFeatureFunctions().begin(); i<sample.GetFeatureFunctions().end(); ++i) {
(*i)->doFlipUpdate(leftTgtOption, rightTgtOption, leftGap, rightGap,m_scores);
}
updateWeightedScore();
VERBOSE(2, "Flip delta: Scores " << m_scores << endl);
VERBOSE(2,"Flip delta: Total score is " << m_score << endl);
//cerr << "Creating FlipDelta scores = " << m_scores << " total = " << m_score << endl;
}
void FlipDelta::getNewSentence(vector<const Factor*>& newSentence)const {
getNewSentencePaired(m_leftTgtOption, m_rightTgtOption, m_leftGap.segment, m_rightGap.segment, newSentence);
}
}//namespace

221
josiah/TranslationDelta.h Normal file
View File

@ -0,0 +1,221 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <utility>
#include <boost/shared_ptr.hpp>
#include "DummyScoreProducers.h"
#include "FeatureFunction.h"
#include "FeatureVector.h"
#include "WeightManager.h"
namespace Moses {
class TranslationOption;
class TranslationOptionCollection;
class Hypothesis;
class Factor;
class WordsRange;
class Word;
}
using namespace Moses;
namespace Josiah {
class Sample;
class GibbsOperator;
/**
* This class hierarchy represents the possible changes to the translation effected
* by the Gibbs operators.
**/
class TranslationDelta {
public:
TranslationDelta(Sample& sample): m_score(-1e6), m_sample(sample) {}
/**
Get the absolute score of this delta
**/
double getScore() const { return m_score;}
/**
* Apply to the sample
**/
virtual void apply(const TranslationDelta& noChangeDelta) = 0;
/**
For gain calculation
**/
virtual void getNewSentence(std::vector<const Factor*>& newSentence) const = 0;
Sample& getSample() const {return m_sample;}
virtual ~TranslationDelta() {}
void updateWeightedScore();
const FVector& getScores() const { return m_scores;}
void setScores(const FVector& scores) { m_scores = scores;}
protected:
FVector m_scores;
FValue m_score;
Sample& m_sample;
void getNewSentenceSingle(const TranslationOption* option, const WordsRange& targetSegment, std::vector<const Factor*>& newSentence) const;
void getNewSentencePaired(const TranslationOption* leftOption, const TranslationOption* rightOption, const WordsRange& leftTargetSegment, const WordsRange& rightTargetSegment, std::vector<const Factor*>& newSentence) const;
void getNewSentenceContiguousPaired(const TranslationOption* leftOption, const TranslationOption* rightOption, const WordsRange* leftSegment, const WordsRange* rightTargetSegment, std::vector<const Factor*>& newSentence) const;
void getNewSentenceDiscontiguousPaired(const TranslationOption* leftOption, const TranslationOption* rightOption, const WordsRange* leftSegment, const WordsRange* rightTargetSegment, std::vector<const Factor*>& newSentence) const;
/**
* Initialise the scores for the case where only one source-target pair needs to be considered.
**/
void initScoresSingleUpdate(const Sample&, const TranslationOption* option, const TargetGap& gap);
/**
* Initialise the scores for the case where two (target contiguous) source-target pairs need to be considered.
* Note that left and right refer to the target order.
**/
void initScoresContiguousPairedUpdate(const Sample&, const TranslationOption* leftOption,
const TranslationOption* rightOption, const TargetGap& gap);
/** Discontiguous version of above. */
void initScoresDiscontiguousPairedUpdate(const Sample&, const TranslationOption* leftOption,
const TranslationOption* rightOption, const TargetGap& leftGap,
const TargetGap& rightGap);
};
/**
* An update that only changes a single source/target phrase pair. May change the length of the target.
**/
class TranslationUpdateDelta : public virtual TranslationDelta {
public:
TranslationUpdateDelta(Sample& sample, const TranslationOption* option , const TargetGap& gap);
virtual void apply(const TranslationDelta& noChangeDelta);
const TranslationOption* getOption() const {return m_option;}
const TargetGap& getGap() const { return m_gap;}
virtual void getNewSentence(std::vector<const Factor*>& newSentence) const;
private:
const TranslationOption* m_option;
TargetGap m_gap;
};
/**
* An update that merges two source phrases and their corresponding target phrases.
**/
class MergeDelta : public virtual TranslationDelta {
public:
/**
* sample - the sample being updated
* option - the source/target phrase to go into the merged segment
* gap - the location of the merged target segment
**/
MergeDelta(Sample& sample, const TranslationOption* option, const TargetGap& gap);
virtual void apply(const TranslationDelta& noChangeDelta);
const TranslationOption* getOption() const {return m_option;}
const TargetGap& getGap() const { return m_gap;}
virtual void getNewSentence(std::vector<const Factor*>& newSentence) const;
private:
const TranslationOption* m_option;
TargetGap m_gap;
};
/**
* Like TranslationUpdateDelta, except that it updates a pair of source/target phrase pairs.
**/
class PairedTranslationUpdateDelta : public virtual TranslationDelta {
public:
/** Options and gaps in target order */
PairedTranslationUpdateDelta(Sample& sample,
const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
virtual void apply(const TranslationDelta& noChangeDelta);
const TranslationOption* getLeftOption() const {return m_leftOption;}
const TranslationOption* getRightOption() const {return m_rightOption;}
const TargetGap& getLeftGap() const { return m_leftGap;}
const TargetGap& getRightGap() const { return m_rightGap;}
virtual void getNewSentence(std::vector<const Factor*>& newSentence) const;
private:
const TranslationOption* m_leftOption;
const TranslationOption* m_rightOption;
TargetGap m_leftGap;
TargetGap m_rightGap;
};
/**
* Updates the sample by splitting a source phrase and its corresponding target phrase, choosing new options.
**/
class SplitDelta : public virtual TranslationDelta {
public:
/** Options and gaps in target order */
SplitDelta(Sample& sample, const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& gap);
virtual void apply(const TranslationDelta& noChangeDelta);
const TranslationOption* getLeftOption() const {return m_leftOption;}
const TranslationOption* getRightOption() const {return m_rightOption;}
const TargetGap& getGap() const { return m_gap;}
virtual void getNewSentence(std::vector<const Factor*>& newSentence) const;
private:
const TranslationOption* m_leftOption;
const TranslationOption* m_rightOption;
TargetGap m_gap;
};
/**
* Switch the translations on the target side.
**/
class FlipDelta : public virtual TranslationDelta {
public:
/** Options and gaps in target order */
FlipDelta(Sample& sample, const TranslationOption* leftTgtOption, const TranslationOption* rightTgtOption,
const TargetGap& leftGap, const TargetGap& rightGap);
virtual void apply(const TranslationDelta& noChangeDelta);
const TranslationOption* getLeftOption() const {return m_leftTgtOption;}
const TranslationOption* getRightOption() const {return m_rightTgtOption;}
const TargetGap& getLeftGap() const { return m_leftGap;}
const TargetGap& getRightGap() const { return m_rightGap;}
virtual void getNewSentence(std::vector<const Factor*>& newSentence) const;
private:
const TranslationOption* m_leftTgtOption;
const TranslationOption* m_rightTgtOption;
TargetGap m_leftGap;
TargetGap m_rightGap;
Hypothesis* m_prevTgtHypo;
Hypothesis* m_nextTgtHypo;
};
typedef boost::shared_ptr<TranslationDelta> TDeltaHandle;
typedef std::vector<TDeltaHandle> TDeltaVector;
} //namespace
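To illustrate how deltas are consumed, here is a hedged sketch (applyBest is an assumption, not part of the sampler): it scores each candidate against the no-change delta and applies the winner. A Gibbs operator would typically sample a delta from the distribution induced by these scores rather than take the argmax.

#include "TranslationDelta.h"
namespace Josiah {
TDeltaHandle applyBest(const TDeltaVector& candidates,
                       const TDeltaHandle& noChangeDelta) {
  TDeltaHandle best = noChangeDelta;
  for (TDeltaVector::const_iterator i = candidates.begin();
       i != candidates.end(); ++i) {
    if ((*i)->getScore() > best->getScore()) {
      best = *i;
    }
  }
  if (best != noChangeDelta) {
    // apply() subtracts the no-change scores and updates the sample
    best->apply(*noChangeDelta);
  }
  return best;
}
} // namespace Josiah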

129
josiah/Truncate.cpp Normal file
View File

@ -0,0 +1,129 @@
/**
* Truncate a phrase table to the top n translation options, given an
* input corpus.
**/
#include <cmath>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <set>
#include <sstream>
#include <boost/program_options.hpp>
#include "Decoder.h"
#include "InputSource.h"
#include "DummyScoreProducers.h"
#include "ScoreProducer.h"
#include "StaticData.h"
using namespace std;
using namespace Josiah;
using namespace Moses;
namespace po = boost::program_options;
int main(int argc, char** argv) {
size_t toptionLimit;
string inputFile;
string configFile;
bool help;
po::options_description visible("Allowed options");
visible.add_options()
("help",po::value( &help )->zero_tokens()->default_value(false), "Print this help message and exit")
("toption-limit,t", po::value<size_t>(&toptionLimit)->default_value(20), "Number of translation options to prune to");
po::options_description hidden("Hidden options");
hidden.add_options()
("config-file", po::value<string>(&configFile), "config file")
("input-file", po::value<string>(&inputFile), "input file");
po::positional_options_description p;
p.add("config-file", 1);
p.add("input-file", 1);
po::options_description cmdline_options;
cmdline_options.add(visible).add(hidden);
po::variables_map vm;
po::store(po::command_line_parser(argc,argv).
options(cmdline_options).positional(p).run(), vm);
po::notify(vm);
if (inputFile.empty() || configFile.empty()) help = true;
if (help) {
std::cout << "Usage: " + string(argv[0]) + " [options] config-file input-file" << std::endl;
std::cout << visible << std::endl;
return 0;
}
cerr << "Truncating the model " << configFile << " using the input file " << inputFile << endl;
//set up moses
vector<string> extraArgs;
extraArgs.push_back("-ttable-limit");
ostringstream toptionLimitStr;
toptionLimitStr << toptionLimit;
extraArgs.push_back(toptionLimitStr.str());
extraArgs.push_back("-persistent-cache-size");
extraArgs.push_back("0");
initMoses(configFile,0,extraArgs);
//store source phrases already output
set<Phrase> sourcePhrases;
ifstream in(inputFile.c_str());
if (!in) {
cerr << "Unable to open input file " << inputFile << endl;
return 1;
}
//only print the 1st factor
vector<FactorType> factors;
factors.push_back(0);
//Assume single phrase feature
StaticData& staticData =
const_cast<StaticData&>(StaticData::Instance());
PhraseDictionaryFeature* ptable = staticData.GetTranslationSystem
(TranslationSystem::DEFAULT).GetPhraseDictionaries()[0];
//To detect unknown words
const ScoreProducer* uwp = staticData.GetTranslationSystem
(TranslationSystem::DEFAULT).GetUnknownWordPenaltyProducer();
string line;
while (getline(in,line)) {
//cerr << line << endl;
TranslationHypothesis translation(line);
size_t length = translation.getWords().size();
size_t maxPhraseSize = staticData.GetMaxPhraseLength();
for (size_t start = 0; start < length; ++start) {
for (size_t end = start; end < start + maxPhraseSize && end < length; ++end) {
TranslationOptionList& options = translation.getToc()->GetTranslationOptionList(start,end);
if (!options.size()) continue;
const Phrase* sourcePhrase = options.Get(0)->GetSourcePhrase();
if (sourcePhrases.find(*sourcePhrase) != sourcePhrases.end()) continue;
if (options.Get(0)->GetScoreBreakdown().GetScoreForProducer(uwp)) continue;
sourcePhrases.insert(*sourcePhrase);
for (size_t i = 0; i < options.size(); ++i) {
const TranslationOption* option = options.Get(i);
cout << sourcePhrase->GetStringRep(factors);
cout << " ||| ";
cout << option->GetTargetPhrase().GetStringRep(factors);
cout << " |||";
vector<float> scores = option->GetScoreBreakdown().GetScoresForProducer(ptable);
for (size_t j = 0; j < scores.size(); ++j) {
cout << " " << exp(scores[j]);
}
cout << " ||| |||";
cout << endl;
}
}
}
}
}
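A hedged usage note: the tool reads the Moses config and the development input from its two positional arguments and writes the pruned table to stdout. An illustrative invocation (binary name and file names are assumptions):

truncate -t 30 moses.ini dev.source > phrase-table.truncated

Each output line is a phrase-table entry for a source phrase observed in the input, with the decoder's log-space scores mapped back to probabilities via exp().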

222
josiah/Utils.cpp Normal file
View File

@ -0,0 +1,222 @@
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <boost/program_options.hpp>
#include "Utils.h"
#include "Pos.h"
#include "Dependency.h"
#include "DiscriminativeLMFeature.h"
#include "DistortionPenaltyFeature.h"
#include "LanguageModelFeature.h"
#include "LexicalReorderingFeature.h"
#include "MetaFeature.h"
#include "ParenthesisFeature.h"
#include "PhraseFeature.h"
#include "PhraseBoundaryFeature.h"
#include "PhrasePairFeature.h"
#include "PosProjectionFeature.h"
#include "RandomFeature.h"
#include "ReorderingFeature.h"
#include "SourceToTargetRatio.h"
#include "StatelessFeature.h"
#include "WordPenaltyFeature.h"
using namespace std;
namespace po = boost::program_options;
namespace Josiah {
void configure_features_from_file(const std::string& filename, FeatureVector& fv, bool disableUWP, FVector& coreWeights){
//Core features
fv.push_back(FeatureHandle(new WordPenaltyFeature()));
if (!disableUWP) {
fv.push_back(FeatureHandle(new UnknownWordPenaltyFeature()));
}
const TranslationSystem& system = StaticData::Instance().GetTranslationSystem
(TranslationSystem::DEFAULT);
vector<PhraseDictionaryFeature*> phraseTables = system.GetPhraseDictionaries();
for (size_t i = 0; i < phraseTables.size(); ++i) {
fv.push_back(FeatureHandle(new PhraseFeature(phraseTables[i],i)));
}
const LMList& lms = system.GetLanguageModels();
for (LMList::const_iterator i = lms.begin(); i != lms.end(); ++i) {
fv.push_back(FeatureHandle(new LanguageModelFeature(*i)));
}
fv.push_back(FeatureHandle(new DistortionPenaltyFeature()));
const std::vector<LexicalReordering*>& reorderModels = system.GetReorderModels();
for (size_t i = 0; i < reorderModels.size(); ++i) {
fv.push_back(FeatureHandle(new LexicalReorderingFeature(reorderModels[i],i)));
}
if (filename.empty()) return;
std::cerr << "Reading extra features from " << filename << std::endl;
std::ifstream in(filename.c_str());
if (!in) {
throw std::runtime_error("Unable to open feature configuration file");
}
// todo: instead of having this function know about all required options of
// each feature, have features populate options / read variable maps /
// populate feature_vector using static functions.
po::options_description desc;
bool useVerbDiff = false;
bool useCherry = false;
bool useDepDist = false;
bool useSrcTgtRatio = false;
string posProjectBigramTags;
size_t posSourceFactor;
size_t posTargetFactor;
string parenthesisLefts;
string parenthesisRights;
size_t dependencyFactor;
bool discrimlmBigram;
string discrimlmVocab;
FactorType discrimlmFactor;
string coreWeightFile;
vector<string> msdConfig;
vector<string> msdVocab;
bool phrasePairSourceTarget;
size_t phrasePairSourceFactor;
size_t phrasePairTargetFactor;
size_t randomFeatureCount;
size_t randomFeatureScaling;
vector<string> phraseBoundarySourceFactors;
vector<string> phraseBoundaryTargetFactors;
desc.add_options()
("model1.table", "Model 1 table")
("model1.pef_column", "Column containing p(e|f) score")
("model1.pfe_column", "Column containing p(f|e) score")
("dependency.cherry", po::value<bool>(&useCherry)->default_value(false), "Use Colin Cherry's syntactic cohesiveness feature")
("dependency.distortion", po::value<bool>(&useDepDist)->default_value(false), "Use the dependency distortion feature")
("dependency.factor", po::value<size_t>(&dependencyFactor)->default_value(1), "Factor representing the dependency tree")
("pos.sourcefactor", po::value<size_t>(&posSourceFactor)->default_value(1), "Factor representing the source pos tag")
("pos.targetfactor", po::value<size_t>(&posTargetFactor)->default_value(1), "Factor representing the target pos tag")
("pos.verbdiff", po::value<bool>(&useVerbDiff)->default_value(false), "Verb difference feature")
("pos.projectbigram", po::value<string>(&posProjectBigramTags),
"Pos project bigram. Comma separated list of tags, or * for all tags.")
("srctgtratio.useFeat", po::value<bool>(&useSrcTgtRatio)->default_value(false), "Use source length to target length ratio feature")
("parenthesis.lefts", po::value<std::string>(&parenthesisLefts), "Left parentheses")
("parenthesis.rights", po::value<std::string>(&parenthesisRights), "Right parentheses")
("discrimlm.vocab", po::value<string>(&discrimlmVocab), "Vocabulary file for discriminative lms")
("discrimlm.bigram", po::value<bool>(&discrimlmBigram)->default_value(false), "Use the discriminative lm bigram feature")
("discrimlm.factor", po::value<FactorType>(&discrimlmFactor)->default_value(0), "The factor to use for the discuminative lm features")
("core.weightfile", po::value<string>(&coreWeightFile),
"Weights of core features, if they are to be combined into a single feature")
("reordering.msd", po::value<vector<string> >(&msdConfig),
"Reordering msd (monotone/swap/discontinuous) feature configuration")
("reordering.msdvocab", po::value<vector<string> > (&msdVocab),
"Vocabularies for msd features. In the form factor_id:source/target:file")
("phrasepair.sourcetarget", po::value<bool>(&phrasePairSourceTarget)->zero_tokens()->default_value(false), "Watanabe style phrase pair feature")
("phrasepair.sourcefactor", po::value<size_t>(&phrasePairSourceFactor)->default_value(0), "The source factor for the phrase pair feature")
("phrasepair.targetfactor", po::value<size_t>(&phrasePairTargetFactor)->default_value(0), "The target factor for the phrase pair feature")
("random.numvalues", po::value<size_t>(&randomFeatureCount)->default_value(0),
"The number of values for the random feature")
("random.scaling", po::value<size_t>(&randomFeatureScaling)->default_value(5),
"The scaling for the random feature")
("phraseboundary.sourcefactors", po::value<vector<string> >(&phraseBoundarySourceFactors), "Source factors used in the phrase boundary feature, with optional vocab separated by comma")
("phraseboundary.targetfactors", po::value<vector<string> >(&phraseBoundaryTargetFactors), "Target factors used in the phrase boundary feature, with optional vocab separated by comma")
;
po::variables_map vm;
po::store(po::parse_config_file(in,desc,true), vm);
notify(vm);
if (!coreWeightFile.empty()) {
cerr << "Using single feature for core features, loading weights from " << coreWeightFile << endl;
coreWeights.load(coreWeightFile);
FeatureHandle metaFeature(new MetaFeature(coreWeights,fv));
fv.clear();
fv.push_back(metaFeature);
}
if (useVerbDiff) {
//FIXME: Should be configurable
fv.push_back(FeatureHandle(new VerbDifferenceFeature(posSourceFactor,posTargetFactor)));
}
if (useCherry) {
fv.push_back(FeatureHandle(new CherrySyntacticCohesionFeature(dependencyFactor)));
}
if (useSrcTgtRatio) {
fv.push_back(FeatureHandle(new SourceToTargetRatioFeature));
}
if (useDepDist) {
fv.push_back(FeatureHandle(new DependencyDistortionFeature(dependencyFactor)));
}
if (parenthesisRights.size() > 0 || parenthesisLefts.size() > 0) {
assert(parenthesisRights.size() == parenthesisLefts.size());
fv.push_back(FeatureHandle(new ParenthesisFeature(parenthesisLefts,parenthesisRights)));
}
if (posProjectBigramTags.size()) {
fv.push_back(FeatureHandle(new PosProjectionBigramFeature(posSourceFactor,posProjectBigramTags)));
}
if (discrimlmBigram) {
fv.push_back(FeatureHandle(new DiscriminativeLMBigramFeature(discrimlmFactor,discrimlmVocab)));
}
if (msdConfig.size()) {
fv.push_back(FeatureHandle(new ReorderingFeature(msdConfig,msdVocab)));
}
if (phrasePairSourceTarget) {
fv.push_back(FeatureHandle(
new PhrasePairFeature(phrasePairSourceFactor,phrasePairTargetFactor)));
}
if (randomFeatureCount) {
fv.push_back(FeatureHandle(
new StatelessFeatureAdaptor(MosesFeatureHandle(
new Moses::RandomFeature(randomFeatureCount, randomFeatureScaling)))));
}
if (phraseBoundarySourceFactors.size() || phraseBoundaryTargetFactors.size())
{
FactorList sourceFactorIds;
vector<string> sourceVocabs;
FactorList targetFactorIds;
vector<string> targetVocabs;
for (size_t i = 0; i < phraseBoundarySourceFactors.size(); ++i) {
vector<string> tokens = Tokenize(phraseBoundarySourceFactors[i],",");
assert(tokens.size() <= 2);
sourceFactorIds.push_back(Scan<FactorType>(tokens[0]));
if (tokens.size() > 1) {
sourceVocabs.push_back(tokens[1]);
} else {
sourceVocabs.push_back("");
}
}
for (size_t i = 0; i < phraseBoundaryTargetFactors.size(); ++i) {
vector<string> tokens = Tokenize(phraseBoundaryTargetFactors[i],",");
assert(tokens.size() <= 2);
targetFactorIds.push_back(Scan<FactorType>(tokens[0]));
if (tokens.size() > 1) {
targetVocabs.push_back(tokens[1]);
} else {
targetVocabs.push_back("");
}
}
fv.push_back(FeatureHandle(
new PhraseBoundaryFeature(sourceFactorIds,targetFactorIds,
sourceVocabs,targetVocabs)));
}
in.close();
}
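/* For illustration, the file parsed above uses boost::program_options
   config syntax: [section] headers with key = value lines, equivalent to
   the dotted names registered in desc. A hedged example enabling a few of
   the features above (all paths and values are assumptions):

   [pos]
   sourcefactor = 1
   targetfactor = 2
   verbdiff = true

   [srctgtratio]
   useFeat = true

   [discrimlm]
   bigram = true
   vocab = discrimlm.vocab.txt
   factor = 0
*/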
/*
bool ValidateAndGetLMFromName(string featsName, LanguageModel* &lm) {
const ScoreIndexManager& scoreIndexManager = StaticData::Instance().GetScoreIndexManager();
size_t numScores = scoreIndexManager.GetTotalNumberOfScores();
for (size_t i = 0; i < numScores; ++i) {
if (scoreIndexManager.GetFeatureName(i) == featsName) {
const ScoreProducer* scoreProducer = scoreIndexManager.GetScoreProducer(i);
lm = static_cast<LanguageModel*>(const_cast<ScoreProducer*>(scoreProducer));
return true;
}
}
return false;
}*/
}

30
josiah/Utils.h Normal file
View File

@ -0,0 +1,30 @@
#pragma once
#include <vector>
#include "Timer.h"
#include "FeatureFunction.h"
namespace Moses {
class LanguageModel;
}
namespace Josiah {
/**
* Wrap moses timer to give a way to no-op it.
**/
class GibbsTimer {
public:
GibbsTimer() : m_doTiming(false) {}
void on() {m_doTiming = true; m_timer.start("TIME: Starting timer");}
void check(const std::string& msg) {if (m_doTiming) m_timer.check(std::string("TIME:" + msg).c_str());}
private:
Timer m_timer;
bool m_doTiming;
} ;
void configure_features_from_file(const std::string& filename, FeatureVector& fv, bool disableUWP, FVector& coreWeights);
// bool ValidateAndGetLMFromName(std::string featsName, Moses::LanguageModel* &lm);
}
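A brief illustrative sketch of the timer wrapper in use (the surrounding function is an assumption):

#include "Utils.h"
void timedSamplingPass() {
  Josiah::GibbsTimer timer;
  timer.on(); // starts the underlying Moses timer; without this, check() is a no-op
  // ... run one sampling pass ...
  timer.check("end of sampling pass"); // logs elapsed time with this label
}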

67
josiah/WeightManager.cpp Normal file
View File

@ -0,0 +1,67 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cassert>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include "StaticData.h"
#include "WeightManager.h"
using namespace Moses;
using namespace std;
namespace Josiah {
auto_ptr<WeightManager> WeightManager::s_instance;
void WeightManager::init(const std::string& weightsFile) {
init();
s_instance->m_weights.load(weightsFile);
}
void WeightManager::init() {
assert(!s_instance.get());
s_instance.reset(new WeightManager());
}
WeightManager& WeightManager::instance() {
assert(s_instance.get());
return *s_instance;
}
FVector& WeightManager::get() {
return m_weights;
}
void WeightManager::scale(FValue& scale) {
m_weights *= scale;
}
void WeightManager::dump(const string& filename) {
ofstream out(filename.c_str());
if (!out) {
cerr << "WARN: Failed to open " << filename << " for weight dump" << endl;
} else {
m_weights.write(out);
out.close();
}
}
}
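And a sketch of the intended call pattern (file names are illustrative): init() must run exactly once before instance() is first used.

#include "WeightManager.h"
void weightManagerExample() {
  Josiah::WeightManager::init("initial.weights"); // load starting weights
  Moses::FVector& weights = Josiah::WeightManager::instance().get();
  (void)weights; // mutate or inspect the live weight vector here
  Moses::FValue factor = 0.5;
  Josiah::WeightManager::instance().scale(factor); // note: takes a non-const reference
  Josiah::WeightManager::instance().dump("scaled.weights");
}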

Some files were not shown because too many files have changed in this diff.