moses/src for DPR_reordering model

Add files:
DPR_reordering.h/cpp
Modified files:
StaticData.h/cpp
Parameter.h/cpp

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/ni_DPR_reordering_model@2973 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
skyload 2010-03-14 21:55:52 +00:00
parent 88ead91273
commit 392aa0a89f
184 changed files with 26761 additions and 0 deletions

499
src/BitmapContainer.cpp Normal file
View File

@ -0,0 +1,499 @@
// $Id: BitmapContainer.cpp 2477 2009-08-07 16:47:54Z bhaddow $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include <limits>
#include <utility>
#include "BitmapContainer.h"
#include "HypothesisStackCubePruning.h"
#include "DummyScoreProducers.h"
#include "TranslationOptionList.h"
namespace Moses
{
// Strict weak ordering on hypothesis pointers: higher raw score sorts
// first; exact ties fall back to pointer identity so the order stays
// deterministic within a run.
class HypothesisScoreOrdererNoDistortion
{
public:
  bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const
  {
    const float scoreA = hypoA->GetScore();
    const float scoreB = hypoB->GetScore();
    if (scoreA < scoreB)
    {
      return false;
    }
    if (scoreB < scoreA)
    {
      return true;
    }
    return hypoA < hypoB;
  }
};
class HypothesisScoreOrdererWithDistortion
{
public:
HypothesisScoreOrdererWithDistortion(const WordsRange* transOptRange) :
m_transOptRange(transOptRange) {}
const WordsRange* m_transOptRange;
bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const
{
assert (m_transOptRange != NULL);
const float weightDistortion = StaticData::Instance().GetWeightDistortion();
const DistortionScoreProducer *dsp = StaticData::Instance().GetDistortionScoreProducer();
const float distortionScoreA = dsp->CalculateDistortionScore(
*hypoA,
hypoA->GetCurrSourceWordsRange(),
*m_transOptRange,
hypoA->GetWordsBitmap().GetFirstGapPos()
);
const float distortionScoreB = dsp->CalculateDistortionScore(
*hypoB,
hypoB->GetCurrSourceWordsRange(),
*m_transOptRange,
hypoB->GetWordsBitmap().GetFirstGapPos()
);
const float scoreA = hypoA->GetScore() + distortionScoreA * weightDistortion;
const float scoreB = hypoB->GetScore() + distortionScoreB * weightDistortion;
if (scoreA > scoreB)
{
return true;
}
else if (scoreA < scoreB)
{
return false;
}
else
{
return hypoA < hypoB;
}
}
};
////////////////////////////////////////////////////////////////////////////////
// BackwardsEdge Code
////////////////////////////////////////////////////////////////////////////////
// Build an edge for cube pruning: collect the hypotheses of the previous
// container that may legally be extended by this edge's translation options
// (w.r.t. the distortion limit) and order them best-first under the
// distortion-aware comparison.
BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
  , BitmapContainer &parent
  , const TranslationOptionList &translations
  , const SquareMatrix &futureScore,
  const InputType& itype)
  : m_initialized(false)
  , m_prevBitmapContainer(prevBitmapContainer)
  , m_parent(parent)
  , m_translations(translations)
  , m_futurescore(futureScore)
  , m_seenPosition()
{
  // If either dimension is empty, we haven't got anything to do.
  if(m_prevBitmapContainer.GetHypotheses().size() == 0 || m_translations.size() == 0) {
    VERBOSE(3, "Empty cube on BackwardsEdge" << std::endl);
    return;
  }
  // Fetch the things we need for distortion cost computation.
  int maxDistortion = StaticData::Instance().GetMaxDistortion();
  // -1 means "no distortion limit": every hypothesis qualifies, so copy
  // them in their existing order and skip the filtering/re-sorting below.
  if (maxDistortion == -1) {
    for (HypothesisSet::const_iterator iter = m_prevBitmapContainer.GetHypotheses().begin(); iter != m_prevBitmapContainer.GetHypotheses().end(); ++iter)
    {
      m_hypotheses.push_back(*iter);
    }
    return;
  }
  // All options of this edge share one source span, so the distortion test
  // can use the first option's range for every candidate.
  const WordsRange &transOptRange = translations.Get(0)->GetSourceWordsRange();
  HypothesisSet::const_iterator iterHypo = m_prevBitmapContainer.GetHypotheses().begin();
  HypothesisSet::const_iterator iterEnd = m_prevBitmapContainer.GetHypotheses().end();
  while (iterHypo != iterEnd)
  {
    const Hypothesis &hypo = **iterHypo;
    // Special case: If this is the first hypothesis used to seed the search,
    // it doesn't have a valid range, and we create the hypothesis, if the
    // initial position is not further into the sentence than the distortion limit.
    if (hypo.GetWordsBitmap().GetNumWordsCovered() == 0)
    {
      if (transOptRange.GetStartPos() <= maxDistortion)
        m_hypotheses.push_back(&hypo);
    }
    else
    {
      // Only keep hypotheses whose jump to this edge's span respects the
      // distortion limit.
      int distortionDistance = itype.ComputeDistortionDistance(hypo.GetCurrSourceWordsRange()
        , transOptRange);
      if (distortionDistance <= maxDistortion)
        m_hypotheses.push_back(&hypo);
    }
    ++iterHypo;
  }
  // Debug-build sanity checks: both inputs are expected to arrive sorted
  // best-first (translations by future score, hypotheses by total score).
  if (m_translations.size() > 1)
  {
    assert(m_translations.Get(0)->GetFutureScore() >= m_translations.Get(1)->GetFutureScore());
  }
  if (m_hypotheses.size() > 1)
  {
    assert(m_hypotheses[0]->GetTotalScore() >= m_hypotheses[1]->GetTotalScore());
  }
  // Re-rank the surviving hypotheses with the distortion cost of jumping to
  // this edge's span folded in, so the cube expands them best-first under
  // the distortion-aware ordering.
  HypothesisScoreOrdererWithDistortion orderer (&transOptRange);
  std::sort(m_hypotheses.begin(), m_hypotheses.end(), orderer);
  // std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrdererNoDistortion());
}
BackwardsEdge::~BackwardsEdge()
{
  // The edge does not own the hypotheses it references; only the
  // bookkeeping containers are emptied here.
  m_hypotheses.clear();
  m_seenPosition.clear();
}
void
BackwardsEdge::Initialize()
{
if(m_hypotheses.size() == 0 || m_translations.size() == 0)
{
m_initialized = true;
return;
}
Hypothesis *expanded = CreateHypothesis(*m_hypotheses[0], *m_translations.Get(0));
m_parent.Enqueue(0, 0, expanded, this);
SetSeenPosition(0, 0);
m_initialized = true;
}
// Build the successor of `hypothesis` extended by `transOpt`, chaining on
// any translation options linked to transOpt.  Returns NULL (after deleting
// the partial hypothesis) when a linked option's span is already covered,
// because a hypothesis must apply all of a linked set or none of it.
// Callers must handle the NULL return.
Hypothesis *BackwardsEdge::CreateHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt)
{
  // create hypothesis and calculate all its scores
  Hypothesis *newHypo = hypothesis.CreateNext(transOpt, NULL); // TODO FIXME This is absolutely broken - don't pass null here
  // expand hypothesis further if transOpt was linked
  std::vector<TranslationOption*>::const_iterator iterLinked = transOpt.GetLinkedTransOpts().begin();
  std::vector<TranslationOption*>::const_iterator iterEnd = transOpt.GetLinkedTransOpts().end();
  while (iterLinked != iterEnd)
  {
    const WordsBitmap hypoBitmap = newHypo->GetWordsBitmap();
    if (hypoBitmap.Overlap((**iterLinked).GetSourceWordsRange())) {
      // don't want to add a hypothesis that has some but not all of a linked TO set, so return
      delete newHypo;
      return NULL;
    }
    else
    {
      // Score the intermediate hypothesis before chaining the next linked
      // option onto it.
      // NOTE(review): the intermediate newHypo is not deleted after
      // CreateNext replaces it -- presumably the chain keeps a back
      // pointer that is cleaned up elsewhere; confirm.
      newHypo->CalcScore(m_futurescore);
      newHypo = newHypo->CreateNext(**iterLinked, NULL); // TODO FIXME This is absolutely broken - don't pass null here
    }
    ++iterLinked;
  }
  newHypo->CalcScore(m_futurescore);
  return newHypo;
}
// Has the cube cell (x, y) already been visited?  Coordinates are packed
// into a single integer key with x in the high bits.
bool
BackwardsEdge::SeenPosition(const size_t x, const size_t y)
{
  return m_seenPosition.count((x<<16) + y) > 0;
}
// Mark the cube cell (x, y) as visited.  The coordinate is packed into one
// int key as (x<<16)+y, so each component must fit in 16 bits.  The previous
// bound of (1<<17) admitted values whose packed keys collide (y >= 2^16
// bleeds into the x bits) and whose shifted x overflows a signed int.
void
BackwardsEdge::SetSeenPosition(const size_t x, const size_t y)
{
  assert(x < (1<<16));
  assert(y < (1<<16));
  m_seenPosition.insert((x<<16) + y);
}
// Whether Initialize() has already seeded this edge.
bool BackwardsEdge::GetInitialized()
{
  return m_initialized;
}
// The container whose hypotheses this edge expands from.
const BitmapContainer& BackwardsEdge::GetBitmapContainer() const
{
  return m_prevBitmapContainer;
}
void
BackwardsEdge::PushSuccessors(const size_t x, const size_t y)
{
Hypothesis *newHypo;
if(y + 1 < m_translations.size() && !SeenPosition(x, y + 1)) {
SetSeenPosition(x, y + 1);
newHypo = CreateHypothesis(*m_hypotheses[x], *m_translations.Get(y + 1));
if(newHypo != NULL)
{
m_parent.Enqueue(x, y + 1, newHypo, (BackwardsEdge*)this);
}
}
if(x + 1 < m_hypotheses.size() && !SeenPosition(x + 1, y)) {
SetSeenPosition(x + 1, y);
newHypo = CreateHypothesis(*m_hypotheses[x + 1], *m_translations.Get(y));
if(newHypo != NULL)
{
m_parent.Enqueue(x + 1, y, newHypo, (BackwardsEdge*)this);
}
}
}
////////////////////////////////////////////////////////////////////////////////
// BitmapContainer Code
////////////////////////////////////////////////////////////////////////////////
// Create a container for the given coverage bitmap, attached to the stack
// that will receive its hypotheses.
BitmapContainer::BitmapContainer(const WordsBitmap &bitmap
  , HypothesisStackCubePruning &stack)
  : m_bitmap(bitmap)
  , m_stack(stack)
  , m_numStackInsertions(0)
{
  // m_hypotheses, m_edges and m_queue are default-constructed empty; the
  // previous explicit re-assignments of fresh temporaries were redundant.
}
// Tear down the container: free every expansion still waiting in the queue
// (both the wrapper item and its hypothesis, which were never handed to the
// stack), then delete all backwards edges.
BitmapContainer::~BitmapContainer()
{
  // As we have created the square position objects we clean up now.
  HypothesisQueueItem *item = NULL;
  while (!m_queue.empty())
  {
    item = m_queue.top();
    FREEHYPO(item->GetHypothesis());
    delete item;
    m_queue.pop();
  }
  // Delete all edges.
  RemoveAllInColl(m_edges);
  // NOTE(review): entries of m_hypotheses are only cleared, not freed --
  // presumably owned by the hypothesis stack after AddPrune; confirm.
  m_hypotheses.clear();
  m_edges.clear();
}
void
BitmapContainer::Enqueue(int hypothesis_pos
, int translation_pos
, Hypothesis *hypothesis
, BackwardsEdge *edge)
{
HypothesisQueueItem *item = new HypothesisQueueItem(hypothesis_pos
, translation_pos
, hypothesis
, edge);
m_queue.push(item);
}
// Return the best queued item, or NULL when the queue is empty.  With
// keepValue == true the item is only peeked at and remains queued.
HypothesisQueueItem*
BitmapContainer::Dequeue(bool keepValue)
{
  if (m_queue.empty())
  {
    return NULL;
  }
  HypothesisQueueItem *best = m_queue.top();
  if (!keepValue)
  {
    m_queue.pop();
  }
  return best;
}
// Peek at the best queued item; the queue must not be empty.
HypothesisQueueItem* BitmapContainer::Top() const
{
  return m_queue.top();
}
// Number of expansions currently waiting in the queue.
size_t BitmapContainer::Size()
{
  return m_queue.size();
}
// Whether the expansion queue holds no items.
bool BitmapContainer::Empty() const
{
  return m_queue.empty();
}
// The source-coverage bitmap this container represents.
const WordsBitmap& BitmapContainer::GetWordsBitmap()
{
  return m_bitmap;
}
// Read-only access to the stored hypotheses.
const HypothesisSet& BitmapContainer::GetHypotheses() const
{
  return m_hypotheses;
}
// Number of hypotheses stored in this container.
size_t BitmapContainer::GetHypothesesSize() const
{
  return m_hypotheses.size();
}
// Read-only access to the set of incoming backwards edges.
const BackwardsEdgeSet& BitmapContainer::GetBackwardsEdges()
{
  return m_edges;
}
// Store a hypothesis in this container.  In debug builds, assert that it
// is not already present; the previous hand-rolled existence loop executed
// unconditionally even in release builds where the assert is compiled out,
// so the duplicate scan now lives inside the assert itself.
void
BitmapContainer::AddHypothesis(Hypothesis *hypothesis)
{
  // cfedermann: do we actually need this check?
  assert(std::find(m_hypotheses.begin(), m_hypotheses.end(), hypothesis)
         == m_hypotheses.end());
  m_hypotheses.push_back(hypothesis);
}
// Register an incoming edge; the container takes ownership and deletes
// all registered edges in its destructor.
void BitmapContainer::AddBackwardsEdge(BackwardsEdge *edge)
{
  m_edges.insert(edge);
}
void
BitmapContainer::InitializeEdges()
{
BackwardsEdgeSet::iterator iter = m_edges.begin();
BackwardsEdgeSet::iterator iterEnd = m_edges.end();
while (iter != iterEnd)
{
BackwardsEdge *edge = *iter;
edge->Initialize();
++iter;
}
}
// Pop best expansions until this container has contributed at least
// minNumHyps new entries to the stack, or its queue runs dry.
void BitmapContainer::EnsureMinStackHyps(const size_t minNumHyps)
{
  while (m_numStackInsertions < minNumHyps && !Empty())
  {
    ProcessBestHypothesis();
  }
}
// Pop the best queued expansion, hand its hypothesis to the stack, and
// enqueue the expansion's two cube-pruning successors.  No-op when the
// queue is empty.
void
BitmapContainer::ProcessBestHypothesis()
{
  if (m_queue.empty())
  {
    return;
  }
  // Get the currently best hypothesis from the queue.
  HypothesisQueueItem *item = Dequeue();
  // If the priority queue is exhausted, we are done and should have exited
  assert(item != NULL);
  // check we are pulling things off of priority queue in right order
  if (!Empty())
  {
    // Dequeue(true) only peeks: the runner-up stays in the queue.
    HypothesisQueueItem *check = Dequeue(true);
    assert(item->GetHypothesis()->GetTotalScore() >= check->GetHypothesis()->GetTotalScore());
  }
  // Logging for the criminally insane
  IFVERBOSE(3) {
    // const StaticData &staticData = StaticData::Instance();
    item->GetHypothesis()->PrintHypothesis();
  }
  // Add best hypothesis to hypothesis stack.  Only count insertions that
  // actually created a new stack entry (AddPrune may recombine/discard).
  const bool newstackentry = m_stack.AddPrune(item->GetHypothesis());
  if (newstackentry)
    m_numStackInsertions++;
  IFVERBOSE(3) {
    TRACE_ERR("new stack entry flag is " << newstackentry << std::endl);
  }
  // Create new hypotheses for the two successors of the hypothesis just added.
  item->GetBackwardsEdge()->PushSuccessors(item->GetHypothesisPos(), item->GetTranslationPos());
  // We are done with the queue item, we delete it.
  delete item;
}
void
BitmapContainer::SortHypotheses()
{
std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrderer());
}
}

249
src/BitmapContainer.h Normal file
View File

@ -0,0 +1,249 @@
// $Id: BitmapContainer.h 2939 2010-02-24 11:15:44Z jfouet $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_BitmapContainer_h
#define moses_BitmapContainer_h
#include <set>
#include <vector>
#include "Hypothesis.h"
#include "HypothesisStackCubePruning.h"
#include "SquareMatrix.h"
#include "TranslationOption.h"
#include "TypeDef.h"
#include "WordsBitmap.h"
namespace Moses
{
class BitmapContainer;
class BackwardsEdge;
class Hypothesis;
class HypothesisStackCubePruning;
class HypothesisQueueItem;
class QueueItemOrderer;
typedef std::vector< Hypothesis* > HypothesisSet;
typedef std::set< BackwardsEdge* > BackwardsEdgeSet;
typedef std::priority_queue< HypothesisQueueItem*, std::vector< HypothesisQueueItem* >, QueueItemOrderer> HypothesisQueue;
////////////////////////////////////////////////////////////////////////////////
// Hypothesis Priority Queue Code
////////////////////////////////////////////////////////////////////////////////
// One cell of the cube-pruning grid: an expanded hypothesis together with
// the (hypothesis index, translation index) coordinates it was created from
// and the edge that produced it.
class HypothesisQueueItem
{
private:
  size_t m_hypothesis_pos, m_translation_pos;
  Hypothesis *m_hypothesis;
  BackwardsEdge *m_edge;
  // An item without coordinates is meaningless, so no default construction.
  HypothesisQueueItem();
public:
  HypothesisQueueItem(const size_t hypothesis_pos
    , const size_t translation_pos
    , Hypothesis *hypothesis
    , BackwardsEdge *edge)
    : m_hypothesis_pos(hypothesis_pos)
    , m_translation_pos(translation_pos)
    , m_hypothesis(hypothesis)
    , m_edge(edge)
  {
  }
  ~HypothesisQueueItem()
  {
  }
  // Return size_t (the stored type); the previous int return type silently
  // narrowed the position on 64-bit builds.
  size_t GetHypothesisPos()
  {
    return m_hypothesis_pos;
  }
  size_t GetTranslationPos()
  {
    return m_translation_pos;
  }
  Hypothesis *GetHypothesis()
  {
    return m_hypothesis;
  }
  BackwardsEdge *GetBackwardsEdge()
  {
    return m_edge;
  }
};
// Allows to compare two HypothesisQueueItem objects by the corresponding scores.
// Compares two queue items by the total score of their hypotheses so that
// std::priority_queue surfaces the highest-scoring item first.  Items with
// equal scores compare as equivalent (no identity tie-break), which is
// sufficient for a priority queue.  The dead commented-out three-way
// comparison has been removed.
class QueueItemOrderer
{
public:
  bool operator()(HypothesisQueueItem* itemA, HypothesisQueueItem* itemB) const
  {
    const float scoreA = itemA->GetHypothesis()->GetTotalScore();
    const float scoreB = itemB->GetHypothesis()->GetTotalScore();
    return (scoreA < scoreB);
  }
};
////////////////////////////////////////////////////////////////////////////////
// Hypothesis Orderer Code
////////////////////////////////////////////////////////////////////////////////
// Allows to compare two Hypothesis objects by the corresponding scores.
////////////////////////////////////////////////////////////////////////////////
// Orders hypotheses by descending total score (for sorting containers);
// equal-scored hypotheses compare as equivalent.  The dead commented-out
// three-way comparison has been removed.
class HypothesisScoreOrderer
{
public:
  bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const
  {
    const float scoreA = hypoA->GetTotalScore();
    const float scoreB = hypoB->GetTotalScore();
    return (scoreA > scoreB);
  }
};
////////////////////////////////////////////////////////////////////////////////
// Backwards Edge Code
////////////////////////////////////////////////////////////////////////////////
// Encodes an edge pointing to a BitmapContainer.
////////////////////////////////////////////////////////////////////////////////
// Encodes an edge pointing to a BitmapContainer: the Cartesian grid of
// (previous hypotheses x translation options) that cube pruning explores.
class BackwardsEdge
{
private:
  friend class BitmapContainer;
  bool m_initialized;                             // true once Initialize() has seeded the queue
  const BitmapContainer &m_prevBitmapContainer;   // container supplying expandable hypotheses
  BitmapContainer &m_parent;                      // container receiving the expansions
  const TranslationOptionList &m_translations;    // translation options applied along this edge
  const SquareMatrix &m_futurescore;              // future-cost estimates used when scoring
  std::vector< const Hypothesis* > m_hypotheses;  // distortion-filtered, best-first candidates
  std::set< int > m_seenPosition;                 // visited grid cells, packed as (x<<16)+y
  // We don't want to instantiate "empty" objects.
  BackwardsEdge();
  Hypothesis *CreateHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt);
  bool SeenPosition(const size_t x, const size_t y);
  void SetSeenPosition(const size_t x, const size_t y);
protected:
  void Initialize();
public:
  BackwardsEdge(const BitmapContainer &prevBitmapContainer
    , BitmapContainer &parent
    , const TranslationOptionList &translations
    , const SquareMatrix &futureScore,
    const InputType& source);
  ~BackwardsEdge();
  bool GetInitialized();
  const BitmapContainer &GetBitmapContainer() const;
  // NOTE(review): declared but no definition appears in BitmapContainer.cpp;
  // any call would fail at link time -- confirm before relying on it.
  int GetDistortionPenalty();
  void PushSuccessors(const size_t x, const size_t y);
};
////////////////////////////////////////////////////////////////////////////////
// Bitmap Container Code
////////////////////////////////////////////////////////////////////////////////
// A BitmapContainer encodes an ordered set of hypotheses and a set of edges
// pointing to the "generating" BitmapContainers. It also stores a priority
// queue that contains expanded hypotheses from the connected edges.
////////////////////////////////////////////////////////////////////////////////
// A BitmapContainer encodes an ordered set of hypotheses sharing one
// coverage bitmap, the set of backwards edges that generate them, and a
// priority queue of expansions produced by those edges.
class BitmapContainer
{
private:
  WordsBitmap m_bitmap;                  // the coverage this container stands for
  HypothesisStackCubePruning &m_stack;   // stack receiving processed hypotheses
  HypothesisSet m_hypotheses;            // hypotheses stored for this coverage
  BackwardsEdgeSet m_edges;              // owned; deleted in the destructor
  HypothesisQueue m_queue;               // pending expansions, best-first
  size_t m_numStackInsertions;           // how many new stack entries we produced
  // We always require a corresponding bitmap to be supplied.
  BitmapContainer();
  BitmapContainer(const BitmapContainer &);
public:
  BitmapContainer(const WordsBitmap &bitmap
    , HypothesisStackCubePruning &stack);
  // The destructor will also delete all the edges that are
  // connected to this BitmapContainer.
  ~BitmapContainer();
  void Enqueue(int hypothesis_pos, int translation_pos, Hypothesis *hypothesis, BackwardsEdge *edge);
  // Pop (or, with keepValue, peek at) the best queued item; NULL if empty.
  HypothesisQueueItem *Dequeue(bool keepValue=false);
  HypothesisQueueItem *Top() const;
  size_t Size();
  bool Empty() const;
  const WordsBitmap &GetWordsBitmap();
  const HypothesisSet &GetHypotheses() const;
  size_t GetHypothesesSize() const;
  const BackwardsEdgeSet &GetBackwardsEdges();
  // Seed the queue from every edge's best corner cell.
  void InitializeEdges();
  // Pop the best expansion, add it to the stack, enqueue its successors.
  void ProcessBestHypothesis();
  // Keep processing until minNumHyps stack insertions were made (or empty).
  void EnsureMinStackHyps(const size_t minNumHyps);
  void AddHypothesis(Hypothesis *hypothesis);
  void AddBackwardsEdge(BackwardsEdge *edge);
  void SortHypotheses();
};
}
#endif

245
src/ConfusionNet.cpp Normal file
View File

@ -0,0 +1,245 @@
// $Id: ConfusionNet.cpp 2935 2010-02-24 10:30:24Z jfouet $
#include "ConfusionNet.h"
#include <sstream>
#include "FactorCollection.h"
#include "Util.h"
#include "PhraseDictionaryTreeAdaptor.h"
#include "TranslationOptionCollectionConfusionNet.h"
#include "StaticData.h"
#include "Sentence.h"
#include "UserMessage.h"
namespace Moses
{
// Process-wide bookkeeping for confusion-net usage; the collected counters
// are printed to stderr from the destructor, i.e. at program exit.
struct CNStats {
  size_t created,destr,read,colls,words;
  CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
  ~CNStats() {print(std::cerr);}
  // Count one ConfusionNet construction / destruction.
  void createOne() {++created;}
  void destroyOne() {++destr;}
  // Record one successfully read net: its column count and total word count.
  void collect(const ConfusionNet& cn)
  {
    ++read;
    colls+=cn.GetSize();
    for(size_t i=0;i<cn.GetSize();++i)
      words+=cn[i].size();
  }
  void print(std::ostream& out) const
  {
    // NOTE(review): guarded only by created>0; colls or read may still be 0
    // here, which makes the average lines print inf/nan.
    if(created>0)
    {
      out<<"confusion net statistics:\n"
        " created:\t"<<created<<"\n"
        " destroyed:\t"<<destr<<"\n"
        " succ. read:\t"<<read<<"\n"
        " columns:\t"<<colls<<"\n"
        " words:\t"<<words<<"\n"
        " avg. word/column:\t"<<words/(1.0*colls)<<"\n"
        " avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
        "\n\n";
    }
  }
};
// NOTE(review): this global has external linkage; a `stats` symbol in any
// other translation unit would clash -- consider internal linkage.
CNStats stats;
// Plain confusion nets advance exactly one column per covered position,
// so the increment is constant regardless of the cell addressed.
size_t ConfusionNet::GetColumnIncrement(size_t i, size_t j) const
{
  (void) i;
  (void) j;
  return 1;
}
// Default-construct an empty net and register the creation with the
// global statistics collector.
ConfusionNet::ConfusionNet() : InputType()
{
  stats.createOne();
}
// Register the destruction with the global statistics collector.
ConfusionNet::~ConfusionNet()
{
  stats.destroyOne();
}
// Build a degenerate confusion net from a plain sentence: one column per
// word, each holding that single word with log-probability 0.0.
ConfusionNet::ConfusionNet(Sentence const& s)
{
  data.resize(s.GetSize());
  for(size_t i=0;i<s.GetSize();++i)
    data[i].push_back(std::make_pair(s.GetWord(i),0.0));
  // Keep the global statistics consistent: the destructor unconditionally
  // counts a destruction, so this constructor must count the creation too
  // (previously only the default constructor did, skewing created/destr).
  stats.createOne();
}
// Dispatch to the reader for the given on-disk `format` (0 or 1); reports
// a user-level error and returns false for unknown formats.
bool ConfusionNet::ReadF(std::istream& in,
  const std::vector<FactorType>& factorOrder,
  int format)
{
  VERBOSE(1, "read confusion net with format "<<format<<"\n");
  switch(format)
  {
  case 0: return ReadFormat0(in,factorOrder);
  case 1: return ReadFormat1(in,factorOrder);
  default:
    // NOTE(review): unqualified `stringstream` relies on a using-directive
    // pulled in through the included headers.
    stringstream strme;
    strme << "ERROR: unknown format '"<<format
      <<"' in ConfusionNet::Read";
    UserMessage::Add(strme.str());
  }
  return false;
}
// Read one confusion net in the default format (0) and, on success, fold
// it into the global statistics.  Returns the (boolean) parse result.
int ConfusionNet::Read(std::istream& in,
  const std::vector<FactorType>& factorOrder)
{
  const int result = ReadF(in, factorOrder, 0);
  if (result)
  {
    stats.collect(*this);
  }
  return result;
}
// Parse a surface token of the form "f0|f1|..." into a Word, registering
// each factor string with the global FactorCollection.
// NOTE(review): assumes the token carries at least factorOrder.size()
// fields; a shorter token indexes factorStrVector out of range -- confirm
// inputs are validated upstream.
void ConfusionNet::String2Word(const std::string& s,Word& w,
  const std::vector<FactorType>& factorOrder)
{
  std::vector<std::string> factorStrVector = Tokenize(s, "|");
  for(size_t i=0;i<factorOrder.size();++i)
    w.SetFactor(factorOrder[i],
      FactorCollection::Instance().AddFactor(Input,factorOrder[i],
        factorStrVector[i]));
}
// Parse format 0: one line per column, each line a sequence of
// "word p_1 .. p_k" tuples with k == numLinkParams.  An empty line ends the
// net.  Returns true iff at least one column was read.
bool ConfusionNet::ReadFormat0(std::istream& in,
  const std::vector<FactorType>& factorOrder)
{
  Clear();
  std::string line;
  size_t numLinkParams = StaticData::Instance().GetNumLinkParams();
  size_t numLinkWeights = StaticData::Instance().GetNumInputScores();
  // One extra configured weight means the last feature slot carries a
  // real-word-count indicator instead of an arc score.
  bool addRealWordCount = ((numLinkParams + 1) == numLinkWeights);
  while(getline(in,line)) {
    std::istringstream is(line);
    std::string word;
    Column col;
    while(is>>word) {
      Word w;
      String2Word(word,w,factorOrder);
      std::vector<float> probs(numLinkWeights,0.0);
      for(size_t i=0;i<numLinkParams;i++) {
        double prob;
        if (!(is>>prob)) {
          TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
          return false;
        }
        // Clamp probabilities into [0,1] before taking logs.
        if(prob<0.0)
        {
          VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
          prob=0.0;
        }
        else if (prob>1.0)
        {
          VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
          prob=1.0;
        }
        // Store the log-probability, floored at LOWEST_SCORE since
        // log(0.0) would be -inf.
        probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
      }
      //store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
      if (addRealWordCount && word!=EPSILON && word!="")
        probs[numLinkParams] = -1.0;
      col.push_back(std::make_pair(w,probs));
    }
    if(col.size()) {
      data.push_back(col);
      ShrinkToFit(data.back());
    }
    else break;
  }
  return !data.empty();
}
// Parse format 1: a header line, a line with the column count, then one
// line per column of "count word prob word prob ...".  Returns true iff at
// least one column was read.  This format carries a single score per arc.
bool ConfusionNet::ReadFormat1(std::istream& in,
  const std::vector<FactorType>& factorOrder)
{
  Clear();
  std::string line;
  if(!getline(in,line)) return 0;
  size_t s;
  if(getline(in,line)) s=atoi(line.c_str()); else return 0;
  data.resize(s);
  for(size_t i=0;i<data.size();++i) {
    if(!getline(in,line)) return 0;
    std::istringstream is(line);
    if(!(is>>s)) return 0;
    std::string word;double prob;
    data[i].resize(s);
    for(size_t j=0;j<s;++j)
      if(is>>word>>prob) {
        //TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
        // Store the score directly in slot 0.  The previous code built a
        // size-1 vector and then push_back'ed the score, leaving it at
        // index 1 while index 0 -- the slot checked and clamped below --
        // stayed 0.0.
        data[i][j].second.assign(1, (float) log(prob));
        if(data[i][j].second[0]<0) {
          VERBOSE(1, "WARN: neg costs: "<<data[i][j].second[0]<<" -> set to 0\n");
          data[i][j].second[0]=0.0;
        }
        String2Word(word,data[i][j].first,factorOrder);
      } else return 0;
  }
  return !data.empty();
}
// Dump the net, one line per column, as "(word, s1, s2, ...)" tuples.
void ConfusionNet::Print(std::ostream& out) const {
  out<<"conf net: "<<data.size()<<"\n";
  for(size_t i=0;i<data.size();++i) {
    out<<i<<" -- ";
    for(size_t j=0;j<data[i].size();++j) {
      // Print the word, then each score prefixed by ", ".  The original
      // also emitted ", " right after the word, yielding "(w, , s1...)"
      // with a doubled separator.
      out<<"("<<data[i][j].first.ToString();
      for(std::vector<float>::const_iterator scoreIterator = data[i][j].second.begin();scoreIterator<data[i][j].second.end();scoreIterator++) {
        out<<", "<<*scoreIterator;
      }
      out<<") ";
    }
    out<<"\n";
  }
  out<<"\n\n";
}
#ifdef _WIN32
// C4716 ("must return a value") is suppressed: abort() never returns.
#pragma warning(disable:4716)
#endif
// Extracting a contiguous sub-phrase is undefined for a confusion net
// (each position holds alternatives); calling this is always an error.
Phrase ConfusionNet::GetSubString(const WordsRange&) const {
  TRACE_ERR("ERROR: call to ConfusionNet::GetSubString\n");
  abort();
  //return Phrase(Input);
}
std::string ConfusionNet::GetStringRep(const vector<FactorType> factorsToPrint) const{ //not well defined yet
TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
return "";
}
#ifdef _WIN32
// C4716 ("must return a value") is suppressed: abort() never returns.
#pragma warning(disable:4716)
#endif
// Random access to a single word is undefined for a confusion net (each
// position holds many alternatives); calling this is always an error.
const Word& ConfusionNet::GetWord(size_t) const {
  // Fixed stale error text: it previously named GetFactorArray instead of
  // this function.
  TRACE_ERR("ERROR: call to ConfusionNet::GetWord\n");
  abort();
}
#ifdef _WIN32
#pragma warning(default:4716)
#endif
// Stream a human-readable dump of the confusion net.
std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
{
  cn.Print(out);
  return out;
}
// Build a confusion-net-specific translation option collection over this
// input, using the decoder-wide pruning settings.
TranslationOptionCollection*
ConfusionNet::CreateTranslationOptionCollection() const
{
  const StaticData &staticData = StaticData::Instance();
  TranslationOptionCollection *collection =
    new TranslationOptionCollectionConfusionNet(
      *this,
      staticData.GetMaxNoTransOptPerCoverage(),
      staticData.GetTranslationOptionThreshold());
  assert(collection);
  return collection;
}
}

63
src/ConfusionNet.h Normal file
View File

@ -0,0 +1,63 @@
// $Id: ConfusionNet.h 2939 2010-02-24 11:15:44Z jfouet $
#ifndef moses_ConfusionNet_h
#define moses_ConfusionNet_h
#include <vector>
#include <iostream>
#include "Word.h"
#include "InputType.h"
namespace Moses
{
class FactorCollection;
class TranslationOptionCollection;
class Sentence;
// A confusion-network input: a sequence of columns, each holding the
// alternative (Word, score-vector) pairs for one source position.
class ConfusionNet : public InputType {
public:
  typedef std::vector<std::pair<Word,std::vector<float> > > Column;
protected:
  std::vector<Column> data;  // one Column per source position
  // Readers for the two on-disk formats; both return success as bool.
  bool ReadFormat0(std::istream&,const std::vector<FactorType>& factorOrder);
  bool ReadFormat1(std::istream&,const std::vector<FactorType>& factorOrder);
  // Parse "f0|f1|..." into a Word using the given factor order.
  void String2Word(const std::string& s,Word& w,const std::vector<FactorType>& factorOrder);
public:
  ConfusionNet();
  virtual ~ConfusionNet();
  // Build a degenerate (single-alternative) net from a plain sentence.
  ConfusionNet(Sentence const& s);
  InputTypeEnum GetType() const
  { return ConfusionNetworkInput;}
  const Column& GetColumn(size_t i) const {assert(i<data.size());return data[i];}
  const Column& operator[](size_t i) const {return GetColumn(i);}
  virtual size_t GetColumnIncrement(size_t i, size_t j) const; //! returns 1 for CNs
  bool Empty() const {return data.empty();}
  size_t GetSize() const {return data.size();}
  void Clear() {data.clear();}
  // Read in the given format (0 or 1); collects no statistics itself.
  bool ReadF(std::istream&,const std::vector<FactorType>& factorOrder,int format=0);
  virtual void Print(std::ostream&) const;
  int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
  Phrase GetSubString(const WordsRange&) const; //TODO not defined
  std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined
  const Word& GetWord(size_t pos) const;
  TranslationOptionCollection* CreateTranslationOptionCollection() const;
};
std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn);
}
#endif

366
src/DPR_reordering.cpp Normal file
View File

@ -0,0 +1,366 @@
/*
**********************************************************
Cpp file ---------- DPR_reordering.cpp
The reordering feature function for MOSES
based on the DPR model proposed in (Ni et al., 2009)
Components:
vector<unsigned long long> m_dprOptionStartPOS --- store the start pos for each sentence option (to read from the .txt file)
ifstream sentenceOptionFile --- the stream file storing the sentence options
int sentenceID --- the sentence ID (indicating which sentence option block is used)
mapPhraseOption sentencePhraseOption --- sentence phrase option <left bound, right bound> -> target (string) -> probs
Functions:
0. Constructor: DPR_reordering(ScoreIndexManager &scoreIndexManager, const std::string &filePath, const std::vector<float>& weights)
1. interface functions:
GetNumScoreComponents() --- return the number of scores the component used (usually 1)
GetScoreProducerDescription() --- return the name of the reordering model
GetScoreProducerWeightShortName() --- return the short name of the weight for the score
2. Score producers:
Evaluate() --- to evaluate the reordering scores and add the score to the score component collection
EmptyHypothesisState() --- create an empty hypothesis
3. Other functions:
constructSentencePhraseOption() --- Construct sentencePhraseOption using sentenceID
clearSentencePhraseOption() --- clear the sentence phrase options
**********************************************************
*/
#include "DPR_reordering.h"
namespace Moses
{
/*
1. constructor
*/
// Build the DPR reordering feature: register the producer, store its
// weights, configure the class setup (3- or 5-class model) and open the
// precomputed per-sentence phrase-option database at `filePath`.
DPR_reordering::DPR_reordering(ScoreIndexManager &scoreIndexManager, const string filePath, const string classString, const vector<float>& weights)
{
  //1. Add the function in the scoreIndexManager
  scoreIndexManager.AddScoreProducer(this);
  //2. Set the weight for this score producer
  const_cast<StaticData&>(StaticData::Instance()).SetWeightsForScoreProducer(this, weights);
  //3. Get the class setup
  istringstream tempClassSetup(classString);
  tempClassSetup>>classSetup;
  if (classSetup==3)
  {
    // Precompute word-distance reordering costs log10(e^-k) for k=0..24.
    for (int k=0; k<25; k++)
      WDR_cost.push_back(log10(exp(-(float) k)));
    // NOTE(review): 0.3333 here is a raw probability while the 5-class
    // branch stores log10(0.2) -- the branches use different scales;
    // confirm which one generateReorderingProb() expects.
    unDetectProb = 0.3333;
  }
  else if (classSetup==5)
    unDetectProb = log10(0.2);
  else
    // NOTE(review): on an unsupported setup we only warn and continue with
    // unDetectProb uninitialized -- confirm this is intended.
    cerr<<"Error in DPR_reordering: Currently there is no class setup: "<<classSetup<<" in our model.\n";
  //4. get the start position of the sentence options
  string fileStartPos = filePath+".startPosition"; //path of the sentence start position file
  ifstream sentencePOS((char*) fileStartPos.c_str(),ios::binary);
  string eachLine;
  while (getline(sentencePOS,eachLine,'\n'))
  {
    istringstream tempString(eachLine);
    unsigned long long tempValue;
    tempString>>tempValue;
    m_dprOptionStartPOS.push_back(tempValue); //Get the start position of each sentence option DB
  }
  //5. Read the first sentence option
  sentenceID=0;
  sentenceOptionFile.open((char*) filePath.c_str(),ios::binary);
  if (!sentenceOptionFile.is_open())
    cerr<<"Error in DPR_reordering.cpp: can not open the sentence options file!\n";
  else
    constructSentencePhraseOption(); //construct the first sentencePhraseOption
  sentencePOS.close();
}
/*
2. interface functions
*/
//return the number of score components
// This feature contributes exactly one score to the log-linear model.
size_t DPR_reordering::GetNumScoreComponents() const
{
  return 1;
}
//return the description of this feature function
// Human-readable name of this feature (shown e.g. in score breakdowns).
string DPR_reordering::GetScoreProducerDescription() const
{
  // Fixed typo: "produders" -> "producers".
  return "Distance_phrase_reordering_probabilities_producers";
}
//return the weight short name
// Short name used to bind the feature weight in the configuration.
string DPR_reordering::GetScoreProducerWeightShortName() const
{
  return "weight-DPR";
}
/*
3. the score producers
*/
// This feature keeps no recombination state, so the empty state is NULL.
const FFState* DPR_reordering::EmptyHypothesisState() const
{
  return NULL;
}
//given the hypothesis (and previous hypothesis) computed and add the reordering score
//given the hypothesis (and previous hypothesis) computed and add the reordering score
FFState* DPR_reordering::Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator)
{
  //cerr << cur_hypo.GetInput();
  //cerr << cur_hypo.GetInput().GetTranslationId();
  //1. Check the sentence phrase option (check the ID starts from 0 or 1?)
  // Reload the per-sentence phrase-option block when the decoder moves on
  // to a new input sentence.
  long int currentSentenceID = cur_hypo.GetInput().GetTranslationId();
  if (sentenceID!=currentSentenceID)
  {
    sentenceID=currentSentenceID;
    constructSentencePhraseOption(); //construct the first sentencePhraseOption
  }
  //2. get the information current phrase: left_boundary, right_boundary, target translation
  //                       prev phrase: right_boundary
  size_t prev_right_boundary;
  size_t curr_left_boundary;
  size_t curr_right_boundary;
  // NOTE(review): assumes GetPrevHypo() never returns NULL here -- confirm.
  const Hypothesis* prevHypothesis = cur_hypo.GetPrevHypo();
  //check if there is a previous hypo
  if (prevHypothesis->GetId()==0)
    // NOTE(review): assigning -1 to a size_t wraps to SIZE_MAX; presumably
    // generateReorderingProb() treats this as the "no previous phrase"
    // sentinel -- confirm.
    prev_right_boundary=-1;
  else
    prev_right_boundary=prevHypothesis->GetCurrSourceWordsRange().GetEndPos();
  const WordsRange currWordsRange = cur_hypo.GetCurrSourceWordsRange();
  curr_left_boundary = currWordsRange.GetStartPos();
  curr_right_boundary = currWordsRange.GetEndPos();
  string targetTranslation = cur_hypo.GetCurrTargetPhrase().ToString();
  //3. Get the reordering probability
  float reorderingProb = generateReorderingProb(curr_left_boundary, curr_right_boundary, prev_right_boundary, targetTranslation);
  //simple, update the score -1.0
  accumulator->PlusEquals(this,reorderingProb);
  // No recombination state is produced by this feature.
  return NULL;
}
/*
4. Other functions
*/
/*
4.1 Clear the content in sentencePhraseOption
*/
void DPR_reordering::clearSentencePhraseOption()
{
for (mapPhraseOption::iterator iterator = sentencePhraseOption.begin(); iterator!= sentencePhraseOption.end(); iterator++)
{
iterator->second.clear(); //clear each map in mapTargetProbOption
}
sentencePhraseOption.clear(); //clear the components in sentencePhraseOption
}
/*
4.2 Construct sentencePhraseOption using sentenceID
*/
// Load the phrase-option block for the current sentenceID into sentencePhraseOption.
// Line format (as implied by the parsing below):
//   boundary " ::: " target " ||| " probs [" ||| " target " ||| " probs ...] " ;;; " boundary " ::: " ...
// where boundary is whitespace-separated unsigned shorts and probs are floats
// (converted to log10 for the 5-class setup).
void DPR_reordering::constructSentencePhraseOption()
{
//1. Get the start position of the sentence options
sentenceOptionFile.seekg(m_dprOptionStartPOS[sentenceID],ios::beg); //set the offset
string eachSentence;
getline(sentenceOptionFile,eachSentence,'\n');
//2. Search each separation
size_t boundaryFound = eachSentence.find(" ::: "); //find the separation between the boundary and the values
size_t boundaryFound_end; //position of the " ;;; " that closed the previous option
int countBoundaryOption=0;
while (boundaryFound!=string::npos)
{
//2.1 Get the boundary (create a phraseOption map)
vector<unsigned short> boundary; //store the boundary
unsigned short boundary_int;
string tempString; //store the boundary
if (countBoundaryOption==0)
tempString=eachSentence.substr(0,boundaryFound); //get the boundary string
else
tempString=eachSentence.substr(boundaryFound_end+5,boundaryFound-boundaryFound_end-5);
istringstream boundaryString(tempString);
while (boundaryString>>boundary_int)
boundary.push_back(boundary_int);
//2.2 Get the target string (all target translations)
boundaryFound_end=eachSentence.find(" ;;; ",boundaryFound+5);
string targetString=eachSentence.substr(boundaryFound+5,boundaryFound_end-boundaryFound-5);
size_t targetFound=targetString.find(" ||| ");
size_t probFound=targetString.find(" ||| ",targetFound+5);
size_t probFound_prev; //store the previous probs position
int countPhraseOption=0;
while (targetFound!=string::npos)
{
if (probFound==string::npos)
probFound=targetString.size();
string target; //store each target phrase
string tempProbString; //store the probability string
vector<float> tempProbs; //store the probabilities
float probValue; //store the probability value
//2.3 Get each target string
if (countPhraseOption==0)
target = targetString.substr(0,targetFound);
else
target= targetString.substr(probFound_prev+5,targetFound-probFound_prev-5);
//2.4 Get the probability vector
tempProbString=targetString.substr(targetFound+5,probFound-targetFound-5);
istringstream probString(tempProbString);
while(probString>>probValue)
{
if (classSetup==5)
probValue=log10(probValue); //get the log probability
tempProbs.push_back(probValue);
}
//2.5 Update the information
sentencePhraseOption[boundary][target]=tempProbs;
countPhraseOption++;
probFound_prev=probFound;
targetFound=targetString.find(" ||| ",probFound+5);
if (targetFound!=string::npos)
probFound=targetString.find(" ||| ",targetFound+5);
}
//3. Get the next boundary.
// BUGFIX: the original reset boundaryFound_end to boundaryFound (the previous
// " ::: "), so every boundary after the first was extracted from the target/prob
// text and failed to parse as unsigned shorts (empty key). Keep boundaryFound_end
// at the " ;;; " terminator found in step 2.2, matching the first-iteration branch.
if (boundaryFound_end==string::npos)
break; //malformed line (no " ;;; " terminator): stop instead of wrapping npos+5
countBoundaryOption++;
boundaryFound=eachSentence.find(" ::: ",boundaryFound_end+5); //Get next boundary found
}
}
/*
4.3 generate the reordering probability
*/
// Compute the reordering score for the phrase spanning [boundary_left, boundary_right]
// given the previous phrase's right boundary and the target-side translation string.
// Falls back to a distance-based cost (3-class) or a constant log-prob (5-class)
// when the source span or the target phrase is not in sentencePhraseOption.
// NOTE(review): reorderingProb is returned uninitialized if classSetup is neither
// 3 nor 5; createOrientationClass already reports that configuration as an error.
float DPR_reordering::generateReorderingProb(size_t boundary_left, size_t boundary_right, size_t prev_boundary_right, string targetPhrase)
{
float reorderingProb;
//1. get the distance reordering
// Unsigned arithmetic narrowed to int: for the sentence-initial prev boundary of
// (size_t)-1 the +1 wraps to 0, yielding the intended signed distance.
int reorderDistance = prev_boundary_right+1-boundary_left; //reordering distance
int reorderOrientation = createOrientationClass(reorderDistance); //reordering orientation
//2. get the boundary vector
vector<unsigned short> phrase_boundary;
phrase_boundary.push_back(boundary_left);
phrase_boundary.push_back(boundary_right);
mapPhraseOption::const_iterator boundaryFound = sentencePhraseOption.find(phrase_boundary);
//3.1 If no this source phrase (then return equal probability)
if (boundaryFound==sentencePhraseOption.end())
{
if (classSetup==3)
{
// NOTE(review): assumes WDR_cost is long enough for |distance| — confirm sizing at setup.
reorderingProb = WDR_cost[abs(reorderDistance)]; //using word-based distance reordering
}
else if (classSetup==5)
{
reorderingProb=unDetectProb;
}
}
else
{
mapTargetProbOption::const_iterator targetFound = boundaryFound->second.find(targetPhrase);
//3.2 if no this target phrase
if (targetFound == boundaryFound->second.end())
{
if (classSetup==3)
{
reorderingProb = WDR_cost[abs(reorderDistance)]; //using word-based distance reordering
}
else if (classSetup==5)
{
reorderingProb=unDetectProb;
}
}
//3.3 else, get normal reordering probability
else
{
if (classSetup ==3)
{
if (reorderOrientation==1) //special case: monotone
{
// Confident monotone prediction costs nothing; otherwise scale the
// unit word-distance cost by the (capped) inverse predicted probability.
if (targetFound->second[1]>0.5)
reorderingProb=0.0;
else
{
float ratio=min(MAXRATIO, 1.0/(3*targetFound->second[1]));
reorderingProb=ratio*WDR_cost[1];
}
}
else
{
float ratio=min(MAXRATIO, 1.0/(3*targetFound->second[reorderOrientation]));
reorderingProb=ratio*WDR_cost[abs(reorderDistance)];
}
}
else if (classSetup==5)
{
// 5-class model: the stored values are already log10 probabilities.
reorderingProb=targetFound->second[reorderOrientation];
}
}
}
return reorderingProb;
}
/*
4.4. int createOrientationClass(int dist) --- create the orientation class from the signed reordering distance
*/
// Map a signed reordering distance to an orientation class index.
// 3-class setup: 0 = reverse (dist<0), 1 = monotone (dist==0), 2 = forward (dist>0).
// 5-class setup: buckets at -5 and +5 give classes 0..4.
// Returns -1 (after reporting the error) for an unsupported classSetup.
int DPR_reordering::createOrientationClass(int dist)
{
// BUGFIX: initialize — the original returned an uninitialized value (UB) when
// classSetup was neither 3 nor 5.
int orientationClass = -1;
//If three-class setup
if (classSetup==3)
{
if (dist<0)
orientationClass=0;
else if (dist==0)
orientationClass=1;
else
orientationClass=2;
}
else if (classSetup==5)
{
if (dist<=-5)
orientationClass=0;
else if (dist>-5 and dist<0)
orientationClass=1;
else if (dist==0)
orientationClass=2;
else if (dist>0 and dist<5)
orientationClass=3;
else
orientationClass=4;
}
else
{
cerr<<"Error in DPR_reordering: Currently there is no class setup: "<<classSetup<<" in our model.\n";
}
return orientationClass; //return the orientation class
}
// Destructor: release the handle on the sentence-options file opened for this model.
DPR_reordering::~DPR_reordering()
{
sentenceOptionFile.close();
}
} // namespace

99
src/DPR_reordering.h Normal file
View File

@ -0,0 +1,99 @@
/*
**********************************************************
Head file ---------- DPR_reordering.h
The reordering feature function for MOSES
based on the DPR model proposed in (Ni et al., 2009)
Components:
vector<unsigned long long> m_dprOptionStartPOS --- store the start pos for each sentence option (to read from the .txt file)
ifstream sentenceOptionFile --- the stream file storing the sentence options
int sentenceID --- the sentence ID (indicating which sentence option block is used)
mapPhraseOption sentencePhraseOption --- sentence phrase option <left bound, right bound> -> target (string) -> probs
Functions:
0. Constructor: DPR_reordering(ScoreIndexManager &scoreIndexManager, const std::string &filePath, const std::vector<float>& weights)
1. interface functions:
GetNumScoreComponents() --- return the number of scores the component used (usually 1)
GetScoreProducerDescription() --- return the name of the reordering model
GetScoreProducerWeightShortName() --- return the short name of the weight for the score
2. Score producers:
Evaluate() --- to evaluate the reordering scores and add the score to the score component collection
EmptyHypothesisState() --- create an empty hypothesis
3. Other functions:
constructSentencePhraseOption() --- Construct sentencePhraseOption using sentenceID
clearSentencePhraseOption() --- clear the sentence phrase options
**********************************************************
*/
#pragma once
#ifndef DPR_REORDERING_H
#define DPR_REORDERING_H
#include <cstdlib>
#include <map>
#include <iostream>
#include <vector>
#include <string>
#include <sstream> //using istringstream
#include <fstream> //using ifstream
#include <math.h>
#include "FeatureFunction.h"
#include "Hypothesis.h"
#include "WordsRange.h"
#include "StaticData.h"
#include "InputType.h"
#define MAXRATIO 3.0 //the maximum ration for the 3-class setup
/*
#ifdef __GNUC__
#include <ext/hash_map>
#else
#include <hash_map>
#endif
namespace std{using namespace __gnu_cxx;}*/
using namespace std;
using std::ifstream;
using std::istringstream;
using std::vector;
using std::string;
//for sentencePhraseOption
typedef std::map<vector<unsigned short>, map<string, vector<float> > > mapPhraseOption;
typedef std::map<string, vector<float> > mapTargetProbOption;
namespace Moses
{
using namespace std;
//define the class DPR_reordering
class DPR_reordering : public StatefulFeatureFunction
{
public:
//constructor: filePath locates the sentence-options file, classString selects the
//orientation-class setup, weights are the feature weights registered with Moses
DPR_reordering(ScoreIndexManager &scoreIndexManager, const string filePath, const string classString, const vector<float>& weights);
~DPR_reordering();
public:
//interface: include 3 functions
size_t GetNumScoreComponents() const; //return the number of scores the component used
string GetScoreProducerDescription() const; //return the name of the reordering model
string GetScoreProducerWeightShortName() const; //return the short name of the weight for the score
public:
//The evaluation function and score calculation function
FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator);
const FFState* EmptyHypothesisState() const;
public:
void clearSentencePhraseOption(); //clear the sentence phrase options
void constructSentencePhraseOption(); //construct sentence phrase options (for a sentence)
float generateReorderingProb(size_t boundary_left, size_t boundary_right, size_t prev_boundary_right, string targetPhrase); //generate the reordering probability
int createOrientationClass(int dist); //the create the orientation class
private:
vector<unsigned long long> m_dprOptionStartPOS; //store the start pos (file offset) for each sentence option
ifstream sentenceOptionFile; //the ifstream file of the sentenceOption
long int sentenceID; //store the ID of current sentence needed translation
mapPhraseOption sentencePhraseOption; //store the phrase option for each sentence
int classSetup; //store the number of orientations (3 or 5 supported)
float unDetectProb; //the const reordering prob if the phrase pair is not in sentence option
vector<float> WDR_cost; //the word distance reordering cost
};
};
#endif

36
src/DecodeGraph.cpp Normal file
View File

@ -0,0 +1,36 @@
// $Id: TranslationOptionCollection.cpp 1429 2007-07-20 13:03:12Z hieuhoang1972 $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "DecodeGraph.h"
#include "DecodeStep.h"
#include "TypeDef.h"
#include "Util.h"
namespace Moses
{
DecodeGraph::~DecodeGraph()
{
// RemoveAllInColl (Util.h) — presumably deletes each owned DecodeStep* before
// clearing the list; confirm the helper's semantics before relying on ownership here.
RemoveAllInColl(m_steps);
}
}

68
src/DecodeGraph.h Normal file
View File

@ -0,0 +1,68 @@
// $Id: TranslationOptionCollection.cpp 1429 2007-07-20 13:03:12Z hieuhoang1972 $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_DecodeGraph_h
#define moses_DecodeGraph_h
#include <list>
#include <iterator>
namespace Moses
{
class DecodeStep;
//! list of DecodeStep s which factorizes the translation
class DecodeGraph
{
protected:
std::list<const DecodeStep*> m_steps; // decode steps in application order; deleted in the destructor (DecodeGraph.cpp)
size_t m_position; // index of this graph within the decode sequence
public:
/**
* position: The position of this graph within the decode sequence.
**/
DecodeGraph(size_t position): m_position(position) {}
//! iterators
typedef std::list<const DecodeStep*>::iterator iterator;
typedef std::list<const DecodeStep*>::const_iterator const_iterator;
const_iterator begin() const { return m_steps.begin(); }
const_iterator end() const { return m_steps.end(); }
//! position of this graph within the decode sequence
size_t GetPosition() const
{
return m_position;
}
// NOTE(review): non-virtual destructor — do not derive and delete through DecodeGraph*.
~DecodeGraph();
//! Add another decode step to the graph (takes ownership; freed in the destructor)
void Add(const DecodeStep *decodeStep)
{
m_steps.push_back(decodeStep);
}
};
}
#endif

66
src/DecodeStep.cpp Normal file
View File

@ -0,0 +1,66 @@
// $Id: DecodeStep.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "DecodeStep.h"
#include "PhraseDictionaryMemory.h"
#include "GenerationDictionary.h"
#include "StaticData.h"
namespace Moses
{
// Build a decode step over dictionary `ptr`, chained after `prev` (may be NULL for
// the first step). Derives, from the factor masks:
//  - m_outputFactors: factors present after this step (previous ones plus this
//    dictionary's outputs),
//  - m_conflictFactors: factors this step produces that already existed (mask AND),
//  - m_newOutputFactors: factors genuinely new in this step (mask XOR with previous).
DecodeStep::DecodeStep(Dictionary *ptr, const DecodeStep* prev)
:m_ptr(ptr)
{
FactorMask prevOutputFactors;
if (prev) prevOutputFactors = prev->m_outputFactors;
m_outputFactors = prevOutputFactors;
FactorMask conflictMask = (m_outputFactors & ptr->GetOutputFactorMask());
m_outputFactors |= ptr->GetOutputFactorMask();
FactorMask newOutputFactorMask = m_outputFactors ^ prevOutputFactors; //xor
m_newOutputFactors.resize(newOutputFactorMask.count());
m_conflictFactors.resize(conflictMask.count());
// Expand the bit masks into sorted lists of factor indices.
size_t j=0, k=0;
for (size_t i = 0; i < MAX_NUM_FACTORS; i++) {
if (newOutputFactorMask[i]) m_newOutputFactors[j++] = i;
if (conflictMask[i]) m_conflictFactors[k++] = i;
}
VERBOSE(2,"DecodeStep():\n\toutputFactors=" << m_outputFactors
<< "\n\tconflictFactors=" << conflictMask
<< "\n\tnewOutputFactors=" << newOutputFactorMask << std::endl);
}
// Out-of-line virtual destructor (declared virtual in DecodeStep.h); m_ptr is not deleted here.
DecodeStep::~DecodeStep() {}
/** returns phrase table (dictionary) for translation step */
const PhraseDictionary &DecodeStep::GetPhraseDictionary() const
{
const PhraseDictionary *table = static_cast<const PhraseDictionary*>(m_ptr);
return *table;
}
/** returns generation table (dictionary) for generation step */
const GenerationDictionary &DecodeStep::GetGenerationDictionary() const
{
const GenerationDictionary *table = static_cast<const GenerationDictionary*>(m_ptr);
return *table;
}
}

113
src/DecodeStep.h Normal file
View File

@ -0,0 +1,113 @@
// $Id: DecodeStep.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_DecodeStep_h
#define moses_DecodeStep_h
#include <cassert>
#include "TypeDef.h"
#include "Dictionary.h"
namespace Moses
{
class PhraseDictionary;
class GenerationDictionary;
class TranslationOption;
class TranslationOptionCollection;
class PartialTranslOptColl;
class FactorCollection;
class InputType;
/*! Specification for a decoding step.
* The factored translation model consists of Translation and Generation
* steps, which consult a Dictionary of phrase translations or word
* generations. This class implements the specification for one of these
* steps, both the DecodeType and a pointer to the Dictionary
**/
class DecodeStep
{
protected:
const Dictionary *m_ptr; //!< pointer to translation/generation table (not owned)
FactorMask m_outputFactors; //!< mask of what factors exist on the output side after this decode step
std::vector<FactorType> m_conflictFactors; //!< list of the factors that may conflict during this step
std::vector<FactorType> m_newOutputFactors; //!< list of the factors that are new in this step, may be empty
public:
DecodeStep(); //! not implemented
DecodeStep(Dictionary *ptr, const DecodeStep* prevDecodeStep);
virtual ~DecodeStep();
//! mask of factors that are present after this decode step
const FactorMask& GetOutputFactorMask() const
{
return m_outputFactors;
}
//! returns true if this decode step must match some pre-existing factors
bool IsFilteringStep() const
{
return !m_conflictFactors.empty();
}
//! returns true if this decode step produces one or more new factors
bool IsFactorProducingStep() const
{
return !m_newOutputFactors.empty();
}
/*! returns a list (possibly empty) of the (target side) factors that
* are produced in this decoding step. For example, if a previous step
* generated factor 1, and this step generates 1,2, then only 2 will be
* in the returned vector. */
const std::vector<FactorType>& GetNewOutputFactors() const
{
return m_newOutputFactors;
}
/*! returns a list (possibly empty) of the (target side) factors that
* are produced BUT ALREADY EXIST and therefore must be checked for
* conflict or compatibility */
const std::vector<FactorType>& GetConflictFactors() const
{
return m_conflictFactors;
}
/*! returns phrase table (dictionary) for translation step */
const PhraseDictionary &GetPhraseDictionary() const;
/*! returns generation table (dictionary) for generation step */
const GenerationDictionary &GetGenerationDictionary() const;
/*! returns dictionary in abstract class */
const Dictionary* GetDictionaryPtr() const {return m_ptr;}
/*! Given an input TranslationOption, extend it in some way (put results in outputPartialTranslOptColl) */
virtual void Process(const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection *toc
, bool adhereTableLimit) const = 0;
};
}
#endif

View File

@ -0,0 +1,176 @@
// $Id: DecodeStepGeneration.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "DecodeStepGeneration.h"
#include "GenerationDictionary.h"
#include "TranslationOption.h"
#include "TranslationOptionCollection.h"
#include "PartialTranslOptColl.h"
#include "FactorCollection.h"
namespace Moses
{
// Generation step over `dict`, chained after `prev`; all mask bookkeeping is done by the base ctor.
DecodeStepGeneration::DecodeStepGeneration(GenerationDictionary* dict, const DecodeStep* prev)
: DecodeStep(dict, prev)
{
}
// Downcast the stored dictionary to the generation table this step was built with.
const GenerationDictionary &DecodeStepGeneration::GetGenerationDictionary() const
{
const GenerationDictionary *table = static_cast<const GenerationDictionary*>(m_ptr);
return *table;
}
// Merge the generated phrase (and its score) into a copy of oldTO.
// Returns NULL when this is a filtering step and the generated factors
// conflict with factors oldTO already carries; otherwise a new heap-allocated
// TranslationOption owned by the caller.
TranslationOption *DecodeStepGeneration::MergeGeneration(const TranslationOption& oldTO, Phrase &mergePhrase
, const ScoreComponentCollection& generationScore) const
{
if (IsFilteringStep() && !oldTO.IsCompatible(mergePhrase, m_conflictFactors))
return NULL;
TranslationOption *merged = new TranslationOption(oldTO);
merged->MergeNewFeatures(mergePhrase, generationScore, m_newOutputFactors);
return merged;
}
// helpers
typedef pair<Word, ScoreComponentCollection> WordPair;
typedef list< WordPair > WordList;
// 1st = word
// 2nd = score
typedef list< WordPair >::const_iterator WordListIterator;
/** used in generation: increases iterators when looping through the exponential number of generation expansions */
inline void IncrementIterators(vector< WordListIterator > &wordListIterVector
, const vector< WordList > &wordListVector)
{
// Odometer-style increment: advance the first position that still has a next
// entry; any position that overflows wraps back to its first entry and the
// carry moves on to the next position.
for (size_t pos = 0 ; pos < wordListVector.size() ; pos++)
{
WordListIterator &cursor = wordListIterVector[pos];
++cursor;
if (cursor != wordListVector[pos].end())
return; // eg. 4 -> 5: no carry needed
// eg 9 -> 10: wrap this digit and carry into the next position
cursor = wordListVector[pos].begin();
}
}
// Expand inputPartialTranslOpt with every combination of generated factors for
// each target word (Cartesian product over the per-word generation lists),
// adding one merged TranslationOption per combination to outputPartialTranslOptColl.
// If any target word has no entry in the generation dictionary, nothing is added.
void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection *toc
, bool adhereTableLimit) const
{
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0)
{ // word deletion: nothing to generate from, pass the option through unchanged
TranslationOption *newTransOpt = new TranslationOption(inputPartialTranslOpt);
outputPartialTranslOptColl.Add(newTransOpt);
return;
}
// normal generation step
const GenerationDictionary &generationDictionary = decodeStep.GetGenerationDictionary();
// const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
const Phrase &targetPhrase = inputPartialTranslOpt.GetTargetPhrase();
size_t targetLength = targetPhrase.GetSize();
// generation list for each word in phrase
vector< WordList > wordListVector(targetLength);
// create generation list
int wordListVectorPos = 0;
for (size_t currPos = 0 ; currPos < targetLength ; currPos++) // going through all words
{
// generatable factors for this word to be put in wordList
WordList &wordList = wordListVector[wordListVectorPos];
const Word &word = targetPhrase.GetWord(currPos);
// consult dictionary for possible generations for this word
const OutputWordCollection *wordColl = generationDictionary.FindWord(word);
if (wordColl == NULL)
{ // word not found in generation dictionary
//toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
return; // can't be part of a phrase, special handling
}
else
{
// sort(*wordColl, CompareWordCollScore);
OutputWordCollection::const_iterator iterWordColl;
for (iterWordColl = wordColl->begin() ; iterWordColl != wordColl->end(); ++iterWordColl)
{
const Word &outputWord = (*iterWordColl).first;
const ScoreComponentCollection& score = (*iterWordColl).second;
// enter into word list generated factor(s) and its(their) score(s)
wordList.push_back(WordPair(outputWord, score));
}
wordListVectorPos++; // done, next word
}
}
// use generation list (wordList)
// set up iterators (total number of expansions = product of per-word list sizes)
size_t numIteration = 1;
vector< WordListIterator > wordListIterVector(targetLength);
vector< const Word* > mergeWords(targetLength);
for (size_t currPos = 0 ; currPos < targetLength ; currPos++)
{
wordListIterVector[currPos] = wordListVector[currPos].begin();
numIteration *= wordListVector[currPos].size();
}
// go thru each possible factor for each word & create hypothesis
for (size_t currIter = 0 ; currIter < numIteration ; currIter++)
{
ScoreComponentCollection generationScore; // total score for this string of words
// create vector of words with new factors for last phrase
for (size_t currPos = 0 ; currPos < targetLength ; currPos++)
{
const WordPair &wordPair = *wordListIterVector[currPos];
mergeWords[currPos] = &(wordPair.first);
generationScore.PlusEquals(wordPair.second);
}
// merge with existing trans opt; MergeGeneration returns NULL on factor conflict
Phrase genPhrase(Output, mergeWords);
TranslationOption *newTransOpt = MergeGeneration(inputPartialTranslOpt, genPhrase, generationScore);
if (newTransOpt != NULL)
{
outputPartialTranslOptColl.Add(newTransOpt);
}
// increment iterators (odometer step to the next factor combination)
IncrementIterators(wordListIterVector, wordListVector);
}
}

View File

@ -0,0 +1,60 @@
// $Id: DecodeStepGeneration.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_DecodeStepGeneration_h
#define moses_DecodeStepGeneration_h
#include "DecodeStep.h"
namespace Moses
{
class GenerationDictionary;
class Phrase;
class ScoreComponentCollection;
//! subclass of DecodeStep for generation step
class DecodeStepGeneration : public DecodeStep
{
public:
DecodeStepGeneration(GenerationDictionary* dict, const DecodeStep* prev);
//! returns generation table (dictionary) for generation step
const GenerationDictionary &GetGenerationDictionary() const;
virtual void Process(const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection *toc
, bool adhereTableLimit) const;
private:
/*! create new TranslationOption from merging oldTO with mergePhrase
This function runs IsCompatible() to ensure the two can be merged;
returns NULL if they cannot
*/
TranslationOption *MergeGeneration(const TranslationOption& oldTO, Phrase &mergePhrase
, const ScoreComponentCollection& generationScore) const;
};
}
#endif

View File

@ -0,0 +1,136 @@
// $Id: DecodeStepTranslation.cpp 2477 2009-08-07 16:47:54Z bhaddow $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "DecodeStepTranslation.h"
#include "PhraseDictionaryMemory.h"
#include "TranslationOption.h"
#include "TranslationOptionCollection.h"
#include "PartialTranslOptColl.h"
#include "FactorCollection.h"
namespace Moses
{
// Translation step over phrase table `dict`, chained after `prev`; keeps a typed
// copy of the dictionary pointer for ProcessInitialTranslation.
DecodeStepTranslation::DecodeStepTranslation(PhraseDictionary* dict, const DecodeStep* prev)
: DecodeStep(dict, prev), m_phraseDictionary(dict)
{
}
/*const PhraseDictionary &DecodeStepTranslation::GetPhraseDictionary() const
{
return *m_phraseDictionary;
}*/
// Merge targetPhrase (and its score breakdown) into a copy of oldTO.
// Returns NULL when this is a filtering step and the target phrase's factors
// conflict with factors oldTO already carries; otherwise a new heap-allocated
// TranslationOption owned by the caller.
TranslationOption *DecodeStepTranslation::MergeTranslation(const TranslationOption& oldTO, const TargetPhrase &targetPhrase) const
{
if (IsFilteringStep()) {
if (!oldTO.IsCompatible(targetPhrase, m_conflictFactors))
return NULL; // was `return 0` — NULL for consistency with DecodeStepGeneration::MergeGeneration
}
TranslationOption *newTransOpt = new TranslationOption(oldTO);
newTransOpt->MergeNewFeatures(targetPhrase, targetPhrase.GetScoreBreakdown(), m_newOutputFactors);
return newTransOpt;
}
// Extend inputPartialTranslOpt with every target phrase the phrase table offers
// for its source span (optionally capped at the table limit), adding each
// successful merge to outputPartialTranslOptColl.
void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection *toc
, bool adhereTableLimit) const
{
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0)
{ // word deletion: pass the option through unchanged
outputPartialTranslOptColl.Add(new TranslationOption(inputPartialTranslOpt));
return;
}
// normal trans step
const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
const PhraseDictionary &phraseDictionary = decodeStep.GetPhraseDictionary();
const size_t currSize = inputPartialTranslOpt.GetTargetPhrase().GetSize();
const size_t tableLimit = phraseDictionary.GetTableLimit();
const TargetPhraseCollection *phraseColl= phraseDictionary.GetTargetPhraseCollection(toc->GetSource(),sourceWordsRange);
if (phraseColl != NULL)
{
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
// honour the table limit only when requested and the collection exceeds it
iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
for (iterTargetPhrase = phraseColl->begin(); iterTargetPhrase != iterEnd; ++iterTargetPhrase)
{
const TargetPhrase& targetPhrase = **iterTargetPhrase;
// skip if the target phrase length differs from the current option's length
if (targetPhrase.GetSize() != currSize) continue;
TranslationOption *newTransOpt = MergeTranslation(inputPartialTranslOpt, targetPhrase);
if (newTransOpt != NULL)
{
outputPartialTranslOptColl.Add( newTransOpt );
}
}
}
else if (sourceWordsRange.GetNumWordsCovered() == 1)
{ // unknown handler
//toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
}
}
// Seed the partial-option collection for source span [startPos, endPos] with one
// TranslationOption per target phrase from this step's own phrase table
// (optionally capped at the table limit). Used for the first translation step only.
void DecodeStepTranslation::ProcessInitialTranslation(
const InputType &source
,PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos, size_t endPos, bool adhereTableLimit) const
{
const size_t tableLimit = m_phraseDictionary->GetTableLimit();
const WordsRange wordsRange(startPos, endPos);
const TargetPhraseCollection *phraseColl = m_phraseDictionary->GetTargetPhraseCollection(source,wordsRange);
if (phraseColl != NULL)
{
IFVERBOSE(3) {
if(StaticData::Instance().GetInputType() == SentenceInput)
TRACE_ERR("[" << source.GetSubString(wordsRange) << "; " << startPos << "-" << endPos << "]\n");
else
TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
}
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
// honour the table limit only when requested and the collection exceeds it
iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
for (iterTargetPhrase = phraseColl->begin() ; iterTargetPhrase != iterEnd ; ++iterTargetPhrase)
{
const TargetPhrase &targetPhrase = **iterTargetPhrase;
outputPartialTranslOptColl.Add ( new TranslationOption(wordsRange, targetPhrase, source) );
VERBOSE(3,"\t" << targetPhrase << "\n");
}
VERBOSE(3,endl);
}
}
}

View File

@ -0,0 +1,67 @@
// $Id: DecodeStepTranslation.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_DecodeStepTranslation_h
#define moses_DecodeStepTranslation_h
#include "DecodeStep.h"
#include "PhraseDictionary.h"
namespace Moses
{
class PhraseDictionary;
class TargetPhrase;
//! subclass of DecodeStep for translation step
class DecodeStepTranslation : public DecodeStep
{
public:
DecodeStepTranslation(); //! not implemented
//! construct a translation step backed by 'dict', chained after 'prev'
DecodeStepTranslation(PhraseDictionary* dict, const DecodeStep* prev);
//! returns phrase table (dictionary) for translation step
const PhraseDictionary &GetPhraseDictionary() const;
//! expand an existing partial option by applying this translation step;
//! compatible merged options are added to outputPartialTranslOptColl
virtual void Process(const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection *toc
, bool adhereTableLimit) const;
/*! initialize list of partial translation options by applying the first translation step
* Ideally, this function should be in DecodeStepTranslation class
*/
void ProcessInitialTranslation(
const InputType &source
, PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos, size_t endPos, bool adhereTableLimit) const;
private:
/*! create new TranslationOption from merging oldTO with mergePhrase
This function runs IsCompatible() to ensure the two can be merged
*/
TranslationOption *MergeTranslation(const TranslationOption& oldTO, const TargetPhrase &targetPhrase) const;
// non-owning pointer to the phrase table this step consults
PhraseDictionary* m_phraseDictionary;
};
}
#endif

38
src/Dictionary.cpp Normal file
View File

@ -0,0 +1,38 @@
// $Id: Dictionary.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Dictionary.h"
#include "FactorTypeSet.h"
namespace Moses
{
// Base-class constructor: record how many score components this
// dictionary contributes to the model's score vector.
Dictionary::Dictionary(size_t numScoreComponent)
:m_numScoreComponent(numScoreComponent)
{
}
// No resources owned at this level.
Dictionary::~Dictionary() {}
// Per-sentence cleanup is a no-op by default; subclasses override.
void Dictionary::CleanUp() {}
}

68
src/Dictionary.h Normal file
View File

@ -0,0 +1,68 @@
// $Id: Dictionary.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_Dictionary_h
#define moses_Dictionary_h
#include <vector>
#include "FactorTypeSet.h"
#include "ScoreProducer.h"
namespace Moses
{
/** Abstract class from which PhraseDictionary and GenerationDictionary
* are inherited.
*/
class Dictionary
{
protected:
// number of score components this dictionary contributes
const size_t m_numScoreComponent;
// factor types consumed from the source side
FactorMask m_inputFactors;
// factor types produced on the target side
FactorMask m_outputFactors;
public:
//! Constructor
Dictionary(size_t numScoreComponent);
//!Destructor
virtual ~Dictionary();
//! returns output factor types as specified by the ini file
const FactorMask& GetOutputFactorMask() const
{
return m_outputFactors;
}
//! returns input factor types as specified by the ini file
const FactorMask& GetInputFactorMask() const
{
return m_inputFactors;
}
//! returns whether this dictionary is to be used for Translate or Generate
virtual DecodeType GetDecodeType() const = 0;
// clean up temporary memory, called after processing each sentence
virtual void CleanUp();
};
}
#endif

154
src/DummyScoreProducers.cpp Normal file
View File

@ -0,0 +1,154 @@
// $Id: DummyScoreProducers.cpp 2477 2009-08-07 16:47:54Z bhaddow $
#include <cassert>
#include "FFState.h"
#include "StaticData.h"
#include "DummyScoreProducers.h"
#include "WordsRange.h"
#include "TranslationOption.h"
namespace Moses
{
// Search state for the traditional distortion model: the source range the
// last hypothesis covered, plus the position of the first gap in coverage.
struct DistortionState_traditional : public FFState {
WordsRange range;
int first_gap;
DistortionState_traditional(const WordsRange& wr, int fg) : range(wr), first_gap(fg) {}
// States are ordered by the end position of the covered range only;
// hypotheses with equal states can be recombined.
int Compare(const FFState& other) const {
const DistortionState_traditional& o =
static_cast<const DistortionState_traditional&>(other);
if (range.GetEndPos() < o.range.GetEndPos()) return -1;
if (range.GetEndPos() > o.range.GetEndPos()) return 1;
return 0;
}
};
// Placeholder for the Moore & Quirk (MT Summit 2007) distortion state.
struct DistortionState_MQ2007 : public FFState {
//TODO
};
// Initial state: nothing covered yet (NOT_FOUND sentinels).
const FFState* DistortionScoreProducer::EmptyHypothesisState() const {
return new DistortionState_traditional(WordsRange(NOT_FOUND,NOT_FOUND), NOT_FOUND);
}
// Register this producer so the score index manager assigns it a slot in
// the global score vector.
DistortionScoreProducer::DistortionScoreProducer(ScoreIndexManager &scoreIndexManager)
{
scoreIndexManager.AddScoreProducer(this);
}
// Distortion contributes a single score component.
size_t DistortionScoreProducer::GetNumScoreComponents() const
{
return 1;
}
std::string DistortionScoreProducer::GetScoreProducerDescription() const
{
return "Distortion";
}
// "d" is the weight short name used in moses.ini ([weight-d]).
std::string DistortionScoreProducer::GetScoreProducerWeightShortName() const
{
return "d";
}
// Distortion penalty for jumping from source range 'prev' to 'curr'.
// With USE_OLD set (the current configuration) this is simply the negated
// distortion distance computed by the input.
float DistortionScoreProducer::CalculateDistortionScore(const Hypothesis& hypo,
const WordsRange &prev, const WordsRange &curr, const int FirstGap) const
{
const int USE_OLD = 1;
if (USE_OLD) {
return - (float) hypo.GetInput().ComputeDistortionDistance(prev, curr);
}
// NOTE(review): everything below is unreachable while USE_OLD == 1; it is
// the Moore & Quirk (MT Summit 2007) "pay early" variant kept for toggling.
// Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
int prefixEndPos = FirstGap-1;
if ((int) curr.GetStartPos() == prefixEndPos+1) {
return 0;
}
if ((int) curr.GetEndPos() < (int) prev.GetEndPos()) {
return (float) -2*curr.GetNumWordsCovered();
}
if ((int) prev.GetEndPos() <= prefixEndPos) {
int z = curr.GetStartPos()-prefixEndPos;
return (float) -2*(z + curr.GetNumWordsCovered());
}
return (float) -2*(curr.GetNumWordsBetween(prev) + curr.GetNumWordsCovered());
}
// No scores come directly from the input (e.g. confusion network costs).
size_t DistortionScoreProducer::GetNumInputScores() const { return 0;}
// Score the distortion of 'hypo' relative to the previous state, add it to
// the accumulator, and return the new state (caller takes ownership).
FFState* DistortionScoreProducer::Evaluate(
const Hypothesis& hypo,
const FFState* prev_state,
ScoreComponentCollection* out) const {
const DistortionState_traditional* prev = static_cast<const DistortionState_traditional*>(prev_state);
const float distortionScore = CalculateDistortionScore(
hypo,
prev->range,
hypo.GetCurrSourceWordsRange(),
prev->first_gap);
out->PlusEquals(this, distortionScore);
// The new state's first_gap is taken from the PREVIOUS hypothesis'
// coverage bitmap (the gap before this expansion was applied).
DistortionState_traditional* res = new DistortionState_traditional(
hypo.GetCurrSourceWordsRange(),
hypo.GetPrevHypo()->GetWordsBitmap().GetFirstGapPos());
return res;
}
// Register with the score index manager to claim a score-vector slot.
WordPenaltyProducer::WordPenaltyProducer(ScoreIndexManager &scoreIndexManager)
{
scoreIndexManager.AddScoreProducer(this);
}
size_t WordPenaltyProducer::GetNumScoreComponents() const
{
return 1;
}
std::string WordPenaltyProducer::GetScoreProducerDescription() const
{
return "WordPenalty";
}
// "w" is the weight short name used in moses.ini ([weight-w]).
std::string WordPenaltyProducer::GetScoreProducerWeightShortName() const
{
return "w";
}
size_t WordPenaltyProducer::GetNumInputScores() const { return 0;}
// Penalty is minus the number of target words produced by the phrase.
void WordPenaltyProducer::Evaluate(const TargetPhrase& tp, ScoreComponentCollection* out) const
{
out->PlusEquals(this, -static_cast<float>(tp.GetSize()));
}
// Register with the score index manager to claim a score-vector slot.
UnknownWordPenaltyProducer::UnknownWordPenaltyProducer(ScoreIndexManager &scoreIndexManager)
{
scoreIndexManager.AddScoreProducer(this);
}
size_t UnknownWordPenaltyProducer::GetNumScoreComponents() const
{
return 1;
}
std::string UnknownWordPenaltyProducer::GetScoreProducerDescription() const
{
return "!UnknownWordPenalty";
}
// "u" is the weight short name used in moses.ini ([weight-u]).
std::string UnknownWordPenaltyProducer::GetScoreProducerWeightShortName() const
{
return "u";
}
size_t UnknownWordPenaltyProducer::GetNumInputScores() const { return 0;}
// The penalty is baked into each TranslationOption at creation time rather
// than recomputed during search.
bool UnknownWordPenaltyProducer::ComputeValueInTranslationOption() const {
return true;
}
}

70
src/DummyScoreProducers.h Normal file
View File

@ -0,0 +1,70 @@
// $Id: DummyScoreProducers.h 2939 2010-02-24 11:15:44Z jfouet $
#ifndef moses_DummyScoreProducers_h
#define moses_DummyScoreProducers_h
#include "FeatureFunction.h"
namespace Moses
{
class WordsRange;
/** Calculates Distortion scores
*/
class DistortionScoreProducer : public StatefulFeatureFunction {
public:
DistortionScoreProducer(ScoreIndexManager &scoreIndexManager);
//! raw distortion penalty for jumping from range 'prev' to 'curr'
float CalculateDistortionScore(const Hypothesis& hypo,
const WordsRange &prev, const WordsRange &curr, const int FirstGapPosition) const;
size_t GetNumScoreComponents() const;
std::string GetScoreProducerDescription() const;
std::string GetScoreProducerWeightShortName() const;
size_t GetNumInputScores() const;
//! state for the empty hypothesis (nothing covered); caller owns result
virtual const FFState* EmptyHypothesisState() const;
//! score one hypothesis expansion; returns the successor state
virtual FFState* Evaluate(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
};
/** Doesn't do anything but provide a key into the global
* score array to store the word penalty in.
*/
class WordPenaltyProducer : public StatelessFeatureFunction {
public:
WordPenaltyProducer(ScoreIndexManager &scoreIndexManager);
size_t GetNumScoreComponents() const;
std::string GetScoreProducerDescription() const;
std::string GetScoreProducerWeightShortName() const;
size_t GetNumInputScores() const;
//! adds minus the target-phrase length to the accumulator
virtual void Evaluate(
const TargetPhrase& phrase,
ScoreComponentCollection* out) const;
};
/** unknown word penalty */
/** unknown word penalty */
class UnknownWordPenaltyProducer : public StatelessFeatureFunction {
public:
UnknownWordPenaltyProducer(ScoreIndexManager &scoreIndexManager);
size_t GetNumScoreComponents() const;
std::string GetScoreProducerDescription() const;
std::string GetScoreProducerWeightShortName() const;
size_t GetNumInputScores() const;
//! true: the penalty is stored in the TranslationOption itself
virtual bool ComputeValueInTranslationOption() const;
};
}
#endif

147
src/DynSAInclude/fdstream.h Normal file
View File

@ -0,0 +1,147 @@
/* Class modified by ADL for randlm namespace on Feb 15th, 2008.
*
* The following code declares classes to read from and write to
* file descriptore or file handles.
*
* See
* http://www.josuttis.com/cppcode
* for details and the latest version.
*
* - open:
* - integrating BUFSIZ on some systems?
* - optimized reading of multiple characters
* - stream for reading AND writing
* - i18n
*
* (C) Copyright Nicolai M. Josuttis 2001.
* Permission to copy, use, modify, sell and distribute this software
* is granted provided this copyright notice appears in all copies.
* This software is provided "as is" without express or implied
* warranty, and with no claim as to its suitability for any purpose.
*
* Version: Jul 28, 2002
* History:
* Jul 28, 2002: bugfix memcpy() => memmove()
* fdinbuf::underflow(): cast for return statements
* Aug 05, 2001: first public version
*/
#ifndef moses_DynSAInclude_fdstream_h
#define moses_DynSAInclude_fdstream_h
#include <streambuf>
// for EOF:
#include <cstdio>
// for memmove():
#include <cstring>
// low-level read and write functions
#ifdef _MSC_VER
# include <io.h>
#else
# include <unistd.h>
//extern "C" {
// int write (int fd, const char* buf, int num);
// int read (int fd, char* buf, int num);
//}
#endif
// BEGIN namespace
//namespace randlm {
/************************************************************
* fdstreambuf
* - a stream that reads on a file descriptor
************************************************************/
// Stream buffer over a raw POSIX/Win32 file descriptor, with a small
// putback area.  Used by FileHandler to wrap popen() pipes.
class fdstreambuf : public std::streambuf {
protected:
int fd; // file descriptor
protected:
/* data buffer:
* - at most, pbSize characters in putback area plus
* - at most, bufSize characters in ordinary read buffer
*/
static const int pbSize = 4; // size of putback area
static const int bufSize = 1024; // size of the data buffer
char buffer[bufSize+pbSize]; // data buffer
public:
/* constructor
* - initialize file descriptor
* - initialize empty data buffer
* - no putback area
* => force underflow()
*/
fdstreambuf (int _fd) : fd(_fd) {
setg (buffer+pbSize, // beginning of putback area
buffer+pbSize, // read position
buffer+pbSize); // end position
}
protected:
// insert new characters into the buffer
virtual int_type underflow () {
#ifndef _MSC_VER
using std::memmove;
#endif
// is read position before end of buffer?
if (gptr() < egptr()) {
return traits_type::to_int_type(*gptr());
}
/* process size of putback area
* - use number of characters read
* - but at most size of putback area
*/
int numPutback;
numPutback = gptr() - eback();
if (numPutback > pbSize) {
numPutback = pbSize;
}
/* copy up to pbSize characters previously read into
* the putback area
*/
memmove (buffer+(pbSize-numPutback), gptr()-numPutback,
numPutback);
// read at most bufSize new characters
int num;
num = read (fd, buffer+pbSize, bufSize);
if (num <= 0) {
// ERROR or EOF
return EOF;
}
// reset buffer pointers
setg (buffer+(pbSize-numPutback), // beginning of putback area
buffer+pbSize, // read position
buffer+pbSize+num); // end of buffer
// return next character
return traits_type::to_int_type(*gptr());
}
// write one character
// NOTE(review): compares against plain EOF rather than
// traits_type::eof(); equivalent for char streams but not traits-correct.
virtual int_type overflow (int_type c) {
if (c != EOF) {
char z = c;
if (write (fd, &z, 1) != 1) {
return EOF;
}
}
return c;
}
// write multiple characters
// NOTE(review): returns write()'s result directly, so a short or failed
// write (-1) is reported as-is with no retry -- confirm acceptable.
virtual
std::streamsize xsputn (const char* s,
std::streamsize num) {
return write(fd,s,num);
}
};
//} // END namespace
#endif

160
src/DynSAInclude/file.cpp Normal file
View File

@ -0,0 +1,160 @@
#include "file.h"
namespace Moses {
// FileHandler class
const std::string FileHandler::kStdInDescriptor = "___stdin___";
const std::string FileHandler::kStdOutDescriptor = "___stdout___";
// compression commands
const FileExtension FileHandler::kGzipped = ".gz";
const FileExtension FileHandler::kBzipped2 = ".bz2";
const std::string FileHandler::kCatCommand = "cat";
const std::string FileHandler::kGzipCommand = "gzip -f";
const std::string FileHandler::kGunzipCommand = "gunzip -f";
const std::string FileHandler::kBzip2Command = "bzip2 -f";
const std::string FileHandler::kBunzip2Command = "bunzip2 -f";
// Open 'path' for reading or writing (not both), transparently wiring up
// stdin/stdout wrappers or a compression pipe based on the path.
// Exits the process on unsupported flag combinations or open failure.
FileHandler::FileHandler(const std::string & path, std::ios_base::openmode flags, bool checkExists)
  : std::fstream(NULL), path_(path), flags_(flags), buffer_(NULL), fp_(NULL) {
  // Reject bidirectional streams: flags exactly equal to in|out.
  if( !(flags^(std::ios::in|std::ios::out)) ) {
    fprintf(stderr, "ERROR: FileHandler does not support bidirectional files (%s).\n", path_.c_str());
    exit(EXIT_FAILURE);
  }
  else {
    // BUG FIX: this call used to be wrapped in assert(), which is compiled
    // away under NDEBUG -- release builds never attached a stream buffer.
    // Run it unconditionally and assert on the result instead.
    // NOTE(review): the 'checkExists' ctor parameter is not forwarded here;
    // setStreamBuffer receives the is-input flag instead, matching the
    // original behavior -- confirm intent before changing.
    const bool bufferOk = setStreamBuffer(flags & std::ios::in);
    assert(bufferOk);
    (void)bufferOk; // silence unused-variable warning under NDEBUG
  }
  this->precision(32);
}
// Tear down in reverse order of construction: close the decompression
// pipe (if any), free the buffer we allocated, and close the stream.
FileHandler::~FileHandler() {
if( fp_ != 0 )
pclose(fp_);
// cin/cout rdbufs are borrowed, not owned -- never delete those
if( path_ != FileHandler::kStdInDescriptor &&
path_ != FileHandler::kStdOutDescriptor )
delete buffer_;
if( this->is_open() )
this->close();
}
// Spawn 'cmd' via popen() (reading when flags_ has std::ios::in, else
// writing) and wrap the pipe's descriptor in a new fdstreambuf.
// Exits the process if the pipe cannot be opened; caller owns the result.
fdstreambuf * FileHandler::openCompressedFile(const char * cmd) {
//bool isInput = (flags_ & std::ios::in);
//open pipe to file with compression/decompression command
const char * p_type = (flags_ & std::ios::in ? "r" : "w");
fp_ = popen(cmd, p_type);
if( fp_ == NULL ) {
//fprintf(stderr, "ERROR:Failed to open compressed file at %s\n", path_.c_str());
perror("openCompressedFile: ");
exit(EXIT_FAILURE);
}
//open streambuf with file descriptor
return new fdstreambuf(fileno(fp_));
}
// Choose and install the stream buffer for path_: the shared cin/cout
// buffer for the std descriptors, a compression pipe buffer for .gz/.bz2
// paths, or a plain filebuf otherwise.  Exits on failure; returns true.
bool FileHandler::setStreamBuffer(bool checkExists) {
// redirect stdin or stdout if necesary
if (path_ == FileHandler::kStdInDescriptor) {
assert(flags_ & std::ios::in);
std::streambuf* sb = std::cin.rdbuf();
buffer_ = sb;
} else if (path_ == FileHandler::kStdOutDescriptor) {
assert(flags_ & std::ios::out);
std::streambuf* sb = std::cout.rdbuf();
buffer_ = sb;
} else {
// real file
if( checkExists && ! fileExists() ) {
fprintf(stderr, "ERROR: Failed to find file at %s\n", path_.c_str());
exit(EXIT_FAILURE);
}
std::string cmd = "";
if( isCompressedFile(cmd) && (! cmd.empty()) ) {
// heap-allocated; released in the destructor
buffer_ = openCompressedFile(cmd.c_str());
} else {
// open underlying filebuf (heap-allocated; released in the destructor)
std::filebuf* fb = new std::filebuf();
fb->open(path_.c_str(), flags_);
buffer_ = fb;
}
}
if (!buffer_) {
fprintf(stderr, "ERROR:Failed to open file at %s\n", path_.c_str());
exit(EXIT_FAILURE);
}
// attach the chosen buffer to this fstream
this->init(buffer_);
return true;
}
/*
* Checks for compression via file extension. Currently checks for
* ".gz" and ".bz2".
*/
/*
* Checks for compression via file extension. Currently checks for
* ".gz" and ".bz2".  On match, fills 'cmd' with the shell command that
* (de)compresses path_ (direction chosen from flags_) and returns true.
*/
bool FileHandler::isCompressedFile(std::string & cmd)
{
bool compressed = false, isInput = (flags_ & std::ios::in);
cmd = "";
unsigned int len = path_.size();
// suffix test: the extension must sit at the very end of the path
if( len > kGzipped.size()
&& path_.find(kGzipped) == len - kGzipped.size()) {
//gzip file command to compress or decompress
compressed = true;
// cmd = (isInput ? "exec gunzip -cf " : "exec gzip -c > ") + path_;
cmd = (isInput ? "exec " + kGunzipCommand + "c "
: "exec " + kGzipCommand + "c > ") + path_;
} else if( len > kBzipped2.size() &&
path_.find(kBzipped2) == len - kBzipped2.size()) {
//do bzipped2 file command
compressed = true;
cmd = (isInput ? "exec " + kBunzip2Command + "c "
: "exec " + kBzip2Command + "c > ") + path_;
}
return compressed;
}
// True iff path_ names an existing filesystem object (stat() succeeds).
bool FileHandler::fileExists() {
  struct stat f_info;
  return stat(path_.c_str(), &f_info) == 0;
}
// static method used during preprocessing compressed files without
// opening fstream objects.
// Static helper used while preprocessing compressed files without opening
// fstream objects: pick (de)compression commands for 'filepath' from its
// extension.  Falls back to plain 'cat' when the path is neither gzip- nor
// bzip2-suffixed; returns true iff a real compressor was selected.
bool FileHandler::getCompressionCmds(const std::string & filepath, std::string & compressionCmd,
                                     std::string & decompressionCmd,
                                     std::string & compressionSuffix) {
  // assume "no compression" until a known suffix is recognized
  compressionCmd = kCatCommand;
  decompressionCmd = kCatCommand;
  const size_t pathLen = filepath.length();
  const bool hasGzipSuffix = pathLen > kGzipped.size() &&
    filepath.find(kGzipped) == pathLen - kGzipped.length();
  const bool hasBzip2Suffix = pathLen > kBzipped2.size() &&
    filepath.find(kBzipped2) == pathLen - kBzipped2.length();
  if (hasGzipSuffix) {
    compressionCmd = kGzipCommand;
    decompressionCmd = kGunzipCommand;
    compressionSuffix = kGzipped;
  } else if (hasBzip2Suffix) {
    compressionCmd = kBzip2Command;
    decompressionCmd = kBunzip2Command;
    compressionSuffix = kBzipped2;
  }
  return (compressionCmd != kCatCommand && decompressionCmd != kCatCommand);
}
// Rewind the stream to the beginning.  Pipes cannot seek, so a compressed
// stream is reopened from scratch; plain buffers are seeked in place.
bool FileHandler::reset() {
// move to beginning of file
if (fp_ != 0) {
//can't seek on a pipe so reopen
pclose(fp_);
std::string cmd = "";
if (isCompressedFile(cmd) && ! cmd.empty())
buffer_ = openCompressedFile(cmd.c_str());
//reinitialize
this->init(buffer_);
}
else
buffer_->pubseekoff(0, std::ios_base::beg); //sets both get and put pointers to beginning of stream
return true;
}
} //end namespace

61
src/DynSAInclude/file.h Normal file
View File

@ -0,0 +1,61 @@
#ifndef moses_File_h
#define moses_File_h
#include <iostream>
#include <fstream>
#include <cstdio>
#include <cstdlib>
#include <sys/stat.h>
#include <string>
#include <cassert>
#include "fdstream.h"
#include "utils.h"
namespace Moses {
typedef std::string FileExtension;
// fstream subclass that transparently handles stdin/stdout redirection and
// gzip/bzip2 (de)compression via external commands, chosen from the path.
class FileHandler: public std::fstream {
public:
// descriptors for stdin and stdout
static const std::string kStdInDescriptor; // file name for std::cin
static const std::string kStdOutDescriptor; // file name for std::cout
// compression commands
static const std::string kCatCommand; // i.e. no compression
static const std::string kGzipCommand; // gzip -f
static const std::string kGunzipCommand; // gunzip -f
static const std::string kBzip2Command; // bzip2 -f
static const std::string kBunzip2Command; // bunzip2 -f
// open file or wrap stdin or stdout
FileHandler(const std::string & path,
std::ios_base::openmode flags = std::ios::in,
bool checkExists = true);
~FileHandler();
// file utilities
static bool getCompressionCmds(const std::string & filepath,
std::string & compressionCmd,
std::string & decompressionCmd,
std::string & compressionSuffix);
// data accessors
std::string getPath() { return path_; }
std::ios_base::openmode getFlags() { return flags_; }
bool isStdIn() { return path_ == FileHandler::kStdInDescriptor; }
bool isStdOut() { return path_ == FileHandler::kStdOutDescriptor; }
// rewind to the beginning of the stream (reopens compression pipes)
bool reset();
protected:
static const FileExtension kGzipped;
static const FileExtension kBzipped2;
bool fileExists();
bool setStreamBuffer(bool checkExists);
bool isCompressedFile(std::string & cmd);
fdstreambuf* openCompressedFile(const char* cmd);
std::string path_; // file path
std::ios_base::openmode flags_; // open flags
std::streambuf* buffer_; // buffer to either gzipped or standard data
std::FILE* fp_; //file pointer to handle pipe data
};
} // end namespace
#endif

32
src/DynSAInclude/types.h Normal file
View File

@ -0,0 +1,32 @@
#ifndef moses_DynSAInclude_types_h
#define moses_DynSAInclude_types_h
#include <iostream>
#include <map>
#include <set>
#include <vector>
#include <typeinfo>
#include <stdint.h>
#define iterate(c, i) for(typeof(c.begin()) i = c.begin(); i != c.end(); ++i)
#define piterate(c, i) for(typeof(c->begin()) i = c->begin(); i != c->end(); ++i)
#define THREADED false
#define THREAD_MAX 2
#define MAX_NGRAM_ORDER 8
#define MAX_STR_LEN 300
#define PRIME 8589935681ULL
#define MAX_HASH_FUNCS 1000
//#define PRIME 409
using std::string;
using std::cout;
using std::cerr;
using std::endl;
//typedefs for projects
typedef std::string word_t; // word as string
typedef unsigned int wordID_t; // word mapped to integer
typedef std::string date_t; // a date marker
typedef unsigned int count_t; // for 64-bit to 32-bit compatibility
#endif

81
src/DynSAInclude/utils.h Normal file
View File

@ -0,0 +1,81 @@
#ifndef moses_DynSAInclude_utils_h
#define moses_DynSAInclude_utils_h
#include <cstdlib>
#include <vector>
#include <string>
#include <sstream>
#include <cctype>
#include <cmath>
#include <cstring>
/** Stateless string/number helpers used by the dynamic suffix-array code. */
class Utils {
public:
  //! strip 'dropChars' from both ends of 'str' (in place)
  static void trim(std::string& str, const std::string dropChars = " \t\n\r") {
    str.erase(str.find_last_not_of(dropChars)+1);
    str.erase(0, str.find_first_not_of(dropChars));
  }
  //! strip 'dropChars' from the right end only (in place)
  static void rtrim(std::string& str, const std::string dropChars = " \t\n\r") {
    str.erase(str.find_last_not_of(dropChars)+1);
  }
  //! strip 'dropChars' from the left end only (in place)
  static void ltrim(std::string& str, const std::string dropChars = " \t\n\r") {
    str.erase(0, str.find_first_not_of(dropChars));
  }
  //! decimal string representation of 'integer'
  static std::string IntToStr(int integer) {
    std::ostringstream stream;
    stream << integer;
    return stream.str();
  }
  /*! Tokenize 'str' on any character in 'delm', storing the tokens in
   *  'items' (cleared first).  Empty tokens are skipped, matching
   *  strtok() semantics.  Returns the number of tokens.
   *  BUG FIX: the original ran strtok() on a const_cast-ed input buffer;
   *  strtok mutates its argument, which is undefined behavior on string
   *  literals and on std::string::c_str() buffers.  This version never
   *  writes to the input. */
  static int splitToStr(const char * str,
                        std::vector<std::string> & items,
                        const char * delm = "\t") {
    items.clear();
    if (str == NULL) return 0;
    const std::string input(str);
    std::string::size_type tokStart = input.find_first_not_of(delm);
    while (tokStart != std::string::npos) {
      const std::string::size_type tokEnd = input.find_first_of(delm, tokStart);
      if (tokEnd == std::string::npos) {
        items.push_back(input.substr(tokStart));
        break;
      }
      items.push_back(input.substr(tokStart, tokEnd - tokStart));
      tokStart = input.find_first_not_of(delm, tokEnd);
    }
    return items.size();
  }
  //! convenience overload taking std::string arguments
  static int splitToStr(std::string buff,
                        std::vector<std::string> & items,
                        std::string delm = "\t") {
    return splitToStr(buff.c_str(), items, delm.c_str());
  }
  //! split on 'delm' and convert each token with atoi(); returns token count
  static int splitToInt(std::string buff, std::vector<int>& items,
                        std::string delm = ",") {
    items.clear();
    std::vector<std::string> tmpVector(0);
    const int numTokens = splitToStr(buff.c_str(), tmpVector, delm.c_str());
    for (int j = 0; j < numTokens; j++)
      items.push_back(atoi(tmpVector[j].c_str()));
    return numTokens;
  }
  //! lowercase 'str' in place (byte-wise, via tolower)
  static void strToLowercase(std::string& str) {
    for(unsigned i=0; i < str.length(); i++) {
      str[i] = tolower(str[i]);
    }
  }
  // TODO: interface with decent PRG
  /*! pseudo-random value of integral type T; if mod_bnd != 0 the result
   *  is reduced modulo mod_bnd.  64-bit types are assembled from two
   *  rand() calls. */
  template<typename T>
  static T rand(T mod_bnd = 0) {
    T random = 0;
    if(sizeof(T) <= 4) {
      random = static_cast<T>(std::rand());
    }
    else if(sizeof(T) == 8) {
      random = static_cast<T>(std::rand());
      random <<= 31; random <<= 1;
      random |= static_cast<T>(std::rand());
    }
    if(mod_bnd != 0)
      return random % mod_bnd;
    else return random;
  }
};
#endif

View File

@ -0,0 +1,93 @@
#include <sstream>
#include "vocab.h"
namespace Moses {
// Vocab class
const wordID_t Vocab::kOOVWordID;
const wordID_t Vocab::kBOSWordID;
const word_t Vocab::kBOS = "<s>";
const word_t Vocab::kEOS = "</s>";
const word_t Vocab::kOOVWord = "<unk>";
// Map a surface string to its integer id.  Open vocabularies grow on
// unseen words; closed ones return the OOV id instead.
wordID_t Vocab::getWordID(const word_t& word) {
// get id and possibly add to vocab
if (words2ids_.find(word) == words2ids_.end())
if (!closed_) {
// ids run sequentially from 1 (0 is reserved for OOV), so the next
// free id is the pre-insertion size + 1
wordID_t id = words2ids_.size() + 1;
words2ids_[word] = id; // forward mapping: word -> id
ids2words_[id] = word; // reverse mapping: id -> word
}
else {
return Vocab::kOOVWordID;
}
wordID_t id = words2ids_[word];
return id;
}
// Reverse lookup: id -> surface string; unknown ids yield the <unk> token.
word_t Vocab::getWord(wordID_t id) {
  Id2Word::iterator match = ids2words_.find(id);
  return (match == ids2words_.end()) ? Vocab::kOOVWord : match->second;
}
// True iff the id has an entry in the reverse mapping.
bool Vocab::inVocab(wordID_t id) {
return ids2words_.find(id) != ids2words_.end();
}
// True iff the surface string has an entry in the forward mapping.
bool Vocab::inVocab(const word_t & word) {
return words2ids_.find(word) != words2ids_.end();
}
// Convenience wrapper: open 'vocab_path' for writing and save into it.
bool Vocab::save(const std::string & vocab_path) {
// save vocab as id -> word
FileHandler vcbout(vocab_path, std::ios::out);
return save(&vcbout);
}
// Write the vocabulary: first line is the entry count, then one
// "word <tab> id" line per entry (the format load() expects).
bool Vocab::save(FileHandler* vcbout) {
// then each vcb entry
*vcbout << ids2words_.size() << "\n";
iterate(ids2words_, iter)
*vcbout << iter->second << "\t" << iter->first << "\n";
return true;
}
// Convenience wrapper: open 'vocab_path' for reading and load from it.
bool Vocab::load(const std::string & vocab_path, bool closed) {
FileHandler vcbin(vocab_path, std::ios::in);
std::cerr << "Loading vocab from " << vocab_path << std::endl;
return load(&vcbin, closed);
}
// Load an "id -> word" mapping written by save(): first line is the entry
// count, then one "word <tab> id" line per entry.  Lines without an id
// column receive sequential ids starting from 1.  'closed' freezes the
// vocabulary afterwards.  Returns true on success.
bool Vocab::load(FileHandler* vcbin, bool closed) {
  // load vocab id -> word mapping
  words2ids_.clear(); // reset mapping
  ids2words_.clear();
  std::string line;
  word_t word;
  // BUG FIX: this getline() used to sit inside assert(); with NDEBUG the
  // assert body is compiled away and the header line was never consumed.
  if (!getline(*vcbin, line)) {
    assert(false);
    return false;
  }
  std::istringstream first(line.c_str());
  uint32_t vcbsize(0);
  first >> vcbsize;
  uint32_t loadedsize = 0;
  while (loadedsize++ < vcbsize && getline(*vcbin, line)) {
    std::istringstream entry(line.c_str());
    // BUG FIX: 'id' must be zero-initialized -- extraction fails for plain
    // word lists and previously left it holding indeterminate (or stale)
    // data, defeating the id == 0 check below.
    wordID_t id = 0;
    entry >> word;
    entry >> id;
    // may be no id (i.e. file may just be a word list)
    if (id == 0 && word != Vocab::kOOVWord)
      id = ids2words_.size() + 1; // assign ids sequentially starting from 1
    assert(ids2words_.count(id) == 0 && words2ids_.count(word) == 0);
    ids2words_[id] = word;
    words2ids_[word] = id;
  }
  closed_ = closed; // once loaded fix vocab ?
  std::cerr << "Loaded vocab with " << ids2words_.size() << " words." << std::endl;
  return true;
}
// Debug dump of both mappings (id -> word, then word -> id) to stderr.
void Vocab::printVocab() {
iterate(ids2words_, iter)
std::cerr << iter->second << "\t" << iter->first << "\n";
iterate(words2ids_, iter)
std::cerr << iter->second << "\t" << iter->first << "\n";
}
} //end namespace

64
src/DynSAInclude/vocab.h Normal file
View File

@ -0,0 +1,64 @@
#ifndef moses_DynSAInclude_vocab_h
#define moses_DynSAInclude_vocab_h
#include <map>
#include <string>
#include "types.h"
#include "file.h"
#include "utils.h"
namespace Moses {
// Vocab maps between strings and uint32 ids.
// Vocab maps between strings and uint32 ids.
class Vocab {
public:
  typedef std::map<word_t, wordID_t> Word2Id;
  typedef std::map<wordID_t, word_t> Id2Word;
  static const wordID_t kOOVWordID = 0; // out of vocabulary word id
  static const wordID_t kBOSWordID = 1;
  static const word_t kBOS; // beginning of sentence marker
  static const word_t kEOS; // end of sentence marker
  static const word_t kOOVWord; // <unk>
  //! open vocabulary; optionally pre-register the sentence markers
  Vocab(bool sntMarkers = true):closed_(false) {
    if(sntMarkers) {
      getWordID(kBOS); // added in case not observed in corpus
      getWordID(kEOS);
    }
  }
  // if no file then must allow new words
  // specify whether more words can be added via 'closed'
  // assume that if a vocab is loaded from file then it should be closed.
  // BUG FIX: load() used to be invoked inside assert(), so under NDEBUG the
  // vocabulary was silently never loaded.  Call it unconditionally and
  // assert on the result instead.
  Vocab(const std::string & vocab_path, bool closed = true) {
    const bool loaded = load(vocab_path, closed);
    assert(loaded);
    (void)loaded;
  }
  Vocab(FileHandler* fin, bool closed = true) {
    const bool loaded = load(fin, closed);
    assert(loaded);
    (void)loaded;
  }
  ~Vocab() {}
  //! map word -> id, growing the vocab if it is open
  wordID_t getWordID(const word_t & word);
  //! map id -> word (<unk> for unknown ids)
  word_t getWord(wordID_t id);
  bool inVocab(wordID_t id);
  bool inVocab(const word_t & word);
  uint32_t size() { return words2ids_.size(); }
  void makeClosed() { closed_ = true; }
  void makeOpen() { closed_ = false; }
  bool isClosed() { return closed_; }
  bool save(const std::string & vocab_path);
  bool save(FileHandler* fout);
  bool load(const std::string & vocab_path, bool closed = true);
  bool load(FileHandler* fin, bool closed = true);
  void printVocab();
  Word2Id::const_iterator vocabStart() {
    return words2ids_.begin();
  }
  Word2Id::const_iterator vocabEnd() {
    return words2ids_.end();
  }
private:
  Word2Id words2ids_; // map from strings to word ids
  Id2Word ids2words_; // map from ids to strings
  bool closed_; // can more words be added
};
}
#endif

237
src/DynSuffixArray.cpp Normal file
View File

@ -0,0 +1,237 @@
#include "DynSuffixArray.h"
#include <iostream>
namespace Moses {
// Default constructor: allocate empty suffix array (SA_), inverse (ISA_)
// and the BWT first/last columns (F_/L_); no corpus attached yet.
DynSuffixArray::DynSuffixArray() {
SA_ = new vuint_t();
ISA_ = new vuint_t();
F_ = new vuint_t();
L_ = new vuint_t();
std::cerr << "DYNAMIC SUFFIX ARRAY CLASS INSTANTIATED" << std::endl;
}
// Release the four owned arrays (corpus_ is not owned and is not freed).
DynSuffixArray::~DynSuffixArray() {
delete SA_;
delete ISA_;
delete F_;
delete L_;
}
// Build the suffix array over corpus 'crp' (pointer is kept, not copied).
DynSuffixArray::DynSuffixArray(vuint_t* crp) {
// make native int array and pass to SA builder
corpus_ = crp;
int size = corpus_->size();
int* tmpArr = new int[size];
for(int i=0 ; i < size; ++i) tmpArr[i] = i;
// NOTE(review): signature (arr, lo, hi) does not match std::qsort, so this
// presumably resolves to the class's own suffix-comparing sort declared in
// DynSuffixArray.h -- confirm.
qsort(tmpArr, 0, size-1);
SA_ = new vuint_t(tmpArr, tmpArr + size);
//std::cerr << "printing SA " << std::endl;
//for(int i=0; i < size; ++i) std::cerr << SA_->at(i) << std::endl;
delete[] tmpArr;
std::cerr << "DYNAMIC SUFFIX ARRAY CLASS INSTANTIATED WITH SIZE " << size << std::endl;
buildAuxArrays();
//printAuxArrays();
}
// Derive ISA_ (inverse of SA_) and the BWT columns from SA_ and corpus_:
// F_[i] is the first word of the i-th suffix, L_[i] the word preceding it
// (wrapping to the corpus end for suffix 0).
// Assigns fresh arrays; only called once, from the corpus constructor.
void DynSuffixArray::buildAuxArrays() {
int size = SA_->size();
ISA_ = new vuint_t(size);
F_ = new vuint_t(size);
L_ = new vuint_t(size);
for(int i=0; i < size; ++i) {
ISA_->at(SA_->at(i)) = i;
//(*ISA_)[(*SA_)[i]] = i;
(*F_)[i] = (*corpus_)[SA_->at(i)];
(*L_)[i] = (*corpus_)[(SA_->at(i) == 0 ? size-1 : SA_->at(i)-1)];
}
}
/* use Gerlach's code to make rank faster */
// Number of occurrences of 'word' in L_[0..idx) -- a linear scan for now.
int DynSuffixArray::rank(unsigned word, unsigned idx) {
  int occurrences = 0;
  for (unsigned pos = 0; pos < idx; ++pos) {
    if (L_->at(pos) == word)
      ++occurrences;
  }
  return occurrences;
}
/* count function should be implemented
 * with binary search over suffix array!! */
// Index of the first row of F_ holding 'word', or -1 when the word does
// not occur.  F_ is kept sorted, so std::lower_bound applies.
int DynSuffixArray::F_firstIdx(unsigned word) {
  int low = std::lower_bound(F_->begin(), F_->end(), word) - F_->begin();
  // BUG FIX: when 'word' is greater than every element, lower_bound returns
  // end(); F_->at(low) then threw std::out_of_range instead of reporting
  // "not found".  Guard the index before dereferencing.
  if (low < (int)F_->size() && F_->at(low) == word) return low;
  else return -1;
}
/* uses rank() and c() to obtain the LF function */
// Last-to-first mapping: the F_ row corresponding to row L_idx of L_.
int DynSuffixArray::LF(unsigned L_idx) {
  int fIdx(-1);
  unsigned word = L_->at(L_idx);
  if((fIdx = F_firstIdx(word)) != -1)
    return fIdx + rank(word, L_idx);
  // BUG FIX: the original fell off the end of this non-void function
  // (undefined behavior) when 'word' was absent from F_.  Return -1;
  // callers such as insertFactor() already clamp non-positive LF values
  // to SA_->size().
  return -1;
}
// Incrementally insert sentence 'newSent' into the suffix array at corpus
// position 'newIndex', updating SA_/ISA_/F_/L_ in place (dynamic BWT-style
// update; words are inserted back to front).  Statement order here is
// load-bearing: each step depends on the arrays' state from the previous.
void DynSuffixArray::insertFactor(vuint_t* newSent, unsigned newIndex) {
// for sentences
//stages 1, 2, 4 stay same from 1char case
//(use last word of new text in step 2 and save Ltmp until last insert?)
//stage 3...all words of new sentence are inserted backwards
// stage 2: k=ISA[newIndex], tmp= L[k], L[k] = newChar
assert(newIndex <= SA_->size());
int k(-1), kprime(-1);
k = (newIndex < SA_->size() ? ISA_->at(newIndex) : ISA_->at(0)); // k is now index of the cycle that starts at newindex
int true_pos = LF(k); // track cycle shift (newIndex - 1)
int Ltmp = L_->at(k);
L_->at(k) = (*newSent)[newSent->size()-1]; // cycle k now ends with correct word
for(int j = newSent->size()-1; j > -1; --j) {
kprime = LF(k); // find cycle that starts with (newindex - 1)
//kprime += ((L_[k] == Ltmp) && (k > isa[k]) ? 1 : 0); // yada yada
// only terminal char can be 0 so add new vocab at end
kprime = (kprime > 0 ? kprime : SA_->size());
true_pos += (kprime <= true_pos ? 1 : 0); // track changes
// insert everything
F_->insert(F_->begin() + kprime, (*newSent)[j]);
int theLWord = (j == 0 ? Ltmp : (*newSent)[j-1]);
L_->insert(L_->begin() + kprime, theLWord);
// shift all suffix positions at or after the insertion point
piterate(SA_, itr)
if(*itr >= newIndex) ++(*itr);
SA_->insert(SA_->begin() + kprime, newIndex);
// and keep ISA_ consistent with the row shift
piterate(ISA_, itr)
if(*itr >= kprime) ++(*itr);
ISA_->insert(ISA_->begin() + newIndex, kprime);
k = kprime;
}
// Begin stage 4
reorder(true_pos, LF(kprime)); // actual position vs computed position of cycle (newIndex-1)
}
// Stage 4 of insertion/deletion: repeatedly swaps the row at position j with
// the row at its correct position jprime, following the LF mapping until the
// two agree.
// NOTE(review): the printf calls look like leftover debug output (and use %d
// for unsigned arguments) -- consider removing or routing through TRACE_ERR.
// NOTE(review): the comment below mentions F, but only L and SA entries are
// swapped -- presumably the F values of the two rows are equal here; confirm.
void DynSuffixArray::reorder(unsigned j, unsigned jprime) {
printf("j=%d\tj'=%d\n", j, jprime);
while(j != jprime) {
printf("j=%d\tj'=%d\n", j, jprime);
int tmp, isaIdx(-1);
int new_j = LF(j);
// for SA, L, and F, the element at pos j is moved to j'
tmp = L_->at(j); // L
L_->at(j) = L_->at(jprime);
L_->at(jprime) = tmp;
tmp = SA_->at(j); // SA
SA_->at(j) = SA_->at(jprime);
SA_->at(jprime) = tmp;
// all ISA values between (j...j'] decremented
for(int i = 0; i < ISA_->size(); ++i) {
if((ISA_->at(i) == j) && (isaIdx == -1))
isaIdx = i; // store index of ISA[i] = j
if((ISA_->at(i) > j) && (ISA_->at(i) <= jprime)) --(*ISA_)[i];
}
// replace j with j' in ISA
//isa[isaIdx] = jprime;
ISA_->at(isaIdx) = jprime;
j = new_j;
jprime = LF(jprime);
}
}
// Removes num2del consecutive words starting at corpus position `index`
// from the index structures: for each word the corresponding row is erased
// from L_/F_/SA_/ISA_ and surviving indices are shifted down, then reorder()
// repairs the displaced cycle.
// NOTE(review): the cerr lines look like leftover debug output; `int q` vs
// `unsigned num2del` mixes signedness -- harmless for sane arguments.
void DynSuffixArray::deleteFactor(unsigned index, unsigned num2del) {
int ltmp = L_->at(ISA_->at(index));
int true_pos = LF(ISA_->at(index)); // track cycle shift (newIndex - 1)
for(int q = 0; q < num2del; ++q) {
int row = ISA_->at(index); // gives the position of index in SA and F_
std::cerr << "row = " << row << std::endl;
std::cerr << "SA[r]/index = " << SA_->at(row) << "/" << index << std::endl;
true_pos -= (row <= true_pos ? 1 : 0); // track changes
L_->erase(L_->begin() + row);
F_->erase(F_->begin() + row);
ISA_->erase(ISA_->begin() + index); // order is important
// shift row references past the erased row ...
piterate(ISA_, itr)
if(*itr > row) --(*itr);
SA_->erase(SA_->begin() + row);
// ... and corpus positions past the erased word
piterate(SA_, itr)
if(*itr > index) --(*itr);
}
// restore the saved L value at the deletion boundary, then repair ordering
L_->at(ISA_->at(index))= ltmp;
reorder(LF(ISA_->at(index)), true_pos);
printAuxArrays();
}
// Stub: factor substitution is not implemented yet; callers get a warning on
// stderr and no index change.  (Fixed the garbled spelling of the original
// diagnostic, "NEEDS TO IMPELEMNT SUBSITITUTE FACTOR".)
void DynSuffixArray::substituteFactor(vuint_t* newSents, unsigned newIndex) {
  std::cerr << "NEEDS TO IMPLEMENT SUBSTITUTE FACTOR\n";
}
// Finds all corpus occurrences of `phrase`.  Rows whose suffix starts with
// phrase[0] are located by binary search on the sorted F_ column; longer
// phrases are then verified word-by-word against the corpus.
// Returns true iff at least one occurrence was found; `indices` is cleared
// and filled with the matches.
// NOTE(review): the single-word branch stores SA_[i] (leftmost position),
// while the multi-word branch stores crpIdx + pos (the rightmost word of the
// match) -- confirm callers expect this asymmetry.
// NOTE(review): the (crpIdx + phrasesize) >= corpus_->size() test also skips
// a match that would end exactly at the corpus end -- verify intended.
bool DynSuffixArray::getCorpusIndex(const vuint_t* phrase, vuint_t* indices) {
pair<vuint_t::iterator,vuint_t::iterator> bounds;
indices->clear();
int phrasesize = phrase->size();
// find lower and upper bounds on phrase[0]
bounds = std::equal_range(F_->begin(), F_->end(), phrase->at(0));
// bounds holds first and (last + 1) index of phrase[0] in SA_
int lwrBnd = int(bounds.first - F_->begin());
int uprBnd = int(bounds.second - F_->begin());
if(uprBnd - lwrBnd == 0) return false; // not found
if(phrasesize == 1) {
for(int i=lwrBnd; i < uprBnd; ++i) {
indices->push_back(SA_->at(i));
}
return (indices->size() > 0);
}
//find longer phrases if they exist
for(int i = lwrBnd; i < uprBnd; ++i) {
int crpIdx = SA_->at(i);
if((crpIdx + phrasesize) >= corpus_->size()) continue; // past end of corpus
for(int pos = 1; pos < phrasesize; ++pos) { // for all following words
if(corpus_->at(crpIdx + pos) != phrase->at(pos)) { // if word doesn't match
if(indices->size() > 0) i = uprBnd; // past the phrases since SA is ordered
break;
}
else if(pos == phrasesize-1) { // found phrase
indices->push_back(crpIdx + pos); // store rightmost index of phrase
}
}
}
//cerr << "Total count of phrase = " << indices->size() << endl;
return (indices->size() > 0);
}
// Serializes only the suffix array itself; the auxiliary arrays are rebuilt
// from it (and the corpus) on load.
void DynSuffixArray::save(FILE* fout) {
fWriteVector(fout, *SA_);
}
// Reads the suffix array written by save().
// NOTE(review): does not rebuild ISA_/F_/L_ here -- presumably the caller
// invokes buildAuxArrays() (or the constructor does) afterwards; confirm.
void DynSuffixArray::load(FILE* fin) {
fReadVector(fin, *SA_);
}
// Lexicographically compares the corpus suffixes starting at pos1 and pos2,
// examining at most `max` symbols.  A suffix that runs off the corpus end
// first compares as smaller.  Returns <0, 0 or >0 like strcmp.
int DynSuffixArray::compare(int pos1, int pos2, int max) {
  for(int off = 0; off < max; ++off) {
    const bool in1 = (pos1 + off < corpus_->size());
    const bool in2 = (pos2 + off < corpus_->size());
    if(in1 && !in2)
      return 1;
    if(in2 && !in1)
      return -1;
    const int diff = corpus_->at(pos1+off) - corpus_->at(pos2+off);
    if(diff != 0)
      return diff;
  }
  return 0;
}
// Randomised quicksort of suffix start positions in array[begin..end]
// (inclusive).  Ordering comes from compare(), which looks at no more than
// 20 symbols per pair -- a bounded-depth approximation of full suffix order.
// NOTE(review): `index` is declared in the outer if-block and is assigned by
// the partition loop before the recursive calls read it; recursion depth is
// O(n) in the worst case -- confirm acceptable for expected corpus sizes.
void DynSuffixArray::qsort(int* array, int begin, int end) {
if(end > begin)
{
int index;
{
// pick a random pivot and move it to the end
index = begin + (rand() % (end - begin + 1));
int pivot = array[index];
{
int tmp = array[index];
array[index] = array[end];
array[end] = tmp;
}
// Lomuto partition: everything <= pivot goes to the front
for(int i=index=begin; i < end; ++i) {
if (compare(array[i], pivot, 20) <= 0) {
{
int tmp = array[index];
array[index] = array[i];
array[i] = tmp;
index++;
}
}
}
// restore the pivot to its final position
{
int tmp = array[index];
array[index] = array[end];
array[end] = tmp;
}
}
qsort(array, begin, index - 1);
qsort(array, index + 1, end);
}
}
} // end namespace

50
src/DynSuffixArray.h Normal file
View File

@ -0,0 +1,50 @@
#ifndef moses_DynSuffixArray_h
#define moses_DynSuffixArray_h
#include <vector>
#include <set>
#include <algorithm>
#include <utility>
#include "Util.h"
#include "File.h"
#include "DynSAInclude/types.h"
namespace Moses {
using std::vector;
using std::pair;
typedef std::vector<unsigned> vuint_t;
/** Dynamic suffix array over a vocabulary-mapped corpus (vector<unsigned>).
 *  Maintains the suffix array SA_ together with its inverse (ISA_) and the
 *  first/last BWT columns (F_, L_) so factors can be inserted and deleted
 *  without a full rebuild, and phrases located via getCorpusIndex().
 */
class DynSuffixArray {
public:
  DynSuffixArray();
  DynSuffixArray(vuint_t*);
  ~DynSuffixArray();
  //! fills the second vector with all corpus occurrences of the phrase
  bool getCorpusIndex(const vuint_t*, vuint_t*);
  void load(FILE*);   //!< read SA_ back from file
  void save(FILE*);   //!< write SA_ to file
private:
  vuint_t* SA_;       //!< suffix array
  vuint_t* ISA_;      //!< inverse suffix array: ISA_[SA_[i]] == i
  vuint_t* F_;        //!< first symbol of each sorted suffix
  vuint_t* L_;        //!< symbol preceding each suffix (BWT column)
  vuint_t* corpus_;   //!< the corpus itself (ownership unclear here -- TODO confirm)
  void buildAuxArrays();
  void qsort(int* array, int begin, int end);
  int compare(int, int, int);
  void reorder(unsigned, unsigned);
  void insertFactor(vuint_t*, unsigned);
  void deleteFactor(unsigned, unsigned);
  void substituteFactor(vuint_t*, unsigned);
  int LF(unsigned);
  int rank(unsigned, unsigned);
  int F_firstIdx(unsigned);
  //! debug dump of all four auxiliary arrays, one row per line
  void printAuxArrays() {
    std::cerr << "SA\tISA\tF_\tL_\n";
    // size_t index fixes the signed/unsigned comparison of the original
    for(size_t i=0; i < SA_->size(); ++i)
      std::cerr << SA_->at(i) << "\t" << ISA_->at(i) << "\t" << F_->at(i) << "\t" << L_->at(i) << std::endl;
  }
};
} //end namespace
#endif

8
src/FFState.cpp Normal file
View File

@ -0,0 +1,8 @@
#include "FFState.h"
namespace Moses {
FFState::~FFState() {}
}

13
src/FFState.h Normal file
View File

@ -0,0 +1,13 @@
#ifndef moses_FFState_h
#define moses_FFState_h
namespace Moses {
// Abstract base class for the per-hypothesis state carried by stateful
// feature functions (see StatefulFeatureFunction::Evaluate).
class FFState {
public:
virtual ~FFState();
// Total-order comparison with another state of the same concrete type;
// returns <0, 0 or >0.  Must be implemented by subclasses.
virtual int Compare(const FFState& other) const = 0;
};
}
#endif

53
src/Factor.cpp Normal file
View File

@ -0,0 +1,53 @@
// $Id: Factor.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Factor.h"
using namespace std;
namespace Moses
{
// Constructs a Factor with an assigned contiguous id.  direction and
// factorType are accepted but currently unused -- the corresponding members
// are commented out (see the initializer list).
Factor::Factor(FactorDirection direction, FactorType factorType, const std::string *factorString, size_t id)
://m_direction(direction)
//,m_factorType(factorType)
m_ptrString(factorString)
,m_id(id)
{}
// Constructs a search-key Factor with no id (NOT_FOUND); used only to test
// whether a factor already exists, never to create a real factor.
Factor::Factor(FactorDirection direction, FactorType factorType, const std::string *factorString)
//:m_direction(direction)
//,m_factorType(factorType)
:m_ptrString(factorString)
,m_id(NOT_FOUND)
{}
TO_STRING_BODY(Factor)
// friend
// Streams the factor's surface string.
ostream& operator<<(ostream& out, const Factor& factor)
{
  return out << factor.GetString();
}
}

147
src/Factor.h Normal file
View File

@ -0,0 +1,147 @@
// $Id: Factor.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_Factor_h
#define moses_Factor_h
#include <sstream>
#include <iostream>
#include <list>
#include <vector>
#include <map>
#include <string>
#include "TypeDef.h"
#include "Util.h"
#include "hash.h"
namespace Moses
{
class FactorCollection;
/** Represents a factor (word, POS, etc) on the E or F side
*
* A Factor object is a tuple of direction (Input or Output,
* corresponding to French or English), a type (surface form,
* POS, stem, etc), and the value of the factor.
*
* @TODO I find this design problematic- essentially, a factor should
* just be a value type and the factor type and "direction"
* should be the keys in a larger identification system that
* find instances of specific factors.
*
*/
class Factor
{
friend std::ostream& operator<<(std::ostream&, const Factor&);
// only these classes are allowed to instantiate this class
friend class FactorCollection;
protected:
//FactorDirection m_direction;
//FactorType m_factorType;
// Pointer into FactorCollection's string set; identity of the pointer, not
// the string contents, is used for comparison (see Compare below).
const std::string *m_ptrString;
const size_t m_id;
//! protected constructor. only friend class, FactorCollection, is allowed to create Factor objects
Factor(FactorDirection direction, FactorType factorType, const std::string *factorString, size_t id);
//! no id set. do not use to create new factors, only used for seeing if factor exists
Factor(FactorDirection direction, FactorType factorType, const std::string *factorString);
public:
//! returns whether this factor is part of the source ('Input') or target ('Output') language
//inline FactorDirection GetFactorDirection() const
//{
// return m_direction;
//}
//! index, FactorType. For example, 0=surface, 1=POS. The actual mapping is user defined
//inline FactorType GetFactorType() const
//{
// return m_factorType;
//}
//! original string representation of the factor
inline const std::string &GetString() const
{
return *m_ptrString;
}
//! contiguous ID
inline size_t GetId() const
{
return m_id;
}
/*
//! Alternative comparison between factors. Not yet used
inline unsigned int GetHash() const
{
unsigned int h=quick_hash((const char*)&m_direction, sizeof(FactorDirection), 0xc7e7f2fd);
h=quick_hash((const char*)&m_factorType, sizeof(FactorType), h);
h=quick_hash((const char*)&m_ptrString, sizeof(const std::string *), h);
return h;
}
*/
/** transitive comparison between 2 factors.
* -1 = less than
* +1 = more than
* 0 = same
* Used by operator< & operator==, as well as other classes
* NOTE(review): compares the string *pointers*, not the string values --
* presumably stable because strings are interned in FactorCollection's
* set, but the resulting order is address order, not lexicographic.
*/
inline int Compare(const Factor &compare) const
{
if (m_ptrString < compare.m_ptrString)
return -1;
if (m_ptrString > compare.m_ptrString)
return 1;
/*
if (m_direction < compare.m_direction)
return -1;
if (m_direction > compare.m_direction)
return 1;
if (m_factorType < compare.m_factorType)
return -1;
if (m_factorType > compare.m_factorType)
return 1;
*/
return 0;
}
//! transitive comparison used for adding objects into FactorCollection
inline bool operator<(const Factor &compare) const
{
return Compare(compare) < 0;
}
// quick equality comparison (object identity). Not used
inline bool operator==(const Factor &compare) const
{
return this == &compare;
}
TO_STRING();
};
}
#endif

117
src/FactorCollection.cpp Normal file
View File

@ -0,0 +1,117 @@
// $Id: FactorCollection.cpp 2477 2009-08-07 16:47:54Z bhaddow $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include "FactorCollection.h"
#include "LanguageModel.h"
#include "Util.h"
using namespace std;
namespace Moses
{
FactorCollection FactorCollection::s_instance;
// Deprecated: bulk-loads a factor vocabulary from a file.  Each usable line
// has at least two whitespace-separated tokens; column 1 (token[1]) is the
// factor string added to the collection.
void FactorCollection::LoadVocab(FactorDirection direction, FactorType factorType, const string &filePath)
{
	ifstream inFile(filePath.c_str());
	string line;
#ifdef WITH_THREADS
	boost::upgrade_lock<boost::shared_mutex> lock(m_accessLock);
	boost::upgrade_to_unique_lock<boost::shared_mutex> uniqueLock(lock);
#endif
	// while(getline(...)) also processes a final line that has no trailing
	// newline; the original "!getline(...).eof()" test silently dropped it.
	while (getline(inFile, line))
	{
		vector<string> token = Tokenize( line );
		if (token.size() < 2)
		{
			continue;
		}
		// looks like good line
		AddFactor(direction, factorType, token[1]);
	}
}
// Read-only query: does a factor with this string already exist?
// The original called m_factorStringCollection.insert() here, mutating
// shared state while holding only a *shared* lock (a data race under
// WITH_THREADS) and growing the string set as a side effect of a query.
// A find() answers the same question without mutation: if the string has
// never been interned, no Factor can reference it, so the result is false.
bool FactorCollection::Exists(FactorDirection direction, FactorType factorType, const string &factorString)
{
#ifdef WITH_THREADS
	boost::shared_lock<boost::shared_mutex> lock(m_accessLock);
#endif
	// find string id
	StringSet::const_iterator iterString = m_factorStringCollection.find(factorString);
	if (iterString == m_factorStringCollection.end())
		return false; // string never interned => no such factor
	const string *ptrString = &(*iterString);
	Factor search(direction, factorType, ptrString); // id not used for searching
	return m_collection.find(search) != m_collection.end();
}
// Returns the unique Factor for (direction, factorType, factorString),
// creating and id-stamping it on first sight.  The string is interned in
// m_factorStringCollection so the Factor can hold a stable pointer.
const Factor *FactorCollection::AddFactor(FactorDirection direction
, FactorType factorType
, const string &factorString)
{
#ifdef WITH_THREADS
boost::upgrade_lock<boost::shared_mutex> lock(m_accessLock);
boost::upgrade_to_unique_lock<boost::shared_mutex> uniqueLock(lock);
#endif
// find string id
const string *ptrString=&(*m_factorStringCollection.insert(factorString).first);
pair<FactorSet::iterator, bool> ret = m_collection.insert( Factor(direction, factorType, ptrString, m_factorId) );
if (ret.second)
++m_factorId; // new factor, make sure next new factor has different id
const Factor *factor = &(*ret.first);
return factor;
}
// Nothing to free: factors are stored by value inside m_collection (the
// commented-out loop dates from when the set held pointers).
FactorCollection::~FactorCollection()
{
//FactorSet::iterator iter;
//for (iter = m_collection.begin() ; iter != m_collection.end() ; iter++)
//{
// delete (*iter);
//}
}
TO_STRING_BODY(FactorCollection);
// friend
// Streams every factor in the collection, in set order, with no separators.
ostream& operator<<(ostream& out, const FactorCollection& factorCollection)
{
	FactorSet::const_iterator it = factorCollection.m_collection.begin();
	for (; it != factorCollection.m_collection.end(); ++it)
	{
		out << *it;
	}
	return out;
}
}

91
src/FactorCollection.h Normal file
View File

@ -0,0 +1,91 @@
// $Id: FactorCollection.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_FactorCollection_h
#define moses_FactorCollection_h
#include <set>
#include <string>
#ifdef WITH_THREADS
#include <boost/thread/shared_mutex.hpp>
#endif
#include "Factor.h"
namespace Moses
{
class LanguageModel;
typedef std::set<Factor> FactorSet;
typedef std::set<std::string> StringSet;
/** collection of factors
*
* All Factors in moses are accessed and created by a FactorCollection.
* By enforcing this strict creation processes (ie, forbidding factors
* from being created on the stack, etc), their memory addresses can
* be used as keys to uniquely identify them.
* Only 1 FactorCollection object should be created.
*/
class FactorCollection
{
friend std::ostream& operator<<(std::ostream&, const FactorCollection&);
protected:
// the single global instance (singleton; see Instance())
static FactorCollection s_instance;
#ifdef WITH_THREADS
//reader-writer lock
boost::shared_mutex m_accessLock;
#endif
size_t m_factorId; /**< unique, contiguous ids, starting from 0, for each factor */
FactorSet m_collection; /**< collection of all factors */
StringSet m_factorStringCollection; /**< collection of unique strings used by factors */
//! constructor. only the 1 static variable can be created
FactorCollection()
:m_factorId(0)
{}
public:
//! access to the singleton
static FactorCollection& Instance() { return s_instance; }
//! Destructor
~FactorCollection();
//! Test to see whether a factor exists
bool Exists(FactorDirection direction, FactorType factorType, const std::string &factorString);
/** returns a factor with the same direction, factorType and factorString.
* If the factor already exists in the collection, return the existing factor; if not, create a new one
*/
const Factor *AddFactor(FactorDirection direction, FactorType factorType, const std::string &factorString);
//! Load list of factors. Deprecated
void LoadVocab(FactorDirection direction, FactorType factorType, const std::string &filePath);
TO_STRING();
};
}
#endif

59
src/FactorTypeSet.cpp Normal file
View File

@ -0,0 +1,59 @@
// $Id: FactorTypeSet.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "FactorTypeSet.h"
using namespace std;
namespace Moses
{
// Builds a mask with the bit of every listed FactorType switched on.
FactorMask::FactorMask(const vector<FactorType> &factors)
{
	for (size_t i = 0; i < factors.size(); ++i)
	{
		this->set(factors[i]);
	}
}
TO_STRING_BODY(FactorMask);
// friend
// Prints the indices of all set bits, comma-separated, e.g. "FactorMask<0,2>".
std::ostream& operator<<(std::ostream& out, const FactorMask& fm)
{
	out << "FactorMask<";
	const char* sep = "";
	for (size_t currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
	{
		if (fm[currFactor])
		{
			out << sep << currFactor;
			sep = ",";
		}
	}
	out << ">";
	return out;
}
}

53
src/FactorTypeSet.h Normal file
View File

@ -0,0 +1,53 @@
// $Id: FactorTypeSet.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_FactorTypeSet_h
#define moses_FactorTypeSet_h
#include <iostream>
#include <bitset>
#include <vector>
#include "TypeDef.h"
#include "Util.h"
namespace Moses
{
/** set of unique FactorTypes. Used to store what factor types are used in phrase tables etc
*/
// Thin wrapper around std::bitset<MAX_NUM_FACTORS>: one bit per FactorType.
class FactorMask : public std::bitset<MAX_NUM_FACTORS>
{
friend std::ostream& operator<<(std::ostream&, const FactorMask&);
public:
//! construct object from list of FactorType.
explicit FactorMask(const std::vector<FactorType> &factors);
//! default constructor
inline FactorMask() {}
//! copy constructor
FactorMask(const std::bitset<MAX_NUM_FACTORS>& rhs) : std::bitset<MAX_NUM_FACTORS>(rhs) { }
TO_STRING();
};
}
#endif

22
src/FeatureFunction.cpp Normal file
View File

@ -0,0 +1,22 @@
#include "FeatureFunction.h"
#include <cassert>
namespace Moses {
// Out-of-line virtual destructor for the abstract base.
FeatureFunction::~FeatureFunction() {}
// Stateless feature functions carry no per-hypothesis state.
bool StatelessFeatureFunction::IsStateless() const { return true; }
// NOTE(review): the header comment says "Default: true", but this
// implementation returns false -- confirm which is intended.
bool StatelessFeatureFunction::ComputeValueInTranslationOption() const {
return false;
}
// Fallback Evaluate: subclasses must either override this or arrange for
// ComputeValueInTranslationOption() to return true; otherwise this assert
// fires at runtime.
void StatelessFeatureFunction::Evaluate(
const TargetPhrase& cur_hypo,
ScoreComponentCollection* accumulator) const {
assert(!"Please implement Evaluate or set ComputeValueInTranslationOption to true");
}
bool StatefulFeatureFunction::IsStateless() const { return false; }
}

64
src/FeatureFunction.h Normal file
View File

@ -0,0 +1,64 @@
#ifndef moses_FeatureFunction_h
#define moses_FeatureFunction_h
#include <vector>
#include "ScoreProducer.h"
namespace Moses {
class TargetPhrase;
class Hypothesis;
class FFState;
class ScoreComponentCollection;
// Abstract base of all feature functions (stateless and stateful).
class FeatureFunction: public ScoreProducer {
public:
virtual bool IsStateless() const = 0;
virtual ~FeatureFunction();
};
// A feature function whose score depends only on the target phrase, not on
// any carried-over hypothesis state.
class StatelessFeatureFunction: public FeatureFunction {
public:
//! Evaluate for stateless feature functions. Implement this.
virtual void Evaluate(
const TargetPhrase& cur_hypo,
ScoreComponentCollection* accumulator) const;
// If true, this value is expected to be included in the
// ScoreBreakdown in the TranslationOption once it has been
// constructed.
// NOTE(review): comment said "Default: true", but the .cpp implementation
// returns false -- confirm which is intended.
virtual bool ComputeValueInTranslationOption() const;
bool IsStateless() const;
};
// A feature function that threads an FFState through the hypothesis chain.
class StatefulFeatureFunction: public FeatureFunction {
public:
/**
* \brief This interface should be implemented.
* Notes: When evaluating the value of this feature function, you should avoid
* calling hypo.GetPrevHypo(). If you need something from the "previous"
* hypothesis, you should store it in an FFState object which will be passed
* in as prev_state. If you don't do this, you will get in trouble.
*/
virtual FFState* Evaluate(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const = 0;
//! return the state associated with the empty hypothesis
virtual const FFState* EmptyHypothesisState() const = 0;
bool IsStateless() const;
};
}
#endif

4
src/File.cpp Normal file
View File

@ -0,0 +1,4 @@
#include "File.h"

122
src/File.h Normal file
View File

@ -0,0 +1,122 @@
// $Id: File.h 2939 2010-02-24 11:15:44Z jfouet $
/* ---------------------------------------------------------------- */
/* Copyright 2005 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
/* Richard Zens */
/* ---------------------------------------------------------------- */
#ifndef moses_File_h
#define moses_File_h
#include <cstdio>
#include <iostream>
#include <vector>
#include <cassert>
#include "UserMessage.h"
#include "TypeDef.h"
#include "Util.h"
namespace Moses
{
#ifdef WIN32
#define OFF_T __int64
#define FTELLO(file) _ftelli64(file)
#define FSEEKO(file, offset, origin) _fseeki64(file, offset, origin)
#else
#define OFF_T off_t
#define FTELLO(f) ftello(f)
#define FSEEKO(file, offset, origin) fseeko(file, offset, origin)
#endif
static const OFF_T InvalidOffT=-1;
// WARNING:
// these functions work only for bitwise read/write-able types
// Writes one bitwise-serializable value; aborts on short write.
// Returns the number of bytes written.
template<typename T> inline size_t fWrite(FILE* f,const T& t) {
if(fwrite(&t,sizeof(t),1,f)!=1) {TRACE_ERR("ERROR:: fwrite!\n");abort();}
return sizeof(t);
}
// Reads one bitwise-serializable value; aborts on short read.
template<typename T> inline void fRead(FILE* f,T& t) {
if(fread(&t,sizeof(t),1,f)!=1) {TRACE_ERR("ERROR: fread!\n");abort();}
}
// Writes the element count (UINT32) followed by the raw bytes of [b,e).
template<typename T> inline size_t fWrite(FILE* f,const T* b,const T* e) {
UINT32 s=std::distance(b,e);size_t rv=fWrite(f,s);
if(fwrite(b,sizeof(T),s,f)!=s) {TRACE_ERR("ERROR: fwrite!\n");abort();}
return rv+sizeof(T)*s;
}
// Iterator-pair overload of the above.
// NOTE(review): here T is the *iterator* type, so sizeof(T) is the size of
// the iterator, not of the element -- this looks wrong for anything but raw
// pointers; confirm callers only ever pass pointers.
template<typename T> inline size_t fWrite(FILE* f,const T b,const T e) {
UINT32 s=std::distance(b,e);size_t rv=fWrite(f,s);
if(fwrite(&(*b),sizeof(T),s,f)!=s) {TRACE_ERR("ERROR: fwrite!\n");abort();}
return rv+sizeof(T)*s;
}
template<typename C> inline size_t fWriteVector(FILE* f,const C& v) {
UINT32 s=v.size();
size_t rv=fWrite(f,s);
if(fwrite(&v[0],sizeof(typename C::value_type),s,f)!=s) {TRACE_ERR("ERROR: fwrite!\n");abort();}
return rv+sizeof(typename C::value_type)*s;
}
// Reads a container written by fWriteVector: count first, then raw bytes.
template<typename C> inline void fReadVector(FILE* f, C& v) {
  UINT32 s;fRead(f,s);
  v.resize(s);
  // Guard the empty case: &(*v.begin()) dereferences end() when s == 0,
  // which is undefined behaviour.
  if(s==0) return;
  size_t r=fread(&(*v.begin()),sizeof(typename C::value_type),s,f);
  if(r!=s) {TRACE_ERR("ERROR: freadVec! "<<r<<" "<<s<<"\n");abort();}
}
// Writes a length-prefixed character buffer: length (UINT32) then s raw
// chars (no terminating NUL).  Returns the total bytes written.
inline size_t fWriteString(FILE* f,const char* e, UINT32 s) {
size_t rv=fWrite(f,s);
if(fwrite(e,sizeof(char),s,f)!=s) {TRACE_ERR("ERROR:: fwrite!\n");abort();}
return rv+sizeof(char)*s;
}
inline void fReadString(FILE* f,std::string& e) {
UINT32 s;fRead(f,s);
char* a=new char[s+1];
if(fread(a,sizeof(char),s,f)!=s) {TRACE_ERR("ERROR: fread!\n");abort();}
a[s]='\0';
e.assign(a);
}
// Writes a vector of strings: count first, then each string length-prefixed.
inline size_t fWriteStringVector(FILE* f,const std::vector<std::string>& v) {
UINT32 s=v.size();
size_t totrv=fWrite(f,s);
for (size_t i=0;i<s;i++){ totrv+=fWriteString(f,v.at(i).c_str(),v.at(i).size()); }
return totrv;
}
// Reads a vector of strings written by fWriteStringVector.
inline void fReadStringVector(FILE* f, std::vector<std::string>& v) {
UINT32 s;fRead(f,s);v.resize(s);
for (size_t i=0;i<s;i++){ fReadString(f,v.at(i)); }
}
//! current file offset (64-bit-safe via FTELLO)
inline OFF_T fTell(FILE* f) {return FTELLO(f);}
//! absolute seek; aborts on failure, warning specially about InvalidOffT
inline void fSeek(FILE* f,OFF_T o) {
if(FSEEKO(f,o,SEEK_SET)<0) {
TRACE_ERR("ERROR: could not fseeko position "<<o<<"\n");
if(o==InvalidOffT) TRACE_ERR("You tried to seek for 'InvalidOffT'!\n");
abort();
}
}
//! fopen wrapper that reports the failing filename/mode and asserts on error
inline FILE* fOpen(const char* fn,const char* m) {
if(FILE* f=fopen(fn,m))
return f;
else {
UserMessage::Add(std::string("ERROR: could not open file ") + fn + " with mode " + m + "\n");
assert(false);
// NOTE(review): with NDEBUG the assert vanishes and NULL is returned --
// callers must be prepared for that, or this should abort unconditionally.
return NULL;
}
}
inline void fClose(FILE* f) {fclose(f);} // for consistent function names only
}
#endif

55
src/FilePtr.h Normal file
View File

@ -0,0 +1,55 @@
// $Id: FilePtr.h 2939 2010-02-24 11:15:44Z jfouet $
/* ---------------------------------------------------------------- */
/* Copyright 2005 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
/* Richard Zens */
/* ---------------------------------------------------------------- */
#ifndef moses_FilePtr_h
#define moses_FilePtr_h
#include "File.h"
namespace Moses
{
// smart pointer for on-demand loading from file
// requirement: T has a constructor T(FILE*)
// smart pointer for on-demand loading from file
// requirement: T has a constructor T(FILE*)
template<typename T> class FilePtr {
public:
typedef T* Ptr;
private:
FILE* f;      // file the object is loaded from (not owned)
OFF_T pos;    // offset of the serialized object, or InvalidOffT
mutable Ptr t; // lazily-constructed cache; mutable so const access can load
public:
FilePtr(FILE* f_=0,OFF_T p=0) : f(f_),pos(p),t(0) {}
// NOTE(review): the destructor does not delete t -- the owner must call
// free() explicitly.  Presumably deliberate so copies can share the raw
// pointer safely, but confirm there is no leak at the call sites.
~FilePtr() {}
void set(FILE* f_,OFF_T p) {f=f_;pos=p;}
void free() {delete t; t=0;}
// All accessors trigger a lazy load() first.
T& operator* () {load();return *t;}
Ptr operator->() {load();return t;}
operator Ptr () {load();return t;}
const T& operator* () const {load();return *t;}
Ptr operator->() const {load();return t;}
operator Ptr () const {load();return t;}
// direct access to pointer, use with care!
Ptr getPtr() {return t;}
Ptr getPtr() const {return t;}
// true when a file position has been set (loadable)
operator bool() const {return (f && pos!=InvalidOffT);}
// seek to the stored offset and construct T from the file, once
void load() const {
if(t) return;
if(f && pos!=InvalidOffT) {fSeek(f,pos); t=new T(f);}
}
};
}
#endif

34
src/FloydWarshall.cpp Normal file
View File

@ -0,0 +1,34 @@
#include <cassert>
#include <climits>
#include <vector>
#define MAX_DIST (INT_MAX / 2)  // "infinity", halved so dist[i][k]+dist[k][j] cannot overflow
//#include "FloydWarshall.h"

// All-pairs shortest path algorithm (Floyd-Warshall).
// edges[i][j] == true means a directed edge of length 1 from i to j.
// On return, dist[i][j] is the shortest path length from i to j, or
// MAX_DIST when unreachable.  The diagonal is initialised to MAX_DIST as
// well, so dist[i][i] ends up as the length of the shortest cycle through i.
void floyd_warshall(const std::vector<std::vector<bool> >& edges, std::vector<std::vector<int> >& dist)
{
  dist.clear();
  // Guard the empty graph: the original evaluated edges.front() (undefined
  // behaviour on an empty vector) inside the assert before doing any work.
  if (edges.empty()) return;
  assert(edges.size() == edges.front().size());
  dist.resize(edges.size(), std::vector<int>(edges.size(), 0));
  size_t num_edges = edges.size();
  // Initialise: direct edges cost 1, everything else (incl. diagonal) "infinite".
  for (size_t i=0; i<num_edges; ++i)
    for (size_t j=0; j<num_edges; ++j)
      dist[i][j] = (edges[i][j] && i != j) ? 1 : MAX_DIST;
  // Relax through every intermediate vertex k.
  for (size_t k=0; k<num_edges; ++k)
    for (size_t i=0; i<num_edges; ++i)
      for (size_t j=0; j<num_edges; ++j)
        if (dist[i][j] > (dist[i][k] + dist[k][j]))
          dist[i][j] = dist[i][k] + dist[k][j];
}

12
src/FloydWarshall.h Normal file
View File

@ -0,0 +1,12 @@
#ifndef moses_FloydWarshall_h
#define moses_FloydWarshall_h
#include <vector>
/**
* Floyd-Warshall all-pairs shortest path algorithm
* See CLR (1990). Introduction to Algorithms, p. 558-565
*/
void floyd_warshall(const std::vector<std::vector<bool> >& edges, std::vector<std::vector<int> >& distances);
#endif

View File

@ -0,0 +1,164 @@
// $Id: GenerationDictionary.cpp 2087 2009-02-06 15:43:06Z redpony $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <fstream>
#include <string>
#include "GenerationDictionary.h"
#include "FactorCollection.h"
#include "Word.h"
#include "Util.h"
#include "InputFileStream.h"
#include "StaticData.h"
#include "UserMessage.h"
using namespace std;
namespace Moses
{
// Registers this dictionary as a producer of numFeatures scores with the
// global score index.
GenerationDictionary::GenerationDictionary(size_t numFeatures, ScoreIndexManager &scoreIndexManager)
: Dictionary(numFeatures)
{
scoreIndexManager.AddScoreProducer(this);
}
// Loads a generation table.  Each line: input word | output word | feature
// values, with factors inside a word separated by '|'.  Returns false (with
// a user message) on unreadable file or too few feature values on a line.
// NOTE(review): token[0]/token[1]/factorString[i] are indexed without size
// checks -- a malformed line with too few columns or factors would go out of
// range; confirm input files are always well-formed.
bool GenerationDictionary::Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, const std::string &filePath
, FactorDirection direction)
{
FactorCollection &factorCollection = FactorCollection::Instance();
const size_t numFeatureValuesInConfig = this->GetNumScoreComponents();
//factors
m_inputFactors = FactorMask(input);
m_outputFactors = FactorMask(output);
VERBOSE(2,"GenerationDictionary: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);
// data from file
InputFileStream inFile(filePath);
if (!inFile.good()) {
UserMessage::Add(string("Couldn't read ") + filePath);
return false;
}
m_filePath = filePath;
string line;
size_t lineNum = 0;
while(getline(inFile, line))
{
++lineNum;
vector<string> token = Tokenize( line );
// add each line in generation file into class
Word *inputWord = new Word(); // deleted in destructor
Word outputWord;
// create word with certain factors filled out
// inputs
vector<string> factorString = Tokenize( token[0], "|" );
for (size_t i = 0 ; i < input.size() ; i++)
{
FactorType factorType = input[i];
const Factor *factor = factorCollection.AddFactor( direction, factorType, factorString[i]);
inputWord->SetFactor(factorType, factor);
}
factorString = Tokenize( token[1], "|" );
for (size_t i = 0 ; i < output.size() ; i++)
{
FactorType factorType = output[i];
const Factor *factor = factorCollection.AddFactor( direction, factorType, factorString[i]);
outputWord.SetFactor(factorType, factor);
}
size_t numFeaturesInFile = token.size() - 2;
if (numFeaturesInFile < numFeatureValuesInConfig) {
stringstream strme;
strme << filePath << ":" << lineNum << ": expected " << numFeatureValuesInConfig
<< " feature values, but found " << numFeaturesInFile << std::endl;
UserMessage::Add(strme.str());
// NOTE(review): inputWord allocated above is leaked on this error path.
return false;
}
std::vector<float> scores(numFeatureValuesInConfig, 0.0f);
for (size_t i = 0; i < numFeatureValuesInConfig; i++)
scores[i] = FloorScore(TransformScore(Scan<float>(token[2+i])));
Collection::iterator iterWord = m_collection.find(inputWord);
if (iterWord == m_collection.end())
{
m_collection[inputWord][outputWord].Assign(this, scores);
}
else
{ // source word already in there. delete input word to avoid mem leak
(iterWord->second)[outputWord].Assign(this, scores);
delete inputWord;
}
}
inFile.Close();
return true;
}
// Free the heap-allocated source-side Word keys owned by m_collection.
GenerationDictionary::~GenerationDictionary()
{
	Collection::const_iterator entry = m_collection.begin();
	while (entry != m_collection.end())
	{
		delete entry->first;
		++entry;
	}
}
// Number of feature values per generation-table entry (as configured in the
// ini file and passed to the constructor).
size_t GenerationDictionary::GetNumScoreComponents() const
{
	return m_numScoreComponent;
}
// Human-readable feature name, including the table file it was loaded from.
std::string GenerationDictionary::GetScoreProducerDescription() const
{
	std::string description("Generation score, file=");
	description += m_filePath;
	return description;
}
/** Look up the bag of output words for a particular input word.
 * Returns NULL when the word has no generation entries; lookup uses the
 * WordComparer functor of the underlying map. */
const OutputWordCollection *GenerationDictionary::FindWord(const Word &word) const
{
	Collection::const_iterator entry = m_collection.find(&word);
	if (entry == m_collection.end())
	{ // can't find source phrase
		return NULL;
	}
	return &entry->second;
}
// Generation scores are fixed per translation option, so they can be computed
// once and cached on the option rather than re-evaluated per hypothesis.
bool GenerationDictionary::ComputeValueInTranslationOption() const {
	return true;
}
}

View File

@ -0,0 +1,96 @@
// $Id: GenerationDictionary.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_GenerationDictionary_h
#define moses_GenerationDictionary_h
#include <list>
#include <map>
#include <vector>
#include "ScoreComponentCollection.h"
#include "Phrase.h"
#include "TypeDef.h"
#include "Dictionary.h"
#include "FeatureFunction.h"
namespace Moses
{
class FactorCollection;
typedef std::map < Word , ScoreComponentCollection > OutputWordCollection;
// 1st = output phrase
// 2nd = log probability (score)
/** Implementation of a generation table in a trie.
*/
class GenerationDictionary : public Dictionary, public StatelessFeatureFunction
{
	// maps a source word to its bag of output words + feature scores;
	// the Word* keys are heap-allocated by Load() and freed in the destructor
	typedef std::map<const Word* , OutputWordCollection, WordComparer> Collection;
protected:
	Collection m_collection;
	// 1st = source
	// 2nd = target
	std::string m_filePath;	// table file path, reported in the producer description

public:
	/** constructor.
	 * \param numFeatures number of score components, as specified in ini file
	 */
	GenerationDictionary(size_t numFeatures, ScoreIndexManager &scoreIndexManager);
	virtual ~GenerationDictionary();

	// returns Generate
	DecodeType GetDecodeType() const
	{
		return Generate;
	}

	//! load data file
	bool Load(const std::vector<FactorType> &input
					, const std::vector<FactorType> &output
					, const std::string &filePath
					, FactorDirection direction);

	size_t GetNumScoreComponents() const;
	std::string GetScoreProducerDescription() const;
	//! short name used for the weight in the ini file
	std::string GetScoreProducerWeightShortName() const
	{
		return "g";
	}

	/** number of unique input entries in the generation table.
	 * NOT the number of lines in the generation table
	 */
	size_t GetSize() const
	{
		return m_collection.size();
	}

	/** returns a bag of output words, OutputWordCollection, for a particular input word.
	 * Or NULL if the input word isn't found. The search function used is the WordComparer functor
	 */
	const OutputWordCollection *FindWord(const Word &word) const;
	virtual bool ComputeValueInTranslationOption() const;
};
}
#endif

185
src/GlobalLexicalModel.cpp Normal file
View File

@ -0,0 +1,185 @@
#include <fstream>
#include "GlobalLexicalModel.h"
#include "StaticData.h"
#include "InputFileStream.h"
namespace Moses
{
// Build the model: register as a score producer, install its single weight,
// load the lexicon file, and create the artificial **BIAS** input word that
// fires for every target word.
GlobalLexicalModel::GlobalLexicalModel(const string &filePath,
                                       const float weight,
                                       const vector< FactorType >& inFactors,
                                       const vector< FactorType >& outFactors)
{
	std::cerr << "Creating global lexical model...\n";

	// register as score producer
	// NOTE(review): const_cast works around StaticData exposing only const
	// accessors during construction — confirm this matches the other models
	const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
	std::vector< float > weights;
	weights.push_back( weight );
	const_cast<StaticData&>(StaticData::Instance()).SetWeightsForScoreProducer(this, weights);

	// load model
	LoadData( filePath, inFactors, outFactors );

	// define bias word (always treated as present in the input sentence)
	FactorCollection &factorCollection = FactorCollection::Instance();
	m_bias = new Word();
	const Factor* factor = factorCollection.AddFactor( Input, inFactors[0], "**BIAS**" );
	m_bias->SetFactor( inFactors[0], factor );

	// per-sentence cache is created lazily in InitializeForInput()
	m_cache = NULL;
}
GlobalLexicalModel::~GlobalLexicalModel(){
	// release every Word key owned by the two-level hash
	for (DoubleHash::const_iterator outer = m_hash.begin(); outer != m_hash.end(); ++outer)
	{
		const map< const Word*, float, WordComparer > &inner = outer->second;
		for (map< const Word*, float, WordComparer >::const_iterator innerIter = inner.begin();
				 innerIter != inner.end(); ++innerIter)
		{
			delete innerIter->first; // delete input word
		}
		delete outer->first; // delete output word
	}
	delete m_cache; // deleting NULL is a no-op
}
// Read the global lexicon from disk.
// File format, one entry per line: "<target-word> <source-word> <score>".
// Both Word objects are heap-allocated here and freed in the destructor.
void GlobalLexicalModel::LoadData(const string &filePath,
                                  const vector< FactorType >& inFactors,
                                  const vector< FactorType >& outFactors)
{
	FactorCollection &factorCollection = FactorCollection::Instance();
	const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();

	VERBOSE(2, "Loading global lexical model from file " << filePath << endl);

	m_inputFactors = FactorMask(inFactors);
	m_outputFactors = FactorMask(outFactors);
	InputFileStream inFile(filePath);

	// reading in data one line at a time
	size_t lineNum = 0;
	string line;
	while(getline(inFile, line))
	{
		++lineNum;
		vector<string> token = Tokenize<string>(line, " ");

		if (token.size() != 3) // format checking
		{
			stringstream errorMessage;
			errorMessage << "Syntax error at " << filePath << ":" << lineNum << endl << line << endl;
			UserMessage::Add(errorMessage.str());
			abort();
		}

		// create the output word
		Word *outWord = new Word();
		vector<string> factorString = Tokenize( token[0], factorDelimiter );
		for (size_t i=0 ; i < outFactors.size() ; i++)
		{
			const FactorDirection& direction = Output;
			const FactorType& factorType = outFactors[i];
			const Factor* factor = factorCollection.AddFactor( direction, factorType, factorString[i] );
			outWord->SetFactor( factorType, factor );
		}

		// create the input word
		Word *inWord = new Word();
		factorString = Tokenize( token[1], factorDelimiter );
		for (size_t i=0 ; i < inFactors.size() ; i++)
		{
			const FactorDirection& direction = Input;
			const FactorType& factorType = inFactors[i];
			const Factor* factor = factorCollection.AddFactor( direction, factorType, factorString[i] );
			inWord->SetFactor( factorType, factor );
		}

		// maximum entropy feature score
		float score = Scan<float>(token[2]);

		// store feature in hash
		DoubleHash::iterator keyOutWord = m_hash.find( outWord );
		if( keyOutWord == m_hash.end() )
		{
			m_hash[outWord][inWord] = score;
		}
		else // already have hash for outword, delete the word to avoid leaks
		{
			SingleHash &inner = keyOutWord->second;
			SingleHash::iterator keyInWord = inner.find( inWord );
			if( keyInWord == inner.end() )
			{
				inner[inWord] = score;
			}
			else // duplicate entry: map keeps the OLD key pointer, so the new
			{    // Word would leak — overwrite the score and delete it
				keyInWord->second = score;
				delete inWord;
			}
			delete outWord;
		}
	}
}
/** called before each sentence is translated: remember the input and start a
 * fresh phrase-score cache */
void GlobalLexicalModel::InitializeForInput( Sentence const& in )
{
	m_input = &in;
	delete m_cache; // deleting NULL is a no-op
	m_cache = new map< const TargetPhrase*, float >;
}
/** Sum, over all words of targetPhrase, the logistic-transformed sum of the
 * active lexicon features (bias + every distinct input-sentence word) for
 * that target word. */
float GlobalLexicalModel::ScorePhrase( const TargetPhrase& targetPhrase ) const
{
	float score = 0;
	for(size_t targetIndex = 0; targetIndex < targetPhrase.GetSize(); targetIndex++ )
	{
		float sum = 0;
		const Word& targetWord = targetPhrase.GetWord( targetIndex );
		VERBOSE(2,"glm " << targetWord << ": ");
		const DoubleHash::const_iterator targetWordHash = m_hash.find( &targetWord );
		if( targetWordHash != m_hash.end() )
		{
			// the bias feature fires whenever the target word has any entry
			SingleHash::const_iterator inputWordHash = targetWordHash->second.find( m_bias );
			if( inputWordHash != targetWordHash->second.end() )
			{
				VERBOSE(2,"*BIAS* " << inputWordHash->second);
				sum += inputWordHash->second;
			}

			set< const Word*, WordComparer > alreadyScored; // do not score a word twice
			for(size_t inputIndex = 0; inputIndex < m_input->GetSize(); inputIndex++ )
			{
				const Word& inputWord = m_input->GetWord( inputIndex );
				if ( alreadyScored.find( &inputWord ) == alreadyScored.end() )
				{
					SingleHash::const_iterator inputWordHash = targetWordHash->second.find( &inputWord );
					if( inputWordHash != targetWordHash->second.end() )
					{
						VERBOSE(2," " << inputWord << " " << inputWordHash->second);
						sum += inputWordHash->second;
					}
					alreadyScored.insert( &inputWord );
				}
			}
		}
		// Hal Daume says: 1/( 1 + exp [ - sum_i w_i * f_i ] )
		VERBOSE(2," p=" << FloorScore( log(1/(1+exp(-sum))) ) << endl);
		score += FloorScore( log(1/(1+exp(-sum))) );
	}
	return score;
}
/** memoizing wrapper around ScorePhrase(): each distinct TargetPhrase is
 * scored at most once per input sentence */
float GlobalLexicalModel::GetFromCacheOrScorePhrase( const TargetPhrase& targetPhrase ) const
{
	map< const TargetPhrase*, float >::const_iterator query = m_cache->find( &targetPhrase );
	if ( query != m_cache->end() )
	{
		return query->second;
	}

	const float score = ScorePhrase( targetPhrase );
	(*m_cache)[ &targetPhrase ] = score;
	std::cerr << "add to cache " << targetPhrase << ": " << score << endl;
	return score;
}
/** stateless-feature hook: add this phrase's (cached) global-lexicon score
 * to the score accumulator */
void GlobalLexicalModel::Evaluate(const TargetPhrase& targetPhrase, ScoreComponentCollection* accumulator) const
{
	accumulator->PlusEquals( this, GetFromCacheOrScorePhrase( targetPhrase ) );
}
}

76
src/GlobalLexicalModel.h Normal file
View File

@ -0,0 +1,76 @@
#ifndef moses_GlobalLexicalModel_h
#define moses_GlobalLexicalModel_h
#include <string>
#include <vector>
#include "Factor.h"
#include "Phrase.h"
#include "TypeDef.h"
#include "Util.h"
#include "WordsRange.h"
#include "ScoreProducer.h"
#include "FeatureFunction.h"
#include "FactorTypeSet.h"
#include "Sentence.h"
namespace Moses
{
class Factor;
class Phrase;
class Hypothesis;
class InputType;
using namespace std;
/** Discriminatively trained global lexicon model
* This is a implementation of Mauser et al., 2009's model that predicts
* each output word from _all_ the input words. The intuition behind this
* feature is that it uses context words for disambiguation
*/
class GlobalLexicalModel : public StatelessFeatureFunction {
	// outer key = target (output) word, inner key = source (input) word;
	// both Word* keys are heap-allocated and owned by this model (freed in dtor)
	typedef map< const Word*, map< const Word*, float, WordComparer >, WordComparer > DoubleHash;
	typedef map< const Word*, float, WordComparer > SingleHash;
private:
	DoubleHash m_hash;
	map< const TargetPhrase*, float > *m_cache; // per-sentence score cache, rebuilt in InitializeForInput()
	const Sentence *m_input;                    // current input sentence, set in InitializeForInput()
	Word *m_bias;                               // artificial **BIAS** word, always "present" in the input
	FactorMask m_inputFactors;
	FactorMask m_outputFactors;

	//! read the lexicon file: one "<target> <source> <score>" entry per line
	void LoadData(const string &filePath,
	              const vector< FactorType >& inFactors,
	              const vector< FactorType >& outFactors);

	float ScorePhrase( const TargetPhrase& targetPhrase ) const;
	float GetFromCacheOrScorePhrase( const TargetPhrase& targetPhrase ) const;

public:
	GlobalLexicalModel(const string &filePath,
	                   const float weight,
	                   const vector< FactorType >& inFactors,
	                   const vector< FactorType >& outFactors);
	virtual ~GlobalLexicalModel();

	virtual size_t GetNumScoreComponents() const {
		return 1;
	};

	virtual string GetScoreProducerDescription() const {
		return "GlobalLexicalModel";
	};

	virtual string GetScoreProducerWeightShortName() const {
		return "lex";
	};

	void InitializeForInput( Sentence const& in );

	void Evaluate(const TargetPhrase&, ScoreComponentCollection* ) const;
};
}
#endif

512
src/Hypothesis.cpp Normal file
View File

@ -0,0 +1,512 @@
// $Id: Hypothesis.cpp 2929 2010-02-22 23:42:35Z bhaddow $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cassert>
#include <iostream>
#include <limits>
#include <vector>
#include <algorithm>
#include "FFState.h"
#include "TranslationOption.h"
#include "TranslationOptionCollection.h"
#include "DummyScoreProducers.h"
#include "Hypothesis.h"
#include "Util.h"
#include "SquareMatrix.h"
#include "LexicalReordering.h"
#include "StaticData.h"
#include "InputType.h"
#include "LMList.h"
#include "Manager.h"
#include "hash.h"
using namespace std;
namespace Moses
{
unsigned int Hypothesis::s_HypothesesCreated = 0;
#ifdef USE_HYPO_POOL
ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
#endif
/** seed constructor: an empty hypothesis covering no source words, used once
 * per sentence to start the beam search; creates one empty feature-function
 * state per stateful feature */
Hypothesis::Hypothesis(Manager& manager, InputType const& source, const TargetPhrase &emptyTarget)
	: m_prevHypo(NULL)
	, m_targetPhrase(emptyTarget)
	, m_sourcePhrase(0)
	, m_sourceCompleted(source.GetSize())
	, m_sourceInput(source)
	, m_currSourceWordsRange(NOT_FOUND, NOT_FOUND)
	, m_currTargetWordsRange(NOT_FOUND, NOT_FOUND)
	, m_wordDeleted(false)
	, m_ffStates(StaticData::Instance().GetScoreIndexManager().GetStatefulFeatureFunctions().size())
	, m_arcList(NULL)
	, m_transOpt(NULL)
	, m_manager(manager)
	, m_id(0)
{	// used for initial seeding of trans process
	// initialize scores
	//_hash_computed = false;
	// reset the global hypothesis counter for this sentence (seed gets id 0)
	s_HypothesesCreated = 1;
	ResetScore();
	const vector<const StatefulFeatureFunction*>& ffs = StaticData::Instance().GetScoreIndexManager().GetStatefulFeatureFunctions();
	for (unsigned i = 0; i < ffs.size(); ++i)
		m_ffStates[i] = ffs[i]->EmptyHypothesisState();
}
/***
* continue prevHypo by appending the phrases in transOpt
*/
/** extension constructor: continue prevHypo by appending the target phrase of
 * transOpt; copies coverage and score breakdown, then marks the newly
 * translated source span */
Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
	: m_prevHypo(&prevHypo)
	, m_targetPhrase(transOpt.GetTargetPhrase())
	, m_sourcePhrase(transOpt.GetSourcePhrase())
	, m_sourceCompleted (prevHypo.m_sourceCompleted )
	, m_sourceInput (prevHypo.m_sourceInput)
	, m_currSourceWordsRange (transOpt.GetSourceWordsRange())
	// target range starts right after the previous hypothesis' last word
	, m_currTargetWordsRange ( prevHypo.m_currTargetWordsRange.GetEndPos() + 1
						 ,prevHypo.m_currTargetWordsRange.GetEndPos() + transOpt.GetTargetPhrase().GetSize())
	, m_wordDeleted(false)
	, m_totalScore(0.0f)
	, m_futureScore(0.0f)
	, m_scoreBreakdown (prevHypo.m_scoreBreakdown)
	, m_ffStates(prevHypo.m_ffStates.size())
	, m_arcList(NULL)
	, m_transOpt(&transOpt)
	, m_manager(prevHypo.GetManager())
	, m_id(s_HypothesesCreated++)
{
	// assert that we are not extending our hypothesis by retranslating something
	// that this hypothesis has already translated!
	assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange));

	//_hash_computed = false;
	m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true);
	m_wordDeleted = transOpt.IsDeletionOption();
}
Hypothesis::~Hypothesis()
{
	// release the feature-function states owned by this hypothesis
	for (size_t i = 0; i < m_ffStates.size(); ++i)
		delete m_ffStates[i];

	if (m_arcList == NULL)
		return;

	// free every recombined (loser) hypothesis hanging off this one
	for (ArcList::iterator arcIter = m_arcList->begin(); arcIter != m_arcList->end(); ++arcIter)
	{
		FREEHYPO(*arcIter);
	}
	m_arcList->clear();
	delete m_arcList;
	m_arcList = NULL;
}
/** record loserHypo as recombined into this hypothesis; takes over (or
 * merges in) the loser's own arc list so no alternative derivation is lost */
void Hypothesis::AddArc(Hypothesis *loserHypo)
{
	if (!m_arcList) {
		if (loserHypo->m_arcList) // we don't have an arcList, but loser does
		{
			this->m_arcList = loserHypo->m_arcList;	// take ownership, we'll delete
			loserHypo->m_arcList = 0;	// prevent a double deletion
		}
		else
			{ this->m_arcList = new ArcList(); }
	} else {
		if (loserHypo->m_arcList) {	// both have an arc list: merge. delete loser
			size_t my_size = m_arcList->size();
			size_t add_size = loserHypo->m_arcList->size();
			this->m_arcList->resize(my_size + add_size, 0);
			// bulk-copy the loser's arcs; safe since ArcList stores raw
			// Hypothesis* contiguously (std::vector)
			std::memcpy(&(*m_arcList)[0] + my_size, &(*loserHypo->m_arcList)[0], add_size * sizeof(Hypothesis *));
			delete loserHypo->m_arcList;
			loserHypo->m_arcList = 0;
		} else { // loserHypo doesn't have any arcs
			// DO NOTHING
		}
	}
	m_arcList->push_back(loserHypo);
}
/***
* return the subclass of Hypothesis most appropriate to the given translation option
*/
/** convenience wrapper: extend this hypothesis with transOpt, delegating to
 * Create(prevHypo, transOpt, constraint); may return NULL under constraint
 * decoding */
Hypothesis* Hypothesis::CreateNext(const TranslationOption &transOpt, const Phrase* constraint) const
{
	return Create(*this, transOpt, constraint);
}
/***
* return the subclass of Hypothesis most appropriate to the given translation option
*/
/** factory: extend prevHypo with transOpt, optionally checking the proposed
 * output against constrainingPhrase (constraint decoding); returns NULL when
 * the extension would violate or overrun the constraint */
Hypothesis* Hypothesis::Create(const Hypothesis &prevHypo, const TranslationOption &transOpt, const Phrase* constrainingPhrase)
{
	// This method includes code for constraint decoding
	bool createHypothesis = true;

	if (constrainingPhrase != NULL)
	{
		size_t constraintSize = constrainingPhrase->GetSize();
		// first target position the new phrase would occupy
		size_t start = 1 + prevHypo.GetCurrTargetWordsRange().GetEndPos();
		const Phrase &transOptPhrase = transOpt.GetTargetPhrase();
		size_t transOptSize = transOptPhrase.GetSize();
		size_t endpoint = start + transOptSize - 1;

		if (endpoint < constraintSize)
		{
			// compare the new phrase against the matching slice of the constraint
			WordsRange range(start, endpoint);
			Phrase relevantConstraint = constrainingPhrase->GetSubString(range);
			if ( ! relevantConstraint.IsCompatible(transOptPhrase) )
			{
				createHypothesis = false;
			}
		}
		else
		{
			// proposed extension would run past the end of the constraint
			createHypothesis = false;
		}
	}

	if (createHypothesis)
	{
#ifdef USE_HYPO_POOL
		Hypothesis *ptr = s_objectPool.getPtr();
		return new(ptr) Hypothesis(prevHypo, transOpt);
#else
		return new Hypothesis(prevHypo, transOpt);
#endif
	}
	else
	{
		// If the previous hypothesis plus the proposed translation option
		// fail to match the provided constraint,
		// return a null hypothesis.
		return NULL;
	}
}
/***
* return the subclass of Hypothesis most appropriate to the given target phrase
*/
/** factory for the initial (empty) seed hypothesis; allocates from the
 * object pool when USE_HYPO_POOL is defined, otherwise from the heap */
Hypothesis* Hypothesis::Create(Manager& manager, InputType const& m_source, const TargetPhrase &emptyTarget)
{
#ifdef USE_HYPO_POOL
	Hypothesis *ptr = s_objectPool.getPtr();
	return new(ptr) Hypothesis(manager, m_source, emptyTarget);
#else
	return new Hypothesis(manager, m_source, emptyTarget);
#endif
}
/** check, if two hypothesis can be recombined.
this is actually a sorting function that allows us to
keep an ordered list of hypotheses. This makes recombination
much quicker.
*/
/** three-way comparison used to keep hypothesis stacks ordered so that
 * recombinable hypotheses are adjacent: first by source coverage bitmap,
 * then by each stateful feature-function state */
int Hypothesis::RecombineCompare(const Hypothesis &compare) const
{	// -1 = this < compare
	// +1 = this > compare
	// 0	= this ==compare
	int comp = m_sourceCompleted.Compare(compare.m_sourceCompleted);
	if (comp != 0)
		return comp;

	for (unsigned i = 0; i < m_ffStates.size(); ++i) {
		if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL) {
			// NOTE(review): raw pointer difference narrowed to int — only used
			// to impose *some* deterministic order when a state is NULL
			comp = m_ffStates[i] - compare.m_ffStates[i];
		} else {
			comp = m_ffStates[i]->Compare(*compare.m_ffStates[i]);
		}
		if (comp != 0) return comp;
	}
	return 0;
}
/** zero the score breakdown and the total/future score accumulators */
void Hypothesis::ResetScore()
{
	m_scoreBreakdown.ZeroAll();
	m_futureScore = m_totalScore = 0.0f;
}
/***
* calculate the logarithm of our total translation score (sum up components)
*/
/** calculate the logarithm of the total translation score: cached
 * translation-option scores + stateless features + stateful features +
 * future-cost estimate, combined with the global weights */
void Hypothesis::CalcScore(const SquareMatrix &futureScore)
{
	// some stateless score producers cache their values in the translation
	// option: add these here
	// language model scores for n-grams completely contained within a target
	// phrase are also included here
	m_scoreBreakdown.PlusEquals(m_transOpt->GetScoreBreakdown());

	const StaticData &staticData = StaticData::Instance();
	clock_t t=0; // used to track time

	// compute values of stateless feature functions that were not
	// cached in the translation option-- there is no principled distinction
	const vector<const StatelessFeatureFunction*>& sfs =
		staticData.GetScoreIndexManager().GetStatelessFeatureFunctions();
	for (unsigned i = 0; i < sfs.size(); ++i) {
		sfs[i]->Evaluate(m_targetPhrase, &m_scoreBreakdown);
	}

	// stateful features also produce the state object stored on this hypothesis
	const vector<const StatefulFeatureFunction*>& ffs =
		staticData.GetScoreIndexManager().GetStatefulFeatureFunctions();
	for (unsigned i = 0; i < ffs.size(); ++i) {
		m_ffStates[i] = ffs[i]->Evaluate(
			*this,
			m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
			&m_scoreBreakdown);
	}

	IFVERBOSE(2) { t = clock(); } // track time excluding LM

	// FUTURE COST
	m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted );

	// TOTAL
	m_totalScore = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore;

	IFVERBOSE(2) { m_manager.GetSentenceStats().AddTimeOtherScore( clock()-t ); }
}
/** Calculates the expected score of extending this hypothesis with the
* specified translation option. Includes actual costs for everything
* except for expensive actual language model score.
* This function is used by early discarding.
* /param transOpt - translation option being considered
*/
/** Calculates the expected score of extending this hypothesis with the
 * specified translation option, used by early discarding.
 * NOTE(review): the distortion-score part is unimplemented — the assert
 * below fires (aborting in non-NDEBUG builds) whenever this is called. */
float Hypothesis::CalcExpectedScore( const SquareMatrix &futureScore ) {
	const StaticData &staticData = StaticData::Instance();
	clock_t t=0;
	IFVERBOSE(2) { t = clock(); } // track time excluding LM

	assert(!"Need to add code to get the distortion scores");
	//CalcDistortionScore();

	// LANGUAGE MODEL ESTIMATE (includes word penalty cost)
	float estimatedLMScore = m_transOpt->GetFutureScore() - m_transOpt->GetScoreBreakdown().InnerProduct(staticData.GetAllWeights());

	// FUTURE COST
	m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted );

	//LEXICAL REORDERING COST
	const std::vector<LexicalReordering*> &reorderModels = staticData.GetReorderModels();
	for(unsigned int i = 0; i < reorderModels.size(); i++)
	{
		m_scoreBreakdown.PlusEquals(reorderModels[i], reorderModels[i]->CalcScore(this));
	}

	// TOTAL
	float total = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore + estimatedLMScore;

	IFVERBOSE(2) { m_manager.GetSentenceStats().AddTimeEstimateScore( clock()-t ); }
	return total;
}
/** completes scoring for a hypothesis whose cheap parts were estimated by
 * CalcExpectedScore().
 * NOTE(review): the LM part is unimplemented — the assert below fires
 * (aborting in non-NDEBUG builds) whenever this is called. */
void Hypothesis::CalcRemainingScore()
{
	const StaticData &staticData = StaticData::Instance();
	clock_t t=0; // used to track time

	// LANGUAGE MODEL COST
	assert(!"Need to add code to get the LM score(s)");
	//CalcLMScore(staticData.GetAllLM());

	IFVERBOSE(2) { t = clock(); } // track time excluding LM

	// WORD PENALTY
	m_scoreBreakdown.PlusEquals(staticData.GetWordPenaltyProducer(), - (float) m_currTargetWordsRange.GetNumWordsCovered());

	// TOTAL
	m_totalScore = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore;

	IFVERBOSE(2) { m_manager.GetSentenceStats().AddTimeOtherScore( clock()-t ); }
}
//! backpointer to the hypothesis this one was expanded from (NULL for the seed)
const Hypothesis* Hypothesis::GetPrevHypo()const{
	return m_prevHypo;
}
/**
* print hypothesis information for pharaoh-style logging
*/
/** print hypothesis information for pharaoh-style logging (verbose trace):
 * previous-context words, covered source span, translation, and scores */
void Hypothesis::PrintHypothesis() const
{
	if (!m_prevHypo) { TRACE_ERR(endl << "NULL hypo" << endl); return; }
	TRACE_ERR(endl << "creating hypothesis "<< m_id <<" from "<< m_prevHypo->m_id<<" ( ");
	// show (up to) the last two target words of the previous hypothesis as context
	int end = (int)(m_prevHypo->m_targetPhrase.GetSize()-1);
	int start = end-1;
	if ( start < 0 ) start = 0;
	if ( m_prevHypo->m_currTargetWordsRange.GetStartPos() == NOT_FOUND ) {
		TRACE_ERR( "<s> ");
	}
	else {
		TRACE_ERR( "... ");
	}
	if (end>=0) {
		WordsRange range(start, end);
		TRACE_ERR( m_prevHypo->m_targetPhrase.GetSubString(range) << " ");
	}
	TRACE_ERR( ")"<<endl);
	TRACE_ERR( "\tbase score "<< (m_prevHypo->m_totalScore - m_prevHypo->m_futureScore) <<endl);
	TRACE_ERR( "\tcovering "<<m_currSourceWordsRange.GetStartPos()<<"-"<<m_currSourceWordsRange.GetEndPos()<<": "
	  << *m_sourcePhrase <<endl);
	TRACE_ERR( "\ttranslated as: "<<(Phrase&) m_targetPhrase<<endl); // <<" => translation cost "<<m_score[ScoreType::PhraseTrans];
	if (m_wordDeleted) TRACE_ERR( "\tword deleted"<<endl);
	// TRACE_ERR( "\tdistance: "<<GetCurrSourceWordsRange().CalcDistortion(m_prevHypo->GetCurrSourceWordsRange())); // << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<<endl;
	// TRACE_ERR( "\tlanguage model cost "); // <<m_score[ScoreType::LanguageModelScore]<<endl;
	// TRACE_ERR( "\tword penalty "); // <<(m_score[ScoreType::WordPenalty]*weightWordPenalty)<<endl;
	TRACE_ERR( "\tscore "<<m_totalScore - m_futureScore<<" + future cost "<<m_futureScore<<" = "<<m_totalScore<<endl);
	TRACE_ERR( "\tunweighted feature scores: " << m_scoreBreakdown << endl);
	//PrintLMScores();
}
/** make this hypothesis the winner of its recombination group, prune the arc
 * list down to what n-best extraction needs, and repoint remaining arcs here */
void Hypothesis::CleanupArcList()
{
	// point this hypo's main hypo to itself
	SetWinningHypo(this);

	if (!m_arcList) return;

	/* keep only number of arcs we need to create all n-best paths.
	 * However, may not be enough if only unique candidates are needed,
	 * so we'll keep all of arc list if we need a distinct n-best list
	 */
	const StaticData &staticData = StaticData::Instance();
	size_t nBestSize = staticData.GetNBestSize();
	bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.UseLatticeMBR() ;

	if (!distinctNBest && m_arcList->size() > nBestSize * 5)
	{ // prune arc list only if there are too many arcs
		// partial sort: the nBestSize best arcs end up at the front
		nth_element(m_arcList->begin()
							, m_arcList->begin() + nBestSize - 1
							, m_arcList->end()
							, CompareHypothesisTotalScore());

		// delete bad ones
		ArcList::iterator iter;
		for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter)
		{
			Hypothesis *arc = *iter;
			FREEHYPO(arc);
		}
		m_arcList->erase(m_arcList->begin() + nBestSize
										, m_arcList->end());
	}

	// set all arc's main hypo variable to this hypo
	ArcList::iterator iter = m_arcList->begin();
	for (; iter != m_arcList->end() ; ++iter)
	{
		Hypothesis *arc = *iter;
		arc->SetWinningHypo(this);
	}
}
TO_STRING_BODY(Hypothesis)
// friend
/** stream a one-line summary: partial translation, coverage bitmap, total
 * score and unweighted feature breakdown */
ostream& operator<<(ostream& out, const Hypothesis& hypothesis)
{
	hypothesis.ToStream(out);
	// words bitmap
	out << "[" << hypothesis.m_sourceCompleted << "] ";

	// scores
	out << " [total=" << hypothesis.GetTotalScore() << "]";
	out << " " << hypothesis.GetScoreBreakdown();

	// alignment
	return out;
}
/** string representation of the source phrase translated in the last
 * expansion step (empty for the seed hypothesis) */
std::string Hypothesis::GetSourcePhraseStringRep(const vector<FactorType> factorsToPrint) const
{
	if (!m_prevHypo) { return ""; }
	return m_sourcePhrase->GetStringRep(factorsToPrint);
#if 0
	// dead alternative kept from an earlier revision: derive the string from
	// the covered source range instead of the stored phrase
	if(m_sourcePhrase)
	{
		return m_sourcePhrase->GetSubString(m_currSourceWordsRange).GetStringRep(factorsToPrint);
	}
	else
	{
		return m_sourceInput.GetSubString(m_currSourceWordsRange).GetStringRep(factorsToPrint);
	}
#endif
}
/** string representation of the target phrase produced in the last expansion
 * step (empty for the seed hypothesis) */
std::string Hypothesis::GetTargetPhraseStringRep(const vector<FactorType> factorsToPrint) const
{
	if (m_prevHypo == NULL)
	{
		return "";
	}
	return m_targetPhrase.GetStringRep(factorsToPrint);
}
/** string representation of the last-translated source phrase using every
 * source factor */
std::string Hypothesis::GetSourcePhraseStringRep() const
{
	const size_t maxSourceFactors = StaticData::Instance().GetMaxNumFactors(Input);
	vector<FactorType> allFactors;
	allFactors.reserve(maxSourceFactors);
	for (size_t factor = 0; factor < maxSourceFactors; ++factor)
	{
		allFactors.push_back(factor);
	}
	return GetSourcePhraseStringRep(allFactors);
}
/** string representation of the last-produced target phrase using every
 * target factor */
std::string Hypothesis::GetTargetPhraseStringRep() const
{
	const size_t maxTargetFactors = StaticData::Instance().GetMaxNumFactors(Output);
	vector<FactorType> allFactors;
	allFactors.reserve(maxTargetFactors);
	for (size_t factor = 0; factor < maxTargetFactors; ++factor)
	{
		allFactors.push_back(factor);
	}
	return GetTargetPhraseStringRep(allFactors);
}
/** lexicalized-reordering scores cached on the translation option that
 * created this hypothesis */
const ScoreComponentCollection &Hypothesis::GetCachedReorderingScore() const
{
	return m_transOpt->GetReorderingScore();
}
}

322
src/Hypothesis.h Normal file
View File

@ -0,0 +1,322 @@
// $Id: Hypothesis.h 2939 2010-02-24 11:15:44Z jfouet $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_Hypothesis_h
#define moses_Hypothesis_h
#include <iostream>
#include <vector>
#include "Phrase.h"
#include "TypeDef.h"
#include "WordsBitmap.h"
#include "Sentence.h"
#include "Phrase.h"
#include "PhraseDictionaryMemory.h"
#include "GenerationDictionary.h"
#include "LanguageModelSingleFactor.h"
#include "ScoreComponentCollection.h"
#include "LexicalReordering.h"
#include "InputType.h"
#include "ObjectPool.h"
namespace Moses
{
class SquareMatrix;
class StaticData;
class TranslationOption;
class WordsRange;
class Hypothesis;
class FFState;
class Manager;
typedef std::vector<Hypothesis*> ArcList;
/** Used to store a state in the beam search
for the best translation. With its link back to the previous hypothesis
m_prevHypo, we can trace back to the sentence start to read of the
(partial) translation to this point.
The expansion of hypotheses is handled in the class Manager, which
stores active hypothesis in the search in hypothesis stacks.
***/
class Hypothesis
{
	friend std::ostream& operator<<(std::ostream&, const Hypothesis&);

protected:
	static ObjectPool<Hypothesis> s_objectPool; // pool used by FREEHYPO when USE_HYPO_POOL is defined

	const Hypothesis* m_prevHypo; /*! backpointer to previous hypothesis (from which this one was created) */
	// const Phrase &m_targetPhrase; /*! target phrase being created at the current decoding step */
	const TargetPhrase &m_targetPhrase; /*! target phrase being created at the current decoding step */
	Phrase const* m_sourcePhrase; /*! input sentence */
	WordsBitmap m_sourceCompleted; /*! keeps track of which words have been translated so far */
	//TODO: how to integrate this into confusion network framework; what if
	//it's a confusion network in the end???
	InputType const& m_sourceInput;
	WordsRange m_currSourceWordsRange; /*! source word positions of the last phrase that was used to create this hypothesis */
	WordsRange m_currTargetWordsRange; /*! target word positions of the last phrase that was used to create this hypothesis */
	bool m_wordDeleted; // whether the last expansion deleted a source word -- set in ctor (not visible here); TODO confirm
	float m_totalScore; /*! score so far */
	float m_futureScore; /*! estimated future cost to translate rest of sentence */
	ScoreComponentCollection m_scoreBreakdown; /*! detailed score break-down by components (for instance language model, word penalty, etc) */
	std::vector<const FFState*> m_ffStates; // per-feature-function states carried through the search
	const Hypothesis *m_winningHypo; // best hypothesis among those recombined into the same trellis point
	ArcList *m_arcList; /*! all arcs that end at the same trellis point as this hypothesis */
	const TranslationOption *m_transOpt; // option applied to m_prevHypo to create this hypothesis
	Manager& m_manager; // owning search manager
	int m_id; /*! numeric ID of this hypothesis, used for logging */

	static unsigned int s_HypothesesCreated; // Statistics: how many hypotheses were created in total

	/*! used by initial seeding of the translation process */
	Hypothesis(Manager& manager, InputType const& source, const TargetPhrase &emptyTarget);
	/*! used when creating a new hypothesis using a translation option (phrase translation) */
	Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt);

public:
	//! access the shared object pool (only meaningful with USE_HYPO_POOL)
	static ObjectPool<Hypothesis> &GetObjectPool()
	{
		return s_objectPool;
	}

	~Hypothesis();

	/** return the subclass of Hypothesis most appropriate to the given translation option */
	static Hypothesis* Create(const Hypothesis &prevHypo, const TranslationOption &transOpt, const Phrase* constraint);
	static Hypothesis* Create(Manager& manager, const WordsBitmap &initialCoverage);
	/** return the subclass of Hypothesis most appropriate to the given target phrase */
	static Hypothesis* Create(Manager& manager, InputType const& source, const TargetPhrase &emptyTarget);
	/** return the subclass of Hypothesis most appropriate to the given translation option */
	Hypothesis* CreateNext(const TranslationOption &transOpt, const Phrase* constraint) const;

	void PrintHypothesis() const;

	//! source sentence (or other input type) being decoded
	const InputType& GetInput() const {return m_sourceInput;}

	/** return target phrase used to create this hypothesis */
	// const Phrase &GetCurrTargetPhrase() const
	const TargetPhrase &GetCurrTargetPhrase() const
	{
		return m_targetPhrase;
	}

	// void PrintLMScores(const LMList &lmListInitial, const LMList &lmListEnd) const;
	/** return input positions covered by the translation option (phrasal translation) used to create this hypothesis */
	inline const WordsRange &GetCurrSourceWordsRange() const
	{
		return m_currSourceWordsRange;
	}

	//! target positions filled by the last applied phrase
	inline const WordsRange &GetCurrTargetWordsRange() const
	{
		return m_currTargetWordsRange;
	}

	//! the search manager that owns this hypothesis
	Manager& GetManager() const
	{
		return m_manager;
	}

	/** output length of the translation option used to create this hypothesis */
	inline size_t GetCurrTargetLength() const
	{
		return m_currTargetWordsRange.GetNumWordsCovered();
	}

	void ResetScore();
	void CalcScore(const SquareMatrix &futureScore);
	float CalcExpectedScore( const SquareMatrix &futureScore );
	void CalcRemainingScore();

	int GetId()const
	{
		return m_id;
	}

	const Hypothesis* GetPrevHypo() const;

	/** length of the partial translation (from the start of the sentence) */
	inline size_t GetSize() const
	{
		return m_currTargetWordsRange.GetEndPos() + 1;
	}

	inline const Phrase* GetSourcePhrase() const
	{
		return m_sourcePhrase;
	}

	std::string GetSourcePhraseStringRep(const vector<FactorType> factorsToPrint) const;
	std::string GetTargetPhraseStringRep(const vector<FactorType> factorsToPrint) const;
	// NOTE: returns by value (a copy of the TargetPhrase), unlike GetCurrTargetPhrase()
	inline const TargetPhrase GetTargetPhrase() const { return m_targetPhrase; }
	std::string GetSourcePhraseStringRep() const;
	std::string GetTargetPhraseStringRep() const;

	/** curr - pos is relative from CURRENT hypothesis's starting index
	 * (ie, start of sentence would be some negative number, which is
	 * not allowed- USE WITH CAUTION) */
	inline const Word &GetCurrWord(size_t pos) const
	{
		return m_targetPhrase.GetWord(pos);
	}
	inline const Factor *GetCurrFactor(size_t pos, FactorType factorType) const
	{
		return m_targetPhrase.GetFactor(pos, factorType);
	}
	/** recursive - pos is relative from start of sentence */
	inline const Word &GetWord(size_t pos) const
	{
		// walk back along the back-pointer chain until we reach the
		// hypothesis whose target span contains pos
		const Hypothesis *hypo = this;
		while (pos < hypo->GetCurrTargetWordsRange().GetStartPos())
		{
			hypo = hypo->GetPrevHypo();
			assert(hypo != NULL);
		}
		return hypo->GetCurrWord(pos - hypo->GetCurrTargetWordsRange().GetStartPos());
	}
	inline const Factor* GetFactor(size_t pos, FactorType factorType) const
	{
		return GetWord(pos)[factorType];
	}

	/***
	 * \return The bitmap of source words we cover
	 */
	inline const WordsBitmap &GetWordsBitmap() const
	{
		return m_sourceCompleted;
	}

	//! true once every source word has been translated
	inline bool IsSourceCompleted() const {
		return m_sourceCompleted.IsComplete();
	}

	//! three-way comparison used for recombination; 0 means "recombinable"
	int RecombineCompare(const Hypothesis &compare) const;

	//! recursively print the full partial translation, oldest phrase first
	void ToStream(std::ostream& out) const
	{
		if (m_prevHypo != NULL)
		{
			m_prevHypo->ToStream(out);
		}
		out << (Phrase) GetCurrTargetPhrase();
	}

	inline bool PrintAlignmentInfo() const{ return GetCurrTargetPhrase().PrintAlignmentInfo(); }

	TO_STRING();

	inline void SetWinningHypo(const Hypothesis *hypo)
	{
		m_winningHypo = hypo;
	}
	inline const Hypothesis *GetWinningHypo() const
	{
		return m_winningHypo;
	}

	void AddArc(Hypothesis *loserHypo);
	void CleanupArcList();

	//! returns a list alternative previous hypotheses (or NULL if n-best support is disabled)
	inline const ArcList* GetArcList() const
	{
		return m_arcList;
	}
	const ScoreComponentCollection& GetScoreBreakdown() const
	{
		return m_scoreBreakdown;
	}
	float GetTotalScore() const { return m_totalScore; }
	//! total score with the future-cost estimate subtracted back out
	float GetScore() const { return m_totalScore-m_futureScore; }
	//! target span that trans opt would populate if applied to this hypo. Used for alignment check
	size_t GetNextStartPos(const TranslationOption &transOpt) const;

	// LM statistics collection is disabled in this implementation
	std::vector<std::vector<unsigned int> > *GetLMStats() const { return NULL; }

	static unsigned int GetHypothesesCreated()
	{
		return s_HypothesesCreated;
	}

	const ScoreComponentCollection &GetCachedReorderingScore() const;

	const TranslationOption &GetTranslationOption() const
	{ return *m_transOpt; }
};
std::ostream& operator<<(std::ostream& out, const Hypothesis& hypothesis);
// sorting helper
struct CompareHypothesisTotalScore
{
bool operator()(const Hypothesis* hypo1, const Hypothesis* hypo2) const
{
return hypo1->GetTotalScore() > hypo2->GetTotalScore();
}
};
#ifdef USE_HYPO_POOL
#define FREEHYPO(hypo) \
{ \
ObjectPool<Hypothesis> &pool = Hypothesis::GetObjectPool(); \
pool.freeObject(hypo); \
} \
#else
#define FREEHYPO(hypo) delete hypo
#endif
/** defines less-than relation on hypotheses.
* The particular order is not important for us, we need just to figure out
* which hypothesis are equal based on:
* the last n-1 target words are the same
* and the covers (source words translated) are the same
*/
class HypothesisRecombinationOrderer
{
public:
bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const
{
return hypoA->RecombineCompare(*hypoB) < 0;
}
};
}
#endif

31
src/HypothesisStack.cpp Normal file
View File

@ -0,0 +1,31 @@
#include "HypothesisStack.h"
namespace Moses
{
HypothesisStack::~HypothesisStack()
{
	// Free every remaining hypothesis; Remove() both detaches and destroys.
	while (!m_hypos.empty())
	{
		Remove(m_hypos.begin());
	}
}
/** Remove hypothesis pointed to by iterator but don't delete the object. */
void HypothesisStack::Detach(const HypothesisStack::iterator &iter)
{
	// only unlinks from the set; ownership of *iter passes to the caller
	m_hypos.erase(iter);
}
/** Destroy the hypothesis pointed to by iter and drop it from the stack. */
void HypothesisStack::Remove(const HypothesisStack::iterator &iter)
{
	Hypothesis *doomed = *iter;
	Detach(iter);
	// FREEHYPO either deletes or returns the object to the pool
	FREEHYPO(doomed);
}
}

48
src/HypothesisStack.h Normal file
View File

@ -0,0 +1,48 @@
#ifndef moses_HypothesisStack_h
#define moses_HypothesisStack_h
#include <limits>
#include <set>
#include <vector>

#include "Hypothesis.h"
#include "WordsBitmap.h"
namespace Moses
{
class Manager;
/** Abstract base class for hypothesis stacks: a recombining set of
 * Hypothesis pointers keyed by HypothesisRecombinationOrderer, owned by
 * a Manager. Concrete subclasses implement pruning policies. */
class HypothesisStack
{
protected:
	typedef std::set< Hypothesis*, HypothesisRecombinationOrderer > _HCType;
	_HCType m_hypos; /**< contains hypotheses */
	Manager& m_manager;

public:
	HypothesisStack(Manager& manager): m_manager(manager) {}
	typedef _HCType::iterator iterator;
	typedef _HCType::const_iterator const_iterator;
	//! iterators
	const_iterator begin() const { return m_hypos.begin(); }
	const_iterator end() const { return m_hypos.end(); }
	size_t size() const { return m_hypos.size(); }
	// BUGFIX: numeric_limits must be fully qualified -- this is a header and
	// cannot rely on a 'using namespace std' in whichever file includes it.
	virtual inline float GetWorstScore() const { return -std::numeric_limits<float>::infinity(); };
	virtual float GetWorstScoreForBitmap( WordsBitmapID ) { return -std::numeric_limits<float>::infinity(); };
	virtual float GetWorstScoreForBitmap( WordsBitmap ) { return -std::numeric_limits<float>::infinity(); };

	virtual ~HypothesisStack();
	//! add hypothesis, pruning if necessary; false if it was recombined or discarded
	virtual bool AddPrune(Hypothesis *hypothesis) = 0;
	//! hypothesis with the highest total score on this stack (NULL if empty)
	virtual const Hypothesis *GetBestHypothesis() const = 0;
	//! all hypotheses, sorted by descending total score
	virtual std::vector<const Hypothesis*> GetSortedList() const = 0;
	//! remove hypothesis pointed to by iterator but don't delete the object
	virtual void Detach(const HypothesisStack::iterator &iter);
	/** destroy Hypothesis pointed to by iterator (object pool version) */
	virtual void Remove(const HypothesisStack::iterator &iter);
};
}
#endif

View File

@ -0,0 +1,315 @@
// $Id: HypothesisStackCubePruning.cpp 2477 2009-08-07 16:47:54Z bhaddow $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include <set>
#include <queue>
#include "HypothesisStackCubePruning.h"
#include "TypeDef.h"
#include "Util.h"
#include "StaticData.h"
#include "Manager.h"
using namespace std;
namespace Moses
{
HypothesisStackCubePruning::HypothesisStackCubePruning(Manager& manager) :
	HypothesisStack(manager)
{
	// empty stack: best/worst scores start at -infinity
	m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
	m_worstScore = -std::numeric_limits<float>::infinity();
	m_bestScore = -std::numeric_limits<float>::infinity();
}
/** remove all hypotheses from the collection */
void HypothesisStackCubePruning::RemoveAll()
{
	// delete all bitmap accessors;
	// NOTE(review): the hypotheses themselves are not freed here -- presumably
	// they are owned/cleaned up via the BitmapContainers; the map entries are
	// cleared by the destructor right after this call. Verify against callers.
	_BMType::iterator iter;
	for (iter = m_bitmapAccessor.begin(); iter != m_bitmapAccessor.end(); ++iter)
	{
		delete iter->second;
	}
}
/** Insert hypo into the recombining set; on success, update the best/worst
 * score bookkeeping and trigger lazy pruning when the stack overflows.
 * ret.second is false when an equivalent (recombinable) hypothesis exists. */
pair<HypothesisStackCubePruning::iterator, bool> HypothesisStackCubePruning::Add(Hypothesis *hypo)
{
	std::pair<iterator, bool> ret = m_hypos.insert(hypo);
	if (ret.second)
	{ // equiv hypo doesn't exists
		VERBOSE(3,"added hyp to stack");

		// Update best score, if this hypothesis is new best
		if (hypo->GetTotalScore() > m_bestScore)
		{
			VERBOSE(3,", best on stack");
			m_bestScore = hypo->GetTotalScore();
			// this may also affect the worst score
			// (worst = best + beamWidth; m_beamWidth is presumably <= 0 -- TODO confirm)
			if ( m_bestScore + m_beamWidth > m_worstScore )
				m_worstScore = m_bestScore + m_beamWidth;
		}

		// Prune only if stack is twice as big as needed (lazy pruning)
		VERBOSE(3,", now size " << m_hypos.size());
		if (m_hypos.size() > 2*m_maxHypoStackSize-1)
		{
			PruneToSize(m_maxHypoStackSize);
		}
		else {
			VERBOSE(3,std::endl);
		}
	}

	return ret;
}
/** Public entry point for adding a hypothesis: discard if below the stack's
 * worst score, otherwise insert, recombining with an equivalent hypothesis
 * if one exists (the better of the two survives).
 * Returns true only when hypo was added as a genuinely new entry. */
bool HypothesisStackCubePruning::AddPrune(Hypothesis *hypo)
{
	if (hypo->GetTotalScore() < m_worstScore)
	{ // too bad for stack. don't bother adding hypo into collection
		m_manager.GetSentenceStats().AddDiscarded();
		VERBOSE(3,"discarded, too bad for stack" << std::endl);
		FREEHYPO(hypo);
		return false;
	}

	// over threshold, try to add to collection
	std::pair<iterator, bool> addRet = Add(hypo);
	if (addRet.second)
	{ // nothing found. add to collection
		return true;
	}

	// equiv hypo exists, recombine with other hypo
	iterator &iterExisting = addRet.first;
	Hypothesis *hypoExisting = *iterExisting;
	assert(iterExisting != m_hypos.end());

	m_manager.GetSentenceStats().AddRecombination(*hypo, **iterExisting);

	// found existing hypo with same target ending.
	// keep the best 1
	if (hypo->GetTotalScore() > hypoExisting->GetTotalScore())
	{ // incoming hypo is better than the one we have
		VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, ");
		if (m_nBestIsEnabled) {
			// keep the displaced hypothesis alive as an arc for n-best extraction
			hypo->AddArc(hypoExisting);
			Detach(iterExisting);
		} else {
			Remove(iterExisting);
		}

		// re-insert must succeed: the only equivalent hypothesis was just removed
		bool added = Add(hypo).second;
		if (!added)
		{
			iterExisting = m_hypos.find(hypo);
			TRACE_ERR("Offending hypo = " << **iterExisting << endl);
			assert(false);
		}
		return false;
	}
	else
	{ // already storing the best hypo. discard current hypo
		VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl)
		if (m_nBestIsEnabled) {
			hypoExisting->AddArc(hypo);
		} else {
			FREEHYPO(hypo);
		}
		return false;
	}
}
/** Seed the stack with the initial (empty) hypothesis and create the
 * BitmapContainer for its (empty) source coverage. The assert holds
 * because the stack contains nothing it could recombine with. */
void HypothesisStackCubePruning::AddInitial(Hypothesis *hypo)
{
	std::pair<iterator, bool> addRet = Add(hypo);
	assert (addRet.second);

	const WordsBitmap &bitmap = hypo->GetWordsBitmap();
	m_bitmapAccessor[bitmap] = new BitmapContainer(bitmap, *this);
}
/** Prune the stack down to at most newSize hypotheses.
 * Finds a score threshold (the newSize-th best score above the beam
 * threshold) and deletes every hypothesis scoring below it; the threshold
 * then becomes the stack's worst admissible score.
 * \param newSize maximum size */
void HypothesisStackCubePruning::PruneToSize(size_t newSize)
{
	if (m_hypos.size() > newSize) // ok, if not over the limit
	{
		priority_queue<float> bestScores;

		// push all scores to a heap
		// (but never push scores below m_bestScore+m_beamWidth)
		iterator iter = m_hypos.begin();
		while (iter != m_hypos.end())
		{
			Hypothesis *hypo = *iter;
			float score = hypo->GetTotalScore();
			if (score > m_bestScore+m_beamWidth)
			{
				bestScores.push(score);
			}
			++iter;
		}

		float scoreThreshold;
		if (bestScores.empty())
		{
			// BUGFIX: if no score beat the beam threshold (possible e.g. when
			// m_beamWidth == 0), calling top() on the empty heap below would
			// be undefined behaviour. Fall back to the beam threshold itself.
			scoreThreshold = m_bestScore + m_beamWidth;
		}
		else
		{
			// pop the top newSize scores (and ignore them, these are the scores of hyps that will remain)
			// ensure to never pop beyond heap size
			size_t minNewSizeHeapSize = newSize > bestScores.size() ? bestScores.size() : newSize;
			for (size_t i = 1 ; i < minNewSizeHeapSize ; i++)
				bestScores.pop();

			// and remember the threshold
			scoreThreshold = bestScores.top();
		}

		// delete all hypos under score threshold
		iter = m_hypos.begin();
		while (iter != m_hypos.end())
		{
			Hypothesis *hypo = *iter;
			float score = hypo->GetTotalScore();
			if (score < scoreThreshold)
			{
				// advance before erasing so the loop iterator stays valid
				iterator iterRemove = iter++;
				Remove(iterRemove);
				m_manager.GetSentenceStats().AddPruning();
			}
			else
			{
				++iter;
			}
		}
		VERBOSE(3,", pruned to size " << size() << endl);

		IFVERBOSE(3)
		{
			TRACE_ERR("stack now contains: ");
			for(iter = m_hypos.begin(); iter != m_hypos.end(); iter++)
			{
				Hypothesis *hypo = *iter;
				TRACE_ERR( hypo->GetId() << " (" << hypo->GetTotalScore() << ") ");
			}
			TRACE_ERR( endl);
		}

		// set the worstScore, so that newly generated hypotheses will not be added if worse than the worst in the stack
		m_worstScore = scoreThreshold;
	}
}
/** Linear scan for the hypothesis with the highest total score;
 * returns NULL when the stack is empty. */
const Hypothesis *HypothesisStackCubePruning::GetBestHypothesis() const
{
	const Hypothesis *best = NULL;
	for (const_iterator iter = m_hypos.begin(); iter != m_hypos.end(); ++iter)
	{
		if (best == NULL || (*iter)->GetTotalScore() > best->GetTotalScore())
			best = *iter;
	}
	return best;
}
/** Copy the hypothesis pointers out of the set and order them best-first. */
vector<const Hypothesis*> HypothesisStackCubePruning::GetSortedList() const
{
	vector<const Hypothesis*> sorted(m_hypos.begin(), m_hypos.end());
	std::sort(sorted.begin(), sorted.end(), CompareHypothesisTotalScore());
	return sorted;
}
/** Clean up every hypothesis's arc list; arcs are only maintained
 * when an n-best list was requested, so this is a no-op otherwise. */
void HypothesisStackCubePruning::CleanupArcList()
{
	if (!m_nBestIsEnabled)
		return;

	for (iterator iter = m_hypos.begin(); iter != m_hypos.end(); ++iter)
		(*iter)->CleanupArcList();
}
/** Look up (lazily creating) the BitmapContainer for newBitmap and register
 * a new BackwardsEdge on it, connecting bitmapContainer's hypotheses to the
 * given translation options for cube-pruning expansion. */
void HypothesisStackCubePruning::SetBitmapAccessor(const WordsBitmap &newBitmap
					, HypothesisStackCubePruning &stack
					, const WordsRange &range
					, BitmapContainer &bitmapContainer
					, const SquareMatrix &futureScore
					, const TranslationOptionList &transOptList)
{
	// find the container for this coverage vector, creating it on first use
	_BMType::iterator bcExists = m_bitmapAccessor.find(newBitmap);

	BitmapContainer *bmContainer;
	if (bcExists == m_bitmapAccessor.end()) {
		bmContainer = new BitmapContainer(newBitmap, stack);
		m_bitmapAccessor[newBitmap] = bmContainer;
	}
	else {
		bmContainer = bcExists->second;
	}

	// NOTE(review): the 'range' parameter is unused in this function --
	// presumably the span is implied by transOptList; verify against callers.
	// The container takes ownership of the edge.
	BackwardsEdge *edge = new BackwardsEdge(bitmapContainer
										  , *bmContainer
										  , transOptList
										  , futureScore,
										  m_manager.GetSource());
	bmContainer->AddBackwardsEdge(edge);
}
TO_STRING_BODY(HypothesisStackCubePruning);
// friend
/** Stream every hypothesis on its own line, in set order. */
std::ostream& operator<<(std::ostream& out, const HypothesisStackCubePruning& hypoColl)
{
	for (HypothesisStackCubePruning::const_iterator iter = hypoColl.begin();
			iter != hypoColl.end(); ++iter)
	{
		out << **iter << endl;
	}
	return out;
}
/** Distribute every hypothesis on this stack into the BitmapContainer
 * matching its source-coverage bitmap. */
void
HypothesisStackCubePruning::AddHypothesesToBitmapContainers()
{
	HypothesisStackCubePruning::const_iterator iter;
	for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter)
	{
		Hypothesis *h = *iter;
		const WordsBitmap &bitmap = h->GetWordsBitmap();
		// NOTE(review): operator[] default-inserts NULL if this bitmap has no
		// container yet -- AddInitial/SetBitmapAccessor are assumed to have
		// created it beforehand; a missing entry would deref NULL here.
		BitmapContainer *container = m_bitmapAccessor[bitmap];
		container->AddHypothesis(h);
	}
}
}

View File

@ -0,0 +1,154 @@
// $Id: HypothesisStackCubePruning.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_HypothesisStackCubePruning_h
#define moses_HypothesisStackCubePruning_h
#include <limits>
#include <map>
#include <set>
#include "Hypothesis.h"
#include "BitmapContainer.h"
#include "HypothesisStack.h"
namespace Moses
{
class BitmapContainer;
class TranslationOptionList;
class Manager;
typedef std::map<WordsBitmap, BitmapContainer*> _BMType;
/** Stack for instances of Hypothesis, includes functions for pruning. */
class HypothesisStackCubePruning : public HypothesisStack
{
public:
	friend std::ostream& operator<<(std::ostream&, const HypothesisStackCubePruning&);

protected:
	_BMType m_bitmapAccessor; /**< maps each source-coverage bitmap to its BitmapContainer (owned) */
	float m_bestScore; /**< score of the best hypothesis in collection */
	float m_worstScore; /**< score of the worst hypothesis in collection */
	float m_beamWidth; /**< minimum score due to threshold pruning */
	size_t m_maxHypoStackSize; /**< maximum number of hypothesis allowed in this stack */
	bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */

	/** add hypothesis to stack. Prune if necessary.
	 * Returns false if equiv hypo exists in collection, otherwise returns true
	 */
	std::pair<HypothesisStackCubePruning::iterator, bool> Add(Hypothesis *hypothesis);

	/** destroy all instances of Hypothesis in this collection */
	void RemoveAll();

public:
	HypothesisStackCubePruning(Manager& manager);

	~HypothesisStackCubePruning()
	{
		// frees the BitmapContainers, then drops the (now dangling) map entries;
		// base-class dtor frees the remaining hypotheses
		RemoveAll();
		m_bitmapAccessor.clear();
	}

	/** adds the hypo, but only if within thresholds (beamThr, stackSize).
	 * This function will recombine hypotheses silently! There is no record
	 * (could affect n-best list generation...TODO)
	 * Call stack for adding hypothesis is
			AddPrune()
				Add()
					AddNoPrune()
	*/
	bool AddPrune(Hypothesis *hypothesis);

	//! seed the stack with the initial hypothesis and its BitmapContainer
	void AddInitial(Hypothesis *hypo);

	/** set maximum number of hypotheses in the collection
	 * \param maxHypoStackSize maximum number (typical number: 100)
	 */
	inline void SetMaxHypoStackSize(size_t maxHypoStackSize)
	{
		m_maxHypoStackSize = maxHypoStackSize;
	}

	inline size_t GetMaxHypoStackSize() const
	{
		return m_maxHypoStackSize;
	}

	/** set beam threshold, hypotheses in the stack must not be worse than
	 * this factor times the best score to be allowed in the stack
	 * \param beamThreshold minimum factor (typical number: 0.03)
	 */
	inline void SetBeamWidth(float beamWidth)
	{
		m_beamWidth = beamWidth;
	}
	/** return score of the best hypothesis in the stack */
	inline float GetBestScore() const
	{
		return m_bestScore;
	}
	/** return worst score allowed for the stack */
	inline float GetWorstScore() const
	{
		return m_worstScore;
	}

	//! move every hypothesis on this stack into its coverage-matching BitmapContainer
	void AddHypothesesToBitmapContainers();

	const _BMType& GetBitmapAccessor() const
	{
		return m_bitmapAccessor;
	}

	//! register a BackwardsEdge from bitmapContainer to the container for newBitmap
	void SetBitmapAccessor(const WordsBitmap &newBitmap
						   , HypothesisStackCubePruning &stack
						   , const WordsRange &range
						   , BitmapContainer &bitmapContainer
						   , const SquareMatrix &futureScore
						   , const TranslationOptionList &transOptList);

	/** pruning, if too large.
	 * Pruning algorithm: find a threshold and delete all hypothesis below it.
	 * The threshold is chosen so that exactly newSize top items remain on the
	 * stack in fact, in situations where some of the hypothesis fell below
	 * m_beamWidth, the stack will contain less items.
	 * \param newSize maximum size */
	void PruneToSize(size_t newSize);

	//! return the hypothesis with best score. Used to get the translated at end of decoding
	const Hypothesis *GetBestHypothesis() const;
	//! return all hypothesis, sorted by descending score. Used in creation of N best list
	std::vector<const Hypothesis*> GetSortedList() const;

	/** make all arcs in point to the equiv hypothesis that contains them.
	 * Ie update doubly linked list be hypo & arcs
	 */
	void CleanupArcList();

	TO_STRING();
};
}
#endif

View File

@ -0,0 +1,303 @@
// $Id: HypothesisStackNormal.cpp 1511 2007-11-12 20:21:44Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include <set>
#include <queue>
#include "HypothesisStackNormal.h"
#include "TypeDef.h"
#include "Util.h"
#include "StaticData.h"
#include "Manager.h"
using namespace std;
namespace Moses
{
HypothesisStackNormal::HypothesisStackNormal(Manager& manager) :
	HypothesisStack(manager)
{
	// empty stack: best/worst scores start at -infinity
	m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
	m_worstScore = -std::numeric_limits<float>::infinity();
	m_bestScore = -std::numeric_limits<float>::infinity();
}
/** remove all hypotheses from the collection */
void HypothesisStackNormal::RemoveAll()
{
	// Remove() frees each hypothesis and erases it from the set
	while (!m_hypos.empty())
	{
		Remove(m_hypos.begin());
	}
}
/** Insert hypo into the recombining set; on success, update best/worst
 * score and per-coverage diversity bookkeeping, then trigger lazy pruning
 * when the stack overflows its tolerated size.
 * ret.second is false when an equivalent (recombinable) hypothesis exists. */
pair<HypothesisStackNormal::iterator, bool> HypothesisStackNormal::Add(Hypothesis *hypo)
{
	std::pair<iterator, bool> ret = m_hypos.insert(hypo);
	if (ret.second)
	{ // equiv hypo doesn't exists
		VERBOSE(3,"added hyp to stack");

		// Update best score, if this hypothesis is new best
		if (hypo->GetTotalScore() > m_bestScore)
		{
			VERBOSE(3,", best on stack");
			m_bestScore = hypo->GetTotalScore();
			// this may also affect the worst score
			if ( m_bestScore + m_beamWidth > m_worstScore )
				m_worstScore = m_bestScore + m_beamWidth;
		}

		// update best/worst score for stack diversity 1
		if ( m_minHypoStackDiversity == 1 &&
			 hypo->GetTotalScore() > GetWorstScoreForBitmap( hypo->GetWordsBitmap() ) )
		{
			SetWorstScoreForBitmap( hypo->GetWordsBitmap().GetID(), hypo->GetTotalScore() );
		}

		VERBOSE(3,", now size " << m_hypos.size());

		// prune only if stack is twice as big as needed (lazy pruning)
		size_t toleratedSize = 2*m_maxHypoStackSize-1;
		// add in room for stack diversity
		// (diversity count scaled by 2^maxDistortion extra slots)
		if (m_minHypoStackDiversity)
			toleratedSize += m_minHypoStackDiversity << StaticData::Instance().GetMaxDistortion();
		if (m_hypos.size() > toleratedSize)
		{
			PruneToSize(m_maxHypoStackSize);
		}
		else {
			VERBOSE(3,std::endl);
		}
	}

	return ret;
}
/** Public entry point for adding a hypothesis: discard if below the stack's
 * worst score (unless it helps satisfy the per-coverage diversity minimum,
 * or discarding is disabled globally), otherwise insert, recombining with
 * an equivalent hypothesis if one exists (the better of the two survives).
 * Returns true only when hypo was added as a genuinely new entry. */
bool HypothesisStackNormal::AddPrune(Hypothesis *hypo)
{
	// too bad for stack. don't bother adding hypo into collection
	if (!StaticData::Instance().GetDisableDiscarding() &&
		hypo->GetTotalScore() < m_worstScore
		&& ! ( m_minHypoStackDiversity > 0
			   && hypo->GetTotalScore() >= GetWorstScoreForBitmap( hypo->GetWordsBitmap() ) ) )
	{
		m_manager.GetSentenceStats().AddDiscarded();
		VERBOSE(3,"discarded, too bad for stack" << std::endl);
		FREEHYPO(hypo);
		return false;
	}

	// over threshold, try to add to collection
	std::pair<iterator, bool> addRet = Add(hypo);
	if (addRet.second)
	{ // nothing found. add to collection
		return true;
	}

	// equiv hypo exists, recombine with other hypo
	iterator &iterExisting = addRet.first;
	Hypothesis *hypoExisting = *iterExisting;
	assert(iterExisting != m_hypos.end());

	m_manager.GetSentenceStats().AddRecombination(*hypo, **iterExisting);

	// found existing hypo with same target ending.
	// keep the best 1
	if (hypo->GetTotalScore() > hypoExisting->GetTotalScore())
	{ // incoming hypo is better than the one we have
		VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, ");
		if (m_nBestIsEnabled) {
			// keep the displaced hypothesis alive as an arc for n-best extraction
			hypo->AddArc(hypoExisting);
			Detach(iterExisting);
		} else {
			Remove(iterExisting);
		}

		// re-insert must succeed: the only equivalent hypothesis was just removed
		bool added = Add(hypo).second;
		if (!added)
		{
			iterExisting = m_hypos.find(hypo);
			TRACE_ERR("Offending hypo = " << **iterExisting << endl);
			abort();
		}
		return false;
	}
	else
	{ // already storing the best hypo. discard current hypo
		VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl)
		if (m_nBestIsEnabled) {
			hypoExisting->AddArc(hypo);
		} else {
			FREEHYPO(hypo);
		}
		return false;
	}
}
void HypothesisStackNormal::PruneToSize(size_t newSize)
{
if ( size() <= newSize ) return; // ok, if not over the limit
// we need to store a temporary list of hypotheses
vector< Hypothesis* > hypos = GetSortedListNOTCONST();
bool* included = (bool*) malloc(sizeof(bool) * hypos.size());
for(size_t i=0; i<hypos.size(); i++) included[i] = false;
// clear out original set
for( iterator iter = m_hypos.begin(); iter != m_hypos.end(); )
{
iterator removeHyp = iter++;
Detach(removeHyp);
}
// add best hyps for each coverage according to minStackDiversity
if ( m_minHypoStackDiversity > 0 )
{
map< WordsBitmapID, size_t > diversityCount;
for(size_t i=0; i<hypos.size(); i++)
{
Hypothesis *hyp = hypos[i];
WordsBitmapID coverage = hyp->GetWordsBitmap().GetID();;
if (diversityCount.find( coverage ) == diversityCount.end())
diversityCount[ coverage ] = 0;
if (diversityCount[ coverage ] < m_minHypoStackDiversity)
{
m_hypos.insert( hyp );
included[i] = true;
diversityCount[ coverage ]++;
if (diversityCount[ coverage ] == m_minHypoStackDiversity)
SetWorstScoreForBitmap( coverage, hyp->GetTotalScore());
}
}
}
// only add more if stack not full after satisfying minStackDiversity
if ( size() < newSize ) {
// add best remaining hypotheses
for(size_t i=0; i<hypos.size()
&& size() < newSize
&& hypos[i]->GetTotalScore() > m_bestScore+m_beamWidth; i++)
{
if (! included[i])
{
m_hypos.insert( hypos[i] );
included[i] = true;
if (size() == newSize)
m_worstScore = hypos[i]->GetTotalScore();
}
}
}
// delete hypotheses that have not been included
for(size_t i=0; i<hypos.size(); i++)
{
if (! included[i])
{
FREEHYPO( hypos[i] );
m_manager.GetSentenceStats().AddPruning();
}
}
free(included);
// some reporting....
VERBOSE(3,", pruned to size " << size() << endl);
IFVERBOSE(3)
{
TRACE_ERR("stack now contains: ");
for(iterator iter = m_hypos.begin(); iter != m_hypos.end(); iter++)
{
Hypothesis *hypo = *iter;
TRACE_ERR( hypo->GetId() << " (" << hypo->GetTotalScore() << ") ");
}
TRACE_ERR( endl);
}
}
/** Linear scan for the hypothesis with the highest total score;
 * returns NULL when the stack is empty. */
const Hypothesis *HypothesisStackNormal::GetBestHypothesis() const
{
	const Hypothesis *best = NULL;
	for (const_iterator iter = m_hypos.begin(); iter != m_hypos.end(); ++iter)
	{
		if (best == NULL || (*iter)->GetTotalScore() > best->GetTotalScore())
			best = *iter;
	}
	return best;
}
/** Copy the hypothesis pointers out of the set and order them best-first. */
vector<const Hypothesis*> HypothesisStackNormal::GetSortedList() const
{
	vector<const Hypothesis*> sorted(m_hypos.begin(), m_hypos.end());
	std::sort(sorted.begin(), sorted.end(), CompareHypothesisTotalScore());
	return sorted;
}
/** Non-const variant of GetSortedList(), used internally by PruneToSize(). */
vector<Hypothesis*> HypothesisStackNormal::GetSortedListNOTCONST()
{
	vector<Hypothesis*> sorted(m_hypos.begin(), m_hypos.end());
	std::sort(sorted.begin(), sorted.end(), CompareHypothesisTotalScore());
	return sorted;
}
/** Clean up every hypothesis's arc list; arcs are only maintained
 * when an n-best list was requested, so this is a no-op otherwise. */
void HypothesisStackNormal::CleanupArcList()
{
	if (!m_nBestIsEnabled)
		return;

	for (iterator iter = m_hypos.begin(); iter != m_hypos.end(); ++iter)
		(*iter)->CleanupArcList();
}
TO_STRING_BODY(HypothesisStackNormal);
// friend
/** Stream every hypothesis on its own line, in set order. */
std::ostream& operator<<(std::ostream& out, const HypothesisStackNormal& hypoColl)
{
	for (HypothesisStackNormal::const_iterator iter = hypoColl.begin();
			iter != hypoColl.end(); ++iter)
	{
		out << **iter << endl;
	}
	return out;
}
}

137
src/HypothesisStackNormal.h Normal file
View File

@ -0,0 +1,137 @@
// $Id: HypothesisStackNormal.h 1511 2007-11-12 20:21:44Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_HypothesisStackNormal_h
#define moses_HypothesisStackNormal_h
#include <limits>
#include <set>
#include "Hypothesis.h"
#include "HypothesisStack.h"
#include "WordsBitmap.h"
namespace Moses
{
// class WordsBitmap;
// typedef size_t WordsBitmapID;
/** Stack for instances of Hypothesis, includes functions for pruning. */
class HypothesisStackNormal: public HypothesisStack
{
public:
friend std::ostream& operator<<(std::ostream&, const HypothesisStackNormal&);
protected:
float m_bestScore; /**< score of the best hypothesis in collection */
float m_worstScore; /**< score of the worse hypothesis in collection */
map< WordsBitmapID, float > m_diversityWorstScore; /**< score of worst hypothesis for particular source word coverage */
float m_beamWidth; /**< minimum score due to threashold pruning */
size_t m_maxHypoStackSize; /**< maximum number of hypothesis allowed in this stack */
size_t m_minHypoStackDiversity; /**< minimum number of hypothesis with different source word coverage */
bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */
/** add hypothesis to stack. Prune if necessary.
* Returns false if equiv hypo exists in collection, otherwise returns true
*/
std::pair<HypothesisStackNormal::iterator, bool> Add(Hypothesis *hypothesis);
/** destroy all instances of Hypothesis in this collection */
void RemoveAll();
void SetWorstScoreForBitmap( WordsBitmapID id, float worstScore ) {
m_diversityWorstScore[ id ] = worstScore;
}
public:
float GetWorstScoreForBitmap( WordsBitmapID id ) {
if (m_diversityWorstScore.find( id ) == m_diversityWorstScore.end())
return -numeric_limits<float>::infinity();
return m_diversityWorstScore[ id ];
}
float GetWorstScoreForBitmap( const WordsBitmap &coverage ) {
return GetWorstScoreForBitmap( coverage.GetID() );
}
HypothesisStackNormal(Manager& manager);
/** adds the hypo, but only if within thresholds (beamThr, stackSize).
* This function will recombine hypotheses silently! There is no record
* (could affect n-best list generation...TODO)
* Call stack for adding hypothesis is
AddPrune()
Add()
AddNoPrune()
*/
bool AddPrune(Hypothesis *hypothesis);
/** set maximum number of hypotheses in the collection
* \param maxHypoStackSize maximum number (typical number: 100)
* \param maxHypoStackSize maximum number (defauly: 0)
*/
inline void SetMaxHypoStackSize(size_t maxHypoStackSize, size_t minHypoStackDiversity)
{
m_maxHypoStackSize = maxHypoStackSize;
m_minHypoStackDiversity = minHypoStackDiversity;
}
/** set beam threshold, hypotheses in the stack must not be worse than
* this factor times the best score to be allowed in the stack
* \param beamThreshold minimum factor (typical number: 0.03)
*/
inline void SetBeamWidth(float beamWidth)
{
m_beamWidth = beamWidth;
}
/** return score of the best hypothesis in the stack */
inline float GetBestScore() const
{
return m_bestScore;
}
/** return worst allowable score */
inline float GetWorstScore() const
{
return m_worstScore;
}
/** pruning, if too large.
* Pruning algorithm: find a threshold and delete all hypothesis below it.
* The threshold is chosen so that exactly newSize top items remain on the
* stack in fact, in situations where some of the hypothesis fell below
* m_beamWidth, the stack will contain less items.
* \param newSize maximum size */
void PruneToSize(size_t newSize);
//! return the hypothesis with best score. Used to get the translated at end of decoding
const Hypothesis *GetBestHypothesis() const;
//! return all hypothesis, sorted by descending score. Used in creation of N best list
std::vector<const Hypothesis*> GetSortedList() const;
std::vector<Hypothesis*> GetSortedListNOTCONST();
/** make all arcs in point to the equiv hypothesis that contains them.
* Ie update doubly linked list be hypo & arcs
*/
void CleanupArcList();
TO_STRING();
};
}
#endif

62
src/InputFileStream.cpp Normal file
View File

@ -0,0 +1,62 @@
// $Id: InputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "InputFileStream.h"
#include "gzfilebuf.h"
#include <iostream>
using namespace std;
namespace Moses
{
/** Opens filePath for reading; a ".gz" suffix selects a gzip-decompressing
 * buffer. On failure to open a plain file, prints to stderr and exits.
 */
InputFileStream::InputFileStream(const std::string &filePath)
	: std::istream(NULL)
	, m_streambuf(NULL)
{
	if (filePath.size() > 3 &&
	    filePath.substr(filePath.size() - 3, 3) == ".gz")
	{
		m_streambuf = new gzfilebuf(filePath.c_str());
	} else {
		std::filebuf* fb = new std::filebuf();
		// filebuf::open returns NULL on failure; check the result instead of
		// overwriting the owning pointer (the original clobbered fb, leaking
		// the buffer on the error path).
		if (fb->open(filePath.c_str(), std::ios::in) == NULL) {
			cerr << "Can't read " << filePath.c_str() << endl;
			delete fb;
			exit(1);
		}
		m_streambuf = fb;
	}
	this->init(m_streambuf);
}
// Releases the owned stream buffer (gzfilebuf or std::filebuf).
InputFileStream::~InputFileStream()
{
	delete m_streambuf;
	m_streambuf = NULL;
}

// Intentionally a no-op: the buffer is freed by the destructor.
void InputFileStream::Close()
{
}
}

48
src/InputFileStream.h Normal file
View File

@ -0,0 +1,48 @@
// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_InputFileStream_h
#define moses_InputFileStream_h

#include <cstdlib>
#include <fstream>
#include <string>

namespace Moses
{

/** Used in place of std::istream, can read zipped files if it ends in .gz
 */
class InputFileStream : public std::istream
{
protected:
	std::streambuf *m_streambuf; // owned; freed by the destructor
public:
	/** Opens filePath; exits the process if a plain file cannot be read. */
	InputFileStream(const std::string &filePath);
	~InputFileStream();
	/** Currently a no-op; cleanup happens in the destructor. */
	void Close();
};

}
#endif

59
src/InputType.cpp Normal file
View File

@ -0,0 +1,59 @@
// $Id: InputType.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cstdlib>
#include "InputType.h"
namespace Moses
{
InputType::InputType(long translationId) : m_translationId(translationId) {}
InputType::~InputType() {}

TO_STRING_BODY(InputType);

// Delegates to the virtual Print() so derived classes control formatting.
std::ostream& operator<<(std::ostream& out,InputType const& x)
{
	x.Print(out); return out;
}
// default implementation is one column equals one word
int InputType::ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current) const
{
	// Distortion is the gap between the end of the previously covered range
	// and the start of the current one; when nothing is covered yet, count
	// from the start of the sentence.
	const int jump = (prev.GetNumWordsCovered() == 0)
		? (int) current.GetStartPos()
		: (int) prev.GetEndPos() - (int) current.GetStartPos() + 1;
	return abs(jump);
}
// Base implementation: a plain sentence is fully connected, so every
// transition is possible. Lattice-like inputs are expected to override
// this (see the virtual declaration in InputType.h).
bool InputType::CanIGetFromAToB(size_t start, size_t end) const
{
	return true;
}
}

132
src/InputType.h Normal file
View File

@ -0,0 +1,132 @@
// $Id: InputType.h 2939 2010-02-24 11:15:44Z jfouet $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_InputType_h
#define moses_InputType_h
#include <string>
#include "TypeDef.h"
#include "Phrase.h"
#include "TargetPhraseCollection.h"
#include "ReorderingConstraint.h"
namespace Moses
{
class WordsRange;
class Factor;
class PhraseDictionary;
class TranslationOptionCollection;
//! base class for sentences and confusion networks
class InputType
{
protected:
	long m_translationId; 	//< contiguous Id
	bool m_hasMetaData;
	long m_segId;
	ReorderingConstraint m_reorderingConstraint; /**< limits on reordering specified either by "-mp" switch or xml tags */

public:
	InputType(long translationId = 0);
	virtual ~InputType();

	//! which concrete kind of input this is (sentence, confusion net, ...)
	virtual InputTypeEnum GetType() const = 0;

	long GetTranslationId() const
	{
		return m_translationId;
	}
	void SetTranslationId(long translationId)
	{
		m_translationId = translationId;
	}
	//! returns the number of words moved
	virtual int ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current) const;

	//! In a word lattice, tells you if there's a path from node start to node end
	virtual bool CanIGetFromAToB(size_t start, size_t end) const;

	//! is there a path covering [range] (lattice only, otherwise true)
	inline bool IsCoveragePossible(const WordsRange& range) const
	{
		return CanIGetFromAToB(range.GetStartPos(), range.GetEndPos() + 1);
	}

	//! In a word lattice, you can't always get from node A to node B
	inline bool IsExtensionPossible(const WordsRange& prev, const WordsRange& current) const
	{
		//	return ComputeDistortionDistance(prev, current) < 100000;

		// first check: is there a lattice path between the position just
		// after prev and the position just after current (whichever order)?
		size_t t = prev.GetEndPos()+1; // 2
		size_t l = current.GetEndPos()+1; //l=1
		size_t r = l;
		if (l<t) { r = t; } else { l = t; } //r=2
		if (!CanIGetFromAToB(l,r)) return false;

		// there's another check here: a current span may end at a place that previous could get to,
		// but it may not *START* at a place it can get to.  We'll also have to check if we're going left or right
		r = current.GetStartPos();
		l = prev.GetEndPos()+1;
		if (l == r) return true;
		// jumping backwards: check the reverse pair of endpoints instead
		if (prev.GetEndPos() > current.GetStartPos()) {
			r = prev.GetStartPos();
			l = current.GetEndPos()+1;
			if (r == l) return true;
		}
		return CanIGetFromAToB(l,r);
	}

	//! number of words in this sentence/confusion network
	virtual size_t GetSize() const =0;

	//! populate this InputType with data from in stream
	virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder) =0;

	//! Output debugging info to stream out
	virtual void Print(std::ostream&) const =0;

	//! create trans options specific to this InputType
	virtual TranslationOptionCollection* CreateTranslationOptionCollection() const=0;

	//! return substring. Only valid for Sentence class. TODO - get rid of this fn
	virtual Phrase GetSubString(const WordsRange&) const =0;

	//! return substring at a particular position. Only valid for Sentence class. TODO - get rid of this fn
	virtual const Word& GetWord(size_t pos) const=0;

	//! Returns the reordering constraints
	const ReorderingConstraint& GetReorderingConstraint() const
	{
		return m_reorderingConstraint;
	};

	TO_STRING();
};
std::ostream& operator<<(std::ostream&,InputType const&);
}
#endif

54
src/LMList.cpp Normal file
View File

@ -0,0 +1,54 @@
// $Id: LMList.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "LMList.h"
#include "Phrase.h"
#include "LanguageModelSingleFactor.h"
#include "ScoreComponentCollection.h"
using namespace std;
namespace Moses
{
void LMList::CalcScore(const Phrase &phrase, float &retFullScore, float &retNGramScore, ScoreComponentCollection* breakdown) const
{
const_iterator lmIter;
for (lmIter = begin(); lmIter != end(); ++lmIter)
{
const LanguageModel &lm = **lmIter;
const float weightLM = lm.GetWeight();
float fullScore, nGramScore;
// do not process, if factors not defined yet (happens in partial translation options)
if (!lm.Useable(phrase))
continue;
lm.CalcScore(phrase, fullScore, nGramScore);
breakdown->Assign(&lm, nGramScore); // I'm not sure why += doesn't work here- it should be 0.0 right?
retFullScore += fullScore * weightLM;
retNGramScore += nGramScore * weightLM;
}
}
}

23
src/LMList.h Normal file
View File

@ -0,0 +1,23 @@
#ifndef moses_LMList_h
#define moses_LMList_h

#include <list>
#include "LanguageModel.h"

namespace Moses
{

class Phrase;
class ScoreColl;
class ScoreComponentCollection;

//! List of language models
class LMList : public std::list < LanguageModel* >
{
public:
	/** Sum the weighted scores of every usable LM over the phrase.
	 * \param retFullScore accumulates the weighted score of all n-grams
	 * \param retNGramScore accumulates the weighted full-order n-gram score
	 * \param breakdown receives each LM's unweighted n-gram score
	 */
	void CalcScore(const Phrase &phrase, float &retFullScore, float &retNGramScore, ScoreComponentCollection* breakdown) const;
};

}
#endif

7
src/LVoc.cpp Normal file
View File

@ -0,0 +1,7 @@
#include<limits>
#include "LVoc.h"

// Rather pointless file because LVoc is a template; all we need here are
// the definitions of the constants declared extern in LVoc.h.
const LabelId InvalidLabelId = std::numeric_limits<LabelId>::max();
const LabelId Epsilon = InvalidLabelId-1;

68
src/LVoc.h Normal file
View File

@ -0,0 +1,68 @@
#ifndef moses_LVoc_h
#define moses_LVoc_h

// <cassert> and <string> added: this header uses assert() and std::string
// but previously relied on transitive includes.
#include <cassert>
#include <map>
#include <vector>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>

typedef unsigned LabelId;
extern const LabelId InvalidLabelId;
extern const LabelId Epsilon;

typedef std::vector<LabelId> IPhrase;

/** Bidirectional mapping between symbols and dense integer ids.
 * A = type of things to numberize, ie, std::string
 * B = map type to use, might consider using hash_map for better performance
 */
template<typename A,typename B=std::map<A,LabelId> >
class LVoc {
  typedef A Key;
  typedef B M;
  typedef std::vector<Key> V;
  M m;      // symbol -> id
  V data;   // id -> symbol
public:
  LVoc() {}
  /** true iff k has already been assigned an id */
  bool isKnown(const Key& k) const {return m.find(k)!=m.end();}
  /** id of k, or InvalidLabelId if k is unknown */
  LabelId index(const Key& k) const {
    typename M::const_iterator i=m.find(k);
    return i!=m.end()? i->second : InvalidLabelId;}
  /** id of k, assigning the next free id if k is new */
  LabelId add(const Key& k) {
    std::pair<typename M::iterator,bool> p
      =m.insert(std::make_pair(k,data.size()));
    if(p.second) data.push_back(k);
    assert(static_cast<size_t>(p.first->second)<data.size());
    return p.first->second;
  }
  /** symbol for a known id; asserts that i is in range */
  Key const& symbol(LabelId i) const {
    assert(static_cast<size_t>(i)<data.size());
    return data[i];}

  typedef typename V::const_iterator const_iterator;
  const_iterator begin() const {return data.begin();}
  const_iterator end() const {return data.end();}

  /** write "<id> <symbol>" lines, highest id first */
  void Write(const std::string& fname) const {
    std::ofstream out(fname.c_str()); Write(out);}
  void Write(std::ostream& out) const {
    for(int i=data.size()-1;i>=0;--i)
      out<<i<<' '<<data[i]<<'\n';
  }
  /** rebuild the vocabulary from lines produced by Write() */
  void Read(const std::string& fname) {
    std::ifstream in(fname.c_str());Read(in);}
  void Read(std::istream& in) {
    Key k;size_t i;std::string line;
    while(getline(in,line)) {
      std::istringstream is(line);
      if(is>>i>>k) {
        if(i>=data.size()) data.resize(i+1);
        data[i]=k;
        m[k]=i;
      }
    }
  }
};

#endif

191
src/LanguageModel.cpp Normal file
View File

@ -0,0 +1,191 @@
// $Id: LanguageModel.cpp 2477 2009-08-07 16:47:54Z bhaddow $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cassert>
#include <limits>
#include <iostream>
#include <sstream>
#include "FFState.h"
#include "LanguageModel.h"
#include "TypeDef.h"
#include "Util.h"
#include "Manager.h"
#include "FactorCollection.h"
#include "Phrase.h"
#include "StaticData.h"
using namespace std;
namespace Moses
{
LanguageModel::LanguageModel(bool registerScore, ScoreIndexManager &scoreIndexManager)
{
	// Only register as a score producer when this LM scores sentences
	// directly (false when it is a component of a composite LM, see header).
	if (registerScore)
		scoreIndexManager.AddScoreProducer(this);
}
LanguageModel::~LanguageModel() {}

// don't inline virtual funcs...
size_t LanguageModel::GetNumScoreComponents() const
{
	return 1;
}
/** Sum unweighted LM scores (natural log, per the header contract) over
 * the phrase. fullScore additionally includes the short (order <
 * m_nGramOrder) n-grams at the start of the phrase; ngramScore counts
 * full-order n-grams only.
 */
void LanguageModel::CalcScore(const Phrase &phrase
							, float &fullScore
							, float &ngramScore) const
{
	fullScore = 0;
	ngramScore = 0;

	size_t phraseSize = phrase.GetSize();
	vector<const Word*> contextFactor;
	contextFactor.reserve(m_nGramOrder);

	// start of sentence: score the growing prefix as a 1-gram, 2-gram, ...
	for (size_t currPos = 0 ; currPos < m_nGramOrder - 1 && currPos < phraseSize ; currPos++)
	{
		contextFactor.push_back(&phrase.GetWord(currPos));
		fullScore += GetValue(contextFactor);
	}

	// first full-order n-gram, if the phrase is long enough
	if (phraseSize >= m_nGramOrder)
	{
		contextFactor.push_back(&phrase.GetWord(m_nGramOrder - 1));
		ngramScore = GetValue(contextFactor);
	}

	// main loop: slide the full-order window across the rest of the phrase
	for (size_t currPos = m_nGramOrder; currPos < phraseSize ; currPos++)
	{ // used by hypo to speed up lm score calc
		// shift the context window down one place
		for (size_t currNGramOrder = 0 ; currNGramOrder < m_nGramOrder - 1 ; currNGramOrder++)
		{
			contextFactor[currNGramOrder] = contextFactor[currNGramOrder + 1];
		}
		contextFactor[m_nGramOrder - 1] = &phrase.GetWord(currPos);
		float partScore = GetValue(contextFactor);
		ngramScore += partScore;
	}
	fullScore += ngramScore;
}
/** Query the LM state reached after scoring the given n-gram, discarding
 * the score itself. When the caller passes no length pointer, a throwaway
 * local satisfies GetValue()'s output argument.
 */
LanguageModel::State LanguageModel::GetState(const std::vector<const Word*> &contextFactor, unsigned int* len) const
{
	unsigned int scratch;
	State result;
	GetValue(contextFactor, &result, len ? len : &scratch);
	return result;
}
/** FFState for LMs: wraps the opaque state pointer returned by the
 * underlying LM implementation. Compare() orders by raw pointer value,
 * so two hypotheses compare equal only when they share the same state.
 */
struct LMState : public FFState {
	const void* lmstate;
	LMState(const void* lms) { lmstate = lms; }
	virtual int Compare(const FFState& o) const {
		const LMState& other = static_cast<const LMState&>(o);
		if (other.lmstate > lmstate) return 1;
		else if (other.lmstate < lmstate) return -1;
		return 0;
	}
};

//! initial state for an empty hypothesis: no LM context yet
const FFState* LanguageModel::EmptyHypothesisState() const {
	return new LMState(NULL);
}
FFState* LanguageModel::Evaluate(
	const Hypothesis& hypo,
	const FFState* ps,
	ScoreComponentCollection* out) const {
	// In this function, we only compute the LM scores of n-grams that overlap a
	// phrase boundary. Phrase-internal scores are taken directly from the
	// translation option. In the unigram case, there is no overlap, so we don't
	// need to do anything.
	if(m_nGramOrder <= 1)
		return NULL;

	clock_t t=0;
	IFVERBOSE(2) { t = clock(); } // track time

	// by default, carry the predecessor's LM state forward
	const void* prevlm = ps ? (static_cast<const LMState *>(ps)->lmstate) : NULL;
	LMState* res = new LMState(prevlm);
	// empty target phrase: nothing to score, state is unchanged
	if (hypo.GetCurrTargetLength() == 0)
		return res;
	const size_t currEndPos = hypo.GetCurrTargetWordsRange().GetEndPos();
	const size_t startPos = hypo.GetCurrTargetWordsRange().GetStartPos();

	// 1st n-gram: the history may reach back before this phrase; pad with
	// the sentence-start marker where it reaches before the sentence
	vector<const Word*> contextFactor(m_nGramOrder);
	size_t index = 0;
	for (int currPos = (int) startPos - (int) m_nGramOrder + 1 ; currPos <= (int) startPos ; currPos++)
	{
		if (currPos >= 0)
			contextFactor[index++] = &hypo.GetWord(currPos);
		else
			contextFactor[index++] = &GetSentenceStartArray();
	}
	float lmScore = GetValue(contextFactor);
	//cout<<"context factor: "<<GetValue(contextFactor)<<endl;

	// main loop: the remaining n-grams that still straddle the boundary
	size_t endPos = std::min(startPos + m_nGramOrder - 2
			, currEndPos);
	for (size_t currPos = startPos + 1 ; currPos <= endPos ; currPos++)
	{
		// shift all args down 1 place
		for (size_t i = 0 ; i < m_nGramOrder - 1 ; i++)
			contextFactor[i] = contextFactor[i + 1];

		// add last factor
		contextFactor.back() = &hypo.GetWord(currPos);

		lmScore += GetValue(contextFactor);
	}

	// end of sentence: also score the </s> n-gram and capture the state
	if (hypo.IsSourceCompleted())
	{
		const size_t size = hypo.GetSize();
		contextFactor.back() = &GetSentenceEndArray();

		for (size_t i = 0 ; i < m_nGramOrder - 1 ; i ++)
		{
			int currPos = (int)(size - m_nGramOrder + i + 1);
			if (currPos < 0)
				contextFactor[i] = &GetSentenceStartArray();
			else
				contextFactor[i] = &hypo.GetWord((size_t)currPos);
		}
		lmScore += GetValue(contextFactor, &res->lmstate);
	} else {
		// not finished: advance the window over the phrase's last words so
		// the outgoing state reflects them — no score is added here because
		// those n-grams were counted phrase-internally (see top comment)
		for (size_t currPos = endPos+1; currPos <= currEndPos; currPos++) {
			for (size_t i = 0 ; i < m_nGramOrder - 1 ; i++)
				contextFactor[i] = contextFactor[i + 1];
			contextFactor.back() = &hypo.GetWord(currPos);
		}
		res->lmstate = GetState(contextFactor);
	}

	out->PlusEquals(this, lmScore);
	IFVERBOSE(2) { hypo.GetManager().GetSentenceStats().AddTimeCalcLM( clock()-t ); }
	return res;
}
}

146
src/LanguageModel.h Normal file
View File

@ -0,0 +1,146 @@
// $Id: LanguageModel.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_LanguageModel_h
#define moses_LanguageModel_h
#include <string>
#include <vector>
#include "Factor.h"
#include "TypeDef.h"
#include "Util.h"
#include "FeatureFunction.h"
#include "Word.h"
namespace Moses
{
class FactorCollection;
class Factor;
class Phrase;
//! Abstract base class which represent a language model on a contiguous phrase
class LanguageModel : public StatefulFeatureFunction
{
protected:
	float m_weight; //! scoring weight. Shouldn't this now be superseded by ScoreProducer???
	std::string m_filePath; //! for debugging purposes
	size_t m_nGramOrder; //! max n-gram length contained in this LM
	Word m_sentenceStartArray, m_sentenceEndArray; //! Contains factors which represents the beginning and end words for this LM.
																								//! Usually <s> and </s>

	/** constructor to be called by inherited class
	 * \param registerScore whether this LM will be directly used to score sentence.
	 * 						Usually true, except where LM is a component in a composite LM, eg. LanguageModelJoint
	 */
	LanguageModel(bool registerScore, ScoreIndexManager &scoreIndexManager);

public:
	/* Returned from LM implementations which points at the state used. For example, if a trigram score was requested
	 * but the LM backed off to using the bigram, the State pointer will point to the bigram.
	 * Used for more aggressive pruning of hypothesis
	 */
	typedef const void* State;

	virtual ~LanguageModel();

	//! see ScoreProducer.h
	size_t GetNumScoreComponents() const;

	//! Single or multi-factor
	virtual LMType GetLMType() const = 0;

	/* whether this LM can be used on a particular phrase.
	 * Should return false if phrase size = 0 or factor types required don't exists
	 */
	virtual bool Useable(const Phrase &phrase) const = 0;

	/* calc total unweighted LM score of this phrase and return score via arguments.
	 * Return scores should always be in natural log, regardless of representation with LM implementation.
	 * Uses GetValue() of inherited class.
	 * Useable() should be called beforehand on the phrase
	 * \param fullScore scores of all unigram, bigram... of contiguous n-gram of the phrase
	 * \param ngramScore score of only n-gram of order m_nGramOrder
	 */
	void CalcScore(const Phrase &phrase
							, float &fullScore
							, float &ngramScore) const;

	/* get score of n-gram. n-gram should not be bigger than m_nGramOrder
	 * Specific implementation can return State and len data to be used in hypothesis pruning
	 * \param contextFactor n-gram to be scored
	 * \param finalState state used by LM. Return arg
	 * \param len ???
	 */
	virtual float GetValue(const std::vector<const Word*> &contextFactor
											, State* finalState = 0
											, unsigned int* len = 0) const = 0;

	//! get State for a particular n-gram
	State GetState(const std::vector<const Word*> &contextFactor, unsigned int* len = 0) const;

	//! max n-gram order of LM
	size_t GetNGramOrder() const
	{
		return m_nGramOrder;
	}

	//! Contains factors which represents the beginning and end words for this LM. Usually <s> and </s>
	const Word &GetSentenceStartArray() const
	{
		return m_sentenceStartArray;
	}
	const Word &GetSentenceEndArray() const
	{
		return m_sentenceEndArray;
	}

	//! scoring weight. Shouldn't this now be superseded by ScoreProducer???
	float GetWeight() const
	{
		return m_weight;
	}
	void SetWeight(float weight)
	{
		m_weight = weight;
	}

	virtual std::string GetScoreProducerDescription() const = 0;

	std::string GetScoreProducerWeightShortName() const
	{
		return "lm";
	}

	//! overridable functions for IRST LM to cleanup. Maybe something to do with on demand/cache loading/unloading
	virtual void InitializeBeforeSentenceProcessing(){};
	virtual void CleanUpAfterSentenceProcessing() {};

	virtual const FFState* EmptyHypothesisState() const;

	virtual FFState* Evaluate(
		const Hypothesis& cur_hypo,
		const FFState* prev_state,
		ScoreComponentCollection* accumulator) const;
};
}
#endif

View File

@ -0,0 +1,151 @@
// $Id: LanguageModelFactory.cpp 2180 2009-02-18 11:35:41Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <iostream>
#include "LanguageModelFactory.h"
#include "UserMessage.h"
#include "TypeDef.h"
#include "FactorCollection.h"
// include appropriate header
#ifdef LM_SRI
# include "LanguageModelSRI.h"
#endif
#ifdef LM_IRST
# include "LanguageModelIRST.h"
#endif
#ifdef LM_RAND
# include "LanguageModelRandLM.h"
#endif
#ifdef LM_REMOTE
# include "LanguageModelRemote.h"
#endif
#include "LanguageModelInternal.h"
#include "LanguageModelSkip.h"
#include "LanguageModelJoint.h"
using namespace std;
namespace Moses
{
namespace LanguageModelFactory
{
	/** Instantiate the configured LM implementation and load its model file.
	 * Each case compiles to nothing when the corresponding toolkit was not
	 * built in, leaving lm == NULL and triggering the user message below.
	 * \param dub dictionary upper bound, forwarded to the IRST LM only
	 * \return the loaded model, or NULL if the type is unavailable or the
	 *         file fails to load
	 */
	LanguageModel* CreateLanguageModel(LMImplementation lmImplementation
									, const std::vector<FactorType> &factorTypes
									, size_t nGramOrder
									, const std::string &languageModelFile
									, float weight
									, ScoreIndexManager &scoreIndexManager
									, int dub)
	{
	  LanguageModel *lm = NULL;
	  switch (lmImplementation)
	  {
	  	case RandLM:
			#ifdef LM_RAND
				lm = new LanguageModelRandLM(true,
						scoreIndexManager);
			#endif
			break;
		case Remote:
			#ifdef LM_REMOTE
				lm = new LanguageModelRemote(true,scoreIndexManager);
			#endif
			break;

	  	case SRI:
			// fall back to the internal LM when SRI was not compiled in
			#ifdef LM_SRI
				lm = new LanguageModelSRI(true, scoreIndexManager);
			#elif LM_INTERNAL
				lm = new LanguageModelInternal(true, scoreIndexManager);
			#endif
			break;
		case IRST:
			#ifdef LM_IRST
				lm = new LanguageModelIRST(true, scoreIndexManager, dub);
			#endif
			break;
		case Skip:
			// Skip wraps an underlying single-factor LM
			#ifdef LM_SRI
				lm = new LanguageModelSkip(new LanguageModelSRI(false, scoreIndexManager)
																	, true
																	, scoreIndexManager);
			#elif LM_INTERNAL
				lm = new LanguageModelSkip(new LanguageModelInternal(false, scoreIndexManager)
																	, true
																	, scoreIndexManager);
			#endif
			break;
		case Joint:
			// Joint likewise wraps an underlying single-factor LM
			#ifdef LM_SRI
				lm = new LanguageModelJoint(new LanguageModelSRI(false, scoreIndexManager)
																	, true
																	, scoreIndexManager);
			#elif LM_INTERNAL
				lm = new LanguageModelJoint(new LanguageModelInternal(false, scoreIndexManager)
																	, true
																	, scoreIndexManager);
			#endif
			break;
		case Internal:
			#ifdef LM_INTERNAL
				lm = new LanguageModelInternal(true, scoreIndexManager);
			#endif
			break;
	  }

	  if (lm == NULL)
	  {
	  	UserMessage::Add("Language model type unknown. Probably not compiled into library");
	  }
	  else
	  {
	  	// dispatch the actual file load on single- vs multi-factor type
	  	switch (lm->GetLMType())
	  	{
	  	case SingleFactor:
	  		if (! static_cast<LanguageModelSingleFactor*>(lm)->Load(languageModelFile, factorTypes[0], weight, nGramOrder))
				{
					cerr << "single factor model failed" << endl;
					delete lm;
					lm = NULL;
				}
				break;
	  	case MultiFactor:
	  		if (! static_cast<LanguageModelMultiFactor*>(lm)->Load(languageModelFile, factorTypes, weight, nGramOrder))
				{
					cerr << "multi factor model failed" << endl;
					delete lm;
					lm = NULL;
				}
				break;
	  	}
	  }

	  return lm;
	}
}
}

View File

@ -0,0 +1,34 @@
// $Id: LanguageModelFactory.h 2939 2010-02-24 11:15:44Z jfouet $
#ifndef moses_LanguageModelFactory_h
#define moses_LanguageModelFactory_h

#include <string>
#include <vector>

#include "TypeDef.h"

namespace Moses
{

class LanguageModel;
class ScoreIndexManager;

namespace LanguageModelFactory {

	/**
	 * creates a language model that will use the appropriate
	 * language model toolkit as its underlying implementation
	 * \param dub dictionary upper bound (used by the IRST LM only)
	 * \return NULL when the requested implementation was not compiled in
	 *         or the model file fails to load
	 */
	LanguageModel* CreateLanguageModel(LMImplementation lmImplementation
									, const std::vector<FactorType> &factorTypes
									, size_t nGramOrder
									, const std::string &languageModelFile
									, float weight
									, ScoreIndexManager &scoreIndexManager
									, int dub);

};

}

#endif

236
src/LanguageModelIRST.cpp Normal file
View File

@ -0,0 +1,236 @@
// $Id: LanguageModelIRST.cpp 2650 2010-01-09 19:00:37Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cassert>
#include <limits>
#include <iostream>
#include <fstream>
#include "dictionary.h"
#include "n_gram.h"
#include "lmtable.h"
#include "lmmacro.h"
#include "LanguageModelIRST.h"
#include "TypeDef.h"
#include "Util.h"
#include "FactorCollection.h"
#include "Phrase.h"
#include "InputFileStream.h"
#include "StaticData.h"
using namespace std;
namespace Moses
{
/** \param dub dictionary upper bound, forwarded to the IRST toolkit in Load().
 * m_lmtb_ng is zero-initialized here as well: the destructor deletes it
 * unconditionally, so leaving it uninitialized (as before) is undefined
 * behaviour whenever Load() is never called.
 */
LanguageModelIRST::LanguageModelIRST(bool registerScore, ScoreIndexManager &scoreIndexManager, int dub)
:LanguageModelSingleFactor(registerScore, scoreIndexManager)
,m_lmtb(0),m_lmtb_ng(0),m_lmtb_dub(dub)
{
}
LanguageModelIRST::~LanguageModelIRST()
{
	// NOTE(review): m_lmtb_ng is only assigned in Load(); the constructor
	// above does not initialize it, so this delete is undefined behaviour
	// if Load() was never called — confirm it is zero-initialized.
	delete m_lmtb;
	delete m_lmtb_ng;
}
/** Load an IRST LM.
 * @param filePath either "<lmfile>" or whitespace-separated "<lmfile> <mapfile>";
 *                 with a map file an lmmacro (micro/macro tag) model is built,
 *                 otherwise a plain lmtable.
 * @return false if a given map file does not exist, true otherwise.
 */
bool LanguageModelIRST::Load(const std::string &filePath,
                             FactorType factorType,
                             float weight,
                             size_t nGramOrder)
{
  const char *SepString = " \t\n"; // FIX: string literal must bind to const char*
  cerr << "In LanguageModelIRST::Load: nGramOrder = " << nGramOrder << "\n";
  FactorCollection &factorCollection = FactorCollection::Instance();
  m_factorType = factorType;
  m_weight = weight;
  m_nGramOrder = nGramOrder;
  // get name of LM file and, if any, of the micro-macro map file
  char *filenamesOrig = strdup(filePath.c_str());
  char *filenames = filenamesOrig;
  m_filePath = strsep(&filenames, SepString);
  // Open the input file (possibly gzipped)
  InputFileStream inp(m_filePath);
  if (filenames) {
    // case LMfile + MAPfile: create an object of lmmacro class and load both LM file and map
    cerr << "Loading LM file + MAP\n";
    m_mapFilePath = strsep(&filenames, SepString);
    if (!FileExists(m_mapFilePath)) {
      cerr << "ERROR: Map file <" << m_mapFilePath << "> does not exist\n";
      free(filenamesOrig);
      return false;
    }
    InputFileStream inpMap(m_mapFilePath);
    m_lmtb = new lmmacro(m_filePath, inp, inpMap);
  } else {
    // case (standard) LMfile only: create an object of lmtable
    cerr << "Loading LM file (no MAP)\n";
    m_lmtb = (lmtable *)new lmtable;
    // Load the (possibly binary) model
#ifdef WIN32
    m_lmtb->load(inp); //don't use memory map
#else
    // FIX: guard the ".mm" suffix test — the old unchecked
    // compare(size()-3, ...) underflowed and threw std::out_of_range
    // for paths shorter than 3 characters.
    if (m_filePath.size() >= 3 && m_filePath.compare(m_filePath.size()-3,3,".mm")==0)
      m_lmtb->load(inp,m_filePath.c_str(),NULL,1); // memory-mapped load
    else
      m_lmtb->load(inp,m_filePath.c_str(),NULL,0);
#endif
  }
  m_lmtb_ng = new ngram(m_lmtb->getDict()); // ngram of words/micro tags
  m_lmtb_size = m_lmtb->maxlevel();
  // LM can be ok, just outputs warnings
  // Mauro: in the original, the following two instructions are wrongly switched:
  m_unknownId = m_lmtb->getDict()->oovcode(); // at the level of micro tags
  CreateFactors(factorCollection);
  VERBOSE(1, "IRST: m_unknownId=" << m_unknownId << std::endl);
  // install caches
  m_lmtb->init_probcache();
  m_lmtb->init_statecache();
  m_lmtb->init_lmtcaches(m_lmtb->maxlevel()>2 ? m_lmtb->maxlevel()-1 : 2);
  if (m_lmtb_dub > 0) m_lmtb->setlogOOVpenalty(m_lmtb_dub);
  free(filenamesOrig);
  return true;
}
// Map every word in the IRST dictionary to a Moses factor id, then flatten the
// mapping into m_lmIdLookup so factor-id -> IRST-code is a plain vector lookup.
void LanguageModelIRST::CreateFactors(FactorCollection &factorCollection)
{ // add factors which have srilm id
  // code copied & paste from SRI LM class. should do template function
  std::map<size_t, int> lmIdMap;
  size_t maxFactorId = 0; // to create lookup vector later on
  dict_entry *entry;
  dictionary_iter iter(m_lmtb->getDict()); // at the level of micro tags
  while ( (entry = iter.next()) != NULL)
  {
    size_t factorId = factorCollection.AddFactor(Output, m_factorType, entry->word)->GetId();
    lmIdMap[factorId] = entry->code;
    maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
  }
  size_t factorId;
  // Sentence boundary markers get explicit entries; their IRST codes are
  // cached in m_lmtb_sentenceStart/End so GetValue() can push them directly.
  m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
  factorId = m_sentenceStart->GetId();
  m_lmtb_sentenceStart=lmIdMap[factorId] = GetLmID(BOS_);
  maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
  m_sentenceStartArray[m_factorType] = m_sentenceStart;
  m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
  factorId = m_sentenceEnd->GetId();
  m_lmtb_sentenceEnd=lmIdMap[factorId] = GetLmID(EOS_);
  maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
  m_sentenceEndArray[m_factorType] = m_sentenceEnd;
  // add to lookup vector in object
  // Factors never seen by this LM map to m_unknownId (OOV code).
  m_lmIdLookup.resize(maxFactorId+1);
  fill(m_lmIdLookup.begin(), m_lmIdLookup.end(), m_unknownId);
  map<size_t, int>::iterator iterMap;
  for (iterMap = lmIdMap.begin() ; iterMap != lmIdMap.end() ; ++iterMap)
  {
    m_lmIdLookup[iterMap->first] = iterMap->second;
  }
}
int LanguageModelIRST::GetLmID( const std::string &str ) const
{
return m_lmtb->getDict()->encode( str.c_str() ); // at the level of micro tags
}
// Score the last word of contextFactor given the preceding words.
// finalState (if non-NULL) receives an opaque IRST suffix pointer used for
// hypothesis recombination; len is always set to 0 (back-off length not
// reported by this implementation).
// NOTE(review): m_lmtb_ng is shared mutable scratch written by this const
// method — not safe for concurrent calls; confirm single-threaded use.
float LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState, unsigned int* len) const
{
  unsigned int dummy;
  if (!len) { len = &dummy; }
  FactorType factorType = GetFactorType();
  // set up context
  size_t count = contextFactor.size();
  m_lmtb_ng->size=0;
  // Pad contexts shorter than the table order with the cached sentence-end /
  // sentence-start codes so the ngram always has a full history.
  if (count< (size_t)(m_lmtb_size-1)) m_lmtb_ng->pushc(m_lmtb_sentenceEnd);
  if (count< (size_t)m_lmtb_size) m_lmtb_ng->pushc(m_lmtb_sentenceStart);
  for (size_t i = 0 ; i < count ; i++)
  {
    //int lmId = GetLmID((*contextFactor[i])[factorType]);
#ifdef DEBUG
    cout << "i=" << i << " -> " << (*contextFactor[i])[factorType]->GetString() << "\n";
#endif
    // Look up by surface string (not via the factor-id vector).
    int lmId = GetLmID((*contextFactor[i])[factorType]->GetString());
    // cerr << (*contextFactor[i])[factorType]->GetString() << " = " << lmId;
    m_lmtb_ng->pushc(lmId);
  }
  if (finalState){
    *finalState=(State *)m_lmtb->cmaxsuffptr(*m_lmtb_ng);
    // back off stats not currently available
    *len = 0;
  }
  float prob = m_lmtb->clprob(*m_lmtb_ng);
  return TransformIRSTScore(prob);
}
// Per-sentence teardown: drop the IRST probability/state caches (and the
// memory map on non-Windows builds) so the next sentence starts clean.
void LanguageModelIRST::CleanUpAfterSentenceProcessing(){
  TRACE_ERR( "reset caches\n");
  m_lmtb->reset_caches();
#ifndef WIN32
  TRACE_ERR( "reset mmap\n");
  m_lmtb->reset_mmap();
#endif
}
// Per-sentence setup; only bumps IRST's sentence id when cache tracing is
// compiled in.
void LanguageModelIRST::InitializeBeforeSentenceProcessing(){
  //nothing to do
#ifdef TRACE_CACHE
  m_lmtb->sentence_id++;
#endif
}
}

88
src/LanguageModelIRST.h Normal file
View File

@ -0,0 +1,88 @@
// $Id: LanguageModelIRST.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_LanguageModelIRST_h
#define moses_LanguageModelIRST_h
#include <string>
#include <vector>
#include "Factor.h"
#include "TypeDef.h"
#include "Util.h"
#include "LanguageModelSingleFactor.h"
class lmtable; // irst lm table
class lmmacro; // irst lm for macro tags
class ngram;
namespace Moses
{
class Phrase;
/** Implementation of single factor LM using IRST's code.
* This is the default LM for Moses and is available from the same sourceforge repository
*/
// Single-factor LM backed by the IRST toolkit (lmtable, or lmmacro when a
// micro/macro map file is supplied in Load()).
class LanguageModelIRST : public LanguageModelSingleFactor
{
protected:
  std::vector<int> m_lmIdLookup; // Moses factor id -> IRST dictionary code
  lmtable* m_lmtb;               // owned; lmtable or lmmacro (see Load)
  ngram* m_lmtb_ng;              // owned scratch ngram reused by GetValue
  int m_unknownId;               // IRST OOV code
  int m_lmtb_sentenceStart; //lmtb symbols to initialize ngram with
  int m_lmtb_sentenceEnd; //lmt symbol to initialize ngram with
  int m_lmtb_size; //max ngram stored in the table
  int m_lmtb_dub; //dictionary upperboud
  std::string m_mapFilePath;
  //	float GetValue(LmId wordId, ngram *context) const;
  void CreateFactors(FactorCollection &factorCollection);
  int GetLmID( const std::string &str ) const;
  // Fast path: factor id -> IRST code via the lookup vector; OOV for unseen ids.
  int GetLmID( const Factor *factor ) const{
    size_t factorId = factor->GetId();
    return ( factorId >= m_lmIdLookup.size()) ? m_unknownId : m_lmIdLookup[factorId];
  };
public:
  LanguageModelIRST(bool registerScore, ScoreIndexManager &scoreIndexManager, int dub);
  ~LanguageModelIRST();
  bool Load(const std::string &filePath
            , FactorType factorType
            , float weight
            , size_t nGramOrder);
  virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len=0) const;
  void CleanUpAfterSentenceProcessing();
  void InitializeBeforeSentenceProcessing();
  // NOTE(review): despite its name this sets m_lmtb_size (table order), not
  // m_lmtb_dub (dictionary upper bound) — looks suspicious; confirm intent.
  void set_dictionary_upperbound(int dub){ m_lmtb_size=dub ;
  //m_lmtb->set_dictionary_upperbound(dub);
  };
};
}
#endif

View File

@ -0,0 +1,272 @@
#include "LanguageModelInternal.h"
#include "FactorCollection.h"
#include "NGramNode.h"
#include "InputFileStream.h"
#include "StaticData.h"
using namespace std;
namespace Moses
{
// Trivial constructor: score registration happens in the
// LanguageModelSingleFactor base; the trie is filled by Load().
LanguageModelInternal::LanguageModelInternal(bool registerScore, ScoreIndexManager &scoreIndexManager)
:LanguageModelSingleFactor(registerScore, scoreIndexManager)
{
}
/** Load an ARPA-style LM (up to trigram) into the internal NGramCollection trie.
 * Data lines look like "<logprob>\t<w1 .. wn>[\t<backoff>]"; header/marker
 * lines beginning with '\' are skipped. Aborts for order > 3.
 */
bool LanguageModelInternal::Load(const std::string &filePath
, FactorType factorType
, float weight
, size_t nGramOrder)
{
  assert(nGramOrder <= 3);
  if (nGramOrder > 3)
  {
    UserMessage::Add("Can only do up to trigram. Aborting");
    abort();
  }
  VERBOSE(1, "Loading Internal LM: " << filePath << endl);
  FactorCollection &factorCollection = FactorCollection::Instance();
  m_filePath = filePath;
  m_factorType = factorType;
  m_weight = weight;
  m_nGramOrder = nGramOrder;
  // make sure start & end tags in factor collection
  m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
  m_sentenceStartArray[m_factorType] = m_sentenceStart;
  m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
  m_sentenceEndArray[m_factorType] = m_sentenceEnd;
  // read in file
  VERBOSE(1, filePath << endl);
  InputFileStream inFile(filePath);
  // to create lookup vector later on
  size_t maxFactorId = 0;
  map<size_t, const NGramNode*> lmIdMap;
  string line;
  int lineNo = 0;
  while( !getline(inFile, line, '\n').eof())
  {
    lineNo++;
    if (line.size() != 0 && line.substr(0,1) != "\\")
    {
      vector<string> tokens = Tokenize(line, "\t");
      if (tokens.size() >= 2)
      {
        // split unigram/bigram trigrams
        vector<string> factorStr = Tokenize(tokens[1], " ");
        // FIX: a line whose n-gram column is empty used to leave `factor`
        // and `nGram` uninitialized and then dereference them — skip it.
        if (factorStr.empty())
          continue;
        // create / traverse down tree (last word first, so the leaf is
        // reached via the full reversed history)
        NGramCollection *ngramColl = &m_map;
        NGramNode *nGram = NULL;
        const Factor *factor = NULL;
        for (int currFactor = (int) factorStr.size() - 1 ; currFactor >= 0 ; currFactor--)
        {
          factor = factorCollection.AddFactor(Output, m_factorType, factorStr[currFactor]);
          nGram = ngramColl->GetOrCreateNGram(factor);
          ngramColl = nGram->GetNGramColl();
        }
        // link every n-gram to the unigram node of its first word (used for backoff)
        NGramNode *rootNGram = m_map.GetNGram(factor);
        nGram->SetRootNGram(rootNGram);
        // create vector of factors used in this LM
        size_t factorId = factor->GetId();
        maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
        lmIdMap[factorId] = rootNGram;
        //factorCollection.SetFactorLmId(factor, rootNGram);
        float score = TransformSRIScore(Scan<float>(tokens[0]));
        nGram->SetScore( score );
        if (tokens.size() == 3)
        {
          float logBackOff = TransformSRIScore(Scan<float>(tokens[2]));
          nGram->SetLogBackOff( logBackOff );
        }
        else
        {
          nGram->SetLogBackOff( 0 );
        }
      }
    }
  }
  // build the factor-id -> unigram-node lookup vector; unseen factors -> NULL
  m_lmIdLookup.resize(maxFactorId+1);
  fill(m_lmIdLookup.begin(), m_lmIdLookup.end(), static_cast<const NGramNode*>(NULL));
  map<size_t, const NGramNode*>::iterator iterMap;
  for (iterMap = lmIdMap.begin() ; iterMap != lmIdMap.end() ; ++iterMap)
  {
    m_lmIdLookup[iterMap->first] = iterMap->second;
  }
  return true;
}
// Dispatch to the specialised uni/bi/trigram scorer based on context length.
// `len` is unused by this implementation.
float LanguageModelInternal::GetValue(const std::vector<const Word*> &contextFactor
, State* finalState
, unsigned int* len) const
{
  const size_t contextSize = contextFactor.size();
  if (contextSize == 1)
    return GetValue((*contextFactor[0])[m_factorType], finalState);
  if (contextSize == 2)
    return GetValue((*contextFactor[0])[m_factorType]
                  , (*contextFactor[1])[m_factorType], finalState);
  if (contextSize == 3)
    return GetValue((*contextFactor[0])[m_factorType]
                  , (*contextFactor[1])[m_factorType]
                  , (*contextFactor[2])[m_factorType], finalState);
  // contexts of any other size are a caller error
  assert (false);
  return 0;
}
float LanguageModelInternal::GetValue(const Factor *factor0, State* finalState) const
{
float prob;
const NGramNode *nGram = GetLmID(factor0);
if (nGram == NULL)
{
if (finalState != NULL)
*finalState = NULL;
prob = -numeric_limits<float>::infinity();
}
else
{
if (finalState != NULL)
*finalState = static_cast<const void*>(nGram);
prob = nGram->GetScore();
}
return FloorScore(prob);
}
// Bigram score P(factor1 | factor0) with back-off:
//   bigram found            -> its score
//   only unigrams found     -> P(factor1) + backoff(factor0)
//   factor0 unseen          -> P(factor1)
//   factor1 unseen          -> -infinity
// finalState gets the longest matched node (bigram if found, else unigram).
float LanguageModelInternal::GetValue(const Factor *factor0, const Factor *factor1, State* finalState) const
{
  float score;
  const NGramNode *nGram[2];
  nGram[1] = GetLmID(factor1);
  if (nGram[1] == NULL)
  {
    if (finalState != NULL)
      *finalState = NULL;
    score = -numeric_limits<float>::infinity();
  }
  else
  {
    nGram[0] = nGram[1]->GetNGram(factor0);
    if (nGram[0] == NULL)
    { // something unigram
      if (finalState != NULL)
        *finalState = static_cast<const void*>(nGram[1]);
      nGram[0] = GetLmID(factor0);
      if (nGram[0] == NULL)
      { // stops at unigram
        score = nGram[1]->GetScore();
      }
      else
      { // unigram unigram
        score = nGram[1]->GetScore() + nGram[0]->GetLogBackOff();
      }
    }
    else
    { // bigram
      if (finalState != NULL)
        *finalState = static_cast<const void*>(nGram[0]);
      score = nGram[0]->GetScore();
    }
  }
  return FloorScore(score);
}
// Trigram score P(factor2 | factor0 factor1) with SRI-style back-off.
// nGram[i] holds the node matched for the i-th history position; the code
// walks from the newest word (factor2) back, adding back-off weights each
// time a longer match is missing. finalState gets the longest matched node.
float LanguageModelInternal::GetValue(const Factor *factor0, const Factor *factor1, const Factor *factor2, State* finalState) const
{
  float score;
  const NGramNode *nGram[3];
  nGram[2] = GetLmID(factor2);
  if (nGram[2] == NULL)
  { // predicted word unseen -> -infinity
    if (finalState != NULL)
      *finalState = NULL;
    score = -numeric_limits<float>::infinity();
  }
  else
  {
    nGram[1] = nGram[2]->GetNGram(factor1);
    if (nGram[1] == NULL)
    { // something unigram
      if (finalState != NULL)
        *finalState = static_cast<const void*>(nGram[2]);
      nGram[1] = GetLmID(factor1);
      if (nGram[1] == NULL)
      { // stops at unigram
        score = nGram[2]->GetScore();
      }
      else
      {
        nGram[0] = nGram[1]->GetNGram(factor0);
        if (nGram[0] == NULL)
        { // unigram unigram
          score = nGram[2]->GetScore() + nGram[1]->GetLogBackOff();
        }
        else
        { // unigram bigram
          score = nGram[2]->GetScore() + nGram[1]->GetLogBackOff() + nGram[0]->GetLogBackOff();
        }
      }
    }
    else
    { // trigram, or something bigram
      nGram[0] = nGram[1]->GetNGram(factor0);
      if (nGram[0] != NULL)
      { // trigram
        if (finalState != NULL)
          *finalState = static_cast<const void*>(nGram[0]);
        score = nGram[0]->GetScore();
      }
      else
      { // bigram matched; back off through the root (unigram) node of factor1
        if (finalState != NULL)
          *finalState = static_cast<const void*>(nGram[1]);
        score = nGram[1]->GetScore();
        nGram[1] = nGram[1]->GetRootNGram();
        nGram[0] = nGram[1]->GetNGram(factor0);
        if (nGram[0] == NULL)
        { // just bigram
          // do nothing
        }
        else
        {
          score += nGram[0]->GetLogBackOff();
        }
      }
      // else do nothing. just use 1st bigram
    }
  }
  return FloorScore(score);
}
}

View File

@ -0,0 +1,41 @@
#ifndef moses_LanguageModelInternal_h
#define moses_LanguageModelInternal_h
#include "LanguageModelSingleFactor.h"
#include "NGramCollection.h"
namespace Moses
{
/** Guaranteed cross-platform LM implementation designed to mimic LM used in regression tests
*/
// Self-contained trigram LM (no external toolkit); backs the regression tests.
class LanguageModelInternal : public LanguageModelSingleFactor
{
protected:
  std::vector<const NGramNode*> m_lmIdLookup; // factor id -> unigram node (NULL if unseen)
  NGramCollection m_map;                      // root of the n-gram trie
  // factor id -> unigram node; NULL when the factor is not in this LM
  const NGramNode* GetLmID( const Factor *factor ) const
  {
    size_t factorId = factor->GetId();
    return ( factorId >= m_lmIdLookup.size()) ? NULL : m_lmIdLookup[factorId];
  };
  float GetValue(const Factor *factor0, State* finalState) const;
  float GetValue(const Factor *factor0, const Factor *factor1, State* finalState) const;
  float GetValue(const Factor *factor0, const Factor *factor1, const Factor *factor2, State* finalState) const;
public:
  LanguageModelInternal(bool registerScore, ScoreIndexManager &scoreIndexManager);
  // Loads an ARPA-style file; aborts for nGramOrder > 3.
  bool Load(const std::string &filePath
            , FactorType factorType
            , float weight
            , size_t nGramOrder);
  float GetValue(const std::vector<const Word*> &contextFactor
                 , State* finalState = 0
                 , unsigned int* len = 0) const;
};
}
#endif

View File

@ -0,0 +1,22 @@
// $Id: LanguageModelJoint.cpp 886 2006-10-17 11:07:17Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "LanguageModelJoint.h"

133
src/LanguageModelJoint.h Normal file
View File

@ -0,0 +1,133 @@
// $Id: LanguageModelJoint.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_LanguageModelJoint_h
#define moses_LanguageModelJoint_h
#include <vector>
#include <string>
#include <sstream>
#include "LanguageModelSingleFactor.h"
#include "LanguageModelMultiFactor.h"
#include "Word.h"
#include "FactorTypeSet.h"
#include "FactorCollection.h"
namespace Moses
{
class Phrase;
class FactorCollection;
/** LM of multiple factors. A simple extension of single factor LM - factors backoff together.
* Rather slow as this uses string concatenation/split
*/
/** LM over multiple factors that back off together: the factors of each word
 *  are concatenated with '|' into one string and scored by a wrapped
 *  single-factor LM. Rather slow due to string concatenation/split.
 */
class LanguageModelJoint : public LanguageModelMultiFactor
{
protected:
  LanguageModelSingleFactor *m_lmImpl;          // owned; deleted in destructor
  std::vector<FactorType> m_factorTypesOrdered; // order in which factors are joined
  size_t m_implFactor;                          // factor slot used inside the wrapped LM (always 0)
public:
  // Takes ownership of lmImpl.
  LanguageModelJoint(LanguageModelSingleFactor *lmImpl, bool registerScore, ScoreIndexManager &scoreIndexManager)
  :LanguageModelMultiFactor(registerScore, scoreIndexManager)
  {
    m_lmImpl = lmImpl;
  }
  ~LanguageModelJoint()
  {
    delete m_lmImpl;
  }
  // Register sentence markers for every covered factor, then delegate the
  // actual file loading to the wrapped single-factor LM.
  bool Load(const std::string &filePath
  , const std::vector<FactorType> &factorTypes
  , float weight
  , size_t nGramOrder)
  {
    m_factorTypes = FactorMask(factorTypes);
    m_weight = weight;
    m_filePath = filePath;
    m_nGramOrder = nGramOrder;
    m_factorTypesOrdered= factorTypes;
    m_implFactor = 0;
    FactorCollection &factorCollection = FactorCollection::Instance();
    // sentence markers
    for (size_t index = 0 ; index < factorTypes.size() ; ++index)
    {
      FactorType factorType = factorTypes[index];
      m_sentenceStartArray[factorType] = factorCollection.AddFactor(Output, factorType, BOS_);
      m_sentenceEndArray[factorType] = factorCollection.AddFactor(Output, factorType, EOS_);
    }
    return m_lmImpl->Load(filePath, m_implFactor, weight, nGramOrder);
  }
  // Build a joint ("w|pos|stem") word for each context position and score the
  // resulting single-factor context with the wrapped LM.
  float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
  {
    if (contextFactor.size() == 0)
    {
      return 0;
    }
    // BUG FIX: iterate over at most contextFactor.size() words. The previous
    // code always read m_nGramOrder entries and so indexed past the end of
    // the context vector for short (sentence-initial) contexts.
    const size_t contextSize = (contextFactor.size() < m_nGramOrder) ? contextFactor.size() : m_nGramOrder;
    // joint context for internal LM
    std::vector<const Word*> jointContext;
    for (size_t currPos = 0 ; currPos < contextSize ; ++currPos )
    {
      const Word &word = *contextFactor[currPos];
      // add word to chunked context
      std::stringstream stream("");
      const Factor *factor = word[ m_factorTypesOrdered[0] ];
      stream << factor->GetString();
      for (size_t index = 1 ; index < m_factorTypesOrdered.size() ; ++index)
      {
        FactorType factorType = m_factorTypesOrdered[index];
        const Factor *factor = word[factorType];
        stream << "|" << factor->GetString();
      }
      factor = FactorCollection::Instance().AddFactor(Output, m_implFactor, stream.str());
      Word* jointWord = new Word;
      jointWord->SetFactor(m_implFactor, factor);
      jointContext.push_back(jointWord);
    }
    // calc score on chunked phrase; temporary joint words are freed afterwards
    float ret = m_lmImpl->GetValue(jointContext, finalState, len);
    RemoveAllInColl(jointContext);
    return ret;
  }
};
}
#endif

View File

@ -0,0 +1,56 @@
// $Id: LanguageModelMultiFactor.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "LanguageModelMultiFactor.h"
#include "Phrase.h"
namespace Moses
{
// Forward construction to the LanguageModel base (handles score registration).
LanguageModelMultiFactor::LanguageModelMultiFactor(bool registerScore, ScoreIndexManager &scoreIndexManager)
:LanguageModel(registerScore, scoreIndexManager)
{}
// Human-readable description, e.g. "3-gram LM score, factor-type= ??? , file=lm.gz".
// TODO: the factor types themselves are not printed for multi-factor LMs
// (POS + stem, for example).
std::string LanguageModelMultiFactor::GetScoreProducerDescription() const
{
  std::ostringstream description;
  description << GetNGramOrder();
  description << "-gram LM score, factor-type= ??? ";
  description << ", file=" << m_filePath;
  return description.str();
}
bool LanguageModelMultiFactor::Useable(const Phrase &phrase) const
{
if (phrase.GetSize()==0)
return false;
// whether phrase contains all factors in this LM
const Word &word = phrase.GetWord(0);
for (size_t currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; ++currFactor)
{
if (m_factorTypes[currFactor] && word[currFactor] == NULL)
return false;
}
return true;
}
}

View File

@ -0,0 +1,60 @@
// $Id: LanguageModelMultiFactor.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_LanguageModelMultiFactor_h
#define moses_LanguageModelMultiFactor_h
#include <vector>
#include <string>
#include "LanguageModel.h"
#include "Word.h"
#include "FactorTypeSet.h"
namespace Moses
{
class Phrase;
//! Abstract class for for multi factor LM
//! Abstract class for for multi factor LM
class LanguageModelMultiFactor : public LanguageModel
{
protected:
  FactorMask m_factorTypes; // which factors this LM conditions on
  LanguageModelMultiFactor(bool registerScore, ScoreIndexManager &scoreIndexManager);
public:
  // Load a model over the given ordered factor types.
  virtual bool Load(const std::string &filePath
                    , const std::vector<FactorType> &factorTypes
                    , float weight
                    , size_t nGramOrder) = 0;
  LMType GetLMType() const
  {
    return MultiFactor;
  }
  std::string GetScoreProducerDescription() const;
  // True iff the phrase's first word carries every factor in m_factorTypes.
  bool Useable(const Phrase &phrase) const;
};
}
#endif

114
src/LanguageModelRandLM.cpp Normal file
View File

@ -0,0 +1,114 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cassert>
#include <limits>
#include <iostream>
#include <fstream>
#include "LanguageModelRandLM.h"
#include "FactorCollection.h"
#include "Phrase.h"
#include "InputFileStream.h"
#include "StaticData.h"
namespace Moses
{
// Load a randomized (Bloom-filter based) LM from filePath with a 50 MB cache.
// Asserts (rather than returning false) if initialisation fails.
bool LanguageModelRandLM::Load(const std::string &filePath, FactorType factorType, float weight,
                               size_t nGramOrder) {
  cerr << "Loading LanguageModelRandLM..." << endl;
  FactorCollection &factorCollection = FactorCollection::Instance();
  m_filePath = filePath;
  m_factorType = factorType;
  m_weight = weight;
  m_nGramOrder = nGramOrder;
  int cache_MB = 50; // increase cache size
  m_lm = randlm::RandLM::initRandLM(filePath, nGramOrder, cache_MB);
  assert(m_lm != NULL);
  // get special word ids
  m_oov_id = m_lm->getWordID(m_lm->getOOV());
  CreateFactors(factorCollection);
  return true;
}
// Register every RandLM vocabulary word as a Moses factor and flatten the
// mapping into m_randlm_ids_vec so factor-id -> RandLM word id is a vector
// lookup; unseen factors map to the OOV id.
void LanguageModelRandLM::CreateFactors(FactorCollection &factorCollection) { // add factors which have randlm id
  // code copied & paste from SRI LM class. should do template function
  // first get all bf vocab in map
  std::map<size_t, randlm::WordID> randlm_ids_map; // map from factor id -> randlm id
  size_t maxFactorId = 0; // to create lookup vector later on
  for(std::map<randlm::Word, randlm::WordID>::const_iterator vIter = m_lm->vocabStart();
      vIter != m_lm->vocabEnd(); vIter++){
    // get word from randlm vocab and associate with (new) factor id
    size_t factorId=factorCollection.AddFactor(Output,m_factorType,vIter->first)->GetId();
    randlm_ids_map[factorId] = vIter->second;
    maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
  }
  // add factors for BOS and EOS and store bf word ids
  size_t factorId;
  m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, m_lm->getBOS());
  factorId = m_sentenceStart->GetId();
  maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
  m_sentenceStartArray[m_factorType] = m_sentenceStart;
  m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, m_lm->getEOS());
  factorId = m_sentenceEnd->GetId();
  maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
  m_sentenceEndArray[m_factorType] = m_sentenceEnd;
  // add to lookup vector in object
  m_randlm_ids_vec.resize(maxFactorId+1);
  // fill with OOV code
  fill(m_randlm_ids_vec.begin(), m_randlm_ids_vec.end(), m_oov_id);
  for (map<size_t, randlm::WordID>::const_iterator iter = randlm_ids_map.begin();
       iter != randlm_ids_map.end() ; ++iter)
    m_randlm_ids_vec[iter->first] = iter->second;
}
// Delegate string -> word-id mapping to the underlying RandLM vocabulary.
randlm::WordID LanguageModelRandLM::GetLmID( const std::string &str ) const {
  const randlm::WordID wordId = m_lm->getWordID(str);
  return wordId;
}
// Score the last word of contextFactor given the preceding words via RandLM.
// finalState is filled by getProb(); len is always set to 0 (no back-off info).
// NOTE(review): ngram[] is a fixed MAX_NGRAM_SIZE buffer — assumes
// contextFactor.size() <= MAX_NGRAM_SIZE; confirm the caller guarantees this.
float LanguageModelRandLM::GetValue(const vector<const Word*> &contextFactor,
                                    State* finalState, unsigned int* len) const {
  unsigned int dummy; // is this needed ?
  if (!len) { len = &dummy; }
  FactorType factorType = GetFactorType();
  // set up context
  randlm::WordID ngram[MAX_NGRAM_SIZE];
  int count = contextFactor.size();
  for (int i = 0 ; i < count ; i++) {
    ngram[i] = GetLmID((*contextFactor[i])[factorType]);
    //std::cerr << m_lm->getWord(ngram[i]) << " ";
  }
  int found = 0;
  float logprob = FloorScore(TransformSRIScore(m_lm->getProb(&ngram[0], count, &found, finalState)));
  *len = 0; // not available
  //if (finalState)
  //  std::cerr << " = " << logprob << "(" << *finalState << ", " << *len <<")"<< std::endl;
  //else
  //  std::cerr << " = " << logprob << std::endl;
  return logprob;
}
}

67
src/LanguageModelRandLM.h Normal file
View File

@ -0,0 +1,67 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_LanguageModelRandLM_h
#define moses_LanguageModelRandLM_h
#include <string>
#include <vector>
#include "Factor.h"
#include "Util.h"
#include "LanguageModelSingleFactor.h"
#include "RandLM.h"
class randlm::RandLM;
namespace Moses
{
class Factor;
class Phrase;
// RandLM wrapper (single factor LM)
// RandLM wrapper (single factor LM)
class LanguageModelRandLM : public LanguageModelSingleFactor {
public:
  LanguageModelRandLM(bool registerScore, ScoreIndexManager &scoreIndexManager)
    : LanguageModelSingleFactor(registerScore, scoreIndexManager), m_lm(0) {}
  bool Load(const std::string &filePath, FactorType factorType, float weight, size_t nGramOrder);
  virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len=0) const;
  ~LanguageModelRandLM() {
    delete m_lm; // owned; created in Load()
  }
  void CleanUpAfterSentenceProcessing() {
    m_lm->clearCaches(); // clear caches
  }
  void InitializeBeforeSentenceProcessing() {} // nothing to do
protected:
  std::vector<randlm::WordID> m_randlm_ids_vec; // factor id -> randlm word id
  randlm::RandLM* m_lm;                         // owned RandLM instance
  randlm::WordID m_oov_id;                      // id unknown factors map to
  void CreateFactors(FactorCollection &factorCollection);
  randlm::WordID GetLmID( const std::string &str ) const;
  // Fast path: factor id -> randlm id via the lookup vector; OOV if unseen.
  randlm::WordID GetLmID( const Factor *factor ) const{
    size_t factorId = factor->GetId();
    return ( factorId >= m_randlm_ids_vec.size()) ? m_oov_id : m_randlm_ids_vec[factorId];
  };
};
}
#endif

139
src/LanguageModelRemote.cpp Normal file
View File

@ -0,0 +1,139 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include "LanguageModelRemote.h"
#include "Factor.h"
namespace Moses {
// Sentinel keys for the per-sentence cache trie: BOS is the null pointer and
// EOS the "next" address. They are only ever used as map keys, never
// dereferenced. NOTE(review): forming BOS + 1 from a null pointer is
// technically undefined behaviour — consider distinct static dummies.
const Factor* LanguageModelRemote::BOS = NULL;
const Factor* LanguageModelRemote::EOS = (LanguageModelRemote::BOS + 1);
// Trivial constructor; the server connection is established in Load().
LanguageModelRemote::LanguageModelRemote(bool registerScore, ScoreIndexManager &scoreIndexManager)
:LanguageModelSingleFactor(registerScore, scoreIndexManager)
{
}
/** Parse "<host>:<port>" from filePath and connect to the remote LM server.
 * The model itself lives on the server; only the socket is set up here.
 * @return false when the address is malformed or the connection fails.
 */
bool LanguageModelRemote::Load(const std::string &filePath
, FactorType factorType
, float weight
, size_t nGramOrder)
{
  m_factorType = factorType;
  m_weight = weight;
  m_nGramOrder = nGramOrder;
  // FIX: use string::size_type and reject strings without a ':' — the old
  // int/atoi path silently produced host=<whole string>, port=0 on
  // malformed input instead of failing.
  const std::string::size_type cutAt = filePath.find(':', 0);
  if (cutAt == std::string::npos) {
    std::cerr << "LanguageModelRemote::Load: expected <host>:<port>, got '" << filePath << "'" << std::endl;
    return false;
  }
  std::string host = filePath.substr(0, cutAt);
  //std::cerr << "port string = '" << filePath.substr(cutAt+1,filePath.size()-cutAt) << "'\n";
  int port = atoi(filePath.substr(cutAt + 1, filePath.size() - cutAt).c_str());
  bool good = start(host, port);
  if (!good) {
    std::cerr << "failed to connect to lm server on " << host << " on port " << port << std::endl;
  }
  ClearSentenceCache();
  return good;
}
// Open a TCP connection to host:port, retrying up to 5 times (1 s apart).
// Exits the process if the hostname cannot be resolved.
// NOTE(review): gethostbyname/bzero/bcopy are legacy APIs (getaddrinfo is the
// modern replacement) and the socket() return value is never checked.
bool LanguageModelRemote::start(const std::string& host, int port) {
  //std::cerr << "host = " << host << ", port = " << port << "\n";
  sock = socket(AF_INET, SOCK_STREAM, 0);
  hp = gethostbyname(host.c_str());
  if (hp==NULL) { herror("gethostbyname failed"); exit(1); }
  bzero((char *)&server, sizeof(server));
  bcopy(hp->h_addr, (char *)&server.sin_addr, hp->h_length);
  server.sin_family = hp->h_addrtype;
  server.sin_port = htons(port);
  int errors = 0;
  while (connect(sock, (struct sockaddr *)&server, sizeof(server)) < 0) {
    //std::cerr << "Error: connect()\n";
    sleep(1);
    errors++;
    if (errors > 5) return false;
  }
  return true;
}
/** Score the last word of contextFactor given the preceding words.
 *  Results are cached per sentence in a trie keyed on Factor pointers;
 *  on a miss the server is queried with a "prob <word> <context...>" line
 *  and a 6-byte binary float reply is expected.
 */
float LanguageModelRemote::GetValue(const std::vector<const Word*> &contextFactor, State* finalState, unsigned int* len) const {
  size_t count = contextFactor.size();
  if (count == 0) {
    if (finalState) *finalState = NULL;
    return 0;
  }
  size_t max = m_nGramOrder;
  const FactorType factor = GetFactorType();
  if (max > count) max = count;

  // walk the cache trie over the context words; null factors stand in for
  // the sentence boundaries (BOS/EOS sentinels)
  Cache* cur = &m_cache;
  int pc = static_cast<int>(count) - 1;
  for (int i = 0; i < pc; ++i) {
    const Factor* f = contextFactor[i]->GetFactor(factor);
    cur = &cur->tree[f ? f : BOS];
  }
  const Factor* event_word = contextFactor[pc]->GetFactor(factor);
  cur = &cur->tree[event_word ? event_word : EOS];
  if (cur->prob) {  // prob==0 means "not queried yet"
    if (finalState) *finalState = cur->boState;
    if (len) *len = m_nGramOrder;
    return cur->prob;
  }
  // cache miss: mint a fresh state id for recombination, then ask the server.
  // NOTE(review): reinterpreting m_curId as a State assumes sizeof(State) <=
  // sizeof(size_t) -- confirm against the State typedef.
  cur->boState = *reinterpret_cast<const State*>(&m_curId);
  ++m_curId;

  std::ostringstream os;
  os << "prob ";
  if (event_word == NULL) {
    os << "</s>";
  } else {
    os << event_word->GetString();
  }
  // context is sent most-recent-first, truncated to the model order
  for (size_t i=1; i<max; i++) {
    const Factor* f = contextFactor[count-1-i]->GetFactor(factor);
    if (f == NULL) {
      os << " <s>";
    } else {
      os << ' ' << f->GetString();
    }
  }
  os << std::endl;
  std::string out = os.str();
  write(sock, out.c_str(), out.size());

  // read the 6-byte reply, tolerating short reads and transient errors
  char res[6];
  int r = read(sock, res, 6);
  int errors = 0;
  int cnt = 0;
  while (1) {
    if (r < 0) {
      errors++; sleep(1);
      if (errors > 5) exit(1);
      r = read(sock, &res[cnt], 6-cnt);  // BUGFIX: the read was never retried
    } else if (r == 0 || res[cnt] == '\n') {
      break;
    } else {
      cnt += r;
      if (cnt == 6) break;
      r = read(sock, &res[cnt], 6-cnt);  // BUGFIX: result was discarded, so the
                                         // loop tested stale data on short reads
    }
  }
  cur->prob = FloorScore(TransformSRIScore(*reinterpret_cast<float*>(res)));
  if (finalState) {
    *finalState = cur->boState;
    if (len) *len = m_nGramOrder;
  }
  return cur->prob;
}
// Close the server connection; the cache trie cleans itself up.
LanguageModelRemote::~LanguageModelRemote() {
  // Step 8 When finished send all lingering transmissions and close the connection
  close(sock);
}
}

43
src/LanguageModelRemote.h Normal file
View File

@ -0,0 +1,43 @@
#ifndef moses_LanguageModelRemote_h
#define moses_LanguageModelRemote_h
#include "LanguageModelSingleFactor.h"
#include "TypeDef.h"
#include "Factor.h"
#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/in.h>
namespace Moses
{
/** Single-factor LM that delegates n-gram scoring to a remote server over a
 *  TCP socket; the server address is passed to Load() as "host:port".
 *  Replies are cached per sentence in a trie keyed on Factor pointers.
 */
class LanguageModelRemote : public LanguageModelSingleFactor {
	private:
		// One trie node per context word; prob == 0 means "not queried yet".
		struct Cache {
			std::map<const Factor*, Cache> tree;
			float prob;
			State boState;
			Cache() : prob(0) {}
		};

		int sock, port;
		struct hostent *hp;
		struct sockaddr_in server;
		mutable size_t m_curId;   // source of fresh backoff-state ids (reset per sentence)
		mutable Cache m_cache;    // per-sentence reply cache, mutated by const GetValue()
		bool start(const std::string& host, int port);
		// sentinel keys for sentence-boundary words in the cache trie
		static const Factor* BOS;
		static const Factor* EOS;
	public:
		LanguageModelRemote(bool registerScore, ScoreIndexManager &scoreIndexManager);
		~LanguageModelRemote();
		// Drop all cached replies and restart the state-id counter.
		void ClearSentenceCache() { m_cache.tree.clear(); m_curId = 1000; }
		virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0, unsigned int* len = 0) const;
		bool Load(const std::string &filePath
					, FactorType factorType
					, float weight
					, size_t nGramOrder);
};
}
#endif

174
src/LanguageModelSRI.cpp Normal file
View File

@ -0,0 +1,174 @@
// $Id: LanguageModelSRI.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cassert>
#include <limits>
#include <iostream>
#include <fstream>
#include "Ngram.h"
#include "Vocab.h"
#include "LanguageModelSRI.h"
#include "TypeDef.h"
#include "Util.h"
#include "FactorCollection.h"
#include "Phrase.h"
#include "StaticData.h"
using namespace std;
namespace Moses
{
// Register the feature; the SRILM vocab and model are created in Load().
LanguageModelSRI::LanguageModelSRI(bool registerScore, ScoreIndexManager &scoreIndexManager)
:LanguageModelSingleFactor(registerScore, scoreIndexManager)
, m_srilmVocab(0)
, m_srilmModel(0)
{
}
// Release the SRILM objects (model first, since it references the vocab).
LanguageModelSRI::~LanguageModelSRI()
{
  delete m_srilmModel;
  delete m_srilmVocab;
}
/** Load an SRILM model from an ARPA file and build the factor-id lookup.
 *  \return always true; SRILM only emits warnings for recoverable problems.
 */
bool LanguageModelSRI::Load(const std::string &filePath
			, FactorType factorType
			, float weight
			, size_t nGramOrder)
{
  m_srilmVocab  = new Vocab();
  m_srilmModel	= new Ngram(*m_srilmVocab, nGramOrder);
  m_factorType 	= factorType;
  m_weight			= weight;
  m_nGramOrder	= nGramOrder;
  m_filePath		= filePath;

  // score OOVs with the model's <unk> entry rather than skipping them
  m_srilmModel->skipOOVs() = false;

  File file( filePath.c_str(), "r" );
  m_srilmModel->read(file);

  // LM can be ok, just outputs warnings
  CreateFactors();
  m_unknownId = m_srilmVocab->unkIndex();

  return true;
}
// Build m_lmIdLookup, mapping every Moses factor id to its SRILM vocab index.
// Walks the whole SRILM vocabulary, registers each surface string as a Moses
// factor, then adds the sentence-boundary factors explicitly.
void LanguageModelSRI::CreateFactors()
{ // add factors which have srilm id
  FactorCollection &factorCollection = FactorCollection::Instance();

  std::map<size_t, VocabIndex> lmIdMap;
  size_t maxFactorId = 0; // to create lookup vector later on
  VocabString str;
  VocabIter iter(*m_srilmVocab);
  while ( (str = iter.next()) != NULL)
  {
    VocabIndex lmId = GetLmID(str);
    size_t factorId = factorCollection.AddFactor(Output, m_factorType, str)->GetId();
    lmIdMap[factorId] = lmId;
    maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
  }

  size_t factorId;

  // <s>
  m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
  factorId = m_sentenceStart->GetId();
  lmIdMap[factorId] = GetLmID(BOS_);
  maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
  m_sentenceStartArray[m_factorType] = m_sentenceStart;

  // </s>
  m_sentenceEnd	= factorCollection.AddFactor(Output, m_factorType, EOS_);
  factorId = m_sentenceEnd->GetId();
  lmIdMap[factorId] = GetLmID(EOS_);
  maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
  m_sentenceEndArray[m_factorType] = m_sentenceEnd;

  // add to lookup vector in object; unmapped slots score as unknown words
  m_lmIdLookup.resize(maxFactorId+1);
  fill(m_lmIdLookup.begin(), m_lmIdLookup.end(), m_unknownId);

  map<size_t, VocabIndex>::iterator iterMap;
  for (iterMap = lmIdMap.begin() ; iterMap != lmIdMap.end() ; ++iterMap)
  {
    m_lmIdLookup[iterMap->first] = iterMap->second;
  }
}
// Look up a surface string in the SRILM vocab; unseen strings map to <unk>.
VocabIndex LanguageModelSRI::GetLmID( const std::string &str ) const
{
  return m_srilmVocab->getIndex( str.c_str(), m_unknownId );
}
// Map a Moses factor onto its SRILM vocab index. Factors created after the
// lookup table was built fall outside the vector and score as unknown words.
VocabIndex LanguageModelSRI::GetLmID( const Factor *factor ) const
{
  const size_t factorId = factor->GetId();
  if (factorId < m_lmIdLookup.size()) {
    return m_lmIdLookup[factorId];
  }
  return m_unknownId;
}
// Query SRILM for log10 P(wordId | context) and convert to natural log,
// flooring impossible events.
float LanguageModelSRI::GetValue(VocabIndex wordId, VocabIndex *context) const
{
  float p = m_srilmModel->wordProb( wordId, context );
  return FloorScore(TransformSRIScore(p));  // log10->log
}
/** Score the last word of contextFactor given the preceding words.
 *  \param finalState if non-null, receives the SRILM context id for
 *         hypothesis recombination
 *  \param len if non-null, receives the matched context length + 1
 */
float LanguageModelSRI::GetValue(const vector<const Word*> &contextFactor, State* finalState, unsigned int *len) const
{
  FactorType	factorType = GetFactorType();
  size_t count = contextFactor.size();
  if (count == 0)
  {
    // BUGFIX: the original assigned the local parameter (finalState = NULL),
    // which never reached the caller; reset the caller's state instead, as
    // the other LM implementations do.
    if (finalState) *finalState = NULL;
    return 0;
  }

  // set up context: most-recent-first, excluding the predicted word,
  // terminated by Vocab_None as SRILM requires
  VocabIndex context[MAX_NGRAM_SIZE];
  for (size_t i = 0 ; i < count - 1 ; i++)
  {
    context[i] = GetLmID((*contextFactor[count-2-i])[factorType]);
  }
  context[count-1] = Vocab_None;

  assert((*contextFactor[count-1])[factorType] != NULL);
  // call sri lm fn
  VocabIndex lmId = GetLmID((*contextFactor[count-1])[factorType]);
  float ret = GetValue(lmId, context);

  if (finalState) {
    // shift the context and prepend the predicted word to obtain the
    // n-gram state SRILM uses for recombination
    for (int i = count - 2 ; i >= 0 ; i--)
      context[i+1] = context[i];
    context[0] = lmId;
    unsigned int dummy;
    if (!len) { len = &dummy; }
    *finalState = m_srilmModel->contextID(context,*len);
    (*len)++;
  }
  return ret;
}
}

65
src/LanguageModelSRI.h Normal file
View File

@ -0,0 +1,65 @@
// $Id: LanguageModelSRI.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_LanguageModelSRI_h
#define moses_LanguageModelSRI_h
#include <string>
#include <vector>
#include "Factor.h"
#include "TypeDef.h"
#include "Vocab.h"
#include "LanguageModelSingleFactor.h"
class Factor;
class Phrase;
class Ngram; // SRI forward decl
namespace Moses
{
/** Single-factor LM backed by the SRILM toolkit (Vocab + Ngram). */
class LanguageModelSRI : public LanguageModelSingleFactor
{
protected:
	std::vector<VocabIndex> m_lmIdLookup;  // Moses factor id -> SRILM vocab index
	Vocab 		*m_srilmVocab;               // owned
	Ngram 		*m_srilmModel;               // owned; references m_srilmVocab
	VocabIndex	m_unknownId;                 // SRILM <unk> index

	float GetValue(VocabIndex wordId, VocabIndex *context) const;
	void CreateFactors();
	VocabIndex GetLmID( const std::string &str ) const;
	VocabIndex GetLmID( const Factor *factor ) const;

public:
	LanguageModelSRI(bool registerScore, ScoreIndexManager &scoreIndexManager);
	~LanguageModelSRI();
	bool Load(const std::string &filePath
					, FactorType factorType
					, float weight
					, size_t nGramOrder);
	virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0, unsigned int* len = 0) const;
};
}
#endif

View File

@ -0,0 +1,60 @@
// $Id: LanguageModelSingleFactor.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cassert>
#include <limits>
#include <iostream>
#include <sstream>
#include "LanguageModelSingleFactor.h"
#include "TypeDef.h"
#include "Util.h"
#include "FactorCollection.h"
#include "Phrase.h"
#include "StaticData.h"
using namespace std;
namespace Moses
{
// static variable init: shared "no state" marker for all single-factor LMs
LanguageModelSingleFactor::State LanguageModelSingleFactor::UnknownState=0;
// Forward registration to the LanguageModel base; nothing else to set up.
LanguageModelSingleFactor::LanguageModelSingleFactor(bool registerScore, ScoreIndexManager &scoreIndexManager)
:LanguageModel(registerScore, scoreIndexManager)
{
}
// No resources owned at this level; subclasses clean up their own models.
LanguageModelSingleFactor::~LanguageModelSingleFactor() {}
// Human-readable feature label such as "LM_3gram".
// what about LMs that are over multiple factors at once, POS + stem, for example?
std::string LanguageModelSingleFactor::GetScoreProducerDescription() const
{
  std::ostringstream description;
  description << "LM_" << GetNGramOrder() << "gram";
  return description.str();
}
}

View File

@ -0,0 +1,87 @@
// $Id: LanguageModelSingleFactor.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_LanguageModelSingleFactor_h
#define moses_LanguageModelSingleFactor_h
#include "LanguageModel.h"
#include "Phrase.h"
namespace Moses
{
class FactorCollection;
class Factor;
//! Abstract class for for single factor LM
//! Abstract class for single factor LM
class LanguageModelSingleFactor : public LanguageModel
{
protected:
	const Factor *m_sentenceStart, *m_sentenceEnd;  // boundary factors set by Load()
	FactorType	m_factorType;                       // which factor this LM scores
	LanguageModelSingleFactor(bool registerScore, ScoreIndexManager &scoreIndexManager);

public:
	static State UnknownState;  // shared "no state" marker

	virtual ~LanguageModelSingleFactor();
	virtual bool Load(const std::string &filePath
					, FactorType factorType
					, float weight
					, size_t nGramOrder) = 0;

	LMType GetLMType() const
	{
		return SingleFactor;
	}

	// A phrase is scorable iff its first word carries this LM's factor.
	bool Useable(const Phrase &phrase) const
	{
		return (phrase.GetSize()>0 && phrase.GetFactor(0, m_factorType) != NULL);
	}

	const Factor *GetSentenceStart() const
	{
		return m_sentenceStart;
	}
	const Factor *GetSentenceEnd() const
	{
		return m_sentenceEnd;
	}
	FactorType GetFactorType() const
	{
		return m_factorType;
	}
	float GetWeight() const
	{
		return m_weight;
	}
	void SetWeight(float weight)
	{
		m_weight = weight;
	}
	std::string GetScoreProducerDescription() const;
};
}
#endif

22
src/LanguageModelSkip.cpp Normal file
View File

@ -0,0 +1,22 @@
// $Id: LanguageModelSkip.cpp 916 2006-10-24 16:27:13Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "LanguageModelSkip.h"

129
src/LanguageModelSkip.h Normal file
View File

@ -0,0 +1,129 @@
// $Id: LanguageModelSkip.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_LanguageModelSkip_h
#define moses_LanguageModelSkip_h
#include <vector>
#include <algorithm>
#include "LanguageModelMultiFactor.h"
#include "LanguageModelSingleFactor.h"
#include "Phrase.h"
#include "FactorCollection.h"
namespace Moses
{
/* Hacked up LM which skips any factor with string '---'
* order of chunk hardcoded to 3 (m_realNGramOrder)
*/
/* Hacked up LM which skips any factor with string '---'
 * order of chunk hardcoded to 3 (m_realNGramOrder)
 */
class LanguageModelSkip : public LanguageModelSingleFactor
{
protected:
	size_t m_realNGramOrder;               // order applied to the filtered ("chunked") context
	LanguageModelSingleFactor *m_lmImpl;   // owned; deleted in the destructor

public:
	/** Constructor
	*		\param lmImpl SRI or IRST LM which this LM can use to load data
	*/
	LanguageModelSkip(LanguageModelSingleFactor *lmImpl
									, bool registerScore
									, ScoreIndexManager &scoreIndexManager)
	: LanguageModelSingleFactor(registerScore, scoreIndexManager)
	{
		m_lmImpl = lmImpl;
	}
	~LanguageModelSkip()
	{
		delete m_lmImpl;
	}
	// Record parameters and delegate the actual model loading to the wrapped LM.
	bool Load(const std::string &filePath
					, FactorType factorType
					, float weight
					, size_t nGramOrder)
	{
		m_factorType 				= factorType;
		m_weight 						= weight;
		m_filePath 					= filePath;
		m_nGramOrder 				= nGramOrder;

		m_realNGramOrder 		= 3;

		FactorCollection &factorCollection = FactorCollection::Instance();

		m_sentenceStartArray[m_factorType] = factorCollection.AddFactor(Output, m_factorType, BOS_);
		m_sentenceEndArray[m_factorType] = factorCollection.AddFactor(Output, m_factorType, EOS_);

		return m_lmImpl->Load(filePath, m_factorType, weight, nGramOrder);
	}

	// Score the last word against a context rebuilt from the nearest
	// non-'---' words, then delegate to the wrapped LM.
	float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
	{
		if (contextFactor.size() == 0)
		{
			return 0;
		}
		// only process context where last word is a word we want
		const Factor *factor = (*contextFactor.back())[m_factorType];
		std::string strWord = factor->GetString();
		if (strWord.find("---") == 0)
			return 0;

		// add last word
		std::vector<const Word*> chunkContext;
		Word* chunkWord = new Word;
		chunkWord->SetFactor(m_factorType, factor);
		chunkContext.push_back(chunkWord);

		// create context in reverse 'cos we skip words we don't want
		for (int currPos = (int)contextFactor.size() - 2 ; currPos >= 0 && chunkContext.size() < m_realNGramOrder ; --currPos )
		{
			const Word &word = *contextFactor[currPos];
			factor = word[m_factorType];
			// NOTE(review): this declaration shadows the outer strWord; harmless
			// here, but worth cleaning up.
			std::string strWord = factor->GetString();
			bool skip = strWord.find("---") == 0;
			if (skip)
				continue;

			// add word to chunked context
			Word* chunkWord = new Word;
			chunkWord->SetFactor(m_factorType, factor);
			chunkContext.push_back(chunkWord);
		}

		// create context factor the right way round
		std::reverse(chunkContext.begin(), chunkContext.end());

		// calc score on chunked phrase; temporary Words are freed afterwards
		float ret = m_lmImpl->GetValue(chunkContext, finalState, len);

		RemoveAllInColl(chunkContext);

		return ret;
	}
};
}
#endif

269
src/LexicalReordering.cpp Normal file
View File

@ -0,0 +1,269 @@
#include "LexicalReordering.h"
#include "StaticData.h"
namespace Moses
{
/** Construct a lexicalized reordering feature: registers it with StaticData,
 *  decodes the direction/condition options into their atomic parts, selects
 *  the factor masks per conditioning variable, and loads the score table.
 */
LexicalReordering::LexicalReordering(const std::string &filePath,
									 const std::vector<float>& weights,
									 Direction direction,
									 Condition condition,
									 std::vector< FactorType >& f_factors,
									 std::vector< FactorType >& e_factors)
  : m_NumScoreComponents(weights.size()), m_MaxContextLength(0)
{
  std::cerr << "Creating lexical reordering...\n";
  //add ScoreProducer
  const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
  const_cast<StaticData&>(StaticData::Instance()).SetWeightsForScoreProducer(this, weights);
  std::cerr << "weights: ";
  for(size_t w = 0; w < weights.size(); ++w){
    std::cerr << weights[w] << " ";
  }
  std::cerr << "\n";
  m_Direction = DecodeDirection(direction);
  m_Condition = DecodeCondition(condition);

  //m_FactorsE = e_factors;
  //m_FactorsF = f_factors;
  //Todo:should check that
  //- if condition contains e or c than e_factors non empty
  //- if condition contains f f_factors non empty
  for(size_t i = 0; i < m_Condition.size(); ++i){
    switch(m_Condition[i]){
    case E:
      m_FactorsE = e_factors;
      if(m_FactorsE.empty()){
        //problem
        std::cerr << "Problem e factor mask is unexpectedly empty\n";
      }
      break;
    case F:
      m_FactorsF = f_factors;
      if(m_FactorsF.empty()){
        //problem
        std::cerr << "Problem f factor mask is unexpectedly empty\n";
      }
      break;
    case C:
      // context conditioning uses the target-side factors and a 1-word window
      m_FactorsC         = e_factors;
      m_MaxContextLength = 1;
      if(m_FactorsC.empty()){
        //problem
        std::cerr << "Problem c factor mask is unexpectedly empty\n";
      }
      break;
    default:
      //problem
      std::cerr << "Unknown conditioning option!\n";
      break;
    }
  }
  // one weight per direction means orientations share a single score slot
  if(weights.size() == m_Direction.size()){
    m_OneScorePerDirection = true;
    std::cerr << "Reordering types NOT individualy weighted!\n";
  } else {
    m_OneScorePerDirection = false;
  }
  m_Table = LexicalReorderingTable::LoadAvailable(filePath, m_FactorsF, m_FactorsE, m_FactorsC);
}
// Release the owned score table.
LexicalReordering::~LexicalReordering(){
  // delete of a null pointer is a no-op, so no guard is needed
  delete m_Table;
}
/** Compute this feature's score vector for one hypothesis.
 *  For each configured direction, the cached table probabilities are read
 *  (from the previous hypothesis for Forward, the current one for Backward),
 *  the orientation is classified, and the matching probability is copied
 *  into the output slot. Unused slots stay 0.
 */
std::vector<float> LexicalReordering::CalcScore(Hypothesis* hypothesis) const {
  std::vector<float> score(GetNumScoreComponents(), 0);
  std::vector<float> values;

  //for every direction
  for(size_t i = 0; i < m_Direction.size(); ++i){
    //grab data
    if(Forward == m_Direction[i]){
      //relates to prev hypothesis as we dont know next phrase for current yet
      //sanity check: is there a previous hypothesis?
      if(0 == hypothesis->GetPrevHypo()->GetId()){
        continue; //no score continue with next direction
      }
      //grab probs for prev hypothesis
      const ScoreComponentCollection &reorderingScoreColl =
        hypothesis->GetPrevHypo()->GetCachedReorderingScore();
      values = reorderingScoreColl.GetScoresForProducer(this);
      /*
      values = m_Table->GetScore((hypothesis->GetPrevHypo()->GetSourcePhrase()).GetSubString(hypothesis->GetPrevHypo()->GetCurrSourceWordsRange()),
                                 hypothesis->GetPrevHypo()->GetCurrTargetPhrase(),
                                 auxGetContext(hypothesis->GetPrevHypo()));
      */
    }
    if(Backward == m_Direction[i])
    {
      const ScoreComponentCollection &reorderingScoreColl =
        hypothesis->GetCachedReorderingScore();
      values = reorderingScoreColl.GetScoresForProducer(this);
      /*
      values = m_Table->GetScore(hypothesis->GetSourcePhrase().GetSubString(hypothesis->GetCurrSourceWordsRange()),
                                 hypothesis->GetCurrTargetPhrase(),
                                 auxGetContext(hypothesis));
      */
    }

    //add score
    //sanity check: do we have any probs?
    assert(values.size() == (GetNumOrientationTypes() * m_Direction.size()));

    OrientationType orientation = GetOrientationType(hypothesis);
    float value = values[orientation + i * GetNumOrientationTypes()];
    if(m_OneScorePerDirection){
      //one score per direction
      score[i] = value;
    } else {
      //one score per direction and orientation
      score[orientation + i * GetNumOrientationTypes()] = value;
    }
  }
  return score;
}
/** Collect up to m_MaxContextLength target words immediately preceding the
 *  given hypothesis's phrase, walking back over previous hypotheses.
 *  Returns an empty phrase for the empty (initial) hypothesis.
 */
Phrase LexicalReordering::auxGetContext(const Hypothesis* hypothesis) const {
  // CLEANUP: removed the unused local 'h' the original declared here.
  Phrase c(Output);
  if(0 == hypothesis->GetId()){
    return c;
  }
  while(0 != hypothesis->GetPrevHypo()->GetId() && c.GetSize() < m_MaxContextLength){
    hypothesis = hypothesis->GetPrevHypo();
    int needed = m_MaxContextLength - c.GetSize();
    const Phrase& p = hypothesis->GetCurrTargetPhrase();
    Phrase tmp(Output);
    if(needed > p.GetSize()){
      // whole phrase still fits into the remaining window
      tmp = p;
    } else {
      // only the last 'needed' words of this phrase
      WordsRange range(p.GetSize() - needed, p.GetSize()-1);
      tmp = p.GetSubString(range);
    }
    // prepend: Append() returns void, so splice via tmp
    tmp.Append(c); c = tmp;
  }
  return c;
}
// Expand a (possibly compound) conditioning option into its atomic parts,
// always in F, E, C order.
std::vector<LexicalReordering::Condition> LexicalReordering::DecodeCondition(LexicalReordering::Condition c){
  std::vector<LexicalReordering::Condition> atomic;
  switch(c){
  case FEC:
    atomic.push_back(F);
    atomic.push_back(E);
    atomic.push_back(C);
    break;
  case FE:
    atomic.push_back(F);
    atomic.push_back(E);
    break;
  case F:
  case E:
  case C:
    // already atomic
    atomic.push_back(c);
    break;
  }
  return atomic;
}
// Expand Bidirectional into Backward then Forward; anything else is atomic.
std::vector<LexicalReordering::Direction> LexicalReordering::DecodeDirection(LexicalReordering::Direction d){
  std::vector<Direction> dirs;
  if(d != Bidirectional){
    dirs.push_back(d);
  } else {
    dirs.push_back(Backward);
    dirs.push_back(Forward);
  }
  return dirs;
}
// Two-way classification: a phrase is Monotone if it directly continues the
// previous source phrase (or starts the sentence); everything else is
// NonMonotone.
LexicalReordering::OrientationType LexicalMonotonicReordering::GetOrientationType(Hypothesis* currHypothesis) const
{
  const Hypothesis* prev = currHypothesis->GetPrevHypo();
  const WordsRange curr = currHypothesis->GetCurrSourceWordsRange();
  if(0 == prev->GetId()){
    // sentence-initial phrase
    return (0 == curr.GetStartPos()) ? Monotone : NonMonotone;
  }
  const WordsRange prevRange = prev->GetCurrSourceWordsRange();
  return (prevRange.GetEndPos() == curr.GetStartPos()-1) ? Monotone : NonMonotone;
}
// Three-way msd-style classification relative to the previous source phrase:
// Monotone (adjacent, same order), Swap (adjacent, reversed), else
// Discontinuous.
LexicalReordering::OrientationType LexicalOrientationReordering::GetOrientationType(Hypothesis* currHypothesis) const
{
  const Hypothesis* prev = currHypothesis->GetPrevHypo();
  const WordsRange curr = currHypothesis->GetCurrSourceWordsRange();
  if(0 == prev->GetId()){
    // sentence-initial phrase
    return (0 == curr.GetStartPos()) ? Monotone : Discontinuous;
  }
  const WordsRange prevRange = prev->GetCurrSourceWordsRange();
  if(prevRange.GetEndPos() == curr.GetStartPos()-1){
    return Monotone;
  }
  if(prevRange.GetStartPos() == curr.GetEndPos()+1){
    return Swap;
  }
  return Discontinuous;
}
// Two-way directional classification: Right when the current source phrase
// lies at or after the previous one (or starts the sentence), Left otherwise.
LexicalReordering::OrientationType LexicalDirectionalReordering::GetOrientationType(Hypothesis* currHypothesis) const{
  const Hypothesis* prev = currHypothesis->GetPrevHypo();
  const WordsRange curr = currHypothesis->GetCurrSourceWordsRange();
  if(0 == prev->GetId()){
    return Right;
  }
  const WordsRange prevRange = prev->GetCurrSourceWordsRange();
  return (prevRange.GetEndPos() <= curr.GetStartPos()) ? Right : Left;
}
// Look up the reordering scores for phrase pair (f,e) with an empty context.
Score LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
{
  return m_Table->GetScore(f, e, Phrase(Output));
}
/** Stateful-feature hook: accumulate this model's scores for the hypothesis.
 *  Currently returns no state (NULL) and reaches back into the hypothesis
 *  chain via CalcScore instead of using prev_state.
 */
FFState* LexicalReordering::Evaluate(
  const Hypothesis& hypo,
  const FFState* prev_state,
  ScoreComponentCollection* out) const {
  // const_cast is needed because CalcScore takes a non-const Hypothesis*
  out->PlusEquals(this, CalcScore(const_cast<Hypothesis*>(&hypo)));

  //TODO need to return proper state, calc score should not use previous
  //hypothesis, it should use the state.
  return NULL;
}
// No decoder-visible state is kept (see Evaluate), so the empty state is NULL.
const FFState* LexicalReordering::EmptyHypothesisState() const {
  return NULL;
}
}

159
src/LexicalReordering.h Normal file
View File

@ -0,0 +1,159 @@
#ifndef moses_LexicalReordering_h
#define moses_LexicalReordering_h
#include <string>
#include <vector>
#include "Factor.h"
#include "Phrase.h"
#include "TypeDef.h"
#include "Util.h"
#include "WordsRange.h"
#include "ScoreProducer.h"
#include "FeatureFunction.h"
#include "LexicalReorderingTable.h"
namespace Moses
{
class Factor;
class Phrase;
class Hypothesis;
class InputType;
// NOTE(review): 'using namespace std;' at namespace scope in a header leaks
// into every includer; kept for compatibility, but it should be removed and
// the few unqualified std names below qualified instead.
using namespace std;
/** Abstract base for lexicalized reordering models. Subclasses define the
 *  orientation taxonomy (GetNumOrientationTypes / GetOrientationType); this
 *  class owns the score table and turns orientations into feature scores.
 */
class LexicalReordering : public StatefulFeatureFunction {
 public: //types & consts
	typedef int OrientationType;
	enum Direction {Forward, Backward, Bidirectional, Unidirectional = Backward};
	enum Condition {F,E,C,FE,FEC};
 public: //con- & destructors
	LexicalReordering(const std::string &filePath,
					  const std::vector<float>& weights,
					  Direction direction,
					  Condition condition,
					  std::vector< FactorType >& f_factors,
					  std::vector< FactorType >& e_factors);
	virtual ~LexicalReordering();
 public: //interface
	//inherited
	virtual size_t GetNumScoreComponents() const {
		return m_NumScoreComponents;
	};

	virtual FFState* Evaluate(
		const Hypothesis& cur_hypo,
		const FFState* prev_state,
		ScoreComponentCollection* accumulator) const;

	const FFState* EmptyHypothesisState() const;

	virtual std::string GetScoreProducerDescription() const {
		return "Generic Lexical Reordering Model... overwrite in subclass.";
	};

	std::string GetScoreProducerWeightShortName() const {
		return "d";
	};

	//new
	virtual int GetNumOrientationTypes() const = 0;
	virtual OrientationType GetOrientationType(Hypothesis*) const = 0;

	// One score per component for the given hypothesis (see .cpp for layout).
	std::vector<float> CalcScore(Hypothesis* hypothesis) const;

	void InitializeForInput(const InputType& i){
		m_Table->InitializeForInput(i);
	}

	Score GetProb(const Phrase& f, const Phrase& e) const;
	//helpers
	static std::vector<Condition> DecodeCondition(Condition c);
	static std::vector<Direction> DecodeDirection(Direction d);
 private:
	Phrase auxGetContext(const Hypothesis* hypothesis) const;
 private:
	LexicalReorderingTable* m_Table;        // owned; deleted in the destructor
	size_t m_NumScoreComponents;
	std::vector< Direction > m_Direction;   // atomic directions (see DecodeDirection)
	std::vector< Condition > m_Condition;   // atomic conditions (see DecodeCondition)
	bool m_OneScorePerDirection;            // true when orientations share a weight
	std::vector< FactorType > m_FactorsE, m_FactorsF, m_FactorsC;
	int m_MaxContextLength;
};
class LexicalMonotonicReordering : public LexicalReordering {
private:
enum {Monotone = 0, NonMonotone = 1};
public:
LexicalMonotonicReordering(const std::string &filePath,
const std::vector<float>& w,
Direction direction,
Condition condition,
std::vector< FactorType >& f_factors,
std::vector< FactorType >& e_factors)
: LexicalReordering(filePath, w, direction, condition, f_factors, e_factors){
std::cerr << "Created lexical monotonic reordering\n";
}
public:
virtual int GetNumOrientationTypes() const {
return 2;
};
virtual std::string GetScoreProducerDescription() const {
return "MonotonicLexicalReorderingModel";
};
virtual int GetOrientationType(Hypothesis* currHypothesis) const;
};
/** Three-orientation (msd-style) model: Monotone / Swap / Discontinuous. */
class LexicalOrientationReordering : public LexicalReordering {
 private:
	enum {Monotone = 0, Swap = 1, Discontinuous = 2};
 public:
	LexicalOrientationReordering(const std::string &filePath,
								 const std::vector<float>& w,
								 Direction direction,
								 Condition condition,
								 std::vector< FactorType >& f_factors,
								 std::vector< FactorType >& e_factors)
	: LexicalReordering(filePath, w, direction, condition, f_factors, e_factors){
		std::cerr << "Created lexical orientation reordering\n";
	}
 public:
	virtual int GetNumOrientationTypes() const {
		return 3;
	}
	virtual std::string GetScoreProducerDescription() const {
		return "OrientationLexicalReorderingModel";
	};
	virtual OrientationType GetOrientationType(Hypothesis* currHypothesis) const;
};
/** Two-orientation directional model: Left vs Right movement. */
class LexicalDirectionalReordering : public LexicalReordering {
 private:
	enum {Left = 0, Right = 1};
 public:
	LexicalDirectionalReordering(const std::string &filePath,
								 const std::vector<float>& w,
								 Direction direction,
								 Condition condition,
								 std::vector< FactorType >& f_factors,
								 std::vector< FactorType >& e_factors)
	: LexicalReordering(filePath, w, direction, condition, f_factors, e_factors){
		std::cerr << "Created lexical directional Reordering\n";
	}
 public:
	virtual int GetNumOrientationTypes() const {
		return 2;
	};
	virtual std::string GetScoreProducerDescription() const {
		return "DirectionalLexicalReorderingModel";
	};
	virtual OrientationType GetOrientationType(Hypothesis* currHypothesis) const;
};
}
#endif

View File

@ -0,0 +1,686 @@
#include "LexicalReorderingTable.h"
#include "InputFileStream.h"
//#include "LVoc.h" //need IPhrase
#include "StaticData.h"
#include "PhraseDictionary.h"
#include "GenerationDictionary.h"
#include "TargetPhrase.h"
#include "TargetPhraseCollection.h"
namespace Moses
{
/*
* local helper functions
*/
//cleans str of leading and tailing spaces
// Strip leading and trailing spaces from str (tabs and other whitespace are
// deliberately left alone, matching the original behavior).
// IDIOM: the hand-rolled index loops are replaced by the standard
// find_first_not_of / find_last_not_of pair.
std::string auxClearString(const std::string& str){
  const std::string::size_type first = str.find_first_not_of(' ');
  if(std::string::npos == first){
    return std::string();  // empty or all spaces
  }
  const std::string::size_type last = str.find_last_not_of(' ');
  return str.substr(first, last - first + 1);
}
// Append all ids of tail to head.
// IDIOM: a single range insert replaces the manual reserve + push_back loop
// (same effect, one call).
void auxAppend(IPhrase& head, const IPhrase& tail){
  head.insert(head.end(), tail.begin(), tail.end());
}
/*
* functions for LexicalReorderingTable
*/
/** Factory: pick the table implementation based on what is on disk.
 *  If a compiled binary index ("<filePath>.binlexr.idx") exists, use the
 *  on-disk tree table; otherwise load the plain-text table into memory.
 *  The caller owns the returned object.
 */
LexicalReorderingTable* LexicalReorderingTable::LoadAvailable(const std::string& filePath, const FactorList& f_factors, const FactorList& e_factors, const FactorList& c_factors){
  //decide use Tree or Memory table
  if(FileExists(filePath+".binlexr.idx")){
    //there exists a binary version use that
    return new LexicalReorderingTableTree(filePath, f_factors, e_factors, c_factors);
  } else {
    //use plain memory
    return new LexicalReorderingTableMemory(filePath, f_factors, e_factors, c_factors);
  }
}
/*
* functions for LexicalReorderingTableMemory
*/
// Load the whole plain-text reordering table into memory at construction.
LexicalReorderingTableMemory::LexicalReorderingTableMemory(
  const std::string& filePath,
  const std::vector<FactorType>& f_factors,
  const std::vector<FactorType>& e_factors,
  const std::vector<FactorType>& c_factors)
  : LexicalReorderingTable(f_factors, e_factors, c_factors)
{
  LoadFromFile(filePath);
}
LexicalReorderingTableMemory::~LexicalReorderingTableMemory(){
}
// Looks up the score vector for (f,e), trying the full context c first and
// then successively shorter suffixes; returns an empty Score on no match.
std::vector<float> LexicalReorderingTableMemory::GetScore(const Phrase& f,
                                                          const Phrase& e,
                                                          const Phrase& c) {
  //rather complicated because of const can't use []... as [] might enter new things into std::map
  //also can't have to be careful with words range if c is empty can't use c.GetSize()-1 will underflow and be large
  TableType::const_iterator r;
  std::string key;
  if(0 == c.GetSize()){
    // No context present: single exact lookup on (f,e).
    key = MakeKey(f,e,c);
    r = m_Table.find(key);
    if(m_Table.end() != r){
      return r->second;
    }
  } else {
    //right try from large to smaller context
    // NOTE(review): at i == c.GetSize() the WordsRange start exceeds its end;
    // presumably GetSubString then yields the empty context — confirm
    // WordsRange/GetSubString semantics before touching this loop bound.
    for(size_t i = 0; i <= c.GetSize(); ++i){
      Phrase sub_c(c.GetSubString(WordsRange(i,c.GetSize()-1)));
      key = MakeKey(f,e,sub_c);
      r = m_Table.find(key);
      if(m_Table.end() != r){
        return r->second;
      }
    }
  }
  // Unseen phrase pair: empty score vector.
  return Score();
}
void LexicalReorderingTableMemory::DbgDump(std::ostream* out) const{
TableType::const_iterator i;
for(i = m_Table.begin(); i != m_Table.end(); ++i){
*out << " key: '" << i->first << "' score: ";
*out << "(num scores: " << (i->second).size() << ")";
for(size_t j = 0; j < (i->second).size(); ++j){
*out << (i->second)[j] << " ";
}
*out << "\n";
}
};
// Builds the map key from the phrases' factor string representations,
// trimming spaces from each part before joining (see the string overload).
std::string LexicalReorderingTableMemory::MakeKey(const Phrase& f,
                                                  const Phrase& e,
                                                  const Phrase& c) const {
  /*
  std::string key;
  if(!m_FactorsF.empty()){
    key += f.GetStringRep(m_FactorsF);
  }
  if(!m_FactorsE.empty()){
    if(!key.empty()){
      key += " ||| ";
    }
    key += e.GetStringRep(m_FactorsE);
  }
  */
  return MakeKey(auxClearString(f.GetStringRep(m_FactorsF)),
                 auxClearString(e.GetStringRep(m_FactorsE)),
                 auxClearString(c.GetStringRep(m_FactorsC)));
}
// Joins the parts with "|||". Note the asymmetry, kept from the original:
// f is included only when the *string* is non-empty, while e and c are
// included whenever their factor masks are configured, even if empty —
// this keeps the separator positions stable for configured factors.
std::string LexicalReorderingTableMemory::MakeKey(const std::string& f,
                                                  const std::string& e,
                                                  const std::string& c) const{
  std::string key(f);
  if(!m_FactorsE.empty()){
    if(!key.empty()){
      key += "|||";
    }
    key += e;
  }
  if(!m_FactorsC.empty()){
    if(!key.empty()){
      key += "|||";
    }
    key += c;
  }
  return key;
}
// Reads a plain-text (optionally gzipped) reordering table of the form
//   f ||| e ||| c ||| p1 p2 ... pk
// (parts present according to the configured factor masks) into m_Table.
// Probabilities are log-transformed and floored on the way in.
// Terminates the process if a line has an inconsistent number of scores.
void LexicalReorderingTableMemory::LoadFromFile(const std::string& filePath){
  std::string fileName = filePath;
  // Fall back to the gzipped variant when the plain file is absent.
  if(!FileExists(fileName) && FileExists(fileName+".gz")){
    fileName += ".gz";
  }
  InputFileStream file(fileName);
  std::string line(""), key("");
  int numScores = -1; // -1 until the first line fixes the expected count
  std::cerr << "Loading table into memory...";
  while(!getline(file, line).eof()){
    std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||");
    int t = 0;
    std::string f(""),e(""),c("");
    if(!m_FactorsF.empty()){
      //there should be something for f
      f = auxClearString(tokens.at(t));
      ++t;
    }
    if(!m_FactorsE.empty()){
      //there should be something for e
      e = auxClearString(tokens.at(t));
      ++t;
    }
    if(!m_FactorsC.empty()){
      //there should be something for c
      c = auxClearString(tokens.at(t));
      ++t;
    }
    //last token are the probs
    std::vector<float> p = Scan<float>(Tokenize(tokens.at(t)));
    //sanity check: all lines must have equal number of probs
    if(-1 == numScores){
      numScores = (int)p.size(); //set in first line
    }
    if((int)p.size() != numScores){
      TRACE_ERR( "found inconsistent number of probabilities... found " << p.size() << " expected " << numScores << std::endl);
      // Bug fix: was exit(0), which reported success to the caller on a
      // fatal input-format error; a non-zero status signals failure.
      exit(1);
    }
    std::transform(p.begin(),p.end(),p.begin(),TransformScore);
    std::transform(p.begin(),p.end(),p.begin(),FloorScore);
    //save it all into our map
    m_Table[MakeKey(f,e,c)] = p;
  }
  std::cerr << "done.\n";
}
/*
* functions for LexicalReorderingTableTree
*/
// Constructor: opens the binary prefix-tree table (<filePath>.binlexr.*).
// Candidate caching starts disabled; InitializeForInput decides per sentence.
LexicalReorderingTableTree::LexicalReorderingTableTree(
  const std::string& filePath,
  const std::vector<FactorType>& f_factors,
  const std::vector<FactorType>& e_factors,
  const std::vector<FactorType>& c_factors)
  : LexicalReorderingTable(f_factors, e_factors, c_factors), m_UseCache(false), m_FilePath(filePath)
{
  m_Table.reset(new PrefixTreeMap());
  m_Table->Read(m_FilePath+".binlexr");
}
// m_Table is a smart pointer; the tree is released automatically.
LexicalReorderingTableTree::~LexicalReorderingTableTree(){
}
// Score lookup for the tree-backed table: consults the per-sentence cache
// first, then the on-disk prefix tree; stores the fetched candidates in the
// cache slot when caching is enabled.
Score LexicalReorderingTableTree::GetScore(const Phrase& f, const Phrase& e, const Phrase& c) {
  if( (!m_FactorsF.empty() && 0 == f.GetSize())
      || (!m_FactorsE.empty() && 0 == e.GetSize())){
    //NOTE: no check for c as c might be empty, e.g. start of sentence
    //not a proper key
    // phi: commented out, since e may be empty (drop-unknown)
    //std::cerr << "Not a proper key!\n";
    return Score();
  }
  CacheType::iterator i;;
  if(m_UseCache){
    // insert() doubles as a lookup: when the key already exists we get the
    // cached candidates back; otherwise we reserve the slot to fill below.
    std::pair<CacheType::iterator, bool> r = m_Cache.insert(std::make_pair(MakeCacheKey(f,e),Candidates()));
    if(!r.second){
      return auxFindScoreForContext((r.first)->second, c);
    }
    i = r.first;
  } else if(!m_Cache.empty()) {
    //although we might not be caching now, cache might be none empty!
    i = m_Cache.find(MakeCacheKey(f,e));
    if(i != m_Cache.end()){
      return auxFindScoreForContext(i->second, c);
    }
  }
  //not in cache go to file...
  Score score;
  Candidates cands;
  m_Table->GetCandidates(MakeTableKey(f,e), &cands);
  if(cands.empty()){
    return Score();
  }
  if(m_FactorsC.empty()){
    // Without context factors exactly one candidate entry can exist.
    assert(1 == cands.size());
    return cands[0].GetScore(0);
  } else {
    score = auxFindScoreForContext(cands, c);
  }
  //cache for future use
  if(m_UseCache){
    i->second = cands;
  }
  return score;
};
// Matches the context phrase against the candidate list: tries the full
// context first, then successively shorter suffixes, returning the score of
// the first candidate whose stored context phrase matches. Returns an empty
// Score when nothing matches.
Score LexicalReorderingTableTree::auxFindScoreForContext(const Candidates& cands, const Phrase& context){
  if(m_FactorsC.empty()){
    // No context factors: at most one candidate can exist.
    assert(cands.size() <= 1);
    return (1 == cands.size())?(cands[0].GetScore(0)):(Score());
  } else {
    std::vector<std::string> cvec;
    for(size_t i = 0; i < context.GetSize(); ++i){
      cvec.push_back(context.GetWord(i).GetString(m_FactorsC, false));
    }
    IPhrase c = m_Table->ConvertPhrase(cvec,TargetVocId);
    IPhrase sub_c;
    // Index-based suffix loop; the original advanced an iterator one step
    // past c.end() on the final increment, which is undefined behaviour.
    for(size_t j = 0; j <= c.size(); ++j){
      sub_c.assign(c.begin() + j, c.end());
      for(size_t cand = 0; cand < cands.size(); ++cand){
        // Reuse the fetched phrase instead of calling GetPhrase(0) twice.
        IPhrase p = cands[cand].GetPhrase(0);
        if(p == sub_c){
          return cands[cand].GetScore(0);
        }
      }
    }
    return Score();
  }
}
/*
void LexicalReorderingTableTree::DbgDump(std::ostream* pout){
std::ostream& out = *pout;
//TODO!
}
*/
void LexicalReorderingTableTree::InitializeForInput(const InputType& input){
ClearCache();
if(ConfusionNet const* cn = dynamic_cast<ConfusionNet const*>(&input)){
Cache(*cn);
} else if(Sentence const* s = dynamic_cast<Sentence const*>(&input)){
// Cache(*s); ... this just takes up too much memory, we cache elsewhere
DisableCache();
}
if (!m_Table.get()) {
//load thread specific table.
m_Table.reset(new PrefixTreeMap());
m_Table->Read(m_FilePath+".binlexr");
}
};
// Binarizes a plain-text reordering table (read from inFile, which must be
// grouped by source phrase) into the on-disk prefix-tree representation:
// <out>.binlexr.srctree / .tgtdata / .idx / .voc0 / .voc1.
// Returns false when the same source key appears in non-adjacent groups.
bool LexicalReorderingTableTree::Create(std::istream& inFile,
                                        const std::string& outFileName){
  std::string line;
  //TRACE_ERR("Entering Create...\n");
  std::string
    ofn(outFileName+".binlexr.srctree"),
    oft(outFileName+".binlexr.tgtdata"),
    ofi(outFileName+".binlexr.idx"),
    ofsv(outFileName+".binlexr.voc0"),
    oftv(outFileName+".binlexr.voc1");
  FILE *os = fOpen(ofn.c_str(),"wb");
  FILE *ot = fOpen(oft.c_str(),"wb");
  //TRACE_ERR("opend files....\n");
  typedef PrefixTreeSA<LabelId,OFF_T> PSA;
  PSA *psa = new PSA;
  PSA::setDefault(InvalidOffT);
  // NOTE(review): voc[] is only initialized below for first lines with 2, 3
  // or 4 tokens; any other token count leaves it uninitialized before use.
  WordVoc* voc[3];
  LabelId currFirstWord = InvalidLabelId;
  IPhrase currKey;
  Candidates cands;
  std::vector<OFF_T> vo; // per-first-word offsets into the srctree file
  size_t lnc = 0;
  size_t numTokens = 0;
  size_t numKeyTokens = 0;
  while(getline(inFile, line)){
    //TRACE_ERR(lnc<<":"<<line<<"\n");
    ++lnc;
    if(0 == lnc % 10000){
      TRACE_ERR(".");
    }
    IPhrase key;
    Score score;
    std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||");
    std::string w;
    if(1 == lnc){
      //do some init stuff in the first line
      numTokens = tokens.size();
      if(tokens.size() == 2){ //f ||| score
        numKeyTokens = 1;
        voc[0] = new WordVoc();
        voc[1] = 0;
      } else if(3 == tokens.size() || 4 == tokens.size()){ //either f ||| e ||| score or f ||| e ||| c ||| score
        numKeyTokens = 2;
        voc[0] = new WordVoc(); //f voc
        voc[1] = new WordVoc(); //e voc
        voc[2] = voc[1]; //c & e share voc
      }
    } else {
      //sanity check ALL lines must have same number of tokens
      assert(numTokens == tokens.size());
    }
    // Build the integer key: f words, then MagicWord separator, then e words.
    int phrase = 0;
    for(; phrase < numKeyTokens; ++phrase){
      //conditioned on more than just f... need |||
      if(phrase >=1){
        key.push_back(PrefixTreeMap::MagicWord);
      }
      std::istringstream is(tokens[phrase]);
      while(is >> w) {
        key.push_back(voc[phrase]->add(w));
      }
    }
    //collect all non key phrases, i.e. c
    std::vector<IPhrase> tgt_phrases;
    tgt_phrases.resize(numTokens - numKeyTokens - 1);
    for(int j = 0; j < tgt_phrases.size(); ++j, ++phrase){
      std::istringstream is(tokens[numKeyTokens + j]);
      while(is >> w) {
        tgt_phrases[j].push_back(voc[phrase]->add(w));
      }
    }
    //last token is score
    std::istringstream is(tokens[numTokens-1]);
    while(is >> w) {
      score.push_back(atof(w.c_str()));
    }
    //transform score now...
    std::transform(score.begin(),score.end(),score.begin(),TransformScore);
    std::transform(score.begin(),score.end(),score.begin(),FloorScore);
    std::vector<Score> scores;
    scores.push_back(score);
    if(key.empty()) {
      TRACE_ERR("WARNING: empty source phrase in line '"<<line<<"'\n");
      continue;
    }
    //first time inits
    if(currFirstWord == InvalidLabelId){
      currFirstWord = key[0];
    }
    if(currKey.empty()){
      currKey = key;
      //insert key into tree
      assert(psa);
      PSA::Data& d = psa->insert(key);
      if(d == InvalidOffT) {
        // Record where this key's candidates will start in the tgtdata file.
        d = fTell(ot);
      } else {
        TRACE_ERR("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '" << line << "\n");
        return false;
      }
    }
    if(currKey != key){
      //ok new key
      currKey = key;
      //a) write cands for old key
      cands.writeBin(ot);
      cands.clear();
      //b) check if we need to move on to new tree root
      if(key[0] != currFirstWord){
        // write key prefix tree to file and clear
        PTF pf;
        if(currFirstWord >= vo.size()){
          vo.resize(currFirstWord+1,InvalidOffT);
        }
        vo[currFirstWord] = fTell(os);
        pf.create(*psa, os);
        // clear
        delete psa; psa = new PSA;
        currFirstWord = key[0];
      }
      //c) insert key into tree
      assert(psa);
      PSA::Data& d = psa->insert(key);
      if(d == InvalidOffT) {
        d = fTell(ot);
      } else {
        TRACE_ERR("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '" << line << "\n");
        return false;
      }
    }
    cands.push_back(GenericCandidate(tgt_phrases, scores));
  }
  //flush remainders
  cands.writeBin(ot);
  cands.clear();
  //process last currFirstWord
  PTF pf;
  if(currFirstWord >= vo.size()) {
    vo.resize(currFirstWord+1,InvalidOffT);
  }
  vo[currFirstWord] = fTell(os);
  pf.create(*psa,os);
  delete psa;
  psa=0;
  fClose(os);
  fClose(ot);
  /*
  std::vector<size_t> inv;
  for(size_t i = 0; i < vo.size(); ++i){
    if(vo[i] == InvalidOffT){
      inv.push_back(i);
    }
  }
  if(inv.size()) {
    TRACE_ERR("WARNING: there are src voc entries with no phrase "
              "translation: count "<<inv.size()<<"\n"
              "There exists phrase translations for "<<vo.size()-inv.size()
              <<" entries\n");
  }
  */
  // Write the root-offset index and the vocabularies.
  FILE *oi = fOpen(ofi.c_str(),"wb");
  fWriteVector(oi,vo);
  fClose(oi);
  if(voc[0]){
    voc[0]->Write(ofsv);
    delete voc[0];
  }
  if(voc[1]){
    voc[1]->Write(oftv);
    delete voc[1];
  }
  return true;
}
// Builds the string cache key "f|||e" from the configured factor
// representations, trimming surrounding spaces from each part.
std::string LexicalReorderingTableTree::MakeCacheKey(const Phrase& f,
                                                     const Phrase& e) const {
  std::string key;
  if(!m_FactorsF.empty()){
    key.append(auxClearString(f.GetStringRep(m_FactorsF)));
  }
  if(!m_FactorsE.empty()){
    if(!key.empty()){
      key.append("|||");
    }
    key.append(auxClearString(e.GetStringRep(m_FactorsE)));
  }
  return key;
}
// Converts (f,e) into the integer-phrase key used by the prefix tree:
// f's word ids, then the MagicWord separator, then e's word ids.
IPhrase LexicalReorderingTableTree::MakeTableKey(const Phrase& f,
                                                 const Phrase& e) const {
  IPhrase key;
  std::vector<std::string> keyPart;
  if(!m_FactorsF.empty()){
    // size_t indices: the original used int, comparing signed against
    // GetSize() (signed/unsigned mismatch warning).
    for(size_t i = 0; i < f.GetSize(); ++i){
      keyPart.push_back(f.GetWord(i).GetString(m_FactorsF, false));
    }
    auxAppend(key, m_Table->ConvertPhrase(keyPart, SourceVocId));
    keyPart.clear();
  }
  if(!m_FactorsE.empty()){
    if(!key.empty()){
      key.push_back(PrefixTreeMap::MagicWord);
    }
    for(size_t i = 0; i < e.GetSize(); ++i){
      keyPart.push_back(e.GetWord(i).GetString(m_FactorsE, false));
    }
    auxAppend(key, m_Table->ConvertPhrase(keyPart,TargetVocId));
    //keyPart.clear();
  }
  return key;
};
// Stack frame for the depth-first traversal in auxCacheForSrcPhrase:
// a tree position plus the target-word path that led to it.
struct State {
  State(PPimp* t, const std::string& p) : pos(t), path(p){
  }
  PPimp* pos;        // current position in the prefix tree
  std::string path;  // space-separated target words accumulated so far
};
// Pre-computes and caches candidate lists for every table entry whose source
// side equals f, by walking the e-subtree below f in the prefix tree.
void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f){
  if(m_FactorsE.empty()){
    //f is all of key...
    Candidates cands;
    m_Table->GetCandidates(MakeTableKey(f,Phrase(Output)),&cands);
    m_Cache[MakeCacheKey(f,Phrase(Output))] = cands;
  } else {
    ObjectPool<PPimp> pool;
    PPimp* pPos = m_Table->GetRoot();
    //1) goto subtree for f
    for(int i = 0; i < f.GetSize() && 0 != pPos && pPos->isValid(); ++i){
      /* old code
      pPos = m_Table.Extend(pPos, auxClearString(f.GetWord(i).ToString(m_FactorsF)), SourceVocId);
      */
      pPos = m_Table->Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId);
    }
    // Step over the f/e separator to reach the target-side subtree.
    if(0 != pPos && pPos->isValid()){
      pPos = m_Table->Extend(pPos, PrefixTreeMap::MagicWord);
    }
    if(0 == pPos || !pPos->isValid()){
      return; // f not in the table: nothing to cache
    }
    //2) explore whole subtree depth first & cache
    std::string cache_key = auxClearString(f.GetStringRep(m_FactorsF)) + "|||";
    std::vector<State> stack;
    stack.push_back(State(pool.get(PPimp(pPos->ptr()->getPtr(pPos->idx),0,0)),""));
    Candidates cands;
    while(!stack.empty()){
      if(stack.back().pos->isValid()){
        LabelId w = stack.back().pos->ptr()->getKey(stack.back().pos->idx);
        std::string next_path = stack.back().path + " " + m_Table->ConvertWord(w,TargetVocId);
        //cache this
        m_Table->GetCandidates(*stack.back().pos,&cands);
        if(!cands.empty()){
          m_Cache[cache_key + auxClearString(next_path)] = cands;
        }
        cands.clear();
        // Descend into the child, then advance this frame to its next sibling.
        PPimp* next_pos = pool.get(PPimp(stack.back().pos->ptr()->getPtr(stack.back().pos->idx),0,0));
        ++stack.back().pos->idx;
        stack.push_back(State(next_pos,next_path));
      } else {
        stack.pop_back();
      }
    }
  }
}
// Confusion-network input is not pre-cached: intentional no-op (a phrase/
// generation-table based implementation exists only in comments below).
void LexicalReorderingTableTree::Cache(const ConfusionNet& input){
  return;
}
// Pre-caches reordering candidates for every sub-phrase of the sentence and
// reports how many new cache entries were created.
void LexicalReorderingTableTree::Cache(const Sentence& input){
  //only works with sentences...
  int prev_cache_size = m_Cache.size();
  int max_phrase_length = input.GetSize();
  // NOTE(review): len appears to be phrase-length-minus-one (WordsRange end
  // is start+len); at len == GetSize(), start == 0 gives an end index equal
  // to GetSize(), which looks one past the last word — confirm WordsRange
  // bounds before relying on the upper loop limit.
  for(size_t len = 0; len <= max_phrase_length; ++len){
    for(size_t start = 0; start+len <= input.GetSize(); ++start){
      Phrase f = input.GetSubString(WordsRange(start, start+len));
      auxCacheForSrcPhrase(f);
    }
  }
  std::cerr << "Cached " << m_Cache.size() - prev_cache_size << " new primary reordering table keys\n";
}
/*
Pre fetching implementation using Phrase and Generation Dictionaries
*//*
void LexicalReorderingTableTree::Cache(const ConfusionNet& input){
typedef TargetPhraseCollection::iterator Iter;
typedef TargetPhraseCollection::const_iterator ConstIter;
//not implemented for confusion networks...
Sentence const* s = dynamic_cast<Sentence const*>(&input);
if(!s){
return;
}
int max_phrase_length = input.GetSize();
std::vector<PhraseDictionaryBase*> PhraseTables = StaticData::Instance()->GetPhraseDictionaries();
//new code:
//std::vector<PhraseDictionary*> PhraseTables = StaticData::Instance()->GetPhraseDictionaries();
std::vector<GenerationDictionary*> GenTables = StaticData::Instance()->GetGenerationDictionaries();
for(size_t len = 1; len <= max_phrase_length; ++len){
for(size_t start = 0; start+len <= input.GetSize(); ++start){
Phrase f = s->GetSubString(WordsRange(start, start+len));
//find all translations of f
TargetPhraseCollection list;
for(size_t t = 0; t < PhraseTables.size(); ++t){
//if(doIntersect(PhraseTables[t]->GetOutputFactorMask(),FactorMask(m_FactorsE))){
//this table gives us something we need
const TargetPhraseCollection* new_list = PhraseTables[t]->GetTargetPhraseCollection(f);
TargetPhraseCollection curr_list;
for(ConstIter i = new_list->begin(); i != new_list->end(); ++i){
for(Iter j = list.begin(); j != list.end(); ++j){
curr_list.Add((*j)->MergeNext(*(*i)));
}
}
if(list.IsEmpty()){
list = *new_list;
} else {
list = curr_list;
}
//}
}
for(size_t g = 0; g < GenTables.size(); ++g){
//if(doIntersect(GenTables[g]->GetOutputFactorMask(),FactorMask(m_FactorsE))){
TargetPhraseCollection curr_list;
for(Iter j = list.begin(); j != list.end(); ++j){
for(size_t w = 0; w < (*j)->GetSize(); ++w){
const OutputWordCollection* words = GenTables[g]->FindWord((*j)->GetWord(w));
for(OutputWordCollection::const_iterator i = words->begin(); i != words->end(); ++i){
TargetPhrase* p = new TargetPhrase(*(*j));
Word& pw = p->GetWord(w);
pw.Merge(i->first);
curr_list.Add(p);
}
}
}
list = curr_list;
//}
}
//cache for each translation
for(Iter e = list.begin(); e < list.end(); ++e){
Candidates cands;
m_Table.GetCandidates(MakeTableKey(f,*(*e)), &cands);
m_Cache.insert(std::make_pair(MakeCacheKey(f,*(*e)),cands));
}
}
}
};
*/
}

View File

@ -0,0 +1,158 @@
#ifndef moses_LexicalReorderingTable_h
#define moses_LexicalReorderingTable_h
//stdlib dependencies:
#include <vector>
#include <map>
#include <memory>
#include <string>
#include <iostream>
#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#endif
//moses dependencies:
#include "TypeDef.h"
#include "Phrase.h"
#include "InputType.h"
#include "ConfusionNet.h"
#include "Sentence.h"
#include "PrefixTreeMap.h"
namespace Moses
{
class Phrase;
class InputType;
class ConfusionNet;
//additional types
typedef std::vector<float> Score;
typedef std::vector<FactorType> FactorList;
// Abstract base for lexicalized-reordering score tables, keyed on the factor
// string representations of (source phrase f, target phrase e, context c).
class LexicalReorderingTable {
public:
  LexicalReorderingTable(const FactorList& f_factors, const FactorList& e_factors, const FactorList& c_factors)
    : m_FactorsF(f_factors), m_FactorsE(e_factors), m_FactorsC(c_factors) {
  }
  virtual ~LexicalReorderingTable(){
  }
public:
  // Factory: picks the binary (tree) or plain-text (memory) implementation
  // depending on which files exist at filePath.
  static LexicalReorderingTable* LoadAvailable(const std::string& filePath, const FactorList& f_factors, const FactorList& e_factors, const FactorList& c_factors);
public:
  // Returns the score vector for (f,e,c); empty Score when the entry is absent.
  virtual Score GetScore(const Phrase& f, const Phrase& e, const Phrase& c) = 0;
  virtual void InitializeForInput(const InputType&){
    /* override for on-demand loading */
  };
  virtual void InitializeForInputPhrase(const Phrase&){
  };
  /*
  int GetNumScoreComponents() const {
    return m_NumScores;
  }
  */
  // Accessors for the factor masks used to build lookup keys.
  const FactorList& GetFFactorMask() const {
    return m_FactorsF;
  }
  const FactorList& GetEFactorMask() const {
    return m_FactorsE;
  }
  const FactorList& GetCFactorMask() const {
    return m_FactorsC;
  }
  virtual void DbgDump(std::ostream* out) const{
    *out << "Overwrite in subclass...\n";
  };
protected:
  FactorList m_FactorsF;
  FactorList m_FactorsE;
  FactorList m_FactorsC;
};
// In-memory implementation: the whole plain-text table is held in one
// std::map from "f|||e|||c" key strings to score vectors.
class LexicalReorderingTableMemory : public LexicalReorderingTable {
  //implements LexicalReorderingTable saving all scores in one large std::map<> thingy
  //to be used for non binary tables... uses a LOT of memory
public:
  LexicalReorderingTableMemory( const std::string& filePath,
                                const std::vector<FactorType>& f_factors,
                                const std::vector<FactorType>& e_factors,
                                const std::vector<FactorType>& c_factors);
  virtual ~LexicalReorderingTableMemory();
public:
  virtual std::vector<float> GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
  void DbgDump(std::ostream* out) const;
private:
  // Key builders: parts joined with "|||" according to the factor masks.
  std::string MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const;
  std::string MakeKey(const std::string& f, const std::string& e, const std::string& c) const;
  void LoadFromFile(const std::string& filePath);
private:
  typedef std::map< std::string, std::vector<float> > TableType;
  TableType m_Table;
};
// Prefix-tree-backed implementation reading the binary ".binlexr.*" files,
// with an optional per-sentence candidate cache.
class LexicalReorderingTableTree : public LexicalReorderingTable {
  //implements LexicalReorderingTable using the crafty PDT code...
public:
  LexicalReorderingTableTree(const std::string& filePath,
                             const std::vector<FactorType>& f_factors,
                             const std::vector<FactorType>& e_factors,
                             const std::vector<FactorType>& c_factors);
  ~LexicalReorderingTableTree();
public:
  // Cache control: GetScore consults m_Cache before hitting the tree.
  bool IsCacheEnabled() const {
    return m_UseCache;
  };
  void EnableCache() {
    m_UseCache = true;
  };
  void DisableCache() {
    m_UseCache = false;
  };
  void ClearCache(){
    if (m_UseCache) {
      m_Cache.clear();
    }
  };
  virtual std::vector<float> GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
  virtual void InitializeForInput(const InputType& input);
  virtual void InitializeForInputPhrase(const Phrase& f){
    ClearCache();
    auxCacheForSrcPhrase(f);
  }
public:
  // Binarizes a plain-text table (grouped by source phrase) into the
  // ".binlexr.*" files; returns false on malformed/ungrouped input.
  static bool Create(std::istream& inFile, const std::string& outFileName);
private:
  std::string MakeCacheKey(const Phrase& f, const Phrase& e) const;
  IPhrase MakeTableKey(const Phrase& f, const Phrase& e) const;
  void Cache(const ConfusionNet& input);
  void Cache(const Sentence& input);
  void auxCacheForSrcPhrase(const Phrase& f);
  Score auxFindScoreForContext(const Candidates& cands, const Phrase& contex);
private:
  //typedef LexicalReorderingCand CandType;
  typedef std::map< std::string, Candidates > CacheType;
#ifdef WITH_THREADS
  // Thread builds use a thread-local table pointer.
  typedef boost::thread_specific_ptr<PrefixTreeMap> TableType;
#else
  typedef std::auto_ptr<PrefixTreeMap> TableType;
#endif
  static const int SourceVocId = 0;
  static const int TargetVocId = 1;
  bool m_UseCache;
  std::string m_FilePath;
  CacheType m_Cache;
  TableType m_Table;
};
}
#endif

230
src/Makefile.am Normal file
View File

@ -0,0 +1,230 @@
lib_LTLIBRARIES = libmoses.la
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS)
libmoses_ladir = ${includedir}
libmoses_la_HEADERS = \
TypeDef.h \
PrefixTree.h \
File.h \
FilePtr.h \
ObjectPool.h \
BitmapContainer.h \
ConfusionNet.h \
DecodeGraph.h \
DecodeStep.h \
DecodeStepGeneration.h \
DecodeStepTranslation.h \
Dictionary.h \
DummyScoreProducers.h \
Factor.h \
FactorCollection.h \
FactorTypeSet.h \
FeatureFunction.h \
FFState.h \
FloydWarshall.h \
GenerationDictionary.h \
GlobalLexicalModel.h \
hash.h \
Hypothesis.h \
HypothesisStack.h \
HypothesisStackCubePruning.h \
HypothesisStackNormal.h \
InputType.h \
InputFileStream.h \
LMList.h \
LVoc.h \
LanguageModel.h \
LanguageModelFactory.h \
LanguageModelInternal.h \
LanguageModelMultiFactor.h \
LanguageModelRemote.h \
LanguageModelSingleFactor.h \
LanguageModelSkip.h \
TrellisPath.h \
TrellisPathList.h \
TrellisPathCollection.h \
LexicalReordering.h \
LexicalReorderingTable.h \
Manager.h \
NGramCollection.h \
NGramNode.h \
PCNTools.h \
Parameter.h \
PartialTranslOptColl.h \
Phrase.h \
PhraseDictionary.h \
PhraseDictionaryMemory.h \
PhraseDictionaryNode.h \
PhraseDictionaryTree.h \
PhraseDictionaryTreeAdaptor.h \
PrefixTreeMap.h \
ReorderingConstraint.h \
ScoreComponentCollection.h \
ScoreIndexManager.h \
ScoreProducer.h \
Search.h \
SearchCubePruning.h \
SearchNormal.h \
Sentence.h \
SentenceStats.h \
SquareMatrix.h \
StaticData.h \
TargetPhrase.h \
TargetPhraseCollection.h \
Timer.h \
TranslationOption.h \
TranslationOptionCollection.h \
TranslationOptionCollectionText.h \
TranslationOptionCollectionConfusionNet.h \
TranslationOptionList.h \
UserMessage.h \
Util.h \
Word.h \
WordsBitmap.h \
WordLattice.h \
WordsRange.h \
XmlOption.h
if PROTOBUF
libmoses_la_HEADERS += rule.pb.h hypergraph.pb.h
endif
if SRI_LM
libmoses_la_HEADERS += LanguageModelSRI.h
endif
if IRST_LM
libmoses_la_HEADERS += LanguageModelIRST.h
endif
if RAND_LM
libmoses_la_HEADERS += LanguageModelRandLM.h
endif
if INTERNAL_LM
libmoses_la_HEADERS += LanguageModelInternal.h \
NGramCollection.h \
NGramNode.h
endif
libmoses_la_SOURCES = \
BitmapContainer.cpp \
ConfusionNet.cpp \
DecodeGraph.cpp \
DecodeStep.cpp \
DecodeStepGeneration.cpp \
DecodeStepTranslation.cpp \
Dictionary.cpp \
DummyScoreProducers.cpp \
Factor.cpp \
FactorCollection.cpp \
FactorTypeSet.cpp \
FeatureFunction.cpp \
FFState.cpp \
FloydWarshall.cpp \
GenerationDictionary.cpp \
GlobalLexicalModel.cpp \
hash.cpp \
Hypothesis.cpp \
HypothesisStack.cpp \
HypothesisStackCubePruning.cpp \
HypothesisStackNormal.cpp \
InputType.cpp \
InputFileStream.cpp \
LMList.cpp \
LVoc.cpp \
LanguageModel.cpp \
LanguageModelFactory.cpp \
LanguageModelInternal.cpp \
LanguageModelMultiFactor.cpp \
LanguageModelRemote.cpp \
LanguageModelSingleFactor.cpp \
LanguageModelSkip.cpp \
TrellisPath.cpp \
TrellisPathCollection.cpp \
LexicalReordering.cpp \
LexicalReorderingTable.cpp \
Manager.cpp \
NGramCollection.cpp \
NGramNode.cpp \
PCNTools.cpp \
Parameter.cpp \
PartialTranslOptColl.cpp \
Phrase.cpp \
PhraseDictionary.cpp \
PhraseDictionaryMemory.cpp \
PhraseDictionaryNode.cpp \
PhraseDictionaryTree.cpp \
PhraseDictionaryTreeAdaptor.cpp \
PrefixTreeMap.cpp \
ReorderingConstraint.cpp \
ScoreComponentCollection.cpp \
ScoreIndexManager.cpp \
ScoreProducer.cpp \
Search.cpp \
SearchCubePruning.cpp \
SearchNormal.cpp \
Sentence.cpp \
SentenceStats.cpp \
SquareMatrix.cpp \
StaticData.cpp \
TargetPhrase.cpp \
TargetPhraseCollection.cpp \
Timer.cpp \
TranslationOption.cpp \
TranslationOptionCollection.cpp \
TranslationOptionCollectionText.cpp \
TranslationOptionCollectionConfusionNet.cpp \
TranslationOptionList.cpp \
UserMessage.cpp \
Util.cpp \
Word.cpp \
WordsBitmap.cpp \
WordLattice.cpp \
WordsRange.cpp \
XmlOption.cpp
if PROTOBUF
BUILT_SOURCES = \
rule.pb.h \
rule.pb.cc \
hypergraph.pb.h \
hypergraph.pb.cc
CLEANFILES = $(BUILT_SOURCES)
SUFFIXES = .proto
rule.pb.cc: rule.proto
@PROTOC@ --cpp_out=. $<
rule.pb.h: rule.proto
@PROTOC@ --cpp_out=. $<
hypergraph.pb.cc: hypergraph.proto
@PROTOC@ --cpp_out=. $<
hypergraph.pb.h: hypergraph.proto
@PROTOC@ --cpp_out=. $<
libmoses_la_SOURCES += rule.pb.cc hypergraph.pb.cc
endif
if SRI_LM
libmoses_la_SOURCES += LanguageModelSRI.cpp
endif
if IRST_LM
libmoses_la_SOURCES += LanguageModelIRST.cpp
endif
if RAND_LM
libmoses_la_SOURCES += LanguageModelRandLM.cpp
endif
if INTERNAL_LM
libmoses_la_SOURCES += LanguageModelInternal.cpp \
NGramCollection.cpp \
NGramNode.cpp
endif
libmoses_la_LIBADD = $(BOOST_LDFLAGS) $(BOOST_THREAD_LIB)

859
src/Manager.cpp Normal file
View File

@ -0,0 +1,859 @@
// $Id: Manager.cpp 2958 2010-03-08 16:30:31Z abarun $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifdef WIN32
#include <hash_set>
#else
#include <ext/hash_set>
#endif
#include <limits>
#include <cmath>
#include "Manager.h"
#include "TypeDef.h"
#include "Util.h"
#include "TargetPhrase.h"
#include "TrellisPath.h"
#include "TrellisPathCollection.h"
#include "TranslationOption.h"
#include "LMList.h"
#include "TranslationOptionCollection.h"
#include "DummyScoreProducers.h"
#if HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_PROTOBUF
#include "hypergraph.pb.h"
#include "rule.pb.h"
#endif
using namespace std;
namespace Moses
{
// Sets up per-sentence decoding state: the translation-option collection,
// the search object for the requested algorithm, and a start timestamp.
Manager::Manager(InputType const& source, SearchAlgorithm searchAlgorithm)
  :m_source(source)
  ,m_transOptColl(source.CreateTranslationOptionCollection())
  ,m_search(Search::CreateSearch(*this, source, searchAlgorithm, *m_transOptColl))
  ,m_start(clock())
  ,interrupted_flag(0)
{
  const StaticData &staticData = StaticData::Instance();
  staticData.InitializeBeforeSentenceProcessing(source);
}
// Releases per-sentence state and reports CPU decoding time.
Manager::~Manager()
{
  delete m_transOptColl;
  delete m_search;
  StaticData::Instance().CleanUpAfterSentenceProcessing();
  clock_t end = clock();
  float et = (end - m_start);
  et /= (float)CLOCKS_PER_SEC;
  VERBOSE(1, "Translation took " << et << " seconds" << endl);
  VERBOSE(1, "Finished translating" << endl);
}
/**
* Main decoder loop that translates a sentence by expanding
* hypotheses stack by stack, until the end of the sentence.
*/
void Manager::ProcessSentence()
{
  // reset statistics
  const StaticData &staticData = StaticData::Instance();
  ResetSentenceStats(m_source);
  // collect translation options for this sentence
  vector <DecodeGraph*>
    decodeStepVL = staticData.GetDecodeStepVL(m_source);
  m_transOptColl->CreateTranslationOptions(decodeStepVL);
  // some reporting on how long this took
  clock_t gotOptions = clock();
  float et = (gotOptions - m_start);
  IFVERBOSE(2) { GetSentenceStats().AddTimeCollectOpts( gotOptions - m_start ); }
  et /= (float)CLOCKS_PER_SEC;
  VERBOSE(1, "Collecting options took " << et << " seconds" << endl);
  // search for best translation with the specified algorithm
  m_search->ProcessSentence();
  VERBOSE(1, "Search took " << ((clock()-m_start)/(float)CLOCKS_PER_SEC) << " seconds" << endl);
  // the decode graphs were created for this sentence; free them here
  RemoveAllInColl(decodeStepVL);
}
/**
* Print all derivations in search graph. Note: The number of derivations is exponential in the sentence length
*
*/
void Manager::PrintAllDerivations(long translationId ) const
{
  const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
  vector<const Hypothesis*> sortedPureHypo = hypoStackColl.back()->GetSortedList();
  if (sortedPureHypo.size() == 0)
    return;
  // Start with no pending phrases/score; the recursion accumulates both.
  float remainingScore = 0;
  vector<const TargetPhrase*> remainingPhrases;
  // add all pure paths
  vector<const Hypothesis*>::const_iterator iterBestHypo;
  for (iterBestHypo = sortedPureHypo.begin()
       ; iterBestHypo != sortedPureHypo.end()
       ; ++iterBestHypo)
  {
    printThisHypothesis(translationId, *iterBestHypo, remainingPhrases, remainingScore);
    printDivergentHypothesis(translationId, *iterBestHypo, remainingPhrases, remainingScore);
  }
}
// Recursively prints alternative derivations: walks back through the
// predecessor chain and, at each step, substitutes every recombined arc.
void Manager::printDivergentHypothesis(long translationId, const Hypothesis* hypo, const vector <const TargetPhrase*> & remainingPhrases, float remainingScore ) const
{
  //Backtrack from the predecessor
  if (hypo->GetId() > 0) {
    vector <const TargetPhrase*> followingPhrases;
    followingPhrases.push_back(& (hypo->GetCurrTargetPhrase()));
    ///((Phrase) hypo->GetPrevHypo()->GetTargetPhrase());
    // NOTE(review): `end()--` post-decrements a temporary iterator, so this
    // inserts at end(); if insertion before the last element was intended,
    // this is a bug — confirm against expected derivation output.
    followingPhrases.insert(followingPhrases.end()--, remainingPhrases.begin(), remainingPhrases.end());
    printDivergentHypothesis(translationId, hypo->GetPrevHypo(), followingPhrases , remainingScore + hypo->GetScore() - hypo->GetPrevHypo()->GetScore());
  }
  //Process the arcs
  const ArcList *pAL = hypo->GetArcList();
  if (pAL) {
    const ArcList &arcList = *pAL;
    // every possible Arc to replace this edge
    ArcList::const_iterator iterArc;
    for (iterArc = arcList.begin() ; iterArc != arcList.end() ; ++iterArc)
    {
      const Hypothesis *loserHypo = *iterArc;
      const Hypothesis* loserPrevHypo = loserHypo->GetPrevHypo();
      // Score contributed by taking this arc instead of the winning edge.
      float arcScore = loserHypo->GetScore() - loserPrevHypo->GetScore();
      vector <const TargetPhrase* > followingPhrases;
      followingPhrases.push_back(&(loserHypo->GetCurrTargetPhrase()));
      followingPhrases.insert(followingPhrases.end()--, remainingPhrases.begin(), remainingPhrases.end());
      printThisHypothesis(translationId, loserPrevHypo, followingPhrases, remainingScore + arcScore);
      printDivergentHypothesis(translationId, loserPrevHypo, followingPhrases, remainingScore + arcScore);
    }
  }
}
void Manager::printThisHypothesis(long translationId, const Hypothesis* hypo, const vector <const TargetPhrase*> & remainingPhrases, float remainingScore ) const
{
cerr << translationId << " ||| ";
//Yield of this hypothesis
hypo->ToStream(cerr);
for (size_t p = 0; p < remainingPhrases.size(); ++p) {
const TargetPhrase * phrase = remainingPhrases[p];
size_t size = phrase->GetSize();
for (size_t pos = 0 ; pos < size ; pos++)
{
const Factor *factor = phrase->GetFactor(pos, 0);
cerr << *factor;
cerr << " ";
}
}
cerr << "||| " << hypo->GetScore() + remainingScore;
cerr << endl;
}
/**
* After decoding, the hypotheses in the stacks and additional arcs
* form a search graph that can be mined for n-best lists.
* The heavy lifting is done in the TrellisPath and TrellisPathCollection
* this function controls this for one sentence.
*
* \param count the number of n-best translations to produce
* \param ret holds the n-best list that was calculated
*/
void Manager::CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct) const
{
  if (count <= 0)
    return;
  const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
  vector<const Hypothesis*> sortedPureHypo = hypoStackColl.back()->GetSortedList();
  if (sortedPureHypo.size() == 0)
    return;
  TrellisPathCollection contenders;
  set<Phrase> distinctHyps; // surface forms already emitted (distinct mode)
  // add all pure paths
  vector<const Hypothesis*>::const_iterator iterBestHypo;
  for (iterBestHypo = sortedPureHypo.begin()
       ; iterBestHypo != sortedPureHypo.end()
       ; ++iterBestHypo)
  {
    contenders.Add(new TrellisPath(*iterBestHypo));
  }
  // factor defines stopping point for distinct n-best list if too many candidates identical
  size_t nBestFactor = StaticData::Instance().GetNBestFactor();
  if (nBestFactor < 1) nBestFactor = 1000; // 0 = unlimited
  // MAIN loop
  for (size_t iteration = 0 ; (onlyDistinct ? distinctHyps.size() : ret.GetSize()) < count && contenders.GetSize() > 0 && (iteration < count * nBestFactor) ; iteration++)
  {
    // get next best from list of contenders
    TrellisPath *path = contenders.pop();
    assert(path);
    if(onlyDistinct)
    {
      // Keep the path only if its surface string has not been seen yet.
      // NOTE(review): a duplicate path is dropped without delete here —
      // confirm TrellisPath ownership; this looks like a leak.
      Phrase tgtPhrase = path->GetSurfacePhrase();
      if (distinctHyps.insert(tgtPhrase).second)
        ret.Add(path);
    }
    else
    {
      ret.Add(path);
    }
    // create deviations from current best
    path->CreateDeviantPaths(contenders);
    if(onlyDistinct)
    {
      const size_t nBestFactor = StaticData::Instance().GetNBestFactor();
      if (nBestFactor > 0)
        contenders.Prune(count * nBestFactor);
    }
    else
    {
      contenders.Prune(count);
    }
  }
}
/** Collect final per-sentence statistics for the best hypothesis and, at
 * verbosity >= 2, trace the phrase segmentation as a sequence of
 * "[<source range>:<target phrase>] " chunks. Chunks are built walking the
 * derivation backwards, so each one is prepended (insert at position 0) to
 * make the trace read left-to-right in source order.
 */
void Manager::CalcDecoderStatistics() const
{
	const Hypothesis *hypo = GetBestHypothesis();
	if (hypo != NULL)
	{
		GetSentenceStats().CalcFinalStats(*hypo);
		IFVERBOSE(2) {
			if (hypo != NULL) {
				string buff;  // accumulates the full segmentation trace
				string buff2; // chunk for the hypothesis currently visited
				TRACE_ERR( "Source and Target Units:"
									<< hypo->GetInput());
				// build the chunk for the final hypothesis
				buff2.insert(0,"] ");
				buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString());
				buff2.insert(0,":");
				buff2.insert(0,(hypo->GetCurrSourceWordsRange()).ToString());
				buff2.insert(0,"[");
				hypo = hypo->GetPrevHypo();
				while (hypo != NULL) {
					//dont print out the empty final hypo
					// (the chunk built for the initial empty hypothesis is the last
					// buff2 constructed and is never copied into buff)
					buff.insert(0,buff2);
					buff2.clear();
					buff2.insert(0,"] ");
					buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString());
					buff2.insert(0,":");
					buff2.insert(0,(hypo->GetCurrSourceWordsRange()).ToString());
					buff2.insert(0,"[");
					hypo = hypo->GetPrevHypo();
				}
				TRACE_ERR( buff << endl);
			}
		}
	}
}
void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo, size_t &linkId)
{
const StaticData &staticData = StaticData::Instance();
const Hypothesis *prevHypo = hypo->GetPrevHypo();
outputWordGraphStream << "J=" << linkId++
<< "\tS=" << prevHypo->GetId()
<< "\tE=" << hypo->GetId()
<< "\ta=";
// phrase table scores
const std::vector<PhraseDictionaryFeature*> &phraseTables = staticData.GetPhraseDictionaries();
std::vector<PhraseDictionaryFeature*>::const_iterator iterPhraseTable;
for (iterPhraseTable = phraseTables.begin() ; iterPhraseTable != phraseTables.end() ; ++iterPhraseTable)
{
const PhraseDictionaryFeature *phraseTable = *iterPhraseTable;
vector<float> scores = hypo->GetScoreBreakdown().GetScoresForProducer(phraseTable);
outputWordGraphStream << scores[0];
vector<float>::const_iterator iterScore;
for (iterScore = ++scores.begin() ; iterScore != scores.end() ; ++iterScore)
{
outputWordGraphStream << ", " << *iterScore;
}
}
// language model scores
outputWordGraphStream << "\tl=";
const LMList &lmList = staticData.GetAllLM();
LMList::const_iterator iterLM;
for (iterLM = lmList.begin() ; iterLM != lmList.end() ; ++iterLM)
{
LanguageModel *lm = *iterLM;
vector<float> scores = hypo->GetScoreBreakdown().GetScoresForProducer(lm);
outputWordGraphStream << scores[0];
vector<float>::const_iterator iterScore;
for (iterScore = ++scores.begin() ; iterScore != scores.end() ; ++iterScore)
{
outputWordGraphStream << ", " << *iterScore;
}
}
// re-ordering
outputWordGraphStream << "\tr=";
outputWordGraphStream << hypo->GetScoreBreakdown().GetScoreForProducer(staticData.GetDistortionScoreProducer());
// lexicalised re-ordering
const std::vector<LexicalReordering*> &lexOrderings = staticData.GetReorderModels();
std::vector<LexicalReordering*>::const_iterator iterLexOrdering;
for (iterLexOrdering = lexOrderings.begin() ; iterLexOrdering != lexOrderings.end() ; ++iterLexOrdering)
{
LexicalReordering *lexicalReordering = *iterLexOrdering;
vector<float> scores = hypo->GetScoreBreakdown().GetScoresForProducer(lexicalReordering);
outputWordGraphStream << scores[0];
vector<float>::const_iterator iterScore;
for (iterScore = ++scores.begin() ; iterScore != scores.end() ; ++iterScore)
{
outputWordGraphStream << ", " << *iterScore;
}
}
// words !!
outputWordGraphStream << "\tw=" << hypo->GetCurrTargetPhrase();
outputWordGraphStream << endl;
}
/** Write the whole search space for one sentence as a word graph, one link
 * per hypothesis expansion (and optionally per recombination arc).
 * \param translationId sentence id, written into the UTTERANCE header
 * \param outputWordGraphStream stream the graph is written to
 */
void Manager::GetWordGraph(long translationId, std::ostream &outputWordGraphStream) const
{
	const StaticData &staticData = StaticData::Instance();
	// NOTE(review): fileName is fetched but never used here; the initializer
	// still side-effects (indexes into the "output-word-graph" parameter), so
	// it is kept — confirm the parameter always has two arguments upstream.
	string fileName = staticData.GetParam("output-word-graph")[0];
	// second argument: whether recombination arcs are also written
	bool outputNBest = Scan<bool>(staticData.GetParam("output-word-graph")[1]);
	const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
	outputWordGraphStream << "VERSION=1.0" << endl
						<< "UTTERANCE=" << translationId << endl;
	size_t linkId = 0;
	size_t stackNo = 1;
	std::vector < HypothesisStack* >::const_iterator iterStack;
	// start at the second stack: stack 0 only holds the empty initial hypothesis
	for (iterStack = ++hypoStackColl.begin() ; iterStack != hypoStackColl.end() ; ++iterStack)
	{
		cerr << endl << stackNo++ << endl; // progress trace (stack number) on stderr
		const HypothesisStack &stack = **iterStack;
		HypothesisStack::const_iterator iterHypo;
		for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo)
		{
			const Hypothesis *hypo = *iterHypo;
			OutputWordGraph(outputWordGraphStream, hypo, linkId);
			if (outputNBest)
			{
				// also emit a link for every hypothesis recombined into this one
				const ArcList *arcList = hypo->GetArcList();
				if (arcList != NULL)
				{
					ArcList::const_iterator iterArcList;
					for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
					{
						const Hypothesis *loserHypo = *iterArcList;
						OutputWordGraph(outputWordGraphStream, loserHypo, linkId);
					}
				}
			} //if (outputNBest)
		} //for (iterHypo
	} // for (iterStack
}
/** Write one search-graph line for hypo in either the traditional format
 * (ids, aggregate scores, source coverage, output string) or the extended
 * format (per-feature transition-score breakdown).
 * \param recombinationHypo winner this hypothesis was recombined into, or NULL
 * \param forward id of the best following hypothesis (forward pointer)
 * \param fscore  best forward score from this hypothesis to a complete translation
 */
void OutputSearchGraph(long translationId, std::ostream &outputSearchGraphStream, const Hypothesis *hypo, const Hypothesis *recombinationHypo, int forward, double fscore)
{
	const vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
	bool extendedFormat = StaticData::Instance().GetOutputSearchGraphExtended();
	outputSearchGraphStream << translationId;
	// special case: initial hypothesis
	if ( hypo->GetId() == 0 )
	{
		outputSearchGraphStream << " hyp=0 stack=0";
		if (!extendedFormat)
		{
			outputSearchGraphStream << " forward=" << forward << " fscore=" << fscore;
		}
		outputSearchGraphStream << endl;
		return;
	}
	const Hypothesis *prevHypo = hypo->GetPrevHypo();
	// output in traditional format
	if (!extendedFormat)
	{
		outputSearchGraphStream << " hyp=" << hypo->GetId()
							<< " stack=" << hypo->GetWordsBitmap().GetNumWordsCovered()
							<< " back=" << prevHypo->GetId()
							<< " score=" << hypo->GetScore()
							<< " transition=" << (hypo->GetScore() - prevHypo->GetScore());
		if (recombinationHypo != NULL)
			outputSearchGraphStream << " recombined=" << recombinationHypo->GetId();
		outputSearchGraphStream << " forward=" << forward << " fscore=" << fscore
							<< " covered=" << hypo->GetCurrSourceWordsRange().GetStartPos()
							<< "-" << hypo->GetCurrSourceWordsRange().GetEndPos()
							<< " out=" << hypo->GetCurrTargetPhrase().GetStringRep(outputFactorOrder)
							<< endl;
		return;
	}
	// output in extended format: recombined hypotheses report the winner's id
	if (recombinationHypo != NULL)
		outputSearchGraphStream << " hyp=" << recombinationHypo->GetId();
	else
		outputSearchGraphStream << " hyp=" << hypo->GetId();
	outputSearchGraphStream << " back=" << prevHypo->GetId();
	// per-feature transition scores = this breakdown minus the predecessor's
	ScoreComponentCollection scoreBreakdown = hypo->GetScoreBreakdown();
	scoreBreakdown.MinusEquals( prevHypo->GetScoreBreakdown() );
	outputSearchGraphStream << " [ ";
	StaticData::Instance().GetScoreIndexManager().PrintLabeledScores( outputSearchGraphStream, scoreBreakdown );
	outputSearchGraphStream << " ]";
	outputSearchGraphStream << " out=" << hypo->GetCurrTargetPhrase().GetStringRep(outputFactorOrder) << endl;
}
/** Collect every hypothesis that can reach a complete translation, walking
 * backwards from the final stack over predecessor pointers and recombination
 * arcs.
 * \param pConnected     out: hypothesis id -> true for each reachable hypothesis
 * \param pConnectedList out: the same hypotheses in discovery order; doubles
 *                       as the work list of the traversal
 */
void Manager::GetConnectedGraph(
		std::map< int, bool >* pConnected,
		std::vector< const Hypothesis* >* pConnectedList) const {
	std::map < int, bool >& connected = *pConnected;
	std::vector< const Hypothesis *>& connectedList = *pConnectedList;
	// start with the ones in the final stack
	const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
	const HypothesisStack &finalStack = *hypoStackColl.back();
	HypothesisStack::const_iterator iterHypo;
	for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo)
	{
		const Hypothesis *hypo = *iterHypo;
		connected[ hypo->GetId() ] = true;
		connectedList.push_back( hypo );
	}
	// move back from known connected hypotheses
	// (connectedList grows during the loop, so newly found hypotheses are visited too)
	for(size_t i=0; i<connectedList.size(); i++) {
		const Hypothesis *hypo = connectedList[i];
		// add back pointer
		const Hypothesis *prevHypo = hypo->GetPrevHypo();
		if (prevHypo->GetId() > 0 // don't add empty hypothesis
				&& connected.find( prevHypo->GetId() ) == connected.end()) // don't add already added
		{
			connected[ prevHypo->GetId() ] = true;
			connectedList.push_back( prevHypo );
		}
		// add arcs (hypotheses recombined into this one)
		const ArcList *arcList = hypo->GetArcList();
		if (arcList != NULL)
		{
			ArcList::const_iterator iterArcList;
			for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
			{
				const Hypothesis *loserHypo = *iterArcList;
				if (connected.find( loserHypo->GetId() ) == connected.end()) // don't add already added
				{
					connected[ loserHypo->GetId() ] = true;
					connectedList.push_back( loserHypo );
				}
			}
		}
	}
}
/** Like GetConnectedGraph(), but for a recombination arc it adds the loser's
 * *predecessor* rather than the loser itself — the loser hypothesis is
 * represented by the winner it was recombined into.
 * \param pConnected     out: hypothesis id -> true for each reachable hypothesis
 * \param pConnectedList out: the same hypotheses in discovery order (work list)
 */
void Manager::GetWinnerConnectedGraph(
		std::map< int, bool >* pConnected,
		std::vector< const Hypothesis* >* pConnectedList) const {
	std::map < int, bool >& connected = *pConnected;
	std::vector< const Hypothesis *>& connectedList = *pConnectedList;
	// start with the ones in the final stack
	const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
	const HypothesisStack &finalStack = *hypoStackColl.back();
	HypothesisStack::const_iterator iterHypo;
	for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo)
	{
		const Hypothesis *hypo = *iterHypo;
		connected[ hypo->GetId() ] = true;
		connectedList.push_back( hypo );
	}
	// move back from known connected hypotheses
	// (connectedList grows during the loop, so newly found hypotheses are visited too)
	for(size_t i=0; i<connectedList.size(); i++) {
		const Hypothesis *hypo = connectedList[i];
		// add back pointer
		const Hypothesis *prevHypo = hypo->GetPrevHypo();
		if (prevHypo->GetId() > 0 // don't add empty hypothesis
				&& connected.find( prevHypo->GetId() ) == connected.end()) // don't add already added
		{
			connected[ prevHypo->GetId() ] = true;
			connectedList.push_back( prevHypo );
		}
		// add arcs: follow each loser back to its predecessor
		const ArcList *arcList = hypo->GetArcList();
		if (arcList != NULL)
		{
			ArcList::const_iterator iterArcList;
			for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
			{
				const Hypothesis *loserHypo = *iterArcList;
				if (connected.find( loserHypo->GetPrevHypo()->GetId() ) == connected.end() && loserHypo->GetPrevHypo()->GetId() > 0) // don't add already added & don't add hyp 0
				{
					connected[ loserHypo->GetPrevHypo()->GetId() ] = true;
					connectedList.push_back( loserHypo->GetPrevHypo() );
				}
			}
		}
	}
}
#ifdef HAVE_PROTOBUF
/** Fill a protobuf hypergraph edge with the target phrase (as a rule) and the
 * per-feature transition scores of one hypothesis expansion.
 */
void SerializeEdgeInfo(const Hypothesis* hypo, hgmert::Hypergraph_Edge* edge) {
	hgmert::Rule* rule = edge->mutable_rule();
	hypo->GetCurrTargetPhrase().WriteToRulePB(rule);
	const Hypothesis* prev = hypo->GetPrevHypo();
	// if the feature values are empty, they default to 0
	// (only the initial hypothesis has no predecessor)
	if (!prev) return;
	// score breakdown is an aggregate (forward) quantity, but the exported
	// graph object just wants the feature values on the edges
	const ScoreComponentCollection& scores = hypo->GetScoreBreakdown();
	const ScoreComponentCollection& pscores = prev->GetScoreBreakdown();
	// NOTE(review): the sign is flipped here — presumably the hypergraph
	// consumer expects costs rather than model scores; confirm convention.
	for (unsigned int i = 0; i < scores.size(); ++i)
		edge->add_feature_values((scores[i] - pscores[i]) * -1.0);
}
/** Look up (or lazily create) the hypergraph node for a hypothesis.
 * \param i2hgnode hypothesis id -> node index map, updated on creation
 * \param hgNodeIdx out: index of the returned node
 * \return the existing node, or a freshly appended one on first sight
 */
hgmert::Hypergraph_Node* GetHGNode(
	const Hypothesis* hypo,
	std::map< int, int>* i2hgnode,
	hgmert::Hypergraph* hg,
	int* hgNodeIdx) {
	const int hypoId = hypo->GetId();
	std::map<int, int>::iterator existing = i2hgnode->find(hypoId);
	if (existing != i2hgnode->end()) {
		// already mapped: hand back the node created earlier
		*hgNodeIdx = existing->second;
		return hg->mutable_nodes(*hgNodeIdx);
	}
	// first time this hypothesis is seen: remember the next free index
	// and append a fresh node for it
	const int newIdx = hg->nodes_size();
	(*i2hgnode)[hypoId] = newIdx;
	*hgNodeIdx = newIdx;
	return hg->add_nodes();
}
/** Export the pruned search graph as a protobuf hypergraph (lattice MERT).
 * Node 0 is the goal node, node 1 the source node; every connected hypothesis
 * becomes a node and every expansion (including recombination arcs) an edge.
 * Complete hypotheses additionally get a unary "[X,1]" edge into the goal.
 */
void Manager::SerializeSearchGraphPB(
	long translationId,
	std::ostream& outputStream) const {
	using namespace hgmert;
	std::map < int, bool > connected;
	std::map < int, int > i2hgnode; // hypothesis id -> hypergraph node index
	std::vector< const Hypothesis *> connectedList;
	GetConnectedGraph(&connected, &connectedList);
	connected[ 0 ] = true; // the initial hypothesis is always in the graph
	Hypergraph hg;
	hg.set_is_sorted(false);
	// feature count read from the score breakdown of any complete hypothesis
	int num_feats = (*m_search->GetHypothesisStacks().back()->begin())->GetScoreBreakdown().size();
	hg.set_num_features(num_feats);
	StaticData::Instance().GetScoreIndexManager().SerializeFeatureNamesToPB(&hg);
	Hypergraph_Node* goal = hg.add_nodes();	// idx=0 goal node must have idx 0
	Hypergraph_Node* source = hg.add_nodes();	// idx=1
	i2hgnode[-1] = 1; // source node
	const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
	// NOTE(review): finalStack is unused below — left in place to avoid
	// changing behaviour; confirm it can be removed.
	const HypothesisStack &finalStack = *hypoStackColl.back();
	for (std::vector < HypothesisStack* >::const_iterator iterStack = hypoStackColl.begin();
		 iterStack != hypoStackColl.end() ; ++iterStack)
	{
		const HypothesisStack &stack = **iterStack;
		HypothesisStack::const_iterator iterHypo;
		for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo)
		{
			const Hypothesis *hypo = *iterHypo;
			bool is_goal = hypo->GetWordsBitmap().IsComplete();
			if (connected.find( hypo->GetId() ) != connected.end())
			{
				int headNodeIdx;
				Hypergraph_Node* headNode = GetHGNode(hypo, &i2hgnode, &hg, &headNodeIdx);
				if (is_goal) {
					// connect complete hypotheses to the goal node with a unary rule
					Hypergraph_Edge* ge = hg.add_edges();
					ge->set_head_node(0); // goal
					ge->add_tail_nodes(headNodeIdx);
					ge->mutable_rule()->add_trg_words("[X,1]");
				}
				// edge for the expansion that created this hypothesis
				Hypergraph_Edge* edge = hg.add_edges();
				SerializeEdgeInfo(hypo, edge);
				edge->set_head_node(headNodeIdx);
				const Hypothesis* prev = hypo->GetPrevHypo();
				int tailNodeIdx = 1; // source
				if (prev)
					tailNodeIdx = i2hgnode.find(prev->GetId())->second;
				edge->add_tail_nodes(tailNodeIdx);
				// one more edge per recombined (loser) hypothesis, sharing the head
				const ArcList *arcList = hypo->GetArcList();
				if (arcList != NULL)
				{
					ArcList::const_iterator iterArcList;
					for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
					{
						const Hypothesis *loserHypo = *iterArcList;
						assert(connected[loserHypo->GetId()]);
						Hypergraph_Edge* edge = hg.add_edges();
						SerializeEdgeInfo(loserHypo, edge);
						edge->set_head_node(headNodeIdx);
						tailNodeIdx = i2hgnode.find(loserHypo->GetPrevHypo()->GetId())->second;
						edge->add_tail_nodes(tailNodeIdx);
					}
				} // end if arcList empty
			} // end if connected
		} // end for iterHypo
	} // end for iterStack
	hg.SerializeToOstream(&outputStream);
}
#endif
/** Write the pruned search graph (only hypotheses that can reach a complete
 * translation) for one sentence, one line per hypothesis / recombination arc.
 * First computes, for each connected hypothesis, the best forward score and
 * the forward pointer (id of the best following hypothesis), then prints each
 * hypothesis via OutputSearchGraph().
 */
void Manager::GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const
{
	std::map < int, bool > connected;      // hypothesis id -> reachable from a complete hypothesis
	std::map < int, int > forward;         // hypothesis id -> id of best following hypothesis (-1 at goal)
	std::map < int, double > forwardScore; // hypothesis id -> best score from here to completion
	// *** find connected hypotheses ***
	std::vector< const Hypothesis *> connectedList;
	GetConnectedGraph(&connected, &connectedList);
	// ** compute best forward path for each hypothesis *** //
	// forward cost of hypotheses on final stack is 0
	const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
	const HypothesisStack &finalStack = *hypoStackColl.back();
	HypothesisStack::const_iterator iterHypo;
	for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo)
	{
		const Hypothesis *hypo = *iterHypo;
		forwardScore[ hypo->GetId() ] = 0.0f;
		forward[ hypo->GetId() ] = -1;
	}
	// compete for best forward score of previous hypothesis
	// (walk stacks from the last one backwards, excluding stack 0)
	std::vector < HypothesisStack* >::const_iterator iterStack;
	for (iterStack = --hypoStackColl.end() ; iterStack != hypoStackColl.begin() ; --iterStack)
	{
		const HypothesisStack &stack = **iterStack;
		HypothesisStack::const_iterator iterHypo;
		for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo)
		{
			const Hypothesis *hypo = *iterHypo;
			if (connected.find( hypo->GetId() ) != connected.end())
			{
				// make a play for previous hypothesis
				const Hypothesis *prevHypo = hypo->GetPrevHypo();
				double fscore = forwardScore[ hypo->GetId() ] +
					hypo->GetScore() - prevHypo->GetScore();
				if (forwardScore.find( prevHypo->GetId() ) == forwardScore.end()
						|| forwardScore.find( prevHypo->GetId() )->second < fscore)
				{
					forwardScore[ prevHypo->GetId() ] = fscore;
					forward[ prevHypo->GetId() ] = hypo->GetId();
				}
				// all arcs also make a play
				const ArcList *arcList = hypo->GetArcList();
				if (arcList != NULL)
				{
					ArcList::const_iterator iterArcList;
					for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
					{
						const Hypothesis *loserHypo = *iterArcList;
						// make a play
						const Hypothesis *loserPrevHypo = loserHypo->GetPrevHypo();
						double fscore = forwardScore[ hypo->GetId() ] +
							loserHypo->GetScore() - loserPrevHypo->GetScore();
						if (forwardScore.find( loserPrevHypo->GetId() ) == forwardScore.end()
								|| forwardScore.find( loserPrevHypo->GetId() )->second < fscore)
						{
							forwardScore[ loserPrevHypo->GetId() ] = fscore;
							forward[ loserPrevHypo->GetId() ] = loserHypo->GetId();
						}
					} // end for arc list
				} // end if arc list empty
			} // end if hypo connected
		} // end for hypo
	} // end for stack
	// *** output all connected hypotheses *** //
	connected[ 0 ] = true; // also print the initial hypothesis
	for (iterStack = hypoStackColl.begin() ; iterStack != hypoStackColl.end() ; ++iterStack)
	{
		const HypothesisStack &stack = **iterStack;
		HypothesisStack::const_iterator iterHypo;
		for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo)
		{
			const Hypothesis *hypo = *iterHypo;
			if (connected.find( hypo->GetId() ) != connected.end())
			{
				OutputSearchGraph(translationId, outputSearchGraphStream, hypo, NULL, forward[ hypo->GetId() ], forwardScore[ hypo->GetId() ]);
				// recombination arcs are printed with the winner's forward info
				const ArcList *arcList = hypo->GetArcList();
				if (arcList != NULL)
				{
					ArcList::const_iterator iterArcList;
					for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
					{
						const Hypothesis *loserHypo = *iterArcList;
						OutputSearchGraph(translationId, outputSearchGraphStream, loserHypo, hypo, forward[ hypo->GetId() ], forwardScore[ hypo->GetId() ]);
					}
				} // end if arcList empty
			} // end if connected
		} // end for iterHypo
	} // end for iterStack
}
/** For Lattice MBR: collect the winner-connected search graph together with,
 * for each hypothesis, its outgoing successors and an estimated total score
 * (score so far + best forward score to completion).
 * \param pConnected     out: hypothesis id -> true for each reachable hypothesis
 * \param pConnectedList out: reachable hypotheses in discovery order
 * \param pOutgoingHyps  out: hypothesis -> set of successors reached from it
 * \param pFwdBwdScores  out: estimated scores, parallel to *pConnectedList
 */
void Manager::GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected,
	std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, set< const Hypothesis* > >* pOutgoingHyps, vector< float>* pFwdBwdScores) const
{
	std::map < int, bool > &connected = *pConnected;
	std::vector< const Hypothesis *>& connectedList = *pConnectedList;
	std::map < int, int > forward;         // hypothesis id -> id of best following hypothesis (-1 at goal)
	std::map < int, double > forwardScore; // hypothesis id -> best score from here to completion
	std::map < const Hypothesis*, set <const Hypothesis*> > & outgoingHyps = *pOutgoingHyps;
	vector< float> & estimatedScores = *pFwdBwdScores;
	// *** find connected hypotheses ***
	GetWinnerConnectedGraph(&connected, &connectedList);
	// ** compute best forward path for each hypothesis *** //
	// forward cost of hypotheses on final stack is 0
	const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
	const HypothesisStack &finalStack = *hypoStackColl.back();
	HypothesisStack::const_iterator iterHypo;
	for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo)
	{
		const Hypothesis *hypo = *iterHypo;
		forwardScore[ hypo->GetId() ] = 0.0f;
		forward[ hypo->GetId() ] = -1;
	}
	// compete for best forward score of previous hypothesis
	// (walk stacks from the last one backwards, excluding stack 0)
	std::vector < HypothesisStack* >::const_iterator iterStack;
	for (iterStack = --hypoStackColl.end() ; iterStack != hypoStackColl.begin() ; --iterStack)
	{
		const HypothesisStack &stack = **iterStack;
		HypothesisStack::const_iterator iterHypo;
		for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo)
		{
			const Hypothesis *hypo = *iterHypo;
			if (connected.find( hypo->GetId() ) != connected.end())
			{
				// make a play for previous hypothesis
				const Hypothesis *prevHypo = hypo->GetPrevHypo();
				double fscore = forwardScore[ hypo->GetId() ] +
					hypo->GetScore() - prevHypo->GetScore();
				if (forwardScore.find( prevHypo->GetId() ) == forwardScore.end()
						|| forwardScore.find( prevHypo->GetId() )->second < fscore)
				{
					forwardScore[ prevHypo->GetId() ] = fscore;
					forward[ prevHypo->GetId() ] = hypo->GetId();
				}
				//store outgoing info
				outgoingHyps[prevHypo].insert(hypo);
				// all arcs also make a play
				const ArcList *arcList = hypo->GetArcList();
				if (arcList != NULL)
				{
					ArcList::const_iterator iterArcList;
					for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
					{
						const Hypothesis *loserHypo = *iterArcList;
						// make a play
						const Hypothesis *loserPrevHypo = loserHypo->GetPrevHypo();
						double fscore = forwardScore[ hypo->GetId() ] +
							loserHypo->GetScore() - loserPrevHypo->GetScore();
						if (forwardScore.find( loserPrevHypo->GetId() ) == forwardScore.end()
								|| forwardScore.find( loserPrevHypo->GetId() )->second < fscore)
						{
							forwardScore[ loserPrevHypo->GetId() ] = fscore;
							forward[ loserPrevHypo->GetId() ] = loserHypo->GetId();
						}
						//store outgoing info
						// NOTE(review): the winner hypo (not loserHypo) is recorded as
						// the successor of loserPrevHypo — the loser is represented by
						// the winner it recombined into; confirm this is intended.
						outgoingHyps[loserPrevHypo].insert(hypo);
					} // end for arc list
				} // end if arc list empty
			} // end if hypo connected
		} // end for hypo
	} // end for stack
	// estimated score = score accumulated so far + best forward score
	for (std::vector< const Hypothesis *>::iterator it = connectedList.begin(); it != connectedList.end(); ++it) {
		float estimatedScore = (*it)->GetScore() + forwardScore[(*it)->GetId()];
		estimatedScores.push_back(estimatedScore);
	}
}
/** Best complete translation found by the search (may be NULL if none). */
const Hypothesis *Manager::GetBestHypothesis() const
{
	// simply delegate to the search implementation
	const Hypothesis *best = m_search->GetBestHypothesis();
	return best;
}
}

141
src/Manager.h Normal file
View File

@ -0,0 +1,141 @@
// $Id: Manager.h 2957 2010-03-08 15:28:40Z abarun $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_Manager_h
#define moses_Manager_h
#include <vector>
#include <list>
#include <ctime>
#include "InputType.h"
#include "Hypothesis.h"
#include "StaticData.h"
#include "TranslationOption.h"
#include "TranslationOptionCollection.h"
#include "TrellisPathList.h"
#include "SquareMatrix.h"
#include "WordsBitmap.h"
#include "Search.h"
#include "SearchCubePruning.h"
#if HAVE_CONFIG_H
#include "config.h"
#endif
namespace Moses
{
class TrellisPath;
class TranslationOptionCollection;
/** The Manager class implements a stack decoding algorithm.
 * Hypotheses are organized in stacks. One stack contains all hypotheses that have
* the same number of foreign words translated. The data structure for hypothesis
* stacks is the class HypothesisStack. The data structure for a hypothesis
* is the class Hypothesis.
*
* The main decoder loop in the function ProcessSentence() consists of the steps:
* - Create the list of possible translation options. In phrase-based decoding
* (and also the first mapping step in the factored model) is a phrase translation
* from the source to the target. Given a specific input sentence, only a limited
 * number of phrase translations can be applied. For efficient lookup of the
 * translation options later, these options are first collected in the function
* CreateTranslationOption (for more information check the class
* TranslationOptionCollection)
* - Create initial hypothesis: Hypothesis stack 0 contains only one empty hypothesis.
* - Going through stacks 0 ... (sentence_length-1):
* - The stack is pruned to the maximum size
* - Going through all hypotheses in the stack
* - Each hypothesis is expanded by ProcessOneHypothesis()
* - Expansion means applying a translation option to the hypothesis to create
* new hypotheses
* - What translation options may be applied depends on reordering limits and
* overlap with already translated words
 * - With an applicable translation option and a hypothesis at hand, a new
* hypothesis can be created in ExpandHypothesis()
 * - New hypotheses are either discarded (because they are too bad), added to
* the appropriate stack, or re-combined with existing hypotheses
**/
class Manager
{
	// non-copyable: default ctor, copy ctor and assignment are declared
	// private and left undefined
	Manager();
	Manager(Manager const&);
	void operator=(Manager const&);
protected:
	// data
	InputType const& m_source; /**< source sentence to be translated */
	TranslationOptionCollection *m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */
	Search *m_search; /**< search implementation (chosen by SearchAlgorithm in the ctor) */
	HypothesisStack* actual_hypoStack; /**actual (full expanded) stack of hypotheses*/
	clock_t m_start; /**< starting time, used for logging */
	size_t interrupted_flag; /**< NOTE(review): presumably non-zero when search was aborted early — confirm */
	/** collect hypotheses reachable from the final stack (see Manager.cpp) */
	void GetConnectedGraph(
		std::map< int, bool >* pConnected,
		std::vector< const Hypothesis* >* pConnectedList) const;
	/** like GetConnectedGraph, but arcs contribute the loser's predecessor */
	void GetWinnerConnectedGraph(
		std::map< int, bool >* pConnected,
		std::vector< const Hypothesis* >* pConnectedList) const;
public:
	Manager(InputType const& source, SearchAlgorithm searchAlgorithm);
	~Manager();
	/** run the main decoding loop for m_source */
	void ProcessSentence();
	const Hypothesis *GetBestHypothesis() const;
	const Hypothesis *GetActualBestHypothesis() const;
	/** mine the search graph for an n-best list of translations */
	void CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct=0) const;
	void PrintAllDerivations(long translationId) const;
	void printDivergentHypothesis(long translationId, const Hypothesis* hypo, const std::vector <const TargetPhrase*> & remainingPhrases, float remainingScore ) const;
	void printThisHypothesis(long translationId, const Hypothesis* hypo, const std::vector <const TargetPhrase* > & remainingPhrases, float remainingScore ) const;
	/** dump the search space as a word graph */
	void GetWordGraph(long translationId, std::ostream &outputWordGraphStream) const;
#ifdef HAVE_PROTOBUF
	/** export the pruned search graph as a protobuf hypergraph */
	void SerializeSearchGraphPB(long translationId, std::ostream& outputStream) const;
#endif
	/** dump the pruned search graph in text form */
	void GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const;
	const InputType& GetSource() const {return m_source;}
	/***
	 * to be called after processing a sentence (which may consist of more than just calling ProcessSentence() )
	 */
	void CalcDecoderStatistics() const;
	void ResetSentenceStats(const InputType& source)
	{
		m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
	}
	SentenceStats& GetSentenceStats() const
	{
		return *m_sentenceStats;
	}
	/***
	 *For Lattice MBR
	 */
	void GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected,
		std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, set < const Hypothesis* > >* pOutgoingHyps, vector< float>* pFwdBwdScores) const;
	// owned per-sentence statistics; reset by ResetSentenceStats()
	std::auto_ptr<SentenceStats> m_sentenceStats;
};
}
#endif

67
src/NGramCollection.cpp Normal file
View File

@ -0,0 +1,67 @@
// $Id: NGramCollection.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "NGramCollection.h"
#include "NGramNode.h"
namespace Moses
{
/** Destructor: release every NGramNode owned by the collection. */
NGramCollection::~NGramCollection()
{
	for (Collection::iterator iter = m_collection.begin(); iter != m_collection.end(); ++iter)
	{
		delete iter->second;
	}
}
/** Unimplemented stub: declared in the header but deliberately does nothing.
 * NOTE(review): callers presumably build n-grams via GetOrCreateNGram()
 * instead — confirm before relying on this method.
 */
void NGramCollection::Add(const Factor *factor, const NGramNode &ngramNode)
{
}
/** Return the node stored for factor, creating (and taking ownership of) a
 * new empty NGramNode if none exists yet.
 *
 * Uses a single map lookup: insert() reports whether the key was already
 * present, replacing the original find()-then-operator[] double lookup.
 */
NGramNode *NGramCollection::GetOrCreateNGram(const Factor *factor)
{
	std::pair<Collection::iterator, bool> result =
		m_collection.insert(std::make_pair(factor, static_cast<NGramNode*>(NULL)));
	if (result.second)
	{
		// key was absent: fill in the freshly inserted slot
		result.first->second = new NGramNode();
	}
	return result.first->second;
}
/** Find the node for factor; returns NULL when absent (never creates). */
NGramNode *NGramCollection::GetNGram(const Factor *factor)
{
	Collection::iterator iter = m_collection.find(factor);
	if (iter == m_collection.end())
		return NULL;
	return iter->second;
}
/** Const overload: find the node for factor; NULL when absent. */
const NGramNode *NGramCollection::GetNGram(const Factor *factor) const
{
	Collection::const_iterator iter = m_collection.find(factor);
	if (iter == m_collection.end())
		return NULL;
	return iter->second;
}
}

57
src/NGramCollection.h Normal file
View File

@ -0,0 +1,57 @@
// $Id: NGramCollection.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_NGramCollection_h
#define moses_NGramCollection_h
#include <map>
#include <vector>
#include "NGramNode.h"
namespace Moses
{
class Factor;
typedef std::vector<const Factor*> FactorVector;
/** Maps a Factor* to the NGramNode continuing the n-gram with that factor.
 * Owns its NGramNode values: they are deleted in the destructor.
 */
class NGramCollection
{
protected:
	typedef std::map<const Factor*, NGramNode*> Collection;
	Collection m_collection; // factor -> owned child node
	// unimplemented stub: empty body in the .cpp
	void Add(const Factor *factor, const NGramNode &ngramNode);
public:
	NGramCollection()
	{
	}
	~NGramCollection(); // deletes every stored NGramNode
	NGramNode *GetOrCreateNGram(const Factor *factor); // lookup, creating an empty node on miss
	NGramNode *GetNGram(const Factor *factor); // lookup only; NULL on miss
	const NGramNode *GetNGram(const Factor *factor) const;
};
}
#endif

26
src/NGramNode.cpp Normal file
View File

@ -0,0 +1,26 @@
#include "NGramNode.h"
#include "NGramCollection.h"
namespace Moses
{
/** Construct an empty node: zeroed scores, a fresh (owned) child collection,
 * and no root pointer yet.
 *
 * The original left m_score, m_logBackOff and m_rootNGram uninitialized, so
 * GetScore()/GetLogBackOff()/GetRootNGram() returned indeterminate values
 * before the corresponding setters ran; they now start from defined defaults.
 */
NGramNode::NGramNode()
	: m_score(0.0f)
	, m_logBackOff(0.0f)
	, m_map(new NGramCollection())
	, m_rootNGram(NULL)
{
}
/** Destructor: delete the owned child collection (which deletes its nodes).
 * NOTE(review): NGramNode declares no copy ctor/assignment, so copying an
 * instance would double-delete m_map — confirm instances are never copied.
 */
NGramNode::~NGramNode()
{
	delete m_map;
}
/** Const lookup of the child node for factor; delegates to the owned
 * collection and returns NULL on miss. */
const NGramNode *NGramNode::GetNGram(const Factor *factor) const
{
	return m_map->GetNGram(factor);
}
/** Lookup of the child node for factor; delegates to the owned collection
 * and returns NULL on miss. */
NGramNode *NGramNode::GetNGram(const Factor *factor)
{
	return m_map->GetNGram(factor);
}
}

79
src/NGramNode.h Normal file
View File

@ -0,0 +1,79 @@
// $Id: NGramNode.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_NGramNode_h
#define moses_NGramNode_h
#include "Factor.h"
namespace Moses
{
class NGramCollection;
// One node of an n-gram trie: carries the log-probability / log-backoff of
// the n-gram ending at this node plus the collection of its continuations.
class NGramNode
{
protected:
	// Conditional log-probability and log backoff weight for this n-gram.
	// NOTE(review): verify these are always set (SetScore/SetLogBackOff)
	// before the getters are called.
	float m_score, m_logBackOff;
	// Successor collection (next Factor -> child node); owned — allocated in
	// the constructor and deleted in the destructor (see NGramNode.cpp).
	// NOTE(review): no copy ctor/assignment declared, so copying a NGramNode
	// would double-delete m_map (Rule of Three) — confirm nodes are not copied.
	NGramCollection *m_map;
	// Back-pointer installed via SetRootNGram(); not owned.
	NGramNode *m_rootNGram;
public:
	NGramNode();
	~NGramNode();
	// Direct access to the successor collection.
	NGramCollection *GetNGramColl()
	{
		return m_map;
	}
	// Child-node lookup; forwards to m_map (see NGramNode.cpp).
	const NGramNode *GetNGram(const Factor *factor) const;
	NGramNode *GetNGram(const Factor *factor);
	const NGramNode *GetRootNGram() const
	{
		return m_rootNGram;
	}
	void SetRootNGram(NGramNode *rootNGram)
	{
		m_rootNGram = rootNGram;
	}
	float GetScore() const
	{
		return m_score;
	}
	float GetLogBackOff() const
	{
		return m_logBackOff;
	}
	void SetScore(float score)
	{
		m_score = score;
	}
	void SetLogBackOff(float logBackOff)
	{
		m_logBackOff = logBackOff;
	}
};
}
#endif

127
src/ObjectPool.h Normal file
View File

@ -0,0 +1,127 @@
// $Id: ObjectPool.h 2939 2010-02-24 11:15:44Z jfouet $
/* ---------------------------------------------------------------- */
/* Copyright 2005 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
/* Richard Zens */
/* ---------------------------------------------------------------- */
#ifndef moses_ObjectPool_h
#define moses_ObjectPool_h
#include <vector>
#include <deque>
#include <string>
#include <iostream>
#include <iterator>
#include "Util.h"
/***
* template class for pool of objects
* - useful if many small objects are frequently created and destroyed
* - allocates memory for N objects at a time
* - separates memory allocation from constructor/destructor calls
* - prevents memory leaks
*/
/** Pool of objects of type T.
 *  - useful if many small objects are frequently created and destroyed
 *  - allocates memory for N objects at a time (block size doubles per block)
 *  - separates memory allocation from constructor/destructor calls via
 *    placement new and explicit destructor invocation
 *  - prevents memory leaks (owns every block it malloc's)
 *
 *  Fix: freeObjects() previously called the non-existent member
 *  'this->free(*b)', which failed to compile as soon as the template was
 *  instantiated; it now routes each element through freeObject().
 */
template<typename T> class ObjectPool {
public:
	typedef T Object;
private:
	std::string name;                 // label used only in diagnostics
	size_t idx,dIdx,N;                // idx: next slot in block dIdx; N: first block size
	std::vector<Object*> data;        // malloc'ed blocks of raw Object storage
	std::vector<size_t> dataSize;     // capacity (in Objects) of each block
	std::deque<Object*> freeObj;      // returned objects awaiting reuse (not yet destroyed)
	int mode;
public:
	static const int cleanUpOnDestruction=1;
	static const int hasTrivialDestructor=2;
	// constructor arguments:
	// N: initial number of objects to allocate memory at a time
	// m & cleanUpOnDestruction = clean up objects in destructor
	// m & hasTrivialDestructor = the object type has a trivial destructor,
	//     i.e. no sub-object uses dynamically allocated memory
	//     note: not equivalent to empty destructor
	//     -> more efficient (destructor calls can be omitted),
	//     note: looks like memory leak, but is not
	ObjectPool(std::string name_="T",size_t N_=100000,int m=cleanUpOnDestruction)
		: name(name_),idx(0),dIdx(0),N(N_),mode(m) {allocate();}

	// main accesss functions:
	// get pointer to object via default or copy constructor
	Object* get() {return new (getPtr()) Object;}
	Object* get(const Object& x) {return new (getPtr()) Object(x);}

	// get pointer to uninitialized memory,
	// WARNING: use only if you know what you are doing !
	// useful for non-default constructors, you have to use placement new
	Object* getPtr() {
		// prefer a recycled slot; destroy the stale object before handing it out
		if(freeObj.size()) {
			Object* rv=freeObj.back();freeObj.pop_back();rv->~Object();return rv;}
		// current block exhausted -> advance to (or allocate) the next block
		if(idx==dataSize[dIdx]) {idx=0; if(++dIdx==data.size()) allocate();}
		return data[dIdx]+idx++;
	}

	// return object(s) to pool for reuse
	// note: objects are not destroyed here, but in 'getPtr'/'destroyObjects',
	//       otherwise 'destroyObjects' would have to check the freeObj-stack
	//       before each destructor call
	void freeObject(Object* x) {freeObj.push_back(x);}
	template<class fwiter> void freeObjects(fwiter b,fwiter e) {
		for(;b!=e;++b) freeObject(*b);}

	// destroy all objects, but do not free memory
	void reset() {destroyObjects();idx=0;dIdx=0;freeObj.clear();}

	// destroy all objects and free memory
	void cleanUp() {
		reset(); for(size_t i=0;i<data.size();++i) free(data[i]);
		data.clear();dataSize.clear();
	}

	~ObjectPool() {if(mode & cleanUpOnDestruction) cleanUp();}

	// dump pool statistics (block count/sizes, free list size) to 'out'
	void printInfo(std::ostream& out) const {
		out<<"OPOOL ("<<name<<") info: "<<data.size()<<" "<<dataSize.size()<<" "
			<<freeObj.size()<<"\n"<<idx<<" "<<dIdx<<" "<<N<<"\n";
		std::copy(dataSize.begin(),dataSize.end(),
							std::ostream_iterator<size_t>(out," "));
		out<<"\n\n";
	}
private:
	// run destructors on every live slot (skipped entirely for trivially
	// destructible types); slots past 'idx' in the last block were never
	// constructed and are not touched
	void destroyObjects() {
		if(mode & hasTrivialDestructor) return;
		for(size_t i=0;i<=dIdx;++i) {
			size_t lastJ= (i<dIdx ? dataSize[i] : idx);
			for(size_t j=0;j<lastJ;++j) (data[i]+j)->~Object();}
	}

	// allocate memory for a N objects, for follow-up allocations,
	// the block size is doubled every time
	// if allocation fails, block size is reduced by 1/4
	void allocate() {
		try {
			if(dataSize.empty()) dataSize.push_back(N);
			else dataSize.push_back(dataSize.back()*2);
			void *m=malloc(sizeof(Object)*dataSize.back());
			while(!m) {
				dataSize.back()=static_cast<size_t>(dataSize.back()*0.75);
				m=malloc(sizeof(Object)*dataSize.back());
			}
			data.push_back(static_cast<Object*>(m));
		}
		catch (const std::exception& e) {
			TRACE_ERR("caught std::exception: "<<e.what()
								<<" in ObjectPool::allocate(), name: "<<name<<", last size: "
								<<dataSize.back()<<"\n");
			TRACE_ERR("OPOOL info: "<<data.size()<<" "<<dataSize.size()<<" "
								<<freeObj.size()<<"\n"<<idx<<" "<<dIdx<<" "<<N<<"\n");
			std::copy(dataSize.begin(),dataSize.end(),
								std::ostream_iterator<size_t>(std::cerr," "));
			TRACE_ERR("\n");
			throw;
		}
	}
};
#endif

138
src/PCNTools.cpp Normal file
View File

@ -0,0 +1,138 @@
#include "PCNTools.h"
#include <iostream>
#include <cstdlib>
namespace PCN
{
// The two characters that matter when parsing PCN text: the single quote that
// delimits strings and the backslash used as an escape.  Both are kept in one
// std::string; quote/slash are references into it.
const std::string chars = "'\\";
const char& quote = chars[0];
const char& slash = chars[1];
// Bounds-checked character access: returns in[c], or 0 when c is out of range.
inline char get(const std::string& in, int c) {
	if (c >= 0 && c < (int)in.size()) {
		return in[(size_t)c];
	}
	return 0;
}
// Advance c past any run of spaces (bounds-safe because get() returns 0 at
// end of string).
inline void eatws(const std::string& in, int& c) {
	for (; get(in, c) == ' '; ++c) {
	}
}
// from 'foo' return foo
// Parses a single-quoted, backslash-escaped string starting at position c and
// advances c past the closing quote plus trailing spaces.  Returns the literal
// string "ERROR" (a sentinel, not an exception) if no opening quote is found.
std::string getEscapedString(const std::string& in, int &c)
{
	eatws(in,c);
	if (get(in,c++) != quote) return "ERROR";
	std::string res;
	char cur = 0;
	do {
		cur = get(in,c++);
		// escaped character: append the character following the backslash verbatim
		if (cur == slash) { res += get(in,c++); }
		// ordinary character (a bare quote here is the '' empty-string case)
		else if (cur != quote) { res += cur; }
	// peek: keep going until the *next* character is the closing quote or the
	// input is exhausted
	} while (get(in,c) != quote && (c < (int)in.size()));
	c++;	// consume the closing quote
	eatws(in,c);
	return res;
}
// Read one float token (terminated by ' ', ')' or ',') starting at c,
// advancing c past the token and any trailing spaces; basically atof.
float getFloat(const std::string& in, int &c)
{
	eatws(in, c);
	std::string token;
	while (c < (int)in.size()) {
		const char ch = get(in, c);
		if (ch == ' ' || ch == ')' || ch == ',') {
			break;
		}
		token += ch;
		++c;
	}
	eatws(in, c);
	return atof(token.c_str());
}
// Read one integer token (terminated by ' ', ')' or ',') starting at c,
// advancing c past the token and any trailing spaces; basically atoi.
int getInt(const std::string& in, int &c)
{
	eatws(in, c);
	std::string token;
	while (c < (int)in.size()) {
		const char ch = get(in, c);
		if (ch == ' ' || ch == ')' || ch == ',') {
			break;
		}
		token += ch;
		++c;
	}
	eatws(in, c);
	return atoi(token.c_str());
}
// parse ('foo', 0.23)
// Parses one confusion-network alternative: a parenthesized word plus one or
// more comma-separated scores.  On any syntax error a default-constructed
// (empty) CNAlt is returned after printing a diagnostic — parsing is not
// aborted by an exception.
CNAlt getCNAlt(const std::string& in, int &c)
{
	if (get(in,c++) != '(') { std::cerr << "PCN/PLF parse error: expected ( at start of cn alt block\n"; return CNAlt(); } // throw "expected (";
	std::string word = getEscapedString(in,c);
	if (get(in,c++) != ',') { std::cerr << "PCN/PLF parse error: expected , after string\n"; return CNAlt(); } // throw "expected , after string";
	// column increment defaults to 1 (plain confusion network)
	size_t cnNext = 1;
	std::vector<float> probs;
	probs.push_back(getFloat(in,c));
	// any further comma-separated numbers
	while (get(in,c) == ',') {
		c++;
		float val = getFloat(in,c);
		probs.push_back(val);
	}
	//if we read more than one prob, this was a lattice, last item was column increment
	if (probs.size()>1) {
		cnNext = static_cast<size_t>(probs.back());
		probs.pop_back();
		// NOTE(review): a negative float cast to size_t wraps to a huge value
		// and is not caught by this check — only 0 is rejected here.
		if (cnNext < 1) { ; std::cerr << "PCN/PLF parse error: bad link length at last element of cn alt block\n"; return CNAlt(); } //throw "bad link length"
	}
	if (get(in,c++) != ')') { std::cerr << "PCN/PLF parse error: expected ) at end of cn alt block\n"; return CNAlt(); } // throw "expected )";
	eatws(in,c);
	return CNAlt(std::pair<std::string, std::vector<float> >(word,probs), cnNext);
}
// parse (('foo', 0.23), ('bar', 0.77))
// Parses one confusion-network column: a parenthesized, comma-separated list
// of alternatives.  Returns an empty column when the opening '(' is missing;
// otherwise loops until the matching ')' (optionally preceded by a trailing
// comma) or until the input runs out.
CNCol getCNCol(const std::string& in, int &c) {
	CNCol res;
	if (get(in,c++) != '(') return res; // error
	eatws(in,c);
	while (1) {
		// past end of input: bail out (malformed input terminates here)
		if (c > (int)in.size()) { break; }
		// closing paren ends the column
		if (get(in,c) == ')') {
			c++;
			eatws(in,c);
			break;
		}
		// tolerate a trailing ",)" before the close
		if (get(in,c) == ',' && get(in,c+1) == ')') {
			c+=2;
			eatws(in,c);
			break;
		}
		// separator between alternatives
		if (get(in,c) == ',') { c++; eatws(in,c); }
		res.push_back(getCNAlt(in, c));
	}
	return res;
}
// parse ((('foo', 0.23), ('bar', 0.77)), (('a', 0.3), ('c', 0.7)))
// Top-level PCN/PLF entry point: parses a whole confusion network (a
// parenthesized list of columns).  Returns an empty CN when the input does
// not start with '('.
//
// Fix: the opening-paren check used unchecked 'in[c++]' while every other
// parser in this file uses the bounds-checked get(); with get() an empty
// input string is rejected cleanly instead of indexing at/past the end.
CN parsePCN(const std::string& in)
{
	CN res;
	int c = 0;
	if (get(in,c++) != '(') return res; // error
	while (1) {
		// past end of input: stop (malformed input terminates here)
		if (c > (int)in.size()) { break; }
		// closing paren ends the network
		if (get(in,c) == ')') {
			c++;
			eatws(in,c);
			break;
		}
		// tolerate a trailing ",)" before the close
		if (get(in,c) == ',' && get(in,c+1) == ')') {
			c+=2;
			eatws(in,c);
			break;
		}
		// separator between columns
		if (get(in,c) == ',') { c++; eatws(in,c); }
		res.push_back(getCNCol(in, c));
	}
	return res;
}
}

46
src/PCNTools.h Normal file
View File

@ -0,0 +1,46 @@
// $Id: StaticData.h 992 2006-11-21 23:06:30Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_PCNTools
#define moses_PCNTools
#include <vector>
#include <string>
#include <utility>
#include <cstdlib>
/** A couple of utilities to read .pcn files. A python-compatible format
* for encoding confusion networks.
*/
namespace PCN {
	// One alternative in a confusion-network column:
	// ((word, scores), column-increment) — the increment is 1 for plain
	// confusion networks and >1 for lattice links that skip columns.
	typedef std::pair<std::pair<std::string, std::vector<float> >, size_t> CNAlt;
	// One column: all alternatives competing at the same position.
	typedef std::vector<CNAlt> CNCol;
	// A whole confusion network: the sequence of columns.
	typedef std::vector<CNCol> CN;
	/** Given a string ((('foo',0.1),('bar',0.9)),...) representation of a
	  * confusion net in PCN format, return a CN object
	  */
	CN parsePCN(const std::string& in);
};
#endif

546
src/PDTAimp.h Normal file
View File

@ -0,0 +1,546 @@
// $Id: PDTAimp.h 2939 2010-02-24 11:15:44Z jfouet $
// vim:tabstop=2
#ifndef moses_PDTAimp_h
#define moses_PDTAimp_h
#include "StaticData.h" // needed for factor splitter
namespace Moses
{
// Return true iff 'filePath' names an existing filesystem entry
// (i.e. stat(2) succeeds on it).
inline bool existsFile(const char* filePath) {
	struct stat st;
	return 0 == stat(filePath, &st);
}
// Computes log(exp(x) + exp(y)) in a numerically stable way, working from the
// smaller argument so the exp() cannot overflow.
//
// Fix: declared 'inline' — this is a header (PDTAimp.h), and a non-inline
// function definition here produces multiple-definition link errors as soon
// as the header is included from more than one translation unit (ODR).
inline double addLogScale(double x,double y)
{
	if(x>y) return addLogScale(y,x); else return x+std::log(1.0+std::exp(y-x));
}
// Plain-function wrapper around exp() so it can be passed to std::transform
// (a function pointer, where the overloaded std::exp would be ambiguous).
//
// Fix: declared 'inline' — non-inline definition in this header violates the
// ODR when PDTAimp.h is included from multiple translation units.
inline double Exp(double x)
{
	return std::exp(x);
}
/** Implementation backend ("pimpl") of PhraseDictionaryTreeAdaptor: wraps an
 *  on-disk PhraseDictionaryTree, converts its string candidates into scored
 *  TargetPhrase objects, and caches lookups per source phrase / per
 *  confusion-network range.
 */
class PDTAimp
{
	// only these classes are allowed to instantiate this class
	friend class PhraseDictionaryTreeAdaptor;

protected:
	// Owned state starts empty; m_dict is created later in Create().
	PDTAimp(PhraseDictionaryTreeAdaptor *p,unsigned nis)
		: m_languageModels(0),m_weightWP(0.0),m_dict(0),
		m_obj(p),useCache(1),m_numInputScores(nis),totalE(0),distinctE(0) {}

public:
	std::vector<float> m_weights;            // translation-model feature weights
	LMList const* m_languageModels;          // not owned; set in Create()
	float m_weightWP;                        // word-penalty weight
	std::vector<FactorType> m_input,m_output; // factor layout on each side
	PhraseDictionaryTree *m_dict;            // owned; deleted in the destructor
	typedef std::vector<TargetPhraseCollection const*> vTPC;
	mutable vTPC m_tgtColls;                 // every collection we allocated (for cleanup)
	typedef std::map<Phrase,TargetPhraseCollection const*> MapSrc2Tgt;
	mutable MapSrc2Tgt m_cache;              // source phrase -> candidates cache
	PhraseDictionaryTreeAdaptor *m_obj;      // owner; not deleted here
	int useCache;                            // enabled by default (1)
	std::vector<vTPC> m_rangeCache;          // [begin][end-1] cache for CN input
	unsigned m_numInputScores;               // #scores carried on CN/lattice links
	UniqueObjectManager<Phrase> uniqSrcPhr;  // interns source phrases
	size_t totalE,distinctE;                 // candidate statistics (for logging)
	std::vector<size_t> path1Best,pathExplored; // per-length path counters
	std::vector<double> pathCN;              // log-scale CN path counts

	// Releases caches and the dictionary; at verbosity >= 2 dumps candidate
	// and path statistics.
	// NOTE(review): the statistics lines divide by 0.01*totalE — if no lookup
	// ever happened (totalE==0) this divides by zero; confirm acceptable.
	~PDTAimp()
	{
		CleanUp();
		delete m_dict;
		if (StaticData::Instance().GetVerboseLevel() >= 2)
			{
				TRACE_ERR("tgt candidates stats:  total="<<totalE<<";  distinct="
									<<distinctE<<" ("<<distinctE/(0.01*totalE)<<");  duplicates="
									<<totalE-distinctE<<" ("<<(totalE-distinctE)/(0.01*totalE)
									<<")\n");
				TRACE_ERR("\npath statistics\n");
				if(path1Best.size())
					{
						TRACE_ERR("1-best:        ");
						std::copy(path1Best.begin()+1,path1Best.end(),
											std::ostream_iterator<size_t>(std::cerr," \t"));
						TRACE_ERR("\n");
					}
				if(pathCN.size())
					{
						TRACE_ERR("CN (full):     ");
						std::transform(pathCN.begin()+1
													 ,pathCN.end()
													 ,std::ostream_iterator<double>(std::cerr," \t")
													 ,Exp);
						TRACE_ERR("\n");
					}
				if(pathExplored.size())
					{
						TRACE_ERR("CN (explored): ");
						std::copy(pathExplored.begin()+1,pathExplored.end(),
											std::ostream_iterator<size_t>(std::cerr," \t"));
						TRACE_ERR("\n");
					}
			}
	}

	// Render the input-side factors of 'w' as a single string (no trailing
	// delimiter).
	void Factors2String(Word const& w,std::string& s) const
	{
		s=w.GetString(m_input,false);
	}

	// Drop all cached collections and interned phrases and let the dictionary
	// release its working memory.  NOTE(review): asserts m_dict — destructor
	// calls this, so destroying a PDTAimp on which Create() was never called
	// would trip the assert; confirm Create() is always called first.
	void CleanUp()
	{
		assert(m_dict);
		m_dict->FreeMemory();
		for(size_t i=0;i<m_tgtColls.size();++i) delete m_tgtColls[i];
		m_tgtColls.clear();
		m_cache.clear();
		m_rangeCache.clear();
		uniqSrcPhr.clear();
	}

	// Manually register a translation for 'source' (used for unknown words).
	// Precondition (asserted): no candidates exist for 'source' yet.
	void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
	{
		cerr << "AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)" << endl;
		assert(GetTargetPhraseCollection(source)==0);
		VERBOSE(2, "adding unk source phrase "<<source<<"\n");
		std::pair<MapSrc2Tgt::iterator,bool> p
			=m_cache.insert(std::make_pair(source,static_cast<TargetPhraseCollection const*>(0)));
		if(p.second || p.first->second==0)
			{
				TargetPhraseCollection *ptr=new TargetPhraseCollection;
				ptr->Add(new TargetPhrase(targetPhrase));
				p.first->second=ptr;
				m_tgtColls.push_back(ptr);
			}
		else VERBOSE(2, "WARNING: you added an already existing phrase!\n");
	}

	// Look up (and cache) the pruned candidate collection for a text-input
	// source phrase.  Returns 0 when the phrase is empty or has no candidates.
	TargetPhraseCollection const*
	GetTargetPhraseCollection(Phrase const &src) const
	{
		assert(m_dict);
		if(src.GetSize()==0) return 0;

		std::pair<MapSrc2Tgt::iterator,bool> piter;
		if(useCache)
			{
				// insert a placeholder; if one was already there, return it
				piter=m_cache.insert(std::make_pair(src,static_cast<TargetPhraseCollection const*>(0)));
				if(!piter.second) return piter.first->second;
			}
		else if (m_cache.size())
			{
				MapSrc2Tgt::const_iterator i=m_cache.find(src);
				return (i!=m_cache.end() ? i->second : 0);
			}

		std::vector<std::string> srcString(src.GetSize());
		// convert source Phrase into vector of strings
		for(size_t i=0;i<srcString.size();++i)
			{
				Factors2String(src.GetWord(i),srcString[i]);
			}

		// get target phrases in string representation
		std::vector<StringTgtCand> cands;
		std::vector<StringWordAlignmentCand> swacands;
		std::vector<StringWordAlignmentCand> twacands;
		//		m_dict->GetTargetCandidates(srcString,cands);
		m_dict->GetTargetCandidates(srcString,cands,swacands,twacands);
		if(cands.empty())
			{
				return 0;
			}

		std::vector<TargetPhrase> tCands;tCands.reserve(cands.size());
		std::vector<std::pair<float,size_t> > costs;costs.reserve(cands.size());

		// convert into TargetPhrases
		for(size_t i=0;i<cands.size();++i)
			{
				TargetPhrase targetPhrase(Output);

				StringTgtCand::first_type const& factorStrings=cands[i].first;
				StringTgtCand::second_type const& probVector=cands[i].second;
				//StringWordAlignmentCand::second_type const& swaVector=swacands[i].second;
				//StringWordAlignmentCand::second_type const& twaVector=twacands[i].second;

				// raw probabilities -> floored log scores
				std::vector<float> scoreVector(probVector.size());
				std::transform(probVector.begin(),probVector.end(),scoreVector.begin(),
											 TransformScore);
				std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),
											 FloorScore);
				CreateTargetPhrase(targetPhrase,factorStrings,scoreVector,&src);
				//CreateTargetPhrase(targetPhrase,factorStrings,scoreVector,swaVector,twaVector,&src);
				costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
				tCands.push_back(targetPhrase);
			}

		TargetPhraseCollection *rv;
		rv=PruneTargetCandidates(tCands,costs);
		if(rv->IsEmpty())
			{
				delete rv;
				return 0;
			}
		else
			{
				if(useCache) piter.first->second=rv;
				m_tgtColls.push_back(rv);
				return rv;
			}
	}

	// Load (or build, on first use) the binary phrase table and remember the
	// weights/LMs needed to score its candidates.  Exits the process if the
	// binary table cannot be read.
	void Create(const std::vector<FactorType> &input
							, const std::vector<FactorType> &output
							, const std::string &filePath
							, const std::vector<float> &weight
							, const LMList &languageModels
							, float weightWP
							)
	{

		// set my members
		m_dict=new PhraseDictionaryTree(weight.size()-m_numInputScores);
		m_input=input;
		m_output=output;
		m_languageModels=&languageModels;
		m_weightWP=weightWP;
		m_weights=weight;

		std::string binFname=filePath+".binphr.idx";
		if(!existsFile(binFname.c_str())) {
			TRACE_ERR( "bin ttable does not exist -> create it\n");
			InputFileStream in(filePath);
			m_dict->Create(in,filePath);
		}
		TRACE_ERR( "reading bin ttable\n");
		//		m_dict->Read(filePath);
		bool res=m_dict->Read(filePath);
		if (!res) {
			stringstream strme;
			strme << "bin ttable was read in a wrong way\n";
			UserMessage::Add(strme.str());
			exit(1);
		}
	}

	typedef PhraseDictionaryTree::PrefixPtr PPtr;
	typedef unsigned short Position;
	typedef std::pair<Position,Position> Range;

	// One item on the confusion-network search stack: a prefix-tree position
	// plus the source range, accumulated link scores and source phrase that
	// led to it.
	struct State {
		PPtr ptr;
		Range range;
		std::vector<float> scores;
		Phrase src;

		State() : range(0,0),scores(0),src(Input) {}
		State(Position b,Position e,const PPtr& v,const std::vector<float>& sv=std::vector<float>(0))
			: ptr(v),range(b,e),scores(sv),src(Input) {}
		State(Range const& r,const PPtr& v,const std::vector<float>& sv=std::vector<float>(0))
			: ptr(v),range(r),scores(sv),src(Input) {}

		Position begin() const {return range.first;}
		Position end() const {return range.second;}
		std::vector<float> GetScores() const {return scores;}

		friend std::ostream& operator<<(std::ostream& out,State const& s) {
			out<<" R=("<<s.begin()<<","<<s.end()<<"),";
			for(std::vector<float>::const_iterator scoreIterator = s.GetScores().begin();scoreIterator<s.GetScores().end();scoreIterator++) {
				out<<", "<<*scoreIterator;
			}
			out<<")";
			return out;
		}
	};

	// Build a TargetPhrase from per-word factor strings and an already-logged
	// score vector; 'srcPtr' is recorded as the source phrase (may be 0).
	void CreateTargetPhrase(TargetPhrase& targetPhrase,
													StringTgtCand::first_type const& factorStrings,
													StringTgtCand::second_type const& scoreVector,
													Phrase const* srcPtr=0) const
	{
		FactorCollection &factorCollection = FactorCollection::Instance();

		for(size_t k=0;k<factorStrings.size();++k)
			{
				// split each word string into its output factors
				std::vector<std::string> factors=TokenizeMultiCharSeparator(*factorStrings[k],StaticData::Instance().GetFactorDelimiter());
				Word& w=targetPhrase.AddWord();
				for(size_t l=0;l<m_output.size();++l)
					w[m_output[l]]= factorCollection.AddFactor(Output, m_output[l], factors[l]);
			}
		targetPhrase.SetScore(m_obj->GetFeature(), scoreVector, m_weights, m_weightWP, *m_languageModels);
		targetPhrase.SetSourcePhrase(srcPtr);
		//		targetPhrase.CreateAlignmentInfo("???", "???", 44);
	}

	// Keep only the tableLimit best candidates (by future cost) and wrap them
	// in a newly allocated TargetPhraseCollection (caller takes ownership).
	TargetPhraseCollection* PruneTargetCandidates(std::vector<TargetPhrase> const & tCands,
																								std::vector<std::pair<float,size_t> >& costs) const
	{
		// convert into TargetPhraseCollection
		TargetPhraseCollection *rv=new TargetPhraseCollection;

		// set limit to tableLimit or actual size, whatever is smaller
		std::vector<std::pair<float,size_t> >::iterator nth =
			costs.begin() + ((m_obj->m_tableLimit>0 && // 0 indicates no limit
												m_obj->m_tableLimit < costs.size()) ?
											 m_obj->m_tableLimit : costs.size());

		// find the nth phrase according to future cost
		std::nth_element(costs.begin(),nth ,costs.end());

		// add n top phrases to the return list
		for(std::vector<std::pair<float,size_t> >::iterator
					it = costs.begin(); it != nth; ++it)
			rv->Add(new TargetPhrase(tCands[it->second]));

		return rv;
	}

	// POD for target phrase scores
	struct TScores {
		float total;                     // best total score seen for this phrase
		StringTgtCand::second_type trans; // its full score vector
		Phrase const* src;               // interned source phrase it came from

		TScores() : total(0.0),src(0) {}
	};

	// Explore every path through the confusion network 'src', look up target
	// candidates for each covered range, and fill m_rangeCache with the pruned
	// per-range collections.  Also maintains the path statistics reported by
	// the destructor.
	void CacheSource(ConfusionNet const& src)
	{
		assert(m_dict);
		const size_t srcSize=src.GetSize();

		std::vector<size_t> exploredPaths(srcSize+1,0);
		std::vector<double> exPathsD(srcSize+1,-1.0);

		// collect some statistics
		std::vector<size_t> cnDepths(srcSize,0);
		for(size_t i=0;i<srcSize;++i) cnDepths[i]=src[i].size();

		// log-scale count of all paths of each length (sum over start points)
		for(size_t len=1;len<=srcSize;++len)
			for(size_t i=0;i<=srcSize-len;++i)
				{
					double pd=0.0; for(size_t k=i;k<i+len;++k)	pd+=log(1.0*cnDepths[k]);
					exPathsD[len]=(exPathsD[len]>=0.0 ? addLogScale(pd,exPathsD[len]) : pd);
				}

		// update global statistics
		if(pathCN.size()<=srcSize) pathCN.resize(srcSize+1,-1.0);
		for(size_t len=1;len<=srcSize;++len)
			pathCN[len]=pathCN[len]>=0.0 ? addLogScale(pathCN[len],exPathsD[len]) : exPathsD[len];

		if(path1Best.size()<=srcSize) path1Best.resize(srcSize+1,0);
		for(size_t len=1;len<=srcSize;++len) path1Best[len]+=srcSize-len+1;


		if (StaticData::Instance().GetVerboseLevel() >= 2 && exPathsD.size())
			{
				TRACE_ERR("path stats for current CN: \nCN (full):     ");
				std::transform(exPathsD.begin()+1
											 ,exPathsD.end()
											 ,std::ostream_iterator<double>(std::cerr," ")
											 ,Exp);
				TRACE_ERR("\n");
			}

		typedef StringTgtCand::first_type sPhrase;
		typedef std::map<StringTgtCand::first_type,TScores> E2Costs;

		std::map<Range,E2Costs> cov2cand;
		std::vector<State> stack;
		// seed the stack with every single-position start state
		for(Position i=0 ; i < srcSize ; ++i)
			stack.push_back(State(i, i, m_dict->GetRoot(), std::vector<float>(m_numInputScores,0.0)));

		while(!stack.empty())
			{
				State curr(stack.back());
				stack.pop_back();

				assert(curr.end()<srcSize);
				const ConfusionNet::Column &currCol=src[curr.end()];
				// in a given column, loop over all possibilities
				for(size_t colidx=0;colidx<currCol.size();++colidx)
					{
						const Word& w=currCol[colidx].first; // w=the i^th possibility in column colidx
						std::string s;
						Factors2String(w,s);
						bool isEpsilon=(s=="" || s==EPSILON);

						//assert that we have the right number of link params in this CN option
						assert(currCol[colidx].second.size() >= m_numInputScores);

						// do not start with epsilon (except at first position)
						if(isEpsilon && curr.begin()==curr.end() && curr.begin()>0) continue;

						// At a given node in the prefix tree, look to see if w defines an edge to
						// another node (Extend).  Stay at the same node if w==EPSILON
						PPtr nextP = (isEpsilon ? curr.ptr : m_dict->Extend(curr.ptr,s));

						if(nextP) // w is a word that should be considered
							{
								Range newRange(curr.begin(),curr.end()+src.GetColumnIncrement(curr.end(),colidx));

								//add together the link scores from the current state and the new arc
								float inputScoreSum = 0;
								std::vector<float> newInputScores(m_numInputScores,0.0);
								if (m_numInputScores) {
									std::transform(currCol[colidx].second.begin(), currCol[colidx].second.end(),
																 curr.GetScores().begin(),
																 newInputScores.begin(),
																 std::plus<float>());

									//we need to sum up link weights (excluding realWordCount, which isn't in numLinkParams)
									//if the sum is too low, then we won't expand this.
									//TODO: dodgy! shouldn't we consider weights here? what about zero-weight params?
									inputScoreSum = std::accumulate(newInputScores.begin(),newInputScores.begin()+m_numInputScores,0.0);
								}

								Phrase newSrc(curr.src);
								if(!isEpsilon) newSrc.AddWord(w);
								if(newRange.second<srcSize && inputScoreSum>LOWEST_SCORE)
									{
										// if there is more room to grow, add a new state onto the queue
										// to be explored that represents [begin, curEnd+)
										stack.push_back(State(newRange,nextP,newInputScores));
										stack.back().src=newSrc;
									}

								std::vector<StringTgtCand> tcands;
								// now, look up the target candidates (aprx. TargetPhraseCollection) for
								// the current path through the CN
								m_dict->GetTargetCandidates(nextP,tcands);

								if(newRange.second>=exploredPaths.size()+newRange.first)
									exploredPaths.resize(newRange.second-newRange.first+1,0);
								++exploredPaths[newRange.second-newRange.first];

								totalE+=tcands.size();

								if(tcands.size())
									{
										E2Costs& e2costs=cov2cand[newRange];
										Phrase const* srcPtr=uniqSrcPhr(newSrc);
										for(size_t i=0;i<tcands.size();++i)
											{
												//put input scores in first - already logged, just drop in directly
												std::vector<float> nscores(newInputScores);

												//resize to include phrase table scores
												nscores.resize(m_numInputScores+tcands[i].second.size(),0.0f);

												//put in phrase table scores, logging as we insert
												std::transform(tcands[i].second.begin(),tcands[i].second.end(),nscores.begin() + m_numInputScores,TransformScore);

												assert(nscores.size()==m_weights.size());

												//tally up
												float score=std::inner_product(nscores.begin(), nscores.end(), m_weights.begin(), 0.0f);

												//count word penalty
												score-=tcands[i].first.size() * m_weightWP;

												// keep only the best-scoring derivation of each target phrase
												std::pair<E2Costs::iterator,bool> p=e2costs.insert(std::make_pair(tcands[i].first,TScores()));

												if(p.second) ++distinctE;

												TScores & scores=p.first->second;
												if(p.second || scores.total<score)
													{
														scores.total=score;
														scores.trans=nscores;
														scores.src=srcPtr;
													}
											}
									}
							}
					}
			} // end while(!stack.empty())


		if (StaticData::Instance().GetVerboseLevel() >= 2 && exploredPaths.size())
			{
				TRACE_ERR("CN (explored): ");
				std::copy(exploredPaths.begin()+1,exploredPaths.end(),
									std::ostream_iterator<size_t>(std::cerr," "));
				TRACE_ERR("\n");
			}

		if(pathExplored.size()<exploredPaths.size())
			pathExplored.resize(exploredPaths.size(),0);
		for(size_t len=1;len<=srcSize;++len)
			pathExplored[len]+=exploredPaths[len];


		m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0));

		// turn the per-range best scores into pruned TargetPhraseCollections
		for(std::map<Range,E2Costs>::const_iterator i=cov2cand.begin();i!=cov2cand.end();++i)
			{
				assert(i->first.first<m_rangeCache.size());
				assert(i->first.second>0);
				assert(static_cast<size_t>(i->first.second-1)<m_rangeCache[i->first.first].size());
				assert(m_rangeCache[i->first.first][i->first.second-1]==0);

				std::vector<TargetPhrase> tCands;tCands.reserve(i->second.size());
				std::vector<std::pair<float,size_t> > costs;costs.reserve(i->second.size());

				for(E2Costs::const_iterator j=i->second.begin();j!=i->second.end();++j)
					{
						TScores const & scores=j->second;
						TargetPhrase targetPhrase(Output);
						CreateTargetPhrase(targetPhrase,j->first,scores.trans,scores.src);
						costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
						tCands.push_back(targetPhrase);
						//std::cerr << i->first.first << "-" << i->first.second << ": " << targetPhrase << std::endl;
					}

				TargetPhraseCollection *rv=PruneTargetCandidates(tCands,costs);

				if(rv->IsEmpty())
					delete rv;
				else
					{
						m_rangeCache[i->first.first][i->first.second-1]=rv;
						m_tgtColls.push_back(rv);
					}
			}
		// free memory
		m_dict->FreeMemory();
	}

	size_t GetNumInputScores() const {return m_numInputScores;}
};
}
#endif

593
src/Parameter.cpp Normal file
View File

@ -0,0 +1,593 @@
// $Id: Parameter.cpp 2855 2010-02-03 19:46:35Z abarun $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <iostream>
#include <iterator>
#include <fstream>
#include <sstream>
#include <algorithm>
#include "Parameter.h"
#include "Util.h"
#include "InputFileStream.h"
#include "UserMessage.h"
#if HAVE_CONFIG_H
#include "config.h"
#endif
using namespace std;
namespace Moses
{
/** define allowed parameters
 *
 * Fix: "output-factors" was registered twice; the second registration (with
 * the typo'd description "list if factors in the output") silently overwrote
 * the first correct one in m_description.  The duplicate has been removed.
 */
Parameter::Parameter()
{
	AddParam("beam-threshold", "b", "threshold for threshold pruning");
	AddParam("config", "f", "location of the configuration file");
	AddParam("drop-unknown", "du", "drop unknown words instead of copying them");
	AddParam("disable-discarding", "dd", "disable hypothesis discarding");
	AddParam("factor-delimiter", "fd", "specify a different factor delimiter than the default");
	AddParam("generation-file", "location and properties of the generation table");
	AddParam("global-lexical-file", "gl", "discriminatively trained global lexical translation model file");
	AddParam("input-factors", "list of factors in the input");
	AddParam("input-file", "i", "location of the input file to be translated");
	AddParam("inputtype", "text (0), confusion network (1), word lattice (2) (default = 0)");
	AddParam("labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
	AddParam("include-alignment-in-n-best", "include word alignment in the n-best list. default is false");
	AddParam("lmodel-file", "location and properties of the language models");
	AddParam("lmodel-dub", "dictionary upper bounds of language models");
	AddParam("lmstats", "L", "(1/0) compute LM backoff statistics for each translation hypothesis");
	AddParam("mapping", "description of decoding steps");
	AddParam("max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)");
	AddParam("max-trans-opt-per-coverage", "maximum number of translation options per input span (after applying mapping steps)");
	AddParam("max-phrase-length", "maximum phrase length (default 20)");
	AddParam("n-best-list", "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
	AddParam("n-best-factor", "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0");
	AddParam("print-all-derivations", "to print all derivations in search graph");
	AddParam("output-factors", "list of factors in the output");
	AddParam("phrase-drop-allowed", "da", "if present, allow dropping of source words"); //da = drop any (word); see -du for comparison
	AddParam("report-all-factors", "report all factors in output, not just first");
	AddParam("report-all-factors-in-n-best", "Report all factors in n-best-lists. Default is false");
	AddParam("report-segmentation", "t", "report phrase segmentation in the output");
	AddParam("stack", "s", "maximum stack size for histogram pruning");
	AddParam("stack-diversity", "sd", "minimum number of hypothesis of each coverage in stack (default 0)");
	AddParam("translation-details", "T", "for each best translation hypothesis, print out details about what sourcce spans were used, dropped");
	AddParam("ttable-file", "location and properties of the translation tables");
	AddParam("ttable-limit", "ttl", "maximum number of translation table entries per input phrase");
	AddParam("translation-option-threshold", "tot", "threshold for translation options relative to best for input phrase");
	AddParam("early-discarding-threshold", "edt", "threshold for constructing hypotheses based on estimate cost");
	AddParam("verbose", "v", "verbosity level of the logging");
	AddParam("weight-d", "d", "weight(s) for distortion (reordering components)");
	AddParam("weight-generation", "g", "weight(s) for generation components");
	AddParam("weight-i", "I", "weight(s) for word insertion - used for parameters from confusion network and lattice input links");
	AddParam("weight-l", "lm", "weight(s) for language models");
	AddParam("weight-lex", "lex", "weight for global lexical model");
	AddParam("weight-t", "tm", "weights for translation model components");
	AddParam("weight-w", "w", "weight for word penalty");
	AddParam("weight-u", "u", "weight for unknown word penalty");
	AddParam("weight-e", "e", "weight for word deletion");
	AddParam("weight-file", "wf", "file containing labeled weights");
	AddParam("cache-path", "?");
	AddParam("distortion-limit", "dl", "distortion (reordering) limit in maximum number of words (0 = monotone, -1 = unlimited)");
	AddParam("monotone-at-punctuation", "mp", "do not reorder over punctuation");
	AddParam("distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables");
	AddParam("distortion", "configurations for each factorized/lexicalized reordering model.");
	AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'ignore'");
	AddParam("minimum-bayes-risk", "mbr", "use miminum Bayes risk to determine best translation");
	AddParam("lminimum-bayes-risk", "lmbr", "use lattice miminum Bayes risk to determine best translation");
	AddParam("mbr-size", "number of translation candidates considered in MBR decoding (default 200)");
	AddParam("mbr-scale", "scaling factor to convert log linear score probability in MBR decoding (default 1.0)");
	AddParam("lmbr-thetas", "theta(s) for lattice mbr calculation");
	AddParam("lmbr-pruning-factor", "average number of nodes/word wanted in pruned lattice");
	AddParam("lmbr-p", "unigram precision value for lattice mbr");
	AddParam("lmbr-r", "ngram precision decay value for lattice mbr");
	AddParam("lattice-hypo-set", "to use lattice as hypo set during lattice MBR");
	AddParam("use-persistent-cache", "cache translation options across sentences (default true)");
	AddParam("persistent-cache-size", "maximum size of cache for translation options (default 10,000 input phrases)");
	AddParam("recover-input-path", "r", "(conf net/word lattice only) - recover input path corresponding to the best translation");
	AddParam("output-word-graph", "owg", "Output stack info as word graph. Takes filename, 0=only hypos in stack, 1=stack + nbest hypos");
	AddParam("time-out", "seconds after which is interrupted (-1=no time-out, default is -1)");
	AddParam("output-search-graph", "osg", "Output connected hypotheses of search into specified filename");
	AddParam("output-search-graph-extended", "osgx", "Output connected hypotheses of search into specified filename, in extended format");
#ifdef HAVE_PROTOBUF
	AddParam("output-search-graph-pb", "pb", "Write phrase lattice to protocol buffer objects in the specified path.");
#endif
	AddParam("cube-pruning-pop-limit", "cbp", "How many hypotheses should be popped for each stack. (default = 1000)");
	AddParam("cube-pruning-diversity", "cbd", "How many hypotheses should be created for each coverage. (default = 0)");
	AddParam("search-algorithm", "Which search algorithm to use. 0=normal stack, 1=cube pruning, 2=cube growing. (default = 0)");
	AddParam("constraint", "Location of the file with target sentences to produce constraining the search");
	AddParam("use-alignment-info", "Use word-to-word alignment: actually it is only used to output the word-to-word alignment. Word-to-word alignments are taken from the phrase table if any. Default is false.");
	AddParam("print-alignment-info", "Output word-to-word alignment into the log file. Word-to-word alignments are takne from the phrase table if any. Default is false");
	AddParam("print-alignment-info-in-n-best", "Include word-to-word alignment in the n-best list. Word-to-word alignments are takne from the phrase table if any. Default is false");
	AddParam("link-param-count", "Number of parameters on word links when using confusion networks or lattices (default = 1)");
	AddParam("description", "Source language, target language, description");

	/*******************************Loading DPR model**********************************************/
	AddParam("DPR-file","DPR-file","Model file for the DPR model");
	AddParam("weight-DPR","weight-DPR","weight for the DPR model");
	AddParam("class-DPR","class-DPR","the number of orientations for the DPR model");
	/*******************************Loading DPR model**********************************************/
}
/** destructor: members (maps of strings) release themselves */
Parameter::~Parameter() {}
/** register a parameter name as valid and record its help text
 * (no abbreviation variant); called from the constructor */
void Parameter::AddParam(const string &paramName, const string &description)
{
	m_description[paramName] = description;
	m_valid[paramName] = true;
}
/** register a parameter under both its full name and an abbreviation;
 * the help text and the abbreviation mapping are stored against the
 * full name only, but both spellings are accepted on the command line */
void Parameter::AddParam(const string &paramName, const string &abbrevName, const string &description)
{
	m_description[paramName] = description;
	m_abbreviation[paramName] = abbrevName;
	m_valid[abbrevName] = true;
	m_valid[paramName] = true;
}
/** print descriptions of all parameters */
void Parameter::Explain() {
cerr << "Usage:" << endl;
for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++)
{
const string paramName = iterParam->first;
const string paramDescription = iterParam->second;
cerr << "\t-" << paramName;
PARAM_STRING::const_iterator iterAbbr = m_abbreviation.find( paramName );
if ( iterAbbr != m_abbreviation.end() )
cerr << " (" << iterAbbr->second << ")";
cerr << ": " << paramDescription << endl;
}
}
/** check whether an item on the command line is a switch or a value;
 * a leading '-' followed by a non-digit is a switch, while '-' followed
 * by a digit is treated as a negative number, i.e. a value
 * \param token token on the command line to be checked **/
bool Parameter::isOption(const char* token) {
	if (token == NULL) return false;
	std::string str(token);
	// anything not starting with '-' is a plain value
	if (str.size() > 0 && str[0] != '-') return false;
	// "-x" style: a switch only when the second char is not a digit
	if (str.size() > 1 && str.find_first_not_of("0123456789", 1) == 1)
		return true;
	return false;
}
/** load all parameters as if the minimal command line
 * "executable -f filePath" had been given */
bool Parameter::LoadParam(const string &filePath)
{
	const char *fakeArgv[] = { "executable", "-f", filePath.c_str() };
	return LoadParam(3, (char**) fakeArgv);
}
/** load all parameters from the configuration file and the command line switches */
bool Parameter::LoadParam(int argc, char* argv[])
{
// config file (-f) arg mandatory
string configPath;
if ( (configPath = FindParam("-f", argc, argv)) == ""
&& (configPath = FindParam("-config", argc, argv)) == "")
{
PrintCredit();
UserMessage::Add("No configuration file was specified. Use -config or -f");
return false;
}
else
{
if (!ReadConfigFile(configPath))
{
UserMessage::Add("Could not read "+configPath);
return false;
}
}
// overwrite parameters with values from switches
for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++)
{
const string paramName = iterParam->first;
OverwriteParam("-" + paramName, paramName, argc, argv);
}
// ... also shortcuts
for(PARAM_STRING::const_iterator iterParam = m_abbreviation.begin(); iterParam != m_abbreviation.end(); iterParam++)
{
const string paramName = iterParam->first;
const string paramShortName = iterParam->second;
OverwriteParam("-" + paramShortName, paramName, argc, argv);
}
// logging of parameters that were set in either config or switch
int verbose = 1;
if (m_setting.find("verbose") != m_setting.end() &&
m_setting["verbose"].size() > 0)
verbose = Scan<int>(m_setting["verbose"][0]);
if (verbose >= 1) { // only if verbose
TRACE_ERR( "Defined parameters (per moses.ini or switch):" << endl);
for(PARAM_MAP::const_iterator iterParam = m_setting.begin() ; iterParam != m_setting.end(); iterParam++) {
TRACE_ERR( "\t" << iterParam->first << ": ");
for ( size_t i = 0; i < iterParam->second.size(); i++ )
TRACE_ERR( iterParam->second[i] << " ");
TRACE_ERR( endl);
}
}
// check for illegal parameters
bool noErrorFlag = true;
for (int i = 0 ; i < argc ; i++)
{
if (isOption(argv[i]))
{
string paramSwitch = (string) argv[i];
string paramName = paramSwitch.substr(1);
if (m_valid.find(paramName) == m_valid.end())
{
UserMessage::Add("illegal switch: " + paramSwitch);
noErrorFlag = false;
}
}
}
// check if parameters make sense
return Validate() && noErrorFlag;
}
/** check that parameter settings make sense:
 * - a phrase table (ttable-file) must be given
 * - per-LM counts (lmodel-dub, weight-l) must match the number of LM files
 * - referenced table/input files must exist on disk
 * \return true if the configuration passes every check
 * NOTE: m_setting[key] default-inserts an empty entry when the key is
 * absent; that is harmless here since only .size() is inspected */
bool Parameter::Validate()
{
	bool noErrorFlag = true;

	// required parameters
	if (m_setting["ttable-file"].size() == 0)
	{
		UserMessage::Add("No phrase translation table (ttable-file)");
		noErrorFlag = false;
	}

	// if LM upper bounds are given, there must be one per LM file
	if (m_setting["lmodel-dub"].size() > 0)
	{
		if (m_setting["lmodel-file"].size() != m_setting["lmodel-dub"].size())
		{
			stringstream errorMsg("");
			errorMsg << "Config and parameters specify "
				<< static_cast<int>(m_setting["lmodel-file"].size())
				<< " language model files (lmodel-file), but "
				<< static_cast<int>(m_setting["lmodel-dub"].size())
				<< " LM upperbounds (lmodel-dub)"
				<< endl;
			UserMessage::Add(errorMsg.str());
			noErrorFlag = false;
		}
	}

	// exactly one LM weight (weight-l) per LM file
	if (m_setting["lmodel-file"].size() != m_setting["weight-l"].size())
	{
		stringstream errorMsg("");
		errorMsg << "Config and parameters specify "
			<< static_cast<int>(m_setting["lmodel-file"].size())
			<< " language model files (lmodel-file), but "
			<< static_cast<int>(m_setting["weight-l"].size())
			<< " weights (weight-l)";
		errorMsg << endl << "You might be giving '-lmodel-file TYPE FACTOR ORDER FILENAME' but you should be giving these four as a single argument, i.e. '-lmodel-file \"TYPE FACTOR ORDER FILENAME\"'";
		UserMessage::Add(errorMsg.str());
		noErrorFlag = false;
	}

	// do files exist?

	// phrase tables
	if (noErrorFlag)
	{
		std::vector<std::string> ext;
		// standard phrase table extension (i.e. full name has to be specified)
		// raw tables in either un compressed or compressed form
		ext.push_back("");
		ext.push_back(".gz");
		// alternative file extension for binary phrase table format:
		ext.push_back(".binphr.idx");
		noErrorFlag = FilesExist("ttable-file", 3,ext);
	}
	// language model
	// (existence check disabled — kept for reference)
	//	if (noErrorFlag)
	//		noErrorFlag = FilesExist("lmodel-file", 3);
	// input file
	if (noErrorFlag && m_setting["input-file"].size() == 1)
	{
		noErrorFlag = FileExists(m_setting["input-file"][0]);
	}
	// generation tables
	if (noErrorFlag)
	{
		std::vector<std::string> ext;
		//raw tables in either un compressed or compressed form
		ext.push_back("");
		ext.push_back(".gz");
		noErrorFlag = FilesExist("generation-file", 3, ext);
	}
	// distortion
	if (noErrorFlag)
	{
		std::vector<std::string> ext;
		//raw tables in either un compressed or compressed form
		ext.push_back("");
		ext.push_back(".gz");
		//prefix tree format
		ext.push_back(".binlexr.idx");
		noErrorFlag = FilesExist("distortion-file", 3, ext);
	}
	return noErrorFlag;
}
/** check that the file referenced by each value of a path-valued
 * parameter exists, probing a list of alternative extensions
 * \param paramName parameter whose values contain the paths
 * \param tokenizeIndex whitespace-token index of the path within each value
 * \param extensions suffixes appended in turn when probing for the file
 * \return true if the parameter is unset, or every path exists under
 * at least one extension; reports via UserMessage and returns false
 * on the first failure */
bool Parameter::FilesExist(const string &paramName, size_t tokenizeIndex,std::vector<std::string> const& extensions)
{
	typedef std::vector<std::string> StringVec;

	PARAM_MAP::const_iterator iterParam = m_setting.find(paramName);
	if (iterParam == m_setting.end())
	{ // no param. therefore nothing to check
		return true;
	}
	const StringVec &pathVec = (*iterParam).second;
	for (StringVec::const_iterator iter = pathVec.begin() ; iter != pathVec.end() ; ++iter)
	{
		StringVec vec = Tokenize(*iter);
		if (tokenizeIndex >= vec.size())
		{
			stringstream errorMsg("");
			// BUGFIX: message previously read "emtry"
			errorMsg << "Expected at least " << (tokenizeIndex+1) << " tokens per entry in '"
				<< paramName << "', but only found "
				<< vec.size();
			UserMessage::Add(errorMsg.str());
			return false;
		}
		const string &pathStr = vec[tokenizeIndex];

		// accept the file if it exists under any of the given extensions
		bool fileFound = false;
		for (size_t i = 0; i < extensions.size() && !fileFound; ++i)
		{
			fileFound = FileExists(pathStr + extensions[i]);
		}
		if (!fileFound)
		{
			stringstream errorMsg("");
			errorMsg << "File " << pathStr << " does not exist";
			UserMessage::Add(errorMsg.str());
			return false;
		}
	}
	return true;
}
/** scan the command line for a switch and return the token after it
 * \return the value following paramSwitch, or "" when the switch is
 * absent or is the last token (in which case an error is reported
 * but scanning continues) */
// TODO arg parsing like this does not belong in the library, it belongs
// in moses-cmd
string Parameter::FindParam(const string &paramSwitch, int argc, char* argv[])
{
	for (int pos = 0; pos < argc; pos++)
	{
		if (paramSwitch != string(argv[pos]))
			continue;
		if (pos + 1 < argc)
			return argv[pos + 1];
		stringstream errorMsg("");
		errorMsg << "Option " << paramSwitch << " requires a parameter!";
		UserMessage::Add(errorMsg.str());
		// TODO return some sort of error, not the empty string
	}
	return "";
}
/** update parameter settings with command line switches
 * \param paramSwitch (potentially short) name of switch
 * \param paramName full name of parameter
 * \param argc number of arguments on command line
 * \param argv values of paramters on command line */
void Parameter::OverwriteParam(const string &paramSwitch, const string &paramName, int argc, char* argv[])
{
	// locate the switch; nothing to do if it never occurs
	int valuePos = -1;
	for (int i = 0; i < argc; i++)
	{
		if (string(argv[i]) == paramSwitch)
		{
			valuePos = i + 1;
			break;
		}
	}
	if (valuePos < 0)
		return;

	m_setting[paramName]; // defines the parameter, important for boolean switches
	// consume value tokens up to the next switch, overwriting existing
	// entries in place and appending any extras
	for (int index = 0; valuePos < argc && !isOption(argv[valuePos]); ++index, ++valuePos)
	{
		if ((size_t)index < m_setting[paramName].size())
			m_setting[paramName][index] = argv[valuePos];
		else
			m_setting[paramName].push_back(argv[valuePos]);
	}
}
/** read parameters from a configuration file: a line of the form
 * "[name]" opens a parameter section, '#' starts a comment, and every
 * following non-empty line is appended as a value of that parameter
 * \return true (read errors are currently not detected) */
bool Parameter::ReadConfigFile( string filePath )
{
	InputFileStream inFile(filePath);
	string line, paramName;
	while(getline(inFile, line))
	{
		// comments
		size_t comPos = line.find_first_of("#");
		if (comPos != string::npos)
			line = line.substr(0, comPos);
		// trim leading and trailing spaces/tabs
		line = Trim(line);
		// BUGFIX: guard before indexing — line[0] on an empty string is
		// undefined behaviour pre-C++11 (blank/comment-only lines hit this)
		if (line.empty())
			continue;
		if (line[0]=='[')
		{ // new parameter
			for (size_t currPos = 0 ; currPos < line.size() ; currPos++)
			{
				if (line[currPos] == ']')
				{
					paramName = line.substr(1, currPos - 1);
					break;
				}
			}
		}
		else
		{ // add value to parameter
			m_setting[paramName].push_back(line);
		}
	}
	return true;
}
/** one contributor entry, used by Parameter::PrintCredit() */
struct Credit
{
	std::string name, contact, currentPursuits, areaResponsibility;

	/** store all fields; const references and a member-initializer list
	 * replace the pass-by-value copies the old constructor made */
	Credit(const std::string &name, const std::string &contact,
	       const std::string &currentPursuits, const std::string &areaResponsibility)
		: name(name)
		, contact(contact)
		, currentPursuits(currentPursuits)
		, areaResponsibility(areaResponsibility)
	{
	}

	/** order entries with an area of responsibility first, then by name */
	bool operator<(const Credit &other) const
	{
		if (areaResponsibility.size() != 0 && other.areaResponsibility.size() ==0)
			return true;
		if (areaResponsibility.size() == 0 && other.areaResponsibility.size() !=0)
			return false;
		return name < other.name;
	}
};

/** pretty-print one Credit entry, omitting empty fields */
std::ostream& operator<<(std::ostream &os, const Credit &credit)
{
	os << credit.name;
	if (credit.contact != "")
		os << "\n contact: " << credit.contact;
	if (credit.currentPursuits != "")
		os << "\n " << credit.currentPursuits;
	if (credit.areaResponsibility != "")
		os << "\n I'll answer question on: " << credit.areaResponsibility;
	os << std::endl;
	return os;
}
/** print the LGPL license banner, build date and the list of
 * contributors to stderr; called when no configuration file is given */
void Parameter::PrintCredit()
{
	// collect all contributors; Credit::operator< sorts people with an
	// area of responsibility first, then alphabetically by name
	vector<Credit> everyone;
	everyone.push_back(Credit("Nicola Bertoldi"
		, "911"
		, ""
		, "scripts & other stuff"));
	everyone.push_back(Credit("Ondrej Bojar"
		, ""
		, "czech this out!"
		, ""));
	everyone.push_back(Credit("Chris Callison-Burch"
		, "anytime, anywhere"
		, "international playboy"
		, ""));
	everyone.push_back(Credit("Alexandra Constantin"
		, ""
		, "eu sunt varza"
		, ""));
	everyone.push_back(Credit("Brooke Cowan"
		, "brooke@csail.mit.edu"
		, "if you're going to san francisco, be sure to wear a flower in your hair"
		, ""));
	everyone.push_back(Credit("Chris Dyer"
		, "can't. i'll be out driving my mustang"
		, "driving my mustang"
		, ""));
	everyone.push_back(Credit("Marcello Federico"
		, "federico at itc at it"
		, "Researcher at ITC-irst, Trento, Italy"
		, "IRST language model"));
	everyone.push_back(Credit("Evan Herbst"
		, "Small college in upstate New York"
		, ""
		, ""));
	everyone.push_back(Credit("Philipp Koehn"
		, "only between 2 and 4am"
		, ""
		, "Nothing fazes this dude"));
	everyone.push_back(Credit("Christine Moran"
		, "weird building at MIT"
		, ""
		, ""));
	everyone.push_back(Credit("Wade Shen"
		, "via morse code"
		, "buying another laptop"
		, ""));
	everyone.push_back(Credit("Richard Zens"
		, "richard at aachen dot de"
		, ""
		, "ambiguous source input, confusion networks, confusing source code"));
	everyone.push_back(Credit("Hieu Hoang", "http://www.hoang.co.uk/hieu/"
		, "phd student at Edinburgh Uni. Original Moses developer"
		, "general queries/ flames on Moses. Doing stuff on async factored translation, so anything on that as well"));

	sort(everyone.begin(), everyone.end());

	// license banner and build date
	cerr << "Moses - A beam search decoder for phrase-based statistical machine translation models" << endl
		<< "Copyright (C) 2006 University of Edinburgh" << endl << endl
		<< "This library is free software; you can redistribute it and/or" << endl
		<< "modify it under the terms of the GNU Lesser General Public" << endl
		<< "License as published by the Free Software Foundation; either" << endl
		<< "version 2.1 of the License, or (at your option) any later version." << endl << endl
		<< "This library is distributed in the hope that it will be useful," << endl
		<< "but WITHOUT ANY WARRANTY; without even the implied warranty of" << endl
		<< "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU" << endl
		<< "Lesser General Public License for more details." << endl << endl
		<< "You should have received a copy of the GNU Lesser General Public" << endl
		<< "License along with this library; if not, write to the Free Software" << endl
		<< "Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA" << endl << endl
		<< "***********************************************************************" << endl << endl
		<< "Built on " << __DATE__ << endl << endl
		<< "CREDITS" << endl << endl;

	// stream the sorted entries via Credit's operator<<
	ostream_iterator<Credit> out(cerr, "\n");
	copy(everyone.begin(), everyone.end(), out);
	cerr << endl << endl;
}
}

Some files were not shown because too many files have changed in this diff Show More