- finally updated version for async-factors -- this is not complete yet

- Simple, naive implementation: gets around stack pruning issues by decoding factors separately. TODOs: future cost estimation, n-best list generation, bugs. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/async-factors@825 1f5c12ca-751b-0410-a591-d2e778427230
2024-09-11 19:27:11 +03:00 · 2006-09-21 14:45:33 +00:00 · 2006-09-21 14:45:33 +00:00 · 576553993b
commit 576553993b
parent c7c83c50f1
24 changed files with 1136 additions and 607 deletions
--- a/moses-cmd/src/IOCommandLine.cpp
+++ b/moses-cmd/src/IOCommandLine.cpp
@ -100,7 +100,7 @@ void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<
 {
 	if ( hypo != NULL)
 	{
-		OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSourceSpan, reportAllFactors);
+		if (hypo->GetPTID() == -1) OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSourceSpan, reportAllFactors);
 		OutputSurface(out, hypo->GetTargetPhrase(), outputFactorOrder, reportAllFactors);

        if (reportSourceSpan == true
--- a/moses-cmd/src/Main.cpp
+++ b/moses-cmd/src/Main.cpp
@ -132,7 +132,7 @@ int main(int argc, char* argv[])
 					LatticePathList nBestList;
 					manager.CalcNBest(nBestSize, nBestList);
 					inputOutput->SetNBest(nBestList, source->GetTranslationId());
-					RemoveAllInColl< LatticePathList::iterator > (nBestList);
+					RemoveAllInColl(nBestList);
 				}

 			if (staticData.IsDetailedTranslationReportingEnabled()) {
--- a/moses/src/DecodeStep.h
+++ b/moses/src/DecodeStep.h
@ -24,6 +24,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include <cassert>
 #include "TypeDef.h"
 #include "Dictionary.h"
+#include "Word.h"
+#include "ScoreComponentCollection.h"

 class PhraseDictionaryBase;
 class GenerationDictionary;
@ -32,6 +34,13 @@ class TranslationOptionCollection;
 class PartialTranslOptColl;
 class FactorCollection;
 class InputType;
+class Phrase;
+
+typedef std::pair<Word, ScoreComponentCollection2> WordPair;
+typedef std::list< WordPair > WordList;
+// 1st = word
+// 2nd = score
+typedef std::list< WordPair >::const_iterator WordListIterator;

 /** Specification for a decoding step.
 * The factored translation model consists of Translation and Generation
@ -52,6 +61,10 @@ public:
 	DecodeStep(Dictionary *ptr, const DecodeStep* prevDecodeStep);
 	virtual ~DecodeStep();

+	// Crude run-time type tag (a hack): TranslationDecodeStep returns 0, GenerationDecodeStep returns 1.
+	virtual const int GetType() const =0;
+	virtual int GenerateOptions(std::vector<WordList>& wordListVector, const Phrase& targetPhrase) { return 0; };
+
 	/** mask of factors that are present after this decode step */
 	const FactorMask& GetOutputFactorMask() const
 	{
--- a/moses/src/DecodeStep_Generation.cpp
+++ b/moses/src/DecodeStep_Generation.cpp
@ -74,6 +74,47 @@ inline void IncrementIterators(vector< WordListIterator > &wordListIterVector
    }
 }

+int GenerationDecodeStep::GenerateOptions(vector<WordList>& wordListVector, const Phrase& targetPhrase)
+{
+	size_t targetLength = targetPhrase.GetSize();
+	const GenerationDictionary& generationDictionary = GetGenerationDictionary();
+  // create generation list
+  int wordListVectorPos = 0;
+  for (size_t currPos = 0 ; currPos < targetLength ; currPos++) // going thorugh all words
+    {
+      // generatable factors for this word to be put in wordList
+      WordList &wordList = wordListVector[wordListVectorPos];
+      const FactorArray &factorArray = targetPhrase.GetFactorArray(currPos);
+
+      // consult dictionary for possible generations for this word
+      const OutputWordCollection *wordColl = generationDictionary.FindWord(factorArray);
+
+      if (wordColl == NULL)
+        { // word not found in generation dictionary
+          
+					// NOTE: Do nothing right now, fix later
+					//toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
+          return 0; // can't be part of a phrase, special handling
+        }
+      else
+        {
+          // sort(*wordColl, CompareWordCollScore);
+          OutputWordCollection::const_iterator iterWordColl;
+          for (iterWordColl = wordColl->begin() ; iterWordColl != wordColl->end(); ++iterWordColl)
+            {
+              const Word &outputWord = (*iterWordColl).first;
+              const ScoreComponentCollection2& score = (*iterWordColl).second;
+              // enter into word list generated factor(s) and its(their) score(s)
+              wordList.push_back(WordPair(outputWord, score));
+            }
+
+          wordListVectorPos++; // done, next word
+        }
+    }
+	return wordListVectorPos;
+}
+
+
 void GenerationDecodeStep::Process(const TranslationOption &inputPartialTranslOpt
                              , const DecodeStep &decodeStep
                              , PartialTranslOptColl &outputPartialTranslOptColl
@ -113,7 +154,7 @@ void GenerationDecodeStep::Process(const TranslationOption &inputPartialTranslOp

      if (wordColl == NULL)
        { // word not found in generation dictionary
-          //toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
+          toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
          return; // can't be part of a phrase, special handling
        }
      else
--- a/moses/src/DecodeStep_Generation.h
+++ b/moses/src/DecodeStep_Generation.h
@ -23,9 +23,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #define _Generation_DECODE_STEP_H_

 #include "DecodeStep.h"
+#include "Word.h"
+#include "ScoreComponentCollection.h"

 class GenerationDictionary;
 class Phrase;
+class WordsRange;
 class ScoreComponentCollection2;

 class GenerationDecodeStep : public DecodeStep
@ -33,9 +36,12 @@ class GenerationDecodeStep : public DecodeStep
 public:
 	GenerationDecodeStep(GenerationDictionary* dict, const DecodeStep* prev);

+	const int GetType() const { return 1; };
+
  /** returns phrase table (dictionary) for translation step */
  const GenerationDictionary &GetGenerationDictionary() const;

+	int GenerateOptions(std::vector<WordList>& wordListVector, const Phrase& targetPhrase);
  virtual void Process(const TranslationOption &inputPartialTranslOpt
                              , const DecodeStep &decodeStep
                              , PartialTranslOptColl &outputPartialTranslOptColl
--- a/moses/src/DecodeStep_Translation.h
+++ b/moses/src/DecodeStep_Translation.h
@ -32,6 +32,9 @@ class TranslationDecodeStep : public DecodeStep
 public:
 	TranslationDecodeStep(PhraseDictionaryBase* dict, const DecodeStep* prev);

+	// type tag for DecodeStep::GetType(): 0 identifies a translation step (still a hack)
+	const int GetType() const { return 0; };
+
  /** returns phrase table (dictionary) for translation step */
  const PhraseDictionaryBase &GetPhraseDictionary() const;

--- a/moses/src/GenerationDictionary.cpp
+++ b/moses/src/GenerationDictionary.cpp
@ -71,7 +71,8 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input
 		vector<string> token = Tokenize( line );
 		
 		// add each line in generation file into class
-		Word inputWord, outputWord;
+		Word *inputWord = new Word();
+		Word outputWord;

 		// create word with certain factors filled out

@ -81,7 +82,7 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input
 		{
 			FactorType factorType = input[i];
 			const Factor *factor = factorCollection.AddFactor( direction, factorType, factorString[i]);
-			inputWord.SetFactor(factorType, factor);
+			inputWord->SetFactor(factorType, factor);
 		}

 		factorString = Tokenize( token[1], "|" );
@ -112,6 +113,11 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input

 GenerationDictionary::~GenerationDictionary()
 {
+	std::map<const FactorArrayWrapper* , OutputWordCollection>::const_iterator iter;
+	for (iter = m_collection.begin() ; iter != m_collection.end() ; ++iter)
+	{
+		delete iter->first;
+	}
 }

 unsigned int GenerationDictionary::GetNumScoreComponents() const
@ -127,18 +133,15 @@ const std::string GenerationDictionary::GetScoreProducerDescription() const
 const OutputWordCollection *GenerationDictionary::FindWord(const FactorArray &factorArray) const
 {
 	const OutputWordCollection *ret;
-	Word word;
-	Word::Copy(word.GetFactorArray(), factorArray);
 	
-	std::map<Word , OutputWordCollection>::const_iterator iter = m_collection.find(word);
+	FactorArrayWrapper wrapper(factorArray);
+	std::map<const FactorArrayWrapper* , OutputWordCollection>::const_iterator iter = m_collection.find(&wrapper);
 	if (iter == m_collection.end())
 	{ // can't find source phrase
-	  cerr << "Can't find: " << word << "\n";
 		ret = NULL;
 	}
 	else
 	{
-	  cerr << "FOUND: " << word << "\n";
 		ret = &iter->second;
 	}
 	return ret;
--- a/moses/src/GenerationDictionary.h
+++ b/moses/src/GenerationDictionary.h
@ -31,6 +31,15 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

 class FactorCollection;

+struct FactorArrayWrapperComparer
+{
+	//! orders two wrappers by the values they point to (*a < *b), not by pointer address
+	bool operator()(const FactorArrayWrapper *a, const FactorArrayWrapper *b) const
+	{
+		return *a < *b;
+	}
+};
+
 typedef std::map < Word , ScoreComponentCollection2 > OutputWordCollection;
 		// 1st = output phrase
 		// 2nd = log probability (score)
@ -38,7 +47,7 @@ typedef std::map < Word , ScoreComponentCollection2 > OutputWordCollection;
 class GenerationDictionary : public Dictionary, public ScoreProducer
 {
 protected:
-	std::map<Word , OutputWordCollection> m_collection;
+	std::map<const FactorArrayWrapper* , OutputWordCollection, FactorArrayWrapperComparer> m_collection;
 	// 1st = source
 	// 2nd = target
 	std::string						m_filename;
--- a/moses/src/Hypothesis.cpp
+++ b/moses/src/Hypothesis.cpp
@ -39,8 +39,10 @@ using namespace std;

 unsigned int Hypothesis::s_HypothesesCreated = 0;
 ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
+unsigned long Hypothesis::scoredLMs;
+unsigned long Hypothesis::maskedLMs;

-Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
+Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget, int ptid)
 	: m_prevHypo(NULL)
 	, m_targetPhrase(emptyTarget)
 	, m_sourcePhrase(0)
@ -55,39 +57,129 @@ Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
 {	// used for initial seeding of trans process	
 	// initialize scores
 	_hash_computed = false;
+	m_ptid = ptid;
+	m_targetLen = 0;
+	maskedLMs = scoredLMs = 0x0;
 	ResetScore();	
 }

 /***
 * continue prevHypo by appending the phrases in transOpt
 */
-Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
+Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt, int ptid)
 	: m_prevHypo(&prevHypo)
-	, m_targetPhrase(transOpt.GetTargetPhrase())
-	, m_sourcePhrase(0)
-	, m_sourceCompleted				(prevHypo.m_sourceCompleted )
-	, m_sourceInput						(prevHypo.m_sourceInput)
-	, m_currSourceWordsRange	(transOpt.GetSourceWordsRange())
-	, m_currTargetWordsRange	( prevHypo.m_currTargetWordsRange.GetEndPos() + 1
-														 ,prevHypo.m_currTargetWordsRange.GetEndPos() + transOpt.GetTargetPhrase().GetSize())
-	, m_wordDeleted(false)
-	,	m_totalScore(0.0f)
-	,	m_futureScore(0.0f)
-	, m_scoreBreakdown				(prevHypo.m_scoreBreakdown)
-	, m_languageModelStates(prevHypo.m_languageModelStates)
-	, m_arcList(NULL)
-	, m_id(s_HypothesesCreated++)
+		, m_targetPhrase(ptid == -1 ? transOpt.GetTargetPhrase() : (*(new Phrase(prevHypo.m_targetPhrase))))
+		, m_sourcePhrase(0)
+		, m_sourceCompleted				(prevHypo.m_sourceCompleted )
+		, m_sourceInput						(prevHypo.m_sourceInput)
+		, m_currSourceWordsRange	(transOpt.GetSourceWordsRange())
+		, m_currTargetWordsRange	( prevHypo.m_currTargetWordsRange.GetEndPos() + 1
+																,prevHypo.m_currTargetWordsRange.GetEndPos() + transOpt.GetTargetPhrase().GetSize())
+		, m_wordDeleted(false)
+		,	m_totalScore(0.0f)
+		,	m_futureScore(0.0f)
+		, m_scoreBreakdown				(prevHypo.m_scoreBreakdown)
+		, m_languageModelStates(prevHypo.m_languageModelStates)
+		, m_arcList(NULL)
+		, m_id(s_HypothesesCreated++)
 {
 	// assert that we are not extending our hypothesis by retranslating something
 	// that this hypothesis has already translated!
 	assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange));	

+	m_ptid = ptid;
+	//m_targetLen = 0;
+	if (m_ptid > -1)
+		{ // merge with existing factors: target phrase must already be full length
+			// assumes that phrases have already been checked for compatibility
+			(const_cast<Phrase &>(m_targetPhrase)).MergeFactorsPartial(transOpt.GetTargetPhrase(), prevHypo.m_targetLen);
+			m_targetLen = prevHypo.GetTargetLen() + transOpt.GetTargetPhrase().GetSize();
+			cerr << "\t* " << "secondary hyp: " << m_targetLen << " " << m_currSourceWordsRange << "\n";
+		}
+
 	_hash_computed = false;
+	//maskedLMs = scoredLMs = 0x0;
  m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true);
  m_wordDeleted = transOpt.IsDeletionOption();
 	m_scoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
 }

+// Transfer hypothesis
+Hypothesis::Hypothesis(const Hypothesis &orig, int ptid)
+	: m_prevHypo(&orig)
+		, m_targetPhrase(ptid == 1 ? *(new Phrase(Output)) : *(new Phrase(orig.m_targetPhrase)))
+		, m_sourcePhrase(0)
+		, m_sourceCompleted				(orig.m_sourceCompleted.GetSize())
+		, m_sourceInput						(orig.m_sourceInput)
+		, m_currSourceWordsRange	(NOT_FOUND, NOT_FOUND)
+		, m_currTargetWordsRange	(NOT_FOUND, NOT_FOUND)
+		, m_wordDeleted(false)
+		,	m_totalScore(orig.m_totalScore)
+		,	m_futureScore(orig.m_futureScore)
+		, m_scoreBreakdown				(orig.m_scoreBreakdown)
+		, m_languageModelStates(orig.m_languageModelStates)
+		, m_arcList(NULL)
+		, m_id(s_HypothesesCreated++)
+{
+	m_ptid = ptid;
+	m_targetLen = 0;
+	//maskedLMs = scoredLMs = 0x0;
+	// if ptid == 1 (this comment originally said 0 -- confirm intent), rebuild the target phrase from every hypothesis in the chain: the entire sentence so far
+	if (ptid == 1)
+		{
+			list<const Hypothesis *> tmp;
+			for (const Hypothesis *x = &orig; x != NULL; x = x->m_prevHypo)
+				tmp.push_front(x);
+			list<const Hypothesis *>::const_iterator i;
+			for (i = tmp.begin(); i != tmp.end(); i++)
+				for (unsigned int j = 0; j < (*i)->GetSize(); j++)
+					{
+						(const_cast<Phrase &>(m_targetPhrase)).push_back((*i)->m_targetPhrase.GetFactorArray(j));
+					}
+			cerr << "INFO: Doing Transfer... Current Target String: **[" 
+					 << orig.m_targetPhrase << ", " << m_targetPhrase << "]**" << std::endl;
+		}
+	_hash_computed = false;
+}
+
+Hypothesis::Hypothesis(const Hypothesis &orig, Phrase& genph, ScoreComponentCollection2& generationScore, int ptid)
+	: m_prevHypo(&orig)
+		, m_targetPhrase(*(new Phrase(Output)))
+		, m_sourcePhrase(orig.m_sourcePhrase)
+		, m_sourceCompleted				(orig.m_sourceCompleted.GetSize())
+		, m_sourceInput						(orig.m_sourceInput)
+		, m_currSourceWordsRange	(orig.m_currSourceWordsRange)
+		, m_currTargetWordsRange	(orig.m_currTargetWordsRange)
+		, m_wordDeleted(false)
+		,	m_totalScore(orig.m_totalScore)
+		,	m_futureScore(orig.m_futureScore)
+		, m_scoreBreakdown				(orig.m_scoreBreakdown)
+		, m_languageModelStates(orig.m_languageModelStates)
+		, m_arcList(NULL)
+		, m_id(s_HypothesesCreated++)
+{
+	m_ptid = ptid;
+	m_targetLen = orig.m_targetLen;
+	if (ptid == 1)
+		{
+			list<const Hypothesis *> tmp;
+			for (const Hypothesis *x = &orig; x != NULL; x = x->m_prevHypo)
+				tmp.push_front(x);
+			list<const Hypothesis *>::const_iterator i;
+			for (i = tmp.begin(); i != tmp.end(); i++)
+				for (unsigned int j = 0; j < (*i)->GetSize(); j++)
+					(const_cast<Phrase &>(m_targetPhrase)).push_back((*i)->m_targetPhrase.GetFactorArray(j));
+		}
+	if (m_ptid > -1)
+		{ // merge with existing factors: target phrase must already be full length
+			// assumes that phrases have already been checked for compatibility
+			(const_cast<Phrase &>(m_targetPhrase)).MergeFactorsPartial(genph, 0);
+			cerr << "\t* " << "generated hyp: " << m_targetPhrase << " by adding " << genph << " " << m_currSourceWordsRange << "\n";
+		}
+	m_scoreBreakdown.PlusEquals(generationScore);
+	_hash_computed = false;
+}
+
 Hypothesis::~Hypothesis()
 {
 	if (m_arcList) 
@ -134,25 +226,25 @@ void Hypothesis::AddArc(Hypothesis *loserHypo)
 */
 Hypothesis* Hypothesis::CreateNext(const TranslationOption &transOpt) const
 {
-	return Create(*this, transOpt);
+	return Create(*this, transOpt, m_ptid);
 }

 /***
 * return the subclass of Hypothesis most appropriate to the given translation option
 */
-Hypothesis* Hypothesis::Create(const Hypothesis &prevHypo, const TranslationOption &transOpt)
+Hypothesis* Hypothesis::Create(const Hypothesis &prevHypo, const TranslationOption &transOpt, int ptid)
 {
 	Hypothesis *ptr = s_objectPool.getPtr();
-	return new(ptr) Hypothesis(prevHypo, transOpt);
+	return new(ptr) Hypothesis(prevHypo, transOpt, ptid);
 }
 /***
 * return the subclass of Hypothesis most appropriate to the given target phrase
 */

-Hypothesis* Hypothesis::Create(InputType const& m_source, const TargetPhrase &emptyTarget)
+Hypothesis* Hypothesis::Create(InputType const& m_source, const TargetPhrase &emptyTarget, int ptid)
 {
 	Hypothesis *ptr = s_objectPool.getPtr();
-	return new(ptr) Hypothesis(m_source, emptyTarget);
+	return new(ptr) Hypothesis(m_source, emptyTarget, ptid);
 }

 #if 0
@ -216,13 +308,19 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
 {
 	const size_t startPos	= m_currTargetWordsRange.GetStartPos();
 	LMList::const_iterator iterLM;
+	unsigned long index = 0x1;
 	size_t lmIdx = 0;

 	// already have LM scores from previous and trigram score of poss trans.
 	// just need trigram score of the words of the start of current phrase	
-	for (iterLM = languageModels.begin() ; iterLM != languageModels.end() ; ++iterLM,++lmIdx)
+	for (iterLM = languageModels.begin() ; iterLM != languageModels.end() ; ++iterLM,++lmIdx, index <<= 0x1)
 	{
 		const LanguageModel &languageModel = **iterLM;
+		
+		if (index & maskedLMs || !languageModel.Useable(m_targetPhrase))
+			continue;
+
+		scoredLMs |= index;
 		size_t nGramOrder			= languageModel.GetNGramOrder();
 		size_t currEndPos			= m_currTargetWordsRange.GetEndPos();
 		float lmScore;
@ -317,7 +415,8 @@ void Hypothesis::CalcScore(const StaticData& staticData, const SquareMatrix &fut
 	CalcLMScore(staticData.GetAllLM());

 	// WORD PENALTY
-	m_scoreBreakdown.PlusEquals(staticData.GetWordPenaltyProducer(), - (float) m_currTargetWordsRange.GetWordsCount()); 
+	if (m_ptid != -1)
+		m_scoreBreakdown.PlusEquals(staticData.GetWordPenaltyProducer(), - (float) m_currTargetWordsRange.GetWordsCount()); 

 	// FUTURE COST
 	CalcFutureScore(futureScore);
@ -415,6 +514,7 @@ ostream& operator<<(ostream& out, const Hypothesis& hypothesis)
 	hypothesis.ToStream(out);
 	// words bitmap
 	out << "[" << hypothesis.m_sourceCompleted << "] ";
+	out << "tlen: " << hypothesis.m_targetLen << " ";

 	// scores
 	out << " [total=" << hypothesis.GetTotalScore() << "]";
--- a/moses/src/Hypothesis.h
+++ b/moses/src/Hypothesis.h
@ -66,6 +66,8 @@ protected:
 	const Phrase			&m_targetPhrase; /**< target phrase being created at the current decoding step */
 	Phrase const*     m_sourcePhrase; /**< input sentence */
 	WordsBitmap				m_sourceCompleted; /**< keeps track of which words have been translated so far */
+	int m_ptid;
+	int m_targetLen;
 	//TODO: how to integrate this into confusion network framework; what if
 	//it's a confusion network in the end???
 	InputType const&  m_sourceInput;
@ -78,6 +80,8 @@ protected:
 	std::vector<LanguageModelSingleFactor::State> m_languageModelStates; /**< relevant history for language model scoring -- used for recombination */
 	const Hypothesis 	*m_mainHypo;
 	ArcList 					*m_arcList; /**< all arcs that end at the same lattice point as this hypothesis */
+	static unsigned long maskedLMs;
+	static unsigned long scoredLMs;

 	void CalcFutureScore(const SquareMatrix &futureScore);
 	//void CalcFutureScore(float futureScore[256][256]);
@ -95,23 +99,31 @@ public:
 		return s_objectPool;
 	}

-
 	static unsigned int s_HypothesesCreated; // Statistics: how many hypotheses were created in total
 	int m_id; /**< numeric ID of this hypothesis, used for logging */

+	// for masking lms
+	inline unsigned long &GetMaskedLMs() { return maskedLMs; }
+	inline unsigned long &GetScoredLMs() { return scoredLMs; }
+	inline int GetTargetLen() const { return m_targetLen; }
+	inline int GetPTID() const { return m_ptid; }
 	/** used by initial seeding of the translation process */
-	Hypothesis(InputType const& source, const TargetPhrase &emptyTarget);
+	Hypothesis(InputType const& source, const TargetPhrase &emptyTarget, int ptid = -1);
 	/** used when creating a new hypothesis using a translation option (phrase translation) */
-	Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt);
+	Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt, int ptid = -1);
+	/** copy constructor for new pt restart*/
+	Hypothesis(const Hypothesis &orig, int ptid);
+	/** copy constructor for generation options */
+	Hypothesis(const Hypothesis &orig, Phrase& genph, ScoreComponentCollection2& generationScore, int ptid);
 	~Hypothesis();
 	
 	/** return the subclass of Hypothesis most appropriate to the given translation option */
-	static Hypothesis* Create(const Hypothesis &prevHypo, const TranslationOption &transOpt);
+	static Hypothesis* Create(const Hypothesis &prevHypo, const TranslationOption &transOpt, int ptid = -1);

-	static Hypothesis* Create(const WordsBitmap &initialCoverage);
+	static Hypothesis* Create(const WordsBitmap &initialCoverage, int ptid = -1);

 	/** return the subclass of Hypothesis most appropriate to the given target phrase */
-	static Hypothesis* Create(InputType const& source, const TargetPhrase &emptyTarget);
+	static Hypothesis* Create(InputType const& source, const TargetPhrase &emptyTarget, int ptid = -1);
 	
 	/** return the subclass of Hypothesis most appropriate to the given translation option */
 	Hypothesis* CreateNext(const TranslationOption &transOpt) const;
@ -221,7 +233,8 @@ public:

 	void ToStream(std::ostream& out) const
 	{
-		if (m_prevHypo != NULL)
+		if (m_ptid >= 0) { out << " ::: "; }
+		else if (m_prevHypo != NULL)
 		{
 			m_prevHypo->ToStream(out);
 		}
--- a/moses/src/HypothesisCollection.h
+++ b/moses/src/HypothesisCollection.h
@ -101,7 +101,6 @@ protected:
 		// if returns false, hypothesis not used
 		// caller must take care to delete unused hypo to avoid leak
 		// used by Add(Hypothesis *hypothesis, float beamThreshold);
-	void RemoveAll();

 	/** destroy all instances of Hypothesis in this collection */
 	inline void Detach(const HypothesisCollection::iterator &iter)
@ -115,6 +114,8 @@ protected:
 		pool.freeObject(*iter);
 		Detach(iter);
 	}
+	void RemoveAll();
+
 	/** add Hypothesis to the collection, without pruning */
 	inline void AddNoPrune(Hypothesis *hypothesis)
 	{
@ -129,6 +130,39 @@ public:

 	HypothesisCollection();

+	void Reset()
+	{
+		m_hypos.clear();
+		m_bestScore = -std::numeric_limits<float>::infinity();
+		m_worstScore = -std::numeric_limits<float>::infinity();
+	}
+
+	/** remove the element at iter from m_hypos; the Hypothesis object itself is not freed here */
+	inline void erase(const HypothesisCollection::iterator &iter)
+	{
+		m_hypos.erase(iter);
+	}
+
+	inline void insert(const HypothesisCollection::iterator &iter, Hypothesis& h)
+	{
+		if (!m_hypos.insert(&h).second) {
+    }
+	}
+
+	inline void insertset(HypothesisCollection &other)
+	{
+		HypothesisCollection::const_iterator iterHypo;
+
+		for (iterHypo = other.begin(); iterHypo != other.end(); ++iterHypo)
+			Add(*iterHypo);
+	}
+
+	void FreeHypPool() 
+	{ 
+		ObjectPool<Hypothesis> &pool = Hypothesis::GetObjectPool();
+		pool.reset();
+	}
+
 	// this function will recombine hypotheses silently!  There is no record
 	// (could affect n-best list generation...TODO)
 	void AddPrune(Hypothesis *hypothesis);
@ -136,7 +170,8 @@ public:

 	inline ~HypothesisCollection()
 	{
-		RemoveAll();
+		// Don't do this any more
+		// RemoveAll();
 	}
 	/** set maximum number of hypotheses in the collection
   *  /param maxHypoStackSize maximum number (typical number: 100) */
--- a/moses/src/LatticePathCollection.h
+++ b/moses/src/LatticePathCollection.h
@ -38,7 +38,7 @@ public:
 	~LatticePathCollection()
 	{
 		// clean up
-		RemoveAllInColl<LatticePathCollection::iterator> (*this);
+		RemoveAllInColl(*this);
 	}
 };

--- a/moses/src/Makefile.am
+++ b/moses/src/Makefile.am
@ -38,10 +38,12 @@ libmoses_a_SOURCES = \
 	PhraseDictionaryNode.cpp \
 	PhraseDictionaryTree.cpp \
 	PhraseDictionaryTreeAdaptor.cpp \
+	PhraseReference.cpp \
 	ScoreComponentCollection.cpp \
 	ScoreIndexManager.cpp \
 	ScoreProducer.cpp \
 	Sentence.cpp \
+	SentenceStats.cpp \
 	StaticData.cpp \
 	TargetPhrase.cpp \
 	TranslationOption.cpp \
--- a/moses/src/Manager.cpp
+++ b/moses/src/Manager.cpp
@ -35,11 +35,23 @@ using namespace std;

 Manager::Manager(InputType const& source, StaticData &staticData)
 :m_source(source)
-,m_hypoStack(source.GetSize() + 1)
+ ,m_hypoStack(source.GetSize() + 1)
 ,m_staticData(staticData)
 ,m_possibleTranslations(*source.CreateTranslationOptionCollection())
 ,m_initialTargetPhrase(Output)
 {
+	int pts = m_staticData.GetPhraseDictionaries().size();
+
+	// Need to allocate space for additional collections
+	if (m_staticData.GetInputType() == 0)
+		{
+			m_secondaryOptions = (TranslationOptionCollection **) malloc(sizeof(TranslationOptionCollection *) * pts);
+			for (int c = 0; c < pts; c++)
+				m_secondaryOptions[c] = source.CreateTranslationOptionCollection();
+		}
+	m_scoredLMs = 0x0;
+	assert(m_staticData.GetAllLM().size() < sizeof(int));
+
 	std::vector < HypothesisCollection >::iterator iterStack;
 	for (iterStack = m_hypoStack.begin() ; iterStack != m_hypoStack.end() ; ++iterStack)
 	{
@ -51,6 +63,15 @@ Manager::Manager(InputType const& source, StaticData &staticData)

 Manager::~Manager() 
 {
+	if (m_staticData.GetInputType() == 0)
+		{
+			for (unsigned int c = 0; c < m_staticData.GetPhraseDictionaries().size(); c++)
+				delete m_secondaryOptions[c];
+			free(m_secondaryOptions);
+		}
+	// Clear Hyps before the collections go away
+	m_hypoStack[0].FreeHypPool();
+
  delete &m_possibleTranslations;
 }

@ -67,40 +88,224 @@ void Manager::ProcessSentence()
 	//		1. generation of source sentence is not done 1st
 	//		2. initial hypothesis factors are given in the sentence
 	//CreateTranslationOptions(m_source, phraseDictionary, lmListInitial);
-	m_possibleTranslations.CreateTranslationOptions(decodeStepList
-  														, m_staticData.GetFactorCollection());
+	//m_possibleTranslations.CreateTranslationOptions(decodeStepList
+	//, m_staticData.GetFactorCollection());
+
+	list < DecodeStep* >::const_iterator iterStep = decodeStepList.begin();
+
+	list<DecodeStep *> b;
+	assert((*iterStep)->GetType() == 0);
+	b.push_back(*iterStep); // better not be a generation step!
+	m_possibleTranslations.CreateTranslationOptions(b, m_staticData.GetFactorCollection());
+	
+	//
+	// Create Secondary options
+	//
+
+	//for (int c = 1; c < m_staticData.GetPhraseDictionaries().size(); c++)
+	unsigned int c;
+	for (c = 0, ++iterStep ; iterStep != decodeStepList.end() ; ++iterStep) 
+		if ((*iterStep)->GetType() == 0)
+		{
+			list<DecodeStep *> b;
+			b.push_back(*iterStep);
+			m_secondaryOptions[c++]->CreateTranslationOptions(b, m_staticData.GetFactorCollection());
+		}

 	// initial seed hypothesis: nothing translated, no words produced
 	{
-		Hypothesis *hypo = Hypothesis::Create(m_source, m_initialTargetPhrase);
+		Hypothesis *hypo = Hypothesis::Create(m_source, m_initialTargetPhrase, -1); // initial PTID
 		m_hypoStack[0].AddPrune(hypo);
 	}
 	
 	// go through each stack
 	std::vector < HypothesisCollection >::iterator iterStack;
-	for (iterStack = m_hypoStack.begin() ; iterStack != m_hypoStack.end() ; ++iterStack)
-	{
-		HypothesisCollection &sourceHypoColl = *iterStack;

-		// the stack is pruned before processing (lazy pruning):
-		sourceHypoColl.PruneToSize(m_staticData.GetMaxHypoStackSize());
+	//c = 0;
+	int pt = 1;

-		sourceHypoColl.InitializeArcs();
+	// Process first PT as if it were normal (this is used to compute distortion)
+	// This should be identical
+	for (iterStack = m_hypoStack.begin(); iterStack != m_hypoStack.end(); ++iterStack)
+		{
+			HypothesisCollection &sourceHypoColl = *iterStack;
 			
-		// go through each hypothesis on the stack and try to expand it
-		HypothesisCollection::const_iterator iterHypo;
-		for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo)
-			{
-				Hypothesis &hypothesis = **iterHypo;
-				ProcessOneHypothesis(hypothesis); // expand the hypothesis
+			// the stack is pruned before processing (lazy pruning):
+			sourceHypoColl.PruneToSize(m_staticData.GetMaxHypoStackSize());
+			
+			sourceHypoColl.InitializeArcs();
+			
+			// go through each hypothesis on the stack and try to expand it
+			HypothesisCollection::const_iterator iterHypo;
+			for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo)
+				{
+					Hypothesis &hypothesis = **iterHypo;
+					cerr << "Processing Hypo " << hypothesis << endl;
+					ProcessOneHypothesis(hypothesis, &m_possibleTranslations, -1); // expand the hypothesis
+				}
+			// some logging
+			if (m_staticData.GetVerboseLevel() > 0) {
+				//OutputHypoStack();
+				OutputHypoStackSize();
 			}
-		// some logging
-		if (m_staticData.GetVerboseLevel() > 0) {
-			//OutputHypoStack();
-			OutputHypoStackSize();
 		}

-	}
+	if (m_staticData.GetInputType()) return;
+		
+	// Process remaining steps
+	int gt = 0, id = 1;
+	iterStep = decodeStepList.begin();
+	for (++iterStep; iterStep != decodeStepList.end(); ++iterStep)
+		{
+			if ((*iterStep)->GetType() == 0)
+				{
+					int i = 0;
+					int firsthyp = 1;
+					// Create new start(s)
+					HypothesisCollection::const_iterator iterHypo;
+					HypothesisCollection &currHypoColl = m_hypoStack.back();
+					
+					// clear stack 0
+					m_hypoStack[0].Reset();
+
+					for (iterHypo = currHypoColl.begin() ; iterHypo != currHypoColl.end() ; ++iterHypo, i++)
+						{
+							Hypothesis &hypothesis = **iterHypo;
+							if (firsthyp)
+								{
+									m_scoredLMs |= hypothesis.GetScoredLMs();
+									hypothesis.GetScoredLMs() = 0x0;
+									
+									firsthyp = 0;
+								}
+							hypothesis.GetMaskedLMs() = m_scoredLMs;
+							cerr << "\t[Transfer Step] Hypo " << i << ": " << hypothesis << endl;
+							Hypothesis *restartHypo = new Hypothesis(hypothesis, id);
+							cerr << "\t\t* [transfer result] " << *restartHypo << endl;
+							m_hypoStack[0].AddPrune(restartHypo);
+						}
+					// clear remaining stacks
+					for (iterStack = m_hypoStack.begin()+1; iterStack != m_hypoStack.end(); ++iterStack)
+						(*iterStack).Reset();
+
+					// Now decode the current PT
+					fprintf(stderr, "Starting PT processing for %d, %d items in starting stack\n", pt, m_hypoStack[0].size());
+					for (iterStack = m_hypoStack.begin(); iterStack != m_hypoStack.end(); ++iterStack)
+						{
+							HypothesisCollection &sourceHypoColl = *iterStack;
+							
+							// the stack is pruned before processing (lazy pruning):
+							sourceHypoColl.PruneToSize(m_staticData.GetMaxHypoStackSize());
+							
+							sourceHypoColl.InitializeArcs();
+							
+							// go through each hypothesis on the stack and try to expand it
+							HypothesisCollection::const_iterator iterHypo;
+							for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo)
+								{
+									Hypothesis &hypothesis = **iterHypo;
+									cerr << "\t[PT Processing] Hypo " << i << ": " << hypothesis << endl;
+									ProcessOneHypothesis(hypothesis, m_secondaryOptions[pt-1]); // expand the hypothesis
+								}
+							// some logging
+							if (m_staticData.GetVerboseLevel() > 0) {
+								//OutputHypoStack();
+								OutputHypoStackSize();
+							}
+						}
+					pt++;
+				}
+			else // do a generation step
+				{
+					int firsthyp = 1;
+					// NOTE: This should be done elsewhere, like a GenerationDecodeStep
+					//       but that code has been mostly rewritten to handle TranslationOptions
+					//       so this hack is easier for now...
+
+					// normal generation step
+					HypothesisCollection::const_iterator iterHypo;
+					HypothesisCollection &currHypoColl = m_hypoStack.back();
+					HypothesisCollection tmp;
+					
+					for (iterHypo = currHypoColl.begin() ; iterHypo != currHypoColl.end() ; ++iterHypo)
+						{
+							Hypothesis &hypothesis = **iterHypo;
+							if (firsthyp)
+								{
+									m_scoredLMs |= hypothesis.GetScoredLMs();
+									hypothesis.GetScoredLMs() = 0x0;
+									
+									firsthyp = 0;
+								}
+							hypothesis.GetMaskedLMs() = m_scoredLMs;
+							cerr << "Generating from Hypo: " << hypothesis << endl;
+
+							// Actual Generation
+							const Phrase &targetPhrase  = hypothesis.GetTargetPhrase();
+							size_t targetLength = targetPhrase.GetSize();
+							vector< WordList > wordListVector(targetLength);
+
+							(*iterStep)->GenerateOptions(wordListVector, targetPhrase);
+
+							// use generation list (wordList)
+							// set up iterators (total number of expansions)
+							size_t numIteration = 1;
+							vector< WordListIterator >  wordListIterVector(targetLength);
+							vector< const Word* >       mergeWords(targetLength);
+							for (size_t currPos = 0 ; currPos < targetLength ; currPos++)
+								{
+									wordListIterVector[currPos] = wordListVector[currPos].begin();
+									numIteration *= wordListVector[currPos].size();
+								}
+							//fprintf(stderr, "INFO: numiteration == %d\n", numIteration);
+							
+							// go thru each possible factor for each word & create hypothesis
+							for (size_t currIter = 0 ; currIter < numIteration ; currIter++)
+								{
+									ScoreComponentCollection2 generationScore; // total score for this string of words
+									
+									// create vector of words with new factors for last phrase
+									for (size_t currPos = 0 ; currPos < targetLength ; currPos++)
+										{
+											const WordPair &wordPair = *wordListIterVector[currPos];
+											mergeWords[currPos] = &(wordPair.first);
+											generationScore.PlusEquals(wordPair.second);
+										}
+									
+									// merge with existing trans opt
+									Phrase genPhrase(Output, mergeWords);
+									//TranslationOption *newTransOpt = MergeGeneration(inputPartialTranslOpt, genPhrase, generationScore);
+									//cerr << "INFO: " << genPhrase << std::endl;
+									Hypothesis *newh = new Hypothesis(hypothesis, genPhrase, generationScore, id);
+									if (newh != NULL)
+										{
+											//outputPartialTranslOptColl.Add(newTransOpt);
+											cerr << "\t\t+ Generating: " << *newh << std::endl;
+											tmp.insert(iterHypo, *newh);
+										}
+									
+									// increment iterators
+									for (size_t currPos = 0 ; currPos < wordListVector.size() ; currPos++)
+										{
+											WordListIterator &iter = wordListIterVector[currPos];
+											iter++;
+											if (iter != wordListVector[currPos].end())
+												{ // eg. 4 -> 5
+												}
+											else
+												{ //  eg 9 -> 10
+													iter = wordListVector[currPos].begin();
+												}
+										}
+								}
+						}
+					currHypoColl.Reset();
+					currHypoColl.insertset(tmp);
+					fprintf(stderr, "After procssing generation step #%d, %d items in hyp collection\n", gt, currHypoColl.size()); 
+					gt++;
+				}
+			id++;
+		}
 	
 	// some more logging
 	if (m_staticData.GetVerboseLevel() > 0) {
@ -116,10 +321,14 @@ void Manager::ProcessSentence()
 * violation of reordering limits. 
 * \param hypothesis hypothesis to be expanded upon
 */
-void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
+void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis,
+																	 TranslationOptionCollection *options,
+																	 int index)
 {
 	// since we check for reordering limits, its good to have that limit handy
-	int maxDistortion = m_staticData.GetMaxDistortion();
+
+	// Changed this, secondary processing wants monotone decoding
+	int maxDistortion =  m_staticData.GetMaxDistortion(); //  : 0;

 	// no limit of reordering: only check for overlap
 	if (maxDistortion < 0)
@ -135,7 +344,9 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
 				if (!hypoBitmap.Overlap(WordsRange(startPos, endPos)))
 				{
 					ExpandAllHypotheses(hypothesis
-												, m_possibleTranslations.GetTranslationOptionList(WordsRange(startPos, endPos)));
+															, options->GetTranslationOptionList(WordsRange(startPos, endPos))
+															, index
+															);
 				}
 			}
 		}
@ -164,7 +375,9 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
 					)
 				{
 					ExpandAllHypotheses(hypothesis
-												,m_possibleTranslations.GetTranslationOptionList(WordsRange(startPos, endPos)));
+															, options->GetTranslationOptionList(WordsRange(startPos, endPos))
+															, index
+															);
 				}
 			}
 			// filling in gap => just check for overlap
@ -174,7 +387,9 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
 						&& !hypoBitmap.Overlap(WordsRange(startPos, endPos)))
 					{
 						ExpandAllHypotheses(hypothesis
-													,m_possibleTranslations.GetTranslationOptionList(WordsRange(startPos, endPos)));
+																, options->GetTranslationOptionList(WordsRange(startPos, endPos))
+																, index
+																);
 					}
 				}
 			// ignoring, continuing forward => be limited by start of gap
@ -184,7 +399,9 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
 						&& !hypoBitmap.Overlap(WordsRange(startPos, endPos)))
 					{
 						ExpandAllHypotheses(hypothesis
-													,m_possibleTranslations.GetTranslationOptionList(WordsRange(startPos, endPos)));
+																, options->GetTranslationOptionList(WordsRange(startPos, endPos))
+																, index
+																);
 					}
 				}
 		}
@ -197,12 +414,12 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
 * \param transOptList list of translation options to be applied
 */

-void Manager::ExpandAllHypotheses(const Hypothesis &hypothesis,const TranslationOptionList &transOptList)
+void Manager::ExpandAllHypotheses(const Hypothesis &hypothesis,const TranslationOptionList &transOptList, int index) // calls ExpandHypothesis() once per entry of transOptList, in list order; index is handed on unchanged
 {
 	TranslationOptionList::const_iterator iter;
 	for (iter = transOptList.begin() ; iter != transOptList.end() ; ++iter)
 	{
-		ExpandHypothesis(hypothesis, **iter);
+		ExpandHypothesis(hypothesis, **iter, index); // ExpandHypothesis() takes its future score from m_possibleTranslations when index == -1, otherwise from m_secondaryOptions[index]
 	}
 }

@ -213,11 +430,21 @@ void Manager::ExpandAllHypotheses(const Hypothesis &hypothesis,const Translation
 * \param transOpt translation option (phrase translation) 
 *        that is applied to create the new hypothesis
 */
-void Manager::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt) 
+void Manager::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt, int index) 
 {
+	cerr << "Trying to expand: " << hypothesis << " with " << transOpt << std::endl;
+	if (index > -1 && 
+			(transOpt.GetTargetPhrase().GetSize() + hypothesis.GetTargetLen() > hypothesis.GetTargetPhrase().GetSize()
+			 || !hypothesis.GetTargetPhrase().IsCompatiblePartial(transOpt.GetTargetPhrase(), hypothesis.GetTargetLen())
+			 )
+			)
+			return;
 	// create hypothesis and calculate all its scores
 	Hypothesis *newHypo = hypothesis.CreateNext(transOpt);
-	newHypo->CalcScore(m_staticData, m_possibleTranslations.GetFutureScore());
+	newHypo->CalcScore(m_staticData, 
+										 (index == -1 ? m_possibleTranslations.GetFutureScore()
+											: m_secondaryOptions[index]->GetFutureScore())
+										 );
 	
 	// logging for the curious
 	if(m_staticData.GetVerboseLevel() > 2) 
@ -227,6 +454,7 @@ void Manager::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOp
 	
 	// add to hypothesis stack
 	size_t wordsTranslated = newHypo->GetWordsBitmap().GetNumWordsCovered();	
+	cerr << "\t+ Adding: " << *newHypo << "\n";
 	m_hypoStack[wordsTranslated].AddPrune(newHypo);
 }

--- a/moses/src/Manager.h
+++ b/moses/src/Manager.h
@ -32,6 +32,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "LatticePathList.h"
 #include "SquareMatrix.h"
 #include "WordsBitmap.h"
+#include "DecodeStep_Generation.h"
 //#include "UnknownWordHandler.h"

 class LatticePath;
@ -79,12 +80,16 @@ protected:
 	// no of elements = no of words in source + 1
 	StaticData &m_staticData; /**< holds various kinds of constants, counters, and global data structures */
 	TranslationOptionCollection &m_possibleTranslations; /**< pre-computed list of translation options for the phrases in this sentence */
+	TranslationOptionCollection **m_secondaryOptions; /**< Lists per factor */
 	TargetPhrase m_initialTargetPhrase; /**< used to seed 1st hypo */
 	
+	// vector of scored lms
+	unsigned long m_scoredLMs;
+
 	// functions for creating hypotheses
-	void ProcessOneHypothesis(const Hypothesis &hypothesis);
-	void ExpandAllHypotheses(const Hypothesis &hypothesis,const TranslationOptionList &transOptList);
-	void ExpandHypothesis(const Hypothesis &hypothesis,const TranslationOption &transOpt);
+	void ProcessOneHypothesis(const Hypothesis &hypothesis, TranslationOptionCollection *options = NULL, int index = 0);
+	void ExpandAllHypotheses(const Hypothesis &hypothesis,const TranslationOptionList &transOptList, int index = 0);
+	void ExpandHypothesis(const Hypothesis &hypothesis,const TranslationOption &transOpt, int index = 0);

 	// logging
 	void OutputHypoStack(int stack = -1);
--- a/moses/src/PartialTranslOptColl.h
+++ b/moses/src/PartialTranslOptColl.h
@ -52,7 +52,7 @@ public:
 	/** destructor, cleans out list */
 	~PartialTranslOptColl()
 	{
-		RemoveAllInColl<std::vector<TranslationOption*>::iterator>( m_list );
+		RemoveAllInColl( m_list );
 	}
 	
 	void AddNoPrune(TranslationOption *partialTranslOpt);
--- a/moses/src/Phrase.cpp
+++ b/moses/src/Phrase.cpp
@ -128,6 +128,21 @@ void Phrase::MergeFactors(const Phrase &copy)
 	}
 }

+/** Copy the non-NULL factors of 'copy' into this phrase, word by word,
+ *  beginning at word position 'start' of this phrase.
+ * \param copy  phrase supplying the factors; its word i is merged into word (start + i) of this phrase
+ * \param start offset into this phrase at which word 0 of 'copy' is placed
+ */
+void Phrase::MergeFactorsPartial(const Phrase &copy, int start)
+{
+	// the merged span has to lie inside this phrase; MergeFactors(copy, factorType)
+	// asserts its own size precondition in the same way
+	assert(start >= 0);
+	const size_t offset = static_cast<size_t>(start);
+	const size_t size = copy.GetSize();
+	assert(size <= GetSize() && offset <= GetSize() - size);
+
+	const size_t endPos = offset + size;
+	for (size_t currPos = offset; currPos < endPos; currPos++)
+	{
+		for (unsigned int currFactor = 0 ; currFactor < NUM_FACTORS ; currFactor++)
+		{
+			FactorType factorType = static_cast<FactorType>(currFactor);
+			const Factor *factor = copy.GetFactor(currPos - offset, factorType);
+			// NULL means 'copy' carries nothing for this factor: leave the existing value alone
+			if (factor != NULL)
+				SetFactor(currPos, factorType, factor);
+		}
+	}
+}
+
 void Phrase::MergeFactors(const Phrase &copy, FactorType factorType)
 {
 	assert(GetSize() == copy.GetSize());
@ -357,6 +372,28 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase) const

 }

+/** Check whether 'inputPhrase' can be overlaid on this phrase, starting at word
+ *  position 'start', without two different non-NULL factors meeting.
+ * \param inputPhrase phrase whose word i is compared with word (start + i) of this phrase
+ * \param start       offset into this phrase at which word 0 of inputPhrase is aligned
+ * \return false if inputPhrase does not fit inside this phrase at that offset, or if
+ *         some aligned pair of factors is set on both sides and differs; true otherwise
+ */
+bool Phrase::IsCompatiblePartial(const Phrase &inputPhrase, int start) const
+{
+	// a negative offset cannot address a word of this phrase
+	if (start < 0) { return false; }
+
+	const size_t offset = static_cast<size_t>(start);
+	const size_t size = inputPhrase.GetSize();
+
+	// a span running past the end of this phrase is rejected up front, so GetFactor()
+	// below is never asked for a position outside this phrase
+	if (size > GetSize() || offset > GetSize() - size) { return false; }
+
+	for (size_t inputPos = 0 ; inputPos < size ; inputPos++)
+	{
+		for (unsigned int currFactor = 0 ; currFactor < NUM_FACTORS ; currFactor++)
+		{
+			FactorType factorType = static_cast<FactorType>(currFactor);
+			const Factor *thisFactor = GetFactor(offset + inputPos, factorType);
+			const Factor *inputFactor = inputPhrase.GetFactor(inputPos, factorType);
+			// a factor missing on either side never conflicts; set factors are compared by pointer
+			if (thisFactor != NULL && inputFactor != NULL && thisFactor != inputFactor)
+				return false;
+		}
+	}
+
+	return true;
+}
+
 bool Phrase::IsCompatible(const Phrase &inputPhrase, FactorType factorType) const
 {
 	if (inputPhrase.GetSize() != GetSize())	{ return false;	}
--- a/moses/src/Phrase.h
+++ b/moses/src/Phrase.h
@ -64,12 +64,14 @@ public:
 											, FactorCollection &factorCollection);

 	void MergeFactors(const Phrase &copy);
+	void MergeFactorsPartial(const Phrase &copy, int start = 0);
 	//! copy a single factor (specified by factorType)
 	void MergeFactors(const Phrase &copy, FactorType factorType);
 	//! copy all factors specified in factorVec and none others
 	void MergeFactors(const Phrase &copy, const std::vector<FactorType>& factorVec);

 	// must run IsCompatible() to ensure incompatible factors aren't being overwritten
+	bool IsCompatiblePartial(const Phrase &inputPhrase, int start) const;
 	bool IsCompatible(const Phrase &inputPhrase) const;
 	bool IsCompatible(const Phrase &inputPhrase, FactorType factorType) const;
 	bool IsCompatible(const Phrase &inputPhrase, const std::vector<FactorType>& factorVec) const;
--- a/moses/src/PrefixTree.h
+++ b/moses/src/PrefixTree.h
@ -124,8 +124,10 @@ private:
  off_t startPos;
  FILE* f;
 public:
+#if 0
 #ifdef DEBUG
  DECLAREMEMSTAT(Self);
+#endif
 #endif

  PrefixTreeF(FILE* f_=0) : f(f_) {if(f) read();}
@ -271,7 +273,7 @@ public:

 };
 template<typename T,typename D> D PrefixTreeF<T,D>::def;
-#ifdef DEBUG
+#if 0 //def DEBUG
 template<typename T,typename D> MemoryStatsPrinter< PrefixTreeF<T,D> > PrefixTreeF<T,D>::memStat("PrefixTreeF<T,D>",0);
 #endif
 #endif
--- a/moses/src/TranslationOptionCollection.cpp
+++ b/moses/src/TranslationOptionCollection.cpp
@ -19,6 +19,7 @@ License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/

+#include <algorithm>
 #include "TranslationOptionCollection.h"
 #include "Sentence.h"
 #include "DecodeStep.h"
@ -59,7 +60,7 @@ TranslationOptionCollection::~TranslationOptionCollection()
 	{
 		for (size_t endPos = startPos ; endPos < size ; ++endPos)
 		{
-			RemoveAllInColl<TranslationOptionList::iterator>(GetTranslationOptionList(startPos, endPos));
+		 RemoveAllInColl(GetTranslationOptionList(startPos, endPos));
 		}
 	}
 }
@ -70,20 +71,39 @@ bool CompareTranslationOption(const TranslationOption *a, const TranslationOptio
 	return a->GetFutureScore() > b->GetFutureScore();
 }

+/** Run the per-position ProcessUnknownWord() for every source position that is
+ *  not covered by a translation option of any span length.
+ */
+void TranslationOptionCollection::ProcessUnknownWord()
+{
+	const size_t sourceSize = m_source.GetSize();
+
+	// every position starts out as uncovered
+	vector<bool> isUncovered(sourceSize, true);
+
+	// a span [startPos, endPos] holding at least one option covers all of its words
+	for (size_t startPos = 0 ; startPos < sourceSize ; ++startPos)
+	{
+		for (size_t endPos = startPos ; endPos < sourceSize ; ++endPos)
+		{
+			if (GetTranslationOptionList(startPos, endPos).size() == 0)
+				continue;
+			fill(isUncovered.begin() + startPos, isUncovered.begin() + endPos + 1, false);
+		}
+	}
+
+	// whatever is still flagged has no option at all and goes to the per-position overload
+	for (size_t pos = 0 ; pos < sourceSize ; ++pos)
+	{
+		if (isUncovered[pos])
+			ProcessUnknownWord(pos, *m_factorCollection);
+	}
+}
+
+
 /** pruning: only keep the top n (m_maxNoTransOptPerCoverage) elements */
 void TranslationOptionCollection::Prune()
 {
-	size_t size = m_source.GetSize();
+	ProcessUnknownWord();
 	
-	// create unknown words for 1 word coverage where we don't have any trans options
-	for (size_t startPos = 0 ; startPos < size ; ++startPos)
-	{
-		TranslationOptionList &fullList = GetTranslationOptionList(startPos, startPos);
-		if (fullList.size() == 0)
-		{
-			ProcessUnknownWord(startPos, *m_factorCollection);
-		}
-	}
+	size_t size = m_source.GetSize();
 	
 	// prune to max no. of trans opt
 	if (m_maxNoTransOptPerCoverage == 0)
@ -210,6 +230,7 @@ void TranslationOptionCollection::CreateTranslationOptions(
 																													 const list < DecodeStep* > &decodeStepList
 																													 , FactorCollection &factorCollection)
 {
+	m_dstep = (DecodeStep *) &decodeStepList.front();
 	m_factorCollection = &factorCollection;
 	
 	for (size_t startPos = 0 ; startPos < m_source.GetSize() ; startPos++)
@ -325,13 +346,17 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const FactorArray &sourc
 						
 			for (unsigned int currFactor = 0 ; currFactor < NUM_FACTORS ; currFactor++)
 			{
-				FactorType factorType = static_cast<FactorType>(currFactor);
+				if (m_dstep->GetDictionaryPtr()->GetOutputFactorMask().test(currFactor)) // only set bits for this pt
+					{

-				const Factor *sourceFactor = sourceWord[currFactor];
-				if (sourceFactor == NULL)
-					targetWord[factorType] = factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR);
-				else
-					targetWord[factorType] = factorCollection.AddFactor(Output, factorType, sourceFactor->GetString());
+						FactorType factorType = static_cast<FactorType>(currFactor);
+						
+						const Factor *sourceFactor = sourceWord[currFactor];
+						if (sourceFactor == NULL)
+							targetWord[factorType] = factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR);
+						else
+							targetWord[factorType] = factorCollection.AddFactor(Output, factorType, sourceFactor->GetString());
+					}
 			}
 	
 			targetPhrase.SetScore();
@ -399,11 +424,13 @@ void TranslationOptionCollection::ProcessInitialTranslation(
 			TRACE_ERR(endl);
 		}
 	}
+#if 0 // do this elsewhere now
 	// handling unknown words
 	else if (wordsRange.GetWordsCount() == 1)
-	{
+
 		ProcessUnknownWord(startPos, factorCollection);
 	}
+#endif 
 }

 /** add translation option to the list
--- a/moses/src/TranslationOptionCollection.h
+++ b/moses/src/TranslationOptionCollection.h
@ -54,6 +54,7 @@ class TranslationOptionCollection
 	friend std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& coll);
 	TranslationOptionCollection(const TranslationOptionCollection&); /*< no copy constructor */
 protected:
+	DecodeStep *m_dstep;
 	std::vector< std::vector< TranslationOptionList > >	m_collection; /*< contains translation options */
 	InputType const			&m_source;
 	SquareMatrix				m_futureScore; /*< matrix of future costs for parts of the sentence */
@ -70,6 +71,7 @@ protected:
 															, PartialTranslOptColl &outputPartialTranslOptColl
 															, size_t startPos, size_t endPos );

+	void ProcessUnknownWord();
 	virtual void ProcessOneUnknownWord(const FactorArray &sourceWord
 																		 , size_t sourcePos
 																		 , FactorCollection &factorCollection);
--- a/moses/src/Util.h
+++ b/moses/src/Util.h
@ -144,7 +144,7 @@ std::string Join(const std::string& delimiter, const std::vector<T>& items)
 	std::ostringstream outstr;
 	if(items.size() == 0) return "";
 	outstr << items[0];
-	for(unsigned int i = 1; i < items.size(); i++) outstr << " " << items[i];
+	for(unsigned int i = 1; i < items.size(); i++) outstr << delimiter << items[i];
 	return outstr.str();
 }

@ -215,11 +215,10 @@ inline float CalcTranslationScore(const std::vector<float> &scoreVector,
 		return out.str();						\
 	}															\

-template<class ITER, class COLL>
+template<class COLL>
 void RemoveAllInColl(COLL &coll)
 {
-	ITER iter;
-	for (iter = coll.begin() ; iter != coll.end() ; ++iter)
+	for (typename COLL::iterator iter = coll.begin() ; iter != coll.end() ; ++iter)
 	{
 		delete (*iter);
 	}
@ -237,7 +236,9 @@ template<typename T> inline void ShrinkToFit(T& v) {
 /***
 * include checks for null return value, and helpful print statements
 */
+ /*
 void* xmalloc(unsigned int numBytes);
 void* xrealloc(void* ptr, unsigned int numBytes);
 #define malloc(x) xmalloc(x)
 #define realloc(x, n) xrealloc(x, n)
+*/