Support for printing out word-to-word alignments (besides phrase-to-phrase alignments)

as contained in the phrase table. If the phrase table (PT) contains word-to-word alignments between source and target phrases, Moses can optionally output them in the n-best list and in the log file (if verbose). Word-to-word alignments from source to target and from target to source can differ if they differ in the PT. Detailed documentation will be added to the Moses web pages very soon. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1886 1f5c12ca-751b-0410-a591-d2e778427230
2024-09-11 11:25:40 +03:00 · 2008-09-12 18:09:06 +00:00 · 2008-09-12 18:09:06 +00:00 · dd6c36640b
commit dd6c36640b
parent e376f9f994
26 changed files with 1699 additions and 254 deletions
--- a/moses-cmd/src/IOWrapper.cpp
+++ b/moses-cmd/src/IOWrapper.cpp
@ -134,8 +134,6 @@ void IOWrapper::Initialization(const std::vector<FactorType>	&inputFactorOrder
 	if (staticData.GetOutputWordGraph())
 	{
 		string fileName = staticData.GetParam("output-word-graph")[0];
-		bool outputNBest = Scan<bool>(staticData.GetParam("output-word-graph")[1]);
-
 		std::ofstream *file = new std::ofstream;
 		m_outputWordGraphStream  = file;
 		file->open(fileName.c_str());
@ -144,8 +142,8 @@ void IOWrapper::Initialization(const std::vector<FactorType>	&inputFactorOrder
 	// search graph output
 	if (staticData.GetOutputSearchGraph())
 	{
-	  std::ofstream *file = new std::ofstream;
 	  string fileName = staticData.GetParam("output-search-graph")[0];
+	  std::ofstream *file = new std::ofstream;
 	  m_outputSearchGraphStream = file;
 	  file->open(fileName.c_str());
 	}
@ -211,6 +209,48 @@ void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<
 	}
 }

+/** Write the word alignment of a single target phrase to a stream.
+ *
+ *  Nothing is written when the phrase is empty. Otherwise one space is written,
+ *  followed by the alignment stored in the phrase for the requested direction.
+ *  For Input the aligned positions are shifted by trgoffset and the printed word
+ *  positions start at srcoffset; for any other direction the two offsets swap roles.
+ *
+ *  \param out        stream to write to
+ *  \param phrase     target phrase carrying the alignment pair
+ *  \param srcoffset  start position of the phrase on the source side
+ *  \param trgoffset  start position of the phrase on the target side
+ *  \param direction  which alignment of the pair is printed
+ */
+void OutputWordAlignment(std::ostream &out, const TargetPhrase &phrase, size_t srcoffset, size_t trgoffset, FactorDirection direction)
+{
+	// an empty phrase produces no output at all
+	if (phrase.GetSize() == 0)
+		return;
+
+	out << " ";
+
+	// work on a copy: shifting must not modify the alignment stored in the phrase
+	AlignmentPhrase shifted = phrase.GetAlignmentPair().GetAlignmentPhrase(direction);
+
+	const bool sourceToTarget = (direction == Input);
+	const size_t valueShift = sourceToTarget ? trgoffset : srcoffset;
+	const size_t printOffset = sourceToTarget ? srcoffset : trgoffset;
+
+	shifted.Shift(valueShift);
+	shifted.print(out, printOffset);
+}
+
+/** Write the word alignments of a hypothesis and of all its predecessors.
+ *
+ *  The chain of previous hypotheses is followed recursively, so the alignment of the
+ *  earliest hypothesis is written first and that of hypo last. A NULL hypo writes nothing.
+ */
+void OutputWordAlignment(std::ostream &out, const Hypothesis *hypo, FactorDirection direction)
+{
+	if (hypo == NULL)
+		return;
+
+	const size_t srcoffset = hypo->GetCurrSourceWordsRange().GetStartPos();
+	const size_t trgoffset = hypo->GetCurrTargetWordsRange().GetStartPos();
+
+	// predecessors first, then the phrase added by this hypothesis
+	OutputWordAlignment(out, hypo->GetPrevHypo(), direction);
+	OutputWordAlignment(out, hypo->GetCurrTargetPhrase(), srcoffset, trgoffset, direction);
+}
+
 void IOWrapper::Backtrack(const Hypothesis *hypo){

 	if (hypo->GetPrevHypo() != NULL) {
@ -281,6 +321,7 @@ void IOWrapper::OutputNBestList(const TrellisPathList &nBestList, long translati
 {
 	bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
 	bool includeAlignment = StaticData::Instance().NBestIncludesAlignment();
+	bool includeWordAlignment = StaticData::Instance().PrintAlignmentInfoInNbest();
 	
 	TrellisPathList::const_iterator iter;
 	for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
@ -404,28 +445,52 @@ void IOWrapper::OutputNBestList(const TrellisPathList &nBestList, long translati
 		  }
    }
 		
-
 		// total						
    *m_nBestStream << "||| " << path.GetTotalScore();
+		
+		//phrase-to-phrase alignment
    if (includeAlignment) {
-		*m_nBestStream << " |||";
-		for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--)
-		{
-			const Hypothesis &edge = *edges[currEdge];
-			const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
-			WordsRange targetRange = path.GetTargetWordsRange(edge);
-			*m_nBestStream << " " << sourceRange.GetStartPos();
-			if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
-			  *m_nBestStream << "-" << sourceRange.GetEndPos();
+			*m_nBestStream << " |||";
+			for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--)
+			{
+				const Hypothesis &edge = *edges[currEdge];
+				const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+				WordsRange targetRange = path.GetTargetWordsRange(edge);
+				*m_nBestStream << " " << sourceRange.GetStartPos();
+				if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
+					*m_nBestStream << "-" << sourceRange.GetEndPos();
+				}
+				*m_nBestStream << "=" << targetRange.GetStartPos();
+				if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
+					*m_nBestStream << "-" << targetRange.GetEndPos();
+				}
 			}
-			*m_nBestStream << "=" << targetRange.GetStartPos();
-			if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
-			  *m_nBestStream << "-" << targetRange.GetEndPos();
+    }
+		
+				
+		if (includeWordAlignment){			
+			//word-to-word alignment (source-to-target)
+			*m_nBestStream << " |||";
+			for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--)
+			{
+				const Hypothesis &edge = *edges[currEdge];
+				WordsRange targetRange = path.GetTargetWordsRange(edge);
+				OutputWordAlignment(*m_nBestStream, edge.GetCurrTargetPhrase(),edge.GetCurrSourceWordsRange().GetStartPos(),targetRange.GetStartPos(), Input);
+			}
+
+			//word-to-word alignment (target-to-source)
+			*m_nBestStream << " |||";		
+			for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--)
+			{
+				const Hypothesis &edge = *edges[currEdge];
+				WordsRange targetRange = path.GetTargetWordsRange(edge);
+				OutputWordAlignment(*m_nBestStream, edge.GetCurrTargetPhrase(),edge.GetCurrSourceWordsRange().GetStartPos(),targetRange.GetStartPos(), Output);
 			}
 		}
-    }
-    *m_nBestStream << endl;
+				
+		*m_nBestStream << endl;
 	}

+
 	*m_nBestStream<<std::flush;
 }
--- a/moses/src/AlignmentElement.cpp
+++ b/moses/src/AlignmentElement.cpp
@ -0,0 +1,97 @@
+// $Id$
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include <algorithm>
+#include "AlignmentElement.h"
+
+using namespace std;
+
+/** Construct from an existing set of aligned positions (copied). */
+AlignmentElement::AlignmentElement(const ContainerType &alignInfo)
+	: m_collection(alignInfo)
+{
+}
+
+/** Construct from a vector of aligned positions; the set drops duplicates. */
+AlignmentElement::AlignmentElement(const vector<AlignmentElementType> &alignInfo)
+	: m_collection(alignInfo.begin(), alignInfo.end())
+{
+}
+
+/** Copy constructor: duplicates the aligned positions of alignInfo. */
+AlignmentElement::AlignmentElement(const AlignmentElement &alignInfo)
+	: m_collection(alignInfo.m_collection)
+{
+}
+
+/** Assignment: afterwards this element holds exactly the alignments of alignInfo.
+ *
+ *  The previous contents are replaced. Inserting the new positions into the
+ *  existing set, as was done before, left the union of the old and the new
+ *  alignments behind instead of a copy.
+ *
+ *  \return reference to this element
+ */
+AlignmentElement& AlignmentElement::operator=(const AlignmentElement& alignInfo)
+{
+	// std::set assignment discards the old elements and is safe for self-assignment
+	m_collection = alignInfo.m_collection;
+
+	return *this;
+}
+
+/** Add shift to every aligned position.
+ *  The value -1 is kept as it is (presumably a "no alignment" marker - TODO confirm).
+ */
+void AlignmentElement::Shift(int shift)
+{
+	ContainerType shifted;
+
+	for (const_iterator it = m_collection.begin() ; it != m_collection.end() ; ++it)
+	{
+		const AlignmentElementType value = *it;
+		if (value == -1)
+			shifted.insert(value);
+		else
+			shifted.insert(value + shift);
+	}
+
+	m_collection.swap(shifted);
+}
+
+/** Write the aligned positions as a comma-separated list, e.g. "0,2,3".
+ *  An element without alignments writes nothing.
+ */
+std::ostream& operator<<(std::ostream& out, const AlignmentElement &alignElement)
+{
+	const AlignmentElement::ContainerType &positions = alignElement.GetCollection();
+
+	bool needSeparator = false;
+	for (AlignmentElement::ContainerType::const_iterator it = positions.begin() ; it != positions.end() ; ++it)
+	{
+		if (needSeparator)
+			out << ",";
+		out << *it;
+		needSeparator = true;
+	}
+
+	return out;
+}
+
+/** Keep only those aligned positions that are also present in otherElement. */
+void AlignmentElement::SetIntersect(const AlignmentElement &otherElement)
+{
+	ContainerType common;
+	insert_iterator<ContainerType> sink(common, common.begin());
+
+	// both ranges come from std::set, so they are sorted as set_intersection requires
+	set_intersection(m_collection.begin(), m_collection.end(),
+									 otherElement.begin(), otherElement.end(),
+									 sink);
+
+	m_collection.swap(common);
+}
+
+/** Add every position 0 .. otherPhraseSize-1 to the set of aligned positions. */
+void AlignmentElement::SetUniformAlignment(size_t otherPhraseSize)
+{
+	size_t pos = 0;
+	while (pos < otherPhraseSize)
+	{
+		m_collection.insert(pos);
+		++pos;
+	}
+}
+
+TO_STRING_BODY(AlignmentElement);
+
--- a/moses/src/AlignmentElement.h
+++ b/moses/src/AlignmentElement.h
@ -0,0 +1,105 @@
+// $Id$
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+
+#include <iostream>
+#include <set>
+#include <vector>
+#include "Util.h"
+
+typedef short int AlignmentElementType;
+
+/** Set of positions that one word is aligned to. */
+class AlignmentElement
+{
+	friend std::ostream& operator<<(std::ostream& out, const AlignmentElement &alignElement);
+
+protected:
+	typedef std::set<AlignmentElementType> ContainerType;
+	ContainerType m_collection; //!< the aligned positions
+
+public:
+	typedef ContainerType::iterator iterator;
+	typedef ContainerType::const_iterator const_iterator;
+
+	//! read-only iteration over the aligned positions
+	const_iterator begin() const
+	{
+		return m_collection.begin();
+	}
+	const_iterator end() const
+	{
+		return m_collection.end();
+	}
+
+	//! element without any alignment
+	AlignmentElement() {}
+	~AlignmentElement() {}
+
+	//! initial constructor from parsed info from phrase table
+	AlignmentElement(const ContainerType &alignInfo);
+	AlignmentElement(const std::vector<AlignmentElementType> &alignInfo);
+	AlignmentElement(const AlignmentElement &alignInfo);
+
+	AlignmentElement& operator=(const AlignmentElement &copy);
+
+	//! number of words this element aligns to
+	size_t GetSize() const { return m_collection.size(); }
+
+	//! true if this element aligns to nothing
+	bool IsEmpty() const { return m_collection.empty(); }
+
+	//! return internal collection of elements
+	const ContainerType &GetCollection() const { return m_collection; }
+
+	/** Compare all alignments of this word with those of another word.
+		*	Return true iff both words are aligned to the same positions.
+		*/
+	bool Equals(const AlignmentElement &compare) const
+	{
+		return compare.GetCollection() == m_collection;
+	}
+
+	/** Used by the unknown word handler.
+		*	Adds position 0 to the alignment.
+		*/
+	void SetIdentityAlignment() { m_collection.insert(0); }
+
+	/** Align to all positions on the other side, where the size of the other
+		*	phrase is otherPhraseSize. Used when the element has no alignment info.
+		*/
+	void SetUniformAlignment(size_t otherPhraseSize);
+
+	/** Intersect with another element. Used when applying a trans opt to a hypo.
+		*/
+	void SetIntersect(const AlignmentElement &otherElement);
+
+	//! add a single aligned position
+	void Add(size_t pos) { m_collection.insert(pos); }
+
+	//! shift the alignment so that it is comparable to another alignment
+	void Shift(int shift);
+
+	TO_STRING();
+};
--- a/moses/src/AlignmentPair.cpp
+++ b/moses/src/AlignmentPair.cpp
@ -0,0 +1,97 @@
+// $Id$
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "AlignmentPair.h"
+#include "AlignmentPhrase.h"
+#include "WordsRange.h"
+
+using namespace std;
+
+/** Return a back inserter into the element vector of the source alignment (Input)
+ *  or of the target alignment (any other direction).
+ */
+AlignmentPhraseInserter AlignmentPair::GetInserter(FactorDirection direction)
+{
+	if (direction == Input)
+		return back_insert_iterator<AlignmentPhrase::CollectionType>(m_sourceAlign.GetVector());
+
+	return back_insert_iterator<AlignmentPhrase::CollectionType>(m_targetAlign.GetVector());
+}
+
+/** Append one element aligned to position 0 to both the source and the target alignment. */
+void AlignmentPair::SetIdentityAlignment()
+{
+	AlignmentElement identity;
+	identity.SetIdentityAlignment();
+
+	// each phrase stores its own copy of the element
+	m_sourceAlign.Add(identity);
+	m_targetAlign.Add(identity);
+}
+
+/** Check both directions of this pair against another pair.
+ *  The target side is only examined when the source side is compatible.
+ */
+bool AlignmentPair::IsCompatible(const AlignmentPair &compare
+																, size_t sourceStart
+																, size_t targetStart) const
+{
+	const AlignmentPhrase &thisSource = GetAlignmentPhrase(Input);
+	const AlignmentPhrase &thisTarget = GetAlignmentPhrase(Output);
+
+	// source side: merge at sourceStart, shift by targetStart; target side: the reverse
+	return thisSource.IsCompatible(compare.GetAlignmentPhrase(Input), sourceStart, targetStart)
+			&& thisTarget.IsCompatible(compare.GetAlignmentPhrase(Output), targetStart, sourceStart);
+}
+
+/** Add newAlignment to both alignment phrases.
+ *  Source alignments are shifted by the target start and placed at the source start;
+ *  target alignments are shifted by the source start and placed at the target start.
+ */
+void AlignmentPair::Add(const AlignmentPair &newAlignment
+												, const WordsRange &sourceRange
+												, const WordsRange &targetRange)
+{
+	const size_t sourceStart = sourceRange.GetStartPos();
+	const size_t targetStart = targetRange.GetStartPos();
+
+	m_sourceAlign.Add(newAlignment.m_sourceAlign, targetStart, sourceStart);
+	m_targetAlign.Add(newAlignment.m_targetAlign, sourceStart, targetStart);
+}
+
+/** Merge newAlignment into both alignment phrases, using the same shift and
+ *  start positions as Add().
+ */
+void AlignmentPair::Merge(const AlignmentPair &newAlignment, const WordsRange &sourceRange, const WordsRange &targetRange)
+{
+	const size_t sourceStart = sourceRange.GetStartPos();
+	const size_t targetStart = targetRange.GetStartPos();
+
+	m_sourceAlign.Merge(newAlignment.m_sourceAlign, targetStart, sourceStart);
+	m_targetAlign.Merge(newAlignment.m_targetAlign, sourceStart, targetStart);
+}
+
+TO_STRING_BODY(AlignmentPair);
+
+/** Write both alignments of the pair as "f2e: <source> , e2f: <target> ". */
+std::ostream& operator<<(std::ostream &out, const AlignmentPair &alignmentPair)
+{
+	const AlignmentPhrase &f2e = alignmentPair.GetAlignmentPhrase(Input);
+	const AlignmentPhrase &e2f = alignmentPair.GetAlignmentPhrase(Output);
+
+	out << "f2e: ";
+	f2e.print(out);
+
+	out << " , e2f: ";
+	e2f.print(out);
+
+	out << " ";
+	return out;
+}
+
--- a/moses/src/AlignmentPair.h
+++ b/moses/src/AlignmentPair.h
@ -0,0 +1,107 @@
+// $Id$
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+
+#include <iostream>
+#include <vector>
+#include <iterator>
+#include "TypeDef.h"
+#include "Util.h"
+#include "AlignmentPhrase.h"
+
+typedef std::back_insert_iterator<AlignmentPhrase::CollectionType> AlignmentPhraseInserter;
+
+/** Alignment info between a source and a target phrase, one alignment phrase per direction. */
+class AlignmentPair
+{
+	friend std::ostream& operator<<(std::ostream&, const AlignmentPair&);
+
+protected:
+	AlignmentPhrase m_sourceAlign, m_targetAlign;
+
+public:
+	//! constructor, both alignment phrases empty
+	AlignmentPair()
+	{}
+
+	//! constructor, init source size. used in hypo
+	AlignmentPair(size_t sourceSize)
+		:m_sourceAlign(sourceSize)
+	{}
+
+	//! constructor, by copy
+	AlignmentPair(const AlignmentPair& a)
+		:m_sourceAlign(a.m_sourceAlign)
+		,m_targetAlign(a.m_targetAlign)
+	{}
+
+	//! constructor, by copy of a source (a) and a target (b) alignment phrase
+	AlignmentPair(const AlignmentPhrase& a, const AlignmentPhrase& b)
+		:m_sourceAlign(a)
+		,m_targetAlign(b)
+	{}
+
+	~AlignmentPair() {}
+
+	/** get the back_insert_iterator to the source or target alignment vector so that
+		*	they could be populated
+		*/
+	AlignmentPhraseInserter GetInserter(FactorDirection direction);
+
+	//! source alignment for Input, target alignment for any other direction
+	const AlignmentPhrase &GetAlignmentPhrase(FactorDirection direction) const
+	{
+		if (direction == Input)
+			return m_sourceAlign;
+		return m_targetAlign;
+	}
+
+	AlignmentPhrase &GetAlignmentPhrase(FactorDirection direction)
+	{
+		if (direction == Input)
+			return m_sourceAlign;
+		return m_targetAlign;
+	}
+
+	//! replace the alignment phrase of one direction
+	void SetAlignmentPhrase(FactorDirection direction, const AlignmentPhrase& a)
+	{
+		GetAlignmentPhrase(direction) = a;
+	}
+
+	//! replace both alignment phrases: a becomes the source, b the target alignment
+	void SetAlignmentPhrase(const AlignmentPhrase& a, const AlignmentPhrase& b)
+	{
+		m_sourceAlign = a;
+		m_targetAlign = b;
+	}
+
+	/** used by the unknown word handler.
+		* Set alignment to 0
+		*/
+	void SetIdentityAlignment();
+
+	//! call Add() for both source and target alignment phrase
+	void Add(const AlignmentPair &newAlignment, const WordsRange &sourceRange, const WordsRange &targetRange);
+
+	//! call Merge() for both source and target alignment phrase
+	void Merge(const AlignmentPair &newAlignment, const WordsRange &sourceRange, const WordsRange &targetRange);
+
+	//! true if both directions are compatible with the corresponding directions of compare
+	bool IsCompatible(const AlignmentPair &compare
+									, size_t sourceStart
+									, size_t targetStart) const;
+
+	TO_STRING();
+};
+
--- a/moses/src/AlignmentPhrase.cpp
+++ b/moses/src/AlignmentPhrase.cpp
@ -0,0 +1,229 @@
+// $Id$
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "AlignmentPhrase.h"
+#include "WordsRange.h"
+#include "WordsBitmap.h"
+#include "UserMessage.h"
+
+using namespace std;
+
+
+
+/** Set Align to the textual form of Size empty alignments:
+ *  a leading space followed by "() " once per word.
+ */
+void EmptyAlignment(string &Align, size_t Size)
+{
+	string result(" ");
+	for (size_t remaining = Size ; remaining > 0 ; --remaining)
+		result.append("() ");
+
+	Align = result;
+}
+
+/** Set Align to the textual form of a uniform alignment: for each of the fSize
+ *  words one group "(0,1,...,eSize-1) " listing every position of the other side.
+ */
+void UniformAlignment(string &Align, size_t fSize, size_t eSize)
+{
+	// every word gets the same group, so build it only once
+	std::stringstream group;
+	group << "(";
+	for (size_t epos = 0 ; epos < eSize ; ++epos)
+	{
+		if (epos > 0)
+			group << ",";
+		group << epos;
+	}
+	group << ") ";
+	const string oneWord = group.str();
+
+	string result;
+	for (size_t fpos = 0 ; fpos < fSize ; ++fpos)
+		result += oneWord;
+
+	Align = result;
+}
+
+/** Deep copy: every element that exists in copy is duplicated, missing elements stay NULL. */
+AlignmentPhrase::AlignmentPhrase(const AlignmentPhrase &copy)
+: m_collection(copy.m_collection.size(), static_cast<AlignmentElement*>(NULL))
+{
+	const size_t size = copy.m_collection.size();
+	for (size_t pos = 0 ; pos < size ; ++pos)
+	{
+		if (!copy.Exists(pos))
+			continue; // slot already holds NULL
+
+		m_collection[pos] = new AlignmentElement(copy.GetElement(pos));
+	}
+}
+
+/** Assignment: replace the contents of this phrase by a deep copy of copy.
+ *
+ *  Elements are heap objects referenced by pointer (created with new in this class,
+ *  handed to RemoveAllInColl by the destructor). The elements held before the
+ *  assignment are therefore deleted here; previously their pointers were simply
+ *  overwritten (or cut off by the resize), which leaked them on every assignment
+ *  to a non-empty phrase.
+ *
+ *  \return reference to this phrase
+ */
+AlignmentPhrase& AlignmentPhrase::operator=(const AlignmentPhrase &copy)
+{
+	// build the complete deep copy first; this also keeps self-assignment harmless
+	CollectionType fresh(copy.GetSize(), static_cast<AlignmentElement*>(NULL));
+	for (size_t pos = 0 ; pos < copy.GetSize() ; ++pos)
+	{
+		if (copy.Exists(pos))
+			fresh[pos] = new AlignmentElement(copy.GetElement(pos));
+	}
+
+	// take over the new elements; 'fresh' now holds the previously stored ones
+	m_collection.swap(fresh);
+
+	for (size_t pos = 0 ; pos < fresh.size() ; ++pos)
+		delete fresh[pos]; // deleting a NULL slot is a no-op
+
+	return *this;
+}
+
+/** Create a phrase with size slots, none of which has an alignment element yet. */
+AlignmentPhrase::AlignmentPhrase(size_t size)
+	:m_collection(size, static_cast<AlignmentElement*>(NULL))
+{
+}
+
+// Destructor: hands the element pointers to RemoveAllInColl
+// (presumably deletes every element - defined outside this file, TODO confirm).
+AlignmentPhrase::~AlignmentPhrase()
+{
+	RemoveAllInColl(m_collection);
+}
+
+/** Check whether another alignment phrase can be laid over this one.
+ *
+ *  Position posCompare of compare is matched against position
+ *  mergePosStart + posCompare of this phrase. Each element of compare is shifted
+ *  by shiftPos before the comparison. Positions for which either side has no
+ *  alignment element place no constraint and are skipped.
+ *
+ *  Fixes over the previous version:
+ *  - skipping a missing element no longer leaves the position in this phrase
+ *    behind (the old 'continue' bypassed the increment, so every later position
+ *    was silently skipped as well);
+ *  - a mergePosStart at or beyond the end no longer underflows the size computation;
+ *  - a missing element in compare is no longer dereferenced.
+ *
+ *  \param compare        phrase to compare against
+ *  \param mergePosStart  first position in this phrase that compare overlaps
+ *  \param shiftPos       amount added to the aligned positions of compare
+ *  \return false as soon as an overlapping pair of elements differs, true otherwise
+ */
+bool AlignmentPhrase::IsCompatible(const AlignmentPhrase &compare, size_t mergePosStart, size_t shiftPos) const 
+{
+	// no overlap at all: nothing can conflict (and GetSize() - mergePosStart would wrap around)
+	if (mergePosStart >= GetSize())
+		return true;
+
+	const size_t compareSize = min(GetSize() - mergePosStart	, compare.GetSize());
+
+	for (size_t posCompare = 0 ; posCompare < compareSize ; ++posCompare)
+	{
+		// derived from the loop index, so both positions always advance together
+		const size_t posThis = mergePosStart + posCompare;
+		assert(posThis < GetSize());
+
+		if (!Exists(posThis) || !compare.Exists(posCompare))
+			continue;
+
+		const AlignmentElement &alignThis = GetElement(posThis);
+		AlignmentElement alignCompare = compare.GetElement(posCompare);
+
+		// shift alignment
+		alignCompare.Shift( (int)shiftPos);
+
+		if (!alignThis.Equals(alignCompare))
+			return false;
+	}
+
+	return true;
+}
+
+/** Add the elements of newAlignment to this phrase, starting at startPos.
+ *
+ *  Every element is shifted by shift first. At a position inside this phrase it is
+ *  intersected with the element already there, or stored as a new element if the
+ *  slot is empty. At the position directly behind the last slot it is appended.
+ */
+void AlignmentPhrase::Add(const AlignmentPhrase &newAlignment, size_t shift, size_t startPos)
+{
+	for (size_t offset = 0 ; offset < newAlignment.GetSize() ; ++offset)
+	{
+		const size_t insertPos = startPos + offset;
+
+		// shift a private copy, newAlignment itself stays untouched
+		AlignmentElement shifted = newAlignment.GetElement(offset);
+		shifted.Shift( (int)shift );
+
+		if (insertPos < GetSize())
+		{
+			if (Exists(insertPos))
+				m_collection[insertPos]->SetIntersect(shifted);
+			else
+				m_collection[insertPos] = new AlignmentElement(shifted);
+			continue;
+		}
+
+		// probably doing target. append alignment to end
+		assert(insertPos == GetSize());
+		Add(shifted);
+	}
+}
+
+/** Shift the aligned positions of every element of this phrase by shift.
+ *
+ *  Slots without an alignment element (see Exists()) are left alone; they were
+ *  dereferenced unconditionally before, which is undefined behaviour for a NULL slot.
+ */
+void AlignmentPhrase::Shift(size_t shift)
+{
+	for (size_t pos = 0 ; pos < GetSize() ; ++pos)
+	{
+		if (!Exists(pos))
+			continue;
+
+		// shift alignment
+		GetElement(pos).Shift( (int)shift );
+	}
+}
+
+/** Merge newAlignment into this phrase, starting at startPos.
+ *  Every element of newAlignment is shifted by shift and then intersected with
+ *  the element of this phrase at the corresponding position.
+ */
+void AlignmentPhrase::Merge(const AlignmentPhrase &newAlignment, size_t shift, size_t startPos)
+{
+	assert(startPos < GetSize());
+
+	for (size_t offset = 0 ; offset < newAlignment.GetSize() ; ++offset)
+	{
+		// shift a private copy of the incoming element
+		AlignmentElement shifted = newAlignment.GetElement(offset);
+		shifted.Shift( (int)shift );
+
+		// merge elements to only contain co-joined elements
+		GetElement(startPos + offset).SetIntersect(shifted);
+	}
+}
+
+/** Add every position listed in uniformAlignmentTarget to every element of this phrase. */
+void AlignmentPhrase::AddUniformAlignmentElement(std::list<size_t> &uniformAlignmentTarget)
+{
+	const list<size_t>::const_iterator last = uniformAlignmentTarget.end();
+	for (list<size_t>::const_iterator target = uniformAlignmentTarget.begin() ; target != last ; ++target)
+	{
+		const size_t targetPos = *target;
+		for (size_t pos = 0 ; pos < GetSize() ; ++pos)
+			GetElement(pos).Add(targetPos);
+	}
+}
+
+/** Write the elements of the phrase separated by single spaces.
+ *  A slot without an element is reported through UserMessage and aborts the program.
+ */
+std::ostream& operator<<(std::ostream& out, const AlignmentPhrase &alignmentPhrase)
+{
+	for (size_t pos = 0 ; pos < alignmentPhrase.GetSize() ; ++pos)
+	{
+		if (!alignmentPhrase.Exists(pos))
+		{
+			stringstream strme;
+			strme << "No alignment at position " << pos;
+			UserMessage::Add(strme.str());
+			abort();
+		}
+
+		if (pos > 0)
+			out << " ";
+		out << alignmentPhrase.GetElement(pos);
+	}
+
+	return out;
+}
+
+/** Write the phrase as space-separated "<position>=<alignments>" entries,
+ *  where the printed position is the slot index plus offset.
+ *  A slot without an element is reported through UserMessage and aborts the program.
+ */
+void AlignmentPhrase::print(std::ostream& out, size_t offset) const
+{
+	for (size_t pos = 0 ; pos < GetSize() ; ++pos)
+	{
+		if (!Exists(pos))
+		{
+			stringstream strme;
+			strme << "No alignment at position " << pos;
+			UserMessage::Add(strme.str());
+			abort();
+		}
+
+		if (pos > 0)
+			out << " ";
+		out << pos+offset << "=";
+		out << GetElement(pos);
+	}
+}
+
+TO_STRING_BODY(AlignmentPhrase);
+
--- a/moses/src/AlignmentPhrase.h
+++ b/moses/src/AlignmentPhrase.h
@ -0,0 +1,103 @@
+// $Id$
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+
+#include <iostream>
+#include <vector>
+#include "AlignmentElement.h"
+#include "Util.h"
+
+void EmptyAlignment(std::string &Align, size_t Size);
+void UniformAlignment(std::string &Align, size_t fSize, size_t eSize);
+
+class WordsRange;
+class WordsBitmap;
+
+/** Alignments of each word in a phrase: one AlignmentElement pointer per word,
+ *  NULL where a word has no alignment element.
+ */
+class AlignmentPhrase
+{
+	friend std::ostream& operator<<(std::ostream& out, const AlignmentPhrase &alignmentPhrase);
+
+public:
+	typedef std::vector<AlignmentElement*> CollectionType;
+
+protected:
+	CollectionType  m_collection;
+
+public:
+	//! phrase without any slots
+	AlignmentPhrase() {}
+	//! phrase with size slots, all without an element
+	AlignmentPhrase(size_t size);
+
+	/** copy constructor */
+	AlignmentPhrase(const AlignmentPhrase &copy);
+	AlignmentPhrase& operator=(const AlignmentPhrase&);
+
+	/** destructor */
+	~AlignmentPhrase();
+
+	/** compare with another alignment phrase, return true if the other alignment phrase is a
+		*	subset of this. Used to see whether a trans opt can be used to expand a hypo
+		*/
+	bool IsCompatible(const AlignmentPhrase &compare, size_t mergePosStart, size_t shiftPos) const;
+
+	//! add newAlignment to this alignment phrase starting at startPos, shifting its aligned positions by shift
+	void Add(const AlignmentPhrase &newAlignment, size_t shift, size_t startPos);
+
+	/** merge newAlignment into this alignment phrase starting at startPos, shifting its
+		*	aligned positions by shift. Uses the intersection of each alignment element
+		*/
+	void Merge(const AlignmentPhrase &newAlignment, size_t shift, size_t startPos);
+
+	//! shift the aligned positions of all elements
+	void Shift(size_t shift);
+
+	//! number of slots (words) in this phrase
+	size_t GetSize() const { return m_collection.size(); }
+
+	//! direct access to the underlying vector of element pointers
+	CollectionType &GetVector() { return m_collection; }
+
+	//! append a heap-allocated copy of element
+	void Add(const AlignmentElement &element)
+	{
+		AlignmentElement *copied = new AlignmentElement(element);
+		m_collection.push_back(copied);
+	}
+
+	// add elements which didn't have alignments, so are set to uniform on the other side
+	void AddUniformAlignmentElement(std::list<size_t> &uniformAlignmentTarget);
+
+	//! element at pos; the slot must hold an element (check with Exists())
+	AlignmentElement &GetElement(size_t pos) { return *m_collection[pos]; }
+
+	const AlignmentElement &GetElement(size_t pos) const { return *m_collection[pos]; }
+
+	//! true if the slot at pos holds an element
+	bool Exists(size_t pos) const { return NULL != m_collection[pos]; }
+
+	//! write "<pos+offset>=<alignments>" for every slot, separated by spaces
+	void print(std::ostream& out, size_t offset=0) const;
+
+	TO_STRING();
+};
+
+
+
--- a/moses/src/File.h
+++ b/moses/src/File.h
@ -31,8 +31,7 @@ static const OFF_T InvalidOffT=-1;
 //    these functions work only for bitwise read/write-able types

 template<typename T> inline size_t fWrite(FILE* f,const T& t) {
-  if(fwrite(&t,sizeof(t),1,f)!=1) {
-    TRACE_ERR("ERROR:: fwrite!\n");abort();}
+  if(fwrite(&t,sizeof(t),1,f)!=1) {TRACE_ERR("ERROR:: fwrite!\n");abort();}
  return sizeof(t);
 }

@ -60,10 +59,37 @@ template<typename C> inline size_t fWriteVector(FILE* f,const C& v) {
 }

 template<typename C> inline void fReadVector(FILE* f, C& v) {
-  UINT32 s;fRead(f,s);v.resize(s);
+  UINT32 s;fRead(f,s);
+	v.resize(s);
  size_t r=fread(&(*v.begin()),sizeof(typename C::value_type),s,f);
-  if(r!=s) {
-    TRACE_ERR("ERROR: freadVec! "<<r<<" "<<s<<"\n");abort();}
+  if(r!=s) {TRACE_ERR("ERROR: freadVec! "<<r<<" "<<s<<"\n");abort();}
+}
+
+// Write a length-prefixed string: the length s via fWrite, then s characters from e.
+// Reports through TRACE_ERR and aborts if not all characters could be written.
+// Returns the number of bytes written (length field plus characters).
+inline size_t fWriteString(FILE* f,const char* e, UINT32 s) {
+  const size_t headerBytes=fWrite(f,s);
+  const size_t written=fwrite(e,sizeof(char),s,f);
+  if(written!=s) {TRACE_ERR("ERROR:: fwrite!\n");abort();}
+  return headerBytes+sizeof(char)*s;
+}
+
+// Read a length-prefixed string (the layout fWriteString produces): a UINT32
+// length via fRead, then that many characters, which are stored in e.
+// Reports through TRACE_ERR and aborts if fewer characters than announced can be read.
+// The temporary buffer is released before returning; it was leaked on every call before.
+inline void fReadString(FILE* f,std::string& e)  {
+	UINT32 s;fRead(f,s);
+	char* a=new char[s+1];
+  if(fread(a,sizeof(char),s,f)!=s) {TRACE_ERR("ERROR: fread!\n");abort();}
+	a[s]='\0';
+	e.assign(a);
+	delete[] a; // e holds its own copy, the scratch buffer is no longer needed
+}
+
+// Write a vector of strings: the element count via fWrite, then every string via fWriteString.
+// Returns the total number of bytes written.
+inline size_t fWriteStringVector(FILE* f,const std::vector<std::string>& v) {
+  const UINT32 count=v.size();
+  size_t total=fWrite(f,count);
+  for (size_t i=0;i<count;++i) {
+    const std::string& item=v.at(i);
+    total+=fWriteString(f,item.c_str(),item.size());
+  }
+  return total;
+}
+
+// Read a vector of strings: the element count via fRead, then every string via fReadString.
+inline void fReadStringVector(FILE* f, std::vector<std::string>& v) {
+  UINT32 count;
+  fRead(f,count);
+  v.resize(count);
+  for (size_t i=0;i<count;++i) fReadString(f,v.at(i));
 }

 inline OFF_T fTell(FILE* f) {return FTELLO(f);}
--- a/moses/src/Hypothesis.cpp
+++ b/moses/src/Hypothesis.cpp
@ -57,7 +57,8 @@ Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
 	, m_languageModelStates(StaticData::Instance().GetLMSize(), LanguageModelSingleFactor::UnknownState)
 	, m_arcList(NULL)
 	, m_id(0)
-	, m_lmstats(NULL)
+  , m_lmstats(NULL)
+  , m_alignPair(source.GetSize())
 {	// used for initial seeding of trans process	
 	// initialize scores
 	//_hash_computed = false;
@ -85,7 +86,8 @@ Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &tran
 	, m_languageModelStates(prevHypo.m_languageModelStates)
 	, m_arcList(NULL)
 	, m_id(s_HypothesesCreated++)
-	, m_lmstats(NULL)
+  , m_lmstats(NULL)
+  , m_alignPair(prevHypo.m_alignPair)
 {
 	// assert that we are not extending our hypothesis by retranslating something
 	// that this hypothesis has already translated!
@ -462,7 +464,11 @@ void Hypothesis::PrintHypothesis() const
 	TRACE_ERR( "\tbase score "<< (m_prevHypo->m_totalScore - m_prevHypo->m_futureScore) <<endl);
 	TRACE_ERR( "\tcovering "<<m_currSourceWordsRange.GetStartPos()<<"-"<<m_currSourceWordsRange.GetEndPos()<<": "
 	  << *m_sourcePhrase <<endl);
-	TRACE_ERR( "\ttranslated as: "<<m_targetPhrase<<endl); // <<" => translation cost "<<m_score[ScoreType::PhraseTrans];
+	TRACE_ERR( "\ttranslated as: "<<(Phrase&) m_targetPhrase<<endl); // <<" => translation cost "<<m_score[ScoreType::PhraseTrans];
+	if (PrintAlignmentInfo()){
+		TRACE_ERR( "\tsource-target word alignment: "<< m_targetPhrase.GetAlignmentPair().GetAlignmentPhrase(Input) << endl); // <<" => source to target word-to-word alignment
+		TRACE_ERR( "\ttarget-source word alignment: "<< m_targetPhrase.GetAlignmentPair().GetAlignmentPhrase(Output) << endl); // <<" => target to source word-to-word alignment
+	}
 	if (m_wordDeleted) TRACE_ERR( "\tword deleted"<<endl); 
  //	TRACE_ERR( "\tdistance: "<<GetCurrSourceWordsRange().CalcDistortion(m_prevHypo->GetCurrSourceWordsRange())); // << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<<endl;
  //	TRACE_ERR( "\tlanguage model cost "); // <<m_score[ScoreType::LanguageModelScore]<<endl;
@ -526,6 +532,16 @@ ostream& operator<<(ostream& out, const Hypothesis& hypothesis)
 	// scores
 	out << " [total=" << hypothesis.GetTotalScore() << "]";
 	out << " " << hypothesis.GetScoreBreakdown();
+	
+	// alignment
+	if (hypothesis.PrintAlignmentInfo()){
+		out << " [f2e:";
+		hypothesis.SourceAlignmentToStream(out);
+		out << "]";
+		out << " [e2f:";
+		hypothesis.TargetAlignmentToStream(out);
+		out << "]";
+	}
 	return out;
 }

--- a/moses/src/Hypothesis.h
+++ b/moses/src/Hypothesis.h
@ -36,6 +36,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "LexicalReordering.h"
 #include "InputType.h"
 #include "ObjectPool.h"
+#include "AlignmentPair.h"

 class SquareMatrix;
 class StaticData;
@ -61,7 +62,8 @@ protected:
 	static ObjectPool<Hypothesis> s_objectPool;
 	
 	const Hypothesis* m_prevHypo; /*! backpointer to previous hypothesis (from which this one was created) */
-	const Phrase			&m_targetPhrase; /*! target phrase being created at the current decoding step */
+//	const Phrase			&m_targetPhrase; /*! target phrase being created at the current decoding step */
+	const TargetPhrase			&m_targetPhrase; /*! target phrase being created at the current decoding step */
 	Phrase const*     m_sourcePhrase; /*! input sentence */
 	WordsBitmap				m_sourceCompleted; /*! keeps track of which words have been translated so far */
 	//TODO: how to integrate this into confusion network framework; what if
@ -76,6 +78,7 @@ protected:
 	std::vector<LanguageModelSingleFactor::State> m_languageModelStates; /*! relevant history for language model scoring -- used for recombination */
 	const Hypothesis 	*m_winningHypo;
 	ArcList 					*m_arcList; /*! all arcs that end at the same trellis point as this hypothesis */
+	AlignmentPair     m_alignPair;
 	const TranslationOption *m_transOpt;

 	int m_id; /*! numeric ID of this hypothesis, used for logging */
@ -117,7 +120,8 @@ public:
 	void PrintHypothesis(  const InputType &source, float weightDistortion, float weightWordPenalty) const;

 	/** return target phrase used to create this hypothesis */
-	const Phrase &GetCurrTargetPhrase() const
+//	const Phrase &GetCurrTargetPhrase() const
+	const TargetPhrase &GetCurrTargetPhrase() const
 	{
 		return m_targetPhrase;
 	}
@ -211,14 +215,49 @@ public:
 	//		GenerateNGramCompareHash();
 	//		return _hash;
 	//	}
-
+	
+	
 	void ToStream(std::ostream& out) const
 	{
 		if (m_prevHypo != NULL)
 		{
 			m_prevHypo->ToStream(out);
 		}
-		out << GetCurrTargetPhrase();
+		out << (Phrase) GetCurrTargetPhrase();
+	}
+	
+	//! forwards to PrintAlignmentInfo() of the current target phrase
+	inline bool PrintAlignmentInfo() const
+	{
+		return m_targetPhrase.PrintAlignmentInfo();
+	}
+	
+	/**
+	 * Write the Input-direction alignment phrase of every hypothesis in the
+	 * chain to `out`, previous hypotheses first. A hypothesis without a
+	 * predecessor writes nothing.
+	 */
+	void SourceAlignmentToStream(std::ostream& out) const
+	{
+		if (m_prevHypo == NULL)
+			return;
+
+		m_prevHypo->SourceAlignmentToStream(out);
+
+		// Shift() is applied to a local copy of the target phrase's alignment
+		AlignmentPhrase shifted = m_targetPhrase.GetAlignmentPair().GetAlignmentPhrase(Input);
+		shifted.Shift(m_currTargetWordsRange.GetStartPos());
+
+		out << " ";
+		shifted.print(out, m_currSourceWordsRange.GetStartPos());
+	}
+
+	/**
+	 * Write the Output-direction alignment phrase of every hypothesis in the
+	 * chain to `out`, previous hypotheses first. A hypothesis without a
+	 * predecessor writes nothing.
+	 */
+	void TargetAlignmentToStream(std::ostream& out) const
+	{
+		if (m_prevHypo == NULL)
+			return;
+
+		m_prevHypo->TargetAlignmentToStream(out);
+
+		// Shift() is applied to a local copy of the target phrase's alignment
+		AlignmentPhrase shifted = m_targetPhrase.GetAlignmentPair().GetAlignmentPhrase(Output);
+		shifted.Shift(m_currSourceWordsRange.GetStartPos());
+
+		out << " ";
+		shifted.print(out, m_currTargetWordsRange.GetStartPos());
+	}

 	TO_STRING();
@ -247,6 +286,16 @@ public:
 	float GetTotalScore() const { return m_totalScore; }
 	float GetScore() const { return m_totalScore-m_futureScore; }
 	
+	
+	
+	//! alignment pair stored on this hypothesis (m_alignPair)
+	const AlignmentPair &GetAlignmentPair() const { return m_alignPair; }
+	//! target span that trans opt would populate if applied to this hypo. Used for alignment check
+	size_t GetNextStartPos(const TranslationOption &transOpt) const;
+	
 	std::vector<std::vector<unsigned int> > *GetLMStats() const
 	{
 		return m_lmstats;
--- a/moses/src/LanguageModelSRI.cpp
+++ b/moses/src/LanguageModelSRI.cpp
@ -53,8 +53,6 @@ bool LanguageModelSRI::Load(const std::string &filePath
 												, float weight
 												, size_t nGramOrder)
 {
-	FactorCollection &factorCollection = FactorCollection::Instance();
-
 	m_srilmVocab  = new Vocab();
  m_srilmModel	= new Ngram(*m_srilmVocab, nGramOrder);
 	m_factorType 	= factorType;
--- a/moses/src/Manager.cpp
+++ b/moses/src/Manager.cpp
@ -47,7 +47,6 @@ Manager::Manager(InputType const& source, SearchAlgorithm searchAlgorithm)
 ,m_start(clock())
 ,interrupted_flag(0)
 {
-	VERBOSE(1, "Translating: " << m_source << endl);
 	const StaticData &staticData = StaticData::Instance();
 	staticData.InitializeBeforeSentenceProcessing(source);
 }
@ -72,6 +71,7 @@ Manager::~Manager()
 */
 void Manager::ProcessSentence()
 {
+	//VERBOSE(2,"m_source:" << m_source <<"\n");
 	const StaticData &staticData = StaticData::Instance();
 	staticData.ResetSentenceStats(m_source);
 	const vector <DecodeGraph*>
@ -83,7 +83,6 @@ void Manager::ProcessSentence()
 	//		2. initial hypothesis factors are given in the sentence
 	//CreateTranslationOptions(m_source, phraseDictionary, lmListInitial);
 	m_transOptColl->CreateTranslationOptions(decodeStepVL);
-
 	m_search->ProcessSentence();
 }

@ -199,23 +198,21 @@ void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo
 	const StaticData &staticData = StaticData::Instance();

 	const Hypothesis *prevHypo = hypo->GetPrevHypo();
-			const Phrase *sourcePhrase = hypo->GetSourcePhrase();
-			const Phrase &targetPhrase = hypo->GetCurrTargetPhrase();

 			
-			outputWordGraphStream << "J=" << linkId++
-						<< "\tS=" << prevHypo->GetId()
-						<< "\tE=" << hypo->GetId()
-						<< "\ta=";
+	outputWordGraphStream << "J=" << linkId++
+		<< "\tS=" << prevHypo->GetId()
+		<< "\tE=" << hypo->GetId()
+		<< "\ta=";

-			// phrase table scores
-			const std::vector<PhraseDictionary*> &phraseTables = staticData.GetPhraseDictionaries();
-			std::vector<PhraseDictionary*>::const_iterator iterPhraseTable;
-			for (iterPhraseTable = phraseTables.begin() ; iterPhraseTable != phraseTables.end() ; ++iterPhraseTable)
-			{
+	// phrase table scores
+	const std::vector<PhraseDictionary*> &phraseTables = staticData.GetPhraseDictionaries();
+	std::vector<PhraseDictionary*>::const_iterator iterPhraseTable;
+	for (iterPhraseTable = phraseTables.begin() ; iterPhraseTable != phraseTables.end() ; ++iterPhraseTable)
+	{
 				const PhraseDictionary *phraseTable = *iterPhraseTable;
 				vector<float> scores = hypo->GetScoreBreakdown().GetScoresForProducer(phraseTable);
-
+				
 				outputWordGraphStream << scores[0];
 				vector<float>::const_iterator iterScore;
 				for (iterScore = ++scores.begin() ; iterScore != scores.end() ; ++iterScore)
--- a/moses/src/PDTAimp.h
+++ b/moses/src/PDTAimp.h
@ -113,6 +113,7 @@ public:

 	void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase) 
 	{
+		cerr << "AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)" << endl;
 		assert(GetTargetPhraseCollection(source)==0);
 		
 		VERBOSE(2, "adding unk source phrase "<<source<<"\n");
@ -131,6 +132,7 @@ public:
 	TargetPhraseCollection const* 
 	GetTargetPhraseCollection(Phrase const &src) const
 	{
+		
 		assert(m_dict);
 		if(src.GetSize()==0) return 0;

@ -155,7 +157,10 @@ public:

 		// get target phrases in string representation
 		std::vector<StringTgtCand> cands;
-		m_dict->GetTargetCandidates(srcString,cands);
+		std::vector<StringWordAlignmentCand> swacands;
+		std::vector<StringWordAlignmentCand> twacands;
+//		m_dict->GetTargetCandidates(srcString,cands);
+		m_dict->GetTargetCandidates(srcString,cands,swacands,twacands);
 		if(cands.empty()) 
 		{
 			return 0;
@ -166,36 +171,39 @@ public:

 		// convert into TargetPhrases
 		for(size_t i=0;i<cands.size();++i) 
-			{
-				TargetPhrase targetPhrase(Output);
-
-				StringTgtCand::first_type const& factorStrings=cands[i].first;
-				StringTgtCand::second_type const& probVector=cands[i].second;
-
-				std::vector<float> scoreVector(probVector.size());
-				std::transform(probVector.begin(),probVector.end(),scoreVector.begin(),
-											 TransformScore);
-				std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),
-											 FloorScore);
-				CreateTargetPhrase(targetPhrase,factorStrings,scoreVector);
-				costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),
-																			 tCands.size()));
-				tCands.push_back(targetPhrase);
-			}
-
-		TargetPhraseCollection *rv=PruneTargetCandidates(tCands,costs);
-
+		{
+			TargetPhrase targetPhrase(Output);
+			
+			StringTgtCand::first_type const& factorStrings=cands[i].first;
+			StringTgtCand::second_type const& probVector=cands[i].second;
+			StringWordAlignmentCand::second_type const& swaVector=swacands[i].second;
+			StringWordAlignmentCand::second_type const& twaVector=twacands[i].second;
+			
+			std::vector<float> scoreVector(probVector.size());
+			std::transform(probVector.begin(),probVector.end(),scoreVector.begin(),
+										 TransformScore);
+			std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),
+										 FloorScore);
+			//				CreateTargetPhrase(targetPhrase,factorStrings,scoreVector,&src);
+			CreateTargetPhrase(targetPhrase,factorStrings,scoreVector,swaVector,twaVector,&src);			costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),
+																		 tCands.size()));
+			tCands.push_back(targetPhrase);
+		}
+		
+		TargetPhraseCollection *rv;
+		rv=PruneTargetCandidates(tCands,costs);
 		if(rv->IsEmpty()) 
-			{
-				delete rv;
-				return 0;
-			} 
+		{
+			delete rv;
+			return 0;
+		} 
 		else 
-			{
-				if(useCache) piter.first->second=rv;
-				m_tgtColls.push_back(rv);
-				return rv;
-			}
+		{
+			if(useCache) piter.first->second=rv;
+			m_tgtColls.push_back(rv);
+			return rv;
+		}
+		
 	}


@ -226,7 +234,14 @@ public:
 			m_dict->Create(in,filePath);
 		}
 		TRACE_ERR( "reading bin ttable\n");
-		m_dict->Read(filePath);
+//		m_dict->Read(filePath);
+		bool res=m_dict->Read(filePath);
+		if (!res) {
+			stringstream strme;
+			strme << "bin ttable was read in a wrong way\n";
+			UserMessage::Add(strme.str());
+			exit(1);
+		}
 	}

 	typedef PhraseDictionaryTree::PrefixPtr PPtr;
@ -257,13 +272,36 @@ public:
 	};


-
+	
+	// Fill targetPhrase from the factor strings and scores of one candidate and
+	// record srcPtr as its source phrase. This overload does not create any
+	// word-alignment info on the target phrase.
 	void CreateTargetPhrase(TargetPhrase& targetPhrase,
 													StringTgtCand::first_type const& factorStrings,
 													StringTgtCand::second_type const& scoreVector,
 													Phrase const* srcPtr=0) const
 	{
 		FactorCollection &factorCollection = FactorCollection::Instance();
+		
+		for(size_t k=0;k<factorStrings.size();++k) 
+		{
+			// split one target token into its factors using the configured factor delimiter
+			std::vector<std::string> factors=TokenizeMultiCharSeparator(*factorStrings[k],StaticData::Instance().GetFactorDelimiter());
+			Word& w=targetPhrase.AddWord();
+			// NOTE(review): factors[l] is indexed without checking that the token
+			// yielded at least m_output.size() factors -- confirm the input guarantees it
+			for(size_t l=0;l<m_output.size();++l)
+				w[m_output[l]]= factorCollection.AddFactor(Output, m_output[l], factors[l]);
+		}
+		targetPhrase.SetScore(m_obj, scoreVector, m_weights, m_weightWP, *m_languageModels);
+		targetPhrase.SetSourcePhrase(srcPtr);
+		
+//		targetPhrase.CreateAlignmentInfo("???", "???", 44);
+	}
+	
+	
+	void CreateTargetPhrase(TargetPhrase& targetPhrase,
+													StringTgtCand::first_type const& factorStrings,
+													StringTgtCand::second_type const& scoreVector,
+													StringWordAlignmentCand::second_type const& swaVector,
+													StringWordAlignmentCand::second_type const& twaVector,
+													Phrase const* srcPtr=0) const
+	{
+		FactorCollection &factorCollection = FactorCollection::Instance();

 		for(size_t k=0;k<factorStrings.size();++k) 
 			{
@ -274,6 +312,8 @@ public:
 			}
 		targetPhrase.SetScore(m_obj, scoreVector, m_weights, m_weightWP, *m_languageModels);
 		targetPhrase.SetSourcePhrase(srcPtr);
+		
+		targetPhrase.CreateAlignmentInfo(swaVector, twaVector);
 	}


--- a/moses/src/Parameter.cpp
+++ b/moses/src/Parameter.cpp
@ -88,6 +88,10 @@ Parameter::Parameter()
 	AddParam("cube-pruning-diversity", "cbd", "How many hypotheses should be created for each coverage. (default = 0)");
 	AddParam("search-algorithm", "", "Which search algorithm to use. 0=normal stack, 1=cube pruning, 2=cube growing. (default = 0)");
 	AddParam("constraint","","Target sentence to produce");
+	AddParam("use-alignment-info", "Use word-to-word alignment: actually it is only used to output the word-to-word alignment. Word-to-word alignments are taken from the phrase table if any. Default is false.");
+	AddParam("print-alignment-info", "Output word-to-word alignment into the log file. Word-to-word alignments are takne from the phrase table if any. Default is false");
+	AddParam("print-alignment-info-in-n-best", "Include word-to-word alignment in the n-best list. Word-to-word alignments are takne from the phrase table if any. Default is false");
+
 }

 Parameter::~Parameter()
--- a/moses/src/PhraseDictionaryMemory.cpp
+++ b/moses/src/PhraseDictionaryMemory.cpp
@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "StaticData.h"
 #include "WordsRange.h"
 #include "UserMessage.h"
+#include "AlignmentPair.h"

 using namespace std;

@ -45,7 +46,7 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
 														          , float weightWP)
 {
 	const StaticData &staticData = StaticData::Instance();
-
+	
 	m_tableLimit = tableLimit;
 	m_filePath = filePath;

@ -83,26 +84,41 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
 			stringstream strme;
 			strme << "Syntax error at " << filePath << ":" << line_num;
 			UserMessage::Add(strme.str());
-			return false;
+			abort();
 		}

-		bool isLHSEmpty = (tokens[1].find_first_not_of(" \t", 0) == string::npos);
+		string sourcePhraseString, targetPhraseString;
+		string scoreString;
+		string sourceAlignString, targetAlignString;
+
+		sourcePhraseString=tokens[0];
+		targetPhraseString=tokens[1];
+		if (numElement==3){
+			scoreString=tokens[2];
+		}
+		else{
+			sourceAlignString=tokens[2];
+			targetAlignString=tokens[3];
+			scoreString=tokens[4];
+		}
+		
+		bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
 		if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
 			TRACE_ERR( filePath << ":" << line_num << ": pt entry contains empty target, skipping\n");
 			continue;
 		}

 		const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
-		if (tokens[0] != prevSourcePhrase)
-			phraseVector = Phrase::Parse(tokens[0], input, factorDelimiter);
+		if (sourcePhraseString != prevSourcePhrase)
+			phraseVector = Phrase::Parse(sourcePhraseString, input, factorDelimiter);

-		vector<float> scoreVector = Tokenize<float>(tokens[(numElement==3) ? 2 : 4]);
+		vector<float> scoreVector = Tokenize<float>(scoreString);
 		if (scoreVector.size() != m_numScoreComponent) 
 		{
 			stringstream strme;
 			strme << "Size of scoreVector != number (" <<scoreVector.size() << "!=" <<m_numScoreComponent<<") of score components on line " << line_num;
 			UserMessage::Add(strme.str());
-			return false;
+			abort();
 		}
 //		assert(scoreVector.size() == m_numScoreComponent);
 			
@ -111,8 +127,27 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
 		sourcePhrase.CreateFromString( input, phraseVector);
 		//target
 		TargetPhrase targetPhrase(Output);
-		targetPhrase.CreateFromString( output, tokens[1], factorDelimiter);
-
+		targetPhrase.SetSourcePhrase(&sourcePhrase);
+		targetPhrase.CreateFromString( output, targetPhraseString, factorDelimiter);
+		
+		if (!staticData.UseAlignmentInfo()){
+      UniformAlignment(sourceAlignString, sourcePhrase.GetSize(), targetPhrase.GetSize());
+			UniformAlignment(targetAlignString, targetPhrase.GetSize(), sourcePhrase.GetSize());
+			/*
+			 EmptyAlignment(sourceAlignString, sourcePhrase.GetSize());
+			 EmptyAlignment(targetAlignString, targetPhrase.GetSize());
+			 */
+		}
+		else if (numElement==3){
+			stringstream strme;
+			strme << "You are using AlignmentInfo, but this info not available in the Phrase Table. Only " <<numElement<<" fields on line " << line_num;
+			UserMessage::Add(strme.str());
+			return false;
+		}
+		
+		// alignment info
+		targetPhrase.CreateAlignmentInfo(sourceAlignString, targetAlignString);
+		
 		// component score, for n-best output
 		std::vector<float> scv(scoreVector.size());
 		std::transform(scoreVector.begin(),scoreVector.end(),scv.begin(),TransformScore);
--- a/moses/src/PhraseDictionaryTree.cpp
+++ b/moses/src/PhraseDictionaryTree.cpp
@ -8,12 +8,6 @@
 #include <fstream>
 #include <string>

-#include "PrefixTree.h"
-#include "File.h"
-#include "ObjectPool.h"
-#include "LVoc.h"
-#include "TypeDef.h"
-#include "Util.h"

 template<typename T>
 std::ostream& operator<<(std::ostream& out,const std::vector<T>& x)
@ -25,52 +19,95 @@ std::ostream& operator<<(std::ostream& out,const std::vector<T>& x)
 	return out;
 }

-typedef std::vector<float> Scores;
-typedef PrefixTreeF<LabelId,OFF_T> PTF;

+// One target candidate: a phrase (e), its scores (sc) and the source and
+// target word alignments. writeBin/readBin handle only e and sc; the
+// *WithAlignment variants additionally handle both alignment vectors.
 class TgtCand {
 	IPhrase e;
 	Scores sc;
+	WordAlignments m_sourceAlignment, m_targetAlignment;
 public:
 	TgtCand() {}
+	
+	TgtCand(const IPhrase& a, const Scores& b
+					, const WordAlignments &sourceAlignment, const WordAlignments &targetAlignment) 
+		: e(a)
+		, sc(b)
+		, m_sourceAlignment(sourceAlignment)
+		, m_targetAlignment(targetAlignment)
+	{}
+	
 	TgtCand(const IPhrase& a,const Scores& b) : e(a),sc(b) {}
+	
 	TgtCand(FILE* f) {readBin(f);}
 	
+		
+	// write phrase and scores only; the alignment members are not written
+	void writeBin(FILE* f) const 
+	{
+		fWriteVector(f,e);
+		fWriteVector(f,sc);
+	}
+	
+	// read phrase and scores only; the alignment members are not touched
+	void readBin(FILE* f) 
+	{
+		fReadVector(f,e);
+		fReadVector(f,sc);
+	} 
+	
+	// write phrase, scores, then source and target alignments (in that order)
+	void writeBinWithAlignment(FILE* f) const 
+	{
+		fWriteVector(f,e);
+		fWriteVector(f,sc);
+		fWriteStringVector(f, m_sourceAlignment);
+		fWriteStringVector(f, m_targetAlignment);
+	}
+	
+	// read back in the same order used by writeBinWithAlignment
+	void readBinWithAlignment(FILE* f) 
+	{
+		fReadVector(f,e);
+		fReadVector(f,sc);
+		fReadStringVector(f, m_sourceAlignment);
+		fReadStringVector(f, m_targetAlignment);
+	} 
+	
 	const IPhrase& GetPhrase() const {return e;}
 	const Scores& GetScores() const {return sc;}
-	
-	void writeBin(FILE* f) const {fWriteVector(f,e);fWriteVector(f,sc);}
-	void readBin(FILE* f) {fReadVector(f,e);fReadVector(f,sc);}	
+	const WordAlignments& GetSourceAlignment() const {return m_sourceAlignment;}
+	const WordAlignments& GetTargetAlignment() const {return m_targetAlignment;}
 };
+  

+// Vector of TgtCand with binary (de)serialisation: the element count is stored
+// as an unsigned, followed by every candidate. The plain and *WithAlignment
+// variants call the matching TgtCand method for each element.
 class TgtCands : public std::vector<TgtCand> {
 	typedef std::vector<TgtCand> MyBase;
 public:
 	TgtCands() : MyBase() {}
-
+	
 	void writeBin(FILE* f) const 
 	{
-		unsigned s=size();fWrite(f,s);
+		unsigned s=size();
+		fWrite(f,s);
 		for(size_t i=0;i<s;++i) MyBase::operator[](i).writeBin(f);
 	}
+
+	void writeBinWithAlignment(FILE* f) const 
+	{
+		unsigned s=size();
+		fWrite(f,s);
+		for(size_t i=0;i<s;++i) MyBase::operator[](i).writeBinWithAlignment(f);
+	}
+	
 	void readBin(FILE* f) 
 	{
 		unsigned s;fRead(f,s);resize(s);
 		for(size_t i=0;i<s;++i) MyBase::operator[](i).readBin(f);
 	}
-};
-
-
-struct PPimp {
-	PTF const*p;unsigned idx;bool root;
 	
-	PPimp(PTF const* x,unsigned i,bool b) : p(x),idx(i),root(b) {}
-	bool isValid() const {return root || (p && idx<p->size());}
-
-	bool isRoot() const {return root;}
-	PTF const* ptr() const {return p;}
+	void readBinWithAlignment(FILE* f) 
+	{
+		unsigned s;fRead(f,s);resize(s);
+		for(size_t i=0;i<s;++i) MyBase::operator[](i).readBinWithAlignment(f);
+	}
 };

+
 PhraseDictionaryTree::PrefixPtr::operator bool() const 
 {
 	return imp && imp->isValid();
@ -91,11 +128,19 @@ struct PDTimp {

  ObjectPool<PPimp> pPool; 
 	// a comparison with the Boost MemPools might be useful
+	
+	bool usewordalign;
+	bool printwordalign;

-
-	PDTimp() : os(0),ot(0) {PTF::setDefault(InvalidOffT);}
+	PDTimp() : os(0),ot(0), usewordalign(false), printwordalign(false) {PTF::setDefault(InvalidOffT);}
 	~PDTimp() {if(os) fClose(os);if(ot) fClose(ot);FreeMemory();}
-
+	
+	// setter / getter for the usewordalign flag
+	inline void UseWordAlignment(bool a)
+	{
+		usewordalign=a;
+	}
+	inline bool UseWordAlignment()
+	{
+		return usewordalign;
+	}
+	
+	// setter / getter for the printwordalign flag
+	inline void PrintWordAlignment(bool a)
+	{
+		printwordalign=a;
+	}
+	inline bool PrintWordAlignment()
+	{
+		return printwordalign;
+	}
+	
 	void FreeMemory() 
 	{
 		for(Data::iterator i=data.begin();i!=data.end();++i) (*i).free();
@ -113,20 +158,24 @@ struct PDTimp {
 		OFF_T tCandOffset=data[f[0]]->find(f);
 		if(tCandOffset==InvalidOffT) return;
  	fSeek(ot,tCandOffset);
-   	tgtCands.readBin(ot);
+		
+   	if (UseWordAlignment())    	tgtCands.readBinWithAlignment(ot);
+		else tgtCands.readBin(ot);
 	}

 	typedef PhraseDictionaryTree::PrefixPtr PPtr;

-	void GetTargetCandidates(PPtr p,TgtCands& tgtCands) 
+	// Read the target candidates stored for prefix pointer p into tgtCands.
+	// Returns without reading when p is the root or has no data offset.
+	void GetTargetCandidates(PPtr p,TgtCands& tgtCands)
 	{
 		assert(p);
 		if(p.imp->isRoot()) return;
 		OFF_T tCandOffset=p.imp->ptr()->getData(p.imp->idx);
 		if(tCandOffset==InvalidOffT) return;
  	fSeek(ot,tCandOffset);
-   	tgtCands.readBin(ot);
+		// the record layout read depends on whether word alignments are in use
+   	if (UseWordAlignment())    	tgtCands.readBinWithAlignment(ot);
+		else tgtCands.readBin(ot);
 	}
+
 	void PrintTgtCand(const TgtCands& tcands,std::ostream& out) const;

 	// convert target candidates from internal data structure to the external one
@ -142,6 +191,25 @@ struct PDTimp {
 				rv.push_back(StringTgtCand(vs,i->GetScores()));
 			}
 	}
+	
+		// convert target candidates from internal data structure to the external one
+	void ConvertTgtCand(const TgtCands& tcands,std::vector<StringTgtCand>& rv,
+											std::vector<StringWordAlignmentCand>& swa,
+											std::vector<StringWordAlignmentCand>& twa) const
+	{
+		// one entry is appended to each of rv, swa and twa per candidate
+		for(size_t c=0;c<tcands.size();++c)
+		{
+			const TgtCand& cand=tcands[c];
+			const IPhrase& ids=cand.GetPhrase();
+
+			// look up the string of every word id in tv
+			std::vector<std::string const*> words;
+			words.reserve(ids.size());
+			for(size_t w=0;w<ids.size();++w)
+				words.push_back(&tv.symbol(ids[w]));
+
+			rv.push_back(StringTgtCand(words,cand.GetScores()));
+			swa.push_back(StringWordAlignmentCand(words,cand.GetSourceAlignment()));
+			twa.push_back(StringWordAlignmentCand(words,cand.GetTargetAlignment()));
+		}
+	}

 	PPtr GetRoot() 
 	{
@ -182,11 +250,42 @@ struct PDTimp {

 int PDTimp::Read(const std::string& fn) 
 {
-	std::string ifs(fn+".binphr.srctree"),
-		ift(fn+".binphr.tgtdata"),
-		ifi(fn+".binphr.idx"),
-		ifsv(fn+".binphr.srcvoc"),
-		iftv(fn+".binphr.tgtvoc");
+	const StaticData &staticData = StaticData::Instance();
+	
+	std::string ifs, ift, ifi, ifsv, iftv;
+
+	if (staticData.UseAlignmentInfo()){//asking for word-to-word alignment
+		if (!FileExists(fn+".binphr.srctree.wa") || !FileExists(fn+".binphr.tgtdata.wa")){
+			//		ERROR
+			std::stringstream strme;
+			strme << "You are asking for word alignment but the binary phrase table does not contain any alignment info. Please check if you had generated the correct phrase table with word alignment (.wa)\n";
+			UserMessage::Add(strme.str());
+			return false;
+		}
+		ifs=fn+".binphr.srctree.wa";
+		ift=fn+".binphr.tgtdata.wa";
+		ifi=fn+".binphr.idx";
+		ifsv=fn+".binphr.srcvoc";
+		iftv=fn+".binphr.tgtvoc";
+		UseWordAlignment(true);
+	}
+	else{
+		if (!FileExists(fn+".binphr.srctree") || !FileExists(fn+".binphr.tgtdata")){
+			//		ERROR
+			std::stringstream strme;
+			strme << "You are asking binary phrase table without word alignments but the file do not exist. Please check if you had generated the correct phrase table without word alignment (" << (fn+".binphr.srctree") << "," << (fn+".binphr.tgtdata")<< ")\n";
+			UserMessage::Add(strme.str());
+			return false;
+		}
+	
+		ifs=fn+".binphr.srctree";
+		ift=fn+".binphr.tgtdata";
+		ifi=fn+".binphr.idx";
+		ifsv=fn+".binphr.srcvoc";
+		iftv=fn+".binphr.tgtvoc";
+		
+		UseWordAlignment(false);
+	}

 	FILE *ii=fOpen(ifi.c_str(),"rb");
 	fReadVector(ii,srcOffsets);
@ -210,13 +309,22 @@ int PDTimp::Read(const std::string& fn)
 void PDTimp::PrintTgtCand(const TgtCands& tcand,std::ostream& out) const
 {
 	for(size_t i=0;i<tcand.size();++i) 
-		{
-		  out<<i<<" -- "<<tcand[i].GetScores()<<" -- ";
-		  const IPhrase& iphr=tcand[i].GetPhrase();
-		  for(size_t j=0;j<iphr.size();++j)
-				out<<tv.symbol(iphr[j])<<" ";
-		  out<<'\n';		
-		}
+	{
+		
+		Scores sc=tcand[i].GetScores();
+		WordAlignments			srcAlign=tcand[i].GetSourceAlignment();
+		WordAlignments			trgAlign=tcand[i].GetTargetAlignment();
+			
+		const IPhrase& iphr=tcand[i].GetPhrase();
+
+		out << i << " -- " << sc << " -- ";
+		for(size_t j=0;j<iphr.size();++j)			out << tv.symbol(iphr[j])<<" ";
+		out<< " -- ";		
+		for (size_t j=0;j<srcAlign.size();j++)			out << " " << srcAlign[j];
+		out << " -- ";
+		for (size_t j=0;j<trgAlign.size();j++)			out << " " << trgAlign[j];
+		out << std::endl;
+	}
 }

 ////////////////////////////////////////////////////////////
@ -241,6 +349,13 @@ PhraseDictionaryTree::~PhraseDictionaryTree()
 {
 	delete imp;
 }
+
+// The four functions below forward to the same-named accessors of imp.
+void PhraseDictionaryTree::UseWordAlignment(bool a)
+{
+	imp->UseWordAlignment(a);
+}
+
+bool PhraseDictionaryTree::UseWordAlignment()
+{
+	return imp->UseWordAlignment();
+}
+
+void PhraseDictionaryTree::PrintWordAlignment(bool a)
+{
+	imp->PrintWordAlignment(a);
+}
+
+bool PhraseDictionaryTree::PrintWordAlignment()
+{
+	return imp->PrintWordAlignment();
+}
+
 void PhraseDictionaryTree::FreeMemory() const
 {
 	imp->FreeMemory();
@ -262,6 +377,25 @@ GetTargetCandidates(const std::vector<std::string>& src,
 	imp->ConvertTgtCand(tgtCands,rv);
 }

+// Look up the target candidates of the source phrase src and append them to rv,
+// with their source and target word alignments appended to swa and twa.
+// Nothing is appended when a source word has no id in imp->sv.
+void PhraseDictionaryTree::
+GetTargetCandidates(const std::vector<std::string>& src,
+										std::vector<StringTgtCand>& rv,
+										std::vector<StringWordAlignmentCand>& swa,
+										std::vector<StringWordAlignmentCand>& twa) const 
+{
+	const size_t numWords=src.size();
+	IPhrase srcIds(numWords);
+	for(size_t w=0;w<numWords;++w) 
+	{
+		srcIds[w]=imp->sv.index(src[w]);
+		if(srcIds[w]==InvalidLabelId)
+			return;
+	}
+	
+	TgtCands found;
+	imp->GetTargetCandidates(srcIds,found);
+	imp->ConvertTgtCand(found,rv,swa,twa);
+}
+
+
 void PhraseDictionaryTree::
 PrintTargetCandidates(const std::vector<std::string>& src,
 											std::ostream& out) const 
@ -280,7 +414,6 @@ PrintTargetCandidates(const std::vector<std::string>& src,

 	TgtCands tcand;
 	imp->GetTargetCandidates(f,tcand);
-	out<<"there are "<<tcand.size()<<" target candidates\n";
 	imp->PrintTgtCand(tcand,out);
 }

@ -294,7 +427,12 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
 		ofi(out+".binphr.idx"),
 		ofsv(out+".binphr.srcvoc"),
 		oftv(out+".binphr.tgtvoc");
-
+	
+	if (PrintWordAlignment()){
+		ofn+=".wa";
+		oft+=".wa";
+	}
+	
  FILE *os=fOpen(ofn.c_str(),"wb"),
    *ot=fOpen(oft.c_str(),"wb");

@ -309,108 +447,175 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
 	size_t numElement = NOT_FOUND; // 3=old format, 5=async format which include word alignment info
 	
 	while(getline(inFile, line)) 	
+	{
+		++lnc;
+		
+		std::vector<std::string> tokens = TokenizeMultiCharSeparator( line , "|||" );
+		
+		if (numElement == NOT_FOUND) 
+		{ // init numElement
+			numElement = tokens.size();
+			assert(numElement == 3 || numElement == 5);
+		}
+			 
+		if (tokens.size() != numElement)
 		{
-			++lnc;
+			std::stringstream strme;
+			strme << "Syntax error at line " << lnc  << " : " << line;
+			UserMessage::Add(strme.str());
+			abort();
+		}
+		
+		std::string sourcePhraseString, targetPhraseString;
+		std::string scoreString;
+		std::string sourceAlignString, targetAlignString;
+		
+		sourcePhraseString=tokens[0];
+		targetPhraseString=tokens[1];
+		if (numElement==3){
+			scoreString=tokens[2];
+		}
+		else{
+			sourceAlignString=tokens[2];
+			targetAlignString=tokens[3];
+			scoreString=tokens[4];
+		}
+		
+				
+		IPhrase f,e;
+		Scores sc;
+		WordAlignments sourceAlignment, targetAlignment;
 			
-			std::vector<std::string> tokens = TokenizeMultiCharSeparator( line , "|||" );
-			if (numElement == NOT_FOUND) 
-			{ // init numElement
-				numElement = tokens.size();
-				assert(numElement == 3 || numElement == 5);
-			}
-			else if (tokens.size() != numElement)
+		std::vector<std::string> wordVec = Tokenize(sourcePhraseString);
+		for (size_t i = 0 ; i < wordVec.size() ; ++i)
+			f.push_back(imp->sv.add(wordVec[i]));
+		
+		wordVec = Tokenize(targetPhraseString);
+		for (size_t i = 0 ; i < wordVec.size() ; ++i)
+			e.push_back(imp->tv.add(wordVec[i]));
+		
+		if (!PrintWordAlignment()){// word-to-word alignment are not used, create empty word-to-word alignment 
+			EmptyAlignment(sourceAlignString, f.size());
+			EmptyAlignment(targetAlignString, e.size());
+		}
+		else if (numElement==3){
+			stringstream strme;
+			strme << "You are asking for AlignmentInfo, but this info not available in the Phrase Table. Only " <<numElement<<" fields on line " << lnc  << " : " << line;
+
+			strme << endl << "Deleting files " << ofn << " and " << oft << "..." << endl;
+			if( remove( ofn.c_str() ) != 0 )				strme << "Error deleting file " << ofn;
+			else				strme << "File " << ofn << " successfully deleted";
+			strme << endl;
+			if( remove( oft.c_str() ) != 0 )				strme << "Error deleting file " << oft;
+			else				strme << "File " << oft << " successfully deleted";
+			strme << endl;
+			UserMessage::Add(strme.str());
+			exit(1);
+		}
+		
+		//change "()" into "(-1)" for both source and target word-to-word alignments
+		std::string emtpyAlignStr="()";
+		std::string replaceAlignStr="(-1)";
+		sourceAlignString=Replace(sourceAlignString,emtpyAlignStr,replaceAlignStr);
+		targetAlignString=Replace(targetAlignString,emtpyAlignStr,replaceAlignStr);
+
+		//remove all "(" from both source and target word-to-word alignments
+		emtpyAlignStr="(";
+		replaceAlignStr="";
+		sourceAlignString=Replace(sourceAlignString,emtpyAlignStr,replaceAlignStr);
+		targetAlignString=Replace(targetAlignString,emtpyAlignStr,replaceAlignStr);
+		
+		//remove all ")" from both source and target word-to-word alignments
+		emtpyAlignStr=")";
+		replaceAlignStr="";
+		sourceAlignString=Replace(sourceAlignString,emtpyAlignStr,replaceAlignStr);
+		targetAlignString=Replace(targetAlignString,emtpyAlignStr,replaceAlignStr);
+		
+		sourceAlignment = Tokenize(sourceAlignString);
+		targetAlignment = Tokenize(targetAlignString);
+			
+		//			while(is>>w && w!="|||") sc.push_back(atof(w.c_str()));
+		// Mauro: to handle 0 probs in phrase tables
+		std::vector<float> scoreVector = Tokenize<float>(scoreString);
+		for (size_t i = 0 ; i < scoreVector.size() ; ++i)
+		{
+			float tmp = scoreVector[i];
+			sc.push_back(((tmp>0.0)?tmp:(float)1.0e-38));
+		}
+		
+			
+		if(f.empty())
+		{
+			TRACE_ERR("WARNING: empty source phrase in line '"<<line<<"'\n");
+			continue;
+		}
+			
+		if(currFirstWord==InvalidLabelId) currFirstWord=f[0];
+		if(currF.empty()) 
+		{
+			++count;
+			currF=f;
+			// insert src phrase in prefix tree
+			assert(psa);
+			PSA::Data& d=psa->insert(f);
+			if(d==InvalidOffT) d=fTell(ot);
+			else 
 			{
-				std::stringstream strme;
-				strme << "Syntax error at line " << lnc  << " : " << line;
-				UserMessage::Add(strme.str());
+				TRACE_ERR("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '"
+									<<line<<"'\nf: "<<f<<"\n");
 				abort();
 			}
-			
-				
-			IPhrase f,e;Scores sc;
-			
-			std::vector<std::string> wordVec = Tokenize(tokens[0]);
-			for (size_t i = 0 ; i < wordVec.size() ; ++i)
-				f.push_back(imp->sv.add(wordVec[i]));
+		}

-			wordVec = Tokenize(tokens[1]);
-			for (size_t i = 0 ; i < wordVec.size() ; ++i)
-				e.push_back(imp->tv.add(wordVec[i]));
-			
-			//			while(is>>w && w!="|||") sc.push_back(atof(w.c_str()));
-			// Mauro: to handle 0 probs in phrase tables
-			std::vector<float> scoreVector = Tokenize<float>(tokens[(numElement==3) ? 2 : 4]);
-			for (size_t i = 0 ; i < scoreVector.size() ; ++i)
+		if(currF!=f) 
+		{
+			// new src phrase
+			currF=f;
+			if (PrintWordAlignment())
+				tgtCands.writeBinWithAlignment(ot);
+			else
+				tgtCands.writeBin(ot);
+			tgtCands.clear();
+				
+			if(++count%10000==0) 
 			{
-			  float tmp = scoreVector[i];
-			  sc.push_back(((tmp>0.0)?tmp:(float)1.0e-38));
+				TRACE_ERR(".");
+				if(count%500000==0) TRACE_ERR("[phrase:"<<count<<"]\n");
+			}
+
+			if(f[0]!=currFirstWord) 
+			{
+				// write src prefix tree to file and clear
+				PTF pf;
+				if(currFirstWord>=vo.size()) 
+					vo.resize(currFirstWord+1,InvalidOffT);
+				vo[currFirstWord]=fTell(os);
+				pf.create(*psa,os);
+				// clear
+				delete psa;psa=new PSA;
+				currFirstWord=f[0];
 			}
 			
-			if(f.empty()) 
-				{
-					TRACE_ERR("WARNING: empty source phrase in line '"<<line<<"'\n");
-					continue;
-				}
-			
-			if(currFirstWord==InvalidLabelId) currFirstWord=f[0];
-			if(currF.empty()) 
-				{
-					++count;
-					currF=f;
-					// insert src phrase in prefix tree
-					assert(psa);
-					PSA::Data& d=psa->insert(f);
-					if(d==InvalidOffT) d=fTell(ot);
-					else 
-						{
-							TRACE_ERR("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '"
-											 <<line<<"'\nf: "<<f<<"\n");
-							abort();
-						}
-				}
-
-			if(currF!=f) 
-				{
-					// new src phrase
-					currF=f;
-					tgtCands.writeBin(ot);
-					tgtCands.clear();
-				
-					if(++count%10000==0) 
-						{
-							TRACE_ERR(".");
-							if(count%500000==0) TRACE_ERR("[phrase:"<<count<<"]\n");
-						}
-
-					if(f[0]!=currFirstWord) 
-						{
-							// write src prefix tree to file and clear
-							PTF pf;
-							if(currFirstWord>=vo.size()) 
-								vo.resize(currFirstWord+1,InvalidOffT);
-							vo[currFirstWord]=fTell(os);
-							pf.create(*psa,os);
-							// clear
-							delete psa;psa=new PSA;
-							currFirstWord=f[0];
-						}
-
-					// insert src phrase in prefix tree
-					assert(psa);
-					PSA::Data& d=psa->insert(f);
-					if(d==InvalidOffT) d=fTell(ot);
-					else 
-						{
-							TRACE_ERR("ERROR: xsource phrase already inserted (B)!\nline(" << lnc << "): '"
-											 <<line<<"'\nf: "<<f<<"\n");
-							abort();
-						}
-				}
-			tgtCands.push_back(TgtCand(e,sc));
-			assert(currFirstWord!=InvalidLabelId);
+			// insert src phrase in prefix tree
+			assert(psa);
+			PSA::Data& d=psa->insert(f);
+			if(d==InvalidOffT) d=fTell(ot);
+			else 
+			{
+				TRACE_ERR("ERROR: xsource phrase already inserted (B)!\nline(" << lnc << "): '"
+									<<line<<"'\nf: "<<f<<"\n");
+				abort();
+			}
 		}
-  tgtCands.writeBin(ot);tgtCands.clear();
-
+		tgtCands.push_back(TgtCand(e,sc, sourceAlignment, targetAlignment));
+		assert(currFirstWord!=InvalidLabelId);
+	}
+  if (PrintWordAlignment())
+		tgtCands.writeBinWithAlignment(ot);
+  else
+		tgtCands.writeBin(ot);
+	tgtCands.clear();
+	
  PTF pf;
  if(currFirstWord>=vo.size()) vo.resize(currFirstWord+1,InvalidOffT);
  vo[currFirstWord]=fTell(os);
@ -484,8 +689,17 @@ GetTargetCandidates(PrefixPtr p,
 	imp->ConvertTgtCand(tcands,rv);
 }

-std::string PhraseDictionaryTree::GetScoreProducerDescription() const
+void PhraseDictionaryTree:: // overload that also fills two word-alignment candidate lists (swa, twa)
+GetTargetCandidates(PrefixPtr p,
+										std::vector<StringTgtCand>& rv,
+										std::vector<StringWordAlignmentCand>& swa,
+										std::vector<StringWordAlignmentCand>& twa) const 
 {
-  return "Phrase dictionary tree";
+	TgtCands tcands; // scratch container for the raw candidates
+	imp->GetTargetCandidates(p,tcands); // fetch the candidates for prefix pointer p from the implementation object
+	imp->ConvertTgtCand(tcands,rv,swa,twa); // convert them into rv, swa and twa (presumably source/target word alignments - confirm in PDTimp)
 }

+std::string PhraseDictionaryTree::GetScoreProducerDescription() const{ // returns the fixed description string of this score producer
+	return "Phrase dictionary tree";
+}
--- a/moses/src/PhraseDictionaryTree.h
+++ b/moses/src/PhraseDictionaryTree.h
@ -8,11 +8,21 @@
 #include "TypeDef.h"
 #include "Dictionary.h"

+
+#include "PrefixTree.h"
+#include "File.h"
+#include "ObjectPool.h"
+#include "LVoc.h"
+#include "TypeDef.h"
+#include "Util.h"
+#include "StaticData.h"
+
 class Phrase;
 class Word;
 class ConfusionNet;

-typedef std::pair<std::vector<std::string const*>,std::vector<float> > StringTgtCand;
+
+typedef PrefixTreeF<LabelId,OFF_T> PTF;

 class PDTimp;
 class PPimp;
@ -24,7 +34,14 @@ class PhraseDictionaryTree : public Dictionary {
 	PhraseDictionaryTree(const PhraseDictionaryTree&); //not implemented
 	void operator=(const PhraseDictionaryTree&); //not implemented
 public:
-	PhraseDictionaryTree(size_t numScoreComponent);
+		PhraseDictionaryTree(size_t numScoreComponent);
+	
+	void UseWordAlignment(bool a);
+	bool UseWordAlignment();
+	
+	void PrintWordAlignment(bool a);
+	bool PrintWordAlignment();
+	

 	virtual ~PhraseDictionaryTree();

@ -48,10 +65,16 @@ public:
 	// print target candidates for a given phrase, mainly for debugging
 	void PrintTargetCandidates(const std::vector<std::string>& src,
 														 std::ostream& out) const;
-
+	
 	// get the target candidates for a given phrase
 	void GetTargetCandidates(const std::vector<std::string>& src,
 													 std::vector<StringTgtCand>& rv) const;
+	
+	// get the target candidates for a given phrase
+	void GetTargetCandidates(const std::vector<std::string>& src,
+													 std::vector<StringTgtCand>& rv,
+													 std::vector<StringWordAlignmentCand>& swa,
+													 std::vector<StringWordAlignmentCand>& twa) const;

 	/*****************************
 	 *   access to prefix tree   *
@ -81,6 +104,10 @@ public:
 	// requirement: the pointer has to evaluate to true
 	void GetTargetCandidates(PrefixPtr p,
 													 std::vector<StringTgtCand>& rv) const;
+	void GetTargetCandidates(PrefixPtr p,
+													 std::vector<StringTgtCand>& rv,
+													 std::vector<StringWordAlignmentCand>& swa,
+													 std::vector<StringWordAlignmentCand>& twa) const;

 	// print target candidates for a given prefix pointer to a stream, mainly 
 	// for debugging
--- a/moses/src/PhraseDictionaryTreeAdaptor.cpp
+++ b/moses/src/PhraseDictionaryTreeAdaptor.cpp
@ -55,8 +55,6 @@ bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
 																				 , float weightWP
 																				 )
 {
-	FactorCollection &factorCollection = FactorCollection::Instance();
-
 	if(m_numScoreComponent!=weight.size()) {
 		stringstream strme;
 		strme << "ERROR: mismatch of number of scaling factors: "<<weight.size()
@ -84,6 +82,7 @@ PhraseDictionaryTreeAdaptor::GetTargetPhraseCollection(Phrase const &src) const
 {
 	return imp->GetTargetPhraseCollection(src);
 }
+
 TargetPhraseCollection const* 
 PhraseDictionaryTreeAdaptor::GetTargetPhraseCollection(InputType const& src,WordsRange const &range) const
 {
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@ -115,7 +115,21 @@ bool StaticData::LoadData(Parameter *parameter)
 	if (m_parameter->GetParam("factor-delimiter").size() > 0) {
 		m_factorDelimiter = m_parameter->GetParam("factor-delimiter")[0];
 	}
+	
+	//word-to-word alignment
+	SetBooleanParameter( &m_UseAlignmentInfo, "use-alignment-info", false );
+	SetBooleanParameter( &m_PrintAlignmentInfo, "print-alignment-info", false );
+	SetBooleanParameter( &m_PrintAlignmentInfoNbest, "print-alignment-info-in-n-best", false );

+	if (!m_UseAlignmentInfo && m_PrintAlignmentInfo){
+		  TRACE_ERR("--print-alignment-info should only be used together with \"--use-alignment-info true\". Continue forcing to false.\n");
+		m_PrintAlignmentInfo=false;
+	}
+	if (!m_UseAlignmentInfo && m_PrintAlignmentInfoNbest){
+		  TRACE_ERR("--print-alignment-info-in-n-best should only be used together with \"--use-alignment-info true\". Continue forcing to false.\n");
+		m_PrintAlignmentInfoNbest=false;
+	}
+	
 	// n-best
 	if (m_parameter->GetParam("n-best-list").size() >= 2)
 	{
@ -731,6 +745,7 @@ bool StaticData::LoadPhraseTables()
 			
 			IFVERBOSE(1)
 				PrintUserTime(string("Start loading PhraseTable ") + filePath);
+			std::cerr << "filePath: " << filePath << std::endl;
 			if (!FileExists(filePath+".binphr.idx"))
 			{	// memory phrase table
 				VERBOSE(2,"using standard phrase tables");
--- a/moses/src/StaticData.h
+++ b/moses/src/StaticData.h
@ -111,7 +111,10 @@ protected:
 	bool m_isDetailedTranslationReportingEnabled;
 	bool m_onlyDistinctNBest;
 	bool m_computeLMBackoffStats;
-
+	bool m_UseAlignmentInfo;
+	bool m_PrintAlignmentInfo;
+	bool m_PrintAlignmentInfoNbest;
+		
 	mutable std::auto_ptr<SentenceStats> m_sentenceStats;
 	std::string m_factorDelimiter; //! by default, |, but it can be changed
 	size_t m_maxFactorIdx[2];  //! number of factors on source and target side
@ -392,6 +395,10 @@ public:
 	const UnknownWordPenaltyProducer *GetUnknownWordPenaltyProducer() const { return m_unknownWordPenaltyProducer; }

 	bool UseDistortionFutureCosts() const {return m_useDistortionFutureCosts;}
+	bool UseAlignmentInfo() const {	return m_UseAlignmentInfo;} //! current value of the flag loaded from the "use-alignment-info" parameter
+	void UseAlignmentInfo(bool a){ m_UseAlignmentInfo=a; }; //! overwrites the "use-alignment-info" flag with a
+	bool PrintAlignmentInfo() const { return m_PrintAlignmentInfo; } //! "print-alignment-info" flag; LoadData forces it to false when use-alignment-info is off
+	bool PrintAlignmentInfoInNbest() const {return m_PrintAlignmentInfoNbest;} //! "print-alignment-info-in-n-best" flag; LoadData forces it to false when use-alignment-info is off
 	bool GetDistinctNBest() const {return m_onlyDistinctNBest;}
 	const std::string& GetFactorDelimiter() const {return m_factorDelimiter;}
 	size_t GetMaxNumFactors(FactorDirection direction) const { return m_maxFactorIdx[(size_t)direction]+1; }
--- a/moses/src/TargetPhrase.cpp
+++ b/moses/src/TargetPhrase.cpp
@ -33,19 +33,30 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

 using namespace std;

+bool TargetPhrase::wordalignflag=StaticData::Instance().UseAlignmentInfo(); // NOTE(review): evaluated during static initialisation, possibly before StaticData has loaded its parameters; the constructor re-reads the value on every construction - confirm this initialiser is safe
+bool TargetPhrase::printalign=StaticData::Instance().PrintAlignmentInfo(); // NOTE(review): same static-initialisation concern as wordalignflag
+
+//bool TargetPhrase::wordalignflag;
+//bool TargetPhrase::printalign;
+
 TargetPhrase::TargetPhrase(FactorDirection direction)
-	//:Phrase(direction), m_ngramScore(0.0), m_fullScore(0.0), m_sourcePhrase(0)
 	:Phrase(direction),m_transScore(0.0), m_ngramScore(0.0), m_fullScore(0.0), m_sourcePhrase(0)
 {
+		wordalignflag=StaticData::Instance().UseAlignmentInfo(); // refresh the class-wide (static) flag from the global configuration on every construction
+		printalign=StaticData::Instance().PrintAlignmentInfo(); // likewise for the static print flag
 }

 void TargetPhrase::SetScore()
 { // used when creating translations of unknown words:
 	m_transScore = m_ngramScore = 0;	
-	//m_ngramScore = 0;	
 	m_fullScore = - StaticData::Instance().GetWeightWordPenalty();	
 }

+void TargetPhrase::SetAlignment() // gives this phrase the identity alignment by delegating to its alignment pair
+{
+	m_alignmentPair.SetIdentityAlignment();
+}
+
 void TargetPhrase::SetScore(float score) 
 {
 	//we use an existing score producer to figure out information for score setting (number of scores and weights)
@ -72,24 +83,24 @@ void TargetPhrase::SetScore(float score)
 	
 	//Now we have what we need to call the full SetScore method
 	SetScore(prod,scoreVector,weights,StaticData::Instance().GetWeightWordPenalty(),StaticData::Instance().GetAllLM());
-
 }

 void TargetPhrase::SetScore(const ScoreProducer* translationScoreProducer,
-														const vector<float> &scoreVector, const vector<float> &weightT,
+														const Scores &scoreVector,
+														const vector<float> &weightT,
 														float weightWP, const LMList &languageModels)
 {
 	assert(weightT.size() == scoreVector.size());
 	// calc average score if non-best
-
+	
 	m_transScore = std::inner_product(scoreVector.begin(), scoreVector.end(), weightT.begin(), 0.0f);
 	m_scoreBreakdown.PlusEquals(translationScoreProducer, scoreVector);
-
+	
  // Replicated from TranslationOptions.cpp
 	float totalFutureScore = 0;
 	float totalNgramScore  = 0;
 	float totalFullScore   = 0;
-
+	
 	LMList::const_iterator lmIter;
 	for (lmIter = languageModels.begin(); lmIter != languageModels.end(); ++lmIter)
 	{
@ -99,10 +110,10 @@ void TargetPhrase::SetScore(const ScoreProducer* translationScoreProducer,
 		{ // contains factors used by this LM
 			const float weightLM = lm.GetWeight();
 			float fullScore, nGramScore;
-
+			
 			lm.CalcScore(*this, fullScore, nGramScore);
 			m_scoreBreakdown.Assign(&lm, nGramScore);
-
+			
 			// total LM score so far
 			totalNgramScore  += nGramScore * weightLM;
 			totalFullScore   += fullScore * weightLM;
@ -110,9 +121,9 @@ void TargetPhrase::SetScore(const ScoreProducer* translationScoreProducer,
 		}
 	}
  m_ngramScore = totalNgramScore;
-
+	
 	m_fullScore = m_transScore + totalFutureScore + totalFullScore
-							- (this->GetSize() * weightWP);	 // word penalty
+		- (this->GetSize() * weightWP);	 // word penalty
 }

 void TargetPhrase::SetWeights(const ScoreProducer* translationScoreProducer, const vector<float> &weightT)
@ -158,11 +169,151 @@ TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const
 	return clone;
 }

+// helper functions
+/** Parse a phrase-table alignment string and append one AlignmentElement per word.
+	* \param inserter output position the new elements are written to; advanced once per word
+	* \param str space-separated alignment tokens, one per word of this phrase, e.g. "(0) (3) (1,2)"
+	* \param phraseSize number of words in this phrase; asserted to equal the number of tokens in str
+	* \param otherPhraseSize size handed to SetUniformAlignment() for words that carry no alignment
+	* \param uniformAlignment receives the position of every word that was given a uniform alignment
+	*/
+void AddAlignmentElement(AlignmentPhraseInserter &inserter
+												 , const string &str
+												 , size_t phraseSize
+												 , size_t otherPhraseSize
+												 , list<size_t> &uniformAlignment)
+{
+	// input
+	vector<string> alignPhraseVector = Tokenize(str);
+	// from
+	// "(0) (3) (1,2)"
+	//              to
+	// "(0)" "(3)" "(1,2)"
+	assert (alignPhraseVector.size() == phraseSize) ;
+	
+	// patterns used to normalise every token; constant, so built once outside the loop
+	const std::string emptyAlignStr="()";
+	const std::string unalignedStr="(-1)";
+	const std::string openParenStr="(";
+	const std::string closeParenStr=")";
+	const std::string noStr="";
+	
+	const size_t inputSize = alignPhraseVector.size();
+	for (size_t pos = 0 ; pos < inputSize ; ++pos)
+	{
+		// NOTE: do not declare another variable named "pos" in this body; the loop index
+		// is the word position that is recorded in uniformAlignment below
+		string alignElementStr = alignPhraseVector[pos];
+		
+		//change "()" into "(-1)" for both source and target word-to-word alignments
+		alignElementStr=Replace(alignElementStr,emptyAlignStr,unalignedStr);
+		
+		//remove all "(" from both source and target word-to-word alignments
+		alignElementStr=Replace(alignElementStr,openParenStr,noStr);
+		
+		//remove all ")" from both source and target word-to-word alignments
+		alignElementStr=Replace(alignElementStr,closeParenStr,noStr);
+		
+		AlignmentElement *alignElement = new AlignmentElement(Tokenize<AlignmentElementType>(alignElementStr, ","));
+		// "(1,2)"
+		//  to
+		// [1] [2]
+		if (alignElement->GetSize() == 0)
+		{ // no alignment info. add uniform alignment, ie. can be aligned to any word
+			alignElement->SetUniformAlignment(otherPhraseSize);
+			uniformAlignment.push_back(pos);
+		}
+		
+		**inserter = alignElement;
+		(*inserter)++;          
+	}
+}
+
+
+// helper functions
+/** Append one AlignmentElement per entry of an already tokenised word-alignment list.
+	* \param inserter output position the new elements are written to; advanced once per word
+	* \param wa one alignment string per word of this phrase, each a comma-separated list such as "1,2"
+	* \param phraseSize number of words in this phrase; asserted to equal wa.size()
+	* \param otherPhraseSize size handed to SetUniformAlignment() for words that carry no alignment
+	* \param uniformAlignment receives the position of every word that was given a uniform alignment
+	*/
+void AddAlignmentElement(AlignmentPhraseInserter &inserter
+												 , const WordAlignments &wa
+												 , size_t phraseSize
+												 , size_t otherPhraseSize
+												 , list<size_t> &uniformAlignment)
+{
+	assert (wa.size() == phraseSize) ;
+	
+	size_t wordIdx = 0;
+	for (WordAlignments::const_iterator entry = wa.begin() ; entry != wa.end() ; ++entry, ++wordIdx)
+	{
+		// "1,2"
+		//  to
+		// [1] [2]
+		AlignmentElement *element = new AlignmentElement(Tokenize<AlignmentElementType>(*entry, ","));
+		
+		const bool hasNoAlignment = (element->GetSize() == 0);
+		if (hasNoAlignment)
+		{ // nothing was listed for this word: let it align to any word of the other phrase
+			element->SetUniformAlignment(otherPhraseSize);
+			uniformAlignment.push_back(wordIdx);
+		}
+		
+		**inserter = element;
+		(*inserter)++;
+	}
+}
+
+/** Build the alignment info of this phrase from per-word alignment lists.
+	* When UseWordAlignment() is off, swa and twa are ignored and uniform alignment
+	* strings are generated instead, because the internal structure requires
+	* word-to-word alignments to be present.
+	* \param swa alignment strings for the words of the source phrase
+	* \param twa alignment strings for the words of this (target) phrase
+	*/
+void TargetPhrase::CreateAlignmentInfo(const WordAlignments &swa
+																			 , const WordAlignments &twa)
+{
+	AlignmentPhraseInserter srcInserter = m_alignmentPair.GetInserter(Input);
+	AlignmentPhraseInserter trgInserter = m_alignmentPair.GetInserter(Output);
+	list<size_t> srcUniformPositions, trgUniformPositions;
+	
+	if (UseWordAlignment())
+	{
+		AddAlignmentElement(srcInserter, swa, m_sourcePhrase->GetSize(), GetSize(), srcUniformPositions);
+		AddAlignmentElement(trgInserter, twa, GetSize(), m_sourcePhrase->GetSize(), trgUniformPositions);
+	}
+	else
+	{ // build uniform alignment strings and hand them to the string-based overload
+		std::string srcAlignStr,trgAlignStr;
+		UniformAlignment(srcAlignStr, m_sourcePhrase->GetSize(), GetSize());
+		UniformAlignment(trgAlignStr, GetSize(), m_sourcePhrase->GetSize());
+		CreateAlignmentInfo(srcAlignStr,trgAlignStr);
+	}
+	// disabled: propagate uniform alignments to other side
+//	m_alignmentPair.GetAlignmentPhrase(Output).AddUniformAlignmentElement(uniformAlignmentSource);
+//	m_alignmentPair.GetAlignmentPhrase(Input).AddUniformAlignmentElement(uniformAlignmentTarget);
+}
+
+
+
+/** Build the alignment info of this phrase from two phrase-table alignment strings.
+	* \param sourceStr alignment tokens for the words of the source phrase, e.g. "(0) (3) (1,2)"
+	* \param targetStr alignment tokens for the words of this (target) phrase
+	*/
+void TargetPhrase::CreateAlignmentInfo(const string &sourceStr
+																			 , const string &targetStr)
+{
+	AlignmentPhraseInserter srcInserter = m_alignmentPair.GetInserter(Input);
+	AlignmentPhraseInserter trgInserter = m_alignmentPair.GetInserter(Output);
+	list<size_t> srcUniformPositions, trgUniformPositions;
+	
+	// each side is checked against its own length and may fall back to the other side's length
+	const size_t sourceSize = m_sourcePhrase->GetSize();
+	const size_t targetSize = GetSize();
+	
+	AddAlignmentElement(srcInserter, sourceStr, sourceSize, targetSize, srcUniformPositions);
+	AddAlignmentElement(trgInserter, targetStr, targetSize, sourceSize, trgUniformPositions);
+	// disabled: propagate uniform alignments to other side
+//	m_alignmentPair.GetAlignmentPhrase(Output).AddUniformAlignmentElement(uniformAlignmentSource);
+//	m_alignmentPair.GetAlignmentPhrase(Input).AddUniformAlignmentElement(uniformAlignmentTarget);
+}
+
 TO_STRING_BODY(TargetPhrase);

 std::ostream& operator<<(std::ostream& os, const TargetPhrase& tp)
 {
-  os << static_cast<const Phrase&>(tp) << ", pC=" << tp.m_transScore << ", c=" << tp.m_fullScore;
-  //os << static_cast<const Phrase&>(tp) << ", c=" << tp.m_fullScore;
+  os << static_cast<const Phrase&>(tp); // print the words through the Phrase base-class operator
+	os << ", pC=" << tp.m_transScore << ", c=" << tp.m_fullScore; // then the translation score and the full score
+	if (tp.PrintAlignmentInfo()) // the alignment pair is appended only when alignment printing is switched on
+		os << ", " << tp.GetAlignmentPair();
   return os;
 }
--- a/moses/src/TargetPhrase.h
+++ b/moses/src/TargetPhrase.h
@ -22,15 +22,17 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #pragma once

 #include <vector>
+#include "TypeDef.h"
 #include "Phrase.h"
 #include "ScoreComponentCollection.h"
+#include "AlignmentPair.h"

 class LMList;
 class PhraseDictionary;
 class GenerationDictionary;
 class ScoreProducer;

-/** represents an entry on the target side of a phrase table (scores, translation)
+/** represents an entry on the target side of a phrase table (scores, translation, alignment)
 */
 class TargetPhrase: public Phrase
 {
@ -39,11 +41,22 @@ protected:
 	float m_transScore, m_ngramScore, m_fullScore;
 	//float m_ngramScore, m_fullScore;
 	ScoreComponentCollection m_scoreBreakdown;
+	AlignmentPair m_alignmentPair;

 	// in case of confusion net, ptr to source phrase
 	Phrase const* m_sourcePhrase; 
+
+	static bool wordalignflag;
+	static bool printalign;
+	
 public:
-	TargetPhrase(FactorDirection direction=Output);
+		TargetPhrase(FactorDirection direction=Output);
+		~TargetPhrase(){};
+		
+	/** used by the unknown word handler.
+		* Sets the identity alignment on this phrase's alignment pair
+		*/
+	void SetAlignment();

 	//! used by the unknown word handler- these targets
 	//! don't have a translation score, so wp is the only thing used
@ -62,13 +75,14 @@ public:
   * @param weightWP the weight of the word penalty
   *
   * @TODO should this be part of the constructor?  If not, add explanation why not.
-   */
+		*/
 	void SetScore(const ScoreProducer* translationScoreProducer,
-								const std::vector<float> &scoreVector,
+								const Scores &scoreVector,
 								const std::vector<float> &weightT,
 								float weightWP,
 								const LMList &languageModels);

+	
 	// used when creating translations of unknown words:
 	void ResetScore();
 	void SetWeights(const ScoreProducer*, const std::vector<float> &weightT);
@ -104,6 +118,33 @@ public:
 	{
 		return m_sourcePhrase;
 	}
+	AlignmentPair &GetAlignmentPair() //! mutable access to the word-alignment pair stored in this phrase
+	{
+		return m_alignmentPair;
+	}
+	const AlignmentPair &GetAlignmentPair() const //! read-only access to the word-alignment pair stored in this phrase
+	{
+		return m_alignmentPair;
+	}
+	
+	/** Parse the alignment info portion of phrase table string to create alignment info */
+	void CreateAlignmentInfo(const std::string &sourceStr
+													 , const std::string &targetStr);
+	void CreateAlignmentInfo(const WordAlignments &swa
+													 , const WordAlignments &twa);
+	
+	void UseWordAlignment(bool a){ //! sets the static flag wordalignflag, so the change applies to every TargetPhrase
+		wordalignflag=a;
+	};
+	bool UseWordAlignment() const { //! current value of the static flag wordalignflag
+		return wordalignflag;
+	};
+	void PrintAlignmentInfo(bool a) { //! sets the static flag printalign, so the change applies to every TargetPhrase
+		printalign=a; 
+	}
+	bool PrintAlignmentInfo() const { //! current value of the static flag printalign; checked by operator<< before printing the alignment pair
+		return printalign;
+	}

 	TO_STRING();
 };
--- a/moses/src/TranslationOption.h
+++ b/moses/src/TranslationOption.h
@ -30,6 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "Util.h"
 #include "TypeDef.h"
 #include "ScoreComponentCollection.h"
+#include "AlignmentPair.h"
 #include "StaticData.h"

 class PhraseDictionary;
@ -56,7 +57,7 @@ class TranslationOption

 protected:

-	Phrase 							m_targetPhrase; /*< output phrase when using this translation option */
+	TargetPhrase 							m_targetPhrase; /*< output phrase when using this translation option */
 	Phrase				      *m_sourcePhrase; /*< input phrase translated by this */
 	const WordsRange		m_sourceWordsRange; /*< word position in the input that are covered by this translation option */
 	float               m_futureScore; /*< estimate of total cost when using this translation option, includes language model probabilities */
@ -97,7 +98,7 @@ public:
 	void MergeNewFeatures(const Phrase& phrase, const ScoreComponentCollection& score, const std::vector<FactorType>& featuresToMerge);

 	/** returns target phrase */
-	inline const Phrase &GetTargetPhrase() const
+	inline const TargetPhrase &GetTargetPhrase() const
 	{
 		return m_targetPhrase;
 	}
@ -172,7 +173,7 @@ public:

 	/** Calculate future score and n-gram score of this trans option, plus the score breakdowns */
 	void CalcScore();
-
+	
 	void CacheReorderingProb(const LexicalReordering &lexreordering
 													, const Score &score);

--- a/moses/src/TranslationOptionCollection.cpp
+++ b/moses/src/TranslationOptionCollection.cpp
@ -222,13 +222,16 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,
 		}

 		targetPhrase.SetScore();
-		targetPhrase.SetSourcePhrase(m_unksrc);
+		targetPhrase.SetSourcePhrase(m_unksrc);	
+		//create a one-to-one alignment between UNKNOWN_FACTOR and its verbatim translation
+		targetPhrase.CreateAlignmentInfo("(0)","(0)");
 		transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos + length - 1), targetPhrase, m_source, 0);	
 	}
 	else 
 	{ // drop source word. create blank trans opt
 		TargetPhrase targetPhrase(Output);
 		targetPhrase.SetSourcePhrase(m_unksrc);
+		targetPhrase.SetAlignment();
 		transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos + length - 1), targetPhrase, m_source, 0);
 	}

@ -337,6 +340,7 @@ void TranslationOptionCollection::CreateTranslationOptions(const vector <DecodeG
 	// in the phraseDictionary (which is the- possibly filtered-- phrase
 	// table loaded on initialization), generate TranslationOption objects
 	// for all phrases
+
 	size_t size = m_source.GetSize();
 	for (size_t startVL = 0 ; startVL < decodeStepVL.size() ; startVL++) 
 	{
@ -440,7 +444,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
 			static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslation
 																(m_source, *oldPtoc
 																, startPos, endPos, adhereTableLimit );
-
+			
 			// do rest of decode steps
 			int indexStep = 0;
 			for (++iterStep ; iterStep != decodeStepList.end() ; ++iterStep) 
@ -503,12 +507,11 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
 		if (useCache) 
 			delete sourcePhrase;
 	} // if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos))
-
+	
 	if ((StaticData::Instance().GetXmlInputType() != XmlPassThrough) && HasXmlOptionsOverlappingRange(startPos,endPos)) 
 	{
 		CreateXmlOptionsForRange(startPos, endPos);
 	} 
-
 }

 	/** Check if this range overlaps with any XML options. This doesn't need to be an exact match, only an overlap.
--- a/moses/src/TypeDef.h
+++ b/moses/src/TypeDef.h
@ -169,3 +169,11 @@ typedef uint32_t UINT32;
 class TranslationOption;
 typedef std::vector<TranslationOption*> TranslationOptionList;

+
+
+
+typedef std::vector<float> Scores; // list of score values; used for the score vector passed to TargetPhrase::SetScore
+typedef std::vector<std::string> WordAlignments; // one alignment string per word of a phrase; each string is split on "," when the alignment is built
+
+typedef std::pair<std::vector<std::string const*>,Scores > StringTgtCand; // candidate as (pointers to strings, scores)
+typedef std::pair<std::vector<std::string const*>,WordAlignments > StringWordAlignmentCand; // candidate as (pointers to strings, per-word alignment strings)
--- a/moses/src/Util.h
+++ b/moses/src/Util.h
@ -21,6 +21,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

 #pragma once

+#include <iostream>
 #include <cassert>
 #include <fstream>
 #include <sstream>
@ -106,6 +107,17 @@ inline std::vector<T> Scan(const std::vector< std::string > &input)
 	return output;
 }

+/** replace all occurrences of todelStr in str with the string toaddStr.
+	* Matches are searched left to right and never overlap; text inserted by a
+	* replacement is not searched again, so the call terminates even when
+	* toaddStr contains todelStr.
+	* \param str input string, left unmodified
+	* \param todelStr substring to look for; if empty, a copy of str is returned unchanged
+	* \param toaddStr text written in place of every match (may be empty)
+	* \return copy of str with every match replaced
+	*/
+inline std::string Replace(const std::string& str,
+													 const std::string& todelStr,
+													 const std::string& toaddStr)
+{
+	std::string newStr=str;
+	if (todelStr.empty())
+		return newStr; // an empty pattern matches at every position; nothing sensible to replace
+	
+	size_t pos=0;
+	while ((pos=newStr.find(todelStr,pos))!=std::string::npos)
+	{
+		newStr.replace(pos,todelStr.size(),toaddStr);
+		// resume right after the inserted text: advancing by one would skip an adjacent
+		// match when toaddStr is empty, and would rescan the insertion otherwise
+		pos+=toaddStr.size();
+	}
+	return newStr;
+}
+
 /** tokenise input string to vector of string. each element has been separated by a character in the delimiters argument. 
 		The separator can only be 1 character long. The default delimiters are space or tab
 */
@ -259,7 +271,6 @@ void RemoveAllInColl(COLL &coll)
 		delete (*iter);
 	}
 	coll.clear();
-
 }

 //! x-platform reference to temp folder