Changes to main-branch files while I was working on dynamic phrase tables.

Ulrich Germann 2014-03-10 14:08:00 +00:00
parent 6e4035fb12
commit fdc504d47a
15 changed files with 163 additions and 991 deletions

View File

@@ -65,6 +65,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
</link>
<link>
<name>RuleExtractionOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/RuleExtractionOptions.h</locationURI>
</link>
<link>
<name>SentenceAlignment.cpp</name>
<type>1</type>

View File

@@ -5,13 +5,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.162355801" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@@ -70,7 +70,6 @@
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="dstruct"/>
<listOptionValue builtIn="false" value="dalm"/>
<listOptionValue builtIn="false" value="MurmurHash3"/>
<listOptionValue builtIn="false" value="flm"/>
<listOptionValue builtIn="false" value="oolm"/>
<listOptionValue builtIn="false" value="lattice"/>
@@ -108,13 +107,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.516628324" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@@ -5,13 +5,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.461114338" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@@ -71,7 +71,6 @@
<listOptionValue builtIn="false" value="lattice"/>
<listOptionValue builtIn="false" value="misc"/>
<listOptionValue builtIn="false" value="dalm"/>
<listOptionValue builtIn="false" value="MurmurHash3"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="RandLM"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
@@ -109,13 +108,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.2121690436" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@@ -181,10 +181,8 @@ FeatureStats::FeatureStats(const size_t size)
FeatureStats::~FeatureStats()
{
if (m_array) {
delete [] m_array;
m_array = NULL;
}
delete [] m_array;
m_array = NULL;
}
void FeatureStats::Copy(const FeatureStats &stats)
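
This hunk, like the analogous ones below (ScoreStats, the singleton Delete(), LexicalReordering, OnlineRLM), drops the if-guard around delete. The guard is redundant because deleting a null pointer is a well-defined no-op in C++. A minimal standalone sketch of the simplified pattern (toy code, not taken from Moses):

#include <cstddef>

struct Stats {
  float *m_array;
  Stats() : m_array(NULL) {}
  ~Stats() {
    delete [] m_array;  // no-op when m_array is NULL, so no guard is needed
    m_array = NULL;
  }
};

int main() {
  Stats s;  // destructor runs with m_array == NULL and does nothing harmful
  return 0;
}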

View File

@@ -35,10 +35,8 @@ ScoreStats::ScoreStats(const size_t size)
ScoreStats::~ScoreStats()
{
if (m_array) {
delete [] m_array;
m_array = NULL;
}
delete [] m_array;
m_array = NULL;
}
void ScoreStats::Copy(const ScoreStats &stats)
@@ -157,4 +155,4 @@ bool operator==(const ScoreStats& s1, const ScoreStats& s2)
return true;
}
}
}

View File

@@ -21,10 +21,8 @@ public:
}
static void Delete() {
if (m_instance) {
delete m_instance;
m_instance = NULL;
}
delete m_instance;
m_instance = NULL;
}
private:

View File

@@ -50,7 +50,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "moses/FeatureVector.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include "moses/FF/SyntaxConstraintFeature.h"
#include "moses/FF/TreeStructureFeature.h"
#include "util/exception.hh"
using namespace std;
@@ -395,14 +395,16 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL,
"No output file for tree fragments specified");
//Tree of full sentence (to stderr)
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ ) {
const StatefulFeatureFunction *ff = sff[i];
if (ff->GetScoreProducerDescription() == "SyntaxConstraintFeature0") {
const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
break;
//Tree of full sentence
const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure();
if (treeStructure != NULL) {
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ ) {
if (sff[i] == treeStructure) {
const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
break;
}
}
}
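
The rewrite stops identifying the feature by its description string ("SyntaxConstraintFeature0") and instead asks StaticData for the registered tree-structure feature, locating its FFState slot by pointer comparison. This presumes the feature registers itself via SetTreeStructure() at configuration time (not shown in this diff). A self-contained toy of the lookup pattern, with all names illustrative:

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct Feature {
  std::string name;
  explicit Feature(const std::string &n) : name(n) {}
};

int main() {
  Feature lm("LM0"), tree("TreeStructure0");
  std::vector<const Feature*> stateful;  // order defines the state indices
  stateful.push_back(&lm);
  stateful.push_back(&tree);
  const Feature *treeStructure = &tree;  // the registration step
  if (treeStructure != NULL) {           // no tree feature configured -> skip
    for (size_t i = 0; i < stateful.size(); ++i) {
      if (stateful[i] == treeStructure) {  // pointer identity, not name match
        std::cout << "tree state lives in slot " << i << "\n";
        break;
      }
    }
  }
  return 0;
}

Pointer identity avoids the fragile assumption that the feature happens to be named exactly "SyntaxConstraintFeature0" in the configuration.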

View File

@@ -97,7 +97,7 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
targetPhrase->SetTargetLHS(targetLHS);
targetPhrase->SetAlignmentInfo("0-0");
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.GetTreeStructure() != NULL) {
targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
}

View File

@@ -52,8 +52,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
LexicalReordering::~LexicalReordering()
{
if(m_table)
delete m_table;
delete m_table;
delete m_configuration;
}

View File

@@ -63,6 +63,11 @@ public:
state = new DALM::State(*from.state);
}
void reset(DALM::State *s){
delete state;
state = s;
}
virtual int Compare(const FFState& other) const{
const DALMState &o = static_cast<const DALMState &>(other);
if(state->get_count() < o.state->get_count()) return -1;
@@ -82,11 +87,9 @@ public:
class DALMChartState : public FFState
{
private:
size_t sourceStartPos;
size_t sourceEndPos;
size_t inputSize;
DALM::VocabId *prefixIDs;
size_t prefixLength;
const ChartHypothesis &hypo;
DALM::Fragment *prefixFragments;
unsigned short prefixLength;
float prefixScore;
DALMState *rightContext;
bool isLarge;
@@ -94,15 +97,13 @@ private:
public:
DALMChartState(
const ChartHypothesis &hypo,
DALM::VocabId *prefixIDs,
size_t prefixLength,
DALM::Fragment *prefixFragments,
unsigned short prefixLength,
float prefixScore,
DALMState *rightContext,
bool isLarge)
: sourceStartPos(hypo.GetCurrSourceRange().GetStartPos()),
sourceEndPos(hypo.GetCurrSourceRange().GetEndPos()),
inputSize(hypo.GetManager().GetSource().GetSize()),
prefixIDs(prefixIDs),
: hypo(hypo),
prefixFragments(prefixFragments),
prefixLength(prefixLength),
prefixScore(prefixScore),
rightContext(rightContext),
@@ -110,16 +111,16 @@ public:
{}
virtual ~DALMChartState(){
if(prefixIDs != NULL) delete [] prefixIDs;
if(rightContext != NULL) delete rightContext;
delete [] prefixFragments;
delete rightContext;
}
size_t GetPrefixLength() const{
unsigned short GetPrefixLength() const{
return prefixLength;
}
const DALM::VocabId *GetPrefixIDs() const{
return prefixIDs;
const DALM::Fragment *GetPrefixFragments() const{
return prefixFragments;
}
float GetPrefixScore() const{
@@ -137,17 +138,22 @@ public:
virtual int Compare(const FFState& other) const{
const DALMChartState &o = static_cast<const DALMChartState &>(other);
// prefix
if (sourceStartPos > 0) { // not for "<s> ..."
if (hypo.GetCurrSourceRange().GetStartPos() > 0) { // not for "<s> ..."
if (prefixLength != o.prefixLength){
return (prefixLength < o.prefixLength)?-1:1;
} else {
int ret = memcmp(prefixIDs, o.prefixIDs, prefixLength);
if (ret != 0) return ret;
if(prefixLength > 0){
DALM::Fragment &f = prefixFragments[prefixLength-1];
DALM::Fragment &of = o.prefixFragments[prefixLength-1];
int ret = DALM::compare_fragments(f, of);
if(ret != 0) return ret;
}
}
}
// suffix
if (sourceEndPos < inputSize - 1) { // not for "... </s>"
size_t inputSize = hypo.GetManager().GetSource().GetSize();
if (hypo.GetCurrSourceRange().GetEndPos() < inputSize - 1) { // not for "... </s>"
int ret = o.rightContext->Compare(*rightContext);
if (ret != 0) return ret;
}
@@ -323,8 +329,8 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu
DALM::State *state = dalm_state->get_state();
size_t contextSize = m_nGramOrder-1;
DALM::VocabId *prefixIDs = new DALM::VocabId[contextSize];
size_t prefixLength = 0;
DALM::Fragment *prefixFragments = new DALM::Fragment[contextSize];
unsigned short prefixLength = 0;
bool isLarge = false;
// initial language model scores
@@ -350,11 +356,14 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu
// state is finalized.
isLarge = true;
}else{
float score = m_lm->query(wid, *state);
hypoScore += score;
if (!isLarge){
if(isLarge){
float score = m_lm->query(wid, *state);
hypoScore += score;
}else{
float score = m_lm->query(wid, *state, prefixFragments[prefixLength]);
prefixScore += score;
prefixIDs[prefixLength] = wid;
hypoScore += score;
prefixLength++;
if(prefixLength >= contextSize) isLarge = true;
}
@@ -374,8 +383,10 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu
// get language model state
dalm_state->reset(*prevState->GetRightContext());
state = dalm_state->get_state();
prefixLength = prevState->GetPrefixLength();
std::memcpy(prefixIDs, prevState->GetPrefixIDs(), sizeof(DALM::VocabId)*prefixLength);
const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments();
std::memcpy(prefixFragments, prevPrefixFragments, sizeof(DALM::Fragment)*prefixLength);
isLarge = prevState->LargeEnough();
}
phrasePos++;
@@ -389,11 +400,12 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu
// regular word
if (!word.IsNonTerminal()) {
DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));
float score = m_lm->query(wid, *state);
hypoScore += score;
if (!isLarge){
if (isLarge) {
hypoScore += m_lm->query(wid, *state);
}else{
float score = m_lm->query(wid, *state, prefixFragments[prefixLength]);
prefixScore += score;
prefixIDs[prefixLength] = wid;
hypoScore += score;
prefixLength++;
if(prefixLength >= contextSize) isLarge = true;
}
@@ -410,19 +422,22 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu
static_cast<const DALMChartState*>(prevHypo->GetFFState(featureID));
size_t prevPrefixLength = prevState->GetPrefixLength();
const DALM::VocabId *prevPrefixIDs = prevState->GetPrefixIDs();
const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments();
DALM::Gap gap(*state);
// score its prefix
for(size_t prefixPos = 0; prefixPos < prevPrefixLength; prefixPos++) {
DALM::VocabId wid = prevPrefixIDs[prefixPos];
float score = m_lm->query(wid, *state);
hypoScore += score;
if (!isLarge){
const DALM::Fragment &f = prevPrefixFragments[prefixPos];
if (isLarge) {
hypoScore += m_lm->query(f, *state, gap);
} else {
float score = m_lm->query(f, *state, gap, prefixFragments[prefixLength]);
prefixScore += score;
prefixIDs[prefixLength] = wid;
hypoScore += score;
prefixLength++;
if(prefixLength >= contextSize) isLarge = true;
}
gap.succ();
}
// check if we are dealing with a large sub-phrase
@@ -430,18 +445,22 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu
// add its language model score
hypoScore += UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
hypoScore -= prevState->GetPrefixScore(); // remove overlapped score.
// copy language model state
// copy language model state
dalm_state->reset(*prevState->GetRightContext());
state = dalm_state->get_state();
}
} else {
DALM::State *state_new = new DALM::State(*prevState->GetRightContext()->get_state());
m_lm->set_state(*state_new, *state, gap);
dalm_state->reset(state_new);
state = dalm_state->get_state();
}
}
}
// assign combined score to score breakdown
out->Assign(this, TransformLMScore(hypoScore));
return new DALMChartState(hypo, prefixIDs, prefixLength, prefixScore, dalm_state, isLarge);
return new DALMChartState(hypo, prefixFragments, prefixLength, prefixScore, dalm_state, isLarge);
}
bool LanguageModelDALM::IsUseable(const FactorMask &mask) const
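
In outline, EvaluateChart keeps two running totals per hypothesis: hypoScore, the LM score of everything scored so far, and prefixScore, the provisional score of the first words, which are queried with incomplete left context until prefixLength reaches m_nGramOrder-1 and isLarge is set. When a sub-hypothesis H' is plugged into a larger hypothesis, its prefix words are re-queried in the fuller context and the provisional estimate is backed out (the "remove overlapped score" step), so roughly:

\text{hypoScore}(H) \;=\; \sum_{w \,\in\, \text{words introduced by } H} \log p(w \mid h_w)
\;+\; \sum_{H' \,\in\, \text{sub-hypotheses}} \Bigl[\, \text{hypoScore}(H') - \text{prefixScore}(H')
\;+\; \sum_{w \,\in\, \text{prefix}(H')} \log p(w \mid h'_w) \,\Bigr]

where h_w is the left context available at the time w is scored. The switch from raw DALM::VocabId arrays to DALM::Fragment plus DALM::Gap presumably lets DALM resume its internal traversal when re-querying a stored prefix, instead of rescoring each word from scratch.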

View File

@@ -66,6 +66,7 @@ StaticData::StaticData()
,m_lmEnableOOVFeature(false)
,m_isAlwaysCreateDirectTranslationOption(false)
,m_currentWeightSetting("default")
,m_treeStructure(NULL)
{
m_xmlBrackets.first="<";
m_xmlBrackets.second=">";
@@ -1184,5 +1185,52 @@ void StaticData::CheckLEGACYPT()
}
void StaticData::ResetWeights(const std::string &denseWeights, const std::string &sparseFile)
{
m_allWeights = ScoreComponentCollection();
// dense weights
string name("");
vector<float> weights;
vector<string> toks = Tokenize(denseWeights);
for (size_t i = 0; i < toks.size(); ++i) {
const string &tok = toks[i];
if (tok.substr(tok.size() - 1, 1) == "=") {
// start of new feature
if (name != "") {
// save previous ff
const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name);
m_allWeights.Assign(&ff, weights);
weights.clear();
}
name = tok.substr(0, tok.size() - 1);
} else {
// a weight for curr ff
float weight = Scan<float>(toks[i]);
weights.push_back(weight);
}
}
const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name);
m_allWeights.Assign(&ff, weights);
// sparse weights
InputFileStream sparseStrme(sparseFile);
string line;
while (getline(sparseStrme, line)) {
vector<string> toks = Tokenize(line);
UTIL_THROW_IF2(toks.size() != 2, "Incorrect sparse weight format. Should be FFName_sparseName weight");
vector<string> names = Tokenize(toks[0], "_");
UTIL_THROW_IF2(names.size() != 2, "Incorrect sparse weight name. Should be FFName_sparseName");
const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(names[0]);
m_allWeights.Assign(&ff, names[1], Scan<float>(toks[1]));
}
}
} // namespace
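
The parser above implies two input formats. In the dense string, any token ending in "=" opens a new feature section and the following tokens are that feature's weights (the last feature is flushed after the loop); each line of the sparse file is a "_"-joined feature-qualified name plus a weight. Illustrative inputs (feature names hypothetical):

denseWeights:  "LM0= 0.5 Distortion0= 0.3 TranslationModel0= 0.2 0.2 0.2 0.2"

sparse file, one entry per line:
PhrasePenalty0_special 0.1     (feature "PhrasePenalty0", sparse name "special")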

View File

@@ -221,6 +221,8 @@ protected:
std::map<Word, std::set<Word> > m_soft_matches_map;
std::map<Word, std::set<Word> > m_soft_matches_map_reverse;
const StatefulFeatureFunction* m_treeStructure;
public:
bool IsAlwaysCreateDirectTranslationOption() const {
@@ -756,6 +758,20 @@ public:
bool AdjacentOnly() const
{ return m_adjacentOnly; }
void ResetWeights(const std::string &denseWeights, const std::string &sparseFile);
// need global access for output of tree structure
const StatefulFeatureFunction* GetTreeStructure() const {
return m_treeStructure;
}
void SetTreeStructure(const StatefulFeatureFunction* treeStructure) {
m_treeStructure = treeStructure;
}
};
}

View File

@@ -43,10 +43,10 @@ public:
alpha_[i] = i * log10(0.4);
}
~OnlineRLM() {
if(alpha_) delete[] alpha_;
delete[] alpha_;
if(bAdapting_) delete vocab_;
else vocab_ = NULL;
if(cache_) delete cache_;
delete cache_;
delete bPrefix_;
delete bHit_;
}

View File

@@ -235,8 +235,8 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
// SCORES ...
string directScores, directSparseScores, indirectScores, indirectSparseScores;
breakdownCoreAndSparse( itemDirect[2], directScores, directSparseScores );
breakdownCoreAndSparse( itemIndirect[2], indirectScores, indirectSparseScores );
breakdownCoreAndSparse( itemDirect[3], directScores, directSparseScores );
breakdownCoreAndSparse( itemIndirect[3], indirectScores, indirectSparseScores );
vector<string> directCounts = tokenize(itemDirect[4].c_str());
vector<string> indirectCounts = tokenize(itemIndirect[4].c_str());
@@ -307,7 +307,7 @@ }
}
// alignment
fileConsolidated << " ||| " << itemDirect[3];
fileConsolidated << " ||| " << itemDirect[2];
// counts, for debugging
fileConsolidated << "||| " << countE << " " << countF << " " << countEF;
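
The swapped indices suggest the half-tables consumed here now carry the word alignment in the third |||-separated field and the scores in the fourth, rather than the other way around, while the counts stay in the fifth; the consolidated output still prints scores before alignment. An illustrative direct-table line under that assumption (all values made up):

das haus ||| the house ||| 0-0 1-1 ||| 0.7 0.4 ||| 120 95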

View File

@@ -1,909 +0,0 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <sstream>
#include <cstdio>
#include <iostream>
#include <fstream>
#include <stdlib.h>
#include <assert.h>
#include <cstring>
#include <map>
#include <set>
#include <vector>
#include <algorithm>
#include "SafeGetline.h"
#include "ScoreFeature.h"
#include "tables-core.h"
#include "ExtractionPhrasePair.h"
#include "score.h"
#include "InputFileStream.h"
#include "OutputFileStream.h"
using namespace std;
using namespace MosesTraining;
#define LINE_MAX_LENGTH 100000
namespace MosesTraining
{
LexicalTable lexTable;
bool inverseFlag = false;
bool hierarchicalFlag = false;
bool pcfgFlag = false;
bool treeFragmentsFlag = false;
bool unpairedExtractFormatFlag = false;
bool conditionOnTargetLhsFlag = false;
bool wordAlignmentFlag = true;
bool goodTuringFlag = false;
bool kneserNeyFlag = false;
bool logProbFlag = false;
int negLogProb = 1;
#define COC_MAX 10
bool lexFlag = true;
bool unalignedFlag = false;
bool unalignedFWFlag = false;
bool crossedNonTerm = false;
int countOfCounts[COC_MAX+1];
int totalDistinct = 0;
float minCountHierarchical = 0;
std::map<std::string,float> sourceLHSCounts;
std::map<std::string, std::map<std::string,float>* > targetLHSAndSourceLHSJointCounts;
std::set<std::string> sourceLabelSet;
std::map<std::string,size_t> sourceLabels;
std::vector<std::string> sourceLabelsByIndex;
Vocabulary vcbT;
Vocabulary vcbS;
} // namespace
std::vector<std::string> tokenize( const char [] );
void processLine( std::string line,
int lineID, bool includeSentenceIdFlag, int &sentenceId,
PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment,
std::string &additionalPropertiesString,
float &count, float &pcfgSum );
void writeCountOfCounts( const std::string &fileNameCountOfCounts );
void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb );
void outputPhrasePair(const ExtractionPhrasePair &phrasePair, float, int, ostream &phraseTableFile, const ScoreFeatureManager &featureManager, const MaybeLog &maybeLog );
double computeLexicalTranslation( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource );
double computeUnalignedPenalty( const ALIGNMENT *alignmentTargetToSource );
set<std::string> functionWordList;
void loadFunctionWords( const string &fileNameFunctionWords );
double computeUnalignedFWPenalty( const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource );
int calcCrossedNonTerm( const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource );
void printSourcePhrase( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *targetToSourceAlignment, ostream &out );
void printTargetPhrase( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *targetToSourceAlignment, ostream &out );
void invertAlignment( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment );
int main(int argc, char* argv[])
{
std::cerr << "Score v2.1 -- "
<< "scoring methods for extracted rules" << std::endl;
ScoreFeatureManager featureManager;
if (argc < 4) {
std::cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--NoWordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--PCFG] [--TreeFragments] [--UnpairedExtractFormat] [--ConditionOnTargetLHS] [--CrossedNonTerm]" << std::endl;
std::cerr << featureManager.usage() << std::endl;
exit(1);
}
std::string fileNameExtract = argv[1];
std::string fileNameLex = argv[2];
std::string fileNamePhraseTable = argv[3];
std::string fileNameCountOfCounts;
std::string fileNameFunctionWords;
std::vector<std::string> featureArgs; // all unknown args passed to feature manager
for(int i=4; i<argc; i++) {
if (strcmp(argv[i],"inverse") == 0 || strcmp(argv[i],"--Inverse") == 0) {
inverseFlag = true;
std::cerr << "using inverse mode" << std::endl;
} else if (strcmp(argv[i],"--Hierarchical") == 0) {
hierarchicalFlag = true;
std::cerr << "processing hierarchical rules" << std::endl;
} else if (strcmp(argv[i],"--PCFG") == 0) {
pcfgFlag = true;
std::cerr << "including PCFG scores" << std::endl;
} else if (strcmp(argv[i],"--TreeFragments") == 0) {
treeFragmentsFlag = true;
std::cerr << "including tree fragment information from syntactic parse\n";
} else if (strcmp(argv[i],"--UnpairedExtractFormat") == 0) {
unpairedExtractFormatFlag = true;
std::cerr << "processing unpaired extract format" << std::endl;
} else if (strcmp(argv[i],"--ConditionOnTargetLHS") == 0) {
conditionOnTargetLhsFlag = true;
std::cerr << "processing unpaired extract format" << std::endl;
} else if (strcmp(argv[i],"--NoWordAlignment") == 0) {
wordAlignmentFlag = false;
std::cerr << "omitting word alignment" << std::endl;
} else if (strcmp(argv[i],"--NoLex") == 0) {
lexFlag = false;
std::cerr << "not computing lexical translation score" << std::endl;
} else if (strcmp(argv[i],"--GoodTuring") == 0) {
goodTuringFlag = true;
fileNameCountOfCounts = std::string(fileNamePhraseTable) + ".coc";
std::cerr << "adjusting phrase translation probabilities with Good Turing discounting" << std::endl;
} else if (strcmp(argv[i],"--KneserNey") == 0) {
kneserNeyFlag = true;
fileNameCountOfCounts = std::string(fileNamePhraseTable) + ".coc";
std::cerr << "adjusting phrase translation probabilities with Kneser Ney discounting" << std::endl;
} else if (strcmp(argv[i],"--UnalignedPenalty") == 0) {
unalignedFlag = true;
std::cerr << "using unaligned word penalty" << std::endl;
} else if (strcmp(argv[i],"--UnalignedFunctionWordPenalty") == 0) {
unalignedFWFlag = true;
if (i+1==argc) {
std::cerr << "ERROR: specify function words file for unaligned function word penalty!" << std::endl;
exit(1);
}
fileNameFunctionWords = argv[++i];
std::cerr << "using unaligned function word penalty with function words from " << fileNameFunctionWords << std::endl;
} else if (strcmp(argv[i],"--LogProb") == 0) {
logProbFlag = true;
std::cerr << "using log-probabilities" << std::endl;
} else if (strcmp(argv[i],"--NegLogProb") == 0) {
logProbFlag = true;
negLogProb = -1;
std::cerr << "using negative log-probabilities" << std::endl;
} else if (strcmp(argv[i],"--MinCountHierarchical") == 0) {
minCountHierarchical = atof(argv[++i]);
std::cerr << "dropping all phrase pairs occurring less than " << minCountHierarchical << " times" << std::endl;
minCountHierarchical -= 0.00001; // account for rounding
} else if (strcmp(argv[i],"--CrossedNonTerm") == 0) {
crossedNonTerm = true;
std::cerr << "crossed non-term reordering feature" << std::endl;
} else {
featureArgs.push_back(argv[i]);
++i;
for (; i < argc && strncmp(argv[i], "--", 2); ++i) {
featureArgs.push_back(argv[i]);
}
if (i != argc) --i; //roll back, since we found another -- argument
}
}
MaybeLog maybeLogProb(logProbFlag, negLogProb);
// configure extra features
if (!inverseFlag) {
featureManager.configure(featureArgs);
}
// lexical translation table
if (lexFlag) {
lexTable.load( fileNameLex );
}
// function word list
if (unalignedFWFlag) {
loadFunctionWords( fileNameFunctionWords );
}
// compute count of counts for Good Turing discounting
if (goodTuringFlag || kneserNeyFlag) {
for(int i=1; i<=COC_MAX; i++) countOfCounts[i] = 0;
}
// sorted phrase extraction file
Moses::InputFileStream extractFile(fileNameExtract);
if (extractFile.fail()) {
std::cerr << "ERROR: could not open extract file " << fileNameExtract << std::endl;
exit(1);
}
istream &extractFileP = extractFile;
// output file: phrase translation table
ostream *phraseTableFile;
if (fileNamePhraseTable == "-") {
phraseTableFile = &std::cout;
} else {
Moses::OutputFileStream *outputFile = new Moses::OutputFileStream();
bool success = outputFile->Open(fileNamePhraseTable);
if (!success) {
std::cerr << "ERROR: could not open file phrase table file "
<< fileNamePhraseTable << std::endl;
exit(1);
}
phraseTableFile = outputFile;
}
// loop through all extracted phrase translations
char line[LINE_MAX_LENGTH], lastLine[LINE_MAX_LENGTH];
lastLine[0] = '\0';
ExtractionPhrasePair *phrasePair = NULL;
std::vector< ExtractionPhrasePair* > phrasePairsWithSameSource;
std::vector< ExtractionPhrasePair* > phrasePairsWithSameSourceAndTarget; // required for hierarchical rules only, as non-terminal alignments might make the phrases incompatible
int tmpSentenceId;
PHRASE *tmpPhraseSource, *tmpPhraseTarget;
ALIGNMENT *tmpTargetToSourceAlignment;
std::string tmpAdditionalPropertiesString;
float tmpCount=0.0f, tmpPcfgSum=0.0f;
int i=0;
SAFE_GETLINE( (extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__ );
if ( !extractFileP.eof() ) {
++i;
tmpPhraseSource = new PHRASE();
tmpPhraseTarget = new PHRASE();
tmpTargetToSourceAlignment = new ALIGNMENT();
processLine( std::string(line),
i, featureManager.includeSentenceId(), tmpSentenceId,
tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
tmpAdditionalPropertiesString,
tmpCount, tmpPcfgSum);
phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
tmpTargetToSourceAlignment,
tmpCount, tmpPcfgSum );
phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount );
featureManager.addPropertiesToPhrasePair( *phrasePair, tmpCount, tmpSentenceId );
phrasePairsWithSameSource.push_back( phrasePair );
if ( hierarchicalFlag ) {
phrasePairsWithSameSourceAndTarget.push_back( phrasePair );
}
strcpy( lastLine, line );
SAFE_GETLINE( (extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__ );
}
while ( !extractFileP.eof() ) {
if ( ++i % 100000 == 0 ) {
std::cerr << "." << std::flush;
}
// identical to last line? just add count
if (strcmp(line,lastLine) == 0) {
phrasePair->IncrementPrevious(tmpCount,tmpPcfgSum);
SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
continue;
} else {
strcpy( lastLine, line );
}
tmpPhraseSource = new PHRASE();
tmpPhraseTarget = new PHRASE();
tmpTargetToSourceAlignment = new ALIGNMENT();
tmpAdditionalPropertiesString.clear();
processLine( std::string(line),
i, featureManager.includeSentenceId(), tmpSentenceId,
tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
tmpAdditionalPropertiesString,
tmpCount, tmpPcfgSum);
bool matchesPrevious = false;
bool sourceMatch = true; bool targetMatch = true; bool alignmentMatch = true; // be careful with these:
// ExtractionPhrasePair::Matches() checks them in order and stops as soon as
// one of them has to be set to false, leaving the remaining flags untouched
if ( hierarchicalFlag ) {
for ( std::vector< ExtractionPhrasePair* >::const_iterator iter = phrasePairsWithSameSourceAndTarget.begin();
iter != phrasePairsWithSameSourceAndTarget.end(); ++iter ) {
if ( (*iter)->Matches( tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
sourceMatch, targetMatch, alignmentMatch ) ) {
matchesPrevious = true;
phrasePair = (*iter);
break;
}
}
} else {
if ( phrasePair->Matches( tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
sourceMatch, targetMatch, alignmentMatch ) ) {
matchesPrevious = true;
}
}
if ( matchesPrevious ) {
delete tmpPhraseSource;
delete tmpPhraseTarget;
if ( !phrasePair->Add( tmpTargetToSourceAlignment,
tmpCount, tmpPcfgSum ) ) {
delete tmpTargetToSourceAlignment;
}
phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount );
featureManager.addPropertiesToPhrasePair( *phrasePair, tmpCount, tmpSentenceId );
} else {
if ( !phrasePairsWithSameSource.empty() &&
!sourceMatch ) {
processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb );
for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
delete *iter;
}
phrasePairsWithSameSource.clear();
if ( hierarchicalFlag ) {
phrasePairsWithSameSourceAndTarget.clear();
}
}
if ( hierarchicalFlag ) {
if ( !phrasePairsWithSameSourceAndTarget.empty() &&
!targetMatch ) {
phrasePairsWithSameSourceAndTarget.clear();
}
}
phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
tmpTargetToSourceAlignment,
tmpCount, tmpPcfgSum );
phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount );
featureManager.addPropertiesToPhrasePair( *phrasePair, tmpCount, tmpSentenceId );
phrasePairsWithSameSource.push_back(phrasePair);
if ( hierarchicalFlag ) {
phrasePairsWithSameSourceAndTarget.push_back(phrasePair);
}
}
SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
}
processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb );
for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
delete *iter;
}
phrasePairsWithSameSource.clear();
phraseTableFile->flush();
if (phraseTableFile != &std::cout) {
delete phraseTableFile;
}
// output count of count statistics
if (goodTuringFlag || kneserNeyFlag) {
writeCountOfCounts( fileNameCountOfCounts );
}
}
void processLine( std::string line,
int lineID, bool includeSentenceIdFlag, int &sentenceId,
PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment,
std::string &additionalPropertiesString,
float &count, float &pcfgSum )
{
size_t foundAdditionalProperties = line.find("{{");
if (foundAdditionalProperties != std::string::npos) {
additionalPropertiesString = line.substr(foundAdditionalProperties);
line = line.substr(0,foundAdditionalProperties);
} else {
additionalPropertiesString.clear();
}
phraseSource->clear();
phraseTarget->clear();
targetToSourceAlignment->clear();
std::vector<std::string> token = tokenize( line.c_str() );
int item = 1;
for ( size_t j=0; j<token.size(); ++j ) {
if (token[j] == "|||") {
++item;
} else if (item == 1) { // source phrase
phraseSource->push_back( vcbS.storeIfNew( token[j] ) );
} else if (item == 2) { // target phrase
phraseTarget->push_back( vcbT.storeIfNew( token[j] ) );
} else if (item == 3) { // alignment
int s,t;
sscanf(token[j].c_str(), "%d-%d", &s, &t);
if ((size_t)t >= phraseTarget->size() || (size_t)s >= phraseSource->size()) {
std::cerr << "WARNING: phrase pair " << lineID
<< " has alignment point (" << s << ", " << t << ")"
<< " out of bounds (" << phraseSource->size() << ", " << phraseTarget->size() << ")"
<< std::endl;
} else {
// first alignment point? -> initialize
if ( targetToSourceAlignment->size() == 0 ) {
size_t numberOfTargetSymbols = (hierarchicalFlag ? phraseTarget->size()-1 : phraseTarget->size());
targetToSourceAlignment->resize(numberOfTargetSymbols);
}
// add alignment point
targetToSourceAlignment->at(t).insert(s);
}
} else if (includeSentenceIdFlag && item == 4) { // optional sentence id
sscanf(token[j].c_str(), "%d", &sentenceId);
} else if (item + (includeSentenceIdFlag?-1:0) == 4) { // count
sscanf(token[j].c_str(), "%f", &count);
} else if (item + (includeSentenceIdFlag?-1:0) == 5) { // target syntax PCFG score
float pcfgScore = std::atof(token[j].c_str());
pcfgSum = pcfgScore * count;
}
}
if ( targetToSourceAlignment->size() == 0 ) {
size_t numberOfTargetSymbols = (hierarchicalFlag ? phraseTarget->size()-1 : phraseTarget->size());
targetToSourceAlignment->resize(numberOfTargetSymbols);
}
if (item + (includeSentenceIdFlag?-1:0) == 3) {
count = 1.0;
}
if (item < 3 || item > 6) {
std::cerr << "ERROR: faulty line " << lineID << ": " << line << endl;
}
}
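
Putting the item counter together, an extract line has the shape source ||| target ||| alignment ||| count, optionally with a sentence id field before the count (when includeSentenceIdFlag is set) and a PCFG score after it. An illustrative line:

das haus ||| the house ||| 0-0 1-1 ||| 1

A line with only the first three fields defaults to count = 1.0; anything outside three to six fields is reported as faulty.
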
void writeCountOfCounts( const string &fileNameCountOfCounts )
{
// open file
Moses::OutputFileStream countOfCountsFile;
bool success = countOfCountsFile.Open(fileNameCountOfCounts.c_str());
if (!success) {
std::cerr << "ERROR: could not open count-of-counts file "
<< fileNameCountOfCounts << std::endl;
return;
}
// Kneser-Ney needs the total number of phrase pairs
countOfCountsFile << totalDistinct << std::endl;
// write out counts
for(int i=1; i<=COC_MAX; i++) {
countOfCountsFile << countOfCounts[ i ] << std::endl;
}
countOfCountsFile.Close();
}
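
The resulting .coc file is plain: one integer per line, the total number of distinct phrase pairs first (Kneser-Ney needs it), then how many pairs occurred exactly 1, 2, ..., COC_MAX (= 10) times. Illustrative contents (numbers made up):

35419
21002
6240
...
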
void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb )
{
if (phrasePairsWithSameSource.size() == 0) {
return;
}
float totalSource = 0;
//std::cerr << "phrasePairs.size() = " << phrasePairs.size() << std::endl;
// loop through phrase pairs
for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
// add to total count
totalSource += (*iter)->GetCount();
}
// output the distinct phrase pairs, one at a time
for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
// score and write out this phrase pair
outputPhrasePair( **iter, totalSource, phrasePairsWithSameSource.size(), phraseTableFile, featureManager, maybeLogProb );
}
}
void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
float totalCount, int distinctCount,
ostream &phraseTableFile,
const ScoreFeatureManager& featureManager,
const MaybeLog& maybeLogProb )
{
assert(phrasePair.IsValid());
const ALIGNMENT *bestAlignmentT2S = phrasePair.FindBestAlignmentTargetToSource();
float count = phrasePair.GetCount();
map< string, float > domainCount;
// collect count of count statistics
if (goodTuringFlag || kneserNeyFlag) {
totalDistinct++;
int countInt = count + 0.99999;
if (countInt <= COC_MAX)
countOfCounts[ countInt ]++;
}
// compute PCFG score
float pcfgScore = 0;
if (pcfgFlag && !inverseFlag) {
pcfgScore = phrasePair.GetPcfgScore() / count;
}
// output phrases
const PHRASE *phraseSource = phrasePair.GetSource();
const PHRASE *phraseTarget = phrasePair.GetTarget();
// do not output if hierarchical and count below threshold
if (hierarchicalFlag && count < minCountHierarchical) {
for(size_t j=0; j<phraseSource->size()-1; ++j) {
if (isNonTerminal(vcbS.getWord( phraseSource->at(j) )))
return;
}
}
// source phrase (unless inverse)
if (!inverseFlag) {
printSourcePhrase(phraseSource, phraseTarget, bestAlignmentT2S, phraseTableFile);
phraseTableFile << " ||| ";
}
// target phrase
printTargetPhrase(phraseSource, phraseTarget, bestAlignmentT2S, phraseTableFile);
phraseTableFile << " ||| ";
// source phrase (if inverse)
if (inverseFlag) {
printSourcePhrase(phraseSource, phraseTarget, bestAlignmentT2S, phraseTableFile);
phraseTableFile << " ||| ";
}
// lexical translation probability
if (lexFlag) {
double lexScore = computeLexicalTranslation( phraseSource, phraseTarget, bestAlignmentT2S );
phraseTableFile << maybeLogProb( lexScore );
}
// unaligned word penalty
if (unalignedFlag) {
double penalty = computeUnalignedPenalty( bestAlignmentT2S );
phraseTableFile << " " << maybeLogProb( penalty );
}
// unaligned function word penalty
if (unalignedFWFlag) {
double penalty = computeUnalignedFWPenalty( phraseTarget, bestAlignmentT2S );
phraseTableFile << " " << maybeLogProb( penalty );
}
if (crossedNonTerm && !inverseFlag) {
phraseTableFile << " " << calcCrossedNonTerm( phraseTarget, bestAlignmentT2S );
}
// target-side PCFG score
if (pcfgFlag && !inverseFlag) {
phraseTableFile << " " << maybeLogProb( pcfgScore );
}
// extra features
ScoreFeatureContext context(phrasePair, maybeLogProb);
std::vector<float> extraDense;
map<string,float> extraSparse;
featureManager.addFeatures(context, extraDense, extraSparse);
for (size_t i = 0; i < extraDense.size(); ++i) {
phraseTableFile << " " << extraDense[i];
}
for (map<string,float>::const_iterator i = extraSparse.begin();
i != extraSparse.end(); ++i) {
phraseTableFile << " " << i->first << " " << i->second;
}
phraseTableFile << " ||| ";
// output alignment info
if ( !inverseFlag ) {
if ( hierarchicalFlag ) {
// always output alignment if hiero style
assert(phraseTarget->size() == bestAlignmentT2S->size()+1);
std::vector<std::string> alignment;
for ( size_t j = 0; j < phraseTarget->size() - 1; ++j ) {
if ( isNonTerminal(vcbT.getWord( phraseTarget->at(j) ))) {
if ( bestAlignmentT2S->at(j).size() != 1 ) {
std::cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << std::endl;
phraseTableFile.flush();
assert(bestAlignmentT2S->at(j).size() == 1);
}
size_t sourcePos = *(bestAlignmentT2S->at(j).begin());
//phraseTableFile << sourcePos << "-" << j << " ";
std::stringstream point;
point << sourcePos << "-" << j;
alignment.push_back(point.str());
} else {
for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
size_t sourcePos = *setIter;
std::stringstream point;
point << sourcePos << "-" << j;
alignment.push_back(point.str());
}
}
}
// now print all alignments, sorted by source index
sort(alignment.begin(), alignment.end());
for (size_t i = 0; i < alignment.size(); ++i) {
phraseTableFile << alignment[i] << " ";
}
} else if (wordAlignmentFlag) {
// alignment info in pb model
for (size_t j = 0; j < bestAlignmentT2S->size(); ++j) {
for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
size_t sourcePos = *setIter;
phraseTableFile << sourcePos << "-" << j << " ";
}
}
}
}
// counts
phraseTableFile << " ||| " << totalCount << " " << count;
if (kneserNeyFlag)
phraseTableFile << " " << distinctCount;
if ((treeFragmentsFlag) &&
!inverseFlag) {
phraseTableFile << " |||";
}
// tree fragments
if (treeFragmentsFlag && !inverseFlag) {
const std::string *bestTreeFragment = phrasePair.FindBestPropertyValue("Tree");
if (bestTreeFragment) {
phraseTableFile << " {{Tree " << *bestTreeFragment << "}}";
}
}
phraseTableFile << std::endl;
}
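
For the default (non-inverse, non-hierarchical) configuration, the writes above yield lines of the shape

source ||| target ||| lex-score [more scores] [sparse features] ||| alignment ||| totalCount count [distinctCount] [||| {{Tree ...}}]

for instance (scores and counts illustrative):

das haus ||| the house ||| 0.63 ||| 0-0 1-1 ||| 120 95
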
bool calcCrossedNonTerm( size_t targetPos, size_t sourcePos, const ALIGNMENT *alignmentTargetToSource )
{
for (size_t currTarget = 0; currTarget < alignmentTargetToSource->size(); ++currTarget) {
if (currTarget == targetPos) {
// skip
} else {
const std::set<size_t> &sourceSet = alignmentTargetToSource->at(currTarget);
for (std::set<size_t>::const_iterator iter = sourceSet.begin();
iter != sourceSet.end(); ++iter) {
size_t currSource = *iter;
if ((currTarget < targetPos && currSource > sourcePos)
|| (currTarget > targetPos && currSource < sourcePos)
) {
return true;
}
}
}
}
return false;
}
int calcCrossedNonTerm( const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource )
{
assert(phraseTarget->size() >= alignmentTargetToSource->size() );
for (size_t targetPos = 0; targetPos < alignmentTargetToSource->size(); ++targetPos) {
if ( isNonTerminal(vcbT.getWord( phraseTarget->at(targetPos) ))) {
const std::set<size_t> &alignmentPoints = alignmentTargetToSource->at(targetPos);
assert( alignmentPoints.size() == 1 );
size_t sourcePos = *alignmentPoints.begin();
bool ret = calcCrossedNonTerm(targetPos, sourcePos, alignmentTargetToSource);
if (ret)
return 1;
}
}
return 0;
}
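
The two-argument overload makes the crossing condition explicit: a non-terminal aligned at target position t and source position s is crossed iff some other alignment point (t', s') lies on strictly opposite sides in the two languages:

\exists\,(t',s') \neq (t,s):\quad (t' < t \;\wedge\; s' > s)\;\vee\;(t' > t \;\wedge\; s' < s).
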
double computeUnalignedPenalty( const ALIGNMENT *alignmentTargetToSource )
{
// unaligned word counter
double unaligned = 1.0;
// only checking target words - source words are caught when computing inverse
for(size_t ti=0; ti<alignmentTargetToSource->size(); ++ti) {
const set< size_t > & srcIndices = alignmentTargetToSource->at(ti);
if (srcIndices.empty()) {
unaligned *= 2.718;
}
}
return unaligned;
}
double computeUnalignedFWPenalty( const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource )
{
// unaligned word counter
double unaligned = 1.0;
// only checking target words - source words are caught when computing inverse
for(size_t ti=0; ti<alignmentTargetToSource->size(); ++ti) {
const set< size_t > & srcIndices = alignmentTargetToSource->at(ti);
if (srcIndices.empty() && functionWordList.find( vcbT.getWord( phraseTarget->at(ti) ) ) != functionWordList.end()) {
unaligned *= 2.718;
}
}
return unaligned;
}
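
Both penalties amount to e raised to the number of offending target words (the code approximates e as 2.718):

\text{penalty} \;=\; e^{\,u}, \qquad u \;=\; \bigl|\{\, t : a(t) = \emptyset \,\}\bigr|

with u restricted to function words in computeUnalignedFWPenalty. Under --LogProb the reported value therefore reduces to (plus or minus) u itself.
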
void loadFunctionWords( const string &fileName )
{
std::cerr << "Loading function word list from " << fileName;
ifstream inFile;
inFile.open(fileName.c_str());
if (inFile.fail()) {
std::cerr << " - ERROR: could not open file" << std::endl;
exit(1);
}
istream *inFileP = &inFile;
char line[LINE_MAX_LENGTH];
while(true) {
SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
if (inFileP->eof()) break;
std::vector<string> token = tokenize( line );
if (token.size() > 0)
functionWordList.insert( token[0] );
}
std::cerr << " - read " << functionWordList.size() << " function words" << std::endl;
inFile.close();
}
double computeLexicalTranslation( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource )
{
// lexical translation probability
double lexScore = 1.0;
int null = vcbS.getWordID("NULL");
// all target words have to be explained
for(size_t ti=0; ti<alignmentTargetToSource->size(); ti++) {
const set< size_t > & srcIndices = alignmentTargetToSource->at(ti);
if (srcIndices.empty()) {
// explain unaligned word by NULL
lexScore *= lexTable.permissiveLookup( null, phraseTarget->at(ti) );
} else {
// go through all the aligned words to compute average
double thisWordScore = 0;
for (set< size_t >::const_iterator p(srcIndices.begin()); p != srcIndices.end(); ++p) {
thisWordScore += lexTable.permissiveLookup( phraseSource->at(*p), phraseTarget->at(ti) );
}
lexScore *= thisWordScore / (double)srcIndices.size();
}
}
return lexScore;
}
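
This is the standard lexical weighting: every target word is explained either by NULL (when unaligned) or by the average translation probability over its aligned source words,

\mathrm{lex}(\bar e \mid \bar f, a) \;=\; \prod_{i=1}^{|\bar e|}
\begin{cases}
w(e_i \mid \mathrm{NULL}) & \text{if } a_i = \emptyset\\
\frac{1}{|a_i|}\sum_{j\in a_i} w(e_i \mid f_j) & \text{otherwise.}
\end{cases}
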
void LexicalTable::load( const string &fileName )
{
std::cerr << "Loading lexical translation table from " << fileName;
ifstream inFile;
inFile.open(fileName.c_str());
if (inFile.fail()) {
std::cerr << " - ERROR: could not open file" << std::endl;
exit(1);
}
istream *inFileP = &inFile;
char line[LINE_MAX_LENGTH];
int i=0;
while(true) {
i++;
if (i%100000 == 0) std::cerr << "." << flush;
SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
if (inFileP->eof()) break;
std::vector<string> token = tokenize( line );
if (token.size() != 3) {
std::cerr << "line " << i << " in " << fileName
<< " has wrong number of tokens, skipping:" << std::endl
<< token.size() << " " << token[0] << " " << line << std::endl;
continue;
}
double prob = atof( token[2].c_str() );
WORD_ID wordT = vcbT.storeIfNew( token[0] );
WORD_ID wordS = vcbS.storeIfNew( token[1] );
ltable[ wordS ][ wordT ] = prob;
}
std::cerr << std::endl;
}
void printSourcePhrase(const PHRASE *phraseSource, const PHRASE *phraseTarget,
const ALIGNMENT *targetToSourceAlignment, ostream &out)
{
// get corresponding target non-terminal and output pair
ALIGNMENT *sourceToTargetAlignment = new ALIGNMENT();
invertAlignment(phraseSource, phraseTarget, targetToSourceAlignment, sourceToTargetAlignment);
// output source symbols, except root, in rule table format
for (std::size_t i = 0; i < phraseSource->size()-1; ++i) {
const std::string &word = vcbS.getWord(phraseSource->at(i));
if (!unpairedExtractFormatFlag || !isNonTerminal(word)) {
out << word << " ";
continue;
}
const std::set<std::size_t> &alignmentPoints = sourceToTargetAlignment->at(i);
assert(alignmentPoints.size() == 1);
size_t j = *(alignmentPoints.begin());
if (inverseFlag) {
out << vcbT.getWord(phraseTarget->at(j)) << word << " ";
} else {
out << word << vcbT.getWord(phraseTarget->at(j)) << " ";
}
}
// output source root symbol
if (conditionOnTargetLhsFlag && !inverseFlag) {
out << "[X]";
} else {
out << vcbS.getWord(phraseSource->back());
}
delete sourceToTargetAlignment;
}
void printTargetPhrase(const PHRASE *phraseSource, const PHRASE *phraseTarget,
const ALIGNMENT *targetToSourceAlignment, ostream &out)
{
// output target symbols, except root, in rule table format
for (std::size_t i = 0; i < phraseTarget->size()-1; ++i) {
const std::string &word = vcbT.getWord(phraseTarget->at(i));
if (!unpairedExtractFormatFlag || !isNonTerminal(word)) {
out << word << " ";
continue;
}
// get corresponding source non-terminal and output pair
std::set<std::size_t> alignmentPoints = targetToSourceAlignment->at(i);
assert(alignmentPoints.size() == 1);
int j = *(alignmentPoints.begin());
if (inverseFlag) {
out << word << vcbS.getWord(phraseSource->at(j)) << " ";
} else {
out << vcbS.getWord(phraseSource->at(j)) << word << " ";
}
}
// output target root symbol
if (conditionOnTargetLhsFlag) {
if (inverseFlag) {
out << "[X]";
} else {
out << vcbS.getWord(phraseSource->back());
}
} else {
out << vcbT.getWord(phraseTarget->back());
}
}
void invertAlignment(const PHRASE *phraseSource, const PHRASE *phraseTarget,
const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment) {
// typedef std::vector< std::set<size_t> > ALIGNMENT;
outSourceToTargetAlignment->clear();
size_t numberOfSourceSymbols = (hierarchicalFlag ? phraseSource->size()-1 : phraseSource->size());
outSourceToTargetAlignment->resize(numberOfSourceSymbols);
// add alignment point
for (size_t targetPosition = 0; targetPosition < inTargetToSourceAlignment->size(); ++targetPosition) {
for ( std::set<size_t>::iterator setIter = (inTargetToSourceAlignment->at(targetPosition)).begin();
setIter != (inTargetToSourceAlignment->at(targetPosition)).end(); ++setIter ) {
size_t sourcePosition = *setIter;
outSourceToTargetAlignment->at(sourcePosition).insert(targetPosition);
}
}
}