Merge branch 'master' of github.com:moses-smt/mosesdecoder

2024-08-16 15:00:33 +03:00 · 2013-09-14 11:21:15 +01:00 · 2013-09-14 11:21:15 +01:00 · 6ea0bb1f61
commit 6ea0bb1f61
parent 7fad9c39de cdd9df19d2
20 changed files with 238 additions and 186 deletions
--- a/4
+++ b/4
@ -76,6 +76,10 @@ include $(TOP)/jam-files/sanity.jam ;
 boost 103600 ;
 external-lib z ;

+lib dl : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
+requirements += <library>dl ;
+
+
 if ! [ option.get "without-tcmalloc" : : "yes" ] && [ test_library "tcmalloc_minimal" ] {
  if [ option.get "full-tcmalloc" : : "yes" ] {
    external-lib unwind ;
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@ -1066,6 +1066,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/DistortionScoreProducer.h</locationURI>
 		</link>
+		<link>
+			<name>FF/ExternalFeature.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ExternalFeature.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/ExternalFeature.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ExternalFeature.h</locationURI>
+		</link>
 		<link>
 			<name>FF/FFState.cpp</name>
 			<type>1</type>
--- a/moses/ChartManager.cpp
+++ b/moses/ChartManager.cpp
@ -93,6 +93,9 @@ void ChartManager::ProcessSentence()
      m_parser.Create(range, m_translationOptionList);
      m_translationOptionList.ApplyThreshold();

+      const InputPath &inputPath = m_parser.GetInputPath(range);
+      m_translationOptionList.Evaluate(m_source, inputPath);
+
      // decode
      ChartCell &cell = m_hypoStackColl.Get(range);

--- a/moses/ChartParser.cpp
+++ b/moses/ChartParser.cpp
@ -219,6 +219,11 @@ void ChartParser::CreateInputPaths(const InputType &input)
  }
 }

+const InputPath &ChartParser::GetInputPath(WordsRange &range) const // NOTE(review): range looks read-only here; a const WordsRange& would need the ChartParser.h declaration changed too
+{
+	return GetInputPath(range.GetStartPos(), range.GetEndPos()); // forwards the range's endpoints to the (startPos, endPos) overload
+}
+
 const InputPath &ChartParser::GetInputPath(size_t startPos, size_t endPos) const
 {
  size_t offset = endPos - startPos;
--- a/moses/ChartParser.h
+++ b/moses/ChartParser.h
@ -66,6 +66,7 @@ public:
  long GetTranslationId() const;
  size_t GetSize() const;
  const InputPath &GetInputPath(size_t startPos, size_t endPos) const;
+  const InputPath &GetInputPath(WordsRange &range) const;

 private:
  ChartParserUnknown m_unknown;
--- a/moses/ChartTranslationOption.h
+++ b/moses/ChartTranslationOption.h
@ -13,6 +13,7 @@ class ChartTranslationOption
 protected:
  const TargetPhrase &m_targetPhrase;
  ScoreComponentCollection m_scoreBreakdown;
+  const InputPath *m_inputPath;

 public:
  ChartTranslationOption(const TargetPhrase &targetPhrase);
@ -21,6 +22,11 @@ public:
    return m_targetPhrase;
  }

+  void SetInputPath(const InputPath *inputPath)
+  { m_inputPath = inputPath; } // stores the pointer itself; the InputPath object is not copied
+  const InputPath *GetInputPath() const
+  { return m_inputPath; } // returns the stored pointer -- NOTE(review): confirm the constructor initialises m_inputPath
+
  const ScoreComponentCollection &GetScores() const {
    return m_scoreBreakdown;
  }
--- a/moses/ChartTranslationOptions.cpp
+++ b/moses/ChartTranslationOptions.cpp
@ -69,6 +69,7 @@ void ChartTranslationOptions::Evaluate(const InputType &input, const InputPath &
  CollType::iterator iter;
  for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
    ChartTranslationOption &transOpt = **iter;
+    transOpt.SetInputPath(&inputPath);
    transOpt.Evaluate(input, inputPath);
  }

--- a/moses/FF/ExternalFeature.cpp
+++ b/moses/FF/ExternalFeature.cpp
@ -0,0 +1,73 @@
+#include "ExternalFeature.h"
+#include <dlfcn.h>
+
+using namespace std;
+
+namespace Moses
+{
+ExternalFeatureState::ExternalFeatureState(int stateSize, void *data)
+  :m_stateSize(stateSize), m_data(NULL) // deep copy of data; stays empty for NULL data or a non-positive size
+{
+	if (data && stateSize > 0 && !(m_data = malloc(stateSize))) abort(); // out of memory
+	if (m_data) memcpy(m_data, data, stateSize);
+}
+
+// Opens the library at m_path, builds the feature via its "create_ff" symbol and caches StateSize(); aborts on failure.
+void ExternalFeature::Load()
+{
+  typedef CdecFF* (*CreateFn)(const string&);
+  const string nparam = "testing";
+  if (m_path.empty()) {
+	cerr << "External requires a path to a dynamic library!\n";
+	abort();
+  }
+  lib_handle = dlopen(m_path.c_str(), RTLD_LAZY);
+  if (!lib_handle) {
+	cerr << "dlopen reports: " << dlerror() << endl;
+	cerr << "Did you provide a full path to the dynamic library?\n";
+	abort();
+  }
+  CreateFn fn = (CreateFn)(dlsym(lib_handle, "create_ff"));
+  if (!fn) {
+	cerr << "dlsym reports: " << dlerror() << endl;
+	abort();
+  }
+  ff_ext = (*fn)(nparam);
+  if (!ff_ext) { cerr << "create_ff returned NULL for " << m_path << endl; abort(); } // avoid the NULL dereference below
+  m_stateSize = ff_ext->StateSize();
+}
+
+ExternalFeature::~ExternalFeature() {
+  delete ff_ext; // destroy the plug-in object first: Load() obtained it from this library's create_ff
+  dlclose(lib_handle); // then release the handle returned by dlopen()
+}
+
+// Keeps the value of the "path" key in m_path; every other key is handed to the base class.
+void ExternalFeature::SetParameter(const std::string& key, const std::string& value)
+{
+  if (key != "path") {
+    StatefulFeatureFunction::SetParameter(key, value);
+    return;
+  }
+  m_path = value;
+}
+
+FFState* ExternalFeature::Evaluate(
+  const Hypothesis& cur_hypo,
+  const FFState* prev_state,
+  ScoreComponentCollection* accumulator) const
+{
+	  return new ExternalFeatureState(m_stateSize); // stub: no argument is read, no score is added; the new state has the cached size and no data buffer
+}
+
+FFState* ExternalFeature::EvaluateChart(
+  const ChartHypothesis& /* cur_hypo */,
+  int /* featureID - used to index the state in the previous hypotheses */,
+  ScoreComponentCollection* accumulator) const
+{
+	  return new ExternalFeatureState(m_stateSize); // stub: no argument is read, no score is added; the new state has the cached size and no data buffer
+}
+
+
+}
+
--- a/moses/FF/ExternalFeature.h
+++ b/moses/FF/ExternalFeature.h
@ -0,0 +1,93 @@
+#pragma once
+
+#include <string>
+#include <cstring>
+#include "StatefulFeatureFunction.h"
+#include "FFState.h"
+
+namespace Moses
+{
+class CdecFF;
+
+// Feature state wrapping an opaque byte buffer of m_stateSize bytes; m_data is NULL when the state holds no data.
+class ExternalFeatureState : public FFState
+{
+protected:
+	int m_stateSize;
+	void *m_data;
+public:
+	// Creates a state of the given size that has no buffer attached.
+	ExternalFeatureState(int stateSize)
+	:m_stateSize(stateSize)
+	,m_data(NULL)
+	{}
+	ExternalFeatureState(int stateSize, void *data);
+
+	~ExternalFeatureState() { free(m_data); } // free(NULL) is a no-op
+
+	// Orders states byte-wise; a state without a buffer sorts before one that has data, two empty states are equal.
+	int Compare(const FFState& other) const
+	{
+		const ExternalFeatureState &otherFF = static_cast<const ExternalFeatureState&>(other);
+		if (!m_data || !otherFF.m_data) return (m_data != NULL) - (otherFF.m_data != NULL); // memcmp on NULL is undefined
+		return memcmp(m_data, otherFF.m_data, m_stateSize);
+	}
+};
+
+// copied from cdec
+class ExternalFeature : public StatefulFeatureFunction
+{
+public:
+	// Handles and state size start out NULL/0 so the object is well-defined before Load() assigns them.
+	ExternalFeature(const std::string &line)
+		:StatefulFeatureFunction("ExternalFeature", line)
+		,lib_handle(NULL), ff_ext(NULL), m_stateSize(0)
+	{ ReadParameters(); }
+	~ExternalFeature();
+
+	// Opens the library named by the "path" parameter and creates the external feature (see ExternalFeature.cpp).
+	void Load();
+
+	bool IsUseable(const FactorMask &mask) const { return true; }
+
+	void SetParameter(const std::string& key, const std::string& value);
+
+	// The two stateless hooks below have empty bodies: they add no scores.
+	void Evaluate(const Phrase &source
+	                        , const TargetPhrase &targetPhrase
+	                        , ScoreComponentCollection &scoreBreakdown
+	                        , ScoreComponentCollection &estimatedFutureScore) const
+	{}
+	void Evaluate(const InputType &input
+	                        , const InputPath &inputPath
+	                        , ScoreComponentCollection &scoreBreakdown) const
+	{}
+	  FFState* Evaluate(
+	    const Hypothesis& cur_hypo,
+	    const FFState* prev_state,
+	    ScoreComponentCollection* accumulator) const;
+
+	  FFState* EvaluateChart(
+	    const ChartHypothesis& /* cur_hypo */,
+	    int /* featureID - used to index the state in the previous hypotheses */,
+	    ScoreComponentCollection* accumulator) const;
+
+	  // Initial state: the cached size with no data buffer attached.
+	  virtual const FFState* EmptyHypothesisState(const InputType &input) const
+	  { return new ExternalFeatureState(m_stateSize); }
+
+protected:
+	  std::string m_path;     // value of the "path" parameter
+	  void* lib_handle;       // dlopen() handle, NULL until Load()
+	  CdecFF *ff_ext;         // object returned by the library's create_ff, NULL until Load()
+	  int m_stateSize;        // ff_ext->StateSize(), 0 until Load()
+};
+
+class CdecFF // interface of the object returned by the library's create_ff entry point
+{ // NOTE(review): no virtual destructor although ~ExternalFeature deletes through CdecFF*; adding one changes the vtable layout, so confirm the plug-in ABI first
+public:
+	virtual int StateSize() const = 0; // size in bytes; Load() caches it in ExternalFeature::m_stateSize
+};
+
+}
+
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@ -30,6 +30,8 @@
 #include "moses/FF/PhrasePenalty.h"
 #include "moses/FF/OSM-Feature/OpSequenceModel.h"
 #include "moses/FF/ControlRecombination.h"
+#include "moses/FF/ExternalFeature.h"
+
 #include "moses/FF/SkeletonStatelessFF.h"
 #include "moses/FF/SkeletonStatefulFF.h"

@ -142,6 +144,7 @@ FeatureRegistry::FeatureRegistry()
  MOSES_FNAME(ControlRecombination);
  MOSES_FNAME(SkeletonStatelessFF);
  MOSES_FNAME(SkeletonStatefulFF);
+  MOSES_FNAME(ExternalFeature);

 #ifdef HAVE_SYNLM
  MOSES_FNAME(SyntacticLanguageModel);
--- a/moses/InputPath.cpp
+++ b/moses/InputPath.cpp
@ -18,6 +18,8 @@ InputPath::InputPath(const Phrase &phrase, const NonTerminalSet &sourceNonTerms,
  ,m_range(range)
  ,m_inputScore(inputScore)
 {
+  //cerr << "phrase=" << phrase << " m_inputScore=" << *m_inputScore << endl;
+
  FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor().first;
  if (placeholderFactor != NOT_FOUND) {
    for (size_t pos = 0; pos < m_phrase.GetSize(); ++pos) {
--- a/moses/ScoreComponentCollection.cpp
+++ b/moses/ScoreComponentCollection.cpp
@ -1,6 +1,6 @@
 // $Id$
 #include <vector>
-
+#include "util/exception.hh"
 #include "ScoreComponentCollection.h"
 #include "StaticData.h"

@ -30,6 +30,20 @@ void ScorePair::PlusEquals(const StringPiece &key, float value)
 	}
 }

+// Streams every dense score, then every sparse "key=value" pair, each followed by a comma.
+std::ostream& operator<<(std::ostream& os, const ScorePair& rhs)
+{
+	typedef std::vector<float>::const_iterator DenseIter;
+	for (DenseIter d = rhs.denseScores.begin(); d != rhs.denseScores.end(); ++d) {
+		os << *d << ",";
+	}
+	typedef std::map<StringPiece, float>::const_iterator SparseIter;
+	for (SparseIter s = rhs.sparseScores.begin(); s != rhs.sparseScores.end(); ++s) {
+		os << s->first << "=" << s->second << ",";
+	}
+	return os;
+}
+
 ScoreComponentCollection::ScoreIndexMap ScoreComponentCollection::s_scoreIndexes;
 size_t ScoreComponentCollection::s_denseVectorSize = 0;

@ -206,6 +220,21 @@ void ScoreComponentCollection::Assign(const FeatureFunction* sp, const string li
  }
 }

+// Overwrites sp's index range of m_scores with the given values; throws util::Exception on a size mismatch.
+void ScoreComponentCollection::Assign(const FeatureFunction* sp, const std::vector<float>& scores) {
+  const IndexPair indexes = GetIndexes(sp);
+  const size_t begin = indexes.first;
+  const size_t numScores = indexes.second - begin;
+  if (scores.size() != numScores) {
+	  UTIL_THROW(util::Exception, "Feature function " << sp->GetScoreProducerDescription() << " specified "
+			  << numScores << " dense scores or weights. Actually has " << scores.size());
+  }
+  for (size_t offset = 0; offset < numScores; ++offset) {
+    m_scores[begin + offset] = scores[offset];
+  }
+}
+
+
 void ScoreComponentCollection::InvertDenseFeatures(const FeatureFunction* sp)
 {

--- a/moses/ScoreComponentCollection.h
+++ b/moses/ScoreComponentCollection.h
@ -46,6 +46,8 @@ namespace Moses
 */
 struct ScorePair
 {
+	friend std::ostream& operator<<(std::ostream& os, const ScorePair& rhs);
+
 	std::vector<float> denseScores;
 	std::map<StringPiece, float> sparseScores;

@ -262,13 +264,7 @@ public:
    m_scores[fname] += score;
  }

-  void Assign(const FeatureFunction* sp, const std::vector<float>& scores) {
-    IndexPair indexes = GetIndexes(sp);
-    CHECK(scores.size() == indexes.second - indexes.first);
-    for (size_t i = 0; i < scores.size(); ++i) {
-      m_scores[i + indexes.first] = scores[i];
-    }
-  }
+  void Assign(const FeatureFunction* sp, const std::vector<float>& scores);

  //! Special version Assign(ScoreProducer, vector<float>)
  //! to add the score from a single ScoreProducer that produces
--- a/phrase-extract/ExtractedRule.cpp
+++ b/phrase-extract/ExtractedRule.cpp
@ -1,44 +0,0 @@
-//
-//  ExtractedRule.cpp
-//  extract
-//
-//  Created by Hieu Hoang on 13/09/2011.
-//  Copyright 2011 __MyCompanyName__. All rights reserved.
-//
-
-#include "ExtractedRule.h"
-
-using namespace std;
-
-namespace MosesTraining
-{
-
-void ExtractedRule::OutputNTLengths(std::ostream &out) const
-{
-  ostringstream outString;
-  OutputNTLengths(outString);
-  out << outString;
-}
-
-void ExtractedRule::OutputNTLengths(std::ostringstream &outString) const
-{
-  std::map<size_t, std::pair<size_t, size_t> >::const_iterator iter;
-  for (iter = m_ntLengths.begin(); iter != m_ntLengths.end(); ++iter) {
-    size_t sourcePos = iter->first;
-    const std::pair<size_t, size_t> &spanLengths = iter->second;
-    outString << sourcePos << "=" << spanLengths.first << "," <<spanLengths.second << " ";
-  }
-}
-
-std::ostream& operator<<(std::ostream &out, const ExtractedRule &obj)
-{
-  out << obj.source << " ||| " << obj.target << " ||| "
-      << obj.alignment << " ||| "
-      << obj.alignmentInv << " ||| ";
-
-  obj.OutputNTLengths(out);
-
-  return out;
-}
-
-} // namespace
--- a/phrase-extract/ExtractedRule.h
+++ b/phrase-extract/ExtractedRule.h
@ -32,8 +32,6 @@ namespace MosesTraining
 // sentence-level collection of rules
 class ExtractedRule
 {
-  friend std::ostream& operator<<(std::ostream &, const ExtractedRule &);
-
 public:
  std::string source;
  std::string target;
@ -54,8 +52,6 @@ public:
  float count;
  double pcfgScore;

-  std::map<size_t, std::pair<size_t, size_t> > m_ntLengths;
-
  ExtractedRule(int sT, int eT, int sS, int eS)
    : source()
    , target()
@ -76,13 +72,6 @@ public:
    , count(0)
    , pcfgScore(0.0) {
  }
-
-  void SetSpanLength(size_t sourcePos, size_t sourceLength, size_t targetLength) {
-    m_ntLengths[sourcePos] = std::pair<size_t, size_t>(sourceLength, targetLength);
-  }
-
-  void OutputNTLengths(std::ostream &out) const;
-  void OutputNTLengths(std::ostringstream &out) const;
 };

 }
--- a/phrase-extract/RuleExtractionOptions.h
+++ b/phrase-extract/RuleExtractionOptions.h
@ -49,7 +49,6 @@ public:
  bool duplicateRules;
  bool fractionalCounting;
  bool pcfgScore;
-  bool outputNTLengths;
  bool gzOutput;
  bool unpairedExtractFormat;
  bool conditionOnTargetLhs;
@ -83,7 +82,6 @@ public:
    , duplicateRules(true)
    , fractionalCounting(true)
    , pcfgScore(false)
-    , outputNTLengths(false)
    , gzOutput(false)
    , unpairedExtractFormat(false)
    , conditionOnTargetLhs(false)
--- a/phrase-extract/consolidate-main.cpp
+++ b/phrase-extract/consolidate-main.cpp
@ -41,7 +41,6 @@ bool lowCountFlag = false;
 bool goodTuringFlag = false;
 bool kneserNeyFlag = false;
 bool logProbFlag = false;
-bool outputNTLengths = false;
 inline float maybeLogProb( float a )
 {
  return logProbFlag ? log(a) : a;
@ -62,7 +61,7 @@ int main(int argc, char* argv[])
       << "consolidating direct and indirect rule tables\n";

  if (argc < 4) {
-    cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect] [--OutputNTLengths] \n";
+    cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect] \n";
    exit(1);
  }
  char* &fileNameDirect = argv[1];
@ -119,8 +118,6 @@ int main(int argc, char* argv[])
    } else if (strcmp(argv[i],"--LogProb") == 0) {
      logProbFlag = true;
      cerr << "using log-probabilities\n";
-    } else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
-      outputNTLengths = true;
    } else {
      cerr << "ERROR: unknown option " << argv[i] << endl;
      exit(1);
@ -315,10 +312,6 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
    // counts, for debugging
    fileConsolidated << "||| " << countE << " " << countF << " " << countEF;

-    if (outputNTLengths) {
-      fileConsolidated << " ||| " << itemDirect[5];
-    }
-
    // count bin feature (as a sparse feature)
    if (sparseCountBinFeatureFlag ||
        directSparseScores.compare("") != 0 ||
--- a/phrase-extract/extract-rules-main.cpp
+++ b/phrase-extract/extract-rules-main.cpp
@ -129,7 +129,6 @@ int main(int argc, char* argv[])
         << " --GlueGrammar FILE"
         << " | --UnknownWordLabel FILE"
         << " | --OnlyDirect"
-         << " | --OutputNTLengths"
         << " | --MaxSpan[" << options.maxSpan << "]"
         << " | --MinHoleTarget[" << options.minHoleTarget << "]"
         << " | --MinHoleSource[" << options.minHoleSource << "]"
@ -262,8 +261,6 @@ int main(int argc, char* argv[])
      options.fractionalCounting = false;
    } else if (strcmp(argv[i],"--PCFG") == 0) {
      options.pcfgScore = true;
-    } else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
-      options.outputNTLengths = true;
    } else if (strcmp(argv[i],"--UnpairedExtractFormat") == 0) {
      options.unpairedExtractFormat = true;
    } else if (strcmp(argv[i],"--ConditionOnTargetLHS") == 0) {
@ -663,9 +660,6 @@ void ExtractTask::saveHieroAlignment( int startT, int endT, int startS, int endS
    rule.alignment      += sourceSymbolIndex + "-" + targetSymbolIndex + " ";
    if (!m_options.onlyDirectFlag)
      rule.alignmentInv += targetSymbolIndex + "-" + sourceSymbolIndex + " ";
-
-    rule.SetSpanLength(hole.GetPos(0), hole.GetSize(0), hole.GetSize(1) ) ;
-
  }

  rule.alignment.erase(rule.alignment.size()-1);
@ -1077,9 +1071,6 @@ void ExtractTask::writeRulesToFile()
        << rule->target << " ||| "
        << rule->alignment << " ||| "
        << rule->count << " ||| ";
-    if (m_options.outputNTLengths) {
-      rule->OutputNTLengths(out);
-    }
    if (m_options.pcfgScore) {
      out << " ||| " << rule->pcfgScore;
    }
--- a/phrase-extract/score-main.cpp
+++ b/phrase-extract/score-main.cpp
@ -59,7 +59,6 @@ int negLogProb = 1;
 bool lexFlag = true;
 bool unalignedFlag = false;
 bool unalignedFWFlag = false;
-bool outputNTLengths = false;
 bool singletonFeature = false;
 bool crossedNonTerm = false;
 int countOfCounts[COC_MAX+1];
@ -82,9 +81,6 @@ double computeUnalignedPenalty( const PHRASE &, const PHRASE &, const PhraseAlig
 set<string> functionWordList;
 void loadFunctionWords( const string &fileNameFunctionWords );
 double computeUnalignedFWPenalty( const PHRASE &, const PHRASE &, const PhraseAlignment & );
-void calcNTLengthProb(const vector< PhraseAlignment* > &phrasePairs
-                      , map<size_t, map<size_t, float> > &sourceProb
-                      , map<size_t, map<size_t, float> > &targetProb);
 void printSourcePhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &);
 void printTargetPhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &);

@ -95,7 +91,7 @@ int main(int argc, char* argv[])

  ScoreFeatureManager featureManager;
  if (argc < 4) {
-    cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--NoWordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--OutputNTLengths] [--PCFG] [--UnpairedExtractFormat] [--ConditionOnTargetLHS] [--Singleton] [--CrossedNonTerm] \n";
+    cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--NoWordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--PCFG] [--UnpairedExtractFormat] [--ConditionOnTargetLHS] [--Singleton] [--CrossedNonTerm] \n";
    cerr << featureManager.usage() << endl;
    exit(1);
  }
@ -158,8 +154,6 @@ int main(int argc, char* argv[])
      minCountHierarchical = atof(argv[++i]);
      cerr << "dropping all phrase pairs occurring less than " << minCountHierarchical << " times\n";
      minCountHierarchical -= 0.00001; // account for rounding
-    } else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
-      outputNTLengths = true;
    } else if (strcmp(argv[i],"--Singleton") == 0) {
      singletonFeature = true;
      cerr << "binary singleton feature\n";
@ -375,87 +369,6 @@ const PhraseAlignment &findBestAlignment(const PhraseAlignmentCollection &phrase
  return *bestAlignment;
 }

-
-void calcNTLengthProb(const map<size_t, map<size_t, size_t> > &lengths
-                      , size_t total
-                      , map<size_t, map<size_t, float> > &probs)
-{
-  map<size_t, map<size_t, size_t> >::const_iterator iterOuter;
-  for (iterOuter = lengths.begin(); iterOuter != lengths.end(); ++iterOuter) {
-    size_t sourcePos = iterOuter->first;
-    const map<size_t, size_t> &inner = iterOuter->second;
-
-    map<size_t, size_t>::const_iterator iterInner;
-    for (iterInner = inner.begin(); iterInner != inner.end(); ++iterInner) {
-      size_t length = iterInner->first;
-      size_t count = iterInner->second;
-      float prob = (float) count / (float) total;
-      probs[sourcePos][length] = prob;
-    }
-  }
-}
-
-void calcNTLengthProb(const vector< PhraseAlignment* > &phrasePairs
-                      , map<size_t, map<size_t, float> > &sourceProb
-                      , map<size_t, map<size_t, float> > &targetProb)
-{
-  map<size_t, map<size_t, size_t> > sourceLengths, targetLengths;
-  // 1st = position in source phrase, 2nd = length, 3rd = count
-  map<size_t, size_t> totals;
-  // 1st = position in source phrase, 2nd = total counts
-  // each source pos should have same count?
-
-  vector< PhraseAlignment* >::const_iterator iterOuter;
-  for (iterOuter = phrasePairs.begin(); iterOuter != phrasePairs.end(); ++iterOuter) {
-    const PhraseAlignment &phrasePair = **iterOuter;
-    const std::map<size_t, std::pair<size_t, size_t> > &ntLengths = phrasePair.GetNTLengths();
-
-    std::map<size_t, std::pair<size_t, size_t> >::const_iterator iterInner;
-    for (iterInner = ntLengths.begin(); iterInner != ntLengths.end(); ++iterInner) {
-      size_t sourcePos = iterInner->first;
-      size_t sourceLength = iterInner->second.first;
-      size_t targetLength = iterInner->second.second;
-
-      sourceLengths[sourcePos][sourceLength]++;
-      targetLengths[sourcePos][targetLength]++;
-
-      totals[sourcePos]++;
-    }
-  }
-
-  if (totals.size() == 0) {
-    // no non-term. Don't bother
-    return;
-  }
-
-  size_t total = totals.begin()->second;
-  if (totals.size() > 1) {
-    assert(total == (++totals.begin())->second );
-  }
-
-  calcNTLengthProb(sourceLengths, total, sourceProb);
-  calcNTLengthProb(targetLengths, total, targetProb);
-
-}
-
-void outputNTLengthProbs(ostream &phraseTableFile, const map<size_t, map<size_t, float> > &probs, const string &prefix)
-{
-  map<size_t, map<size_t, float> >::const_iterator iterOuter;
-  for (iterOuter = probs.begin(); iterOuter != probs.end(); ++iterOuter) {
-    size_t sourcePos = iterOuter->first;
-    const map<size_t, float> &inner = iterOuter->second;
-
-    map<size_t, float>::const_iterator iterInner;
-    for (iterInner = inner.begin(); iterInner != inner.end(); ++iterInner) {
-      size_t length = iterInner->first;
-      float prob = iterInner->second;
-
-      phraseTableFile << sourcePos << "|" << prefix << "|" << length << "=" << prob << " ";
-    }
-  }
-
-}
-
 bool calcCrossedNonTerm(size_t sourcePos, size_t targetPos, const std::vector< std::set<size_t> > &alignedToS)
 {
  for (size_t currSource = 0; currSource < alignedToS.size(); ++currSource) {
@ -664,21 +577,6 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
  if (kneserNeyFlag)
    phraseTableFile << " " << distinctCount;

-  // nt lengths
-  if (outputNTLengths) {
-    phraseTableFile << " ||| ";
-
-    if (!inverseFlag) {
-      map<size_t, map<size_t, float> > sourceProb, targetProb;
-      // 1st sourcePos, 2nd = length, 3rd = prob
-
-      calcNTLengthProb(phrasePair, sourceProb, targetProb);
-
-      outputNTLengthProbs(phraseTableFile, sourceProb, "S");
-      outputNTLengthProbs(phraseTableFile, targetProb, "T");
-    }
-  }
-
  phraseTableFile << endl;
 }

--- a/scripts/ems/example/data/weight.ini
+++ b/scripts/ems/example/data/weight.ini
@ -6,7 +6,8 @@
 Distortion0= 0.3 
 UnknownWordPenalty0= 1 
 WordPenalty0= -1 
-TranslationModel0= 0.2 0.2 0.2 0.2 0.2 
+TranslationModel0= 0.2 0.2 0.2 0.2
+PhrasePenalty0= 0.2
 LexicalReordering0= 0.3 0.3 0.3 0.3 0.3 0.3 
 LM0= 0.5