Revert "revert Jacob's change 033fa821ca97ccbbca189bac84ec1e80509aa85e"

This reverts commit c966ca36b1.
2024-09-11 11:25:40 +03:00 · 2012-10-15 21:10:10 +04:00 · 2012-10-15 21:10:10 +04:00 · 92b887fb84
commit 92b887fb84
parent 0eb98df0fe
12 changed files with 187 additions and 87 deletions
--- a/moses-chart-cmd/src/IOWrapper.cpp
+++ b/moses-chart-cmd/src/IOWrapper.cpp
@ -69,6 +69,7 @@ IOWrapper::IOWrapper(const std::vector<FactorType>	&inputFactorOrder
  ,m_nBestOutputCollector(NULL)
  ,m_searchGraphOutputCollector(NULL)
  ,m_singleBestOutputCollector(NULL)
+  ,m_alignmentOutputCollector(NULL)
 {
  const StaticData &staticData = StaticData::Instance();

@ -109,6 +110,15 @@ IOWrapper::IOWrapper(const std::vector<FactorType>	&inputFactorOrder
    m_detailedTranslationReportingStream = new std::ofstream(path.c_str());
    m_detailOutputCollector = new Moses::OutputCollector(m_detailedTranslationReportingStream);
  }
+  
+  if (staticData.PrintAlignmentInfo()) {
+    if (staticData.GetAlignmentOutputFile().empty()) {
+      m_alignmentOutputCollector = new Moses::OutputCollector(&std::cout);
+    } else {
+      m_alignmentOutputCollector = new Moses::OutputCollector(new std::ofstream(staticData.GetAlignmentOutputFile().c_str()));
+      m_alignmentOutputCollector->HoldOutputStream();
+    }
+  }
 }

 IOWrapper::~IOWrapper()
@ -122,6 +132,7 @@ IOWrapper::~IOWrapper()
  delete m_nBestOutputCollector;
  delete m_searchGraphOutputCollector;
  delete m_singleBestOutputCollector;
+  delete m_alignmentOutputCollector;
 }

 void IOWrapper::ResetTranslationId() {
@ -184,6 +195,86 @@ void OutputSurface(std::ostream &out, const ChartHypothesis *hypo, const std::ve
    }
  }
 }
+  
+namespace {
+  typedef std::vector< std::pair<size_t, size_t> > WordAlignment;
+  
+  // Reports whether the factor stored in the last slot of the word
+  // (index MAX_NUM_FACTORS - 1) is present and spells UNKNOWN_FACTOR.
+  bool IsUnknownWord(const Word& word) {
+    const Factor* lastFactor = word[MAX_NUM_FACTORS - 1];
+    return lastFactor != NULL && lastFactor->GetString() == UNKNOWN_FACTOR;
+  }
+  
+  /**
+   * Recursively builds the word alignment for the derivation rooted at hypo.
+   *
+   * Every returned pair is (source offset, target offset), counted from the
+   * start of what this hypothesis covers. Alignment points of the applied rule
+   * that link non-terminals are replaced by the alignment of the matching
+   * previous hypothesis; points that link terminals are emitted directly. All
+   * points are shifted by the accumulated widths of the rule symbols that
+   * precede them.
+   *
+   * \param hypo              hypothesis whose derivation is walked
+   * \param targetWordsCount  if non-NULL, receives the number of target words
+   *                          this hypothesis yields once non-terminals are expanded
+   * \return the expanded word alignment
+   */
+  WordAlignment GetWordAlignment(const Moses::ChartHypothesis *hypo, size_t *targetWordsCount)
+  {
+    const Moses::TargetPhrase& targetPhrase = hypo->GetCurrTargetPhrase();
+    const AlignmentInfo& phraseAlignmentInfo = targetPhrase.GetAlignmentInfo();
+    // Number of source-side rule positions we know about: highest aligned index + 1.
+    size_t sourceSize = 0;
+    for (AlignmentInfo::const_iterator it = phraseAlignmentInfo.begin();
+         it != phraseAlignmentInfo.end(); ++it)
+    {
+      sourceSize = std::max(sourceSize, it->first + 1);
+    }
+    // Width (in words) of every rule symbol; terminals keep the default of 1.
+    std::vector<size_t> sourceSideLengths(sourceSize, 1);
+    std::vector<size_t> targetSideLengths(targetPhrase.GetSize(), 1);
+    std::vector<WordAlignment> alignmentsPerSourceNonTerm(sourceSize);
+    size_t prevHypoIndex = 0;
+    for (AlignmentInfo::const_iterator it = phraseAlignmentInfo.begin();
+         it != phraseAlignmentInfo.end(); ++it)
+    {
+      if (targetPhrase.GetWord(it->second).IsNonTerminal()) {
+        // Previous hypotheses are consumed in the iteration order of the rule alignment.
+        const Moses::ChartHypothesis *prevHypo = hypo->GetPrevHypo(prevHypoIndex);
+        ++prevHypoIndex;
+        alignmentsPerSourceNonTerm[it->first] = GetWordAlignment(
+            prevHypo, &targetSideLengths[it->second]);
+        sourceSideLengths[it->first] = prevHypo->GetCurrSourceRange().GetNumWordsCovered();
+        CHECK(prevHypo->GetCurrSourceRange().GetStartPos() - hypo->GetCurrSourceRange().GetStartPos()
+          == (int)std::accumulate(sourceSideLengths.begin(), sourceSideLengths.begin() + it->first, 0));
+      } else if (alignmentsPerSourceNonTerm[it->first].empty()) {
+        // Terminal: keep exactly one (0, 0) placeholder per source position.
+        // A source word linked to several target words shows up once per link
+        // in the rule alignment; adding a placeholder for every link would make
+        // the expansion loop below emit each of those links several times.
+        alignmentsPerSourceNonTerm[it->first].push_back(WordAlignment::value_type(0, 0));
+      }
+    }
+    if (targetWordsCount != NULL) {
+      *targetWordsCount = std::accumulate(targetSideLengths.begin(), targetSideLengths.end(), size_t(0));
+    }
+    // Not a valid invariant, because source words may be unaligned:
+    // CHECK(hypo->GetCurrSourceRange().GetNumWordsCovered() == std::accumulate(sourceSideLengths.begin(), sourceSideLengths.end(), 0));
+    WordAlignment result;
+    for (AlignmentInfo::const_iterator it = phraseAlignmentInfo.begin();
+         it != phraseAlignmentInfo.end(); ++it)
+    {
+      // Offsets are the summed widths of all rule symbols left of this point.
+      const size_t sourceOffset = std::accumulate(sourceSideLengths.begin(), sourceSideLengths.begin() + it->first, size_t(0));
+      const size_t targetOffset = std::accumulate(targetSideLengths.begin(), targetSideLengths.begin() + it->second, size_t(0));
+      for (WordAlignment::const_iterator it2 = alignmentsPerSourceNonTerm[it->first].begin();
+           it2 != alignmentsPerSourceNonTerm[it->first].end(); ++it2)
+      {
+        result.push_back(WordAlignment::value_type(sourceOffset + it2->first, targetOffset + it2->second));
+      }
+    }
+    // A single unknown word translated as itself carries no rule alignment; align it 0-0.
+    if (result.empty() && targetPhrase.GetSize() == 1 && hypo->GetCurrSourceRange().GetNumWordsCovered() == 1 && IsUnknownWord(targetPhrase.GetWord(0))) {
+      result.push_back(WordAlignment::value_type(0, 0));
+    }
+    return result;
+  }
+}
+
+  
+/**
+ * Emits the word alignment of hypo as one line of space-separated
+ * "source-target" pairs through the alignment output collector.
+ * Does nothing when no alignment collector has been created.
+ */
+void IOWrapper::OutputAlignment(const Moses::ChartHypothesis *hypo, long translationId)
+{
+  if (m_alignmentOutputCollector != NULL) {
+    const WordAlignment points = GetWordAlignment(hypo, NULL);
+    std::ostringstream line;
+    // Empty before the first pair, a single blank before every later one.
+    const char *separator = "";
+    for (WordAlignment::const_iterator point = points.begin();
+         point != points.end(); ++point)
+    {
+      line << separator << point->first << "-" << point->second;
+      separator = " ";
+    }
+    line << std::endl;
+    m_alignmentOutputCollector->Write(static_cast<int>(translationId), line.str());
+  }
+}

 void IOWrapper::Backtrack(const ChartHypothesis *hypo)
 {
--- a/moses-chart-cmd/src/IOWrapper.h
+++ b/moses-chart-cmd/src/IOWrapper.h
@ -72,6 +72,7 @@ protected:
  Moses::OutputCollector                *m_nBestOutputCollector;
  Moses::OutputCollector                *m_searchGraphOutputCollector;
  Moses::OutputCollector                *m_singleBestOutputCollector;
+  Moses::OutputCollector                *m_alignmentOutputCollector;

 public:
  IOWrapper(const std::vector<Moses::FactorType>	&inputFactorOrder
@ -87,6 +88,7 @@ public:
  void OutputBestHypo(const std::vector<const Moses::Factor*>&  mbrBestHypo, long translationId);
  void OutputNBestList(const Moses::ChartTrellisPathList &nBestList, const Moses::ChartHypothesis *bestHypo, const Moses::TranslationSystem* system, long translationId);
  void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
+  void OutputAlignment(const Moses::ChartHypothesis *hypo, long translationId);
  void Backtrack(const Moses::ChartHypothesis *hypo);

  Moses::OutputCollector *ExposeSingleBest() { return m_singleBestOutputCollector; }
--- a/moses-chart-cmd/src/Main.cpp
+++ b/moses-chart-cmd/src/Main.cpp
@ -112,6 +112,10 @@ public:
      const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
      m_ioWrapper.OutputDetailedTranslationReport(bestHypo, sentence, lineNumber);
    }
+    
+    if (staticData.PrintAlignmentInfo()) {
+      m_ioWrapper.OutputAlignment(bestHypo, lineNumber);
+    }

    // n-best
    size_t nBestSize = staticData.GetNBestSize();
--- a/moses/src/AlignmentInfo.cpp
+++ b/moses/src/AlignmentInfo.cpp
@ -21,30 +21,14 @@
 #include "AlignmentInfo.h"
 #include "TypeDef.h"
 #include "StaticData.h"
+#include "util/tokenize_piece.hh"
+#include "util/string_piece.hh"
+#include <boost/lexical_cast.hpp>
+#include "Util.h"

 namespace Moses
 {

-void AlignmentInfo::BuildNonTermIndexMap()
-{
-  if (m_collection.empty()) {
-    return;
-  }
-  const_iterator p = begin();
-  size_t maxIndex = p->second;
-  for (++p;  p != end(); ++p) {
-    if (p->second > maxIndex) {
-      maxIndex = p->second;
-    }
-  }
-  m_nonTermIndexMap.resize(maxIndex+1, NOT_FOUND);
-  size_t i = 0;
-  for (p = begin(); p != end(); ++p) {
-    m_nonTermIndexMap[p->second] = i++;
-  }
-            
-}
-
 bool compare_target(const std::pair<size_t,size_t> *a, const std::pair<size_t,size_t> *b) {
  if(a->second < b->second)  return true;
  if(a->second == b->second) return (a->first < b->first);
@ -92,4 +76,32 @@ std::ostream& operator<<(std::ostream &out, const AlignmentInfo &alignmentInfo)
  return out;
 }

+namespace {
+  // Poor man's assertion for alignment parsing: when value is false, report
+  // the failure on stderr and abort the process; otherwise do nothing.
+  void MosesShouldUseExceptions(bool value) {
+    if (value)
+      return;
+    std::cerr << "Could not parse alignment info" << std::endl;
+    abort();
+  }
+} // namespace
+
+  
+/**
+ * Turns an alignment string into a set of (source, target) index pairs.
+ * The string is split on blanks and tabs; every resulting token must consist
+ * of exactly two fields separated by '-', otherwise the process is aborted
+ * via MosesShouldUseExceptions().
+ */
+std::set< std::pair<size_t, size_t> > ParseAlignmentFromString(const StringPiece &str)
+{
+  typedef std::pair<size_t, size_t> AlignPoint;
+  std::set<AlignPoint> points;
+  for (util::TokenIter<util::AnyCharacter, true> field(str, util::AnyCharacter(" \t")); field; ++field) {
+    util::TokenIter<util::AnyCharacter, false> part(*field, util::AnyCharacter("-"));
+    // First field: source position.
+    MosesShouldUseExceptions(part);
+    const size_t sourcePos = boost::lexical_cast<size_t>(*part);
+    ++part;
+    // Second field: target position.
+    MosesShouldUseExceptions(part);
+    const size_t targetPos = boost::lexical_cast<size_t>(*part);
+    ++part;
+    // Anything after the second field makes the token malformed.
+    MosesShouldUseExceptions(!part);
+
+    points.insert(AlignPoint(sourcePos, targetPos));
+  }
+  return points;
+}
+  
 }
--- a/moses/src/AlignmentInfo.h
+++ b/moses/src/AlignmentInfo.h
@ -21,6 +21,7 @@

 #include <ostream>
 #include <set>
+#include <map>
 #include <vector>

 namespace Moses
@ -33,27 +34,18 @@ class AlignmentInfoCollection;
 */
 class AlignmentInfo
 {
-  typedef std::set<std::pair<size_t,size_t> > CollType;
+  typedef std::multimap<size_t,size_t> CollType;

  friend std::ostream& operator<<(std::ostream &, const AlignmentInfo &);
  friend struct AlignmentInfoOrderer;
  friend class AlignmentInfoCollection;

 public:
-  typedef std::vector<size_t> NonTermIndexMap;
  typedef CollType::const_iterator const_iterator;

  const_iterator begin() const { return m_collection.begin(); }
  const_iterator end() const { return m_collection.end(); }

-  /** Provides a map from target-side to source-side non-terminal indices.
-    * The target-side index should be the rule symbol index (counting terminals).
-    * The index returned is the rule non-terminal index (ignoring terminals).
-   */
-  const NonTermIndexMap &GetNonTermIndexMap() const {
-    return m_nonTermIndexMap;
-  }
-
  size_t GetSize() const { return m_collection.size(); }

  std::vector< const std::pair<size_t,size_t>* > GetSortedAlignments() const;
@ -61,15 +53,12 @@ class AlignmentInfo
 private:
  //! AlignmentInfo objects should only be created by an AlignmentInfoCollection
  explicit AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs)
-    : m_collection(pairs)
+    : m_collection(pairs.begin(), pairs.end())
  {
-    BuildNonTermIndexMap();
  }

-  void BuildNonTermIndexMap();

  CollType m_collection;
-  NonTermIndexMap m_nonTermIndexMap;
 };

 /** Define an arbitrary strict weak ordering between AlignmentInfo objects
@ -83,3 +72,8 @@ struct AlignmentInfoOrderer
 };

 }
+
+// Forward declaration only: StringPiece is used here solely as a reference parameter.
+class StringPiece;
+namespace Moses {
+  /** Parses an alignment string made of blank- or tab-separated "source-target"
+    * index pairs into a set of (source, target) pairs. The implementation aborts
+    * the process when a token does not have exactly two '-'-separated fields.
+   */
+  std::set< std::pair<size_t, size_t> > ParseAlignmentFromString(const StringPiece &str);
+}
--- a/moses/src/ChartHypothesis.cpp
+++ b/moses/src/ChartHypothesis.cpp
@ -90,14 +90,11 @@ ChartHypothesis::~ChartHypothesis()
 */
 void ChartHypothesis::CreateOutputPhrase(Phrase &outPhrase) const
 {
-  const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
-    GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
-
  for (size_t pos = 0; pos < GetCurrTargetPhrase().GetSize(); ++pos) {
    const Word &word = GetCurrTargetPhrase().GetWord(pos);
    if (word.IsNonTerminal()) {
      // non-term. fill out with prev hypo
-      size_t nonTermInd = nonTermIndexMap[pos];
+      size_t nonTermInd = GetCurrTargetPhrase().GetNonTermIndex(pos);
      const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
      prevHypo->CreateOutputPhrase(outPhrase);
    } 
--- a/moses/src/ChartTrellisNode.cpp
+++ b/moses/src/ChartTrellisNode.cpp
@ -76,14 +76,12 @@ Phrase ChartTrellisNode::GetOutputPhrase() const
  // exactly like same fn in hypothesis, but use trellis nodes instead of prevHypos pointer
  Phrase ret(ARRAY_SIZE_INCR);

-  const Phrase &currTargetPhrase = m_hypo.GetCurrTargetPhrase();
-  const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
-    m_hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
+  const TargetPhrase &currTargetPhrase = m_hypo.GetCurrTargetPhrase();
  for (size_t pos = 0; pos < currTargetPhrase.GetSize(); ++pos) {
    const Word &word = currTargetPhrase.GetWord(pos);
    if (word.IsNonTerminal()) {
      // non-term. fill out with prev hypo
-      size_t nonTermInd = nonTermIndexMap[pos];
+      size_t nonTermInd = currTargetPhrase.GetNonTermIndex(pos);
      const ChartTrellisNode &childNode = GetChild(nonTermInd);
      Phrase childPhrase = childNode.GetOutputPhrase();
      ret.Append(childPhrase);
--- a/moses/src/LM/Implementation.cpp
+++ b/moses/src/LM/Implementation.cpp
@ -230,16 +230,14 @@ private:
  size_t CalcPrefix(const ChartHypothesis &hypo, int featureID, Phrase &ret, size_t size) const
  {
    const TargetPhrase &target = hypo.GetCurrTargetPhrase();
-    const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
-      target.GetAlignmentInfo().GetNonTermIndexMap();
-
+    
    // loop over the rule that is being applied
    for (size_t pos = 0; pos < target.GetSize(); ++pos) {
      const Word &word = target.GetWord(pos);

      // for non-terminals, retrieve it from underlying hypothesis
      if (word.IsNonTerminal()) {
-        size_t nonTermInd = nonTermIndexMap[pos];
+        size_t nonTermInd = target.GetNonTermIndex(pos);
        const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermInd);
        size = static_cast<const LanguageModelChartState*>(prevHypo->GetFFState(featureID))->CalcPrefix(*prevHypo, featureID, ret, size);
      }
@ -283,13 +281,12 @@ private:
    }
    // construct suffix analogous to prefix
    else {
-      const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
-        hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
-      for (int pos = (int) hypo.GetCurrTargetPhrase().GetSize() - 1; pos >= 0 ; --pos) {
-        const Word &word = hypo.GetCurrTargetPhrase().GetWord(pos);
+      const TargetPhrase& target = hypo.GetCurrTargetPhrase();
+      for (int pos = (int) target.GetSize() - 1; pos >= 0 ; --pos) {
+        const Word &word = target.GetWord(pos);

        if (word.IsNonTerminal()) {
-          size_t nonTermInd = nonTermIndexMap[pos];
+          size_t nonTermInd = target.GetNonTermIndex(pos);;
          const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermInd);
          size = static_cast<const LanguageModelChartState*>(prevHypo->GetFFState(featureID))->CalcSuffix(*prevHypo, featureID, ret, size);
        }
@ -388,16 +385,15 @@ FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo,
  float finalizedScore = 0.0; // finalized, has sufficient context

  // get index map for underlying hypotheses
-  const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
-    hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
+  const TargetPhrase &target = hypo.GetCurrTargetPhrase();

  // loop over rule
  for (size_t phrasePos = 0, wordPos = 0;
-       phrasePos < hypo.GetCurrTargetPhrase().GetSize();
+       phrasePos < target.GetSize();
       phrasePos++)
  {
    // consult rule for either word or non-terminal
-    const Word &word = hypo.GetCurrTargetPhrase().GetWord(phrasePos);
+    const Word &word = target.GetWord(phrasePos);

    // regular word
    if (!word.IsNonTerminal())
@ -422,7 +418,7 @@ FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo,
    else
    {
      // look up underlying hypothesis
-      size_t nonTermIndex = nonTermIndexMap[phrasePos];
+      size_t nonTermIndex = target.GetNonTermIndex(phrasePos);
      const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);

      const LanguageModelChartState* prevState =
--- a/moses/src/LM/Ken.cpp
+++ b/moses/src/LM/Ken.cpp
@ -300,7 +300,7 @@ class LanguageModelChartStateKenLM : public FFState {
 template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *accumulator) const {
  LanguageModelChartStateKenLM *newState = new LanguageModelChartStateKenLM();
  lm::ngram::RuleScore<Model> ruleScore(*m_ngram, newState->GetChartState());
-  const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
+  const TargetPhrase &targetPhrase = hypo.GetCurrTargetPhrase();

  const size_t size = hypo.GetCurrTargetPhrase().GetSize();
  size_t phrasePos = 0;
@ -313,7 +313,7 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(const Cha
      phrasePos++;
    } else if (word.IsNonTerminal()) {
      // Non-terminal is first so we can copy instead of rescoring.  
-      const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
+      const ChartHypothesis *prevHypo = hypo.GetPrevHypo(targetPhrase.GetNonTermIndex(phrasePos));
      const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetFFState(featureID))->GetChartState();
      ruleScore.BeginNonTerminal(prevState, prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
      phrasePos++;
@ -323,7 +323,8 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(const Cha
  for (; phrasePos < size; phrasePos++) {
    const Word &word = hypo.GetCurrTargetPhrase().GetWord(phrasePos);
    if (word.IsNonTerminal()) {
-      const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
+      size_t nonTermIndex = targetPhrase.GetNonTermIndex(phrasePos);
+      const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
      const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetFFState(featureID))->GetChartState();
      ruleScore.NonTerminal(prevState, prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
    } else {
--- a/moses/src/RuleTable/PhraseDictionarySCFG.cpp
+++ b/moses/src/RuleTable/PhraseDictionarySCFG.cpp
@ -55,10 +55,8 @@ PhraseDictionaryNodeSCFG &PhraseDictionarySCFG::GetOrCreateNode(const Phrase &so
 {
  const size_t size = source.GetSize();

-  const AlignmentInfo &alignmentInfo = target.GetAlignmentInfo();
-  AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();
-
  PhraseDictionaryNodeSCFG *currNode = &m_collection;
+  map<size_t, size_t> sourceToTargetMap(target.GetAlignmentInfo().begin(), target.GetAlignmentInfo().end());
  for (size_t pos = 0 ; pos < size ; ++pos) {
    const Word& word = source.GetWord(pos);

@ -66,10 +64,9 @@ PhraseDictionaryNodeSCFG &PhraseDictionarySCFG::GetOrCreateNode(const Phrase &so
      // indexed by source label 1st
      const Word &sourceNonTerm = word;

-      CHECK(iterAlign != target.GetAlignmentInfo().end());
-      CHECK(iterAlign->first == pos);
+      map<size_t, size_t>::const_iterator iterAlign = sourceToTargetMap.find(pos);
+      CHECK(iterAlign != sourceToTargetMap.end());
      size_t targetNonTermInd = iterAlign->second;
-      ++iterAlign;
      const Word &targetNonTerm = target.GetWord(targetNonTermInd);

      currNode = currNode->GetOrCreateChild(sourceNonTerm, targetNonTerm);
--- a/moses/src/TargetPhrase.cpp
+++ b/moses/src/TargetPhrase.cpp
@ -43,6 +43,7 @@ namespace Moses
 TargetPhrase::TargetPhrase( std::string out_string)
  :Phrase(0), m_fullScore(0.0), m_sourcePhrase(0)
  , m_alignmentInfo(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
+  , m_nonTermIndexMap(NULL)
 {

  //ACAT
@ -56,6 +57,7 @@ TargetPhrase::TargetPhrase()
  , m_fullScore(0.0)
  , m_sourcePhrase(0)
  , m_alignmentInfo(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
+  , m_nonTermIndexMap(NULL)
 {
 }

@ -64,11 +66,13 @@ TargetPhrase::TargetPhrase(const Phrase &phrase)
  , m_fullScore(0.0)
  , m_sourcePhrase(0)
  , m_alignmentInfo(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
+  , m_nonTermIndexMap(NULL)
 {
 }

 TargetPhrase::~TargetPhrase()
 {
+  // The map is allocated on demand by SetNonTermIndex() and stays NULL for
+  // phrases without non-terminals; deleting a NULL pointer is a no-op.
+  // NOTE(review): if TargetPhrase is copied with the compiler-generated copy
+  // constructor/assignment, copies would share this pointer and delete it
+  // twice - confirm that copies are handled elsewhere.
+  delete m_nonTermIndexMap;
 }

 void TargetPhrase::SetScore(const TranslationSystem* system)
@ -298,35 +302,19 @@ TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const
  return clone;
 }

-namespace {
-void MosesShouldUseExceptions(bool value) {
-  if (!value) {
-    std::cerr << "Could not parse alignment info" << std::endl;
-    abort();
-  }
-}
-} // namespace
-
 void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString)
 {
-  set<pair<size_t,size_t> > alignmentInfo;
-  for (util::TokenIter<util::AnyCharacter, true> token(alignString, util::AnyCharacter(" \t")); token; ++token) {
-    util::TokenIter<util::AnyCharacter, false> dash(*token, util::AnyCharacter("-"));
-    MosesShouldUseExceptions(dash);
-    size_t sourcePos = boost::lexical_cast<size_t>(*dash++);
-    MosesShouldUseExceptions(dash);
-    size_t targetPos = boost::lexical_cast<size_t>(*dash++);
-    MosesShouldUseExceptions(!dash);
-
-    alignmentInfo.insert(pair<size_t,size_t>(sourcePos, targetPos));
-  }
-
-  SetAlignmentInfo(alignmentInfo);
+  SetAlignmentInfo(ParseAlignmentFromString(alignString));
 }

 void TargetPhrase::SetAlignmentInfo(const std::set<std::pair<size_t,size_t> > &alignmentInfo)
 {
  m_alignmentInfo = AlignmentInfoCollection::Instance().Add(alignmentInfo);
+  size_t cntNonTerm = 0;
+  for (AlignmentInfo::const_iterator iter = m_alignmentInfo->begin(); iter != m_alignmentInfo->end(); ++iter) {
+    if (GetWord(iter->second).IsNonTerminal())
+      SetNonTermIndex(iter->second, cntNonTerm++);
+  }
 }


--- a/moses/src/TargetPhrase.h
+++ b/moses/src/TargetPhrase.h
@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #define moses_TargetPhrase_h

 #include <vector>
+#include <map>
 #include "TypeDef.h"
 #include "Phrase.h"
 #include "ScoreComponentCollection.h"
@ -55,6 +56,10 @@ protected:
  Phrase const* m_sourcePhrase;

  const AlignmentInfo *m_alignmentInfo;
+  
+  typedef std::map<size_t, size_t> NonTermIndexMap;
+  NonTermIndexMap* m_nonTermIndexMap;
+  
  Word m_lhsTarget;

 public:
@ -151,12 +156,27 @@ public:
  void SetAlignmentInfo(const AlignmentInfo *alignmentInfo) {
    m_alignmentInfo = alignmentInfo;
  }
+  
+  /** Returns the non-terminal index recorded for the given target position,
+    * or NOT_FOUND when no index has been recorded for that position.
+   */
+  size_t GetNonTermIndex(size_t position) const {
+    if (m_nonTermIndexMap != NULL) {
+      const NonTermIndexMap::const_iterator found = m_nonTermIndexMap->find(position);
+      if (found != m_nonTermIndexMap->end())
+        return found->second;
+    }
+    return NOT_FOUND;
+  }

  const AlignmentInfo &GetAlignmentInfo() const {
    return *m_alignmentInfo;
  }

  TO_STRING();
+private:
+  /** Records the non-terminal index for a target position, creating the map
+    * on first use. An index already recorded for the position is kept.
+   */
+  void SetNonTermIndex(size_t position, size_t index) {
+    if (m_nonTermIndexMap == NULL)
+      m_nonTermIndexMap = new NonTermIndexMap;
+    const NonTermIndexMap::value_type entry(position, index);
+    m_nonTermIndexMap->insert(entry);
+  }
 };

 std::ostream& operator<<(std::ostream&, const TargetPhrase&);