Option for target constituent constrained phrase extraction. TargetConstituentAdjacencyFeature.

2024-07-14 14:50:41 +03:00 · 2016-02-12 17:46:57 +00:00 · 2016-02-12 17:46:57 +00:00 · 1659d6b4c8
commit 1659d6b4c8
parent c75f9854e4
19 changed files with 810 additions and 44 deletions
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@ -42,6 +42,7 @@
 #include "moses/FF/ControlRecombination.h"
 #include "moses/FF/ConstrainedDecoding.h"
 #include "moses/FF/SoftSourceSyntacticConstraintsFeature.h"
+#include "moses/FF/TargetConstituentAdjacencyFeature.h"
 #include "moses/FF/TargetPreferencesFeature.h"
 #include "moses/FF/CoveredReferenceFeature.h"
 #include "moses/FF/TreeStructureFeature.h"
@ -264,6 +265,7 @@ FeatureRegistry::FeatureRegistry()
  MOSES_FNAME(CoveredReferenceFeature);
  MOSES_FNAME(SourceGHKMTreeInputMatchFeature);
  MOSES_FNAME(SoftSourceSyntacticConstraintsFeature);
+  MOSES_FNAME(TargetConstituentAdjacencyFeature);
  MOSES_FNAME(TargetPreferencesFeature);
  MOSES_FNAME(TreeStructureFeature);
  MOSES_FNAME(SoftMatchingFeature);
--- a/moses/FF/TargetConstituentAdjacencyFeature.cpp
+++ b/moses/FF/TargetConstituentAdjacencyFeature.cpp
@ -0,0 +1,189 @@
+#include "TargetConstituentAdjacencyFeature.h"
+#include "moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h"
+#include "moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h"
+#include "moses/StaticData.h"
+#include "moses/ScoreComponentCollection.h"
+#include "moses/Hypothesis.h"
+#include "moses/FactorCollection.h"
+#include "moses/TreeInput.h"
+#include <algorithm>
+
+
+using namespace std;
+
+namespace Moses
+{
+
+// Hash used for hypothesis recombination. With m_recombine set, every state
+// hashes to 0; otherwise the hash covers the number of stored constituent
+// labels and their Factor pointers (the float values are not hashed, matching
+// operator==, which also compares keys only).
+size_t TargetConstituentAdjacencyFeatureState::hash() const
+{
+  size_t seed = 0;
+  if (!m_recombine) {
+    boost::hash_combine(seed, m_collection.size());
+    std::map<const Factor*, float>::const_iterator entry = m_collection.begin();
+    while (entry != m_collection.end()) {
+      boost::hash_combine(seed, entry->first);
+      ++entry;
+    }
+  }
+  return seed;
+}
+
+// Equality used for hypothesis recombination. States compare equal when
+// m_recombine is set, when both operands are the same object, or when both
+// hold exactly the same set of constituent label Factors (values are ignored).
+// Throws if 'other' is not a TargetConstituentAdjacencyFeatureState.
+bool TargetConstituentAdjacencyFeatureState::operator==(const FFState& other) const
+{
+  if (m_recombine || this == &other) {
+    return true;
+  }
+
+  const TargetConstituentAdjacencyFeatureState* rhs =
+    dynamic_cast<const TargetConstituentAdjacencyFeatureState*>(&other);
+  UTIL_THROW_IF2(rhs == NULL, "Wrong state type");
+
+  const std::map<const Factor*, float>& theirs = rhs->m_collection;
+  if (m_collection.size() != theirs.size()) {
+    return false;
+  }
+
+  // Both maps have the same size and are ordered by key, so a single
+  // lock-step pass is enough to compare the key sets.
+  std::map<const Factor*, float>::const_iterator mine = m_collection.begin();
+  std::map<const Factor*, float>::const_iterator their = theirs.begin();
+  while (mine != m_collection.end()) {
+    if (mine->first != their->first) {
+      return false;
+    }
+    ++mine;
+    ++their;
+  }
+  return true;
+}
+
+
+// Constructs the feature from its configuration line. The base class is set
+// up with two dense score components; the variant defaults to 0 and
+// recombination to false before ReadParameters() is invoked (presumably this
+// is what routes key/value pairs to SetParameter -- confirm in the base class).
+TargetConstituentAdjacencyFeature::TargetConstituentAdjacencyFeature(const std::string &line)
+  : StatefulFeatureFunction(2, line)
+  , m_featureVariant(0)
+  , m_recombine(false)
+{
+  VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
+  ReadParameters();
+  VERBOSE(1, " Done." << std::endl);
+  // Report the variant that is in effect after parameters were read.
+  VERBOSE(1, " Feature variant: " << m_featureVariant << "." << std::endl);
+}
+
+
+// Handles the feature-specific settings "variant" (parsed as size_t) and
+// "recombine" (parsed as bool); every other key is forwarded to the base class.
+void TargetConstituentAdjacencyFeature::SetParameter(const std::string& key, const std::string& value)
+{
+  if (key == "variant") {
+    m_featureVariant = Scan<size_t>(value);
+    return;
+  }
+  if (key == "recombine") {
+    m_recombine = Scan<bool>(value);
+    return;
+  }
+  StatefulFeatureFunction::SetParameter(key, value);
+}
+
+
+// Scores the current phrase application against the state of the previous
+// hypothesis (phrase-based decoding).
+//
+// The "TargetConstituentBoundariesLeft" property of the current target phrase
+// is matched against the constituent labels stored in prev_state (or against
+// the "BOS_" label when the previous hypothesis has no predecessor).
+//   newScores[0]: TransformScore of the match quality (see below)
+//   newScores[1]: incremented when nothing matched, the match probability is
+//                 zero, or the phrase has no left-boundary property
+//
+// The returned state is newly allocated and carries a copy of the phrase's
+// "TargetConstituentBoundariesRightAdjacent" collection (empty if the property
+// is absent). Throws if either property is missing and the first word of the
+// phrase is not marked OOV.
+FFState* TargetConstituentAdjacencyFeature::EvaluateWhenApplied(
+  const Hypothesis& cur_hypo,
+  const FFState* prev_state,
+  ScoreComponentCollection* accumulator) const
+{
+  // dense scores
+  std::vector<float> newScores(m_numScoreComponents,0); // m_numScoreComponents == 2
+
+  // state
+  const TargetConstituentAdjacencyFeatureState *prevState = static_cast<const TargetConstituentAdjacencyFeatureState*>(prev_state);
+
+  // read TargetConstituentAdjacency property
+  const TargetPhrase &currTarPhr = cur_hypo.GetCurrTargetPhrase();
+  FEATUREVERBOSE(2, "Phrase: " << currTarPhr << std::endl);
+
+  if (const PhraseProperty *property = currTarPhr.GetProperty("TargetConstituentBoundariesLeft")) {
+
+    const TargetConstituentBoundariesLeftPhraseProperty *targetConstituentBoundariesLeftPhraseProperty =
+      static_cast<const TargetConstituentBoundariesLeftPhraseProperty*>(property);
+    const TargetConstituentBoundariesLeftCollection& leftConstituentCollection =
+      targetConstituentBoundariesLeftPhraseProperty->GetCollection();
+    float prob = 0;
+    size_t numMatch = 0;
+    size_t numOverall = 0;
+
+    if ( !cur_hypo.GetPrevHypo()->GetPrevHypo() ) {
+      // previous hypothesis is initial, i.e. target sentence starts here
+
+      ++numOverall;
+      FactorCollection &factorCollection = FactorCollection::Instance();
+      const Factor* bosFactor = factorCollection.AddFactor("BOS_",false);
+      TargetConstituentBoundariesLeftCollection::const_iterator found =
+        leftConstituentCollection.find(bosFactor);
+      if ( found != leftConstituentCollection.end() ) {
+        ++numMatch;
+        prob += found->second;
+      }
+
+    } else {
+
+      // Walk both key-ordered maps in parallel. numOverall counts the steps of
+      // this walk (matches and skips alike); entries left over once either map
+      // is exhausted are not counted. prob keeps the largest product of the
+      // two values seen for a label present in both maps.
+      const std::map<const Factor*, float>& hypConstituentCollection = prevState->m_collection;
+      std::map<const Factor*, float>::const_iterator iter1 = hypConstituentCollection.begin();
+      std::map<const Factor*, float>::const_iterator iter2 = leftConstituentCollection.begin();
+      while ( iter1 != hypConstituentCollection.end() && iter2 != leftConstituentCollection.end() ) {
+        ++numOverall;
+        if ( iter1->first < iter2->first ) {
+          ++iter1;
+        } else if ( iter2->first < iter1->first ) {
+          ++iter2;
+        } else {
+          ++numMatch;
+          float currProb = iter1->second * iter2->second;
+          if (currProb > prob)
+            prob = currProb;
+          ++iter1;
+          ++iter2;
+        }
+      }
+    }
+
+    if ( (numMatch == 0) || (prob == 0) ) {
+      ++newScores[1];
+    } else {
+      // variant 1 scores the best match probability; any other variant scores
+      // the fraction of walk steps that were matches
+      if ( m_featureVariant == 1 ) {
+        newScores[0] += TransformScore(prob);
+      } else {
+        newScores[0] += TransformScore( (float)numMatch/numOverall );
+      }
+    }
+
+  } else {
+
+    // abort with error message if the phrase does not translate an unknown word
+    UTIL_THROW_IF2(!currTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
+                   << ": Missing TargetConstituentBoundariesLeft property.");
+
+    ++newScores[1];
+
+  }
+
+  TargetConstituentAdjacencyFeatureState *newState = new TargetConstituentAdjacencyFeatureState(m_recombine);
+
+  if (const PhraseProperty *property = currTarPhr.GetProperty("TargetConstituentBoundariesRightAdjacent")) {
+
+    const TargetConstituentBoundariesRightAdjacentPhraseProperty *targetConstituentBoundariesRightAdjacentPhraseProperty =
+      static_cast<const TargetConstituentBoundariesRightAdjacentPhraseProperty*>(property);
+    // Use the RightAdjacent typedef here: the property returns that type, and
+    // relying on the Left typedef only worked because both alias the same map.
+    const TargetConstituentBoundariesRightAdjacentCollection& rightAdjacentConstituentCollection =
+      targetConstituentBoundariesRightAdjacentPhraseProperty->GetCollection();
+
+    // remember the right-adjacent labels for scoring the next phrase
+    std::copy(rightAdjacentConstituentCollection.begin(), rightAdjacentConstituentCollection.end(),
+              std::inserter(newState->m_collection, newState->m_collection.begin()));
+
+  } else {
+
+    // abort with error message if the phrase does not translate an unknown word
+    UTIL_THROW_IF2(!currTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
+                   << ": Missing TargetConstituentBoundariesRightAdjacent property.");
+
+  }
+
+  // add scores
+  accumulator->PlusEquals(this, newScores);
+
+  return newState;
+}
+
+}
+
--- a/moses/FF/TargetConstituentAdjacencyFeature.h
+++ b/moses/FF/TargetConstituentAdjacencyFeature.h
@ -0,0 +1,101 @@
+#pragma once
+
+#include <string>
+#include <vector>
+#include <set>
+#include <iostream>
+#include "StatefulFeatureFunction.h"
+#include "FFState.h"
+#include "util/exception.hh"
+#include <stdint.h>
+
+namespace Moses
+{
+
+// Decoder state of TargetConstituentAdjacencyFeature: the constituent labels
+// (as Factor pointers, each with a float value) that the feature copies from
+// the "TargetConstituentBoundariesRightAdjacent" property of the phrase that
+// produced this state.
+class TargetConstituentAdjacencyFeatureState : public FFState
+{
+
+public:
+
+  // the feature fills and reads m_collection directly
+  friend class TargetConstituentAdjacencyFeature;
+
+  // explicit: a bool must never convert silently into a feature state
+  explicit TargetConstituentAdjacencyFeatureState(bool recombine)
+    : m_recombine(recombine)
+  {}
+
+  // Returns 0 when m_recombine is set; otherwise hashes the stored label keys.
+  size_t hash() const;
+
+  // Returns true when m_recombine is set; otherwise compares the stored label keys.
+  virtual bool operator==(const FFState& other) const;
+
+private:
+
+  // when true, hash() and operator==() treat all states as identical
+  const bool m_recombine;
+  // constituent label -> value, ordered by Factor pointer
+  std::map<const Factor*, float> m_collection;
+
+};
+
+
+// Stateful feature function that scores target-side constituent adjacency
+// between consecutive phrases, using the "TargetConstituentBoundariesLeft" and
+// "TargetConstituentBoundariesRightAdjacent" phrase properties. Only the
+// Hypothesis overload of EvaluateWhenApplied does any work; chart-based
+// decoding is rejected with an exception.
+class TargetConstituentAdjacencyFeature : public StatefulFeatureFunction
+{
+
+public:
+
+  // 'line' is the feature configuration line; see SetParameter for the
+  // feature-specific keys ("variant", "recombine").
+  TargetConstituentAdjacencyFeature(const std::string &line);
+
+  ~TargetConstituentAdjacencyFeature()
+  {};
+
+  // The feature places no restriction on the factor mask.
+  bool IsUseable(const FactorMask &mask) const {
+    return true;
+  };
+
+  // Returns a newly allocated state with an empty label collection.
+  virtual const FFState* EmptyHypothesisState(const InputType &input) const {
+    return new TargetConstituentAdjacencyFeatureState(m_recombine);
+  };
+
+  void SetParameter(const std::string& key, const std::string& value);
+
+  // Nothing to load.
+  void Load(AllOptions::ptr const& opts)
+  {};
+
+  // No score is computed in isolation.
+  void EvaluateInIsolation(const Phrase &source
+                           , const TargetPhrase &targetPhrase
+                           , ScoreComponentCollection &scoreBreakdown
+                           , ScoreComponentCollection &estimatedFutureScore) const
+  {};
+
+  // No score is computed from source context.
+  void EvaluateWithSourceContext(const InputType &input
+                                 , const InputPath &inputPath
+                                 , const TargetPhrase &targetPhrase
+                                 , const StackVec *stackVec
+                                 , ScoreComponentCollection &scoreBreakdown
+                                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
+  {};
+
+  // No score is computed on translation option lists.
+  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+      , const TranslationOptionList &translationOptionList) const
+  {};
+
+  // Phrase-based decoding: scores cur_hypo against prev_state and returns the
+  // newly allocated successor state.
+  FFState* EvaluateWhenApplied(
+    const Hypothesis& cur_hypo,
+    const FFState* prev_state,
+    ScoreComponentCollection* accumulator) const;
+
+  // Chart-based decoding is not supported: always throws. The return
+  // statement after the throw is not reached.
+  FFState* EvaluateWhenApplied(
+    const ChartHypothesis& cur_hypo,
+    int featureID, // used to index the state in the previous hypotheses
+    ScoreComponentCollection* accumulator) const {
+    UTIL_THROW2(GetScoreProducerDescription() << ": feature currently not implemented for chart-based decoding.");
+    return new TargetConstituentAdjacencyFeatureState(m_recombine);
+  };
+
+
+private:
+
+  // 1: score the best match probability; otherwise: score the match ratio
+  size_t m_featureVariant;
+  // passed to every state; when true all states compare equal
+  bool m_recombine;
+
+};
+
+}
+
--- a/moses/Factor.h
+++ b/moses/Factor.h
@ -19,8 +19,7 @@ License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/

-#ifndef moses_Factor_h
-#define moses_Factor_h
+#pragma once

 #include <ostream>
 #include <string>
@ -98,4 +97,4 @@ public:
 size_t hash_value(const Factor &f);

 }
-#endif
+
--- a/moses/PP/Factory.cpp
+++ b/moses/PP/Factory.cpp
@ -11,6 +11,8 @@
 #include "moses/PP/SpanLengthPhraseProperty.h"
 #include "moses/PP/NonTermContextProperty.h"
 #include "moses/PP/OrientationPhraseProperty.h"
+#include "moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h"
+#include "moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h"

 namespace Moses
 {
@ -58,6 +60,8 @@ PhrasePropertyFactory::PhrasePropertyFactory()

  MOSES_PNAME2("Counts", CountsPhraseProperty);
  MOSES_PNAME2("SourceLabels", SourceLabelsPhraseProperty);
+  MOSES_PNAME2("TargetConstituentBoundariesLeft", TargetConstituentBoundariesLeftPhraseProperty);
+  MOSES_PNAME2("TargetConstituentBoundariesRightAdjacent", TargetConstituentBoundariesRightAdjacentPhraseProperty);
  MOSES_PNAME2("TargetPreferences", TargetPreferencesPhraseProperty);
  MOSES_PNAME2("Tree",TreeStructurePhraseProperty);
  MOSES_PNAME2("SpanLength", SpanLengthPhraseProperty);
--- a/moses/PP/PhraseProperty.cpp
+++ b/moses/PP/PhraseProperty.cpp
@ -5,9 +5,14 @@ namespace Moses

 std::ostream& operator<<(std::ostream &out, const PhraseProperty &obj)
 {
-  out << "Base phrase property";
+  // Delegate to the virtual Print() so that each subclass decides how it is rendered.
+  obj.Print(out);
   return out;
 }

+// Default textual form of a phrase property, written by operator<<.
+// Subclasses override this to print their parsed content instead.
+void PhraseProperty::Print(std::ostream &out) const
+{
+  out << "Base phrase property";
+}
+
 }

--- a/moses/PP/PhraseProperty.h
+++ b/moses/PP/PhraseProperty.h
@ -28,6 +28,8 @@ public:

 protected:

+  virtual void Print(std::ostream& out) const;
+
  std::string *m_value;

 };
--- a/moses/PP/TargetConstituentBoundariesLeftPhraseProperty.cpp
+++ b/moses/PP/TargetConstituentBoundariesLeftPhraseProperty.cpp
@ -0,0 +1,63 @@
+#include "moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h"
+#include "moses/FactorCollection.h"
+#include "moses/Util.h"
+#include <iostream>
+#include <queue>
+#include <ostream>
+
+namespace Moses
+{
+
+// Parses the property value, a space-separated sequence of
+// "<label-list> <count>" pairs where the labels inside a label list are
+// separated by '<'. Each distinct label of a pair is added to
+// m_constituentsCollection with that pair's count; counts of labels that
+// occur in several pairs are summed.
+// Throws if a label list is not followed by a count token.
+void TargetConstituentBoundariesLeftPhraseProperty::ProcessValue(const std::string &value)
+{
+  FactorCollection &factorCollection = FactorCollection::Instance();
+  std::vector<std::string> tokens;
+  Tokenize(tokens, value, " ");
+  std::vector<std::string>::const_iterator tokenIter = tokens.begin();
+  while (tokenIter != tokens.end()) {
+    try {
+
+      std::vector<std::string> constituents;
+      Tokenize(constituents, *tokenIter, "<");
+      ++tokenIter;
+      // An odd number of tokens would otherwise dereference the end iterator,
+      // which is undefined behaviour and not something the catch below can see.
+      UTIL_THROW_IF2(tokenIter == tokens.end(),
+                     "TargetConstituentBoundariesLeftPhraseProperty: Missing count after constituent labels.");
+      float count = std::atof( tokenIter->c_str() );
+      ++tokenIter;
+
+      // a label repeated inside one label list is counted only once
+      std::set<const Factor* > dedup;
+
+      for ( std::vector<std::string>::iterator constituentIter = constituents.begin();
+            constituentIter != constituents.end(); ++constituentIter ) {
+
+        const Factor* constituentFactor = factorCollection.AddFactor(*constituentIter,false);
+
+        std::pair< std::set<const Factor* >::iterator, bool > dedupIns =
+          dedup.insert(constituentFactor);
+        if ( dedupIns.second ) {
+
+          std::pair< TargetConstituentBoundariesLeftCollection::iterator, bool > inserted =
+            m_constituentsCollection.insert(std::make_pair(constituentFactor,count));
+          if ( !inserted.second ) {
+            (inserted.first)->second += count;
+          }
+        }
+      }
+
+    } catch (const std::exception &) {
+      UTIL_THROW2("TargetConstituentBoundariesLeftPhraseProperty: Read error. Flawed property?  " << value);
+    }
+  }
+}
+
+// Writes the collection as "label value" pairs separated by single spaces,
+// in the iteration order of the map.
+void TargetConstituentBoundariesLeftPhraseProperty::Print(std::ostream& out) const
+{
+  bool isFirst = true;
+  TargetConstituentBoundariesLeftCollection::const_iterator entry = m_constituentsCollection.begin();
+  while ( entry != m_constituentsCollection.end() ) {
+    if ( !isFirst ) {
+      out << " ";
+    }
+    isFirst = false;
+    out << *(entry->first) << " " << entry->second;
+    ++entry;
+  }
+}
+
+} // namespace Moses
+
--- a/moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h
+++ b/moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h
@ -0,0 +1,40 @@
+#pragma once
+
+#include "moses/PP/PhraseProperty.h"
+#include "moses/Factor.h"
+#include "util/exception.hh"
+#include <map>
+#include <string>
+
+namespace Moses
+{
+  
+typedef std::map<const Factor*, float> TargetConstituentBoundariesLeftCollection;
+
+
+// Phrase property registered under the name "TargetConstituentBoundariesLeft".
+// Holds a map from constituent label Factors to accumulated float counts,
+// filled by ProcessValue from the property's textual value.
+class TargetConstituentBoundariesLeftPhraseProperty : public PhraseProperty
+{
+public:
+  TargetConstituentBoundariesLeftPhraseProperty() 
+  {};
+
+  // Parses 'value' into m_constituentsCollection.
+  virtual void ProcessValue(const std::string &value);
+
+  // Read-only access to the parsed label -> count map.
+  const TargetConstituentBoundariesLeftCollection &GetCollection() const {
+    return m_constituentsCollection;
+  };
+
+  // Always throws: this property does not provide its value as a string.
+  // The return statement after the throw is not reached.
+  virtual const std::string *GetValueString() const {
+    UTIL_THROW2("TargetConstituentBoundariesLeftPhraseProperty: value string not available in this phrase property");
+    return NULL;
+  };
+
+protected:
+
+  // Prints the collection as space-separated "label count" pairs.
+  virtual void Print(std::ostream& out) const;
+
+  TargetConstituentBoundariesLeftCollection m_constituentsCollection;
+};
+
+} // namespace Moses
+
--- a/moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.cpp
+++ b/moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.cpp
@ -0,0 +1,63 @@
+#include "moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h"
+#include "moses/FactorCollection.h"
+#include "moses/Util.h"
+#include <iostream>
+#include <queue>
+#include <ostream>
+
+namespace Moses
+{
+
+// Parses the property value, a space-separated sequence of
+// "<label-list> <count>" pairs where the labels inside a label list are
+// separated by '<'. Each distinct label of a pair is added to
+// m_constituentsCollection with that pair's count; counts of labels that
+// occur in several pairs are summed.
+// Throws if a label list is not followed by a count token.
+void TargetConstituentBoundariesRightAdjacentPhraseProperty::ProcessValue(const std::string &value)
+{
+  FactorCollection &factorCollection = FactorCollection::Instance();
+  std::vector<std::string> tokens;
+  Tokenize(tokens, value, " ");
+  std::vector<std::string>::const_iterator tokenIter = tokens.begin();
+  while (tokenIter != tokens.end()) {
+    try {
+
+      std::vector<std::string> constituents;
+      Tokenize(constituents, *tokenIter, "<");
+      ++tokenIter;
+      // An odd number of tokens would otherwise dereference the end iterator,
+      // which is undefined behaviour and not something the catch below can see.
+      UTIL_THROW_IF2(tokenIter == tokens.end(),
+                     "TargetConstituentBoundariesRightAdjacentPhraseProperty: Missing count after constituent labels.");
+      float count = std::atof( tokenIter->c_str() );
+      ++tokenIter;
+
+      // a label repeated inside one label list is counted only once
+      std::set<const Factor* > dedup;
+
+      for ( std::vector<std::string>::iterator constituentIter = constituents.begin();
+            constituentIter != constituents.end(); ++constituentIter ) {
+
+        const Factor* constituentFactor = factorCollection.AddFactor(*constituentIter,false);
+
+        std::pair< std::set<const Factor* >::iterator, bool > dedupIns =
+          dedup.insert(constituentFactor);
+        if ( dedupIns.second ) {
+
+          std::pair< TargetConstituentBoundariesRightAdjacentCollection::iterator, bool > inserted =
+            m_constituentsCollection.insert(std::make_pair(constituentFactor,count));
+          if ( !inserted.second ) {
+            (inserted.first)->second += count;
+          }
+        }
+      }
+
+    } catch (const std::exception &) {
+      UTIL_THROW2("TargetConstituentBoundariesRightAdjacentPhraseProperty: Read error. Flawed property?  " << value);
+    }
+  }
+}
+
+// Writes the collection as "label value" pairs separated by single spaces,
+// in the iteration order of the map.
+void TargetConstituentBoundariesRightAdjacentPhraseProperty::Print(std::ostream& out) const
+{
+  bool isFirst = true;
+  TargetConstituentBoundariesRightAdjacentCollection::const_iterator entry = m_constituentsCollection.begin();
+  while ( entry != m_constituentsCollection.end() ) {
+    if ( !isFirst ) {
+      out << " ";
+    }
+    isFirst = false;
+    out << *(entry->first) << " " << entry->second;
+    ++entry;
+  }
+}
+
+} // namespace Moses
+
--- a/moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h
+++ b/moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h
@ -0,0 +1,40 @@
+#pragma once
+
+#include "moses/PP/PhraseProperty.h"
+#include "moses/Factor.h"
+#include "util/exception.hh"
+#include <map>
+#include <string>
+
+namespace Moses
+{
+  
+typedef std::map<const Factor*, float> TargetConstituentBoundariesRightAdjacentCollection;
+
+
+// Phrase property registered under the name
+// "TargetConstituentBoundariesRightAdjacent". Holds a map from constituent
+// label Factors to accumulated float counts, filled by ProcessValue from the
+// property's textual value.
+class TargetConstituentBoundariesRightAdjacentPhraseProperty : public PhraseProperty
+{
+public:
+  TargetConstituentBoundariesRightAdjacentPhraseProperty() 
+  {};
+
+  // Parses 'value' into m_constituentsCollection.
+  virtual void ProcessValue(const std::string &value);
+
+  // Read-only access to the parsed label -> count map.
+  const TargetConstituentBoundariesRightAdjacentCollection &GetCollection() const {
+    return m_constituentsCollection;
+  };
+
+  // Always throws: this property does not provide its value as a string.
+  // The return statement after the throw is not reached.
+  virtual const std::string *GetValueString() const {
+    UTIL_THROW2("TargetConstituentBoundariesRightAdjacentPhraseProperty: value string not available in this phrase property");
+    return NULL;
+  };
+
+protected:
+
+  // Prints the collection as space-separated "label count" pairs.
+  virtual void Print(std::ostream& out) const;
+
+  TargetConstituentBoundariesRightAdjacentCollection m_constituentsCollection;
+};
+
+} // namespace Moses
+
--- a/phrase-extract/ExtractionPhrasePair.cpp
+++ b/phrase-extract/ExtractionPhrasePair.cpp
@ -311,12 +311,14 @@ std::string ExtractionPhrasePair::CollectAllPropertyValues(const std::string &ke
  std::ostringstream oss;
  for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
       iter!=allPropertyValues->end(); ++iter) {
-    if (iter!=allPropertyValues->begin()) {
+    if (!(iter->first).empty()) {
+      if (iter!=allPropertyValues->begin()) {
+        oss << " ";
+      }
+      oss << iter->first;
      oss << " ";
+      oss << iter->second;
    }
-    oss << iter->first;
-    oss << " ";
-    oss << iter->second;
  }

  std::string allPropertyValuesString(oss.str());
--- a/phrase-extract/PhraseExtractionOptions.h
+++ b/phrase-extract/PhraseExtractionOptions.h
@ -50,6 +50,8 @@ private:
  bool onlyOutputSpanInfo;
  bool gzOutput;
  std::string instanceWeightsFile; //weights for each sentence
+  bool targetConstituentConstrainedFlag;
+  bool targetConstituentBoundariesFlag;
  bool flexScoreFlag;
  bool singleWordHeuristicFlag;

@ -73,6 +75,8 @@ public:
    includeSentenceIdFlag(false),
    onlyOutputSpanInfo(false),
    gzOutput(false),
+    targetConstituentConstrainedFlag(false),
+    targetConstituentBoundariesFlag(false),
    flexScoreFlag(false),
    singleWordHeuristicFlag(false),
    debug(false) {
@ -118,6 +122,12 @@ public:
  void initInstanceWeightsFile(const char* initInstanceWeightsFile) {
    instanceWeightsFile = std::string(initInstanceWeightsFile);
  }
+  // Sets the flag that extract-main enables for the --TargetConstituentConstrained switch.
+  void initTargetConstituentConstrainedFlag(const bool initTargetConstituentConstrainedFlag) {
+    targetConstituentConstrainedFlag = initTargetConstituentConstrainedFlag;
+  }
+  // Sets the flag that extract-main enables for the --TargetConstituentBoundaries switch.
+  void initTargetConstituentBoundariesFlag(const bool initTargetConstituentBoundariesFlag) {
+    targetConstituentBoundariesFlag = initTargetConstituentBoundariesFlag;
+  }
  void initFlexScoreFlag(const bool initflexScoreFlag) {
    flexScoreFlag=initflexScoreFlag;
  }
@ -165,6 +175,12 @@ public:
  std::string getInstanceWeightsFile() const {
    return instanceWeightsFile;
  }
+  // True if target-constituent-constrained extraction was requested (default: false).
+  bool isTargetConstituentConstrainedFlag() const {
+    return targetConstituentConstrainedFlag;
+  }
+  // True if target constituent boundary handling was requested (default: false).
+  bool isTargetConstituentBoundariesFlag() const {
+    return targetConstituentBoundariesFlag;
+  }
  bool isFlexScoreFlag() const {
    return flexScoreFlag;
  }
--- a/phrase-extract/SyntaxNodeCollection.cpp
+++ b/phrase-extract/SyntaxNodeCollection.cpp
@ -47,6 +47,8 @@ SyntaxNode *SyntaxNodeCollection::AddNode(int startPos, int endPos,
  SyntaxNode* newNode = new SyntaxNode(label, startPos, endPos);
  m_nodes.push_back( newNode );
  m_index[ startPos ][ endPos ].push_back( newNode );
+  m_endPositionsIndex[ endPos ].push_back( newNode );
+  m_startPositionsIndex[ startPos ].push_back( newNode ); // TODO: may not need this: access m_index by startPos and iterate over its InnerNodeIndex (= end positions)?
  m_numWords = std::max(endPos+1, m_numWords);
  return newNode;
 }
@ -70,6 +72,36 @@ const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodes(
  return endIndex->second;
 }

+//! True if at least one node was added with the given start position.
+bool SyntaxNodeCollection::HasNodeStartingAtPosition( int startPos ) const
+{
+  const std::vector< SyntaxNode* > &startingHere = GetNodesByStartPosition(startPos);
+  return !startingHere.empty();
+}
+
+//! Lookup the SyntaxNodes that start at a given position (the shared empty
+//! vector if there are none).
+const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodesByStartPosition(
+  int startPos ) const
+{
+  const InnerNodeIndex::const_iterator hit = m_startPositionsIndex.find( startPos );
+  if ( hit != m_startPositionsIndex.end() ) {
+    return hit->second;
+  }
+  return m_emptyNode;
+}
+
+//! True if at least one node was added with the given end position.
+bool SyntaxNodeCollection::HasNodeEndingAtPosition( int endPos ) const
+{
+  const std::vector< SyntaxNode* > &endingHere = GetNodesByEndPosition(endPos);
+  return !endingHere.empty();
+}
+
+//! Lookup the SyntaxNodes that end at a given position (the shared empty
+//! vector if there are none).
+const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodesByEndPosition(
+  int endPos ) const
+{
+  const InnerNodeIndex::const_iterator hit = m_endPositionsIndex.find( endPos );
+  if ( hit != m_endPositionsIndex.end() ) {
+    return hit->second;
+  }
+  return m_emptyNode;
+}
+
 std::auto_ptr<SyntaxTree> SyntaxNodeCollection::ExtractTree()
 {
  std::map<SyntaxNode *, SyntaxTree *> nodeToTree;
--- a/phrase-extract/SyntaxNodeCollection.h
+++ b/phrase-extract/SyntaxNodeCollection.h
@ -50,6 +50,11 @@ public:
  //! Lookup the SyntaxNodes for a given span.
  const std::vector< SyntaxNode* >& GetNodes( int startPos, int endPos ) const;

+  bool HasNodeStartingAtPosition( int startPos ) const;
+  const std::vector< SyntaxNode* >& GetNodesByStartPosition( int startPos ) const;
+  bool HasNodeEndingAtPosition( int endPos ) const;
+  const std::vector< SyntaxNode* >& GetNodesByEndPosition( int endPos ) const;
+
  //! Get a vector of pointers to all SyntaxNodes (unordered).
  const std::vector< SyntaxNode* >& GetAllNodes() {
    return m_nodes;
@ -78,6 +83,9 @@ private:
  NodeIndex m_index;
  int m_numWords;
  std::vector< SyntaxNode* > m_emptyNode;
+
+  InnerNodeIndex m_endPositionsIndex;
+  InnerNodeIndex m_startPositionsIndex;
 };

 }  // namespace MosesTraining
--- a/phrase-extract/extract-main.cpp
+++ b/phrase-extract/extract-main.cpp
@ -1,11 +1,3 @@
-/*
- * extract.cpp
- *	Modified by: Rohit Gupta CDAC, Mumbai, India
- *	on July 15, 2012 to implement parallel processing
- *      Modified by: Nadi Tomeh - LIMSI/CNRS
- *      Machine Translation Marathon 2010, Dublin
- */
-
 #include <cstdio>
 #include <iostream>
 #include <fstream>
@ -20,11 +12,12 @@
 #include <vector>
 #include <limits>

-#include "SentenceAlignment.h"
 #include "tables-core.h"
 #include "InputFileStream.h"
 #include "OutputFileStream.h"
 #include "PhraseExtractionOptions.h"
+#include "SentenceAlignmentWithSyntax.h"
+#include "SyntaxNode.h"

 using namespace std;
 using namespace MosesTraining;
@ -46,14 +39,14 @@ typedef vector < HPhrase > HPhraseVector;
 // The key of the map is the English index and the value is a set of the source ones
 typedef map <int, set<int> > HSentenceVertices;

-REO_POS getOrientWordModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
+REO_POS getOrientWordModel(SentenceAlignmentWithSyntax &, REO_MODEL_TYPE, bool, bool,
                           int, int, int, int, int, int, int,
                           bool (*)(int, int), bool (*)(int, int));
-REO_POS getOrientPhraseModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
+REO_POS getOrientPhraseModel(SentenceAlignmentWithSyntax &, REO_MODEL_TYPE, bool, bool,
                             int, int, int, int, int, int, int,
                             bool (*)(int, int), bool (*)(int, int),
                             const HSentenceVertices &, const HSentenceVertices &);
-REO_POS getOrientHierModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
+REO_POS getOrientHierModel(SentenceAlignmentWithSyntax &, REO_MODEL_TYPE, bool, bool,
                           int, int, int, int, int, int, int,
                           bool (*)(int, int), bool (*)(int, int),
                           const HSentenceVertices &, const HSentenceVertices &,
@ -69,7 +62,7 @@ bool ge(int, int);
 bool le(int, int);
 bool lt(int, int);

-bool isAligned (SentenceAlignment &, int, int);
+bool isAligned (SentenceAlignmentWithSyntax &, int, int);

 int sentenceOffset = 0;

@ -87,7 +80,7 @@ class ExtractTask
 {
 public:
  ExtractTask(
-    size_t id, SentenceAlignment &sentence,
+    size_t id, SentenceAlignmentWithSyntax &sentence,
    PhraseExtractionOptions &initoptions,
    Moses::OutputFileStream &extractFile,
    Moses::OutputFileStream &extractFileInv,
@ -109,14 +102,17 @@ private:
  vector< string > m_extractedPhrasesSid;
  vector< string > m_extractedPhrasesContext;
  vector< string > m_extractedPhrasesContextInv;
-  void extractBase(SentenceAlignment &);
-  void extract(SentenceAlignment &);
-  void addPhrase(SentenceAlignment &, int, int, int, int, string &);
+  void extractBase(SentenceAlignmentWithSyntax &);
+  void extract(SentenceAlignmentWithSyntax &);
+  void addPhrase(const SentenceAlignmentWithSyntax &, int, int, int, int, const std::string &, const std::string &);
  void writePhrasesToFile();
-  bool checkPlaceholders (const SentenceAlignment &sentence, int startE, int endE, int startF, int endF);
+  bool checkPlaceholders (const SentenceAlignmentWithSyntax &sentence, int startE, int endE, int startF, int endF);
  bool isPlaceholder(const string &word);
+  bool checkTargetConstituentBoundaries(const SentenceAlignmentWithSyntax &sentence,
+                                        int startE, int endE, int startF, int endF,
+                                        std::string &phrasePropertiesString);

-  SentenceAlignment &m_sentence;
+  SentenceAlignmentWithSyntax &m_sentence;
  const PhraseExtractionOptions &m_options;
  Moses::OutputFileStream &m_extractFile;
  Moses::OutputFileStream &m_extractFileInv;
@ -133,7 +129,8 @@ int main(int argc, char* argv[])

  if (argc < 6) {
    cerr << "syntax: extract en de align extract max-length [orientation [ --model [wbe|phrase|hier]-[msd|mslr|mono] ] ";
-    cerr<<"| --OnlyOutputSpanInfo | --NoTTable | --GZOutput | --IncludeSentenceId | --SentenceOffset n | --InstanceWeights filename ]\n";
+    cerr << "| --OnlyOutputSpanInfo | --NoTTable | --GZOutput | --IncludeSentenceId | --SentenceOffset n | --InstanceWeights filename ";
+    cerr << "| --TargetConstituentConstrained | --TargetConstituentBoundaries ]" << std::endl;
    exit(1);
  }

@ -153,6 +150,10 @@ int main(int argc, char* argv[])
      options.initOnlyOutputSpanInfo(true);
    } else if (strcmp(argv[i],"orientation") == 0 || strcmp(argv[i],"--Orientation") == 0) {
      options.initOrientationFlag(true);
+    } else if (strcmp(argv[i],"--TargetConstituentConstrained") == 0) {
+      options.initTargetConstituentConstrainedFlag(true);
+    } else if (strcmp(argv[i],"--TargetConstituentBoundaries") == 0) {
+      options.initTargetConstituentBoundariesFlag(true);
    } else if (strcmp(argv[i],"--FlexibilityScore") == 0) {
      options.initFlexScoreFlag(true);
    } else if (strcmp(argv[i],"--SingleWordHeuristic") == 0) {
@ -280,6 +281,11 @@ int main(int argc, char* argv[])
    extractFileContextInv.Open(fileNameExtractContextInv.c_str());
  }

+  // stats on labels for glue grammar and unknown word label probabilities
+  set< string > targetLabelCollection, sourceLabelCollection;
+  map< string, int > targetTopLabelCollection, sourceTopLabelCollection;
+  const bool targetSyntax = true;
+
  int i = sentenceOffset;

  string englishString, foreignString, alignmentString, weightString;
@ -295,7 +301,10 @@ int main(int argc, char* argv[])
      getline(*iwFileP, weightString);
    }

-    SentenceAlignment sentence;
+    SentenceAlignmentWithSyntax sentence
+    (targetLabelCollection, sourceLabelCollection,
+     targetTopLabelCollection, sourceTopLabelCollection, 
+     targetSyntax, false);
    // cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl;
    //az: output src, tgt, and alingment line
    if (options.isOnlyOutputSpanInfo()) {
@ -360,7 +369,7 @@ void ExtractTask::Run()

 }

-void ExtractTask::extract(SentenceAlignment &sentence)
+void ExtractTask::extract(SentenceAlignmentWithSyntax &sentence)
 {
  int countE = sentence.target.size();
  int countF = sentence.source.size();
@ -454,7 +463,15 @@ void ExtractTask::extract(SentenceAlignment &sentence)
                  // if(m_options.isAllModelsOutputFlag())
                  // " | | ";
                }
-                addPhrase(sentence, startE, endE, startF, endF, orientationInfo);
+                std::string phrasePropertiesString;
+                bool doAdd = !m_options.isTargetConstituentBoundariesFlag();
+                if (m_options.isTargetConstituentBoundariesFlag() || m_options.isTargetConstituentConstrainedFlag()) {
+                  bool isTargetConstituentCovered = checkTargetConstituentBoundaries(sentence, startE, endE, startF, endF, phrasePropertiesString);
+                  doAdd = doAdd || isTargetConstituentCovered;
+                }
+                if (doAdd) {
+                  addPhrase(sentence, startE, endE, startF, endF, orientationInfo, phrasePropertiesString);
+                }
              }
            }
        }
@ -510,12 +527,20 @@ void ExtractTask::extract(SentenceAlignment &sentence)
                        ((m_options.isPhraseModel())? getOrientString(phrasePrevOrient, m_options.isPhraseType()) + " " + getOrientString(phraseNextOrient, m_options.isPhraseType()) : "") + " | " +
                        ((m_options.isHierModel())? getOrientString(hierPrevOrient, m_options.isHierType()) + " " + getOrientString(hierNextOrient, m_options.isHierType()) : "");

-      addPhrase(sentence, startE, endE, startF, endF, orientationInfo);
+      std::string phrasePropertiesString;
+      bool doAdd = !m_options.isTargetConstituentBoundariesFlag();
+      if (m_options.isTargetConstituentBoundariesFlag() || m_options.isTargetConstituentConstrainedFlag()) {
+        bool isTargetConstituentCovered = checkTargetConstituentBoundaries(sentence, startE, endE, startF, endF, phrasePropertiesString);
+        doAdd = doAdd || isTargetConstituentCovered;
+      }
+      if (doAdd) {
+        addPhrase(sentence, startE, endE, startF, endF, orientationInfo, phrasePropertiesString);
+      }
    }
  }
 }

-REO_POS getOrientWordModel(SentenceAlignment & sentence, REO_MODEL_TYPE modelType,
+REO_POS getOrientWordModel(SentenceAlignmentWithSyntax & sentence, REO_MODEL_TYPE modelType,
                           bool connectedLeftTop, bool connectedRightTop,
                           int startF, int endF, int startE, int endE, int countF, int zero, int unit,
                           bool (*ge)(int, int), bool (*lt)(int, int) )
@ -541,7 +566,7 @@ REO_POS getOrientWordModel(SentenceAlignment & sentence, REO_MODEL_TYPE modelTyp
 }

 // to be called with countF-1 instead of countF
-REO_POS getOrientPhraseModel (SentenceAlignment & sentence, REO_MODEL_TYPE modelType,
+REO_POS getOrientPhraseModel (SentenceAlignmentWithSyntax & sentence, REO_MODEL_TYPE modelType,
                              bool connectedLeftTop, bool connectedRightTop,
                              int startF, int endF, int startE, int endE, int countF, int zero, int unit,
                              bool (*ge)(int, int), bool (*lt)(int, int),
@ -577,7 +602,7 @@ REO_POS getOrientPhraseModel (SentenceAlignment & sentence, REO_MODEL_TYPE model
 }

 // to be called with countF-1 instead of countF
-REO_POS getOrientHierModel (SentenceAlignment & sentence, REO_MODEL_TYPE modelType,
+REO_POS getOrientHierModel (SentenceAlignmentWithSyntax & sentence, REO_MODEL_TYPE modelType,
                            bool connectedLeftTop, bool connectedRightTop,
                            int startF, int endF, int startE, int endE, int countF, int zero, int unit,
                            bool (*ge)(int, int), bool (*lt)(int, int),
@ -629,7 +654,7 @@ REO_POS getOrientHierModel (SentenceAlignment & sentence, REO_MODEL_TYPE modelTy
  return UNKNOWN;
 }

-bool isAligned ( SentenceAlignment &sentence, int fi, int ei )
+bool isAligned ( SentenceAlignmentWithSyntax &sentence, int fi, int ei )
 {
  if (ei == -1 && fi == -1)
    return true;
@ -715,8 +740,138 @@ string getOrientString(REO_POS orient, REO_MODEL_TYPE modelType)
  }
  return "";
 }
+  

-void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE, int startF, int endF , string &orientationInfo)
+/**
+ * Compares the target span [startE,endE] with the nodes of sentence.targetTree
+ * and, if requested, renders the constituent boundary phrase properties.
+ *
+ * If the TargetConstituentBoundaries option is set, two properties are appended
+ * to phrasePropertiesString, each preceded by a single space:
+ *  - {{TargetConstituentBoundariesLeft ...}}: "BOS_" if startE is 0, followed by
+ *    the labels of the tree nodes starting at startE (visited in reverse order
+ *    of the node vector), with "<" between entries; a lone "<" if nothing
+ *    was written.
+ *  - {{TargetConstituentBoundariesRightAdjacent ...}}: "EOS_" if endE is the
+ *    last target position, otherwise the "<"-separated labels of the tree nodes
+ *    starting at endE+1; a lone "<" if there are none.
+ *
+ * If the TargetConstituentConstrained option is set, the span has to coincide
+ * with a tree node, either directly or after stripping boundary tokens that
+ * consist of a single character out of ",;.:!?".
+ *
+ * startF and endF are not used.
+ *
+ * \return false only if the constrained option is set and neither the span nor
+ *         its punctuation-stripped variant matches a tree node (in which case
+ *         phrasePropertiesString is left untouched); true otherwise
+ */
+bool ExtractTask::checkTargetConstituentBoundaries( const SentenceAlignmentWithSyntax &sentence,
+                                                    int startE, int endE, int startF, int endF,
+                                                    std::string &phrasePropertiesString)
+{
+  typedef std::vector< SyntaxNode* > NodeList;
+
+  const bool emitBoundaries = m_options.isTargetConstituentBoundariesFlag();
+
+  // left boundary property: opened here, filled while scanning the tree nodes below
+  ostringstream leftProperty;
+  bool nothingWrittenYet = true;
+  if (emitBoundaries) {
+    leftProperty << "{{TargetConstituentBoundariesLeft ";
+    if (startE == 0) {
+      leftProperty << "BOS_";
+      nothingWrittenYet = false;
+    }
+  }
+
+  // does any node starting at startE end exactly at endE?
+  bool spanMatchesNode = false;
+  if (sentence.targetTree.HasNodeStartingAtPosition(startE)) {
+    const NodeList &leftNodes = sentence.targetTree.GetNodesByStartPosition(startE);
+    for (NodeList::const_reverse_iterator node = leftNodes.rbegin(); node != leftNodes.rend(); ++node) {
+      if ((*node)->end == endE) {
+        spanMatchesNode = true;
+      }
+      if (!emitBoundaries) {
+        // only the match matters; no labels have to be collected
+        if (spanMatchesNode) {
+          break;
+        }
+        continue;
+      }
+      if (!nothingWrittenYet) {
+        leftProperty << "<";
+      }
+      nothingWrittenYet = false;
+      leftProperty << (*node)->label;
+    }
+  }
+
+  if (emitBoundaries) {
+    if (nothingWrittenYet) {
+      leftProperty << "<";
+    }
+    leftProperty << "}}";
+  }
+
+  if (m_options.isTargetConstituentConstrainedFlag() && !spanMatchesNode) {
+    // strip punctuation tokens from both ends of the span and try once more
+    const std::string punctuation = ",;.:!?";
+
+    int trimmedStartE = startE;
+    for (; trimmedStartE < endE; ++trimmedStartE) {
+      if ( (sentence.target[trimmedStartE].size() != 1) ||
+           (punctuation.find(sentence.target[trimmedStartE].at(0)) == std::string::npos) ) {
+        break;
+      }
+    }
+
+    int trimmedEndE = endE;
+    for (; trimmedEndE > trimmedStartE; --trimmedEndE) {
+      if ( (sentence.target[trimmedEndE].size() != 1) ||
+           (punctuation.find(sentence.target[trimmedEndE].at(0)) == std::string::npos) ) {
+        break;
+      }
+    }
+
+    if ( (trimmedStartE == startE) && (trimmedEndE == endE) ) {
+      // nothing was stripped, so the outcome cannot differ from the first check
+      return false;
+    }
+
+    bool trimmedSpanMatchesNode = false;
+    const NodeList &trimmedNodes = sentence.targetTree.GetNodesByStartPosition(trimmedStartE);
+    for (NodeList::const_reverse_iterator node = trimmedNodes.rbegin(); node != trimmedNodes.rend(); ++node) {
+      if ((*node)->end == trimmedEndE) {
+        trimmedSpanMatchesNode = true;
+        break;
+      }
+    }
+    if (!trimmedSpanMatchesNode) {
+      return false;
+    }
+  }
+
+  if (!emitBoundaries) {
+    return true;
+  }
+
+  // right-adjacent boundary property: nodes starting right after the phrase
+  ostringstream rightProperty;
+  rightProperty << "{{TargetConstituentBoundariesRightAdjacent ";
+
+  if (endE==sentence.target.size()-1) {
+    rightProperty << "EOS_";
+  } else {
+    const NodeList &adjacentNodes = sentence.targetTree.GetNodesByStartPosition(endE+1);
+    NodeList::const_reverse_iterator node = adjacentNodes.rbegin();
+    if (node == adjacentNodes.rend()) {
+      rightProperty << "<";
+    } else {
+      rightProperty << (*node)->label;
+      for (++node; node != adjacentNodes.rend(); ++node) {
+        rightProperty << "<" << (*node)->label;
+      }
+    }
+  }
+  rightProperty << "}}";
+
+  phrasePropertiesString += " " + leftProperty.str();
+  phrasePropertiesString += " " + rightProperty.str();
+
+  return true;
+}
+
+
+void ExtractTask::addPhrase( const SentenceAlignmentWithSyntax &sentence, 
+                             int startE, int endE, int startF, int endF, 
+                             const std::string &orientationInfo,
+                             const std::string &phrasePropertiesString)
 {
  // source
  //   // cout << "adding ( " << startF << "-" << endF << ", " << startE << "-" << endE << ")\n";
@ -746,11 +901,18 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
  if (m_options.isTranslationFlag()) outextractstr << "||| ";
  if (m_options.isOrientationFlag()) outextractstrOrientation << "||| ";

+
  // target
  for(int ei=startE; ei<=endE; ei++) {
-    if (m_options.isTranslationFlag()) outextractstr << sentence.target[ei] << " ";
-    if (m_options.isTranslationFlag()) outextractstrInv << sentence.target[ei] << " ";
-    if (m_options.isOrientationFlag()) outextractstrOrientation << sentence.target[ei] << " ";
+
+    if (m_options.isTranslationFlag()) {
+      outextractstr << sentence.target[ei] << " ";
+      outextractstrInv << sentence.target[ei] << " ";
+    }
+
+    if (m_options.isOrientationFlag()) {
+      outextractstrOrientation << sentence.target[ei] << " ";
+    }
  }
  if (m_options.isTranslationFlag()) outextractstr << "|||";
  if (m_options.isTranslationFlag()) outextractstrInv << "||| ";
@ -792,7 +954,7 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
    }
  }

-
+  outextractstr << phrasePropertiesString;

  // generate two lines for every extracted phrase:
  // once with left, once with right context
@ -901,7 +1063,7 @@ void ExtractTask::writePhrasesToFile()

 // if proper conditioning, we need the number of times a source phrase occurred

-void ExtractTask::extractBase( SentenceAlignment &sentence )
+void ExtractTask::extractBase( SentenceAlignmentWithSyntax &sentence )
 {
  ostringstream outextractFile;
  ostringstream outextractFileInv;
@ -935,7 +1097,7 @@ void ExtractTask::extractBase( SentenceAlignment &sentence )
 }


-bool ExtractTask::checkPlaceholders (const SentenceAlignment &sentence, int startE, int endE, int startF, int endF)
+bool ExtractTask::checkPlaceholders (const SentenceAlignmentWithSyntax &sentence, int startE, int endE, int startF, int endF)
 {
  for (size_t pos = startF; pos <= endF; ++pos) {
    const string &sourceWord = sentence.source[pos];
--- a/phrase-extract/score-main.cpp
+++ b/phrase-extract/score-main.cpp
@ -68,6 +68,7 @@ bool spanLength = false;
 bool ruleLength = false;
 bool nonTermContext = false;
 bool nonTermContextTarget = false;
+bool targetConstituentBoundariesFlag = false;

 int countOfCounts[COC_MAX+1];
 int totalDistinct = 0;
@ -286,6 +287,9 @@ int main(int argc, char* argv[])
    } else if (strcmp(argv[i],"--NonTermContextTarget") == 0) {
      nonTermContextTarget = true;
      std::cerr << "non-term context (target)" << std::endl;
+    } else if (strcmp(argv[i],"--TargetConstituentBoundaries") == 0) {
+      targetConstituentBoundariesFlag = true;
+      std::cerr << "including target constituent boundaries information" << std::endl;
    } else {
      featureArgs.push_back(argv[i]);
      ++i;
@ -957,6 +961,18 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
    }
  }

+  // target constituent boundaries
+  if (targetConstituentBoundariesFlag && !inverseFlag) {
+    const std::string targetConstituentBoundariesLeftValues = phrasePair.CollectAllPropertyValues("TargetConstituentBoundariesLeft");
+    if (!targetConstituentBoundariesLeftValues.empty()) {
+      phraseTableFile << " {{TargetConstituentBoundariesLeft " << targetConstituentBoundariesLeftValues << "}}";
+    }
+    const std::string targetConstituentBoundariesRightAdjacentValues = phrasePair.CollectAllPropertyValues("TargetConstituentBoundariesRightAdjacent");
+    if (!targetConstituentBoundariesRightAdjacentValues.empty()) {
+      phraseTableFile << " {{TargetConstituentBoundariesRightAdjacent " << targetConstituentBoundariesRightAdjacentValues << "}}";
+    }
+  }
+
  phraseTableFile << std::endl;
 }

--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@ -2407,6 +2407,12 @@ sub define_training_extract_phrases {
      if (&get("TRAINING:ghkm-strip-bitpar-nonterminal-labels")) {
        $cmd .= "-ghkm-strip-bitpar-nonterminal-labels ";
      }
+
+    } else { # !hierarchical-rule-set
+
+      if (&get("TRAINING:target-constituent-boundaries")) {
+        $cmd .= "-target-constituent-boundaries ";
+      }
    }

    my $extract_settings = &get("TRAINING:extract-settings");
@ -2464,6 +2470,12 @@ sub define_training_build_ttable {
        my $parts_of_speech_labels_file = &versionize(&long_file_name("parts-of-speech","model",""));
        $cmd .= "-ghkm-parts-of-speech-file $parts_of_speech_labels_file ";
      }
+
+    } else { # !hierarchical-rule-set
+
+      if (&get("TRAINING:target-constituent-boundaries")) {
+        $cmd .= "-target-constituent-boundaries ";
+      }
    }

    &create_step($step_id,$cmd);
@ -2678,6 +2690,10 @@ sub define_training_create_config {
      $cmd .= "-ghkm-parts-of-speech-file $parts_of_speech_labels_file ";
    }

+    if (&get("TRAINING:target-constituent-boundaries")) {
+      $cmd .= "-target-constituent-boundaries ";
+    }
+
    # sparse lexical features provide additional content for config file
    my @additional_ini_files;
    push  (@additional_ini_files, "$sparse_lexical_features.ini") if $sparse_lexical_features;
--- a/scripts/training/train-model.perl
+++ b/scripts/training/train-model.perl
@ -134,6 +134,7 @@ my($_EXTERNAL_BINDIR,
   	$_LMODEL_OOV_FEATURE,
   	$_NUM_LATTICE_FEATURES,
   	$IGNORE,
+    $_TARGET_CONSTITUENT_BOUNDARIES,
   	$_FLEXIBILITY_SCORE,
   	$_FEATURE_LINES,
   	$_WEIGHT_LINES,
@ -258,6 +259,7 @@ $_HELP = 1
 		       'instance-weights-file=s' => \$_INSTANCE_WEIGHTS_FILE,
 		       'lmodel-oov-feature' => \$_LMODEL_OOV_FEATURE,
 		       'num-lattice-features=i' => \$_NUM_LATTICE_FEATURES,
+               'target-constituent-boundaries' => \$_TARGET_CONSTITUENT_BOUNDARIES,
 		       'flexibility-score' => \$_FLEXIBILITY_SCORE,
 		       'config-add-feature-lines=s' => \$_FEATURE_LINES,
 		       'config-add-weight-lines=s' => \$_WEIGHT_LINES,
@ -1607,6 +1609,7 @@ sub extract_phrase {
    $cmd .= " --GZOutput ";
    $cmd .= " --InstanceWeights $_INSTANCE_WEIGHTS_FILE " if defined $_INSTANCE_WEIGHTS_FILE;
    $cmd .= " --BaselineExtract $_BASELINE_EXTRACT" if defined($_BASELINE_EXTRACT) && $PHRASE_EXTRACT =~ /extract-parallel.perl/;
+    $cmd .= " --TargetConstituentBoundaries" if $_TARGET_CONSTITUENT_BOUNDARIES;
    $cmd .= " --FlexibilityScore" if $_FLEXIBILITY_SCORE;
    $cmd .= " --NoTTable" if $_MMSAPT;

@ -1764,9 +1767,10 @@ sub score_phrase_phrase_extract {
        $cmd .= " --SourceLabels $_GHKM_SOURCE_LABELS_FILE" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
        $cmd .= " --TargetSyntacticPreferences $_TARGET_SYNTACTIC_PREFERENCES_LABELS_FILE" if $_TARGET_SYNTACTIC_PREFERENCES && defined($_TARGET_SYNTACTIC_PREFERENCES_LABELS_FILE);
        $cmd .= " --PartsOfSpeech $_GHKM_PARTS_OF_SPEECH_FILE" if $_GHKM_PARTS_OF_SPEECH && defined($_GHKM_PARTS_OF_SPEECH_FILE);
+        $cmd .= " --TargetConstituentBoundaries" if $_TARGET_CONSTITUENT_BOUNDARIES;
+        $cmd .= " --FlexibilityScore=$FLEX_SCORER" if $_FLEXIBILITY_SCORE;
        $cmd .= " $DOMAIN" if $DOMAIN;
        $cmd .= " $CORE_SCORE_OPTIONS" if defined($_SCORE_OPTIONS);
-        $cmd .= " --FlexibilityScore=$FLEX_SCORER" if $_FLEXIBILITY_SCORE;

 				# sorting
 				if ($direction eq "e2f" || $_ALT_DIRECT_RULE_SCORE_1 || $_ALT_DIRECT_RULE_SCORE_2) {
@ -2386,6 +2390,7 @@ sub create_ini {
    print INI " unknown-word-labels-file=$_UNKNOWN_WORD_LABEL_FILE" if defined($_UNKNOWN_WORD_LABEL_FILE);
    print INI "\n";
  }
+  print INI "TargetConstituentAdjacencyFeature\n" if $_TARGET_CONSTITUENT_BOUNDARIES;
  print INI $feature_spec;

  print INI "\n# dense weights for feature functions\n";
@ -2398,6 +2403,7 @@ sub create_ini {
  print INI "SoftSourceSyntacticConstraintsFeature0= -0.2 -0.2 -0.2 0.1 0.1 0.1\n" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
  print INI "PhraseOrientationFeature0= 0.05 0.05 0.05 0.05 0.05 0.05\n" if $_PHRASE_ORIENTATION;
  print INI "TargetPreferencesFeature0= 0.2 -0.2\n" if $_HIERARCHICAL && $_TARGET_SYNTAX && $_TARGET_SYNTACTIC_PREFERENCES && defined($_TARGET_SYNTACTIC_PREFERENCES_LABELS_FILE);
+  print INI "TargetConstituentAdjacencyFeature0= 0.05 -0.1\n" if $_TARGET_CONSTITUENT_BOUNDARIES;
  print INI $weight_spec;
  close(INI);
 }