Roll back to just tracking coordinates in mmsapt

(distance wants to be a separate FF)
2024-12-29 06:52:34 +03:00 · 2016-06-23 19:40:28 -04:00 · 2016-06-23 19:40:28 -04:00 · d29916bbb3
commit d29916bbb3
parent 5c2b8d843c
25 changed files with 117 additions and 222 deletions
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@ -3655,16 +3655,6 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_coherence.h</locationURI>
        </link>
-		<link>
-			<name>TranslationModel/UG/sapt_pscore_dist.h</name>
-			<type>1</type>
-			<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_dist.h</locationURI>
-		</link>
-		<link>
-			<name>TranslationModel/UG/sapt_pscore_length_ratio.h</name>
-			<type>1</type>
-			<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_length_ratio.h</locationURI>
-		</link>
 		<link>
 			<name>TranslationModel/UG/sapt_pscore_lex1.h</name>
 			<type>1</type>
@ -3710,11 +3700,6 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_wordcount.h</locationURI>
        </link>
-		<link>
-			<name>TranslationModel/UG/sapt_pscore_cumulative_bias.h</name>
-			<type>1</type>
-			<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_cumulative_bias.h</locationURI>
-		</link>
 		<link>
 			<name>TranslationModel/UG/sim-pe.cc</name>
 			<type>1</type>
--- a/contrib/other-builds/moses/moses.project
+++ b/contrib/other-builds/moses/moses.project
@ -124,8 +124,6 @@
      <File Name="../../../moses/TranslationModel/UG/sapt_phrase_scorers.h"/>
      <File Name="../../../moses/TranslationModel/UG/sapt_pscore_base.h"/>
      <File Name="../../../moses/TranslationModel/UG/sapt_pscore_coherence.h"/>
-      <File Name="../../../moses/TranslationModel/UG/sapt_pscore_dist.h"/>
-      <File Name="../../../moses/TranslationModel/UG/sapt_pscore_length_ratio.h"/>
      <File Name="../../../moses/TranslationModel/UG/sapt_pscore_lex1.h"/>
      <File Name="../../../moses/TranslationModel/UG/sapt_pscore_logcnt.h"/>
      <File Name="../../../moses/TranslationModel/UG/sapt_pscore_pbwd.h"/>
@ -135,7 +133,6 @@
      <File Name="../../../moses/TranslationModel/UG/sapt_pscore_rareness.h"/>
      <File Name="../../../moses/TranslationModel/UG/sapt_pscore_unaligned.h"/>
      <File Name="../../../moses/TranslationModel/UG/sapt_pscore_wordcount.h"/>
-      <File Name="../../../moses/TranslationModel/UG/sapt_pscore_cumulative_bias.h"/>
      <File Name="../../../moses/TranslationModel/UG/sim-pe.cc" ExcludeProjConfig="Debug"/>
      <File Name="../../../moses/TranslationModel/UG/spe-check-coverage.cc" ExcludeProjConfig="Debug"/>
      <File Name="../../../moses/TranslationModel/UG/spe-check-coverage2.cc" ExcludeProjConfig="Debug"/>
--- a/moses/InputType.h
+++ b/moses/InputType.h
@ -68,8 +68,13 @@ public:
  size_t m_frontSpanCoveredLength;
  // how many words from the beginning are covered

-  // coordinates in some space, populated by xml tag "coord"
-  boost::shared_ptr<std::vector<float> > m_coord;
+  // Coordinates in user-defined spaces, indexed by phrase dictionary pointer
+  // Looking up PD* returns a vector of the input's coordinates in each space
+  // known to the PD, in order (vector of pointers to float vectors).  This
+  // allows different models to use different subsets of all named spaces.
+  typedef std::vector<boost::shared_ptr<std::vector<float> > > INCOORD;
+  typedef std::map<PhraseDictionary const*, INCOORD> PD2IC;
+  boost::shared_ptr<PD2IC> m_pd2InputCoord;

  InputType(AllOptions::ptr const& opts, long translationId = 0);
  virtual ~InputType();
--- a/moses/TranslationModel/PhraseDictionary.h
+++ b/moses/TranslationModel/PhraseDictionary.h
@ -147,6 +147,14 @@ public:

  void SetParameter(const std::string& key, const std::string& value);

+  //! Register a named coordinate space used by this model.  Names are kept
+  //! in the order they are added (they are matched against the "coord" XML tag).
+  void AddKnownSpace(const std::string& name) { m_knownSpaces.push_back(name); }
+
+  //! Names of the coordinate spaces used by this model, in the order they were
+  //! added; the "coord" XML tag handler uses a name's index as its storage slot.
+  const std::vector<std::string> &GetKnownSpaces() const { return m_knownSpaces; }
+
  // LEGACY
  //! find list of translations that can translates a portion of src. Used by confusion network decoding
  virtual
@ -171,6 +179,9 @@ protected:
  // cache
  size_t m_maxCacheSize; // 0 = no caching

+  // Named coordinate spaces used by this model, in order (see "coord" XML tag)
+  std::vector<std::string> m_knownSpaces;
+
 #ifdef WITH_THREADS
  //reader-writer lock
  mutable boost::thread_specific_ptr<CacheColl> m_cache;
--- a/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc
+++ b/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc
@ -20,6 +20,7 @@ namespace sapt
    for (int i = 0; i <= LRModel::NONE; ++i)
      ofwd[i] = obwd[i] = 0;
    my_aln.reserve(1);
+    sids.reset(new std::vector<uint32_t>);
  }

  jstats::
@ -80,9 +81,7 @@ namespace sapt
    ++obwd[bwd_orient];
    // Record sentence id if requested
    if (track_sid)
-      {
-        sids.push_back(sid);
-      }
+      sids->push_back(sid);
    if (docid >= 0)
      {
        // while (int(indoc.size()) <= docid) indoc.push_back(0);
--- a/moses/TranslationModel/UG/mm/ug_bitext_jstats.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext_jstats.h
@ -28,7 +28,7 @@ namespace sapt
    uint32_t obwd[LRModel::NONE+1]; // backward distortion type counts

  public:
-    std::vector<uint32_t> sids; // list of sentence ids in this sample
+    SPTR<std::vector<uint32_t> > sids; // list of sentence ids in this sample
    std::map<uint32_t,uint32_t> indoc;
    // std::vector<uint32_t> indoc; // counts origin of samples (for biased sampling)
    jstats();
--- a/moses/TranslationModel/UG/mm/ug_phrasepair.h
+++ b/moses/TranslationModel/UG/mm/ug_phrasepair.h
@ -31,8 +31,8 @@ namespace sapt
    std::vector<unsigned char> aln;
    float score;
    bool inverse;
-    std::vector<uint32_t> sids; // list of sampled sentence ids where this
-                                // phrase pair was found
+    SPTR<std::vector<uint32_t> > sids; // list of sampled sentence ids where
+                                       // this phrase pair was found
    // std::vector<uint32_t> indoc;
    std::map<uint32_t,uint32_t> indoc;
    PhrasePair() { };
@ -185,6 +185,8 @@ namespace sapt
    sample2 += o.sample2;
    cum_bias += o.cum_bias;
    // todo: add distortion counts
+    if (sids && o.sids)
+      sids->insert(sids->end(), o.sids->begin(), o.sids->end());
    return *this;
  }

--- a/moses/TranslationModel/UG/mmsapt.cpp
+++ b/moses/TranslationModel/UG/mmsapt.cpp
@ -215,7 +215,6 @@ namespace Moses
    param.insert(pair<string,string>("coh",    "0"));
    param.insert(pair<string,string>("prov",   "0"));
    param.insert(pair<string,string>("cumb",   "0"));
-    param.insert(pair<string,string>("dist",   "0"));

    poolCounts = true;

@ -276,6 +275,37 @@ namespace Moses
    m = param.find("name");
    if (m != param.end()) m_name = m->second;

+    // Optional coordinates for the training corpus, given as
+    //   coord=name1:file1.gz,name2:file2.gz,...
+    // Each name should match a space named in the XML input ("coord" tag).
+    param.insert(pair<string,string>("coord","0")); // "0" (default) = disabled
+    if(param["coord"] != "0")
+      {
+        m_track_coord = true; // passed to the samplers so that they record sids
+        vector<string> coord_instances = Tokenize(param["coord"], ",");
+        BOOST_FOREACH(std::string const& instance, coord_instances)
+          {
+            vector<string> toks = Moses::Tokenize(instance, ":");
+            // Reject malformed entries instead of indexing past the end of toks
+            UTIL_THROW_IF2(toks.size() != 2, "Malformed coord specification \""
+                           << instance << "\" (expected NAME:FILE)");
+            string const& name = toks[0];
+            string const& file = toks[1];
+            //TODO: register this space for this model
+            // Load sid coordinates from file: one line of floats per sentence id
+            m_sid_coord_list.push_back(vector<vector<float> >());
+            vector<vector<float> >& sid_coord = m_sid_coord_list.back();
+            //TODO: support extra data for btdyn, here? extra?
+            sid_coord.reserve(btfix->T1->size());
+            string line;
+            cerr << "Loading coordinate lines for space \"" << name << "\" from " << file << endl;
+            iostreams::filtering_istream in;
+            ugdiss::open_input_stream(file, in);
+            while(getline(in, line)) sid_coord.push_back(Scan<float>(Tokenize(line)));
+            cerr << "Loaded " << sid_coord.size() << " lines" << endl;
+          }
+      }
+
    // check for unknown parameters
    vector<string> known_parameters; known_parameters.reserve(50);
    known_parameters.push_back("L1");
@ -291,8 +321,8 @@ namespace Moses
    known_parameters.push_back("cache");
    known_parameters.push_back("coh");
    known_parameters.push_back("config");
+    known_parameters.push_back("coord");
    known_parameters.push_back("cumb");
-    known_parameters.push_back("dist");
    known_parameters.push_back("extra");
    known_parameters.push_back("feature-sets");
    known_parameters.push_back("input-factor");
@ -468,19 +498,6 @@ namespace Moses
            SPTR<PScoreWC<Token> > ffwcnt(new PScoreWC<Token>("wcnt"));
            register_ff(ffwcnt,m_active_ff_common);
          }
-        // Optional distance feature
-        if(param["dist"] != "0")
-          {
-            // Now using sid coordinate list
-            // (to be populated after bitext load)
-            if(m_sid_coord == NULL) {
-              m_sid_coord.reset(new vector<vector<float> >());
-            }
-            // Track sids when sampling bitext
-            m_track_sids = true;
-            SPTR<PScoreDist<Token> > ff(new PScoreDist<Token>(m_sid_coord, param["dist"]));
-            register_ff(ff,m_active_ff_common);
-          }
      }
    // cerr << "Features: " << Join("|",m_feature_names) << endl;
    this->m_numScoreComponents = this->m_feature_names.size();
@ -524,28 +541,6 @@ namespace Moses
    if (m_extra_data.size())
      load_extra_data(m_extra_data, false);

-    // A feature (such as dist) left a note that we need to populate src
-    // sentence coordinates
-    if (m_sid_coord)
-      {
-        // We know the corpus size from the bitext
-        m_sid_coord->reserve(btfix->T1->size());
-        string coordfile = m_bname + L1 + ".coord.gz";
-        string line;
-        cerr << "Loading coordinate lines from " << coordfile << endl;
-        boost::iostreams::filtering_istream in;
-        ugdiss::open_input_stream(coordfile, in);
-        while(getline(in, line))
-          {
-            m_sid_coord->push_back(Scan<float>(Tokenize(line)));
-          }
-        cerr << "Loaded " << m_sid_coord->size() << " lines" << endl;
-        UTIL_THROW_IF2(m_sid_coord->size() != btfix->T1->size(),
-                       "Coordinates file size does not match bitext size ("
-                       << m_sid_coord->size() << " != " << btfix->T1->size()
-                       << ")");
-      }
-
 #if 0
    // currently not used
    LexicalPhraseScorer2<Token>::table_t & COOC = calc_lex.scorer.COOC;
@ -587,12 +582,12 @@ namespace Moses
    if (fix)
      {
        BOOST_FOREACH(SPTR<pscorer> const& ff, m_active_ff_fix)
-          (*ff)(*btfix, *fix, ttask, &fvals);
+          (*ff)(*btfix, *fix, &fvals);
      }
    if (dyn)
      {
        BOOST_FOREACH(SPTR<pscorer> const& ff, m_active_ff_dyn)
-          (*ff)(*dynbt, *dyn, ttask, &fvals);
+          (*ff)(*dynbt, *dyn, &fvals);
      }

    if (fix && dyn) { pool += *dyn; }
@ -604,7 +599,7 @@ namespace Moses
          zilch.raw2 = m.approxOccurrenceCount();
        pool += zilch;
        BOOST_FOREACH(SPTR<pscorer> const& ff, m_active_ff_dyn)
-          (*ff)(*dynbt, ff->allowPooling() ? pool : zilch, ttask, &fvals);
+          (*ff)(*dynbt, ff->allowPooling() ? pool : zilch, &fvals);
      }
    else if (dyn)
      {
@ -614,17 +609,17 @@ namespace Moses
          zilch.raw2 = m.approxOccurrenceCount();
        pool += zilch;
        BOOST_FOREACH(SPTR<pscorer> const& ff, m_active_ff_fix)
-          (*ff)(*dynbt, ff->allowPooling() ? pool : zilch, ttask, &fvals);
+          (*ff)(*dynbt, ff->allowPooling() ? pool : zilch, &fvals);
      }
    if (fix)
      {
        BOOST_FOREACH(SPTR<pscorer> const& ff, m_active_ff_common)
-          (*ff)(*btfix, pool, ttask, &fvals);
+          (*ff)(*btfix, pool, &fvals);
      }
    else
      {
        BOOST_FOREACH(SPTR<pscorer> const& ff, m_active_ff_common)
-          (*ff)(*dynbt, pool, ttask, &fvals);
+          (*ff)(*dynbt, pool, &fvals);
      }

    TargetPhrase* tp = new TargetPhrase(const_cast<ttasksptr&>(ttask), this);
@ -653,6 +648,21 @@ namespace Moses
      }
 #endif

+    // Track stats for rescoring non-cacheable phrases as needed (for now: print only)
+    if (m_track_coord && pool.sids) // sids may be unset, so check before dereferencing
+    {
+      cerr << btfix->toString(pool.p1, 0) << " ::: " << btfix->toString(pool.p2, 1) << endl;
+      BOOST_FOREACH(uint32_t const sid, *pool.sids) // one output line per sampled sentence
+        {
+          BOOST_FOREACH(vector<vector<float> > const& coord, m_sid_coord_list) // by ref: no table copy
+            {
+              //TODO: store coord[sid] in tp
+              if (sid < coord.size()) cerr << " : " << Join(" ", coord[sid]); // skip sids without a line
+            }
+          cerr << endl;
+        }
+    }
+
    return tp;
  }

@ -728,7 +738,7 @@ namespace Moses
    SPTR<ContextScope> const& scope = ttask->GetScope();
    SPTR<TPCollCache> cache = scope->get<TPCollCache>(cache_key);
    if (!cache) cache = m_cache; // no context-specific cache, use global one
-      
+
    ret = cache->get(phrasekey, dyn->revision());
    // TO DO: we should revise the revision mechanism: we take the
    // length of the dynamic bitext (in sentences) at the time the PT
@ -742,12 +752,12 @@ namespace Moses
    // std::cerr << ret << " with " << ret->refCount << " references at " 
    // << HERE << std::endl;
    boost::upgrade_lock<boost::shared_mutex> rlock(ret->lock);
-    if (ret->GetSize()) return ret; 
+    if (ret->GetSize()) return ret;

    // new TPC (not found or old one was not up to date)
    boost::upgrade_to_unique_lock<boost::shared_mutex> wlock(rlock);
    // maybe another thread did the work while we waited for the lock ?
-    if (ret->GetSize()) return ret; 
+    if (ret->GetSize()) return ret;

    // OK: pt entry NOT found or NOT up to date
    // lookup and expansion could be done in parallel threads,
@ -768,7 +778,7 @@ namespace Moses
                                   m_min_sample_size, 
                                   m_default_sample_size, 
                                   m_sampling_method,
-                                   m_track_sids);
+                                   m_track_coord);
            s();
            sfix = s.stats();
          }
@ -956,7 +966,7 @@ namespace Moses
          {
            BitextSampler<Token> s(btfix, mfix, context->bias, 
                                   m_min_sample_size, m_default_sample_size, 
-                                   m_sampling_method, m_track_sids);
+                                   m_sampling_method, m_track_coord);
            if (*context->cache1->get(pid, s.stats()) == s.stats())
              m_thread_pool->add(s);
          }
@ -977,7 +987,7 @@ namespace Moses
        for (size_t i = 0; mdyn.size() == i && i < myphrase.size(); ++i)
          mdyn.extend(myphrase[i]);
        // let's assume a uniform bias over the foreground corpus
-        if (mdyn.size() == myphrase.size()) dyn->prep(ttask, mdyn, m_track_sids);
+        if (mdyn.size() == myphrase.size()) dyn->prep(ttask, mdyn, m_track_coord);
      }
    return mdyn.size() == myphrase.size();
  }
--- a/moses/TranslationModel/UG/mmsapt.h
+++ b/moses/TranslationModel/UG/mmsapt.h
@ -119,9 +119,8 @@ namespace Moses
    std::vector<SPTR<pscorer > > m_active_ff_common;
    // activated feature functions (dyn)

-    // Coordinates of bitext source sentences for dist feature
-    boost::shared_ptr<std::vector<std::vector<float> > > m_sid_coord;
-    bool m_track_sids; // track sids when sampling bitext?
+    bool m_track_coord = false; // track coordinates?  Effectively: track sids when sampling bitext?
+    std::vector<std::vector<std::vector<float> > > m_sid_coord_list;

    void
    parse_factor_spec(std::vector<FactorType>& flist, std::string const key);
--- a/moses/TranslationModel/UG/sapt_phrase_scorers.h
+++ b/moses/TranslationModel/UG/sapt_phrase_scorers.h
@ -14,4 +14,3 @@
 #include "sapt_pscore_phrasecount.h"  // phrase count
 #include "sapt_pscore_wordcount.h"    // word count
 #include "sapt_pscore_cumulative_bias.h" // cumulative bias score
-#include "sapt_pscore_dist.h"         // sample distance score
--- a/moses/TranslationModel/UG/sapt_pscore_base.h
+++ b/moses/TranslationModel/UG/sapt_pscore_base.h
@ -27,7 +27,6 @@

      virtual void
      operator()(Bitext<Token> const& pt, PhrasePair<Token>& pp,
-                 ttasksptr const& ttask,
                 std::vector<float> * dest=NULL) const = 0;

      void
--- a/moses/TranslationModel/UG/sapt_pscore_coherence.h
+++ b/moses/TranslationModel/UG/sapt_pscore_coherence.h
@ -22,7 +22,6 @@ namespace sapt
    void
    operator()(Bitext<Token> const& bt,
 	       PhrasePair<Token>& pp,
-	       ttasksptr const& ttask,
 	       std::vector<float> * dest = NULL) const
    {
      if (!dest) dest = &pp.fvals;
--- a/moses/TranslationModel/UG/sapt_pscore_cumulative_bias.h
+++ b/moses/TranslationModel/UG/sapt_pscore_cumulative_bias.h
@ -29,7 +29,6 @@ namespace sapt  {
    void
    operator()(Bitext<Token> const& bt,
         PhrasePair<Token>& pp,
-         ttasksptr const& ttask,
         std::vector<float> * dest = NULL) const
    {
      if (!dest) dest = &pp.fvals;
--- a/moses/TranslationModel/UG/sapt_pscore_dist.h
+++ b/moses/TranslationModel/UG/sapt_pscore_dist.h
@ -1,124 +0,0 @@
-// -*- c++ -*-
-//
-// This scorer measures distance between sentences in an arbitrary N-dimensional
-// space on the source side.  It provides two scores for each phrase pair:
-// * Distance to input, the average distance between training sentences and the
-//   input sentence (are training points close to test point?)
-// * Training data consistency, the average distance between training sentences
-//   and their centroid (are training points close to each other?)
-// Here "training sentences" refers to the subset of sentences sampled from the
-// suffix array from which the phrase pair can be extracted.  The two distances
-// reported as feature scores are log-transformed.
-//
-// This requires pre-computing the coordinates of every source sentence in the
-// bitext and computing the coordinates of each input sentence at run-time.
-//
-// Specify the coordinates of bitext source sentences with a file called
-// ${CORPUS}.${L1}.coord.gz that contains lines of space-delimited floats:
-// 0.1 0.5 0.2 ...
-//
-// Specify the coordinates of input sentences (InputType m_coord) with XML input
-// using the coord tag.  See www.statmt.org/moses/?n=Advanced.Hybrid#ntoc1 for
-// turning on XML input:
-// <coord coord="0.1 0.5 0.2 ..." />
-//
-// Activate this feature with "dist=MEASURE" where MEASURE is one of:
-// euc: Euclidean distance (for spaces)
-// var: total variation distance (for distributions)
-
-#pragma once
-#include "sapt_pscore_base.h"
-#include "mmsapt.h"
-
-#include <boost/foreach.hpp>
-
-namespace sapt
-{
-  template<typename Token>
-  class
-  PScoreDist : public PhraseScorer<Token>
-  {
-    enum Measure {
-      EuclideanDistance,
-      TotalVariationDistance,
-    };
-    boost::shared_ptr<std::vector<std::vector<float> > > m_sid_coord;
-    Measure m_measure;
-  public:
-    PScoreDist(boost::shared_ptr<std::vector<std::vector<float> > > const& sid_coord,
-        std::string const description)
-    {
-      this->m_index = -1;
-      this->m_num_feats = 2;
-      this->m_feature_names.push_back("dist-" + description + "-i");
-      this->m_feature_names.push_back("dist-" + description + "-c");
-      this->m_sid_coord = sid_coord;
-      if (description == "euc") {
-        this->m_measure = EuclideanDistance;
-      } else if (description == "var") {
-        this->m_measure = TotalVariationDistance;
-      } else {
-        UTIL_THROW2("Unknown specification \""
-            << description << "\" for dist phrase scorer (one of: euc var)");
-      }
-    }
-
-    void
-    operator()(Bitext<Token> const& bt,
-         PhrasePair<Token>& pp,
-         ttasksptr const& ttask,
-         std::vector<float> * dest = NULL) const
-    {
-      if (!dest) {
-        dest = &pp.fvals;
-      }
-      // Coordinates of input
-      std::vector<float> const& input = *(ttask->GetSource()->m_coord);
-      // Coordinates of training data centroid
-      std::vector<float> centroid = std::vector<float>((*m_sid_coord)[0].size());
-      BOOST_FOREACH(int const sid, pp.sids) {
-        std::vector<float> const& point = (*m_sid_coord)[sid];
-        for (size_t i = 0; i < centroid.size(); ++i) {
-          centroid[i] += point[i];
-        }
-      }
-      for (size_t i = 0; i < centroid.size(); ++i) {
-        centroid[i] /= pp.sids.size();
-      }
-      // Compute log-average-distance of specified type from the training points
-      // to both the input sentence and training centroid (max distance with
-      // float epsilon to avoid domain error)
-      float input_distance = 0;
-      float centroid_distance = 0;
-      if (m_measure == EuclideanDistance) {
-        BOOST_FOREACH(int const sid, pp.sids) {
-          std::vector<float> const& point = (*m_sid_coord)[sid];
-          float input_point_distance = 0;
-          float centroid_point_distance = 0;
-          for (size_t i = 0; i < input.size(); ++i) {
-            input_point_distance += pow(input[i] - point[i], 2);
-            centroid_point_distance += pow(centroid[i] - point[i], 2);
-          }
-          input_distance += sqrt(input_point_distance);
-          centroid_distance += sqrt(centroid_point_distance);
-        }
-      } else if (m_measure == TotalVariationDistance) {
-        BOOST_FOREACH(int const sid, pp.sids) {
-          std::vector<float> const& point = (*m_sid_coord)[sid];
-          float input_point_distance = 0;
-          float centroid_point_distance = 0;
-          for (size_t i = 0; i < input.size(); ++i) {
-            input_point_distance += std::abs(input[i] - point[i]);
-            centroid_point_distance += std::abs(centroid[i] - point[i]);
-          }
-          input_distance += input_point_distance / 2;
-          centroid_distance += centroid_point_distance / 2;
-        }
-      }
-      input_distance /= pp.sids.size();
-      centroid_distance /= pp.sids.size();
-      (*dest)[this->m_index] = log(std::max(input_distance, Moses::FLOAT_EPSILON));
-      (*dest)[this->m_index + 1] = log(std::max(centroid_distance, Moses::FLOAT_EPSILON));
-    }
-  };
-}
--- a/moses/TranslationModel/UG/sapt_pscore_length_ratio.h
+++ b/moses/TranslationModel/UG/sapt_pscore_length_ratio.h
@ -49,7 +49,6 @@ namespace sapt  {
    void
    operator()(Bitext<Token> const& bt,
         PhrasePair<Token>& pp,
-         ttasksptr const& ttask,
         std::vector<float> * dest = NULL) const
    {
      if (!dest) dest = &pp.fvals;
--- a/moses/TranslationModel/UG/sapt_pscore_lex1.h
+++ b/moses/TranslationModel/UG/sapt_pscore_lex1.h
@ -37,7 +37,6 @@ namespace sapt
    void
    operator()(Bitext<Token> const& bt,
         PhrasePair<Token>& pp,
-         ttasksptr const& ttask,
         std::vector<float> * dest = NULL) const
    {
      if (!dest) dest = &pp.fvals;
--- a/moses/TranslationModel/UG/sapt_pscore_logcnt.h
+++ b/moses/TranslationModel/UG/sapt_pscore_logcnt.h
@ -38,7 +38,6 @@ namespace sapt  {
    void
    operator()(Bitext<Token> const& bt,
         PhrasePair<Token>& pp,
-         ttasksptr const& ttask,
         std::vector<float> * dest = NULL) const
    {
      if (!dest) dest = &pp.fvals;
--- a/moses/TranslationModel/UG/sapt_pscore_pbwd.h
+++ b/moses/TranslationModel/UG/sapt_pscore_pbwd.h
@ -39,7 +39,6 @@ namespace sapt
    void
    operator()(Bitext<Token> const& bt,
         PhrasePair<Token>& pp,
-         ttasksptr const& ttask,
         std::vector<float> * dest = NULL) const
    {
      if (!dest) dest = &pp.fvals;
--- a/moses/TranslationModel/UG/sapt_pscore_pfwd.h
+++ b/moses/TranslationModel/UG/sapt_pscore_pfwd.h
@ -40,7 +40,6 @@ namespace sapt
    void
    operator()(Bitext<Token> const& bt,
         PhrasePair<Token>& pp,
-         ttasksptr const& ttask,
         std::vector<float> * dest = NULL) const
    {
      if (!dest) dest = &pp.fvals;
--- a/moses/TranslationModel/UG/sapt_pscore_phrasecount.h
+++ b/moses/TranslationModel/UG/sapt_pscore_phrasecount.h
@ -23,7 +23,6 @@ namespace sapt
    void
    operator()(Bitext<Token> const& bt,
         PhrasePair<Token>& pp,
-         ttasksptr const& ttask,
         std::vector<float> * dest = NULL) const
    {
      if (!dest) dest = &pp.fvals;
--- a/moses/TranslationModel/UG/sapt_pscore_provenance.h
+++ b/moses/TranslationModel/UG/sapt_pscore_provenance.h
@ -29,7 +29,6 @@ namespace sapt {
    void
    operator()(Bitext<Token> const& bt,
         PhrasePair<Token>& pp,
-         ttasksptr const& ttask,
         std::vector<float> * dest = NULL) const
    {
      if (!dest) dest = &pp.fvals;
--- a/moses/TranslationModel/UG/sapt_pscore_rareness.h
+++ b/moses/TranslationModel/UG/sapt_pscore_rareness.h
@ -27,7 +27,6 @@ namespace sapt  {
    void
    operator()(Bitext<Token> const& bt,
         PhrasePair<Token>& pp,
-         ttasksptr const& ttask,
         std::vector<float> * dest = NULL) const
    {
      if (!dest) dest = &pp.fvals;
--- a/moses/TranslationModel/UG/sapt_pscore_unaligned.h
+++ b/moses/TranslationModel/UG/sapt_pscore_unaligned.h
@ -38,7 +38,6 @@ namespace sapt
    void
    operator()(Bitext<Token> const& bt,
         PhrasePair<Token>& pp,
-         ttasksptr const& ttask,
         std::vector<float> * dest = NULL) const
    {
      if (!dest) dest = &pp.fvals;
--- a/moses/TranslationModel/UG/sapt_pscore_wordcount.h
+++ b/moses/TranslationModel/UG/sapt_pscore_wordcount.h
@ -22,9 +22,8 @@ namespace sapt

    void
    operator()(Bitext<Token> const& bt,
-         PhrasePair<Token>& pp,
-         ttasksptr const& ttask,
-         std::vector<float> * dest = NULL) const
+        PhrasePair<Token>& pp,
+        std::vector<float> * dest = NULL) const
    {
      if (!dest) dest = &pp.fvals;
      (*dest)[this->m_index] = pp.len2;
--- a/moses/XmlOption.cpp
+++ b/moses/XmlOption.cpp
@ -402,12 +402,37 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line,
          StaticData::InstanceNonConst().SetAllWeights(allWeights);
        }

-        // coord: coordinate(s) of the input sentence in some space
-        // (one or more floats)
+        // Coord: coordinates of the input sentence in a user-defined space
+        // <coord space="NAME" coord="X Y Z ..." />
+        // where NAME is the name of the space and X Y Z ... are floats.  The
+        // coordinates are stored per phrase dictionary (InputType::PD2IC) for
+        // every dictionary that lists NAME among its known spaces.
         else if (tagName == "coord") {
+          // Parse tag: space name plus whitespace-separated float coordinates
+          string space = ParseXmlTagAttribute(tagContent, "space");
           vector<string> toks = Tokenize(ParseXmlTagAttribute(tagContent, "coord"));
-          input.m_coord.reset(new vector<float>());
-          Scan<float>(*(input.m_coord), toks);
+          boost::shared_ptr<vector<float> > coord(new vector<float>);
+          Scan<float>(*coord, toks);
+          // Create the dictionary -> coordinates map on first use
+          if (!input.m_pd2InputCoord) {
+            input.m_pd2InputCoord.reset(new InputType::PD2IC);
+          }
+          // Scan phrase dictionaries to see which (if any) use this space
+          BOOST_FOREACH(PhraseDictionary const* pd, PhraseDictionary::GetColl()) {
+            const vector<string>& pdKnownSpaces = pd->GetKnownSpaces();
+            for (size_t i = 0; i < pdKnownSpaces.size(); ++i) {
+              // Match: slot i of this dictionary's entry belongs to this space
+              if (pdKnownSpaces[i] == space) {
+                // Make sure a slot to store the coordinates exists
+                InputType::INCOORD& inputCoord = (*input.m_pd2InputCoord)[pd];
+                if (inputCoord.size() < i + 1) {
+                  inputCoord.resize(i + 1);
+                }
+                // Store (the same vector is shared by all matching dictionaries)
+                inputCoord[i] = coord;
+              }
+            }
+          }
         }

        // default: opening tag that specifies translation options