mosesdecoder/moses/FF/PhrasePairFeature.cpp

#include <boost/algorithm/string.hpp>

#include "PhrasePairFeature.h"
#include "moses/AlignmentInfo.h"
#include "moses/TargetPhrase.h"
#include "moses/Hypothesis.h"
#include "moses/TranslationOption.h"
#include "moses/InputPath.h"
#include "util/string_piece_hash.hh"
#include "util/exception.hh"

using namespace std;

namespace Moses
{

// Constructs the feature from its configuration line.
//
// Forwards 0 and the configuration line to the StatelessFeatureFunction base,
// calls ReadParameters(), reports the enabled modes on stderr and, when
// punctuation is to be ignored for triggers, fills m_punctuationHash.
PhrasePairFeature::PhrasePairFeature(const std::string &line)
  :StatelessFeatureFunction(0, line)
{
  std::cerr << "Initializing PhrasePairFeature.." << std::endl;
  ReadParameters();

  // announce which modes are switched on
  if (m_simple == 1) {
    std::cerr << "using simple phrase pairs.. ";
  }
  if (m_sourceContext == 1) {
    std::cerr << "using source context.. ";
  }
  if (m_domainTrigger == 1) {
    std::cerr << "using domain triggers.. ";
  }

  if (m_ignorePunctuation) {
    std::cerr << "ignoring punctuation for triggers.. ";

    // The literal is walked byte by byte, so every byte of a multi-byte
    // character ends up as its own key in the hash.
    const char punctuation[] = "\"'!?¿·()#_,.:;•&@‑/\\0123456789~=";
    const size_t byteCount = sizeof(punctuation) - 1; // leave out the terminating NUL
    for (size_t pos = 0; pos != byteCount; ++pos) {
      m_punctuationHash[punctuation[pos]] = 1;
    }
  }
}

// Applies one key/value argument of the feature's configuration line.
//
// Recognised keys:
//   input-factor, output-factor  factor ids, parsed with Scan<FactorType>
//   unrestricted, simple, source-context, domain-trigger, ignore-punctuation
//                                switches, parsed with Scan<bool>
//   path                         file name stored in m_filePathSource, which
//                                Load() opens
// Any other key is handed to StatelessFeatureFunction::SetParameter.
void PhrasePairFeature::SetParameter(const std::string& key, const std::string& value)
{
  if (key == "input-factor") {
    m_sourceFactorId = Scan<FactorType>(value);
  } else if (key == "output-factor") {
    m_targetFactorId = Scan<FactorType>(value);
  } else if (key == "unrestricted") {
    m_unrestricted = Scan<bool>(value);
  } else if (key == "simple") {
    m_simple = Scan<bool>(value);
  } else if (key == "source-context") {
    m_sourceContext = Scan<bool>(value);
  } else if (key == "domain-trigger") {
    m_domainTrigger = Scan<bool>(value);
  } else if (key == "ignore-punctuation") {
    m_ignorePunctuation = Scan<bool>(value);
  } else if (key == "path") {
    // This branch used to test "ignore-punctuation" a second time, which made
    // it unreachable and left m_filePathSource impossible to configure.
    m_filePathSource = value;
  } else {
    StatelessFeatureFunction::SetParameter(key, value);
  }
}

// Reads the file named by m_filePathSource.
//
// With m_domainTrigger set, every line is split on tabs and spaces and its
// terms become one set appended to m_vocabDomain (one set per line, so the
// line position stays usable as an index even for blank lines).
// Otherwise every line is inserted verbatim into m_vocabSource and
// m_unrestricted is forced to false.
//
// Throws (via UTIL_THROW_IF2) when the file cannot be opened.
void PhrasePairFeature::Load()
{
  // Both modes read the same file; only the interpretation of a line differs.
  ifstream inFileSource(m_filePathSource.c_str());
  UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);

  std::string line;
  if (m_domainTrigger) {
    // domain trigger terms for each input document
    while (getline(inFileSource, line)) {
      vector<string> termVector;
      boost::split(termVector, line, boost::is_any_of("\t "));

      std::set<std::string> terms;
      for (size_t i = 0; i < termVector.size(); ++i) {
        // Splitting yields empty tokens for blank lines and for repeated,
        // leading or trailing separators; an empty term must not become a
        // trigger.
        if (!termVector[i].empty())
          terms.insert(termVector[i]);
      }

      // add term set for current document
      m_vocabDomain.push_back(terms);
    }
  } else {
    // restricted source word vocabulary (a restricted target vocabulary is
    // not read here)
    while (getline(inFileSource, line)) {
      m_vocabSource.insert(line);
    }

    m_unrestricted = false;
  }
  // the stream is closed by its destructor
}

void PhrasePairFeature::Evaluate(
  const Hypothesis& hypo,
  ScoreComponentCollection* accumulator) const
{
  const TargetPhrase& target = hypo.GetCurrTargetPhrase();
  const Phrase& source = hypo.GetTranslationOption().GetInputPath().GetPhrase();
  if (m_simple) {
    ostringstream namestr;
    namestr << "pp_";
    namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
    for (size_t i = 1; i < source.GetSize(); ++i) {
      const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
      namestr << ",";
      namestr << sourceFactor->GetString();
    }
    namestr << "~";
    namestr << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
    for (size_t i = 1; i < target.GetSize(); ++i) {
      const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
      namestr << ",";
      namestr << targetFactor->GetString();
    }

    accumulator->SparsePlusEquals(namestr.str(),1);
  }
  if (m_domainTrigger) {
    const Sentence& input = static_cast<const Sentence&>(hypo.GetInput());
    const bool use_topicid = input.GetUseTopicId();
    const bool use_topicid_prob = input.GetUseTopicIdAndProb();

    // compute pair
    ostringstream pair;
    pair << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
    for (size_t i = 1; i < source.GetSize(); ++i) {
      const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
      pair << ",";
      pair << sourceFactor->GetString();
    }
    pair << "~";
    pair << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
    for (size_t i = 1; i < target.GetSize(); ++i) {
      const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
      pair << ",";
      pair << targetFactor->GetString();
    }

    if (use_topicid || use_topicid_prob) {
      if(use_topicid) {
        // use topicid as trigger
        const long topicid = input.GetTopicId();
        stringstream feature;
        feature << "pp_";
        if (topicid == -1)
          feature << "unk";
        else
          feature << topicid;

        feature << "_";
        feature << pair.str();
        accumulator->SparsePlusEquals(feature.str(), 1);
      } else {
        // use topic probabilities
        const vector<string> &topicid_prob = *(input.GetTopicIdAndProb());
        if (atol(topicid_prob[0].c_str()) == -1) {
          stringstream feature;
          feature << "pp_unk_";
          feature << pair.str();
          accumulator->SparsePlusEquals(feature.str(), 1);
        } else {
          for (size_t i=0; i+1 < topicid_prob.size(); i+=2) {
            stringstream feature;
            feature << "pp_";
            feature << topicid_prob[i];
            feature << "_";
            feature << pair.str();
            accumulator->SparsePlusEquals(feature.str(), atof((topicid_prob[i+1]).c_str()));
          }
        }
      }
    } else {
      // range over domain trigger words
      const long docid = input.GetDocumentId();
      for (set<string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
        string sourceTrigger = *p;
        ostringstream namestr;
        namestr << "pp_";
        namestr << sourceTrigger;
        namestr << "_";
        namestr << pair.str();
        accumulator->SparsePlusEquals(namestr.str(),1);
      }
    }
  }
  if (m_sourceContext) {
    const Sentence& input = static_cast<const Sentence&>(hypo.GetInput());

    // range over source words to get context
    for(size_t contextIndex = 0; contextIndex < input.GetSize(); contextIndex++ ) {
      StringPiece sourceTrigger = input.GetWord(contextIndex).GetFactor(m_sourceFactorId)->GetString();
      if (m_ignorePunctuation) {
        // check if trigger is punctuation
        char firstChar = sourceTrigger[0];
        CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
        if(charIterator != m_punctuationHash.end())
          continue;
      }

      bool sourceTriggerExists = false;
      if (!m_unrestricted)
        sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger ) != m_vocabSource.end();

      if (m_unrestricted || sourceTriggerExists) {
        ostringstream namestr;
        namestr << "pp_";
        namestr << sourceTrigger;
        namestr << "~";
        namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
        for (size_t i = 1; i < source.GetSize(); ++i) {
          const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
          namestr << ",";
          namestr << sourceFactor->GetString();
        }
        namestr << "~";
        namestr << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
        for (size_t i = 1; i < target.GetSize(); ++i) {
          const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
          namestr << ",";
          namestr << targetFactor->GetString();
        }

        accumulator->SparsePlusEquals(namestr.str(),1);
      }
    }
  }
}

// Reports whether the mask has the bit for the configured target factor
// (m_targetFactorId) set.
bool PhrasePairFeature::IsUseable(const FactorMask &mask) const
{
  return mask[m_targetFactorId];
}

}
-												precalculation of stateless features

											
										
										
											2012-09-07 19:57:53 +04:00
+								#include <boost/algorithm/string.hpp>
-												Implementation of phrase pair feature

git-svn-id: http://svn.statmt.org/repository/mira@3842 cc96ff50-19ce-11e0-b349-13d7f0bd23df

											
										
										
											2011-03-22 17:33:16 +03:00
+								#include "PhrasePairFeature.h"
-												move feature functions into moses/FF

											
										
										
											2013-05-24 21:02:49 +04:00
+								#include "moses/AlignmentInfo.h"
 								#include "moses/TargetPhrase.h"
 								#include "moses/Hypothesis.h"
 								#include "moses/TranslationOption.h"
-												speed up compiling by removing headers from StaticData.h

											
										
										
											2013-10-03 14:05:53 +04:00
+								#include "moses/InputPath.h"
-												Back FactorCollection with a memory pool.  Less memory for large vocabularies.

											
										
										
											2013-04-25 22:42:30 +04:00
+								#include "util/string_piece_hash.hh"
-												refactor  parsing of feature functiona args

											
										
										
											2013-06-11 02:16:28 +04:00
+								#include "util/exception.hh"
-												Implementation of phrase pair feature

git-svn-id: http://svn.statmt.org/repository/mira@3842 cc96ff50-19ce-11e0-b349-13d7f0bd23df

											
										
										
											2011-03-22 17:33:16 +03:00
 								using namespace std;
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
+								namespace Moses
 								{
-												Implementation of phrase pair feature

git-svn-id: http://svn.statmt.org/repository/mira@3842 cc96ff50-19ce-11e0-b349-13d7f0bd23df

											
										
										
											2011-03-22 17:33:16 +03:00
-												refactor PhrasePairFeature

											
										
										
											2013-01-15 19:04:32 +04:00
+								PhrasePairFeature::PhrasePairFeature(const std::string &line)
-												remove description argument from feature function constructor. Redundant

											
										
										
											2013-10-29 22:44:33 +04:00
+								  :StatelessFeatureFunction(0, line)
-												refactor PhrasePairFeature

											
										
										
											2013-01-15 19:04:32 +04:00
+								{
 								  std::cerr << "Initializing PhrasePairFeature.." << std::endl;
-												redo parsing of feature function parameters

											
										
										
											2013-06-20 16:06:03 +04:00
+								  ReadParameters();
-												refactor PhrasePairFeature

											
										
										
											2013-01-15 19:04:32 +04:00
 								  if (m_simple == 1) std::cerr << "using simple phrase pairs.. ";
 								  if (m_sourceContext == 1) std::cerr << "using source context.. ";
 								  if (m_domainTrigger == 1) std::cerr << "using domain triggers.. ";
 								  // compile a list of punctuation characters
 								  if (m_ignorePunctuation) {
 								    std::cerr << "ignoring punctuation for triggers.. ";
 								    char punctuation[] = "\"'!?¿·()#_,.:;•&@‑/\\0123456789~=";
 								    for (size_t i=0; i < sizeof(punctuation)-1; ++i)
 								      m_punctuationHash[punctuation[i]] = 1;
 								  }
 								}
-												redo parsing of feature function parameters

											
										
										
											2013-06-20 16:25:02 +04:00
+								void PhrasePairFeature::SetParameter(const std::string& key, const std::string& value)
-												refactor  parsing of feature functiona args

											
										
										
											2013-06-10 21:11:55 +04:00
+								{
-												refactor  parsing of feature functiona args

											
										
										
											2013-06-11 04:46:04 +04:00
+								  if (key == "input-factor") {
 								    m_sourceFactorId = Scan<FactorType>(value);
 								  } else if (key == "output-factor") {
 								    m_targetFactorId = Scan<FactorType>(value);
 								  } else if (key == "unrestricted") {
 								    m_unrestricted = Scan<bool>(value);
 								  } else if (key == "simple") {
 								    m_simple = Scan<bool>(value);
 								  } else if (key == "source-context") {
 								    m_sourceContext = Scan<bool>(value);
 								  } else if (key == "domain-trigger") {
 								    m_domainTrigger = Scan<bool>(value);
 								  } else if (key == "ignore-punctuation") {
 								    m_ignorePunctuation = Scan<bool>(value);
 								  } else if (key == "ignore-punctuation") {
 								    m_filePathSource = value;
 								  } else {
-												redo parsing of feature function parameters

											
										
										
											2013-06-20 16:25:02 +04:00
+								    StatelessFeatureFunction::SetParameter(key, value);
-												refactor  parsing of feature functiona args

											
										
										
											2013-06-11 04:46:04 +04:00
+								  }
-												refactor  parsing of feature functiona args

											
										
										
											2013-06-10 21:11:55 +04:00
+								}
-												refactor  parsing of feature functiona args

											
										
										
											2013-06-11 02:16:28 +04:00
+								void PhrasePairFeature::Load()
-												save all changes related to domain adaptation

											
										
										
											2012-07-26 20:32:50 +04:00
+								{
 								  if (m_domainTrigger) {
 								    // domain trigger terms for each input document
-												refactor  parsing of feature functiona args

											
										
										
											2013-06-11 02:16:28 +04:00
+								    ifstream inFileSource(m_filePathSource.c_str());
-												UTIL_THROW_IF -> UTIL_THROW_IF2

											
										
										
											2013-11-23 00:27:46 +04:00
+								    UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
-												save all changes related to domain adaptation

											
										
										
											2012-07-26 20:32:50 +04:00
+								    std::string line;
 								    while (getline(inFileSource, line)) {
 								      std::set<std::string> terms;
 								      vector<string> termVector;
 								      boost::split(termVector, line, boost::is_any_of("\t "));
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
+								      for (size_t i=0; i < termVector.size(); ++i)
-												refactor PhrasePairFeature

											
										
										
											2013-01-15 19:04:32 +04:00
+								        terms.insert(termVector[i]);
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
-												save all changes related to domain adaptation

											
										
										
											2012-07-26 20:32:50 +04:00
+								      // add term set for current document
 								      m_vocabDomain.push_back(terms);
 								    }
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
-												save all changes related to domain adaptation

											
										
										
											2012-07-26 20:32:50 +04:00
+								    inFileSource.close();
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
+								  } else {
-												save all changes related to domain adaptation

											
										
										
											2012-07-26 20:32:50 +04:00
+								    // restricted source word vocabulary
-												refactor  parsing of feature functiona args

											
										
										
											2013-06-11 02:16:28 +04:00
+								    ifstream inFileSource(m_filePathSource.c_str());
-												UTIL_THROW_IF -> UTIL_THROW_IF2

											
										
										
											2013-11-23 00:27:46 +04:00
+								    UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
-												save all changes related to domain adaptation

											
										
										
											2012-07-26 20:32:50 +04:00
+								    std::string line;
 								    while (getline(inFileSource, line)) {
 								      m_vocabSource.insert(line);
 								    }
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
-												save all changes related to domain adaptation

											
										
										
											2012-07-26 20:32:50 +04:00
+								    inFileSource.close();
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
-												save all changes related to domain adaptation

											
										
										
											2012-07-26 20:32:50 +04:00
+								    /*  // restricted target word vocabulary
 								    ifstream inFileTarget(filePathTarget.c_str());
 								    if (!inFileTarget)
 								    {
 								      cerr << "could not open file " << filePathTarget << endl;
 								      return false;
 								    }
 								    while (getline(inFileTarget, line)) {
 								    m_vocabTarget.insert(line);
 								    }
 								    inFileTarget.close();*/
 								    m_unrestricted = false;
 								  }
 								}
-												precalculation of stateless features

											
										
										
											2012-09-07 19:57:53 +04:00
+								void PhrasePairFeature::Evaluate(
-												delete PhraseBasedFeatureContext

											
										
										
											2013-08-23 17:25:25 +04:00
+								  const Hypothesis& hypo,
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
+								  ScoreComponentCollection* accumulator) const
-												add optional source context to phrase pair feature

											
										
										
											2012-03-19 06:45:59 +04:00
+								{
-												delete PhraseBasedFeatureContext

											
										
										
											2013-08-23 17:25:25 +04:00
+								  const TargetPhrase& target = hypo.GetCurrTargetPhrase();
 								  const Phrase& source = hypo.GetTranslationOption().GetInputPath().GetPhrase();
-												merge remaining changes to mira, word pair features, phrase pair features

											
										
										
											2012-10-03 21:53:55 +04:00
+								  if (m_simple) {
-												Implementation of phrase pair feature

git-svn-id: http://svn.statmt.org/repository/mira@3842 cc96ff50-19ce-11e0-b349-13d7f0bd23df

											
										
										
											2011-03-22 17:33:16 +03:00
+								    ostringstream namestr;
-												merge remaining changes to mira, word pair features, phrase pair features

											
										
										
											2012-10-03 21:53:55 +04:00
+								    namestr << "pp_";
 								    namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
 								    for (size_t i = 1; i < source.GetSize(); ++i) {
 								      const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
 								      namestr << ",";
 								      namestr << sourceFactor->GetString();
 								    }
 								    namestr << "~";
 								    namestr << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
 								    for (size_t i = 1; i < target.GetSize(); ++i) {
 								      const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
 								      namestr << ",";
 								      namestr << targetFactor->GetString();
 								    }
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
-												merge remaining changes to mira, word pair features, phrase pair features

											
										
										
											2012-10-03 21:53:55 +04:00
+								    accumulator->SparsePlusEquals(namestr.str(),1);
 								  }
 								  if (m_domainTrigger) {
-												delete PhraseBasedFeatureContext

											
										
										
											2013-08-23 17:25:25 +04:00
+								    const Sentence& input = static_cast<const Sentence&>(hypo.GetInput());
-												merge remaining changes to mira, word pair features, phrase pair features

											
										
										
											2012-10-03 21:53:55 +04:00
+								    const bool use_topicid = input.GetUseTopicId();
 								    const bool use_topicid_prob = input.GetUseTopicIdAndProb();
-												add optional source context to phrase pair feature

											
										
										
											2012-03-19 06:45:59 +04:00
-												merge remaining changes to mira, word pair features, phrase pair features

											
										
										
											2012-10-03 21:53:55 +04:00
+								    // compute pair
 								    ostringstream pair;
 								    pair << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
 								    for (size_t i = 1; i < source.GetSize(); ++i) {
 								      const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
 								      pair << ",";
 								      pair << sourceFactor->GetString();
 								    }
 								    pair << "~";
 								    pair << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
 								    for (size_t i = 1; i < target.GetSize(); ++i) {
 								      const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
 								      pair << ",";
 								      pair << targetFactor->GetString();
 								    }
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
-												merge remaining changes to mira, word pair features, phrase pair features

											
										
										
											2012-10-03 21:53:55 +04:00
+								    if (use_topicid || use_topicid_prob) {
 								      if(use_topicid) {
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
+								        // use topicid as trigger
 								        const long topicid = input.GetTopicId();
 								        stringstream feature;
 								        feature << "pp_";
 								        if (topicid == -1)
 								          feature << "unk";
 								        else
 								          feature << topicid;
 								        feature << "_";
 								        feature << pair.str();
 								        accumulator->SparsePlusEquals(feature.str(), 1);
 								      } else {
 								        // use topic probabilities
 								        const vector<string> &topicid_prob = *(input.GetTopicIdAndProb());
 								        if (atol(topicid_prob[0].c_str()) == -1) {
 								          stringstream feature;
 								          feature << "pp_unk_";
 								          feature << pair.str();
 								          accumulator->SparsePlusEquals(feature.str(), 1);
 								        } else {
 								          for (size_t i=0; i+1 < topicid_prob.size(); i+=2) {
 								            stringstream feature;
 								            feature << "pp_";
 								            feature << topicid_prob[i];
 								            feature << "_";
 								            feature << pair.str();
 								            accumulator->SparsePlusEquals(feature.str(), atof((topicid_prob[i+1]).c_str()));
 								          }
 								        }
-												merge remaining changes to mira, word pair features, phrase pair features

											
										
										
											2012-10-03 21:53:55 +04:00
+								      }
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
+								    } else {
-												merge remaining changes to mira, word pair features, phrase pair features

											
										
										
											2012-10-03 21:53:55 +04:00
+								      // range over domain trigger words
 								      const long docid = input.GetDocumentId();
 								      for (set<string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
+								        string sourceTrigger = *p;
 								        ostringstream namestr;
 								        namestr << "pp_";
 								        namestr << sourceTrigger;
 								        namestr << "_";
 								        namestr << pair.str();
 								        accumulator->SparsePlusEquals(namestr.str(),1);
-												merge remaining changes to mira, word pair features, phrase pair features

											
										
										
											2012-10-03 21:53:55 +04:00
+								      }
 								    }
 								  }
 								  if (m_sourceContext) {
-												delete PhraseBasedFeatureContext

											
										
										
											2013-08-23 17:25:25 +04:00
+								    const Sentence& input = static_cast<const Sentence&>(hypo.GetInput());
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
-												merge remaining changes to mira, word pair features, phrase pair features

											
										
										
											2012-10-03 21:53:55 +04:00
+								    // range over source words to get context
 								    for(size_t contextIndex = 0; contextIndex < input.GetSize(); contextIndex++ ) {
-												Back FactorCollection with a memory pool.  Less memory for large vocabularies.

											
										
										
											2013-04-25 22:42:30 +04:00
+								      StringPiece sourceTrigger = input.GetWord(contextIndex).GetFactor(m_sourceFactorId)->GetString();
-												merge remaining changes to mira, word pair features, phrase pair features

											
										
										
											2012-10-03 21:53:55 +04:00
+								      if (m_ignorePunctuation) {
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
+								        // check if trigger is punctuation
 								        char firstChar = sourceTrigger[0];
 								        CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
 								        if(charIterator != m_punctuationHash.end())
 								          continue;
-												merge remaining changes to mira, word pair features, phrase pair features

											
										
										
											2012-10-03 21:53:55 +04:00
+								      }
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
-												merge remaining changes to mira, word pair features, phrase pair features

											
										
										
											2012-10-03 21:53:55 +04:00
+								      bool sourceTriggerExists = false;
 								      if (!m_unrestricted)
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
+								        sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger ) != m_vocabSource.end();
-												merge remaining changes to mira, word pair features, phrase pair features

											
										
										
											2012-10-03 21:53:55 +04:00
+								      if (m_unrestricted || sourceTriggerExists) {
-												beautify

											
										
										
											2013-05-29 21:16:15 +04:00
+								        ostringstream namestr;
 								        namestr << "pp_";
 								        namestr << sourceTrigger;
 								        namestr << "~";
 								        namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
 								        for (size_t i = 1; i < source.GetSize(); ++i) {
 								          const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
 								          namestr << ",";
 								          namestr << sourceFactor->GetString();
 								        }
 								        namestr << "~";
 								        namestr << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
 								        for (size_t i = 1; i < target.GetSize(); ++i) {
 								          const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
 								          namestr << ",";
 								          namestr << targetFactor->GetString();
 								        }
 								        accumulator->SparsePlusEquals(namestr.str(),1);
-												merge remaining changes to mira, word pair features, phrase pair features

											
										
										
											2012-10-03 21:53:55 +04:00
+								      }
 								    }
 								  }
-												Implementation of phrase pair feature

git-svn-id: http://svn.statmt.org/repository/mira@3842 cc96ff50-19ce-11e0-b349-13d7f0bd23df

											
										
										
											2011-03-22 17:33:16 +03:00
+								}
-												figure out which feature function to apply at which decode step. Book-keeping

											
										
										
											2013-05-30 15:41:08 +04:00
+								bool PhrasePairFeature::IsUseable(const FactorMask &mask) const
 								{
-												beautify

											
										
										
											2013-05-30 15:51:40 +04:00
+								  bool ret = mask[m_targetFactorId];
 								  return ret;
-												figure out which feature function to apply at which decode step. Book-keeping

											
										
										
											2013-05-30 15:41:08 +04:00
+								}
-												Implementation of phrase pair feature

git-svn-id: http://svn.statmt.org/repository/mira@3842 cc96ff50-19ce-11e0-b349-13d7f0bd23df

											
										
										
											2011-03-22 17:33:16 +03:00
+								}