2011-05-11 02:02:25 +04:00
|
|
|
#include "PhraseBoundaryFeature.h"
|
|
|
|
|
2013-05-24 21:02:49 +04:00
|
|
|
#include "moses/Hypothesis.h"
|
2011-05-11 02:02:25 +04:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
2011-05-11 02:02:25 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
int PhraseBoundaryState::Compare(const FFState& other) const
|
2011-05-11 02:02:25 +04:00
|
|
|
{
|
|
|
|
const PhraseBoundaryState& rhs = dynamic_cast<const PhraseBoundaryState&>(other);
|
2011-06-24 17:06:58 +04:00
|
|
|
int tgt = Word::Compare(*m_targetWord,*(rhs.m_targetWord));
|
|
|
|
if (tgt) return tgt;
|
|
|
|
return Word::Compare(*m_sourceWord,*(rhs.m_sourceWord));
|
2011-05-11 02:02:25 +04:00
|
|
|
}
|
|
|
|
|
2013-01-02 20:06:22 +04:00
|
|
|
/**
 * Constructs the feature from its configuration line.
 *
 * The line is forwarded to the StatefulFeatureFunction base together with the
 * feature name and 0 (presumably the number of dense scores - confirm against
 * the base class). Afterwards every entry of m_args is offered to
 * OverrideParameter(); entries it accepts are removed from m_args, the rest
 * are left in place.
 *
 * @param line configuration line describing this feature.
 */
PhraseBoundaryFeature::PhraseBoundaryFeature(const std::string &line)
  : StatefulFeatureFunction("PhraseBoundaryFeature", 0, line)
{
  // The previous message named the source word deletion feature, which was
  // misleading in the start-up log.
  std::cerr << "Initializing phrase boundary feature.." << std::endl;

  size_t ind = 0;
  while (ind < m_args.size()) {
    // Each entry is read as a (key, value) pair.
    vector<string> &args = m_args[ind];
    bool consumed = OverrideParameter(args[0], args[1]);
    if (consumed) {
      // Erasing shifts the following entries down, so the index stays put.
      m_args.erase(m_args.begin() + ind);
    } else {
      ++ind;
    }
  }
}
|
|
|
|
|
2013-06-11 02:16:28 +04:00
|
|
|
bool PhraseBoundaryFeature::OverrideParameter(const std::string& key, const std::string& value)
|
|
|
|
{
|
|
|
|
if (key == "source") {
|
|
|
|
m_sourceFactors = Tokenize<FactorType>(value, ",");
|
|
|
|
} else if (key == "target") {
|
|
|
|
m_targetFactors = Tokenize<FactorType>(value, ",");
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
/**
 * Creates the state used for the empty hypothesis.
 *
 * The input argument is not used. The returned state carries no words
 * (both word pointers are NULL) and is allocated with new.
 */
const FFState* PhraseBoundaryFeature::EmptyHypothesisState(const InputType &) const
{
  const FFState* initialState = new PhraseBoundaryState(NULL, NULL);
  return initialState;
}
|
|
|
|
|
|
|
|
|
|
|
|
void PhraseBoundaryFeature::AddFeatures(
|
|
|
|
const Word* leftWord, const Word* rightWord, const FactorList& factors, const string& side,
|
2013-05-29 21:16:15 +04:00
|
|
|
ScoreComponentCollection* scores) const
|
|
|
|
{
|
|
|
|
for (size_t i = 0; i < factors.size(); ++i) {
|
|
|
|
ostringstream name;
|
|
|
|
name << side << ":";
|
|
|
|
name << factors[i];
|
|
|
|
name << ":";
|
|
|
|
if (leftWord) {
|
|
|
|
name << leftWord->GetFactor(factors[i])->GetString();
|
|
|
|
} else {
|
|
|
|
name << BOS_;
|
|
|
|
}
|
|
|
|
name << ":";
|
|
|
|
if (rightWord) {
|
|
|
|
name << rightWord->GetFactor(factors[i])->GetString();
|
|
|
|
} else {
|
|
|
|
name << EOS_;
|
2011-05-11 02:02:25 +04:00
|
|
|
}
|
2013-05-29 21:16:15 +04:00
|
|
|
scores->PlusEquals(this,name.str(),1);
|
|
|
|
}
|
2011-05-11 02:02:25 +04:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Scores the boundary between the previous hypothesis and the phrase pair
 * applied by `cur_hypo`, and produces the follow-up state.
 *
 * When the current target phrase is empty nothing is scored and a copy of the
 * previous state is returned. Otherwise boundary scores are added for the
 * target side ("tgt") and then the source side ("src"), pairing the word
 * stored in the previous state with the first word of the current phrase.
 * If the hypothesis has completed the source, the last source and target
 * words are additionally scored against a NULL right-hand word.
 *
 * @param cur_hypo   hypothesis being scored.
 * @param prev_state state of the preceding hypothesis; cast to
 *                   PhraseBoundaryState and used without a null check.
 * @param scores     collection receiving the scores.
 * @return a state allocated with new, holding the last source and target
 *         words of the current phrase pair (or a copy of the previous state).
 */
FFState* PhraseBoundaryFeature::Evaluate
(const Hypothesis& cur_hypo, const FFState* prev_state,
 ScoreComponentCollection* scores) const
{
  const PhraseBoundaryState* previous = dynamic_cast<const PhraseBoundaryState*>(prev_state);
  const Phrase& target = cur_hypo.GetCurrTargetPhrase();

  // No target words: there is no boundary to score, carry the state over.
  if (target.GetSize() == 0) {
    return new PhraseBoundaryState(*previous);
  }

  // Boundary on the target side.
  AddFeatures(previous->GetTargetWord(), &(target.GetWord(0)), m_targetFactors, "tgt", scores);

  // Boundary on the source side.
  const Phrase* source = cur_hypo.GetSourcePhrase();
  AddFeatures(previous->GetSourceWord(), &(source->GetWord(0)), m_sourceFactors, "src", scores);

  const Word* lastSource = &(source->GetWord(source->GetSize() - 1));
  const Word* lastTarget = &(target.GetWord(target.GetSize() - 1));

  // Once the source is completed, also score the boundary to the sentence end.
  if (cur_hypo.IsSourceCompleted()) {
    AddFeatures(lastSource, NULL, m_sourceFactors, "src", scores);
    AddFeatures(lastTarget, NULL, m_targetFactors, "tgt", scores);
  }

  return new PhraseBoundaryState(lastSource, lastTarget);
}
|
|
|
|
|
2013-05-30 15:41:08 +04:00
|
|
|
bool PhraseBoundaryFeature::IsUseable(const FactorMask &mask) const
|
|
|
|
{
|
2013-05-30 15:51:40 +04:00
|
|
|
for (size_t i = 0; i < m_targetFactors.size(); ++i) {
|
|
|
|
const FactorType &factor = m_targetFactors[i];
|
|
|
|
if (!mask[factor]) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
2013-05-30 15:41:08 +04:00
|
|
|
}
|
2011-05-11 02:02:25 +04:00
|
|
|
|
|
|
|
}
|