mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-06 19:49:41 +03:00
207 lines
6.1 KiB
C++
207 lines
6.1 KiB
C++
#include "ConstrainedDecoding.h"
|
|
#include "moses/Hypothesis.h"
|
|
#include "moses/Manager.h"
|
|
#include "moses/ChartHypothesis.h"
|
|
#include "moses/ChartManager.h"
|
|
#include "moses/StaticData.h"
|
|
#include "moses/InputFileStream.h"
|
|
#include "moses/Util.h"
|
|
#include "util/exception.hh"
|
|
|
|
using namespace std;
|
|
|
|
namespace Moses
|
|
{
|
|
ConstrainedDecodingState::ConstrainedDecodingState(const Hypothesis &hypo)
|
|
{
|
|
hypo.GetOutputPhrase(m_outputPhrase);
|
|
}
|
|
|
|
ConstrainedDecodingState::ConstrainedDecodingState(const ChartHypothesis &hypo)
|
|
{
|
|
hypo.GetOutputPhrase(m_outputPhrase);
|
|
}
|
|
|
|
int ConstrainedDecodingState::Compare(const FFState& other) const
|
|
{
|
|
const ConstrainedDecodingState &otherFF = static_cast<const ConstrainedDecodingState&>(other);
|
|
int ret = m_outputPhrase.Compare(otherFF.m_outputPhrase);
|
|
return ret;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////
|
|
ConstrainedDecoding::ConstrainedDecoding(const std::string &line)
|
|
:StatefulFeatureFunction(1, line)
|
|
,m_maxUnknowns(0)
|
|
,m_negate(false)
|
|
,m_soft(false)
|
|
{
|
|
m_tuneable = false;
|
|
ReadParameters();
|
|
}
|
|
|
|
void ConstrainedDecoding::Load()
|
|
{
|
|
const StaticData &staticData = StaticData::Instance();
|
|
bool addBeginEndWord = (staticData.GetSearchAlgorithm() == CYKPlus) || (staticData.GetSearchAlgorithm() == ChartIncremental);
|
|
|
|
for(size_t i = 0; i < m_paths.size(); ++i) {
|
|
InputFileStream constraintFile(m_paths[i]);
|
|
std::string line;
|
|
long sentenceID = staticData.GetStartTranslationId() - 1;
|
|
while (getline(constraintFile, line)) {
|
|
vector<string> vecStr = Tokenize(line, "\t");
|
|
|
|
Phrase phrase(0);
|
|
if (vecStr.size() == 1) {
|
|
sentenceID++;
|
|
// phrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), vecStr[0], staticData.GetFactorDelimiter(), NULL);
|
|
phrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), vecStr[0], NULL);
|
|
} else if (vecStr.size() == 2) {
|
|
sentenceID = Scan<long>(vecStr[0]);
|
|
// phrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), vecStr[1], staticData.GetFactorDelimiter(), NULL);
|
|
phrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), vecStr[1], NULL);
|
|
} else {
|
|
UTIL_THROW(util::Exception, "Reference file not loaded");
|
|
}
|
|
|
|
if (addBeginEndWord) {
|
|
phrase.InitStartEndWord();
|
|
}
|
|
m_constraints[sentenceID].push_back(phrase);
|
|
}
|
|
}
|
|
}
|
|
|
|
std::vector<float> ConstrainedDecoding::DefaultWeights() const
|
|
{
|
|
UTIL_THROW_IF2(m_numScoreComponents != 1,
|
|
"ConstrainedDecoding must only have 1 score");
|
|
vector<float> ret(1, 1);
|
|
return ret;
|
|
}
|
|
|
|
template <class H, class M>
|
|
const std::vector<Phrase> *GetConstraint(const std::map<long,std::vector<Phrase> > &constraints, const H &hypo)
|
|
{
|
|
const M &mgr = hypo.GetManager();
|
|
const InputType &input = mgr.GetSource();
|
|
long id = input.GetTranslationId();
|
|
|
|
map<long,std::vector<Phrase> >::const_iterator iter;
|
|
iter = constraints.find(id);
|
|
|
|
if (iter == constraints.end()) {
|
|
UTIL_THROW(util::Exception, "Couldn't find reference " << id);
|
|
|
|
return NULL;
|
|
} else {
|
|
return &iter->second;
|
|
}
|
|
}
|
|
|
|
FFState* ConstrainedDecoding::EvaluateWhenApplied(
|
|
const Hypothesis& hypo,
|
|
const FFState* prev_state,
|
|
ScoreComponentCollection* accumulator) const
|
|
{
|
|
const std::vector<Phrase> *ref = GetConstraint<Hypothesis, Manager>(m_constraints, hypo);
|
|
assert(ref);
|
|
|
|
ConstrainedDecodingState *ret = new ConstrainedDecodingState(hypo);
|
|
const Phrase &outputPhrase = ret->GetPhrase();
|
|
|
|
size_t searchPos = NOT_FOUND;
|
|
size_t i = 0;
|
|
size_t size = 0;
|
|
while(searchPos == NOT_FOUND && i < ref->size()) {
|
|
searchPos = (*ref)[i].Find(outputPhrase, m_maxUnknowns);
|
|
size = (*ref)[i].GetSize();
|
|
i++;
|
|
}
|
|
|
|
float score;
|
|
if (hypo.IsSourceCompleted()) {
|
|
// translated entire sentence.
|
|
bool match = (searchPos == 0) && (size == outputPhrase.GetSize());
|
|
if (!m_negate) {
|
|
score = match ? 0 : - ( m_soft ? 1 : std::numeric_limits<float>::infinity());
|
|
} else {
|
|
score = !match ? 0 : - ( m_soft ? 1 : std::numeric_limits<float>::infinity());
|
|
}
|
|
} else if (m_negate) {
|
|
// keep all derivations
|
|
score = 0;
|
|
} else {
|
|
score = (searchPos != NOT_FOUND) ? 0 : - ( m_soft ? 1 : std::numeric_limits<float>::infinity());
|
|
}
|
|
|
|
accumulator->PlusEquals(this, score);
|
|
|
|
return ret;
|
|
}
|
|
|
|
FFState* ConstrainedDecoding::EvaluateWhenApplied(
|
|
const ChartHypothesis &hypo,
|
|
int /* featureID - used to index the state in the previous hypotheses */,
|
|
ScoreComponentCollection* accumulator) const
|
|
{
|
|
const std::vector<Phrase> *ref = GetConstraint<ChartHypothesis, ChartManager>(m_constraints, hypo);
|
|
assert(ref);
|
|
|
|
const ChartManager &mgr = hypo.GetManager();
|
|
const Sentence &source = static_cast<const Sentence&>(mgr.GetSource());
|
|
|
|
ConstrainedDecodingState *ret = new ConstrainedDecodingState(hypo);
|
|
const Phrase &outputPhrase = ret->GetPhrase();
|
|
|
|
size_t searchPos = NOT_FOUND;
|
|
size_t i = 0;
|
|
size_t size = 0;
|
|
while(searchPos == NOT_FOUND && i < ref->size()) {
|
|
searchPos = (*ref)[i].Find(outputPhrase, m_maxUnknowns);
|
|
size = (*ref)[i].GetSize();
|
|
i++;
|
|
}
|
|
|
|
float score;
|
|
if (hypo.GetCurrSourceRange().GetStartPos() == 0 &&
|
|
hypo.GetCurrSourceRange().GetEndPos() == source.GetSize() - 1) {
|
|
// translated entire sentence.
|
|
bool match = (searchPos == 0) && (size == outputPhrase.GetSize());
|
|
|
|
if (!m_negate) {
|
|
score = match ? 0 : - ( m_soft ? 1 : std::numeric_limits<float>::infinity());
|
|
} else {
|
|
score = !match ? 0 : - ( m_soft ? 1 : std::numeric_limits<float>::infinity());
|
|
}
|
|
} else if (m_negate) {
|
|
// keep all derivations
|
|
score = 0;
|
|
} else {
|
|
score = (searchPos != NOT_FOUND) ? 0 : - ( m_soft ? 1 : std::numeric_limits<float>::infinity());
|
|
}
|
|
|
|
accumulator->PlusEquals(this, score);
|
|
|
|
return ret;
|
|
}
|
|
|
|
void ConstrainedDecoding::SetParameter(const std::string& key, const std::string& value)
|
|
{
|
|
if (key == "path") {
|
|
m_paths = Tokenize(value, ",");
|
|
} else if (key == "max-unknowns") {
|
|
m_maxUnknowns = Scan<int>(value);
|
|
} else if (key == "negate") {
|
|
m_negate = Scan<bool>(value);
|
|
} else if (key == "soft") {
|
|
m_soft = Scan<bool>(value);
|
|
} else {
|
|
StatefulFeatureFunction::SetParameter(key, value);
|
|
}
|
|
}
|
|
|
|
}
|
|
|