mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-29 06:52:34 +03:00
486 lines
14 KiB
C++
486 lines
14 KiB
C++
// -*- c++ -*-
|
|
#include <vector>
|
|
#include <string>
|
|
|
|
#include "moses/FF/FFState.h"
|
|
#include "moses/Hypothesis.h"
|
|
#include "moses/WordsRange.h"
|
|
#include "moses/TranslationOption.h"
|
|
#include "moses/Util.h"
|
|
|
|
#include "LexicalReordering.h"
|
|
#include "LexicalReorderingState.h"
|
|
#include "ReorderingStack.h"
|
|
|
|
namespace Moses
|
|
{
|
|
|
|
bool
|
|
IsMonotonicStep(WordsRange const& prev, // words range of last source phrase
|
|
WordsRange const& cur, // words range of current source phrase
|
|
WordsBitmap const& cov) // coverage bitmap
|
|
{
|
|
size_t e = prev.GetEndPos() + 1;
|
|
size_t s = cur.GetStartPos();
|
|
return (s == e || (s >= e && !cov.GetValue(e)));
|
|
}
|
|
|
|
bool
|
|
IsSwap(WordsRange const& prev, WordsRange const& cur, WordsBitmap const& cov)
|
|
{
|
|
size_t s = prev.GetStartPos();
|
|
size_t e = cur.GetEndPos();
|
|
return (e+1 == s || (e < s && !cov.GetValue(s-1)));
|
|
}
|
|
|
|
size_t
|
|
LRModel::
|
|
GetNumberOfTypes() const
|
|
{
|
|
return ((m_modelType == MSD) ? 3 :
|
|
(m_modelType == MSLR) ? 4 : 2);
|
|
}
|
|
|
|
size_t
|
|
LRModel::
|
|
GetNumScoreComponents() const
|
|
{
|
|
size_t score_per_dir = m_collapseScores ? 1 : GetNumberOfTypes();
|
|
return ((m_direction == Bidirectional)
|
|
? 2 * score_per_dir + m_additionalScoreComponents
|
|
: score_per_dir + m_additionalScoreComponents);
|
|
}
|
|
|
|
void
|
|
LRModel::
|
|
ConfigureSparse(const std::map<std::string,std::string>& sparseArgs,
|
|
const LexicalReordering* producer)
|
|
{
|
|
if (sparseArgs.size()) {
|
|
m_sparse.reset(new SparseReordering(sparseArgs, producer));
|
|
}
|
|
}
|
|
|
|
void
|
|
LRModel::
|
|
SetAdditionalScoreComponents(size_t number)
|
|
{
|
|
m_additionalScoreComponents = number;
|
|
}
|
|
|
|
/// return orientation for the first phrase
|
|
LRModel::ReorderingType
|
|
LRModel::
|
|
GetOrientation(WordsRange const& cur) const
|
|
{
|
|
UTIL_THROW_IF2(m_modelType == None, "Reordering Model Type is None");
|
|
return ((m_modelType == LeftRight) ? R :
|
|
(cur.GetStartPos() == 0) ? M :
|
|
(m_modelType == MSD) ? D :
|
|
(m_modelType == MSLR) ? DR : NM);
|
|
}
|
|
|
|
LRModel::ReorderingType
|
|
LRModel::
|
|
GetOrientation(WordsRange const& prev, WordsRange const& cur) const
|
|
{
|
|
UTIL_THROW_IF2(m_modelType == None, "No reordering model type specified");
|
|
return ((m_modelType == LeftRight)
|
|
? prev.GetEndPos() <= cur.GetStartPos() ? R : L
|
|
: (cur.GetStartPos() == prev.GetEndPos() + 1) ? M
|
|
: (m_modelType == Monotonic) ? NM
|
|
: (prev.GetStartPos() == cur.GetEndPos() + 1) ? S
|
|
: (m_modelType == MSD) ? D
|
|
: (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL);
|
|
}
|
|
|
|
LRModel::ReorderingType
|
|
LRModel::
|
|
GetOrientation(int const reoDistance) const
|
|
{
|
|
// this one is for HierarchicalReorderingBackwardState
|
|
return ((m_modelType == LeftRight)
|
|
? (reoDistance >= 1) ? R : L
|
|
: (reoDistance == 1) ? M
|
|
: (m_modelType == Monotonic) ? NM
|
|
: (reoDistance == -1) ? S
|
|
: (m_modelType == MSD) ? D
|
|
: (reoDistance > 1) ? DR : DL);
|
|
}
|
|
|
|
LRModel::ReorderingType
|
|
LRModel::
|
|
GetOrientation(WordsRange const& prev, WordsRange const& cur,
|
|
WordsBitmap const& cov) const
|
|
{
|
|
return ((m_modelType == LeftRight)
|
|
? cur.GetStartPos() > prev.GetEndPos() ? R : L
|
|
: IsMonotonicStep(prev,cur,cov) ? M
|
|
: (m_modelType == Monotonic) ? NM
|
|
: IsSwap(prev,cur,cov) ? S
|
|
: (m_modelType == MSD) ? D
|
|
: cur.GetStartPos() > prev.GetEndPos() ? DR : DL);
|
|
}
|
|
|
|
LRModel::
|
|
LRModel(const std::string &modelType)
|
|
: m_modelString(modelType)
|
|
, m_scoreProducer(NULL)
|
|
, m_modelType(None)
|
|
, m_phraseBased(true)
|
|
, m_collapseScores(false)
|
|
, m_direction(Backward)
|
|
, m_additionalScoreComponents(0)
|
|
{
|
|
std::vector<std::string> config = Tokenize<std::string>(modelType, "-");
|
|
|
|
for (size_t i=0; i<config.size(); ++i) {
|
|
if (config[i] == "hier") {
|
|
m_phraseBased = false;
|
|
} else if (config[i] == "phrase") {
|
|
m_phraseBased = true;
|
|
} else if (config[i] == "wbe") {
|
|
m_phraseBased = true;
|
|
}
|
|
// no word-based decoding available, fall-back to phrase-based
|
|
// This is the old lexical reordering model combination of moses
|
|
|
|
else if (config[i] == "msd") {
|
|
m_modelType = MSD;
|
|
} else if (config[i] == "mslr") {
|
|
m_modelType = MSLR;
|
|
} else if (config[i] == "monotonicity") {
|
|
m_modelType = Monotonic;
|
|
} else if (config[i] == "leftright") {
|
|
m_modelType = LeftRight;
|
|
}
|
|
|
|
// unidirectional is deprecated, use backward instead
|
|
else if (config[i] == "unidirectional") {
|
|
m_direction = Backward;
|
|
} else if (config[i] == "backward") {
|
|
m_direction = Backward;
|
|
} else if (config[i] == "forward") {
|
|
m_direction = Forward;
|
|
} else if (config[i] == "bidirectional") {
|
|
m_direction = Bidirectional;
|
|
}
|
|
|
|
else if (config[i] == "f") {
|
|
m_condition = F;
|
|
} else if (config[i] == "fe") {
|
|
m_condition = FE;
|
|
}
|
|
|
|
else if (config[i] == "collapseff") {
|
|
m_collapseScores = true;
|
|
} else if (config[i] == "allff") {
|
|
m_collapseScores = false;
|
|
} else {
|
|
std::cerr
|
|
<< "Illegal part in the lexical reordering configuration string: "
|
|
<< config[i] << std::endl;
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
if (m_modelType == None) {
|
|
std::cerr
|
|
<< "You need to specify the type of the reordering model "
|
|
<< "(msd, monotonicity,...)" << std::endl;
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
LRState *
|
|
LRModel::
|
|
CreateLRState(const InputType &input) const
|
|
{
|
|
LRState *bwd = NULL, *fwd = NULL;
|
|
size_t offset = 0;
|
|
|
|
switch(m_direction) {
|
|
case Backward:
|
|
case Bidirectional:
|
|
if (m_phraseBased)
|
|
bwd = new PhraseBasedReorderingState(*this, Backward, offset);
|
|
else
|
|
bwd = new HReorderingBackwardState(*this, offset);
|
|
offset += m_collapseScores ? 1 : GetNumberOfTypes();
|
|
if (m_direction == Backward) return bwd; // else fall through
|
|
case Forward:
|
|
if (m_phraseBased)
|
|
fwd = new PhraseBasedReorderingState(*this, Forward, offset);
|
|
else
|
|
fwd = new HReorderingForwardState(*this, input.GetSize(), offset);
|
|
offset += m_collapseScores ? 1 : GetNumberOfTypes();
|
|
if (m_direction == Forward) return fwd;
|
|
}
|
|
return new BidirectionalReorderingState(*this, bwd, fwd, 0);
|
|
}
|
|
|
|
|
|
void
|
|
LRState::
|
|
CopyScores(ScoreComponentCollection* accum,
|
|
const TranslationOption &topt,
|
|
const InputType& input,
|
|
ReorderingType reoType) const
|
|
{
|
|
// don't call this on a bidirectional object
|
|
UTIL_THROW_IF2(m_direction != LRModel::Backward &&
|
|
m_direction != LRModel::Forward,
|
|
"Unknown direction: " << m_direction);
|
|
|
|
TranslationOption const* relevantOpt = ((m_direction == LRModel::Backward)
|
|
? &topt : m_prevOption);
|
|
|
|
LexicalReordering* producer = m_configuration.GetScoreProducer();
|
|
Scores const* cached = relevantOpt->GetLexReorderingScores(producer);
|
|
|
|
// The approach here is bizarre! Why create a whole vector and do
|
|
// vector addition (acumm->PlusEquals) to update a single value? - UG
|
|
size_t off_remote = m_offset + reoType;
|
|
size_t off_local = m_configuration.CollapseScores() ? m_offset : off_remote;
|
|
|
|
UTIL_THROW_IF2(off_remote >= producer->GetNumScoreComponents(),
|
|
"offset out of vector bounds!");
|
|
|
|
// look up applicable score from vectore of scores
|
|
if(cached) {
|
|
Scores scores(producer->GetNumScoreComponents(),0);
|
|
scores[off_local ] = (*cached)[off_remote];
|
|
accum->PlusEquals(producer, scores);
|
|
}
|
|
|
|
// else: use default scores (if specified)
|
|
else if (producer->GetHaveDefaultScores()) {
|
|
Scores scores(producer->GetNumScoreComponents(),0);
|
|
scores[off_local] = producer->GetDefaultScore(off_remote);
|
|
accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
|
|
}
|
|
// note: if no default score, no cost
|
|
|
|
const SparseReordering* sparse = m_configuration.GetSparseReordering();
|
|
if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType,
|
|
m_direction, accum);
|
|
}
|
|
|
|
|
|
int
|
|
LRState::
|
|
ComparePrevScores(const TranslationOption *other) const
|
|
{
|
|
LexicalReordering* producer = m_configuration.GetScoreProducer();
|
|
const Scores* myScores = m_prevOption->GetLexReorderingScores(producer);
|
|
const Scores* yrScores = other->GetLexReorderingScores(producer);
|
|
|
|
if(myScores == yrScores) return 0;
|
|
|
|
// The pointers are NULL if a phrase pair isn't found in the reordering table.
|
|
if(yrScores == NULL) return -1;
|
|
if(myScores == NULL) return 1;
|
|
|
|
size_t stop = m_offset + m_configuration.GetNumberOfTypes();
|
|
for(size_t i = m_offset; i < stop; i++) {
|
|
if((*myScores)[i] < (*yrScores)[i]) return -1;
|
|
if((*myScores)[i] > (*yrScores)[i]) return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// ===========================================================================
|
|
// PHRASE BASED REORDERING STATE
|
|
// ===========================================================================
|
|
bool PhraseBasedReorderingState::m_useFirstBackwardScore = true;
|
|
|
|
PhraseBasedReorderingState::
|
|
PhraseBasedReorderingState(const PhraseBasedReorderingState *prev,
|
|
const TranslationOption &topt)
|
|
: LRState(prev, topt)
|
|
, m_prevRange(topt.GetSourceWordsRange())
|
|
, m_first(false)
|
|
{ }
|
|
|
|
|
|
PhraseBasedReorderingState::
|
|
PhraseBasedReorderingState(const LRModel &config,
|
|
LRModel::Direction dir, size_t offset)
|
|
: LRState(config, dir, offset)
|
|
, m_prevRange(NOT_FOUND,NOT_FOUND)
|
|
, m_first(true)
|
|
{ }
|
|
|
|
|
|
int
|
|
PhraseBasedReorderingState::
|
|
Compare(const FFState& o) const
|
|
{
|
|
if (&o == this) return 0;
|
|
|
|
const PhraseBasedReorderingState* other = static_cast<const PhraseBasedReorderingState*>(&o);
|
|
if (m_prevRange == other->m_prevRange) {
|
|
if (m_direction == LRModel::Forward) {
|
|
return ComparePrevScores(other->m_prevOption);
|
|
} else {
|
|
return 0;
|
|
}
|
|
} else if (m_prevRange < other->m_prevRange) {
|
|
return -1;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
LRState*
|
|
PhraseBasedReorderingState::
|
|
Expand(const TranslationOption& topt, const InputType& input,
|
|
ScoreComponentCollection* scores) const
|
|
{
|
|
// const LRModel::ModelType modelType = m_configuration.GetModelType();
|
|
|
|
if ((m_direction != LRModel::Forward && m_useFirstBackwardScore) || !m_first) {
|
|
LRModel const& lrmodel = m_configuration;
|
|
WordsRange const cur = topt.GetSourceWordsRange();
|
|
LRModel::ReorderingType reoType = (m_first ? lrmodel.GetOrientation(cur)
|
|
: lrmodel.GetOrientation(m_prevRange,cur));
|
|
CopyScores(scores, topt, input, reoType);
|
|
}
|
|
return new PhraseBasedReorderingState(this, topt);
|
|
}
|
|
|
|
|
|
///////////////////////////
|
|
//BidirectionalReorderingState
|
|
|
|
int
|
|
BidirectionalReorderingState::
|
|
Compare(FFState const& o) const
|
|
{
|
|
if (&o == this) return 0;
|
|
|
|
BidirectionalReorderingState const &other
|
|
= static_cast<BidirectionalReorderingState const&>(o);
|
|
|
|
int cmp = m_backward->Compare(*other.m_backward);
|
|
return (cmp < 0) ? -1 : cmp ? 1 : m_forward->Compare(*other.m_forward);
|
|
}
|
|
|
|
LRState*
|
|
BidirectionalReorderingState::
|
|
Expand(const TranslationOption& topt, const InputType& input,
|
|
ScoreComponentCollection* scores) const
|
|
{
|
|
LRState *newbwd = m_backward->Expand(topt,input, scores);
|
|
LRState *newfwd = m_forward->Expand(topt, input, scores);
|
|
return new BidirectionalReorderingState(m_configuration, newbwd, newfwd, m_offset);
|
|
}
|
|
|
|
///////////////////////////
|
|
//HierarchicalReorderingBackwardState
|
|
|
|
HReorderingBackwardState::
|
|
HReorderingBackwardState(const HReorderingBackwardState *prev,
|
|
const TranslationOption &topt,
|
|
ReorderingStack reoStack)
|
|
: LRState(prev, topt), m_reoStack(reoStack)
|
|
{ }
|
|
|
|
HReorderingBackwardState::
|
|
HReorderingBackwardState(const LRModel &config, size_t offset)
|
|
: LRState(config, LRModel::Backward, offset)
|
|
{ }
|
|
|
|
|
|
int
|
|
HReorderingBackwardState::
|
|
Compare(const FFState& o) const
|
|
{
|
|
const HReorderingBackwardState& other
|
|
= static_cast<const HReorderingBackwardState&>(o);
|
|
return m_reoStack.Compare(other.m_reoStack);
|
|
}
|
|
|
|
LRState*
|
|
HReorderingBackwardState::
|
|
Expand(const TranslationOption& topt, const InputType& input,
|
|
ScoreComponentCollection* scores) const
|
|
{
|
|
HReorderingBackwardState* nextState;
|
|
nextState = new HReorderingBackwardState(this, topt, m_reoStack);
|
|
WordsRange swrange = topt.GetSourceWordsRange();
|
|
int reoDistance = nextState->m_reoStack.ShiftReduce(swrange);
|
|
ReorderingType reoType = m_configuration.GetOrientation(reoDistance);
|
|
CopyScores(scores, topt, input, reoType);
|
|
return nextState;
|
|
}
|
|
|
|
///////////////////////////
|
|
//HReorderingForwardState
|
|
|
|
HReorderingForwardState::
|
|
HReorderingForwardState(const LRModel &config,
|
|
size_t size, size_t offset)
|
|
: LRState(config, LRModel::Forward, offset)
|
|
, m_first(true)
|
|
, m_prevRange(NOT_FOUND,NOT_FOUND)
|
|
, m_coverage(size)
|
|
{ }
|
|
|
|
HReorderingForwardState::
|
|
HReorderingForwardState(const HReorderingForwardState *prev,
|
|
const TranslationOption &topt)
|
|
: LRState(prev, topt)
|
|
, m_first(false)
|
|
, m_prevRange(topt.GetSourceWordsRange())
|
|
, m_coverage(prev->m_coverage)
|
|
{
|
|
m_coverage.SetValue(topt.GetSourceWordsRange(), true);
|
|
}
|
|
|
|
int
|
|
HReorderingForwardState::
|
|
Compare(const FFState& o) const
|
|
{
|
|
if (&o == this) return 0;
|
|
|
|
HReorderingForwardState const& other
|
|
= static_cast<HReorderingForwardState const&>(o);
|
|
|
|
return ((m_prevRange == other.m_prevRange)
|
|
? ComparePrevScores(other.m_prevOption)
|
|
: (m_prevRange < other.m_prevRange) ? -1 : 1);
|
|
}
|
|
|
|
// For compatibility with the phrase-based reordering model, scoring is one
|
|
// step delayed.
|
|
// The forward model takes determines orientations heuristically as follows:
|
|
// mono: if the next phrase comes after the conditioning phrase and
|
|
// - there is a gap to the right of the conditioning phrase, or
|
|
// - the next phrase immediately follows it
|
|
// swap: if the next phrase goes before the conditioning phrase and
|
|
// - there is a gap to the left of the conditioning phrase, or
|
|
// - the next phrase immediately precedes it
|
|
// dright: if the next phrase follows the conditioning phrase and other
|
|
// stuff comes in between
|
|
// dleft: if the next phrase precedes the conditioning phrase and other
|
|
// stuff comes in between
|
|
|
|
LRState*
|
|
HReorderingForwardState::
|
|
Expand(TranslationOption const& topt, InputType const& input,
|
|
ScoreComponentCollection* scores) const
|
|
{
|
|
const WordsRange cur = topt.GetSourceWordsRange();
|
|
// keep track of the current coverage ourselves so we don't need the hypothesis
|
|
WordsBitmap cov = m_coverage;
|
|
cov.SetValue(cur, true);
|
|
if (!m_first) {
|
|
LRModel::ReorderingType reoType;
|
|
reoType = m_configuration.GetOrientation(m_prevRange,cur,cov);
|
|
CopyScores(scores, topt, input, reoType);
|
|
}
|
|
return new HReorderingForwardState(this, topt);
|
|
}
|
|
}
|
|
|