mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 13:23:25 +03:00
Adapted code to deal with new return type of TranslationOptionCollection::GetTranslationOptionList(...).
This commit is contained in:
parent
68f418a12a
commit
80a9f84422
@ -3,6 +3,8 @@
|
|||||||
#include "SearchNormal.h"
|
#include "SearchNormal.h"
|
||||||
#include "SentenceStats.h"
|
#include "SentenceStats.h"
|
||||||
|
|
||||||
|
#include <boost/foreach.hpp>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
namespace Moses
|
namespace Moses
|
||||||
@ -104,136 +106,140 @@ void SearchNormal::ProcessSentence()
|
|||||||
* violation of reordering limits.
|
* violation of reordering limits.
|
||||||
* \param hypothesis hypothesis to be expanded upon
|
* \param hypothesis hypothesis to be expanded upon
|
||||||
*/
|
*/
|
||||||
void SearchNormal::ProcessOneHypothesis(const Hypothesis &hypothesis)
|
void
|
||||||
|
SearchNormal::
|
||||||
|
ProcessOneHypothesis(const Hypothesis &hypothesis)
|
||||||
{
|
{
|
||||||
// since we check for reordering limits, it's good to have that limit handy
|
// since we check for reordering limits, it's good to have that limit handy
|
||||||
int maxDistortion = StaticData::Instance().GetMaxDistortion();
|
int maxDistortion = StaticData::Instance().GetMaxDistortion();
|
||||||
bool isWordLattice = StaticData::Instance().GetInputType() == WordLatticeInput;
|
bool isWordLattice = StaticData::Instance().GetInputType() == WordLatticeInput;
|
||||||
|
|
||||||
|
const WordsBitmap hypoBitmap = hypothesis.GetWordsBitmap();
|
||||||
|
const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos();
|
||||||
|
size_t const sourceSize = m_source.GetSize();
|
||||||
|
|
||||||
|
ReorderingConstraint const&
|
||||||
|
ReoConstraint = m_source.GetReorderingConstraint();
|
||||||
|
|
||||||
// no limit of reordering: only check for overlap
|
// no limit of reordering: only check for overlap
|
||||||
if (maxDistortion < 0) {
|
if (maxDistortion < 0) {
|
||||||
const WordsBitmap hypoBitmap = hypothesis.GetWordsBitmap();
|
|
||||||
const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos()
|
|
||||||
, sourceSize = m_source.GetSize();
|
|
||||||
|
|
||||||
for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos) {
|
for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos)
|
||||||
size_t maxSize = sourceSize - startPos;
|
{
|
||||||
size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
|
TranslationOptionList const* tol;
|
||||||
maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
|
size_t endPos = startPos;
|
||||||
|
for (tol = m_transOptColl.GetTranslationOptionList(startPos, endPos);
|
||||||
|
tol && endPos < sourceSize;
|
||||||
|
tol = m_transOptColl.GetTranslationOptionList(startPos, ++endPos))
|
||||||
|
{
|
||||||
|
if (tol->size() == 0
|
||||||
|
|| hypoBitmap.Overlap(WordsRange(startPos, endPos))
|
||||||
|
|| !ReoConstraint.Check(hypoBitmap, startPos, endPos))
|
||||||
|
{ continue; }
|
||||||
|
|
||||||
for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos) {
|
//TODO: does this method include incompatible WordLattice hypotheses?
|
||||||
// basic checks
|
ExpandAllHypotheses(hypothesis, startPos, endPos);
|
||||||
// there have to be translation options
|
}
|
||||||
if (m_transOptColl.GetTranslationOptionList(WordsRange(startPos, endPos)).size() == 0 ||
|
|
||||||
// no overlap with existing words
|
|
||||||
hypoBitmap.Overlap(WordsRange(startPos, endPos)) ||
|
|
||||||
// specified reordering constraints (set with -monotone-at-punctuation or xml)
|
|
||||||
!m_source.GetReorderingConstraint().Check( hypoBitmap, startPos, endPos ) ) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
//TODO: does this method include incompatible WordLattice hypotheses?
|
|
||||||
ExpandAllHypotheses(hypothesis, startPos, endPos);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return; // done with special case (no reordering limit)
|
return; // done with special case (no reordering limit)
|
||||||
}
|
}
|
||||||
|
|
||||||
// if there are reordering limits, make sure it is not violated
|
// There are reordering limits. Make sure they are not violated.
|
||||||
// the coverage bitmap is handy here (and the position of the first gap)
|
|
||||||
const WordsBitmap hypoBitmap = hypothesis.GetWordsBitmap();
|
|
||||||
const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos()
|
|
||||||
, sourceSize = m_source.GetSize();
|
|
||||||
|
|
||||||
// MAIN LOOP. go through each possible range
|
WordsRange prevRange = hypothesis.GetCurrSourceWordsRange();
|
||||||
for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos) {
|
for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos)
|
||||||
// don't bother expanding phrases if the first position is already taken
|
{
|
||||||
if(hypoBitmap.GetValue(startPos))
|
|
||||||
continue;
|
// don't bother expanding phrases if the first position is already taken
|
||||||
|
if(hypoBitmap.GetValue(startPos)) continue;
|
||||||
|
|
||||||
WordsRange prevRange = hypothesis.GetCurrSourceWordsRange();
|
size_t maxSize = sourceSize - startPos;
|
||||||
|
size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
|
||||||
|
maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
|
||||||
|
size_t closestLeft = hypoBitmap.GetEdgeToTheLeftOf(startPos);
|
||||||
|
|
||||||
size_t maxSize = sourceSize - startPos;
|
if (isWordLattice)
|
||||||
size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
|
{
|
||||||
maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
|
// first question: is there a path from the closest translated word to the left
|
||||||
size_t closestLeft = hypoBitmap.GetEdgeToTheLeftOf(startPos);
|
// of the hypothesized extension to the start of the hypothesized extension?
|
||||||
if (isWordLattice) {
|
// long version:
|
||||||
// first question: is there a path from the closest translated word to the left
|
// - is there anything to our left?
|
||||||
// of the hypothesized extension to the start of the hypothesized extension?
|
// - is it farther left than where we're starting anyway?
|
||||||
// long version: is there anything to our left? is it farther left than where we're starting anyway? can we get to it?
|
// - can we get to it?
|
||||||
// closestLeft is exclusive: a value of 3 means 2 is covered, our arc is currently ENDING at 3 and can start at 3 implicitly
|
|
||||||
if (closestLeft != 0 && closestLeft != startPos && !m_source.CanIGetFromAToB(closestLeft, startPos)) {
|
// closestLeft is exclusive: a value of 3 means 2 is covered, our
|
||||||
continue;
|
// arc is currently ENDING at 3 and can start at 3 implicitly
|
||||||
}
|
if (closestLeft != 0 && closestLeft != startPos
|
||||||
if (prevRange.GetStartPos() != NOT_FOUND &&
|
&& !m_source.CanIGetFromAToB(closestLeft, startPos))
|
||||||
prevRange.GetStartPos() > startPos && !m_source.CanIGetFromAToB(startPos, prevRange.GetStartPos())) {
|
continue;
|
||||||
continue;
|
|
||||||
}
|
if (prevRange.GetStartPos() != NOT_FOUND &&
|
||||||
|
prevRange.GetStartPos() > startPos &&
|
||||||
|
!m_source.CanIGetFromAToB(startPos, prevRange.GetStartPos()))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
WordsRange currentStartRange(startPos, startPos);
|
||||||
|
if(m_source.ComputeDistortionDistance(prevRange, currentStartRange)
|
||||||
|
> maxDistortion)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
TranslationOptionList const* tol;
|
||||||
|
size_t endPos = startPos;
|
||||||
|
for (tol = m_transOptColl.GetTranslationOptionList(startPos, endPos);
|
||||||
|
tol && endPos < sourceSize;
|
||||||
|
tol = m_transOptColl.GetTranslationOptionList(startPos, ++endPos))
|
||||||
|
{
|
||||||
|
WordsRange extRange(startPos, endPos);
|
||||||
|
if (tol->size() == 0
|
||||||
|
|| hypoBitmap.Overlap(extRange)
|
||||||
|
|| !ReoConstraint.Check(hypoBitmap, startPos, endPos)
|
||||||
|
|| (isWordLattice && !m_source.IsCoveragePossible(extRange)))
|
||||||
|
{ continue; }
|
||||||
|
|
||||||
|
// ask second question here: we already know we can get to our
|
||||||
|
// starting point from the closest thing to the left. We now ask the
|
||||||
|
// follow up: can we get from our end to the closest thing on the
|
||||||
|
// right?
|
||||||
|
//
|
||||||
|
// long version: is anything to our right? is it farther
|
||||||
|
// right than our (inclusive) end? can our end reach it?
|
||||||
|
bool isLeftMostEdge = (hypoFirstGapPos == startPos);
|
||||||
|
|
||||||
|
size_t closestRight = hypoBitmap.GetEdgeToTheRightOf(endPos);
|
||||||
|
if (isWordLattice) {
|
||||||
|
if (closestRight != endPos
|
||||||
|
&& ((closestRight + 1) < sourceSize)
|
||||||
|
&& !m_source.CanIGetFromAToB(endPos + 1, closestRight + 1))
|
||||||
|
{ continue; }
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isLeftMostEdge)
|
||||||
|
{ // any length extension is okay if starting at left-most edge
|
||||||
|
ExpandAllHypotheses(hypothesis, startPos, endPos);
|
||||||
|
}
|
||||||
|
else // starting somewhere other than left-most edge, use caution
|
||||||
|
{
|
||||||
|
// the basic idea is this: we would like to translate a phrase
|
||||||
|
// starting from a position further right than the left-most
|
||||||
|
// open gap. The distortion penalty for the following phrase
|
||||||
|
// will be computed relative to the ending position of the
|
||||||
|
// current extension, so we ask now what its maximum value will
|
||||||
|
// be (which will always be the value of the hypothesis starting
|
||||||
|
// at the left-most edge). If this value is greater than the
|
||||||
|
// distortion limit, we don't allow this extension to be made.
|
||||||
|
WordsRange bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);
|
||||||
|
|
||||||
|
if (m_source.ComputeDistortionDistance(extRange, bestNextExtension)
|
||||||
|
> maxDistortion) continue;
|
||||||
|
|
||||||
|
// everything is fine, we're good to go
|
||||||
|
ExpandAllHypotheses(hypothesis, startPos, endPos);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
WordsRange currentStartRange(startPos, startPos);
|
|
||||||
if(m_source.ComputeDistortionDistance(prevRange, currentStartRange) > maxDistortion)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos) {
|
|
||||||
// basic checks
|
|
||||||
WordsRange extRange(startPos, endPos);
|
|
||||||
// there have to be translation options
|
|
||||||
if (m_transOptColl.GetTranslationOptionList(extRange).size() == 0 ||
|
|
||||||
// no overlap with existing words
|
|
||||||
hypoBitmap.Overlap(extRange) ||
|
|
||||||
// specified reordering constraints (set with -monotone-at-punctuation or xml)
|
|
||||||
!m_source.GetReorderingConstraint().Check( hypoBitmap, startPos, endPos ) || //
|
|
||||||
// connection in input word lattice
|
|
||||||
(isWordLattice && !m_source.IsCoveragePossible(extRange))) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ask second question here:
|
|
||||||
// we already know we can get to our starting point from the closest thing to the left. We now ask the follow up:
|
|
||||||
// can we get from our end to the closest thing on the right?
|
|
||||||
// long version: is anything to our right? is it farther right than our (inclusive) end? can our end reach it?
|
|
||||||
bool leftMostEdge = (hypoFirstGapPos == startPos);
|
|
||||||
|
|
||||||
// closest right definition:
|
|
||||||
size_t closestRight = hypoBitmap.GetEdgeToTheRightOf(endPos);
|
|
||||||
if (isWordLattice) {
|
|
||||||
//if (!leftMostEdge && closestRight != endPos && closestRight != sourceSize && !m_source.CanIGetFromAToB(endPos, closestRight + 1)) {
|
|
||||||
if (closestRight != endPos && ((closestRight + 1) < sourceSize) && !m_source.CanIGetFromAToB(endPos + 1, closestRight + 1)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// any length extension is okay if starting at left-most edge
|
|
||||||
if (leftMostEdge) {
|
|
||||||
ExpandAllHypotheses(hypothesis, startPos, endPos);
|
|
||||||
}
|
|
||||||
// starting somewhere other than left-most edge, use caution
|
|
||||||
else {
|
|
||||||
// the basic idea is this: we would like to translate a phrase starting
|
|
||||||
// from a position further right than the left-most open gap. The
|
|
||||||
// distortion penalty for the following phrase will be computed relative
|
|
||||||
// to the ending position of the current extension, so we ask now what
|
|
||||||
// its maximum value will be (which will always be the value of the
|
|
||||||
// hypothesis starting at the left-most edge). If this value is greater than
|
|
||||||
// the distortion limit, we don't allow this extension to be made.
|
|
||||||
WordsRange bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);
|
|
||||||
int required_distortion =
|
|
||||||
m_source.ComputeDistortionDistance(extRange, bestNextExtension);
|
|
||||||
|
|
||||||
if (required_distortion > maxDistortion) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// everything is fine, we're good to go
|
|
||||||
ExpandAllHypotheses(hypothesis, startPos, endPos);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Expand a hypothesis given a list of translation options
|
* Expand a hypothesis given a list of translation options
|
||||||
@ -242,7 +248,9 @@ void SearchNormal::ProcessOneHypothesis(const Hypothesis &hypothesis)
|
|||||||
* \param endPos last word position of span covered
|
* \param endPos last word position of span covered
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void SearchNormal::ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos)
|
void
|
||||||
|
SearchNormal::
|
||||||
|
ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos)
|
||||||
{
|
{
|
||||||
// early discarding: check if hypothesis is too bad to build
|
// early discarding: check if hypothesis is too bad to build
|
||||||
// this idea is explained in (Moore&Quirk, MT Summit 2007)
|
// this idea is explained in (Moore&Quirk, MT Summit 2007)
|
||||||
@ -250,15 +258,19 @@ void SearchNormal::ExpandAllHypotheses(const Hypothesis &hypothesis, size_t star
|
|||||||
if (StaticData::Instance().UseEarlyDiscarding()) {
|
if (StaticData::Instance().UseEarlyDiscarding()) {
|
||||||
// expected score is based on score of current hypothesis
|
// expected score is based on score of current hypothesis
|
||||||
expectedScore = hypothesis.GetScore();
|
expectedScore = hypothesis.GetScore();
|
||||||
|
|
||||||
// add new future score estimate
|
// add new future score estimate
|
||||||
expectedScore += m_transOptColl.GetFutureScore().CalcFutureScore( hypothesis.GetWordsBitmap(), startPos, endPos );
|
expectedScore +=
|
||||||
|
m_transOptColl.GetFutureScore()
|
||||||
|
.CalcFutureScore(hypothesis.GetWordsBitmap(), startPos, endPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
// loop through all translation options
|
// loop through all translation options
|
||||||
const TranslationOptionList &transOptList = m_transOptColl.GetTranslationOptionList(WordsRange(startPos, endPos));
|
const TranslationOptionList* tol
|
||||||
|
= m_transOptColl.GetTranslationOptionList(startPos, endPos);
|
||||||
|
if (!tol) return;
|
||||||
TranslationOptionList::const_iterator iter;
|
TranslationOptionList::const_iterator iter;
|
||||||
for (iter = transOptList.begin() ; iter != transOptList.end() ; ++iter) {
|
for (iter = tol->begin() ; iter != tol->end() ; ++iter) {
|
||||||
ExpandHypothesis(hypothesis, **iter, expectedScore);
|
ExpandHypothesis(hypothesis, **iter, expectedScore);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user