FIXED OPTION FOR FUTURE DISTORTION COST [Moore & Quirk 2007]

Summary of changes

1) DistortionScoreProducer::CalculateDistortionScore

The previous implementation had the following bugs:
- did not convert size_t to int correctly
- in the initial (empty) hypothesis, prefixEndPos was -2 instead of -1
- the number of words between phrases was always one too many

2) DistortionScoreProducer::Evaluate

The new distortion state was assigned the first gap of the previous hypothesis (it should be the first gap of the current hypothesis)

3) WordsRange::GetNumWordsBetween

It returned one word too many. For instance, the number of words between [0..1] and [2..3] was 1; now it is 0.

4) Parameter.cpp, StaticData.cpp, StaticData.h

Added a boolean option ("future-distortion-cost", short name "fdc") to activate the future distortion cost, and the corresponding StaticData member variable (m_useFutureDistortionCost)
This commit is contained in:
Arianna Bisazza 2012-06-21 17:41:05 +02:00
parent e5bec4a48b
commit 01c2b001b1
5 changed files with 46 additions and 20 deletions

View File

@ -61,28 +61,45 @@ std::string DistortionScoreProducer::GetScoreProducerWeightShortName(unsigned) c
float DistortionScoreProducer::CalculateDistortionScore(const Hypothesis& hypo,
const WordsRange &prev, const WordsRange &curr, const int FirstGap) const
{
const int USE_OLD = 1;
if (USE_OLD) {
if(!StaticData::Instance().UseFutureDistortionCost()) {
return - (float) hypo.GetInput().ComputeDistortionDistance(prev, curr);
}
else {
/* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
Definitions:
S : current source range
S' : last translated source phrase range
S'' : longest fully-translated initial segment
*/
// Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
int prefixEndPos = (int)FirstGap-1;
if((int)FirstGap==-1)
prefixEndPos = -1;
int prefixEndPos = FirstGap-1;
if ((int) curr.GetStartPos() == prefixEndPos+1) {
return 0;
// case1: S is adjacent to S'' => return 0
if ((int) curr.GetStartPos() == prefixEndPos+1) {
IFVERBOSE(4) std::cerr<< "MQ07disto:case1" << std::endl;
return 0;
}
// case2: S is to the left of S' => return 2(length(S))
if ((int) curr.GetEndPos() < (int) prev.GetEndPos()) {
IFVERBOSE(4) std::cerr<< "MQ07disto:case2" << std::endl;
return (float) -2*(int)curr.GetNumWordsCovered();
}
// case3: S' is a subsequence of S'' => return 2(nbWordBetween(S,S'')+length(S))
if ((int) prev.GetEndPos() <= prefixEndPos) {
IFVERBOSE(4) std::cerr<< "MQ07disto:case3" << std::endl;
int z = (int)curr.GetStartPos()-prefixEndPos - 1;
return (float) -2*(z + (int)curr.GetNumWordsCovered());
}
// case4: otherwise => return 2(nbWordBetween(S,S')+length(S))
IFVERBOSE(4) std::cerr<< "MQ07disto:case4" << std::endl;
return (float) -2*((int)curr.GetNumWordsBetween(prev) + (int)curr.GetNumWordsCovered());
}
if ((int) curr.GetEndPos() < (int) prev.GetEndPos()) {
return (float) -2*curr.GetNumWordsCovered();
}
if ((int) prev.GetEndPos() <= prefixEndPos) {
int z = curr.GetStartPos()-prefixEndPos;
return (float) -2*(z + curr.GetNumWordsCovered());
}
return (float) -2*(curr.GetNumWordsBetween(prev) + curr.GetNumWordsCovered());
}
size_t DistortionScoreProducer::GetNumInputScores() const
@ -104,7 +121,7 @@ FFState* DistortionScoreProducer::Evaluate(
out->PlusEquals(this, distortionScore);
DistortionState_traditional* res = new DistortionState_traditional(
hypo.GetCurrSourceWordsRange(),
hypo.GetPrevHypo()->GetWordsBitmap().GetFirstGapPos());
hypo.GetWordsBitmap().GetFirstGapPos());
return res;
}

View File

@ -98,6 +98,7 @@ Parameter::Parameter()
AddParam("monotone-at-punctuation", "mp", "do not reorder over punctuation");
AddParam("distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables");
AddParam("distortion", "configurations for each factorized/lexicalized reordering model.");
AddParam("future-distortion-cost", "fdc", "include estimate of future cost in the distortion penalty [Moore & Quirk 2007]. Default is no");
AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'ignore'");
AddParam("xml-brackets", "xb", "specify strings to be used as xml tags opening and closing, e.g. \"{{ }}\" (default \"< >\"). Avoid square brackets because of configuration file format. Valid only with text input mode" );
AddParam("minimum-bayes-risk", "mbr", "use miminum Bayes risk to determine best translation");

View File

@ -359,6 +359,9 @@ bool StaticData::LoadData(Parameter *parameter)
SetBooleanParameter(&m_cubePruningLazyScoring, "cube-pruning-lazy-scoring", false);
// include future distortion cost in distortion penalty
SetBooleanParameter( &m_useFutureDistortionCost, "future-distortion-cost", false );
// unknown word processing
SetBooleanParameter( &m_dropUnknown, "drop-unknown", false );

View File

@ -97,12 +97,14 @@ protected:
m_translationOptionThreshold,
m_wordDeletionWeight;
// PhraseTrans, Generation & LanguageModelScore has multiple weights.
int m_maxDistortion;
// do it differently from old pharaoh
// -ve = no limit on distortion
// 0 = no disortion (monotone in old pharaoh)
bool m_reorderingConstraint; //! use additional reordering constraints
bool m_useFutureDistortionCost;
size_t
m_maxHypoStackSize //! hypothesis-stack size that triggers pruning
, m_minHypoStackDiversity //! minimum number of hypothesis in stack for each source word coverage
@ -338,6 +340,9 @@ public:
bool UseEarlyDiscarding() const {
return m_earlyDiscardingThreshold != -std::numeric_limits<float>::infinity();
}
bool UseFutureDistortionCost() const {
return m_useFutureDistortionCost;
}
float GetTranslationOptionThreshold() const {
return m_translationOptionThreshold;
}

View File

@ -78,10 +78,10 @@ public:
CHECK(!Overlap(x));
if (x.m_endPos < m_startPos) {
return m_startPos - x.m_endPos;
return m_startPos - x.m_endPos - 1;
}
return x.m_startPos - m_endPos;
return x.m_startPos - m_endPos - 1;
}