mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 22:14:57 +03:00
FIXED OPTION FOR FUTURE DISTORTION COST [Moore & Quirk 2007]
Summary of changes: 1) DistortionScoreProducer::CalculateDistortionScore: the previous implementation had the following bugs: - it wasn't correctly converting size_t to int - in the initial (empty) hypothesis, prefixEndPos was -2 instead of -1 - the number of words between phrases was always one too many. 2) DistortionScoreProducer::Evaluate: the new distortion state was assigned the first gap of the old hypothesis (it should be the current hypothesis's). 3) WordsRange::GetNumWordsBetween: it returned one word too many. For instance, the number of words between [0..1] and [2..3] was 1; now it's 0. 4) Parameter.cpp, StaticData.cpp, StaticData.h: added a binary option to activate future distortion cost (fdc) and the corresponding StaticData variable.
This commit is contained in:
parent
e5bec4a48b
commit
01c2b001b1
@ -61,28 +61,45 @@ std::string DistortionScoreProducer::GetScoreProducerWeightShortName(unsigned) c
|
||||
float DistortionScoreProducer::CalculateDistortionScore(const Hypothesis& hypo,
|
||||
const WordsRange &prev, const WordsRange &curr, const int FirstGap) const
|
||||
{
|
||||
const int USE_OLD = 1;
|
||||
if (USE_OLD) {
|
||||
if(!StaticData::Instance().UseFutureDistortionCost()) {
|
||||
return - (float) hypo.GetInput().ComputeDistortionDistance(prev, curr);
|
||||
}
|
||||
else {
|
||||
/* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
|
||||
Definitions:
|
||||
S : current source range
|
||||
S' : last translated source phrase range
|
||||
S'' : longest fully-translated initial segment
|
||||
*/
|
||||
|
||||
// Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
|
||||
int prefixEndPos = (int)FirstGap-1;
|
||||
if((int)FirstGap==-1)
|
||||
prefixEndPos = -1;
|
||||
|
||||
int prefixEndPos = FirstGap-1;
|
||||
if ((int) curr.GetStartPos() == prefixEndPos+1) {
|
||||
return 0;
|
||||
// case1: S is adjacent to S'' => return 0
|
||||
if ((int) curr.GetStartPos() == prefixEndPos+1) {
|
||||
IFVERBOSE(4) std::cerr<< "MQ07disto:case1" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// case2: S is to the left of S' => return 2(length(S))
|
||||
if ((int) curr.GetEndPos() < (int) prev.GetEndPos()) {
|
||||
IFVERBOSE(4) std::cerr<< "MQ07disto:case2" << std::endl;
|
||||
return (float) -2*(int)curr.GetNumWordsCovered();
|
||||
}
|
||||
|
||||
// case3: S' is a subsequence of S'' => return 2(nbWordBetween(S,S'')+length(S))
|
||||
if ((int) prev.GetEndPos() <= prefixEndPos) {
|
||||
IFVERBOSE(4) std::cerr<< "MQ07disto:case3" << std::endl;
|
||||
int z = (int)curr.GetStartPos()-prefixEndPos - 1;
|
||||
return (float) -2*(z + (int)curr.GetNumWordsCovered());
|
||||
}
|
||||
|
||||
// case4: otherwise => return 2(nbWordBetween(S,S')+length(S))
|
||||
IFVERBOSE(4) std::cerr<< "MQ07disto:case4" << std::endl;
|
||||
return (float) -2*((int)curr.GetNumWordsBetween(prev) + (int)curr.GetNumWordsCovered());
|
||||
|
||||
}
|
||||
|
||||
if ((int) curr.GetEndPos() < (int) prev.GetEndPos()) {
|
||||
return (float) -2*curr.GetNumWordsCovered();
|
||||
}
|
||||
|
||||
if ((int) prev.GetEndPos() <= prefixEndPos) {
|
||||
int z = curr.GetStartPos()-prefixEndPos;
|
||||
return (float) -2*(z + curr.GetNumWordsCovered());
|
||||
}
|
||||
|
||||
return (float) -2*(curr.GetNumWordsBetween(prev) + curr.GetNumWordsCovered());
|
||||
}
|
||||
|
||||
size_t DistortionScoreProducer::GetNumInputScores() const
|
||||
@ -104,7 +121,7 @@ FFState* DistortionScoreProducer::Evaluate(
|
||||
out->PlusEquals(this, distortionScore);
|
||||
DistortionState_traditional* res = new DistortionState_traditional(
|
||||
hypo.GetCurrSourceWordsRange(),
|
||||
hypo.GetPrevHypo()->GetWordsBitmap().GetFirstGapPos());
|
||||
hypo.GetWordsBitmap().GetFirstGapPos());
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -98,6 +98,7 @@ Parameter::Parameter()
|
||||
AddParam("monotone-at-punctuation", "mp", "do not reorder over punctuation");
|
||||
AddParam("distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables");
|
||||
AddParam("distortion", "configurations for each factorized/lexicalized reordering model.");
|
||||
AddParam("future-distortion-cost", "fdc", "include estimate of future cost in the distortion penalty [Moore & Quirk 2007]. Default is no");
|
||||
AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'ignore'");
|
||||
AddParam("xml-brackets", "xb", "specify strings to be used as xml tags opening and closing, e.g. \"{{ }}\" (default \"< >\"). Avoid square brackets because of configuration file format. Valid only with text input mode" );
|
||||
AddParam("minimum-bayes-risk", "mbr", "use miminum Bayes risk to determine best translation");
|
||||
|
@ -359,6 +359,9 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
|
||||
SetBooleanParameter(&m_cubePruningLazyScoring, "cube-pruning-lazy-scoring", false);
|
||||
|
||||
// include future distortion cost in distortion penalty
|
||||
SetBooleanParameter( &m_useFutureDistortionCost, "future-distortion-cost", false );
|
||||
|
||||
// unknown word processing
|
||||
SetBooleanParameter( &m_dropUnknown, "drop-unknown", false );
|
||||
|
||||
|
@ -97,12 +97,14 @@ protected:
|
||||
m_translationOptionThreshold,
|
||||
m_wordDeletionWeight;
|
||||
|
||||
|
||||
// PhraseTrans, Generation & LanguageModelScore has multiple weights.
|
||||
int m_maxDistortion;
|
||||
// do it differently from old pharaoh
|
||||
// -ve = no limit on distortion
|
||||
// 0 = no distortion (monotone in old pharaoh)
|
||||
bool m_reorderingConstraint; //! use additional reordering constraints
|
||||
bool m_useFutureDistortionCost;
|
||||
size_t
|
||||
m_maxHypoStackSize //! hypothesis-stack size that triggers pruning
|
||||
, m_minHypoStackDiversity //! minimum number of hypothesis in stack for each source word coverage
|
||||
@ -338,6 +340,9 @@ public:
|
||||
//! true unless m_earlyDiscardingThreshold equals negative infinity
bool UseEarlyDiscarding() const {
|
||||
return m_earlyDiscardingThreshold != -std::numeric_limits<float>::infinity();
|
||||
}
|
||||
//! returns the m_useFutureDistortionCost flag (future distortion cost option)
bool UseFutureDistortionCost() const {
|
||||
return m_useFutureDistortionCost;
|
||||
}
|
||||
//! returns the stored m_translationOptionThreshold value
float GetTranslationOptionThreshold() const {
|
||||
return m_translationOptionThreshold;
|
||||
}
|
||||
|
@ -78,10 +78,10 @@ public:
|
||||
CHECK(!Overlap(x));
|
||||
|
||||
if (x.m_endPos < m_startPos) {
|
||||
return m_startPos - x.m_endPos;
|
||||
return m_startPos - x.m_endPos - 1;
|
||||
}
|
||||
|
||||
return x.m_startPos - m_endPos;
|
||||
return x.m_startPos - m_endPos - 1;
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user