Fixes to distortion and phrase feature.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/josiah@3325 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
bhaddow 2010-06-10 15:57:23 +00:00
parent f685adbb3a
commit 15f50ad35f
8 changed files with 89 additions and 52 deletions

View File

@ -21,18 +21,27 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "DistortionPenaltyFeature.h"
#include "DummyScoreProducers.h"
#include "Derivation.h"
#include "Gibbler.h"
#include "GibbsOperator.h"
namespace Josiah {
/**
 * Returns the distortion score for the current sample (m_sample).
 *
 * NOTE(review): as literally written, control leaves at the first `return`,
 * which hands back the score that the sample hypothesis' score breakdown
 * holds for the distortion score producer; everything after it is
 * unreachable. This text looks like a rendered diff in which the first body
 * is the pre-change implementation and the target-order walk below is its
 * replacement -- confirm against the repository before relying on either.
 */
FValue DistortionPenaltyFeature::computeScore() {
//FIXME: To be useful for consistency checking, this needs to be computed
//from scratch each time.
const Hypothesis* sampleHypo = m_sample->GetSampleHypothesis();
const ScoreComponentCollection& mosesScores = sampleHypo->GetScoreBreakdown();
const ScoreProducer* distortionProducer = StaticData::Instance().GetDistortionScoreProducer();
return mosesScores.GetScoreForProducer(distortionProducer);
// From-scratch computation: accumulate a non-positive penalty while walking
// the hypotheses reached through GetNextHypo(), starting at the target tail.
FValue distortion = 0;
//cerr << Derivation(*m_sample) << endl;
const Hypothesis* currHypo = m_sample->GetTargetTail(); //target tail
//step through in target order
// lastSrcEnd starts at -1 so that a first phrase whose source range begins at
// position 0 yields |0 - (-1 + 1)| = 0, i.e. no penalty.
int lastSrcEnd = -1;
// The loop advances first, so the tail hypothesis itself is never scored.
while ((currHypo = (currHypo->GetNextHypo()))) {
int srcStart = currHypo->GetCurrSourceWordsRange().GetStartPos();
// Penalty is the distance between where this phrase starts in the source and
// the position immediately after the previously visited phrase's source end.
distortion -= abs(srcStart - (lastSrcEnd+1));
lastSrcEnd = currHypo->GetCurrSourceWordsRange().GetEndPos();
}
//cerr << "distortion " << distortion << endl;
return distortion;
}
@ -40,9 +49,22 @@ FValue DistortionPenaltyFeature::getFlipUpdateScore(const TranslationOption* lef
const TargetGap& leftGap, const TargetGap& rightGap)
{
FValue distortion;
const Hypothesis* leftTgtNextHypo = leftGap.rightHypo;
const Hypothesis* rightTgtPrevHypo = rightGap.leftHypo;
//if the segments are contiguous and we're swapping, then these hypos have to be swapped so
//that they're in the order they'd appear in in the proposed target
if (leftGap.segment.GetEndPos()+1 == rightGap.segment.GetStartPos()) {
if (leftTgtNextHypo->GetCurrSourceWordsRange() != rightOption->GetSourceWordsRange()) {
const Hypothesis* tmp = leftTgtNextHypo;
leftTgtNextHypo = rightTgtPrevHypo;
rightTgtPrevHypo = tmp;
}
}
CheckValidReordering(leftOption->GetSourceWordsRange(), rightOption->GetSourceWordsRange(),
leftGap.leftHypo, leftGap.rightHypo,
rightGap.leftHypo, rightGap.rightHypo, distortion);
leftGap.leftHypo, leftTgtNextHypo,
rightTgtPrevHypo, rightGap.rightHypo, distortion);
//cerr << leftOption->GetSourceWordsRange() << " " << rightOption->GetSourceWordsRange() << " " << distortion << endl;
//cerr << "lg.rh" << leftTgtNextHypo->GetCurrSourceWordsRange() << " rg.lh" << rightTgtPrevHypo->GetCurrSourceWordsRange() << endl;
return distortion;
}

View File

@ -29,39 +29,6 @@ FeatureFunction::~FeatureFunction(){} // n.b. is pure virtual, must be empty
/**
 * Consistency check stub.
 *
 * The comparison logic (recompute this feature's scores via assignScore() and
 * compare them element by element with the supplied values) is entirely
 * commented out, so the function currently ignores featureValues and always
 * throws runtime_error("isConsistent() not yet implemented").
 *
 * @param featureValues  unused while the body is commented out.
 * @return never returns normally; the trailing `return true` is unreachable.
 * @throws runtime_error unconditionally.
 */
bool FeatureFunction::isConsistent(const FVector& featureValues){
/*FVector expectedFeatureValues;
assignScore(expectedFeatureValues);
vector<float> expectedVector = expectedFeatureValues.GetScoresForProducer(&getScoreProducer());
vector<float> actualVector = featureValues.GetScoresForProducer(&getScoreProducer());
VERBOSE(2, "Checking Feature " << getScoreProducer().GetScoreProducerDescription() << endl);
IFVERBOSE(2) {
VERBOSE(2, "Expected: ");
for (size_t i = 0; i < expectedVector.size(); ++i) {
VERBOSE(2, expectedVector[i] << ",");
}
VERBOSE(2,endl);
VERBOSE(2, "Actual: ");
for (size_t i = 0; i < actualVector.size(); ++i) {
VERBOSE(2, actualVector[i] << ",");
}
VERBOSE(2,endl);
}
if (expectedVector.size() != actualVector.size()) {
VERBOSE(1, "FF Mismatch: Feature vectors were of different size: "<< getScoreProducer().GetScoreProducerDescription() << endl);
return false;
}
for (size_t i = 0; i < expectedVector.size(); ++i) {
if (expectedVector[i] != actualVector[i]) {
VERBOSE(1, "FF Mismatch: Expected[" << i << "] = " << expectedVector[i] << " Actual[" << i << "] = " << actualVector[i] <<
" " << getScoreProducer().GetScoreProducerDescription() << endl);
return false;
}
}*/
// Everything above is disabled; callers get an exception rather than a verdict.
throw runtime_error("isConsistent() not yet implemented");
return true;
}
}//namespace

View File

@ -69,7 +69,7 @@ struct TargetGap {
* 2. When a new Sample() object is created to begin sampling on a new sentence:
* - init() - with the new sample
* - updateTarget() - to indicate to the FeatureFunction that the target words have changed
* - assignScore() - to tell the FeatureFunction to calculate its initial score
* - assignScore() - to tell the FeatureFunction to set its initial score.
* 3. When scoring possible transitions.
* - doXXX() - to calculate the score deltas.
* 4. When performing a transition.
@ -99,9 +99,6 @@ class FeatureFunction {
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) = 0;
/** Checks the internal consistency of the FeatureFunction by computing the scores from scratch and comparing with the
given scores. */
bool isConsistent(const FVector& featureValues);
virtual ~FeatureFunction() = 0;

View File

@ -105,9 +105,40 @@ namespace Josiah {
out << i->first << " " << i->second << endl;
}
}
FName FVector::DEFAULT_NAME("DEFAULT","");
const FValue FVector::DEFAULT = 0;
/**
 * Approximate equality test for two feature values.
 *
 * Two values are considered equal when they are exactly equal, or when their
 * absolute difference divided by the mean of their magnitudes is below 1e-4
 * (a purely relative tolerance).
 *
 * Magnitudes are computed with explicit comparisons rather than an
 * unqualified abs() call: depending on which headers and using-directives are
 * in scope, abs() can bind to the C `int abs(int)` overload, which would
 * truncate the operands to integers and make clearly different fractional
 * values compare equal.
 *
 * Notes on edge cases:
 *  - The mean can only be zero when both inputs are zero, and that case is
 *    handled by the exact-equality shortcut, so no division by zero occurs.
 *  - Because the tolerance is relative, zero never matches a non-zero value,
 *    however small.
 *  - Any comparison involving NaN yields false.
 *
 * @param lhs  first value
 * @param rhs  second value
 * @return true if the values are equal within the relative tolerance
 */
static bool equalsTolerance(FValue lhs, FValue rhs) {
  if (lhs == rhs) return true;
  static const FValue TOLERANCE = 1e-4;
  // Type-preserving |lhs - rhs|, |lhs| and |rhs|; no integer truncation.
  const FValue diff = (lhs > rhs) ? (lhs - rhs) : (rhs - lhs);
  const FValue lhsMagnitude = (lhs < 0) ? -lhs : lhs;
  const FValue rhsMagnitude = (rhs < 0) ? -rhs : rhs;
  const FValue mean = (lhsMagnitude + rhsMagnitude) / 2;
  //cerr << "ET " << lhs << " " << rhs << " " << diff << " " << mean << " " << endl;
  return diff / mean < TOLERANCE;
}
/**
 * Equality of two feature vectors.
 *
 * An object always equals itself. Otherwise the values stored under
 * DEFAULT_NAME must match exactly, and every feature present in either vector
 * must match the other vector's value for the same name according to
 * equalsTolerance().
 */
bool FVector::operator== (const FVector& rhs) const {
  if (this != &rhs) {
    // Default values are compared exactly, not with the tolerance.
    if (get(DEFAULT_NAME) != rhs.get(DEFAULT_NAME)) {
      return false;
    }

    // Every feature stored here must agree with rhs ...
    const_iterator mine = begin();
    while (mine != end()) {
      const bool matches = equalsTolerance(mine->second, rhs.get(mine->first));
      if (!matches) {
        return false;
      }
      ++mine;
    }

    // ... and every feature stored in rhs must agree with this vector.
    const_iterator theirs = rhs.begin();
    while (theirs != rhs.end()) {
      const bool matches = equalsTolerance(theirs->second, get(theirs->first));
      if (!matches) {
        return false;
      }
      ++theirs;
    }
  }
  return true;
}
/** Inequality: the exact negation of operator==. */
bool FVector::operator!= (const FVector& rhs) const {
  const bool same = operator==(rhs);
  return !same;
}
FName FVector::DEFAULT_NAME("DEFAULT","");
const FValue FVector::DEFAULT = 0;
ProxyFVector FVector::operator[](const FName& name) {
// At this point, we don't know whether operator[] was called, so we return
@ -318,7 +349,7 @@ namespace Josiah {
FValue product = 0.0;
for (const_iterator i = begin(); i != end(); ++i) {
if (i->first != DEFAULT_NAME) {
product += (i->second + lhsDefault)*(rhs.get(i->first) + rhsDefault);
product += ((i->second + lhsDefault)*(rhs.get(i->first) + rhsDefault));
}
}

View File

@ -68,6 +68,11 @@ class FVector
/** Empty feature vector, possibly with default value */
FVector(FValue defaultValue = DEFAULT);
//defaults
static FName DEFAULT_NAME;
static const FValue DEFAULT;
void clear();
@ -85,6 +90,10 @@ class FVector
/** Size */
size_t size() const {return m_features.size();}
/** Equality */
bool operator== (const FVector& rhs) const;
bool operator!= (const FVector& rhs) const;
friend class ProxyFVector;
@ -132,8 +141,7 @@ class FVector
static FName DEFAULT_NAME;
static const FValue DEFAULT;
std::map<FName,FValue,FNameComparator> m_features;

View File

@ -219,7 +219,7 @@ void Sample::FlipNodes(const TranslationOption& leftTgtOption, const Translation
UpdateHead(oldLeftHypo, newRightHypo, source_head);
UpdateHead(oldRightHypo, newRightHypo, target_head);
UpdateHead(oldLeftHypo, newRightHypo, target_head);
UpdateFeatureValues(deltaFV);
UpdateTargetWords();
@ -380,8 +380,16 @@ void Sample::UpdateFeatureValues(const FVector& deltaFV) {
}
/**
 * Checks the stored feature values against scores assigned afresh by the
 * extra feature functions.
 *
 * Each entry of _extra_features writes its score into a fresh FVector via
 * assignScore(); the result is compared with feature_values using
 * FVector::operator!=. On a mismatch both vectors are logged at verbosity 1
 * and a runtime_error carrying the difference (expected - actual) is thrown.
 *
 * NOTE(review): the assert on isConsistent() and the assignScore() call sit
 * side by side in the loop; this looks like a rendered diff where the assert
 * is the pre-change line and assignScore() its replacement -- confirm against
 * the repository.
 */
void Sample::CheckFeatureConsistency() const {
// Accumulates the scores assigned by every extra feature.
FVector expected;
for (size_t i = 0; i < _extra_features.size(); ++i) {
assert(_extra_features[i]->isConsistent(feature_values));
_extra_features[i]->assignScore(expected);
}
if (expected != feature_values) {
VERBOSE(1, "Expected: " << expected << endl);
VERBOSE(1, "Actual: " << feature_values << endl);
// Build the exception message from the element-wise difference.
ostringstream msg;
msg << "Score mismatch: e-a = " << (expected-feature_values);
throw runtime_error(msg.str());
}
}

View File

@ -57,6 +57,7 @@ static float ComputeDistortionDistance(const WordsRange& prev, const WordsRange&
} else {
dist = (int)prev.GetEndPos() - (int)current.GetStartPos() + 1 ;
}
//cerr << "Computing dist " << prev << " " << current << " " << -abs(dist) << endl;
return - (float) abs(dist);
}

View File

@ -48,6 +48,9 @@ namespace Josiah {
/** Assign the total score of this feature on the current hypo */
void PhraseFeature::assignScore(FVector& scores) {
for (size_t i = 0; i < m_featureNames.size(); ++i) {
scores[m_featureNames[i]] = 0;
}
const Hypothesis* currHypo = m_sample->GetTargetTail();
while ((currHypo = (currHypo->GetNextHypo()))) {
assign(&(currHypo->GetTranslationOption()), scores);