PhraseOrientationFeature: more sparse scores

This commit is contained in:
Matthias Huck 2015-01-29 20:23:41 +00:00
parent c33e9e81da
commit a2efb9afc7
2 changed files with 239 additions and 153 deletions

View File

@ -15,7 +15,6 @@
#include "moses/Hypothesis.h"
#include "moses/ChartHypothesis.h"
#include "moses/ChartManager.h"
#include "moses/FactorCollection.h"
#include "phrase-extract/extract-ghkm/Alignment.h"
@ -23,12 +22,17 @@ namespace Moses
{
// Orientation labels embedded in sparse feature names ("L2R"/"R2L" + label + ...):
// MORIENT = monotone, SORIENT = swap, DORIENT = discontinuous.
// They are passed around by pointer, so these definitions must outlive all callers.
const std::string PhraseOrientationFeature::MORIENT("M");
const std::string PhraseOrientationFeature::SORIENT("S");
const std::string PhraseOrientationFeature::DORIENT("D");
PhraseOrientationFeature::PhraseOrientationFeature(const std::string &line)
: StatefulFeatureFunction(6, line)
, m_glueTargetLHSStr("Q")
, m_glueTargetLHS(true)
, m_distinguishStates(true)
, m_useSparse(false)
, m_useSparseWord(false)
, m_useSparseNT(false)
, m_offsetR2LScores(m_numScoreComponents/2)
, m_weightsVector(StaticData::Instance().GetAllWeights().GetScoresForProducer(this))
, m_useTargetWordList(false)
@ -37,8 +41,7 @@ PhraseOrientationFeature::PhraseOrientationFeature(const std::string &line)
VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
ReadParameters();
FactorCollection &factorCollection = FactorCollection::Instance();
const Factor *factor = factorCollection.AddFactor(m_glueTargetLHSStr, true);
m_glueTargetLHS.SetFactor(0, factor);
m_glueTargetLHS = factorCollection.AddFactor(m_glueTargetLHSStr, true);
VERBOSE(1, " Done." << std::endl);
}
@ -49,8 +52,10 @@ void PhraseOrientationFeature::SetParameter(const std::string& key, const std::s
m_glueTargetLHSStr = value;
} else if (key == "distinguishStates") {
m_distinguishStates = Scan<bool>(value);
} else if (key == "sparse") {
m_useSparse = Scan<bool>(value);
} else if (key == "sparseWord") {
m_useSparseWord = Scan<bool>(value);
} else if (key == "sparseNT") {
m_useSparseNT = Scan<bool>(value);
} else if (key == "targetWordList") {
m_filenameTargetWordList = value;
} else if (key == "sourceWordList") {
@ -155,16 +160,16 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
// Dense scores
std::vector<float> newScores(m_numScoreComponents,0);
// State: used to propagate orientation probabilities in case of boundary non-terminals
PhraseOrientationFeatureState *state = new PhraseOrientationFeatureState(m_distinguishStates);
// Read Orientation property
const TargetPhrase &currTarPhr = hypo.GetCurrTargetPhrase();
const Word &currTarPhrLHS = currTarPhr.GetTargetLHS();
const Factor* currTarPhrLHS = currTarPhr.GetTargetLHS()[0];
const Phrase *currSrcPhr = currTarPhr.GetRuleSource();
// const Factor* targetLHS = currTarPhr.GetTargetLHS()[0];
// bool isGlueGrammarRule = false;
// State: used to propagate orientation probabilities in case of boundary non-terminals
PhraseOrientationFeatureState *state = new PhraseOrientationFeatureState(m_distinguishStates,m_useSparseWord,m_useSparseNT);
IFFEATUREVERBOSE(2) {
FEATUREVERBOSE(2, *currSrcPhr << std::endl);
FEATUREVERBOSE(2, currTarPhr << std::endl);
@ -201,6 +206,7 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
// consult subderivation
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
const TargetPhrase &prevTarPhr = prevHypo->GetCurrTargetPhrase();
const Factor* prevTarPhrLHS = prevTarPhr.GetTargetLHS()[0];
if (const PhraseProperty *property = prevTarPhr.GetProperty("Orientation")) {
const OrientationPhraseProperty *orientationPhraseProperty = static_cast<const OrientationPhraseProperty*>(property);
@ -291,7 +297,7 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
size_t heuristicScoreIndexL2R = GetHeuristicScoreIndex(scoresL2R, 0, possibleFutureOrientationsL2R);
newScores[heuristicScoreIndexL2R] += scoresL2R[heuristicScoreIndexL2R];
state->SetLeftBoundaryL2R(scoresL2R, heuristicScoreIndexL2R, possibleFutureOrientationsL2R, prevState);
state->SetLeftBoundaryL2R(scoresL2R, heuristicScoreIndexL2R, possibleFutureOrientationsL2R, prevTarPhrLHS, prevState);
if ( (possibleFutureOrientationsL2R & prevState->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations) == 0x4 ) {
// recursive: discontinuous orientation
@ -299,69 +305,49 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
<< possibleFutureOrientationsL2R << " & " << prevState->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations
<< " = " << (possibleFutureOrientationsL2R & prevState->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations)
<< std::endl);
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores);
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores, accumulator);
state->m_leftBoundaryRecursionGuard = true; // prevent subderivation from being scored recursively multiple times
}
}
}
if (!delayedScoringL2R) {
switch (l2rOrientation) {
case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
newScores[0] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono());
// if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores);
// sparse scores
if ( m_useSparse ) {
SparseL2RScore(prevHypo,accumulator,"M");
}
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
} else if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
newScores[1] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
// if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores);
// sparse scores
if ( m_useSparse ) {
SparseL2RScore(prevHypo,accumulator,"S");
}
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
} else if ( ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
// if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores);
// sparse scores
if ( m_useSparse ) {
SparseL2RScore(prevHypo,accumulator,"D");
}
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
// if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores);
// sparse scores
if ( m_useSparse ) {
SparseL2RScore(prevHypo,accumulator,"D");
}
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
// if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores);
// sparse scores
if ( m_useSparse ) {
SparseL2RScore(prevHypo,accumulator,"D");
}
break;
default:
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x4, newScores, accumulator);
} else {
UTIL_THROW2(GetScoreProducerDescription()
<< ": Unsupported orientation type.");
break;
}
// sparse scores
if ( m_useSparseWord ) {
SparseWordL2RScore(prevHypo,accumulator,ToString(l2rOrientation));
}
if ( m_useSparseNT ) {
SparseNonTerminalL2RScore(prevTarPhrLHS,accumulator,ToString(l2rOrientation));
}
}
@ -435,7 +421,7 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
size_t heuristicScoreIndexR2L = GetHeuristicScoreIndex(scoresR2L, m_offsetR2LScores, possibleFutureOrientationsR2L);
newScores[m_offsetR2LScores+heuristicScoreIndexR2L] += scoresR2L[heuristicScoreIndexR2L];
state->SetRightBoundaryR2L(scoresR2L, heuristicScoreIndexR2L, possibleFutureOrientationsR2L, prevState);
state->SetRightBoundaryR2L(scoresR2L, heuristicScoreIndexR2L, possibleFutureOrientationsR2L, prevTarPhrLHS, prevState);
if ( (possibleFutureOrientationsR2L & prevState->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations) == 0x4 ) {
// recursive: discontinuous orientation
@ -443,71 +429,52 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
<< possibleFutureOrientationsR2L << " & " << prevState->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations
<< " = " << (possibleFutureOrientationsR2L & prevState->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations)
<< std::endl);
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores);
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores, accumulator);
state->m_rightBoundaryRecursionGuard = true; // prevent subderivation from being scored recursively multiple times
}
}
}
if (!delayedScoringR2L) {
switch (r2lOrientation) {
case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
newScores[m_offsetR2LScores+0] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono());
// if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores);
// sparse scores
if ( m_useSparse ) {
SparseR2LScore(prevHypo,accumulator,"M");
}
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
} else if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
newScores[m_offsetR2LScores+1] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
// if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores);
// sparse scores
if ( m_useSparse ) {
SparseR2LScore(prevHypo,accumulator,"S");
}
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
} else if ( ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
// if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores);
// sparse scores
if ( m_useSparse ) {
SparseR2LScore(prevHypo,accumulator,"D");
}
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
// if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores);
// sparse scores
if ( m_useSparse ) {
SparseR2LScore(prevHypo,accumulator,"D");
}
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
// if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores);
// sparse scores
if ( m_useSparse ) {
SparseR2LScore(prevHypo,accumulator,"D");
}
break;
default:
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x4, newScores, accumulator);
} else {
UTIL_THROW2(GetScoreProducerDescription()
<< ": Unsupported orientation type.");
break;
}
// sparse scores
if ( m_useSparseWord ) {
SparseWordR2LScore(prevHypo,accumulator,ToString(r2lOrientation));
}
if ( m_useSparseNT ) {
SparseNonTerminalR2LScore(prevTarPhrLHS,accumulator,ToString(r2lOrientation));
}
}
} else {
// abort with error message if the phrase does not translate an unknown word
UTIL_THROW_IF2(!prevTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
@ -564,9 +531,13 @@ size_t PhraseOrientationFeature::GetHeuristicScoreIndex(const std::vector<float>
void PhraseOrientationFeature::LeftBoundaryL2RScoreRecursive(int featureID,
const PhraseOrientationFeatureState *state,
const std::bitset<3> orientation,
std::vector<float>& newScores) const
std::vector<float>& newScores,
ScoreComponentCollection* scoreBreakdown) const
// TODO: passing both newScores and scoreBreakdown seems redundant (scoreBreakdown needed for sparse scores)
{
if (state->m_leftBoundaryIsSet) {
const std::string* recursiveOrientationString;
// subtract heuristic score from subderivation
newScores[state->m_leftBoundaryNonTerminalL2RHeuristicScoreIndex] -= state->m_leftBoundaryNonTerminalL2RScores[state->m_leftBoundaryNonTerminalL2RHeuristicScoreIndex];
@ -574,20 +545,25 @@ void PhraseOrientationFeature::LeftBoundaryL2RScoreRecursive(int featureID,
std::bitset<3> recursiveOrientation = orientation;
if ( (orientation == 0x4) || (orientation == 0x0) ) {
// discontinuous
recursiveOrientationString = &DORIENT;
newScores[2] += state->GetLeftBoundaryL2RScoreDiscontinuous();
} else {
recursiveOrientation &= state->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations;
if ( recursiveOrientation == 0x1 ) {
// monotone
recursiveOrientationString = &MORIENT;
newScores[0] += state->GetLeftBoundaryL2RScoreMono();
} else if ( recursiveOrientation == 0x2 ) {
// swap
recursiveOrientationString = &SORIENT;
newScores[1] += state->GetLeftBoundaryL2RScoreSwap();
} else if ( recursiveOrientation == 0x4 ) {
// discontinuous
recursiveOrientationString = &DORIENT;
newScores[2] += state->GetLeftBoundaryL2RScoreDiscontinuous();
} else if ( recursiveOrientation == 0x0 ) {
// discontinuous
recursiveOrientationString = &DORIENT;
newScores[2] += state->GetLeftBoundaryL2RScoreDiscontinuous();
} else {
UTIL_THROW2(GetScoreProducerDescription()
@ -595,13 +571,17 @@ void PhraseOrientationFeature::LeftBoundaryL2RScoreRecursive(int featureID,
}
}
if ( m_useSparseNT ) {
SparseNonTerminalL2RScore(state->m_leftBoundaryNonTerminalSymbol,scoreBreakdown,recursiveOrientationString);
}
FEATUREVERBOSE(6, "Left boundary recursion: " << orientation << " & " << state->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations << " = " << recursiveOrientation
<< " --- Subtracted heuristic score: " << state->m_leftBoundaryNonTerminalL2RScores[state->m_leftBoundaryNonTerminalL2RHeuristicScoreIndex] << std::endl);
if (!state->m_leftBoundaryRecursionGuard) {
// recursive call
const PhraseOrientationFeatureState* prevState = state->m_leftBoundaryPrevState;
LeftBoundaryL2RScoreRecursive(featureID, prevState, recursiveOrientation, newScores);
LeftBoundaryL2RScoreRecursive(featureID, prevState, recursiveOrientation, newScores, scoreBreakdown);
} else {
FEATUREVERBOSE(6, "m_leftBoundaryRecursionGuard" << std::endl);
}
@ -612,9 +592,13 @@ void PhraseOrientationFeature::LeftBoundaryL2RScoreRecursive(int featureID,
void PhraseOrientationFeature::RightBoundaryR2LScoreRecursive(int featureID,
const PhraseOrientationFeatureState *state,
const std::bitset<3> orientation,
std::vector<float>& newScores) const
std::vector<float>& newScores,
ScoreComponentCollection* scoreBreakdown) const
// TODO: passing both newScores and scoreBreakdown seems redundant (scoreBreakdown needed for sparse scores)
{
if (state->m_rightBoundaryIsSet) {
const std::string* recursiveOrientationString;
// subtract heuristic score from subderivation
newScores[m_offsetR2LScores+state->m_rightBoundaryNonTerminalR2LHeuristicScoreIndex] -= state->m_rightBoundaryNonTerminalR2LScores[state->m_rightBoundaryNonTerminalR2LHeuristicScoreIndex];
@ -622,20 +606,25 @@ void PhraseOrientationFeature::RightBoundaryR2LScoreRecursive(int featureID,
std::bitset<3> recursiveOrientation = orientation;
if ( (orientation == 0x4) || (orientation == 0x0) ) {
// discontinuous
recursiveOrientationString = &DORIENT;
newScores[m_offsetR2LScores+2] += state->GetRightBoundaryR2LScoreDiscontinuous();
} else {
recursiveOrientation &= state->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations;
if ( recursiveOrientation == 0x1 ) {
// monotone
recursiveOrientationString = &MORIENT;
newScores[m_offsetR2LScores+0] += state->GetRightBoundaryR2LScoreMono();
} else if ( recursiveOrientation == 0x2 ) {
// swap
recursiveOrientationString = &SORIENT;
newScores[m_offsetR2LScores+1] += state->GetRightBoundaryR2LScoreSwap();
} else if ( recursiveOrientation == 0x4 ) {
// discontinuous
recursiveOrientationString = &DORIENT;
newScores[m_offsetR2LScores+2] += state->GetRightBoundaryR2LScoreDiscontinuous();
} else if ( recursiveOrientation == 0x0 ) {
// discontinuous
recursiveOrientationString = &DORIENT;
newScores[m_offsetR2LScores+2] += state->GetRightBoundaryR2LScoreDiscontinuous();
} else {
UTIL_THROW2(GetScoreProducerDescription()
@ -643,13 +632,17 @@ void PhraseOrientationFeature::RightBoundaryR2LScoreRecursive(int featureID,
}
}
if ( m_useSparseNT ) {
SparseNonTerminalR2LScore(state->m_rightBoundaryNonTerminalSymbol,scoreBreakdown,recursiveOrientationString);
}
FEATUREVERBOSE(6, "Right boundary recursion: " << orientation << " & " << state->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations << " = " << recursiveOrientation
<< " --- Subtracted heuristic score: " << state->m_rightBoundaryNonTerminalR2LScores[state->m_rightBoundaryNonTerminalR2LHeuristicScoreIndex] << std::endl);
if (!state->m_rightBoundaryRecursionGuard) {
// recursive call
const PhraseOrientationFeatureState* prevState = state->m_rightBoundaryPrevState;
RightBoundaryR2LScoreRecursive(featureID, prevState, recursiveOrientation, newScores);
RightBoundaryR2LScoreRecursive(featureID, prevState, recursiveOrientation, newScores, scoreBreakdown);
} else {
FEATUREVERBOSE(6, "m_rightBoundaryRecursionGuard" << std::endl);
}
@ -657,9 +650,9 @@ void PhraseOrientationFeature::RightBoundaryR2LScoreRecursive(int featureID,
}
void PhraseOrientationFeature::SparseL2RScore(const ChartHypothesis* hypo,
ScoreComponentCollection* scoreBreakdown,
const std::string& o) const
void PhraseOrientationFeature::SparseWordL2RScore(const ChartHypothesis* hypo,
ScoreComponentCollection* scoreBreakdown,
const std::string* o) const
{
// target word
@ -681,14 +674,14 @@ void PhraseOrientationFeature::SparseL2RScore(const ChartHypothesis* hypo,
if (targetWordString != "<s>" && targetWordString != "</s>") {
if ( !m_useTargetWordList || m_targetWordList.find((*targetWord)[0]) != m_targetWordList.end() ) {
scoreBreakdown->PlusEquals(this,
"L2R"+o+"_tw_"+targetWordString,
"L2R"+*o+"_tw_"+targetWordString,
1);
FEATUREVERBOSE(3, "Sparse: L2R"+o+"_tw_"+targetWordString << std::endl);
FEATUREVERBOSE(3, "Sparse: L2R"+*o+"_tw_"+targetWordString << std::endl);
} else {
scoreBreakdown->PlusEquals(this,
"L2R"+o+"_tw_OTHER",
"L2R"+*o+"_tw_OTHER",
1);
FEATUREVERBOSE(3, "Sparse: L2R"+o+"_tw_OTHER" << std::endl);
FEATUREVERBOSE(3, "Sparse: L2R"+*o+"_tw_OTHER" << std::endl);
}
}
@ -703,22 +696,22 @@ void PhraseOrientationFeature::SparseL2RScore(const ChartHypothesis* hypo,
if (sourceWordString != "<s>" && sourceWordString != "</s>") {
if ( !m_useSourceWordList || m_sourceWordList.find(sourceWord[0]) != m_sourceWordList.end() ) {
scoreBreakdown->PlusEquals(this,
"L2R"+o+"_sw_"+sourceWordString,
"L2R"+*o+"_sw_"+sourceWordString,
1);
FEATUREVERBOSE(3, "Sparse: L2R"+o+"_sw_"+sourceWordString << std::endl);
FEATUREVERBOSE(3, "Sparse: L2R"+*o+"_sw_"+sourceWordString << std::endl);
} else {
scoreBreakdown->PlusEquals(this,
"L2R"+o+"_sw_OTHER",
"L2R"+*o+"_sw_OTHER",
1);
FEATUREVERBOSE(3, "Sparse: L2R"+o+"_sw_OTHER" << std::endl);
FEATUREVERBOSE(3, "Sparse: L2R"+*o+"_sw_OTHER" << std::endl);
}
}
}
void PhraseOrientationFeature::SparseR2LScore(const ChartHypothesis* hypo,
ScoreComponentCollection* scoreBreakdown,
const std::string& o) const
void PhraseOrientationFeature::SparseWordR2LScore(const ChartHypothesis* hypo,
ScoreComponentCollection* scoreBreakdown,
const std::string* o) const
{
// target word
@ -740,14 +733,14 @@ void PhraseOrientationFeature::SparseR2LScore(const ChartHypothesis* hypo,
if (targetWordString != "<s>" && targetWordString != "</s>") {
if ( !m_useTargetWordList || m_targetWordList.find((*targetWord)[0]) != m_targetWordList.end() ) {
scoreBreakdown->PlusEquals(this,
"R2L"+o+"_tw_"+targetWordString,
"R2L"+*o+"_tw_"+targetWordString,
1);
FEATUREVERBOSE(3, "Sparse: R2L"+o+"_tw_"+targetWordString << std::endl);
FEATUREVERBOSE(3, "Sparse: R2L"+*o+"_tw_"+targetWordString << std::endl);
} else {
scoreBreakdown->PlusEquals(this,
"R2L"+o+"_tw_OTHER",
"R2L"+*o+"_tw_OTHER",
1);
FEATUREVERBOSE(3, "Sparse: R2L"+o+"_tw_OTHER" << std::endl);
FEATUREVERBOSE(3, "Sparse: R2L"+*o+"_tw_OTHER" << std::endl);
}
}
@ -762,18 +755,67 @@ void PhraseOrientationFeature::SparseR2LScore(const ChartHypothesis* hypo,
if (sourceWordString != "<s>" && sourceWordString != "</s>") {
if ( !m_useSourceWordList || m_sourceWordList.find(sourceWord[0]) != m_sourceWordList.end() ) {
scoreBreakdown->PlusEquals(this,
"R2L"+o+"_sw_"+sourceWordString,
"R2L"+*o+"_sw_"+sourceWordString,
1);
FEATUREVERBOSE(3, "Sparse: R2L"+o+"_sw_"+sourceWordString << std::endl);
FEATUREVERBOSE(3, "Sparse: R2L"+*o+"_sw_"+sourceWordString << std::endl);
} else {
scoreBreakdown->PlusEquals(this,
"R2L"+o+"_sw_OTHER",
"R2L"+*o+"_sw_OTHER",
1);
FEATUREVERBOSE(3, "Sparse: R2L"+o+"_sw_OTHER" << std::endl);
FEATUREVERBOSE(3, "Sparse: R2L"+*o+"_sw_OTHER" << std::endl);
}
}
}
// Adds 1 to the sparse left-to-right feature "L2R<o>_n_<symbol>" in scoreBreakdown,
// where <o> is the orientation label pointed to by o and <symbol> is the string
// form of nonTerminalSymbol. Nothing is added when the symbol is m_glueTargetLHS.
void PhraseOrientationFeature::SparseNonTerminalL2RScore(const Factor* nonTerminalSymbol,
    ScoreComponentCollection* scoreBreakdown,
    const std::string* o) const
{
  // The glue target LHS symbol gets no sparse non-terminal feature.
  if ( nonTerminalSymbol == m_glueTargetLHS ) {
    return;
  }

  const std::string symbolName = nonTerminalSymbol->GetString().as_string();
  const std::string& orient = *o;

  scoreBreakdown->PlusEquals(this, "L2R"+orient+"_n_"+symbolName, 1);
  FEATUREVERBOSE(3, "Sparse: L2R"+orient+"_n_"+symbolName << std::endl);
}
// Adds 1 to the sparse right-to-left feature "R2L<o>_n_<symbol>" in scoreBreakdown,
// where <o> is the orientation label pointed to by o and <symbol> is the string
// form of nonTerminalSymbol. Nothing is added when the symbol is m_glueTargetLHS.
void PhraseOrientationFeature::SparseNonTerminalR2LScore(const Factor* nonTerminalSymbol,
    ScoreComponentCollection* scoreBreakdown,
    const std::string* o) const
{
  // The glue target LHS symbol gets no sparse non-terminal feature.
  if ( nonTerminalSymbol == m_glueTargetLHS ) {
    return;
  }

  const std::string symbolName = nonTerminalSymbol->GetString().as_string();
  const std::string& orient = *o;

  scoreBreakdown->PlusEquals(this, "R2L"+orient+"_n_"+symbolName, 1);
  FEATUREVERBOSE(3, "Sparse: R2L"+orient+"_n_"+symbolName << std::endl);
}
// Maps an orientation class to the address of its label constant:
// LEFT -> MORIENT, RIGHT -> SORIENT, DLEFT / DRIGHT / UNKNOWN -> DORIENT.
// Any other value raises via UTIL_THROW2.
const std::string* PhraseOrientationFeature::ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const
{
  switch (o) {
  case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
    return &MORIENT;
  case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
    return &SORIENT;
  case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
  case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
  case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
    // all three classes share the discontinuous label
    return &DORIENT;
  default:
    UTIL_THROW2(GetScoreProducerDescription()
                << ": Unsupported orientation type.");
  }
  // Only reached if the throw above returns control; kept from the original.
  return NULL;
}
}

View File

@ -30,7 +30,7 @@ public:
friend class PhraseOrientationFeature;
PhraseOrientationFeatureState(bool distinguishStates)
PhraseOrientationFeatureState(bool distinguishStates, bool useSparseWord, bool useSparseNT)
: m_leftBoundaryNonTerminalL2RScores(3,0)
, m_rightBoundaryNonTerminalR2LScores(3,0)
, m_leftBoundaryNonTerminalL2RPossibleFutureOrientations(0x7)
@ -40,17 +40,21 @@ public:
, m_leftBoundaryIsSet(false)
, m_rightBoundaryIsSet(false)
, m_distinguishStates(distinguishStates)
, m_useSparseWord(useSparseWord)
, m_useSparseNT(useSparseNT)
{}
// Stores the left-boundary non-terminal data in this state: the three L2R
// scores, the possible-future-orientation bits, the heuristic score index,
// the non-terminal symbol and the previous state. Marks the left boundary as set.
void SetLeftBoundaryL2R(const std::vector<float> &scores,
                        size_t heuristicScoreIndex,
                        std::bitset<3> &possibleFutureOrientations,
                        const Factor* leftBoundaryNonTerminalSymbol,
                        const PhraseOrientationFeatureState* prevState) {
  const size_t numOrientations = 3;

  // copy the per-orientation scores
  for (size_t k = 0; k != numOrientations; ++k) {
    m_leftBoundaryNonTerminalL2RScores[k] = scores[k];
  }
  // copy the orientation bits one by one
  for (size_t k = 0; k != numOrientations; ++k) {
    m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[k] = possibleFutureOrientations[k];
  }

  m_leftBoundaryPrevState = prevState;
  m_leftBoundaryNonTerminalSymbol = leftBoundaryNonTerminalSymbol;
  m_leftBoundaryNonTerminalL2RHeuristicScoreIndex = heuristicScoreIndex;

  // set the flag last, once every field above holds its new value
  m_leftBoundaryIsSet = true;
}
@ -58,17 +62,18 @@ public:
// Stores the right-boundary non-terminal data in this state: the three R2L
// scores, the possible-future-orientation bits, the heuristic score index,
// the non-terminal symbol and the previous state. Marks the right boundary as set.
void SetRightBoundaryR2L(const std::vector<float> &scores,
                         size_t heuristicScoreIndex,
                         std::bitset<3> &possibleFutureOrientations,
                         const Factor* rightBoundaryNonTerminalSymbol,
                         const PhraseOrientationFeatureState* prevState) {
  const size_t numOrientations = 3;

  // copy the per-orientation scores
  for (size_t k = 0; k != numOrientations; ++k) {
    m_rightBoundaryNonTerminalR2LScores[k] = scores[k];
  }
  // copy the orientation bits one by one
  for (size_t k = 0; k != numOrientations; ++k) {
    m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[k] = possibleFutureOrientations[k];
  }

  m_rightBoundaryPrevState = prevState;
  m_rightBoundaryNonTerminalSymbol = rightBoundaryNonTerminalSymbol;
  m_rightBoundaryNonTerminalR2LHeuristicScoreIndex = heuristicScoreIndex;

  // set the flag last, once every field above holds its new value
  m_rightBoundaryIsSet = true;
}
// Returns the stored left-boundary L2R score at index 0, the monotone slot
// (this is the value added to newScores[0] for the monotone orientation).
float GetLeftBoundaryL2RScoreMono() const {
return m_leftBoundaryNonTerminalL2RScores[0];
}
@ -120,13 +125,13 @@ public:
}
if (m_leftBoundaryIsSet) {
int compareLeft = CompareLeftBoundaryRecursive(*this, otherState);
int compareLeft = CompareLeftBoundaryRecursive(*this, otherState, m_useSparseNT);
if (compareLeft != 0) {
return compareLeft;
}
}
if (m_rightBoundaryIsSet) {
int compareRight = CompareRightBoundaryRecursive(*this, otherState);
int compareRight = CompareRightBoundaryRecursive(*this, otherState, m_useSparseNT);
if (compareRight != 0) {
return compareRight;
}
@ -137,7 +142,7 @@ public:
protected:
static int CompareLeftBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState) {
static int CompareLeftBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState, bool useSparseNT) {
if (!state.m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet) {
return 0;
}
@ -148,6 +153,15 @@ protected:
return -1;
}
if (useSparseNT) {
if ( otherState.m_leftBoundaryNonTerminalSymbol < state.m_leftBoundaryNonTerminalSymbol ) {
return 1;
}
if ( state.m_leftBoundaryNonTerminalSymbol < otherState.m_leftBoundaryNonTerminalSymbol ) {
return -1;
}
}
if ( otherState.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex < state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex ) {
return 1;
}
@ -163,7 +177,7 @@ protected:
for (size_t i=0; i<state.m_leftBoundaryNonTerminalL2RScores.size(); ++i) {
// compare only for possible future orientations
// (possible future orientations of state and otherState are the same at this point due to the previous two conditional blocks)
if ( state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i]) {
if (state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i]) {
if (state.m_leftBoundaryNonTerminalL2RScores[i] > otherState.m_leftBoundaryNonTerminalL2RScores[i]) {
return 1;
}
@ -186,10 +200,10 @@ protected:
const PhraseOrientationFeatureState *prevState = state.m_leftBoundaryPrevState;
const PhraseOrientationFeatureState *otherPrevState = otherState.m_leftBoundaryPrevState;
return CompareLeftBoundaryRecursive(*prevState, *otherPrevState);
return CompareLeftBoundaryRecursive(*prevState, *otherPrevState, useSparseNT);
};
static int CompareRightBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState) {
static int CompareRightBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState, bool useSparseNT) {
if (!state.m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet) {
return 0;
}
@ -200,6 +214,15 @@ protected:
return -1;
}
if (useSparseNT) {
if ( otherState.m_rightBoundaryNonTerminalSymbol < state.m_rightBoundaryNonTerminalSymbol ) {
return 1;
}
if ( state.m_rightBoundaryNonTerminalSymbol < otherState.m_rightBoundaryNonTerminalSymbol ) {
return -1;
}
}
if ( otherState.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex < state.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex ) {
return 1;
}
@ -238,7 +261,7 @@ protected:
const PhraseOrientationFeatureState *prevState = state.m_rightBoundaryPrevState;
const PhraseOrientationFeatureState *otherPrevState = otherState.m_rightBoundaryPrevState;
return CompareRightBoundaryRecursive(*prevState, *otherPrevState);
return CompareRightBoundaryRecursive(*prevState, *otherPrevState, useSparseNT);
};
template<std::size_t N> static bool Smaller(const std::bitset<N>& x, const std::bitset<N>& y) {
@ -264,7 +287,11 @@ protected:
bool m_rightBoundaryIsSet;
const PhraseOrientationFeatureState* m_leftBoundaryPrevState;
const PhraseOrientationFeatureState* m_rightBoundaryPrevState;
bool m_distinguishStates;
const bool m_distinguishStates;
const bool m_useSparseWord;
const bool m_useSparseNT;
const Factor* m_leftBoundaryNonTerminalSymbol;
const Factor* m_rightBoundaryNonTerminalSymbol;
};
@ -283,7 +310,7 @@ public:
}
virtual const FFState* EmptyHypothesisState(const InputType &input) const {
return new PhraseOrientationFeatureState(m_distinguishStates);
return new PhraseOrientationFeatureState(m_distinguishStates,m_useSparseWord,m_useSparseNT);
}
void SetParameter(const std::string& key, const std::string& value);
@ -313,7 +340,7 @@ public:
ScoreComponentCollection* accumulator) const {
UTIL_THROW2(GetScoreProducerDescription()
<< ": EvaluateWhenApplied(const Hypothesis&, ...) not implemented");
return new PhraseOrientationFeatureState(m_distinguishStates);
return new PhraseOrientationFeatureState(m_distinguishStates,m_useSparseWord,m_useSparseNT);
};
FFState* EvaluateWhenApplied(
@ -337,25 +364,42 @@ protected:
void LeftBoundaryL2RScoreRecursive(int featureID,
const PhraseOrientationFeatureState *state,
const std::bitset<3> orientation,
std::vector<float>& newScores) const;
std::vector<float>& newScores,
ScoreComponentCollection* scoreBreakdown) const;
void RightBoundaryR2LScoreRecursive(int featureID,
const PhraseOrientationFeatureState *state,
const std::bitset<3> orientation,
std::vector<float>& newScores) const;
std::vector<float>& newScores,
ScoreComponentCollection* scoreBreakdown) const;
void SparseL2RScore(const ChartHypothesis* hypo,
ScoreComponentCollection* scoreBreakdown,
const std::string& o) const;
void SparseWordL2RScore(const ChartHypothesis* hypo,
ScoreComponentCollection* scoreBreakdown,
const std::string* o) const;
void SparseR2LScore(const ChartHypothesis* hypo,
ScoreComponentCollection* scoreBreakdown,
const std::string& o) const;
void SparseWordR2LScore(const ChartHypothesis* hypo,
ScoreComponentCollection* scoreBreakdown,
const std::string* o) const;
void SparseNonTerminalL2RScore(const Factor* nonTerminalSymbol,
ScoreComponentCollection* scoreBreakdown,
const std::string* o) const;
void SparseNonTerminalR2LScore(const Factor* nonTerminalSymbol,
ScoreComponentCollection* scoreBreakdown,
const std::string* o) const;
const std::string* ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const;
static const std::string MORIENT;
static const std::string SORIENT;
static const std::string DORIENT;
std::string m_glueTargetLHSStr;
Word m_glueTargetLHS;
const Factor* m_glueTargetLHS;
bool m_distinguishStates;
bool m_useSparse;
bool m_useSparseWord;
bool m_useSparseNT;
size_t m_offsetR2LScores;
const std::vector<float> m_weightsVector;
std::string m_filenameTargetWordList;