SoftSourceSyntacticConstraintsFeature: improved efficiency

This commit is contained in:
Matthias Huck 2014-12-08 18:20:58 +00:00
parent 1353aa57dc
commit a0b6b6a341
3 changed files with 60 additions and 27 deletions

View File

@ -217,14 +217,42 @@ void SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCou
}
void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
const ChartHypothesis& hypo,
ScoreComponentCollection* accumulator) const
void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore) const
{
assert(stackVec);
IFFEATUREVERBOSE(2)
{
FEATUREVERBOSE(2, targetPhrase << std::endl);
FEATUREVERBOSE(2, inputPath << std::endl);
for (size_t i = 0; i < stackVec->size(); ++i)
{
const ChartCellLabel &cell = *stackVec->at(i);
const WordsRange &ntRange = cell.GetCoverage();
FEATUREVERBOSE(2, "stackVec[ " << i << " ] : " << ntRange.GetStartPos() << " - " << ntRange.GetEndPos() << std::endl);
}
for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignTerm().begin();
it!=targetPhrase.GetAlignTerm().end(); ++it)
{
FEATUREVERBOSE(2, "alignTerm " << it->first << " " << it->second << std::endl);
}
for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignNonTerm().begin();
it!=targetPhrase.GetAlignNonTerm().end(); ++it)
{
FEATUREVERBOSE(2, "alignNonTerm " << it->first << " " << it->second << std::endl);
}
}
// dense scores
std::vector<float> newScores(m_numScoreComponents,0); // m_numScoreComponents == 3
const InputType& input = hypo.GetManager().GetSource();
const TreeInput& treeInput = static_cast<const TreeInput&>(input);
const StaticData& staticData = StaticData::Instance();
const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal();
@ -238,12 +266,11 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
float ruleLabelledProbability = 1;
// read SourceLabels property
const TargetPhrase &currTarPhr = hypo.GetCurrTargetPhrase();
const Factor* targetLHS = currTarPhr.GetTargetLHS()[0];
const Factor* targetLHS = targetPhrase.GetTargetLHS()[0];
bool isGlueGrammarRule = false;
bool isUnkRule = false;
if (const PhraseProperty *property = currTarPhr.GetProperty("SourceLabels")) {
if (const PhraseProperty *property = targetPhrase.GetProperty("SourceLabels")) {
const SourceLabelsPhraseProperty *sourceLabelsPhraseProperty = static_cast<const SourceLabelsPhraseProperty*>(property);
@ -256,33 +283,36 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
// get index map for underlying hypotheses
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
currTarPhr.GetAlignNonTerm().GetNonTermIndexMap();
targetPhrase.GetAlignNonTerm().GetNonTermIndexMap();
std::vector<const Factor*> targetLabelsRHS;
if (nNTs > 1) { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
size_t nonTerminalNumber = 0;
for (size_t phrasePos=0; phrasePos<currTarPhr.GetSize(); ++phrasePos) {
for (size_t phrasePos=0; phrasePos<targetPhrase.GetSize(); ++phrasePos) {
// consult rule for either word or non-terminal
const Word &word = currTarPhr.GetWord(phrasePos);
const Word &word = targetPhrase.GetWord(phrasePos);
if ( word.IsNonTerminal() ) {
// non-terminal: consult subderivation
size_t nonTermIndex = nonTermIndexMap[phrasePos];
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
targetLabelsRHS.push_back( prevHypo->GetTargetLHS()[0] );
targetLabelsRHS.push_back( word[0] );
// retrieve information that is required for input tree label matching (RHS)
const WordsRange& prevWordsRange = prevHypo->GetCurrSourceRange();
const ChartCellLabel &cell = *stackVec->at(nonTermIndex);
const WordsRange& prevWordsRange = cell.GetCoverage();
size_t prevStartPos = prevWordsRange.GetStartPos();
size_t prevEndPos = prevWordsRange.GetEndPos();
const NonTerminalSet& prevTreeInputLabels = treeInput.GetLabelSet(prevStartPos,prevEndPos);
for (NonTerminalSet::const_iterator prevTreeInputLabelsIt = prevTreeInputLabels.begin();
prevTreeInputLabelsIt != prevTreeInputLabels.end(); ++prevTreeInputLabelsIt) {
if (*prevTreeInputLabelsIt != outputDefaultNonTerminal) {
prevTreeInputLabelsIt != prevTreeInputLabels.end(); ++prevTreeInputLabelsIt)
{
if (*prevTreeInputLabelsIt != outputDefaultNonTerminal)
{
boost::unordered_map<const Factor*,size_t>::const_iterator foundPrevTreeInputLabel
= m_sourceLabelIndexesByFactor.find((*prevTreeInputLabelsIt)[0]);
if (foundPrevTreeInputLabel != m_sourceLabelIndexesByFactor.end()) {
if (foundPrevTreeInputLabel != m_sourceLabelIndexesByFactor.end())
{
size_t prevTreeInputLabelIndex = foundPrevTreeInputLabel->second;
treeInputLabelsRHS[nonTerminalNumber].insert(prevTreeInputLabelIndex);
}
@ -295,7 +325,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
}
// retrieve information that is required for input tree label matching (LHS)
const WordsRange& wordsRange = hypo.GetCurrSourceRange();
const WordsRange& wordsRange = inputPath.GetWordsRange();
size_t startPos = wordsRange.GetStartPos();
size_t endPos = wordsRange.GetEndPos();
const NonTerminalSet& treeInputLabels = treeInput.GetLabelSet(startPos,endPos);
@ -348,7 +378,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
if (sparseScoredTreeInputLabelsRHS[nonTerminalNumber].find(*sourceLabelsRHSIt) == sparseScoredTreeInputLabelsRHS[nonTerminalNumber].end()) {
// (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem)
float score_RHS_1 = (float)1/treeInputLabelsRHS[nonTerminalNumber].size();
accumulator->PlusEquals(this,
scoreBreakdown.PlusEquals(this,
std::string("RHS_1_" + m_sourceLabelsByIndex[*sourceLabelsRHSIt]),
score_RHS_1);
sparseScoredTreeInputLabelsRHS[nonTerminalNumber].insert(*sourceLabelsRHSIt);
@ -383,7 +413,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
if (sparseScoredTreeInputLabelsLHS.find(sourceLabelsLHSIt->first) == sparseScoredTreeInputLabelsLHS.end()) {
// (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem)
float score_LHS_1 = (float)1/treeInputLabelsLHS.size();
accumulator->PlusEquals(this,
scoreBreakdown.PlusEquals(this,
std::string("LHS_1_" + m_sourceLabelsByIndex[sourceLabelsLHSIt->first]),
score_LHS_1);
sparseScoredTreeInputLabelsLHS.insert(sourceLabelsLHSIt->first);
@ -451,7 +481,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
if (sparseScoredTreeInputLabelsRHS[nonTerminalNumber].find(*treeInputLabelsRHSIt) == sparseScoredTreeInputLabelsRHS[nonTerminalNumber].end()) {
// score sparse features: RHS mismatch
accumulator->PlusEquals(this,
scoreBreakdown.PlusEquals(this,
std::string("RHS_0_" + m_sourceLabelsByIndex[*treeInputLabelsRHSIt]),
score_RHS_0);
}
@ -470,7 +500,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
if (sparseScoredTreeInputLabelsLHS.find(*treeInputLabelsLHSIt) == sparseScoredTreeInputLabelsLHS.end()) {
// score sparse features: RHS mismatch
accumulator->PlusEquals(this,
scoreBreakdown.PlusEquals(this,
std::string("LHS_0_" + m_sourceLabelsByIndex[*treeInputLabelsLHSIt]),
score_LHS_0);
}
@ -482,7 +512,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
} else {
// abort with error message if the phrase does not translate an unknown word
UTIL_THROW_IF2(!currTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
UTIL_THROW_IF2(!targetPhrase.GetWord(0).IsOOV(), GetScoreProducerDescription()
<< ": Missing SourceLabels property. "
<< "Please check phrase table and glue rules.");
@ -513,7 +543,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
// newScores[4] = hasCompleteTreeInputMatch ? std::log(s2tLabelsProb) : 0;
// newScores[3] = hasCompleteTreeInputMatch ? std::log(ruleLabelledProbability) : 0;
accumulator->PlusEquals(this, newScores);
scoreBreakdown.PlusEquals(this, newScores);
}

View File

@ -33,15 +33,16 @@ public:
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{};
{
targetPhrase.SetRuleSource(source);
};
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{};
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
void EvaluateWhenApplied(
const Hypothesis& cur_hypo,
@ -50,7 +51,8 @@ public:
void EvaluateWhenApplied(
const ChartHypothesis& cur_hypo,
ScoreComponentCollection* accumulator) const;
ScoreComponentCollection* accumulator) const
{};
private:
std::string m_sourceLabelSetFile;

View File

@ -22,6 +22,7 @@ public:
StatelessFeatureFunction(const std::string &line);
StatelessFeatureFunction(size_t numScoreComponents, const std::string &line);
/**
* This should be implemented for features that apply to phrase-based models.
**/