mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 04:43:03 +03:00
SoftSourceSyntacticConstraintsFeature: improved efficiency
This commit is contained in:
parent
1353aa57dc
commit
a0b6b6a341
@ -217,14 +217,42 @@ void SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCou
|
||||
}
|
||||
|
||||
|
||||
void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
|
||||
const ChartHypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore) const
|
||||
{
|
||||
assert(stackVec);
|
||||
|
||||
IFFEATUREVERBOSE(2)
|
||||
{
|
||||
FEATUREVERBOSE(2, targetPhrase << std::endl);
|
||||
FEATUREVERBOSE(2, inputPath << std::endl);
|
||||
for (size_t i = 0; i < stackVec->size(); ++i)
|
||||
{
|
||||
const ChartCellLabel &cell = *stackVec->at(i);
|
||||
const WordsRange &ntRange = cell.GetCoverage();
|
||||
FEATUREVERBOSE(2, "stackVec[ " << i << " ] : " << ntRange.GetStartPos() << " - " << ntRange.GetEndPos() << std::endl);
|
||||
}
|
||||
|
||||
for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignTerm().begin();
|
||||
it!=targetPhrase.GetAlignTerm().end(); ++it)
|
||||
{
|
||||
FEATUREVERBOSE(2, "alignTerm " << it->first << " " << it->second << std::endl);
|
||||
}
|
||||
|
||||
for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignNonTerm().begin();
|
||||
it!=targetPhrase.GetAlignNonTerm().end(); ++it)
|
||||
{
|
||||
FEATUREVERBOSE(2, "alignNonTerm " << it->first << " " << it->second << std::endl);
|
||||
}
|
||||
}
|
||||
|
||||
// dense scores
|
||||
std::vector<float> newScores(m_numScoreComponents,0); // m_numScoreComponents == 3
|
||||
|
||||
const InputType& input = hypo.GetManager().GetSource();
|
||||
const TreeInput& treeInput = static_cast<const TreeInput&>(input);
|
||||
const StaticData& staticData = StaticData::Instance();
|
||||
const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal();
|
||||
@ -238,12 +266,11 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
|
||||
float ruleLabelledProbability = 1;
|
||||
|
||||
// read SourceLabels property
|
||||
const TargetPhrase &currTarPhr = hypo.GetCurrTargetPhrase();
|
||||
const Factor* targetLHS = currTarPhr.GetTargetLHS()[0];
|
||||
const Factor* targetLHS = targetPhrase.GetTargetLHS()[0];
|
||||
bool isGlueGrammarRule = false;
|
||||
bool isUnkRule = false;
|
||||
|
||||
if (const PhraseProperty *property = currTarPhr.GetProperty("SourceLabels")) {
|
||||
if (const PhraseProperty *property = targetPhrase.GetProperty("SourceLabels")) {
|
||||
|
||||
const SourceLabelsPhraseProperty *sourceLabelsPhraseProperty = static_cast<const SourceLabelsPhraseProperty*>(property);
|
||||
|
||||
@ -256,33 +283,36 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
|
||||
|
||||
// get index map for underlying hypotheses
|
||||
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
||||
currTarPhr.GetAlignNonTerm().GetNonTermIndexMap();
|
||||
targetPhrase.GetAlignNonTerm().GetNonTermIndexMap();
|
||||
|
||||
std::vector<const Factor*> targetLabelsRHS;
|
||||
if (nNTs > 1) { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
|
||||
size_t nonTerminalNumber = 0;
|
||||
|
||||
for (size_t phrasePos=0; phrasePos<currTarPhr.GetSize(); ++phrasePos) {
|
||||
for (size_t phrasePos=0; phrasePos<targetPhrase.GetSize(); ++phrasePos) {
|
||||
// consult rule for either word or non-terminal
|
||||
const Word &word = currTarPhr.GetWord(phrasePos);
|
||||
const Word &word = targetPhrase.GetWord(phrasePos);
|
||||
if ( word.IsNonTerminal() ) {
|
||||
// non-terminal: consult subderivation
|
||||
size_t nonTermIndex = nonTermIndexMap[phrasePos];
|
||||
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
|
||||
targetLabelsRHS.push_back( prevHypo->GetTargetLHS()[0] );
|
||||
targetLabelsRHS.push_back( word[0] );
|
||||
|
||||
// retrieve information that is required for input tree label matching (RHS)
|
||||
const WordsRange& prevWordsRange = prevHypo->GetCurrSourceRange();
|
||||
const ChartCellLabel &cell = *stackVec->at(nonTermIndex);
|
||||
const WordsRange& prevWordsRange = cell.GetCoverage();
|
||||
size_t prevStartPos = prevWordsRange.GetStartPos();
|
||||
size_t prevEndPos = prevWordsRange.GetEndPos();
|
||||
const NonTerminalSet& prevTreeInputLabels = treeInput.GetLabelSet(prevStartPos,prevEndPos);
|
||||
|
||||
for (NonTerminalSet::const_iterator prevTreeInputLabelsIt = prevTreeInputLabels.begin();
|
||||
prevTreeInputLabelsIt != prevTreeInputLabels.end(); ++prevTreeInputLabelsIt) {
|
||||
if (*prevTreeInputLabelsIt != outputDefaultNonTerminal) {
|
||||
prevTreeInputLabelsIt != prevTreeInputLabels.end(); ++prevTreeInputLabelsIt)
|
||||
{
|
||||
if (*prevTreeInputLabelsIt != outputDefaultNonTerminal)
|
||||
{
|
||||
boost::unordered_map<const Factor*,size_t>::const_iterator foundPrevTreeInputLabel
|
||||
= m_sourceLabelIndexesByFactor.find((*prevTreeInputLabelsIt)[0]);
|
||||
if (foundPrevTreeInputLabel != m_sourceLabelIndexesByFactor.end()) {
|
||||
if (foundPrevTreeInputLabel != m_sourceLabelIndexesByFactor.end())
|
||||
{
|
||||
size_t prevTreeInputLabelIndex = foundPrevTreeInputLabel->second;
|
||||
treeInputLabelsRHS[nonTerminalNumber].insert(prevTreeInputLabelIndex);
|
||||
}
|
||||
@ -295,7 +325,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
|
||||
}
|
||||
|
||||
// retrieve information that is required for input tree label matching (LHS)
|
||||
const WordsRange& wordsRange = hypo.GetCurrSourceRange();
|
||||
const WordsRange& wordsRange = inputPath.GetWordsRange();
|
||||
size_t startPos = wordsRange.GetStartPos();
|
||||
size_t endPos = wordsRange.GetEndPos();
|
||||
const NonTerminalSet& treeInputLabels = treeInput.GetLabelSet(startPos,endPos);
|
||||
@ -348,7 +378,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
|
||||
if (sparseScoredTreeInputLabelsRHS[nonTerminalNumber].find(*sourceLabelsRHSIt) == sparseScoredTreeInputLabelsRHS[nonTerminalNumber].end()) {
|
||||
// (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem)
|
||||
float score_RHS_1 = (float)1/treeInputLabelsRHS[nonTerminalNumber].size();
|
||||
accumulator->PlusEquals(this,
|
||||
scoreBreakdown.PlusEquals(this,
|
||||
std::string("RHS_1_" + m_sourceLabelsByIndex[*sourceLabelsRHSIt]),
|
||||
score_RHS_1);
|
||||
sparseScoredTreeInputLabelsRHS[nonTerminalNumber].insert(*sourceLabelsRHSIt);
|
||||
@ -383,7 +413,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
|
||||
if (sparseScoredTreeInputLabelsLHS.find(sourceLabelsLHSIt->first) == sparseScoredTreeInputLabelsLHS.end()) {
|
||||
// (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem)
|
||||
float score_LHS_1 = (float)1/treeInputLabelsLHS.size();
|
||||
accumulator->PlusEquals(this,
|
||||
scoreBreakdown.PlusEquals(this,
|
||||
std::string("LHS_1_" + m_sourceLabelsByIndex[sourceLabelsLHSIt->first]),
|
||||
score_LHS_1);
|
||||
sparseScoredTreeInputLabelsLHS.insert(sourceLabelsLHSIt->first);
|
||||
@ -451,7 +481,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
|
||||
|
||||
if (sparseScoredTreeInputLabelsRHS[nonTerminalNumber].find(*treeInputLabelsRHSIt) == sparseScoredTreeInputLabelsRHS[nonTerminalNumber].end()) {
|
||||
// score sparse features: RHS mismatch
|
||||
accumulator->PlusEquals(this,
|
||||
scoreBreakdown.PlusEquals(this,
|
||||
std::string("RHS_0_" + m_sourceLabelsByIndex[*treeInputLabelsRHSIt]),
|
||||
score_RHS_0);
|
||||
}
|
||||
@ -470,7 +500,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
|
||||
|
||||
if (sparseScoredTreeInputLabelsLHS.find(*treeInputLabelsLHSIt) == sparseScoredTreeInputLabelsLHS.end()) {
|
||||
// score sparse features: RHS mismatch
|
||||
accumulator->PlusEquals(this,
|
||||
scoreBreakdown.PlusEquals(this,
|
||||
std::string("LHS_0_" + m_sourceLabelsByIndex[*treeInputLabelsLHSIt]),
|
||||
score_LHS_0);
|
||||
}
|
||||
@ -482,7 +512,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
|
||||
} else {
|
||||
|
||||
// abort with error message if the phrase does not translate an unknown word
|
||||
UTIL_THROW_IF2(!currTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
|
||||
UTIL_THROW_IF2(!targetPhrase.GetWord(0).IsOOV(), GetScoreProducerDescription()
|
||||
<< ": Missing SourceLabels property. "
|
||||
<< "Please check phrase table and glue rules.");
|
||||
|
||||
@ -513,7 +543,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied(
|
||||
// newScores[4] = hasCompleteTreeInputMatch ? std::log(s2tLabelsProb) : 0;
|
||||
// newScores[3] = hasCompleteTreeInputMatch ? std::log(ruleLabelledProbability) : 0;
|
||||
|
||||
accumulator->PlusEquals(this, newScores);
|
||||
scoreBreakdown.PlusEquals(this, newScores);
|
||||
}
|
||||
|
||||
|
||||
|
@ -33,15 +33,16 @@ public:
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{};
|
||||
{
|
||||
targetPhrase.SetRuleSource(source);
|
||||
};
|
||||
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{};
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
|
||||
|
||||
void EvaluateWhenApplied(
|
||||
const Hypothesis& cur_hypo,
|
||||
@ -50,7 +51,8 @@ public:
|
||||
|
||||
void EvaluateWhenApplied(
|
||||
const ChartHypothesis& cur_hypo,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{};
|
||||
|
||||
private:
|
||||
std::string m_sourceLabelSetFile;
|
||||
|
@ -22,6 +22,7 @@ public:
|
||||
|
||||
StatelessFeatureFunction(const std::string &line);
|
||||
StatelessFeatureFunction(size_t numScoreComponents, const std::string &line);
|
||||
|
||||
/**
|
||||
* This should be implemented for features that apply to phrase-based models.
|
||||
**/
|
||||
|
Loading…
Reference in New Issue
Block a user