diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp index 39156b09a..368e1597c 100644 --- a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp +++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp @@ -217,14 +217,42 @@ void SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCou } -void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied( - const ChartHypothesis& hypo, - ScoreComponentCollection* accumulator) const +void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const InputType &input + , const InputPath &inputPath + , const TargetPhrase &targetPhrase + , const StackVec *stackVec + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection *estimatedFutureScore) const { + assert(stackVec); + + IFFEATUREVERBOSE(2) + { + FEATUREVERBOSE(2, targetPhrase << std::endl); + FEATUREVERBOSE(2, inputPath << std::endl); + for (size_t i = 0; i < stackVec->size(); ++i) + { + const ChartCellLabel &cell = *stackVec->at(i); + const WordsRange &ntRange = cell.GetCoverage(); + FEATUREVERBOSE(2, "stackVec[ " << i << " ] : " << ntRange.GetStartPos() << " - " << ntRange.GetEndPos() << std::endl); + } + + for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignTerm().begin(); + it!=targetPhrase.GetAlignTerm().end(); ++it) + { + FEATUREVERBOSE(2, "alignTerm " << it->first << " " << it->second << std::endl); + } + + for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignNonTerm().begin(); + it!=targetPhrase.GetAlignNonTerm().end(); ++it) + { + FEATUREVERBOSE(2, "alignNonTerm " << it->first << " " << it->second << std::endl); + } + } + // dense scores std::vector newScores(m_numScoreComponents,0); // m_numScoreComponents == 3 - const InputType& input = hypo.GetManager().GetSource(); const TreeInput& treeInput = static_cast(input); const StaticData& staticData = StaticData::Instance(); const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal(); @@ -238,12 +266,11 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied( float ruleLabelledProbability = 1; // read SourceLabels property - const TargetPhrase &currTarPhr = hypo.GetCurrTargetPhrase(); - const Factor* targetLHS = currTarPhr.GetTargetLHS()[0]; + const Factor* targetLHS = targetPhrase.GetTargetLHS()[0]; bool isGlueGrammarRule = false; bool isUnkRule = false; - if (const PhraseProperty *property = currTarPhr.GetProperty("SourceLabels")) { + if (const PhraseProperty *property = targetPhrase.GetProperty("SourceLabels")) { const SourceLabelsPhraseProperty *sourceLabelsPhraseProperty = static_cast(property); @@ -256,33 +283,36 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied( // get index map for underlying hypotheses const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = - currTarPhr.GetAlignNonTerm().GetNonTermIndexMap(); + targetPhrase.GetAlignNonTerm().GetNonTermIndexMap(); std::vector targetLabelsRHS; if (nNTs > 1) { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule size_t nonTerminalNumber = 0; - for (size_t phrasePos=0; phrasePosGetTargetLHS()[0] ); + targetLabelsRHS.push_back( word[0] ); // retrieve information that is required for input tree label matching (RHS) - const WordsRange& prevWordsRange = prevHypo->GetCurrSourceRange(); + const ChartCellLabel &cell = *stackVec->at(nonTermIndex); + const WordsRange& prevWordsRange = cell.GetCoverage(); size_t prevStartPos = prevWordsRange.GetStartPos(); size_t prevEndPos = prevWordsRange.GetEndPos(); const NonTerminalSet& prevTreeInputLabels = treeInput.GetLabelSet(prevStartPos,prevEndPos); for (NonTerminalSet::const_iterator prevTreeInputLabelsIt = prevTreeInputLabels.begin(); - prevTreeInputLabelsIt != prevTreeInputLabels.end(); ++prevTreeInputLabelsIt) { - if (*prevTreeInputLabelsIt != outputDefaultNonTerminal) { + prevTreeInputLabelsIt != prevTreeInputLabels.end(); ++prevTreeInputLabelsIt) + { + if (*prevTreeInputLabelsIt != outputDefaultNonTerminal) + { boost::unordered_map::const_iterator foundPrevTreeInputLabel = m_sourceLabelIndexesByFactor.find((*prevTreeInputLabelsIt)[0]); - if (foundPrevTreeInputLabel != m_sourceLabelIndexesByFactor.end()) { + if (foundPrevTreeInputLabel != m_sourceLabelIndexesByFactor.end()) + { size_t prevTreeInputLabelIndex = foundPrevTreeInputLabel->second; treeInputLabelsRHS[nonTerminalNumber].insert(prevTreeInputLabelIndex); } @@ -295,7 +325,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied( } // retrieve information that is required for input tree label matching (LHS) - const WordsRange& wordsRange = hypo.GetCurrSourceRange(); + const WordsRange& wordsRange = inputPath.GetWordsRange(); size_t startPos = wordsRange.GetStartPos(); size_t endPos = wordsRange.GetEndPos(); const NonTerminalSet& treeInputLabels = treeInput.GetLabelSet(startPos,endPos); @@ -348,7 +378,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied( if (sparseScoredTreeInputLabelsRHS[nonTerminalNumber].find(*sourceLabelsRHSIt) == sparseScoredTreeInputLabelsRHS[nonTerminalNumber].end()) { // (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem) float score_RHS_1 = (float)1/treeInputLabelsRHS[nonTerminalNumber].size(); - accumulator->PlusEquals(this, + scoreBreakdown.PlusEquals(this, std::string("RHS_1_" + m_sourceLabelsByIndex[*sourceLabelsRHSIt]), score_RHS_1); sparseScoredTreeInputLabelsRHS[nonTerminalNumber].insert(*sourceLabelsRHSIt); @@ -383,7 +413,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied( if (sparseScoredTreeInputLabelsLHS.find(sourceLabelsLHSIt->first) == sparseScoredTreeInputLabelsLHS.end()) { // (only if no match has been scored for this tree input label and rule non-terminal with a previous sourceLabelItem) float score_LHS_1 = (float)1/treeInputLabelsLHS.size(); - accumulator->PlusEquals(this, + scoreBreakdown.PlusEquals(this, std::string("LHS_1_" + m_sourceLabelsByIndex[sourceLabelsLHSIt->first]), score_LHS_1); sparseScoredTreeInputLabelsLHS.insert(sourceLabelsLHSIt->first); @@ -451,7 +481,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied( if (sparseScoredTreeInputLabelsRHS[nonTerminalNumber].find(*treeInputLabelsRHSIt) == sparseScoredTreeInputLabelsRHS[nonTerminalNumber].end()) { // score sparse features: RHS mismatch - accumulator->PlusEquals(this, + scoreBreakdown.PlusEquals(this, std::string("RHS_0_" + m_sourceLabelsByIndex[*treeInputLabelsRHSIt]), score_RHS_0); } @@ -470,7 +500,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied( if (sparseScoredTreeInputLabelsLHS.find(*treeInputLabelsLHSIt) == sparseScoredTreeInputLabelsLHS.end()) { // score sparse features: RHS mismatch - accumulator->PlusEquals(this, + scoreBreakdown.PlusEquals(this, std::string("LHS_0_" + m_sourceLabelsByIndex[*treeInputLabelsLHSIt]), score_LHS_0); } @@ -482,7 +512,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied( } else { // abort with error message if the phrase does not translate an unknown word - UTIL_THROW_IF2(!currTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription() + UTIL_THROW_IF2(!targetPhrase.GetWord(0).IsOOV(), GetScoreProducerDescription() << ": Missing SourceLabels property. " << "Please check phrase table and glue rules."); @@ -513,7 +543,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWhenApplied( // newScores[4] = hasCompleteTreeInputMatch ? std::log(s2tLabelsProb) : 0; // newScores[3] = hasCompleteTreeInputMatch ? std::log(ruleLabelledProbability) : 0; - accumulator->PlusEquals(this, newScores); + scoreBreakdown.PlusEquals(this, newScores); } diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.h b/moses/FF/SoftSourceSyntacticConstraintsFeature.h index 725a836eb..6bcb23303 100644 --- a/moses/FF/SoftSourceSyntacticConstraintsFeature.h +++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.h @@ -33,15 +33,16 @@ public: , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const - {}; + { + targetPhrase.SetRuleSource(source); + }; void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedFutureScore = NULL) const - {}; + , ScoreComponentCollection *estimatedFutureScore = NULL) const; void EvaluateWhenApplied( const Hypothesis& cur_hypo, @@ -50,7 +51,8 @@ public: void EvaluateWhenApplied( const ChartHypothesis& cur_hypo, - ScoreComponentCollection* accumulator) const; + ScoreComponentCollection* accumulator) const + {}; private: std::string m_sourceLabelSetFile; diff --git a/moses/FF/StatelessFeatureFunction.h b/moses/FF/StatelessFeatureFunction.h index abf029f0c..9ef5d269a 100644 --- a/moses/FF/StatelessFeatureFunction.h +++ b/moses/FF/StatelessFeatureFunction.h @@ -22,6 +22,7 @@ public: StatelessFeatureFunction(const std::string &line); StatelessFeatureFunction(size_t numScoreComponents, const std::string &line); + /** * This should be implemented for features that apply to phrase-based models. **/