// mosesdecoder/moses/Hypothesis.cpp
// (scrape metadata: 629 lines, 20 KiB, C++, "Raw Normal View History" —
//  retained here as a comment so the file remains valid C++)
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <iostream>
#include <limits>
#include <vector>
#include <algorithm>
/* History note (commit message, 2009-02-06 18:43:06 +03:00):
   Feature function overhaul. Each feature function is computed in one of
   three ways: 1) Stateless feature functions from the phrase
   table/generation table: these are computed when the TranslationOption
   is created. They become part of the ScoreBreakdown object contained in
   the TranslationOption and are added to the feature value vector when a
   hypothesis is extended. 2) Stateless feature functions that are
   computed during state exploration. Currently, only WordPenalty falls
   into this category, but these functions implement a method Evaluate
   which does not receive a Hypothesis or any contextual information.
   3) Stateful feature functions: these features receive the arc
   information (translation option), compute some value and then return
   some context information. The context information created by a
   particular feature function is passed back to it as the previous
   context when a hypothesis originating at the node where the previous
   edge terminates is created. States in the search space may be
   recombined if the context information is identical. The context
   information must be stored in an object implementing the FFState
   interface. TODO: 1) the command line interface / MERT interface needs
   to go to named parameters that are otherwise opaque 2)
   StatefulFeatureFunction's Evaluate method should just take a
   TranslationOption and a context object. It is not good that it takes a
   hypothesis, because then people may be tempted to access information
   about the "previous" hypothesis without "declaring" this dependency.
   3) Future cost estimates should be handled using feature functions.
   All stateful feature functions need some kind of future cost estimate.
   4) Philipp's poor-man's cube pruning is broken.
   git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2087 1f5c12ca-751b-0410-a591-d2e778427230
*/
#include "TranslationOption.h"
#include "TranslationOptionCollection.h"
#include "Hypothesis.h"
#include "Util.h"
#include "SquareMatrix.h"
#include "StaticData.h"
#include "InputType.h"
#include "Manager.h"
#include "IOWrapper.h"
// 2013-05-24 21:02:49 +04:00 (blame timestamp retained as comment)
#include "moses/FF/FFState.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include <boost/foreach.hpp>
using namespace std;
namespace Moses
{
// Global diagnostic counter: total number of Hypothesis objects ever
// constructed (incremented in both constructors, never decremented).
size_t g_numHypos = 0;
// Seed constructor: builds the initial hypothesis that anchors the search.
// It covers no source words (unless the bitmap already has a covered
// prefix — presumably for constrained decoding; TODO confirm with callers)
// and asks every stateful feature function for its empty starting state.
Hypothesis::
Hypothesis(Manager& manager, InputType const& source, const TranslationOption &initialTransOpt, const Bitmap &bitmap)
  : m_prevHypo(NULL)
  , m_sourceCompleted(bitmap)
  , m_sourceInput(source)
    // If the bitmap's first gap is not at position 0, positions
    // [0, firstGap-1] are already covered and become the initial range;
    // otherwise the range is empty (NOT_FOUND, NOT_FOUND).
  , m_currSourceWordsRange(
      m_sourceCompleted.GetFirstGapPos()>0 ? 0 : NOT_FOUND,
      m_sourceCompleted.GetFirstGapPos()>0 ? m_sourceCompleted.GetFirstGapPos()-1 : NOT_FOUND)
  , m_currTargetWordsRange(NOT_FOUND, NOT_FOUND)
  , m_wordDeleted(false)
  , m_futureScore(0.0f)
  , m_estimatedScore(0.0f)
    // One state slot per registered stateful feature function.
  , m_ffStates(StatefulFeatureFunction::GetStatefulFeatureFunctions().size())
  , m_arcList(NULL)
  , m_transOpt(initialTransOpt)
  , m_manager(manager)
  , m_id(m_manager.GetNextHypoId())
{
  ++g_numHypos;
  // used for initial seeding of trans process
  // initialize scores
  //_hash_computed = false;
  //s_HypothesesCreated = 1;

  // Every stateful feature supplies its initial (empty-hypothesis) state.
  const vector<const StatefulFeatureFunction*>& ffs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
  for (unsigned i = 0; i < ffs.size(); ++i)
    m_ffStates[i] = ffs[i]->EmptyHypothesisState(source);
  m_manager.GetSentenceStats().AddCreated();
}
/***
 * continue prevHypo by appending the phrases in transOpt
 */
Hypothesis::
Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt, const Bitmap &bitmap)
  : m_prevHypo(&prevHypo)
  , m_sourceCompleted(bitmap)
  , m_sourceInput(prevHypo.m_sourceInput)
  , m_currSourceWordsRange(transOpt.GetSourceWordsRange())
    // New target words are appended immediately after the predecessor's
    // target range.
  , m_currTargetWordsRange(prevHypo.m_currTargetWordsRange.GetEndPos() + 1,
                           prevHypo.m_currTargetWordsRange.GetEndPos()
                           + transOpt.GetTargetPhrase().GetSize())
  , m_wordDeleted(false)
  , m_futureScore(0.0f)
  , m_estimatedScore(0.0f)
    // State slots are filled later (see EvaluateWhenApplied), one per
    // stateful feature function.
  , m_ffStates(prevHypo.m_ffStates.size())
  , m_arcList(NULL)
  , m_transOpt(transOpt)
  , m_manager(prevHypo.GetManager())
  , m_id(m_manager.GetNextHypoId())
{
  ++g_numHypos;
  // Scores already cached in the translation option (phrase table etc.)
  // are folded into this hypothesis' score breakdown up front.
  m_currScoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
  m_wordDeleted = transOpt.IsDeletionOption();
  m_manager.GetSentenceStats().AddCreated();
}
Hypothesis::
~Hypothesis()
{
  // Release the per-feature state objects owned by this hypothesis.
  for (size_t k = 0; k < m_ffStates.size(); ++k) {
    delete m_ffStates[k];
  }

  // A winning hypothesis also owns every recombined (loser) arc.
  if (m_arcList != NULL) {
    for (ArcList::iterator it = m_arcList->begin(); it != m_arcList->end(); ++it) {
      delete *it;
    }
    m_arcList->clear();
    delete m_arcList;
    m_arcList = NULL;
  }
}
2015-05-02 13:45:24 +03:00
void
Hypothesis::
AddArc(Hypothesis *loserHypo)
{
if (!m_arcList) {
if (loserHypo->m_arcList) { // we don't have an arcList, but loser does
this->m_arcList = loserHypo->m_arcList; // take ownership, we'll delete
loserHypo->m_arcList = 0; // prevent a double deletion
} else {
2015-05-02 13:45:24 +03:00
this->m_arcList = new ArcList();
}
} else {
if (loserHypo->m_arcList) { // both have an arc list: merge. delete loser
size_t my_size = m_arcList->size();
size_t add_size = loserHypo->m_arcList->size();
this->m_arcList->resize(my_size + add_size, 0);
std::memcpy(&(*m_arcList)[0] + my_size, &(*loserHypo->m_arcList)[0], add_size * sizeof(Hypothesis *));
delete loserHypo->m_arcList;
loserHypo->m_arcList = 0;
} else { // loserHypo doesn't have any arcs
// DO NOTHING
}
}
2015-05-02 13:45:24 +03:00
m_arcList->push_back(loserHypo);
}
2015-05-02 13:45:24 +03:00
void
Hypothesis::
EvaluateWhenApplied(StatefulFeatureFunction const& sfff, int state_idx)
2015-05-02 13:45:24 +03:00
{
const StaticData &staticData = StaticData::Instance();
if (! staticData.IsFeatureFunctionIgnored( sfff )) {
// Manager& manager = this->GetManager(); //Get the manager and the ttask
// ttasksptr const& ttask = manager.GetTtask();
FFState const* prev = m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL;
2015-10-27 03:00:59 +03:00
m_ffStates[state_idx]
= sfff.EvaluateWhenApplied(*this, prev, &m_currScoreBreakdown);
}
2015-05-02 13:45:24 +03:00
}
2012-07-02 18:57:54 +04:00
2015-05-02 13:45:24 +03:00
void
Hypothesis::
EvaluateWhenApplied(const StatelessFeatureFunction& slff)
{
const StaticData &staticData = StaticData::Instance();
if (! staticData.IsFeatureFunctionIgnored( slff )) {
slff.EvaluateWhenApplied(*this, &m_currScoreBreakdown);
2012-09-04 20:50:30 +04:00
}
2015-05-02 13:45:24 +03:00
}
2015-05-02 13:45:24 +03:00
/***
* calculate the logarithm of our total translation score (sum up components)
*/
void
Hypothesis::
EvaluateWhenApplied(float estimatedScore)
2015-05-02 13:45:24 +03:00
{
IFVERBOSE(2) {
m_manager.GetSentenceStats().StartTimeOtherScore();
}
// some stateless score producers cache their values in the translation
// option: add these here
// language model scores for n-grams completely contained within a target
// phrase are also included here
// compute values of stateless feature functions that were not
// cached in the translation option
const vector<const StatelessFeatureFunction*>& sfs =
StatelessFeatureFunction::GetStatelessFeatureFunctions();
for (unsigned i = 0; i < sfs.size(); ++i) {
const StatelessFeatureFunction &ff = *sfs[i];
EvaluateWhenApplied(ff);
}
const vector<const StatefulFeatureFunction*>& ffs =
StatefulFeatureFunction::GetStatefulFeatureFunctions();
for (unsigned i = 0; i < ffs.size(); ++i) {
const StatefulFeatureFunction &ff = *ffs[i];
const StaticData &staticData = StaticData::Instance();
2015-05-02 13:45:24 +03:00
if (! staticData.IsFeatureFunctionIgnored(ff)) {
FFState const* s = m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL;
m_ffStates[i] = ff.EvaluateWhenApplied(*this, s, &m_currScoreBreakdown);
}
}
2015-05-02 13:45:24 +03:00
IFVERBOSE(2) {
m_manager.GetSentenceStats().StopTimeOtherScore();
m_manager.GetSentenceStats().StartTimeEstimateScore();
}
2015-05-02 13:45:24 +03:00
// FUTURE COST
m_estimatedScore = estimatedScore;
2015-05-02 13:45:24 +03:00
// TOTAL
m_futureScore = m_currScoreBreakdown.GetWeightedScore() + m_estimatedScore;
if (m_prevHypo) m_futureScore += m_prevHypo->GetScore();
2015-05-02 13:45:24 +03:00
IFVERBOSE(2) {
m_manager.GetSentenceStats().StopTimeEstimateScore();
}
}
// Predecessor hypothesis in the derivation; NULL for the seed hypothesis.
const Hypothesis* Hypothesis::GetPrevHypo()const
{
  return m_prevHypo;
}
/**
 * print hypothesis information for pharaoh-style logging
 */
void
Hypothesis::
PrintHypothesis() const
{
  // The seed hypothesis has nothing interesting to print.
  if (!m_prevHypo) {
    TRACE_ERR(endl << "NULL hypo" << endl);
    return;
  }

  TRACE_ERR(endl << "creating hypothesis "<< m_id <<" from "<< m_prevHypo->m_id<<" ( ");
  // Show (at most) the last two words of the predecessor's target phrase
  // as context for this extension.
  int end = (int)(m_prevHypo->GetCurrTargetPhrase().GetSize()-1);
  int start = end-1;
  if ( start < 0 ) start = 0;
  // "<s>" marks that the predecessor is itself the sentence start.
  if ( m_prevHypo->m_currTargetWordsRange.GetStartPos() == NOT_FOUND ) {
    TRACE_ERR( "<s> ");
  } else {
    TRACE_ERR( "... ");
  }

  if (end>=0) {
    Range range(start, end);
    TRACE_ERR( m_prevHypo->GetCurrTargetPhrase().GetSubString(range) << " ");
  }

  TRACE_ERR( ")"<<endl);
  // base score = predecessor's total minus its future-cost estimate.
  TRACE_ERR( "\tbase score "<< (m_prevHypo->m_futureScore - m_prevHypo->m_estimatedScore) <<endl);

  TRACE_ERR( "\tcovering "<<m_currSourceWordsRange.GetStartPos()<<"-"<<m_currSourceWordsRange.GetEndPos()
             <<": " << m_transOpt.GetInputPath().GetPhrase() << endl);
  TRACE_ERR( "\ttranslated as: "<<(Phrase&) GetCurrTargetPhrase()<<endl); // <<" => translation cost "<<m_score[ScoreType::PhraseTrans];
  if (m_wordDeleted) TRACE_ERR( "\tword deleted"<<endl);
  //	TRACE_ERR( "\tdistance: "<<GetCurrSourceWordsRange().CalcDistortion(m_prevHypo->GetCurrSourceWordsRange())); // << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<<endl;
  //	TRACE_ERR( "\tlanguage model cost "); // <<m_score[ScoreType::LanguageModelScore]<<endl;
  //	TRACE_ERR( "\tword penalty "); // <<(m_score[ScoreType::WordPenalty]*weightWordPenalty)<<endl;
  TRACE_ERR( "\tscore "<<m_futureScore - m_estimatedScore<<" + future cost "<<m_estimatedScore<<" = "<<m_futureScore<<endl);
  TRACE_ERR( "\tunweighted feature scores: " << m_currScoreBreakdown << endl);
  //PrintLMScores();
}
2015-05-02 13:45:24 +03:00
void
Hypothesis::
CleanupArcList()
{
// point this hypo's main hypo to itself
SetWinningHypo(this);
2015-05-02 13:45:24 +03:00
if (!m_arcList) return;
2015-05-02 13:45:24 +03:00
/* keep only number of arcs we need to create all n-best paths.
* However, may not be enough if only unique candidates are needed,
* so we'll keep all of arc list if nedd distinct n-best list
*/
2015-05-02 13:45:24 +03:00
const StaticData &staticData = StaticData::Instance();
AllOptions const& opts = m_manager.options();
size_t nBestSize = opts.nbest.nbest_size;
bool distinctNBest = opts.NBestDistinct();
2015-05-02 13:45:24 +03:00
if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
// prune arc list only if there too many arcs
NTH_ELEMENT4(m_arcList->begin(), m_arcList->begin() + nBestSize - 1,
m_arcList->end(), CompareHypothesisTotalScore());
// delete bad ones
ArcList::iterator i = m_arcList->begin() + nBestSize;
while (i != m_arcList->end()) delete *i++;
2015-05-02 13:45:24 +03:00
m_arcList->erase(m_arcList->begin() + nBestSize, m_arcList->end());
}
2015-05-02 13:45:24 +03:00
// set all arc's main hypo variable to this hypo
ArcList::iterator iter = m_arcList->begin();
for (; iter != m_arcList->end() ; ++iter) {
Hypothesis *arc = *iter;
arc->SetWinningHypo(this);
}
2015-05-02 13:45:24 +03:00
}
// The target phrase introduced by this hypothesis' translation option
// (only this edge's words, not the whole output — see GetOutputPhrase).
TargetPhrase const&
Hypothesis::
GetCurrTargetPhrase() const
{
  return m_transOpt.GetTargetPhrase();
}
2015-05-02 13:45:24 +03:00
void
Hypothesis::
GetOutputPhrase(Phrase &out) const
{
if (m_prevHypo != NULL)
m_prevHypo->GetOutputPhrase(out);
out.Append(GetCurrTargetPhrase());
}
// Macro expansion providing the ToString()/ToStream() helpers used by
// operator<< below (defined in Util.h).
TO_STRING_BODY(Hypothesis)
2015-05-02 13:45:24 +03:00
// friend
ostream& operator<<(ostream& out, const Hypothesis& hypo)
{
hypo.ToStream(out);
// words bitmap
out << "[" << hypo.m_sourceCompleted << "] ";
2015-05-02 13:45:24 +03:00
// scores
out << " [total=" << hypo.GetFutureScore() << "]";
2015-05-02 13:45:24 +03:00
out << " " << hypo.GetScoreBreakdown();
2015-05-02 13:45:24 +03:00
// alignment
out << " " << hypo.GetCurrTargetPhrase().GetAlignNonTerm();
2015-05-02 13:45:24 +03:00
return out;
}
2015-05-02 13:45:24 +03:00
std::string
Hypothesis::
GetSourcePhraseStringRep(const vector<FactorType> factorsToPrint) const
{
return m_transOpt.GetInputPath().GetPhrase().GetStringRep(factorsToPrint);
}
2015-05-02 13:45:24 +03:00
std::string
Hypothesis::
GetTargetPhraseStringRep(const vector<FactorType> factorsToPrint) const
{
return (m_prevHypo ? GetCurrTargetPhrase().GetStringRep(factorsToPrint) : "");
}
// Convenience overload: source phrase string with every factor printed.
std::string
Hypothesis::
GetSourcePhraseStringRep() const
{
  // Identity factor list 0 .. MAX_NUM_FACTORS-1.
  vector<FactorType> allFactors(MAX_NUM_FACTORS);
  for (size_t f = 0; f < MAX_NUM_FACTORS; ++f) {
    allFactors[f] = f;
  }
  return GetSourcePhraseStringRep(allFactors);
}
// Convenience overload: target phrase string with every factor printed.
std::string
Hypothesis::
GetTargetPhraseStringRep() const
{
  // Identity factor list 0 .. MAX_NUM_FACTORS-1.
  vector<FactorType> allFactors(MAX_NUM_FACTORS);
  for (size_t f = 0; f < MAX_NUM_FACTORS; ++f) {
    allFactors[f] = f;
  }
  return GetTargetPhraseStringRep(allFactors);
}
2015-05-02 13:45:24 +03:00
void
Hypothesis::
OutputAlignment(std::ostream &out) const
{
std::vector<const Hypothesis *> edges;
const Hypothesis *currentHypo = this;
while (currentHypo) {
edges.push_back(currentHypo);
currentHypo = currentHypo->GetPrevHypo();
}
2015-11-02 03:00:37 +03:00
OutputAlignment(out, edges, m_manager.options().output.WA_SortOrder);
2015-05-02 13:45:24 +03:00
}
2015-05-02 13:45:24 +03:00
void
Hypothesis::
2015-11-02 03:00:37 +03:00
OutputAlignment(ostream &out,
vector<const Hypothesis *> const& edges,
WordAlignmentSort waso)
2015-05-02 13:45:24 +03:00
{
size_t targetOffset = 0;
2015-05-02 13:45:24 +03:00
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
const Hypothesis &edge = *edges[currEdge];
const TargetPhrase &tp = edge.GetCurrTargetPhrase();
size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset, waso);
2015-05-02 13:45:24 +03:00
targetOffset += tp.GetSize();
}
2015-05-02 13:45:24 +03:00
// Used by --print-alignment-info, so no endl
}
2015-05-02 13:45:24 +03:00
void
Hypothesis::
OutputAlignment(ostream &out, const AlignmentInfo &ai,
size_t sourceOffset, size_t targetOffset,
2015-11-02 03:00:37 +03:00
WordAlignmentSort waso)
2015-05-02 13:45:24 +03:00
{
typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
AlignVec alignments = ai.GetSortedAlignments(waso);
2014-12-28 19:39:38 +03:00
2015-05-02 13:45:24 +03:00
AlignVec::const_iterator it;
for (it = alignments.begin(); it != alignments.end(); ++it) {
const std::pair<size_t,size_t> &alignment = **it;
2015-11-02 03:00:37 +03:00
out << alignment.first + sourceOffset << "-"
<< alignment.second + targetOffset << " ";
}
2015-05-02 13:45:24 +03:00
}
// Fill `map` (indexed by source start position) with the input phrase of
// each edge on the path ending at `hypo`. Recurses to the derivation's
// start first; the seed hypothesis covers nothing and records nothing.
void
Hypothesis::
OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
{
  const Hypothesis *prev = hypo->GetPrevHypo();
  if (prev == NULL) return;
  OutputInput(map, prev);
  size_t startPos = hypo->GetCurrSourceWordsRange().GetStartPos();
  map[startPos] = &hypo->GetTranslationOption().GetInputPath().GetPhrase();
}
// Print the source phrases of this derivation in source order.
void
Hypothesis::
OutputInput(std::ostream& os) const
{
  // One slot per source position; only positions where an edge's source
  // phrase starts are filled in.
  size_t len = this->GetInput().GetSize();
  std::vector<const Phrase*> inp_phrases(len, 0);
  OutputInput(inp_phrases, this);
  for (size_t i = 0; i < len; ++i) {
    if (inp_phrases[i] != NULL) os << *inp_phrases[i];
  }
}
void
Hypothesis::
OutputBestSurface(std::ostream &out, const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const
{
if (m_prevHypo) {
// recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
m_prevHypo->OutputBestSurface(out, outputFactorOrder, reportSegmentation, reportAllFactors);
}
2015-05-02 13:45:24 +03:00
OutputSurface(out, *this, outputFactorOrder, reportSegmentation, reportAllFactors);
}
//////////////////////////////////////////////////////////////////////////
/***
 * print surface factor only for the given phrase
 */
void
Hypothesis::
OutputSurface(std::ostream &out, const Hypothesis &edge,
              const std::vector<FactorType> &outputFactorOrder,
              char reportSegmentation, bool reportAllFactors) const
{
  UTIL_THROW_IF2(outputFactorOrder.size() == 0,
                 "Must specific at least 1 output factor");
  const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
  bool markUnknown = StaticData::Instance().GetMarkUnknown();
  if (reportAllFactors == true) {
    // Print the phrase with all factors via Phrase's own operator<<.
    out << phrase;
  } else {
    FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
    std::map<size_t, const Factor*> placeholders;
    if (placeholderFactor != NOT_FOUND) {
      // creates map of target position -> factor for placeholders
      placeholders = GetPlaceholders(edge, placeholderFactor);
    }

    size_t size = phrase.GetSize();
    for (size_t pos = 0 ; pos < size ; pos++) {
      // Surface form comes from the first requested factor...
      const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);

      if (placeholders.size()) {
        // do placeholders: substitute the source-side factor if this
        // target position is a placeholder.
        std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
        if (iter != placeholders.end()) {
          factor = iter->second;
        }
      }

      UTIL_THROW_IF2(factor == NULL,
                     "No factor 0 at position " << pos);

      //preface surface form with UNK if marking unknowns
      const Word &word = phrase.GetWord(pos);
      if(markUnknown && word.IsOOV()) {
        out << StaticData::Instance().GetUnknownWordPrefix()
            << *factor
            << StaticData::Instance().GetUnknownWordSuffix();
      } else {
        out << *factor;
      }

      // Remaining factors are appended pipe-separated.
      // NOTE: this inner 'factor' shadows the outer one above.
      for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
        const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
        UTIL_THROW_IF2(factor == NULL,
                       "No factor " << i << " at position " << pos);
        out << "|" << *factor;
      }

      out << " ";
    }
  }

  // trace ("report segmentation") option "-t" / "-tt"
  if (reportSegmentation > 0 && phrase.GetSize() > 0) {
    const Range &sourceRange = edge.GetCurrSourceWordsRange();
    const int sourceStart = sourceRange.GetStartPos();
    const int sourceEnd = sourceRange.GetEndPos();
    out << "|" << sourceStart << "-" << sourceEnd;    // enriched "-tt"
    if (reportSegmentation == 2) {
      // "-tt" additionally prints word alignment and this edge's score
      // delta relative to its predecessor.
      WordAlignmentSort waso = m_manager.options().output.WA_SortOrder;
      out << ",wa=";
      const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
      Hypothesis::OutputAlignment(out, ai, 0, 0, waso);
      out << ",total=";
      out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
      out << ",";
      ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
      scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
      bool with_labels = m_manager.options().nbest.include_feature_labels;
      scoreBreakdown.OutputAllFeatureScores(out, with_labels);
    }
    out << "| ";
  }
}
2015-05-02 13:45:24 +03:00
std::map<size_t, const Factor*>
Hypothesis::
GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
{
const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
const Phrase &inputPhrase = inputPath.GetPhrase();
std::map<size_t, const Factor*> ret;
for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
if (factor) {
std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
UTIL_THROW_IF2(targetPos.size() != 1,
"Placeholder should be aligned to 1, and only 1, word");
ret[*targetPos.begin()] = factor;
}
}
2015-05-02 13:45:24 +03:00
return ret;
}
size_t Hypothesis::hash() const
{
  // NOTE from Hieu: we could hash/compare the coverage bitmap POINTERS
  // (bitmaps come from a factory, so equal coverage means the same
  // object), but that makes the hypothesis-extension order — and hence
  // decoding — nondeterministic and breaks several regression tests, for
  // a minimal speedup. So we hash the actual bitmap contents.
  size_t seed = m_sourceCompleted.hash();

  // Fold in the hash of every stateful feature's state.
  for (size_t i = 0; i < m_ffStates.size(); ++i) {
    boost::hash_combine(seed, m_ffStates[i]->hash());
  }
  return seed;
}
bool Hypothesis::operator==(const Hypothesis& other) const
{
  // Coverage: compares bitmap ADDRESSES — per the note in hash(), bitmaps
  // come from a factory, so identical coverage shares one object.
  if (&m_sourceCompleted != &other.m_sourceCompleted) return false;

  // All stateful feature states must match for the hypotheses to be
  // recombinable.
  for (size_t i = 0; i < m_ffStates.size(); ++i) {
    if (*m_ffStates[i] != *other.m_ffStates[i]) return false;
  }
  return true;
}
2015-11-03 23:56:37 +03:00
bool
Hypothesis::
beats(Hypothesis const& b) const
{
if (m_futureScore != b.m_futureScore)
2015-11-03 23:56:37 +03:00
return m_futureScore > b.m_futureScore;
else if (m_estimatedScore != b.m_estimatedScore)
return m_estimatedScore > b.m_estimatedScore;
2015-11-03 23:56:37 +03:00
else if (m_prevHypo)
return b.m_prevHypo ? m_prevHypo->beats(*b.m_prevHypo) : true;
else return false;
// TO DO: add more tie breaking here
// results. We should compare other property of the hypos here.
// On the other hand, how likely is this going to happen?
}
}