mosesdecoder/moses/Hypothesis.cpp

468 lines
14 KiB
C++
Raw Normal View History

// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <iostream>
#include <limits>
#include <vector>
#include <algorithm>
Feature function overhaul. Each feature function is computed in one of three ways: 1) Stateless feature functions from the phrase table/generation table: these are computed when the TranslationOption is created. They become part of the ScoreBreakdown object contained in the TranslationOption and are added to the feature value vector when a hypothesis is extended. 2) Stateless feature functions that are computed during state exploration. Currently, only WordPenalty falls into this category, but these functions implement a method Evaluate which does not receive a Hypothesis or any contextual information. 3) Stateful feature functions: these features receive the arc information (translation option), compute some value and then return some context information. The context information created by a particular feature function is passed back to it as the previous context when a hypothesis originating at the node where the previous edge terminates is created. States in the search space may be recombined if the context information is identical. The context information must be stored in an object implementing the FFState interface. TODO: 1) the command line interface / MERT interface needs to go to named parameters that are otherwise opaque 2) StatefulFeatureFunction's Evaluate method should just take a TranslationOption and a context object. It is not good that it takes a hypothesis, because then people may be tempted to access information about the "previous" hypothesis without "declaring" this dependency. 3) Future cost estimates should be handled using feature functions. All stateful feature functions need some kind of future cost estimate. 4) Philipp's poor-man's cube pruning is broken. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2087 1f5c12ca-751b-0410-a591-d2e778427230
2009-02-06 18:43:06 +03:00
#include "TranslationOption.h"
#include "TranslationOptionCollection.h"
#include "Hypothesis.h"
#include "Util.h"
#include "SquareMatrix.h"
#include "StaticData.h"
#include "InputType.h"
#include "Manager.h"
#include "IOWrapper.h"
2013-05-24 21:02:49 +04:00
#include "moses/FF/FFState.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include <boost/foreach.hpp>
using namespace std;
namespace Moses
{
2015-11-10 19:27:28 +03:00
//size_t g_numHypos = 0;
2015-11-06 14:51:14 +03:00
2015-05-02 13:45:24 +03:00
/**
 * Construct a hypothesis that has no predecessor (m_prevHypo is NULL).
 *
 * @param manager          stored in m_manager
 * @param source           the input; stored in m_sourceInput and handed to
 *                         every stateful feature function to obtain its
 *                         empty-hypothesis state
 * @param initialTransOpt  stored in m_transOpt
 * @param bitmap           source coverage; stored in m_sourceCompleted
 * @param id               stored in m_id
 *
 * The current source range is [0, firstGap-1] when the first gap of the
 * bitmap is past position 0, and (NOT_FOUND, NOT_FOUND) otherwise. The
 * target range starts out as (NOT_FOUND, NOT_FOUND) and both scores as 0.
 */
Hypothesis::
Hypothesis(Manager& manager, InputType const& source,
           const TranslationOption &initialTransOpt,
           const Bitmap &bitmap, int id)
  : m_prevHypo(NULL)
  , m_sourceCompleted(bitmap)
  , m_sourceInput(source)
  , m_currSourceWordsRange(
      m_sourceCompleted.GetFirstGapPos()>0 ? 0 : NOT_FOUND,
      m_sourceCompleted.GetFirstGapPos()>0 ? m_sourceCompleted.GetFirstGapPos()-1 : NOT_FOUND)
  , m_currTargetWordsRange(NOT_FOUND, NOT_FOUND)
  , m_wordDeleted(false)
  , m_futureScore(0.0f)
  , m_estimatedScore(0.0f)
  , m_ffStates(StatefulFeatureFunction::GetStatefulFeatureFunctions().size())
  , m_arcList(NULL)
  , m_transOpt(initialTransOpt)
  , m_manager(manager)
  , m_id(id)
{
  // ++g_numHypos;

  // One state slot per stateful feature function: fill each slot with the
  // state that feature function reports for an empty hypothesis.
  const vector<const StatefulFeatureFunction*>& statefulFFs =
    StatefulFeatureFunction::GetStatefulFeatureFunctions();
  const size_t numStateful = statefulFFs.size();
  for (size_t idx = 0; idx != numStateful; ++idx) {
    m_ffStates[idx] = statefulFFs[idx]->EmptyHypothesisState(source);
  }
}
2015-05-02 13:45:24 +03:00
/***
 * Continue prevHypo by appending the target phrase of transOpt.
 *
 * @param prevHypo  hypothesis being extended; its input and manager are
 *                  reused by the new hypothesis
 * @param transOpt  translation option applied in this step
 * @param bitmap    source coverage; stored in m_sourceCompleted
 * @param id        stored in m_id
 *
 * The new target range begins right after prevHypo's target range and is as
 * long as transOpt's target phrase. The feature-state vector is sized like
 * prevHypo's but is not filled in here, and both scores start at 0.
 */
Hypothesis::
Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt,
           const Bitmap &bitmap, int id)
  : m_prevHypo(&prevHypo)
  , m_sourceCompleted(bitmap)
  , m_sourceInput(prevHypo.m_sourceInput)
  , m_currSourceWordsRange(transOpt.GetSourceWordsRange())
  , m_currTargetWordsRange(
      prevHypo.m_currTargetWordsRange.GetEndPos() + 1,
      prevHypo.m_currTargetWordsRange.GetEndPos() + transOpt.GetTargetPhrase().GetSize())
  , m_wordDeleted(false)
  , m_futureScore(0.0f)
  , m_estimatedScore(0.0f)
  , m_ffStates(prevHypo.m_ffStates.size())
  , m_arcList(NULL)
  , m_transOpt(transOpt)
  , m_manager(prevHypo.GetManager())
  , m_id(id)
{
  // ++g_numHypos;

  // Take over what the translation option already knows about itself:
  // whether it is a deletion option, and its score breakdown.
  m_wordDeleted = transOpt.IsDeletionOption();
  m_currScoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
}
/**
 * Delete every feature-function state held in m_ffStates and, if an arc
 * list exists, every hypothesis stored in it followed by the list itself.
 */
Hypothesis::
~Hypothesis()
{
  const size_t numStates = m_ffStates.size();
  for (size_t idx = 0; idx < numStates; ++idx) {
    delete m_ffStates[idx];
  }

  if (m_arcList == NULL) {
    return;
  }

  // the arc list holds pointers to hypotheses that are deleted here
  for (ArcList::iterator arc = m_arcList->begin(); arc != m_arcList->end(); ++arc) {
    delete *arc;
  }
  m_arcList->clear();

  delete m_arcList;
  m_arcList = NULL;
}
2015-05-02 13:45:24 +03:00
/**
 * Record loserHypo as an arc of this hypothesis.
 *
 * Any arcs that loserHypo has collected itself are moved into this
 * hypothesis' arc list first, and loserHypo's own list pointer is reset so
 * the arcs are not deleted twice. loserHypo is then appended as an arc.
 *
 * @param loserHypo  hypothesis to store as an arc; must not be NULL
 */
void
Hypothesis::
AddArc(Hypothesis *loserHypo)
{
  if (loserHypo->m_arcList) {
    if (!m_arcList) {
      // we don't have an arc list, but the loser does: take ownership of it
      m_arcList = loserHypo->m_arcList;
    } else {
      // both have an arc list: append the loser's arcs to ours.
      // A range insert is well defined even when the loser's list is empty,
      // unlike taking the address of element 0 for a raw memory copy.
      m_arcList->insert(m_arcList->end(),
                        loserHypo->m_arcList->begin(),
                        loserHypo->m_arcList->end());
      delete loserHypo->m_arcList;
    }
    loserHypo->m_arcList = NULL; // prevent a double deletion
  } else if (!m_arcList) {
    // neither side has an arc list yet
    m_arcList = new ArcList();
  }

  m_arcList->push_back(loserHypo);
}
2015-05-02 13:45:24 +03:00
/***
* calculate the logarithm of our total translation score (sum up components)
*/
void
Hypothesis::
EvaluateWhenApplied(float estimatedScore)
2015-05-02 13:45:24 +03:00
{
const StaticData &staticData = StaticData::Instance();
2015-05-02 13:45:24 +03:00
// some stateless score producers cache their values in the translation
// option: add these here
// language model scores for n-grams completely contained within a target
// phrase are also included here
// compute values of stateless feature functions that were not
// cached in the translation option
const vector<const StatelessFeatureFunction*>& sfs =
StatelessFeatureFunction::GetStatelessFeatureFunctions();
for (unsigned i = 0; i < sfs.size(); ++i) {
const StatelessFeatureFunction &ff = *sfs[i];
if(!staticData.IsFeatureFunctionIgnored(ff)) {
ff.EvaluateWhenApplied(*this, &m_currScoreBreakdown);
}
2015-05-02 13:45:24 +03:00
}
const vector<const StatefulFeatureFunction*>& ffs =
StatefulFeatureFunction::GetStatefulFeatureFunctions();
for (unsigned i = 0; i < ffs.size(); ++i) {
const StatefulFeatureFunction &ff = *ffs[i];
if(!staticData.IsFeatureFunctionIgnored(ff)) {
FFState const* s = m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL;
m_ffStates[i] = ff.EvaluateWhenApplied(*this, s, &m_currScoreBreakdown);
}
}
2015-05-02 13:45:24 +03:00
// FUTURE COST
m_estimatedScore = estimatedScore;
2015-05-02 13:45:24 +03:00
// TOTAL
m_futureScore = m_currScoreBreakdown.GetWeightedScore() + m_estimatedScore;
if (m_prevHypo) m_futureScore += m_prevHypo->GetScore();
2015-05-02 13:45:24 +03:00
}
2015-05-02 13:45:24 +03:00
/**
 * @return the stored predecessor pointer (m_prevHypo); the seed constructor
 *         sets it to NULL
 */
const Hypothesis*
Hypothesis::
GetPrevHypo() const
{
  return m_prevHypo;
}
2015-05-02 13:45:24 +03:00
/**
* print hypothesis information for pharaoh-style logging
*/
void
Hypothesis::
PrintHypothesis() const
{
if (!m_prevHypo) {
TRACE_ERR(endl << "NULL hypo" << endl);
return;
}
2015-05-02 13:45:24 +03:00
TRACE_ERR(endl << "creating hypothesis "<< m_id <<" from "<< m_prevHypo->m_id<<" ( ");
int end = (int)(m_prevHypo->GetCurrTargetPhrase().GetSize()-1);
int start = end-1;
if ( start < 0 ) start = 0;
if ( m_prevHypo->m_currTargetWordsRange.GetStartPos() == NOT_FOUND ) {
TRACE_ERR( "<s> ");
} else {
TRACE_ERR( "... ");
}
2015-05-02 13:45:24 +03:00
if (end>=0) {
2015-10-25 16:37:59 +03:00
Range range(start, end);
2015-05-02 13:45:24 +03:00
TRACE_ERR( m_prevHypo->GetCurrTargetPhrase().GetSubString(range) << " ");
}
2015-05-02 13:45:24 +03:00
TRACE_ERR( ")"<<endl);
TRACE_ERR( "\tbase score "<< (m_prevHypo->m_futureScore - m_prevHypo->m_estimatedScore) <<endl);
2015-05-02 13:45:24 +03:00
TRACE_ERR( "\tcovering "<<m_currSourceWordsRange.GetStartPos()<<"-"<<m_currSourceWordsRange.GetEndPos()
<<": " << m_transOpt.GetInputPath().GetPhrase() << endl);
TRACE_ERR( "\ttranslated as: "<<(Phrase&) GetCurrTargetPhrase()<<endl); // <<" => translation cost "<<m_score[ScoreType::PhraseTrans];
if (m_wordDeleted) TRACE_ERR( "\tword deleted"<<endl);
// TRACE_ERR( "\tdistance: "<<GetCurrSourceWordsRange().CalcDistortion(m_prevHypo->GetCurrSourceWordsRange())); // << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<<endl;
// TRACE_ERR( "\tlanguage model cost "); // <<m_score[ScoreType::LanguageModelScore]<<endl;
// TRACE_ERR( "\tword penalty "); // <<(m_score[ScoreType::WordPenalty]*weightWordPenalty)<<endl;
TRACE_ERR( "\tscore "<<m_futureScore - m_estimatedScore<<" + future cost "<<m_estimatedScore<<" = "<<m_futureScore<<endl);
2015-05-02 13:45:24 +03:00
TRACE_ERR( "\tunweighted feature scores: " << m_currScoreBreakdown << endl);
//PrintLMScores();
}
2015-05-02 13:45:24 +03:00
void
Hypothesis::
2015-11-13 20:56:16 +03:00
CleanupArcList(size_t nBestSize, bool distinctNBest)
2015-05-02 13:45:24 +03:00
{
// point this hypo's main hypo to itself
SetWinningHypo(this);
2015-05-02 13:45:24 +03:00
if (!m_arcList) return;
2015-05-02 13:45:24 +03:00
/* keep only number of arcs we need to create all n-best paths.
* However, may not be enough if only unique candidates are needed,
* so we'll keep all of arc list if nedd distinct n-best list
*/
2015-05-02 13:45:24 +03:00
if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
// prune arc list only if there too many arcs
NTH_ELEMENT4(m_arcList->begin(), m_arcList->begin() + nBestSize - 1,
m_arcList->end(), CompareHypothesisTotalScore());
// delete bad ones
ArcList::iterator i = m_arcList->begin() + nBestSize;
while (i != m_arcList->end()) delete *i++;
2015-05-02 13:45:24 +03:00
m_arcList->erase(m_arcList->begin() + nBestSize, m_arcList->end());
}
2015-05-02 13:45:24 +03:00
// set all arc's main hypo variable to this hypo
ArcList::iterator iter = m_arcList->begin();
for (; iter != m_arcList->end() ; ++iter) {
Hypothesis *arc = *iter;
arc->SetWinningHypo(this);
}
2015-05-02 13:45:24 +03:00
}
2015-05-02 13:45:24 +03:00
/**
 * @return the target phrase of the translation option held by this
 *         hypothesis (m_transOpt)
 */
TargetPhrase const&
Hypothesis::
GetCurrTargetPhrase() const
{
  TargetPhrase const& currentTarget = m_transOpt.GetTargetPhrase();
  return currentTarget;
}
2015-05-02 13:45:24 +03:00
/**
 * Append the output of this hypothesis and all its predecessors to `out`.
 *
 * Predecessors are handled first (recursively), so phrases are appended in
 * the order in which the hypotheses were chained.
 *
 * @param out  phrase that receives the appended target phrases
 */
void
Hypothesis::
GetOutputPhrase(Phrase &out) const
{
  if (m_prevHypo) {
    m_prevHypo->GetOutputPhrase(out);
  }

  const TargetPhrase &ownPhrase = GetCurrTargetPhrase();
  out.Append(ownPhrase);
}
2015-05-02 13:45:24 +03:00
// Expands the project macro TO_STRING_BODY for the Hypothesis class.
// NOTE(review): presumably this generates Hypothesis::ToString() on top of
// the stream operator defined below — confirm against the macro definition.
TO_STRING_BODY(Hypothesis)
2015-05-02 13:45:24 +03:00
/**
 * Stream a hypothesis (friend of Hypothesis).
 *
 * Writes, in this order: whatever ToStream() emits, the source coverage
 * bitmap in square brackets, the total (future) score, the score breakdown,
 * and the non-terminal alignment of the current target phrase.
 *
 * @return `out`, to allow chaining
 */
ostream& operator<<(ostream& out, const Hypothesis& hypo)
{
  hypo.ToStream(out);

  // words bitmap, then scores
  out << "[" << hypo.m_sourceCompleted << "] "
      << " [total=" << hypo.GetFutureScore() << "]"
      << " " << hypo.GetScoreBreakdown();

  // alignment
  out << " " << hypo.GetCurrTargetPhrase().GetAlignNonTerm();

  return out;
}
2015-05-02 13:45:24 +03:00
std::string
Hypothesis::
GetSourcePhraseStringRep(const vector<FactorType> factorsToPrint) const
{
return m_transOpt.GetInputPath().GetPhrase().GetStringRep(factorsToPrint);
}
2015-05-02 13:45:24 +03:00
std::string
Hypothesis::
GetTargetPhraseStringRep(const vector<FactorType> factorsToPrint) const
{
2015-11-12 03:00:40 +03:00
return (m_prevHypo
? GetCurrTargetPhrase().GetStringRep(factorsToPrint)
: "");
2015-05-02 13:45:24 +03:00
}
std::string
Hypothesis::
GetSourcePhraseStringRep() const
{
vector<FactorType> allFactors(MAX_NUM_FACTORS);
for(size_t i=0; i < MAX_NUM_FACTORS; i++)
allFactors[i] = i;
return GetSourcePhraseStringRep(allFactors);
}
std::string
Hypothesis::
GetTargetPhraseStringRep() const
{
vector<FactorType> allFactors(MAX_NUM_FACTORS);
for(size_t i=0; i < MAX_NUM_FACTORS; i++)
allFactors[i] = i;
return GetTargetPhraseStringRep(allFactors);
}
2015-12-06 03:12:01 +03:00
size_t
2015-05-02 13:45:24 +03:00
Hypothesis::
2015-12-06 03:12:01 +03:00
OutputAlignment(std::ostream &out, bool recursive=true) const
2015-05-02 13:45:24 +03:00
{
WordAlignmentSort const& waso = m_manager.options()->output.WA_SortOrder;
2015-12-06 03:12:01 +03:00
TargetPhrase const& tp = GetCurrTargetPhrase();
// call with head recursion to output things in the right order
size_t trg_off = recursive && m_prevHypo ? m_prevHypo->OutputAlignment(out) : 0;
size_t src_off = GetCurrSourceWordsRange().GetStartPos();
2015-12-09 03:00:35 +03:00
2015-12-06 03:12:01 +03:00
typedef std::pair<size_t,size_t> const* entry;
std::vector<entry> alnvec = tp.GetAlignTerm().GetSortedAlignments(waso);
BOOST_FOREACH(entry e, alnvec)
2015-12-09 03:00:35 +03:00
out << e->first + src_off << "-" << e->second + trg_off << " ";
2015-12-06 03:12:01 +03:00
return trg_off + tp.GetSize();
2015-05-02 13:45:24 +03:00
}
/**
 * Record, for `hypo` and each of its predecessors, the source phrase of its
 * translation option in `map`, indexed by the start position of its source
 * range. A hypothesis without a predecessor contributes nothing.
 *
 * @param map   slots indexed by source start position; must be large enough
 *              for every start position that occurs in the chain
 * @param hypo  hypothesis at which to start walking back; must not be NULL
 */
void
Hypothesis::
OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
{
  const Hypothesis *predecessor = hypo->GetPrevHypo();
  if (predecessor == NULL) {
    return;
  }

  OutputInput(map, predecessor);

  const size_t startPos = hypo->GetCurrSourceWordsRange().GetStartPos();
  map[startPos] = &hypo->GetTranslationOption().GetInputPath().GetPhrase();
}
void
Hypothesis::
OutputInput(std::ostream& os) const
{
size_t len = this->GetInput().GetSize();
std::vector<const Phrase*> inp_phrases(len, 0);
OutputInput(inp_phrases, this);
for (size_t i=0; i<len; ++i)
if (inp_phrases[i]) os << *inp_phrases[i];
}
std::map<size_t, const Factor*>
Hypothesis::
GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
{
const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
const Phrase &inputPhrase = inputPath.GetPhrase();
std::map<size_t, const Factor*> ret;
for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
if (factor) {
std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
UTIL_THROW_IF2(targetPos.size() != 1,
"Placeholder should be aligned to 1, and only 1, word");
ret[*targetPos.begin()] = factor;
}
}
2015-05-02 13:45:24 +03:00
return ret;
}
size_t Hypothesis::hash() const
{
size_t seed;
// coverage NOTE from Hieu - we could make bitmap comparison here
// and in operator== compare the pointers since the bitmaps come
// from a factory. Same coverage is guaranteed to have the same
// bitmap. However, this make the decoding algorithm
// non-deterministic as the order of hypo extension can be
// different. This causes several regression tests to break. Since
// the speedup is minimal, I'm gonna leave it comparing the actual
// bitmaps
seed = m_sourceCompleted.hash();
// states
for (size_t i = 0; i < m_ffStates.size(); ++i) {
2015-10-16 15:53:33 +03:00
const FFState *state = m_ffStates[i];
if (state) {
size_t hash = state->hash();
boost::hash_combine(seed, hash);
}
}
return seed;
}
/**
 * Two hypotheses compare equal when they refer to the very same coverage
 * bitmap object (compared by address) and every non-null feature-function
 * state of this hypothesis equals the state in the same slot of `other`.
 */
bool Hypothesis::operator==(const Hypothesis& other) const
{
  // coverage: identity of the bitmap object, not its contents
  const bool sameBitmap = (&m_sourceCompleted == &other.m_sourceCompleted);
  if (!sameBitmap) {
    return false;
  }

  // states: slots that are null on this side are skipped
  for (size_t idx = 0; idx < m_ffStates.size(); ++idx) {
    const FFState *mine = m_ffStates[idx];
    if (!mine) {
      continue;
    }
    const FFState *theirs = other.m_ffStates[idx];
    assert(theirs);
    if ((*mine) != (*theirs)) {
      return false;
    }
  }

  return true;
}
2015-11-03 23:56:37 +03:00
/**
 * Strict "better than" comparison against hypothesis b.
 *
 * Compares m_futureScore first, then m_estimatedScore; if both are equal,
 * the decision is delegated to the predecessors. A hypothesis with a
 * predecessor beats one without; two hypotheses without predecessors do not
 * beat each other.
 *
 * @param b  hypothesis to compare against
 * @return true if this hypothesis wins the comparison
 */
bool
Hypothesis::
beats(Hypothesis const& b) const
{
  if (m_futureScore != b.m_futureScore) {
    return m_futureScore > b.m_futureScore;
  }
  if (m_estimatedScore != b.m_estimatedScore) {
    return m_estimatedScore > b.m_estimatedScore;
  }

  // scores are tied: fall back to the history of the two hypotheses
  if (!m_prevHypo) {
    return false;
  }
  if (!b.m_prevHypo) {
    return true;
  }
  return m_prevHypo->beats(*b.m_prevHypo);

  // TODO: add more tie breaking here. Hypotheses that tie on everything
  // above are currently reported as "does not beat"; other properties of
  // the hypotheses could be compared. On the other hand, how likely is
  // this going to happen?
}
}