mosesdecoder/moses/Hypothesis.cpp

681 lines
21 KiB
C++
Raw Normal View History

// $Id$
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <iostream>
#include <limits>
#include <vector>
#include <algorithm>
Feature function overhaul. Each feature function is computed in one of three ways: 1) Stateless feature functions from the phrase table/generation table: these are computed when the TranslationOption is created. They become part of the ScoreBreakdown object contained in the TranslationOption and are added to the feature value vector when a hypothesis is extended. 2) Stateless feature functions that are computed during state exploration. Currently, only WordPenalty falls into this category, but these functions implement a method Evaluate which does not receive a Hypothesis or any contextual information. 3) Stateful feature functions: these features receive the arc information (translation option), compute some value and then return some context information. The context information created by a particular feature function is passed back to it as the previous context when a hypothesis originating at the node where the previous edge terminates is created. States in the search space may be recombined if the context information is identical. The context information must be stored in an object implementing the FFState interface. TODO: 1) the command line interface / MERT interface needs to go to named parameters that are otherwise opaque 2) StatefulFeatureFunction's Evaluate method should just take a TranslationOption and a context object. It is not good that it takes a hypothesis, because then people may be tempted to access information about the "previous" hypothesis without "declaring" this dependency. 3) Future cost estimates should be handled using feature functions. All stateful feature functions need some kind of future cost estimate. 4) Philipp's poor-man's cube pruning is broken. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2087 1f5c12ca-751b-0410-a591-d2e778427230
2009-02-06 18:43:06 +03:00
#include "TranslationOption.h"
#include "TranslationOptionCollection.h"
#include "Hypothesis.h"
#include "Util.h"
#include "SquareMatrix.h"
#include "StaticData.h"
#include "InputType.h"
#include "Manager.h"
#include "IOWrapper.h"
2013-05-24 21:02:49 +04:00
#include "moses/FF/FFState.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include <boost/foreach.hpp>
using namespace std;
namespace Moses
{
#ifdef USE_HYPO_POOL
2015-05-02 13:45:24 +03:00
ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
#endif
2015-05-02 13:45:24 +03:00
/**
 * Construct a seed hypothesis: no predecessor, zero total and future score.
 *
 * The coverage bitmap is built from source.GetSize() and
 * manager.GetSource().m_sourceCompleted. If that bitmap's first gap is at a
 * position > 0, the current source range is [0, firstGap-1]; otherwise both
 * ends are NOT_FOUND. The target range starts out as NOT_FOUND/NOT_FOUND.
 *
 * \param manager         supplies the hypothesis id and the sentence statistics
 * \param source          the input this hypothesis belongs to
 * \param initialTransOpt translation option stored with this hypothesis
 */
Hypothesis::
Hypothesis(Manager& manager, InputType const& source, const TranslationOption &initialTransOpt)
  : m_prevHypo(NULL)
  , m_sourceCompleted(source.GetSize(), manager.GetSource().m_sourceCompleted)
  , m_sourceInput(source)
  , m_currSourceWordsRange(
      m_sourceCompleted.GetFirstGapPos()>0 ? 0 : NOT_FOUND,
      m_sourceCompleted.GetFirstGapPos()>0 ? m_sourceCompleted.GetFirstGapPos()-1 : NOT_FOUND)
  , m_currTargetWordsRange(NOT_FOUND, NOT_FOUND)
  , m_wordDeleted(false)
  , m_totalScore(0.0f)
  , m_futureScore(0.0f)
  , m_ffStates(StatefulFeatureFunction::GetStatefulFeatureFunctions().size())
  , m_arcList(NULL)
  , m_transOpt(initialTransOpt)
  , m_manager(manager)
  , m_id(m_manager.GetNextHypoId())
{
  // Every stateful feature function provides the state an empty hypothesis
  // starts from; slot k of m_ffStates belongs to the k-th function.
  typedef vector<const StatefulFeatureFunction*> StatefulList;
  const StatefulList &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();

  unsigned slot = 0;
  for (StatefulList::const_iterator ff = statefulFFs.begin();
       ff != statefulFFs.end(); ++ff, ++slot) {
    m_ffStates[slot] = (*ff)->EmptyHypothesisState(source);
  }

  // Count this hypothesis in the per-sentence statistics.
  m_manager.GetSentenceStats().AddCreated();
}
2015-05-02 13:45:24 +03:00
/***
 * Extend prevHypo by applying transOpt.
 *
 * The new hypothesis copies the predecessor's coverage bitmap and input,
 * takes the source range of transOpt, and places its target range directly
 * after the predecessor's target range. Scores start at zero except for the
 * score breakdown, which is seeded with the breakdown of transOpt. The
 * feature-function state slots are allocated but not filled here.
 */
Hypothesis::
Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
  : m_prevHypo(&prevHypo)
  , m_sourceCompleted(prevHypo.m_sourceCompleted )
  , m_sourceInput(prevHypo.m_sourceInput)
  , m_currSourceWordsRange(transOpt.GetSourceWordsRange())
  , m_currTargetWordsRange(prevHypo.m_currTargetWordsRange.GetEndPos() + 1,
                           prevHypo.m_currTargetWordsRange.GetEndPos()
                           + transOpt.GetTargetPhrase().GetSize())
  , m_wordDeleted(false)
  , m_totalScore(0.0f)
  , m_futureScore(0.0f)
  , m_ffStates(prevHypo.m_ffStates.size())
  , m_arcList(NULL)
  , m_transOpt(transOpt)
  , m_manager(prevHypo.GetManager())
  , m_id(m_manager.GetNextHypoId())
{
  // Start the local score breakdown from the scores of the applied option.
  m_currScoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());

  // The span being translated now must not have been covered before
  // (m_sourceCompleted is still the predecessor's bitmap at this point).
  const WordsRange &newSpan = m_currSourceWordsRange;
  assert(!m_sourceCompleted.Overlap(newSpan));

  // Mark the span as covered.
  m_sourceCompleted.SetValue(newSpan.GetStartPos(), newSpan.GetEndPos(), true);

  m_wordDeleted = transOpt.IsDeletionOption();

  // Count this hypothesis in the per-sentence statistics.
  m_manager.GetSentenceStats().AddCreated();
}
/**
 * Release everything this hypothesis owns: its feature-function states and,
 * if present, the arc list together with every hypothesis stored in it.
 */
Hypothesis::
~Hypothesis()
{
  // Feature-function states (deleting a NULL slot is a no-op).
  for (unsigned slot = 0; slot != m_ffStates.size(); ++slot) {
    delete m_ffStates[slot];
  }

  if (m_arcList == NULL) {
    return;
  }

  // Free each hypothesis held in the arc list, then the list itself.
  for (ArcList::iterator iter = m_arcList->begin(); iter != m_arcList->end(); ++iter) {
    FREEHYPO(*iter);
  }
  m_arcList->clear();

  delete m_arcList;
  m_arcList = NULL;
}
2015-05-02 13:45:24 +03:00
/**
 * Append loserHypo to this hypothesis' arc list, first absorbing any arc
 * list that loserHypo itself owns.
 *
 * After the call loserHypo->m_arcList is NULL: its list has either been
 * adopted wholesale (when this hypothesis had none) or its entries have been
 * appended to this hypothesis' list and the emptied container deleted.
 *
 * \param loserHypo hypothesis to record as an arc; must not be NULL
 */
void
Hypothesis::
AddArc(Hypothesis *loserHypo)
{
  if (!m_arcList) {
    if (loserHypo->m_arcList) {
      // We have no list but the loser does: take ownership of it so that it
      // is deleted exactly once (by us).
      m_arcList = loserHypo->m_arcList;
      loserHypo->m_arcList = NULL;
    } else {
      m_arcList = new ArcList();
    }
  } else if (loserHypo->m_arcList) {
    // Both have a list: append the loser's entries to ours. A range insert
    // is well-defined even when the loser's list is empty, unlike taking
    // the address of element 0 for a raw memory copy.
    m_arcList->insert(m_arcList->end(),
                      loserHypo->m_arcList->begin(),
                      loserHypo->m_arcList->end());
    delete loserHypo->m_arcList;
    loserHypo->m_arcList = NULL;
  }
  // else: the loser has no arcs of its own, nothing to merge.

  m_arcList->push_back(loserHypo);
}
2015-05-02 13:45:24 +03:00
/***
 * Create a new hypothesis that extends this one with transOpt.
 * Simply forwards to the static Create(prevHypo, transOpt) factory.
 */
Hypothesis*
Hypothesis::
CreateNext(const TranslationOption &transOpt) const
{
  const Hypothesis &predecessor = *this;
  return Create(predecessor, transOpt);
}
2015-05-02 13:45:24 +03:00
/***
 * Factory for a hypothesis that extends prevHypo with transOpt.
 * With USE_HYPO_POOL the object is placement-constructed in storage obtained
 * from s_objectPool; otherwise it is allocated with plain new.
 */
Hypothesis*
Hypothesis::
Create(const Hypothesis &prevHypo, const TranslationOption &transOpt)
{
#ifndef USE_HYPO_POOL
  return new Hypothesis(prevHypo, transOpt);
#else
  Hypothesis *const slot = s_objectPool.getPtr();
  return new(slot) Hypothesis(prevHypo, transOpt);
#endif
}
/***
 * Factory for a seed hypothesis (the one without a predecessor).
 * With USE_HYPO_POOL the object is placement-constructed in storage obtained
 * from s_objectPool; otherwise it is allocated with plain new.
 */
Hypothesis*
Hypothesis::
Create(Manager& manager, InputType const& m_source,
       const TranslationOption &initialTransOpt)
{
#ifndef USE_HYPO_POOL
  return new Hypothesis(manager, m_source, initialTransOpt);
#else
  Hypothesis *const slot = s_objectPool.getPtr();
  return new(slot) Hypothesis(manager, m_source, initialTransOpt);
#endif
}
2013-05-29 21:16:15 +04:00
2015-05-02 13:45:24 +03:00
/** Three-way comparison used to decide whether two hypotheses can be
 * recombined. It doubles as a sorting function, which lets callers keep an
 * ordered list of hypotheses and makes recombination much quicker.
 *
 * The coverage bitmaps are compared first, then the feature-function states
 * slot by slot. A missing (NULL) state orders before a present one; two
 * missing states are considered equal.
 *
 * \return a negative value if this < compare, a positive value if
 *         this > compare, and 0 if they are equal
 */
int
Hypothesis::
RecombineCompare(const Hypothesis &compare) const
{
  int comp = m_sourceCompleted.Compare(compare.m_sourceCompleted);
  if (comp != 0)
    return comp;

  for (unsigned i = 0; i < m_ffStates.size(); ++i) {
    if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL) {
      // Do not subtract the pointers: they do not point into the same array
      // (undefined behaviour) and the ptrdiff_t result would be narrowed to
      // int, which can turn a non-zero difference into 0 or flip its sign.
      if (m_ffStates[i] != compare.m_ffStates[i])
        return (m_ffStates[i] == NULL) ? -1 : 1;
      // both NULL: equal in this slot, keep going
    } else {
      comp = m_ffStates[i]->Compare(*compare.m_ffStates[i]);
      if (comp != 0) return comp;
    }
  }

  return 0;
}
2012-07-02 18:57:54 +04:00
2015-05-02 13:45:24 +03:00
/**
 * Evaluate one stateful feature function for this hypothesis.
 *
 * Unless StaticData reports the function as ignored, it is called with this
 * hypothesis, the predecessor's state for the same slot (NULL when there is
 * no predecessor) and the score breakdown to update; the state it returns is
 * stored in m_ffStates[state_idx].
 *
 * \param sfff      the stateful feature function to run
 * \param state_idx slot of that function in m_ffStates
 */
void
Hypothesis::
EvaluateWhenApplied(StatefulFeatureFunction const& sfff,
                    int state_idx)
{
  if (StaticData::Instance().IsFeatureFunctionIgnored( sfff )) {
    return;
  }

  m_ffStates[state_idx] = sfff.EvaluateWhenApplied(
                            *this,
                            m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL,
                            &m_currScoreBreakdown);
}
2012-07-02 18:57:54 +04:00
2015-05-02 13:45:24 +03:00
void
Hypothesis::
EvaluateWhenApplied(const StatelessFeatureFunction& slff)
{
const StaticData &staticData = StaticData::Instance();
if (! staticData.IsFeatureFunctionIgnored( slff )) {
slff.EvaluateWhenApplied(*this, &m_currScoreBreakdown);
2012-09-04 20:50:30 +04:00
}
2015-05-02 13:45:24 +03:00
}
2015-05-02 13:45:24 +03:00
/***
* calculate the logarithm of our total translation score (sum up components)
*/
void
Hypothesis::
EvaluateWhenApplied(const SquareMatrix &futureScore)
{
IFVERBOSE(2) {
m_manager.GetSentenceStats().StartTimeOtherScore();
}
// some stateless score producers cache their values in the translation
// option: add these here
// language model scores for n-grams completely contained within a target
// phrase are also included here
// compute values of stateless feature functions that were not
// cached in the translation option
const vector<const StatelessFeatureFunction*>& sfs =
StatelessFeatureFunction::GetStatelessFeatureFunctions();
for (unsigned i = 0; i < sfs.size(); ++i) {
const StatelessFeatureFunction &ff = *sfs[i];
EvaluateWhenApplied(ff);
}
const vector<const StatefulFeatureFunction*>& ffs =
StatefulFeatureFunction::GetStatefulFeatureFunctions();
for (unsigned i = 0; i < ffs.size(); ++i) {
const StatefulFeatureFunction &ff = *ffs[i];
const StaticData &staticData = StaticData::Instance();
2015-05-02 13:45:24 +03:00
if (! staticData.IsFeatureFunctionIgnored(ff)) {
m_ffStates[i] = ff.EvaluateWhenApplied(*this,
m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
&m_currScoreBreakdown);
}
}
2015-05-02 13:45:24 +03:00
IFVERBOSE(2) {
m_manager.GetSentenceStats().StopTimeOtherScore();
m_manager.GetSentenceStats().StartTimeEstimateScore();
}
2015-05-02 13:45:24 +03:00
// FUTURE COST
m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted );
2015-05-02 13:45:24 +03:00
// TOTAL
m_totalScore = m_currScoreBreakdown.GetWeightedScore() + m_futureScore;
if (m_prevHypo) m_totalScore += m_prevHypo->GetScore();
2015-05-02 13:45:24 +03:00
IFVERBOSE(2) {
m_manager.GetSentenceStats().StopTimeEstimateScore();
}
}
2015-05-02 13:45:24 +03:00
/**
 * \return the hypothesis this one was extended from, or NULL for a seed
 *         hypothesis
 */
const Hypothesis*
Hypothesis::
GetPrevHypo() const
{
  const Hypothesis *predecessor = m_prevHypo;
  return predecessor;
}
2015-05-02 13:45:24 +03:00
/**
* print hypothesis information for pharaoh-style logging
*/
void
Hypothesis::
PrintHypothesis() const
{
if (!m_prevHypo) {
TRACE_ERR(endl << "NULL hypo" << endl);
return;
}
2015-05-02 13:45:24 +03:00
TRACE_ERR(endl << "creating hypothesis "<< m_id <<" from "<< m_prevHypo->m_id<<" ( ");
int end = (int)(m_prevHypo->GetCurrTargetPhrase().GetSize()-1);
int start = end-1;
if ( start < 0 ) start = 0;
if ( m_prevHypo->m_currTargetWordsRange.GetStartPos() == NOT_FOUND ) {
TRACE_ERR( "<s> ");
} else {
TRACE_ERR( "... ");
}
2015-05-02 13:45:24 +03:00
if (end>=0) {
WordsRange range(start, end);
TRACE_ERR( m_prevHypo->GetCurrTargetPhrase().GetSubString(range) << " ");
}
2015-05-02 13:45:24 +03:00
TRACE_ERR( ")"<<endl);
TRACE_ERR( "\tbase score "<< (m_prevHypo->m_totalScore - m_prevHypo->m_futureScore) <<endl);
TRACE_ERR( "\tcovering "<<m_currSourceWordsRange.GetStartPos()<<"-"<<m_currSourceWordsRange.GetEndPos()
<<": " << m_transOpt.GetInputPath().GetPhrase() << endl);
TRACE_ERR( "\ttranslated as: "<<(Phrase&) GetCurrTargetPhrase()<<endl); // <<" => translation cost "<<m_score[ScoreType::PhraseTrans];
if (m_wordDeleted) TRACE_ERR( "\tword deleted"<<endl);
// TRACE_ERR( "\tdistance: "<<GetCurrSourceWordsRange().CalcDistortion(m_prevHypo->GetCurrSourceWordsRange())); // << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<<endl;
// TRACE_ERR( "\tlanguage model cost "); // <<m_score[ScoreType::LanguageModelScore]<<endl;
// TRACE_ERR( "\tword penalty "); // <<(m_score[ScoreType::WordPenalty]*weightWordPenalty)<<endl;
TRACE_ERR( "\tscore "<<m_totalScore - m_futureScore<<" + future cost "<<m_futureScore<<" = "<<m_totalScore<<endl);
TRACE_ERR( "\tunweighted feature scores: " << m_currScoreBreakdown << endl);
//PrintLMScores();
}
2015-05-02 13:45:24 +03:00
void
Hypothesis::
CleanupArcList()
{
// point this hypo's main hypo to itself
SetWinningHypo(this);
2015-05-02 13:45:24 +03:00
if (!m_arcList) return;
2015-05-02 13:45:24 +03:00
/* keep only number of arcs we need to create all n-best paths.
* However, may not be enough if only unique candidates are needed,
* so we'll keep all of arc list if nedd distinct n-best list
*/
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
bool distinctNBest = (staticData.GetDistinctNBest() ||
staticData.GetLatticeSamplesSize() ||
staticData.UseMBR() ||
staticData.GetOutputSearchGraph() ||
staticData.GetOutputSearchGraphSLF() ||
staticData.GetOutputSearchGraphHypergraph() ||
staticData.UseLatticeMBR());
if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
// prune arc list only if there too many arcs
NTH_ELEMENT4(m_arcList->begin(), m_arcList->begin() + nBestSize - 1,
m_arcList->end(), CompareHypothesisTotalScore());
// delete bad ones
ArcList::iterator iter;
for (iter = m_arcList->begin() + nBestSize; iter != m_arcList->end() ; ++iter)
FREEHYPO(*iter);
m_arcList->erase(m_arcList->begin() + nBestSize, m_arcList->end());
}
2015-05-02 13:45:24 +03:00
// set all arc's main hypo variable to this hypo
ArcList::iterator iter = m_arcList->begin();
for (; iter != m_arcList->end() ; ++iter) {
Hypothesis *arc = *iter;
arc->SetWinningHypo(this);
}
2015-05-02 13:45:24 +03:00
}
2015-05-02 13:45:24 +03:00
/**
 * \return the target phrase of the translation option this hypothesis was
 *         built with
 */
TargetPhrase const&
Hypothesis::
GetCurrTargetPhrase() const
{
  TargetPhrase const& current = m_transOpt.GetTargetPhrase();
  return current;
}
2015-05-02 13:45:24 +03:00
/**
 * Append the full output of this hypothesis to out: first (recursively) the
 * output of all predecessors, then this hypothesis' own target phrase.
 */
void
Hypothesis::
GetOutputPhrase(Phrase &out) const
{
  if (m_prevHypo) {
    // Earlier parts of the translation come first.
    m_prevHypo->GetOutputPhrase(out);
  }

  out.Append(GetCurrTargetPhrase());
}
2015-05-02 13:45:24 +03:00
// Expands the project's TO_STRING_BODY macro for Hypothesis.
// NOTE(review): presumably this generates a string-conversion member built on
// the stream operator defined below - confirm against the macro definition.
TO_STRING_BODY(Hypothesis)
2015-05-02 13:45:24 +03:00
// friend
/**
 * Stream a one-line description of hypo: whatever ToStream() writes, then
 * the coverage bitmap in brackets, the total score, the score breakdown and
 * the non-terminal alignment of the current target phrase.
 */
ostream& operator<<(ostream& out, const Hypothesis& hypo)
{
  hypo.ToStream(out);

  // words bitmap, then scores, then alignment
  out << "[" << hypo.m_sourceCompleted << "] "
      << " [total=" << hypo.GetTotalScore() << "]"
      << " " << hypo.GetScoreBreakdown()
      << " " << hypo.GetCurrTargetPhrase().GetAlignNonTerm();

  return out;
}
2015-05-02 13:45:24 +03:00
std::string
Hypothesis::
GetSourcePhraseStringRep(const vector<FactorType> factorsToPrint) const
{
return m_transOpt.GetInputPath().GetPhrase().GetStringRep(factorsToPrint);
}
2015-05-02 13:45:24 +03:00
/**
 * \param factorsToPrint factors to include in the representation
 * \return string representation of the current target phrase, or the empty
 *         string when this hypothesis has no predecessor
 */
std::string
Hypothesis::
GetTargetPhraseStringRep(const vector<FactorType> factorsToPrint) const
{
  if (!m_prevHypo) {
    return "";
  }
  return GetCurrTargetPhrase().GetStringRep(factorsToPrint);
}
std::string
Hypothesis::
GetSourcePhraseStringRep() const
{
vector<FactorType> allFactors(MAX_NUM_FACTORS);
for(size_t i=0; i < MAX_NUM_FACTORS; i++)
allFactors[i] = i;
return GetSourcePhraseStringRep(allFactors);
}
std::string
Hypothesis::
GetTargetPhraseStringRep() const
{
vector<FactorType> allFactors(MAX_NUM_FACTORS);
for(size_t i=0; i < MAX_NUM_FACTORS; i++)
allFactors[i] = i;
return GetTargetPhraseStringRep(allFactors);
}
2015-05-02 13:45:24 +03:00
void
Hypothesis::
OutputAlignment(std::ostream &out) const
{
std::vector<const Hypothesis *> edges;
const Hypothesis *currentHypo = this;
while (currentHypo) {
edges.push_back(currentHypo);
currentHypo = currentHypo->GetPrevHypo();
}
2015-05-02 13:45:24 +03:00
OutputAlignment(out, edges);
2015-05-02 13:45:24 +03:00
}
2015-05-02 13:45:24 +03:00
/**
 * Write the term alignments of a path of hypotheses.
 *
 * \param out   destination stream
 * \param edges the path, ordered from the last hypothesis back to the first;
 *              it is walked in reverse so output follows translation order
 *
 * Source positions are offset by the start of each edge's source range,
 * target positions by the number of target words emitted so far. No newline
 * is written (used by --print-alignment-info).
 */
void
Hypothesis::
OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
{
  typedef vector<const Hypothesis *>::const_reverse_iterator EdgeIter;

  size_t targetOffset = 0;
  for (EdgeIter it = edges.rbegin(); it != edges.rend(); ++it) {
    const Hypothesis &edge = **it;
    const TargetPhrase &tp = edge.GetCurrTargetPhrase();
    const size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();

    OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);

    targetOffset += tp.GetSize();
  }
}
2015-05-02 13:45:24 +03:00
/**
 * Write every alignment point of ai as "source-target " (note the trailing
 * space), in the order returned by GetSortedAlignments().
 *
 * \param sourceOffset added to each source position
 * \param targetOffset added to each target position
 */
void
Hypothesis::
OutputAlignment(ostream &out, const AlignmentInfo &ai,
                size_t sourceOffset, size_t targetOffset)
{
  typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
  AlignVec alignments = ai.GetSortedAlignments();

  for (size_t idx = 0; idx < alignments.size(); ++idx) {
    const std::pair<size_t,size_t> &point = *alignments[idx];
    out << point.first + sourceOffset << "-" << point.second + targetOffset << " ";
  }
}
/**
 * Fill map with the source phrase of every hypothesis on the path ending in
 * hypo, indexed by the start position of that hypothesis' source range.
 * The hypothesis without predecessor contributes nothing. Entries are written
 * from the oldest hypothesis to the newest.
 */
void
Hypothesis::
OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
{
  // Collect the path newest-first, stopping before the seed hypothesis.
  std::vector<const Hypothesis*> path;
  for (const Hypothesis *cur = hypo; cur->GetPrevHypo(); cur = cur->GetPrevHypo()) {
    path.push_back(cur);
  }

  // Replay it oldest-first.
  for (size_t remaining = path.size(); remaining > 0; --remaining) {
    const Hypothesis *cur = path[remaining - 1];
    map[cur->GetCurrSourceWordsRange().GetStartPos()]
      = &cur->GetTranslationOption().GetInputPath().GetPhrase();
  }
}
void
Hypothesis::
OutputInput(std::ostream& os) const
{
size_t len = this->GetInput().GetSize();
std::vector<const Phrase*> inp_phrases(len, 0);
OutputInput(inp_phrases, this);
for (size_t i=0; i<len; ++i)
if (inp_phrases[i]) os << *inp_phrases[i];
}
void
Hypothesis::
OutputBestSurface(std::ostream &out, const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const
{
if (m_prevHypo) {
// recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
m_prevHypo->OutputBestSurface(out, outputFactorOrder, reportSegmentation, reportAllFactors);
}
2015-05-02 13:45:24 +03:00
OutputSurface(out, *this, outputFactorOrder, reportSegmentation, reportAllFactors);
}
2015-05-02 13:45:24 +03:00
//////////////////////////////////////////////////////////////////////////
/***
* print surface factor only for the given phrase
*/
void
Hypothesis::
OutputSurface(std::ostream &out, const Hypothesis &edge,
const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
"Must specific at least 1 output factor");
const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
bool markUnknown = StaticData::Instance().GetMarkUnknown();
if (reportAllFactors == true) {
out << phrase;
} else {
FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
std::map<size_t, const Factor*> placeholders;
if (placeholderFactor != NOT_FOUND) {
// creates map of target position -> factor for placeholders
placeholders = GetPlaceholders(edge, placeholderFactor);
}
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
2015-05-02 13:45:24 +03:00
if (placeholders.size()) {
// do placeholders
std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
if (iter != placeholders.end()) {
factor = iter->second;
}
}
2015-05-02 13:45:24 +03:00
UTIL_THROW_IF2(factor == NULL,
"No factor 0 at position " << pos);
//preface surface form with UNK if marking unknowns
const Word &word = phrase.GetWord(pos);
if(markUnknown && word.IsOOV()) {
out << "UNK" << *factor;
} else {
out << *factor;
}
2015-05-02 13:45:24 +03:00
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor == NULL,
"No factor " << i << " at position " << pos);
out << "|" << *factor;
}
2015-05-02 13:45:24 +03:00
out << " ";
}
}
2015-05-02 13:45:24 +03:00
// trace ("report segmentation") option "-t" / "-tt"
if (reportSegmentation > 0 && phrase.GetSize() > 0) {
const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
const int sourceStart = sourceRange.GetStartPos();
const int sourceEnd = sourceRange.GetEndPos();
out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
if (reportSegmentation == 2) {
out << ",wa=";
const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
Hypothesis::OutputAlignment(out, ai, 0, 0);
out << ",total=";
out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
out << ",";
ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
scoreBreakdown.OutputAllFeatureScores(out);
}
2015-05-02 13:45:24 +03:00
out << "| ";
}
}
2015-05-02 13:45:24 +03:00
/**
 * Build a map from target position to placeholder factor for hypo.
 *
 * For every source word of hypo's input phrase that carries placeholderFactor,
 * the factor is stored under the target position that word is aligned to.
 * Throws (via UTIL_THROW_IF2) unless such a word is aligned to exactly one
 * target word.
 */
std::map<size_t, const Factor*>
Hypothesis::
GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
{
  const Phrase &inputPhrase = hypo.GetTranslationOption().GetInputPath().GetPhrase();

  std::map<size_t, const Factor*> ret;

  const size_t numSourceWords = inputPhrase.GetSize();
  for (size_t sourcePos = 0; sourcePos < numSourceWords; ++sourcePos) {
    const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
    if (!factor) {
      continue;  // this source word is not a placeholder
    }

    std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
    UTIL_THROW_IF2(targetPos.size() != 1,
                   "Placeholder should be aligned to 1, and only 1, word");
    ret[*targetPos.begin()] = factor;
  }

  return ret;
}
#ifdef HAVE_XMLRPC_C
2015-05-02 13:45:24 +03:00
/**
 * Append the word alignment of this hypothesis' own phrase pair to dest.
 * Each alignment point becomes an xmlrpc struct with the keys "source-word"
 * and "target-word", holding positions offset by the start of the current
 * source and target range respectively.
 */
void
Hypothesis::
OutputLocalWordAlignment(vector<xmlrpc_c::value>& dest) const
{
  using namespace std;
  typedef pair<size_t,size_t> item;

  WordsRange const& src = this->GetCurrSourceWordsRange();
  WordsRange const& trg = this->GetCurrTargetWordsRange();

  vector<item const*> a
    = this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments();

  // M is reused for every point; both keys are overwritten each time.
  map<string, xmlrpc_c::value> M;
  for (vector<item const*>::const_iterator it = a.begin(); it != a.end(); ++it) {
    item const* p = *it;
    M["source-word"] = xmlrpc_c::value_int(src.GetStartPos() + p->first);
    M["target-word"] = xmlrpc_c::value_int(trg.GetStartPos() + p->second);
    dest.push_back(xmlrpc_c::value_struct(M));
  }
}
2015-05-02 13:45:24 +03:00
/**
 * Append the word alignments of the whole path ending in this hypothesis to
 * out, starting with the first hypothesis of the path.
 */
void
Hypothesis::
OutputWordAlignment(vector<xmlrpc_c::value>& out) const
{
  // Collect the path newest-first ...
  vector<Hypothesis const*> path;
  Hypothesis const* cur = this;
  while (cur) {
    path.push_back(cur);
    cur = cur->GetPrevHypo();
  }

  // ... and emit it oldest-first.
  typedef vector<Hypothesis const*>::const_reverse_iterator PathIter;
  for (PathIter it = path.rbegin(); it != path.rend(); ++it) {
    (*it)->OutputLocalWordAlignment(out);
  }
}
#endif
}