2008-06-11 14:52:57 +04:00
|
|
|
// $Id$
|
|
|
|
// vim:tabstop=2
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2006 University of Edinburgh
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
#include <limits>
|
|
|
|
#include <vector>
|
|
|
|
#include <algorithm>
|
2009-02-06 18:43:06 +03:00
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
#include "TranslationOption.h"
|
|
|
|
#include "TranslationOptionCollection.h"
|
|
|
|
#include "Hypothesis.h"
|
|
|
|
#include "Util.h"
|
|
|
|
#include "SquareMatrix.h"
|
|
|
|
#include "StaticData.h"
|
|
|
|
#include "InputType.h"
|
2009-08-07 20:47:54 +04:00
|
|
|
#include "Manager.h"
|
2014-12-28 22:18:40 +03:00
|
|
|
#include "IOWrapper.h"
|
2013-05-24 21:02:49 +04:00
|
|
|
#include "moses/FF/FFState.h"
|
2013-10-03 14:05:53 +04:00
|
|
|
#include "moses/FF/StatefulFeatureFunction.h"
|
|
|
|
#include "moses/FF/StatelessFeatureFunction.h"
|
2008-06-11 14:52:57 +04:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
2008-06-11 14:52:57 +04:00
|
|
|
|
|
|
|
#ifdef USE_HYPO_POOL
// Storage pool used by Hypothesis::Create() when USE_HYPO_POOL is defined.
// NOTE(review): the second argument (300000) is presumably the pool's initial
// capacity / chunk size -- confirm against ObjectPool.
ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
#endif
|
|
|
|
|
2013-08-05 19:53:15 +04:00
|
|
|
/**
 * Construct the initial (seed) hypothesis of a search: it has no predecessor.
 *
 * \param manager         owner of the search; supplies the hypothesis id and
 *                        the sentence statistics, and its source provides the
 *                        initial coverage bitmap
 * \param source          input being translated
 * \param initialTransOpt translation option attached to this hypothesis
 *
 * The source range is [0, firstGap-1] when the initial coverage already has a
 * covered prefix, otherwise (NOT_FOUND, NOT_FOUND). The target range starts
 * out as (NOT_FOUND, NOT_FOUND).
 */
Hypothesis::Hypothesis(Manager& manager, InputType const& source, const TranslationOption &initialTransOpt)
  : m_prevHypo(NULL)
  , m_sourceCompleted(source.GetSize(), manager.GetSource().m_sourceCompleted)
  , m_sourceInput(source)
  , m_currSourceWordsRange(
      m_sourceCompleted.GetFirstGapPos()>0 ? 0 : NOT_FOUND,
      m_sourceCompleted.GetFirstGapPos()>0 ? m_sourceCompleted.GetFirstGapPos()-1 : NOT_FOUND)
  , m_currTargetWordsRange(NOT_FOUND, NOT_FOUND)
  , m_wordDeleted(false)
  , m_totalScore(0.0f)
  , m_futureScore(0.0f)
  , m_ffStates(StatefulFeatureFunction::GetStatefulFeatureFunctions().size())
  , m_arcList(NULL)
  , m_transOpt(initialTransOpt)
  , m_manager(manager)
  , m_id(m_manager.GetNextHypoId())
{
  // every stateful feature function contributes its "empty hypothesis" state
  const vector<const StatefulFeatureFunction*>& statefulFFs =
    StatefulFeatureFunction::GetStatefulFeatureFunctions();
  for (size_t idx = 0; idx < statefulFFs.size(); ++idx) {
    m_ffStates[idx] = statefulFFs[idx]->EmptyHypothesisState(source);
  }

  // book-keeping: one more hypothesis created for this sentence
  m_manager.GetSentenceStats().AddCreated();
}
|
|
|
|
|
|
|
|
/***
|
|
|
|
* continue prevHypo by appending the phrases in transOpt
|
|
|
|
*/
|
|
|
|
Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
|
2011-02-24 16:14:42 +03:00
|
|
|
: m_prevHypo(&prevHypo)
|
|
|
|
, m_sourceCompleted (prevHypo.m_sourceCompleted )
|
|
|
|
, m_sourceInput (prevHypo.m_sourceInput)
|
|
|
|
, m_currSourceWordsRange (transOpt.GetSourceWordsRange())
|
|
|
|
, m_currTargetWordsRange ( prevHypo.m_currTargetWordsRange.GetEndPos() + 1
|
|
|
|
,prevHypo.m_currTargetWordsRange.GetEndPos() + transOpt.GetTargetPhrase().GetSize())
|
|
|
|
, m_wordDeleted(false)
|
2013-06-05 22:44:43 +04:00
|
|
|
, m_totalScore(0.0f)
|
|
|
|
, m_futureScore(0.0f)
|
2009-08-07 20:47:54 +04:00
|
|
|
, m_ffStates(prevHypo.m_ffStates.size())
|
2011-02-24 16:14:42 +03:00
|
|
|
, m_arcList(NULL)
|
2013-08-05 19:53:15 +04:00
|
|
|
, m_transOpt(transOpt)
|
2009-08-07 20:47:54 +04:00
|
|
|
, m_manager(prevHypo.GetManager())
|
2011-02-24 16:14:42 +03:00
|
|
|
, m_id(m_manager.GetNextHypoId())
|
2008-06-11 14:52:57 +04:00
|
|
|
{
|
2014-08-05 20:57:57 +04:00
|
|
|
m_currScoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
|
2013-04-23 00:25:36 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
// assert that we are not extending our hypothesis by retranslating something
|
|
|
|
// that this hypothesis has already translated!
|
2013-11-19 17:19:23 +04:00
|
|
|
assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange));
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
//_hash_computed = false;
|
2008-06-11 14:52:57 +04:00
|
|
|
m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true);
|
|
|
|
m_wordDeleted = transOpt.IsDeletionOption();
|
2011-02-24 16:14:42 +03:00
|
|
|
m_manager.GetSentenceStats().AddCreated();
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Release everything this hypothesis owns: its feature-function states and,
 * if present, the arc list together with every hypothesis stored in it.
 */
Hypothesis::~Hypothesis()
{
  const size_t numStates = m_ffStates.size();
  for (size_t idx = 0; idx < numStates; ++idx) {
    delete m_ffStates[idx];
  }

  if (m_arcList != NULL) {
    // the arcs are owned by this hypothesis, so free them before the list
    for (ArcList::iterator arc = m_arcList->begin(); arc != m_arcList->end(); ++arc) {
      FREEHYPO(*arc);
    }
    m_arcList->clear();

    delete m_arcList;
    m_arcList = NULL;
  }
}
|
|
|
|
|
|
|
|
/**
 * Record loserHypo as an arc of this hypothesis.
 *
 * Any arcs loserHypo has collected are transferred to this hypothesis first,
 * and loserHypo's own list pointer is reset so the list is never deleted
 * twice. loserHypo itself is then appended to this hypothesis' arc list.
 *
 * \param loserHypo hypothesis to attach; must not be NULL
 */
void Hypothesis::AddArc(Hypothesis *loserHypo)
{
  ArcList *loserArcs = loserHypo->m_arcList;

  if (loserArcs) {
    if (!m_arcList) {
      // we have no list yet: take ownership of the loser's, we'll delete it
      m_arcList = loserArcs;
    } else {
      // both have a list: append the loser's arcs to ours and drop its list.
      // A range insert is well-defined for empty lists, unlike taking the
      // address of element 0 for a raw memory copy.
      m_arcList->insert(m_arcList->end(), loserArcs->begin(), loserArcs->end());
      delete loserArcs;
    }
    loserHypo->m_arcList = 0; // prevent a double deletion
  } else if (!m_arcList) {
    // neither side has a list yet
    m_arcList = new ArcList();
  }

  m_arcList->push_back(loserHypo);
}
|
|
|
|
|
|
|
|
/***
|
|
|
|
* return the subclass of Hypothesis most appropriate to the given translation option
|
|
|
|
*/
|
2013-09-17 17:06:17 +04:00
|
|
|
Hypothesis* Hypothesis::CreateNext(const TranslationOption &transOpt) const
|
2008-06-11 14:52:57 +04:00
|
|
|
{
|
2013-09-17 17:06:17 +04:00
|
|
|
return Create(*this, transOpt);
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/***
|
|
|
|
* return the subclass of Hypothesis most appropriate to the given translation option
|
|
|
|
*/
|
2013-09-17 17:06:17 +04:00
|
|
|
Hypothesis* Hypothesis::Create(const Hypothesis &prevHypo, const TranslationOption &transOpt)
|
2008-06-11 14:52:57 +04:00
|
|
|
{
|
2008-06-19 03:14:09 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
#ifdef USE_HYPO_POOL
|
2013-09-27 12:35:24 +04:00
|
|
|
Hypothesis *ptr = s_objectPool.getPtr();
|
|
|
|
return new(ptr) Hypothesis(prevHypo, transOpt);
|
2011-02-24 16:14:42 +03:00
|
|
|
#else
|
2013-09-27 12:35:24 +04:00
|
|
|
return new Hypothesis(prevHypo, transOpt);
|
2011-02-24 16:14:42 +03:00
|
|
|
#endif
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
/***
|
|
|
|
* return the subclass of Hypothesis most appropriate to the given target phrase
|
|
|
|
*/
|
|
|
|
|
2013-08-05 19:53:15 +04:00
|
|
|
Hypothesis* Hypothesis::Create(Manager& manager, InputType const& m_source, const TranslationOption &initialTransOpt)
|
2008-06-11 14:52:57 +04:00
|
|
|
{
|
|
|
|
#ifdef USE_HYPO_POOL
|
2011-02-24 16:14:42 +03:00
|
|
|
Hypothesis *ptr = s_objectPool.getPtr();
|
2013-08-05 19:53:15 +04:00
|
|
|
return new(ptr) Hypothesis(manager, m_source, initialTransOpt);
|
2008-06-11 14:52:57 +04:00
|
|
|
#else
|
2013-08-05 19:53:15 +04:00
|
|
|
return new Hypothesis(manager, m_source, initialTransOpt);
|
2008-06-11 14:52:57 +04:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
/** check, if two hypothesis can be recombined.
|
|
|
|
this is actually a sorting function that allows us to
|
|
|
|
keep an ordered list of hypotheses. This makes recombination
|
2011-02-24 16:14:42 +03:00
|
|
|
much quicker.
|
2008-06-11 14:52:57 +04:00
|
|
|
*/
|
2009-02-06 18:43:06 +03:00
|
|
|
int Hypothesis::RecombineCompare(const Hypothesis &compare) const
|
2011-02-24 16:14:42 +03:00
|
|
|
{
|
|
|
|
// -1 = this < compare
|
|
|
|
// +1 = this > compare
|
|
|
|
// 0 = this ==compare
|
|
|
|
int comp = m_sourceCompleted.Compare(compare.m_sourceCompleted);
|
|
|
|
if (comp != 0)
|
|
|
|
return comp;
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < m_ffStates.size(); ++i) {
|
|
|
|
if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL) {
|
|
|
|
comp = m_ffStates[i] - compare.m_ffStates[i];
|
|
|
|
} else {
|
|
|
|
comp = m_ffStates[i]->Compare(*compare.m_ffStates[i]);
|
|
|
|
}
|
|
|
|
if (comp != 0) return comp;
|
|
|
|
}
|
2013-05-29 21:16:15 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
return 0;
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
|
2014-08-08 19:41:16 +04:00
|
|
|
void Hypothesis::EvaluateWhenApplied(const StatefulFeatureFunction &sfff,
|
2015-01-14 14:07:42 +03:00
|
|
|
int state_idx)
|
2013-05-29 21:16:15 +04:00
|
|
|
{
|
2013-06-05 22:44:43 +04:00
|
|
|
const StaticData &staticData = StaticData::Instance();
|
|
|
|
if (! staticData.IsFeatureFunctionIgnored( sfff )) {
|
2014-07-10 02:41:08 +04:00
|
|
|
m_ffStates[state_idx] = sfff.EvaluateWhenApplied(
|
2013-06-06 18:51:31 +04:00
|
|
|
*this,
|
|
|
|
m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL,
|
2014-08-05 20:57:57 +04:00
|
|
|
&m_currScoreBreakdown);
|
2013-06-05 22:44:43 +04:00
|
|
|
}
|
2012-07-02 18:57:54 +04:00
|
|
|
}
|
|
|
|
|
2014-08-08 19:41:16 +04:00
|
|
|
void Hypothesis::EvaluateWhenApplied(const StatelessFeatureFunction& slff)
|
2013-05-29 21:16:15 +04:00
|
|
|
{
|
2013-06-05 22:44:43 +04:00
|
|
|
const StaticData &staticData = StaticData::Instance();
|
|
|
|
if (! staticData.IsFeatureFunctionIgnored( slff )) {
|
2014-08-05 20:57:57 +04:00
|
|
|
slff.EvaluateWhenApplied(*this, &m_currScoreBreakdown);
|
2013-06-05 22:44:43 +04:00
|
|
|
}
|
2012-07-02 18:57:54 +04:00
|
|
|
}
|
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
/***
|
|
|
|
* calculate the logarithm of our total translation score (sum up components)
|
|
|
|
*/
|
2014-08-08 19:41:16 +04:00
|
|
|
void Hypothesis::EvaluateWhenApplied(const SquareMatrix &futureScore)
|
2008-06-11 14:52:57 +04:00
|
|
|
{
|
2014-01-03 22:45:31 +04:00
|
|
|
IFVERBOSE(2) {
|
|
|
|
m_manager.GetSentenceStats().StartTimeOtherScore();
|
|
|
|
}
|
2009-02-06 18:43:06 +03:00
|
|
|
// some stateless score producers cache their values in the translation
|
2011-02-24 16:14:42 +03:00
|
|
|
// option: add these here
|
2009-07-20 17:56:37 +04:00
|
|
|
// language model scores for n-grams completely contained within a target
|
|
|
|
// phrase are also included here
|
2009-02-06 18:43:06 +03:00
|
|
|
|
|
|
|
// compute values of stateless feature functions that were not
|
2012-09-04 20:50:30 +04:00
|
|
|
// cached in the translation option
|
2011-02-24 16:14:42 +03:00
|
|
|
const vector<const StatelessFeatureFunction*>& sfs =
|
2013-05-29 21:16:15 +04:00
|
|
|
StatelessFeatureFunction::GetStatelessFeatureFunctions();
|
2012-09-04 20:50:30 +04:00
|
|
|
for (unsigned i = 0; i < sfs.size(); ++i) {
|
2013-05-29 21:16:15 +04:00
|
|
|
const StatelessFeatureFunction &ff = *sfs[i];
|
2014-08-08 19:41:16 +04:00
|
|
|
EvaluateWhenApplied(ff);
|
2012-09-04 20:50:30 +04:00
|
|
|
}
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
const vector<const StatefulFeatureFunction*>& ffs =
|
2013-05-29 21:16:15 +04:00
|
|
|
StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
2011-02-24 16:14:42 +03:00
|
|
|
for (unsigned i = 0; i < ffs.size(); ++i) {
|
2013-04-22 16:03:07 +04:00
|
|
|
const StatefulFeatureFunction &ff = *ffs[i];
|
2013-06-05 22:44:43 +04:00
|
|
|
const StaticData &staticData = StaticData::Instance();
|
|
|
|
if (! staticData.IsFeatureFunctionIgnored(ff)) {
|
2014-07-10 02:41:08 +04:00
|
|
|
m_ffStates[i] = ff.EvaluateWhenApplied(*this,
|
2015-01-14 14:07:42 +03:00
|
|
|
m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
|
|
|
|
&m_currScoreBreakdown);
|
2013-06-05 22:44:43 +04:00
|
|
|
}
|
2011-02-24 16:14:42 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
IFVERBOSE(2) {
|
2014-01-03 22:45:31 +04:00
|
|
|
m_manager.GetSentenceStats().StopTimeOtherScore();
|
|
|
|
m_manager.GetSentenceStats().StartTimeEstimateScore();
|
2011-02-24 16:14:42 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// FUTURE COST
|
|
|
|
m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted );
|
|
|
|
|
|
|
|
// TOTAL
|
2014-08-05 20:57:57 +04:00
|
|
|
m_totalScore = m_currScoreBreakdown.GetWeightedScore() + m_futureScore;
|
|
|
|
if (m_prevHypo) m_totalScore += m_prevHypo->GetScore();
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
IFVERBOSE(2) {
|
2014-01-03 22:45:31 +04:00
|
|
|
m_manager.GetSentenceStats().StopTimeEstimateScore();
|
2011-02-24 16:14:42 +03:00
|
|
|
}
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
/** Predecessor on this hypothesis' path; NULL for an initial hypothesis. */
const Hypothesis* Hypothesis::GetPrevHypo() const
{
  return m_prevHypo;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* print hypothesis information for pharaoh-style logging
|
|
|
|
*/
|
|
|
|
void Hypothesis::PrintHypothesis() const
|
|
|
|
{
|
2011-02-24 16:14:42 +03:00
|
|
|
if (!m_prevHypo) {
|
|
|
|
TRACE_ERR(endl << "NULL hypo" << endl);
|
|
|
|
return;
|
|
|
|
}
|
2008-06-11 14:52:57 +04:00
|
|
|
TRACE_ERR(endl << "creating hypothesis "<< m_id <<" from "<< m_prevHypo->m_id<<" ( ");
|
2013-08-05 19:53:15 +04:00
|
|
|
int end = (int)(m_prevHypo->GetCurrTargetPhrase().GetSize()-1);
|
2008-06-11 14:52:57 +04:00
|
|
|
int start = end-1;
|
|
|
|
if ( start < 0 ) start = 0;
|
|
|
|
if ( m_prevHypo->m_currTargetWordsRange.GetStartPos() == NOT_FOUND ) {
|
|
|
|
TRACE_ERR( "<s> ");
|
2011-02-24 16:14:42 +03:00
|
|
|
} else {
|
2008-06-11 14:52:57 +04:00
|
|
|
TRACE_ERR( "... ");
|
|
|
|
}
|
|
|
|
if (end>=0) {
|
|
|
|
WordsRange range(start, end);
|
2013-08-05 19:53:15 +04:00
|
|
|
TRACE_ERR( m_prevHypo->GetCurrTargetPhrase().GetSubString(range) << " ");
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
TRACE_ERR( ")"<<endl);
|
2011-02-24 16:14:42 +03:00
|
|
|
TRACE_ERR( "\tbase score "<< (m_prevHypo->m_totalScore - m_prevHypo->m_futureScore) <<endl);
|
2013-08-05 19:53:15 +04:00
|
|
|
TRACE_ERR( "\tcovering "<<m_currSourceWordsRange.GetStartPos()<<"-"<<m_currSourceWordsRange.GetEndPos()
|
2013-08-13 23:36:32 +04:00
|
|
|
<<": " << m_transOpt.GetInputPath().GetPhrase() << endl);
|
2013-08-05 16:46:54 +04:00
|
|
|
|
2013-08-05 19:53:15 +04:00
|
|
|
TRACE_ERR( "\ttranslated as: "<<(Phrase&) GetCurrTargetPhrase()<<endl); // <<" => translation cost "<<m_score[ScoreType::PhraseTrans];
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
if (m_wordDeleted) TRACE_ERR( "\tword deleted"<<endl);
|
2008-06-11 14:52:57 +04:00
|
|
|
// TRACE_ERR( "\tdistance: "<<GetCurrSourceWordsRange().CalcDistortion(m_prevHypo->GetCurrSourceWordsRange())); // << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<<endl;
|
|
|
|
// TRACE_ERR( "\tlanguage model cost "); // <<m_score[ScoreType::LanguageModelScore]<<endl;
|
|
|
|
// TRACE_ERR( "\tword penalty "); // <<(m_score[ScoreType::WordPenalty]*weightWordPenalty)<<endl;
|
2011-02-24 16:14:42 +03:00
|
|
|
TRACE_ERR( "\tscore "<<m_totalScore - m_futureScore<<" + future cost "<<m_futureScore<<" = "<<m_totalScore<<endl);
|
2014-08-05 20:57:57 +04:00
|
|
|
TRACE_ERR( "\tunweighted feature scores: " << m_currScoreBreakdown << endl);
|
2011-02-24 16:14:42 +03:00
|
|
|
//PrintLMScores();
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
void Hypothesis::CleanupArcList()
|
|
|
|
{
|
2011-02-24 16:14:42 +03:00
|
|
|
// point this hypo's main hypo to itself
|
|
|
|
SetWinningHypo(this);
|
|
|
|
|
|
|
|
if (!m_arcList) return;
|
|
|
|
|
|
|
|
/* keep only number of arcs we need to create all n-best paths.
|
|
|
|
* However, may not be enough if only unique candidates are needed,
|
|
|
|
* so we'll keep all of arc list if nedd distinct n-best list
|
|
|
|
*/
|
|
|
|
const StaticData &staticData = StaticData::Instance();
|
|
|
|
size_t nBestSize = staticData.GetNBestSize();
|
2014-08-11 12:40:34 +04:00
|
|
|
bool distinctNBest = staticData.GetDistinctNBest() || staticData.GetLatticeSamplesSize() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphSLF() || staticData.GetOutputSearchGraphHypergraph() || staticData.UseLatticeMBR() ;
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
|
|
|
|
// prune arc list only if there too many arcs
|
2015-01-14 14:07:42 +03:00
|
|
|
NTH_ELEMENT4(m_arcList->begin()
|
|
|
|
, m_arcList->begin() + nBestSize - 1
|
|
|
|
, m_arcList->end()
|
|
|
|
, CompareHypothesisTotalScore());
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
// delete bad ones
|
|
|
|
ArcList::iterator iter;
|
|
|
|
for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter) {
|
|
|
|
Hypothesis *arc = *iter;
|
|
|
|
FREEHYPO(arc);
|
|
|
|
}
|
|
|
|
m_arcList->erase(m_arcList->begin() + nBestSize
|
|
|
|
, m_arcList->end());
|
|
|
|
}
|
|
|
|
|
|
|
|
// set all arc's main hypo variable to this hypo
|
|
|
|
ArcList::iterator iter = m_arcList->begin();
|
|
|
|
for (; iter != m_arcList->end() ; ++iter) {
|
|
|
|
Hypothesis *arc = *iter;
|
|
|
|
arc->SetWinningHypo(this);
|
|
|
|
}
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
|
2013-08-07 17:18:12 +04:00
|
|
|
/** Target phrase of the translation option applied by this hypothesis. */
const TargetPhrase &Hypothesis::GetCurrTargetPhrase() const
{
  const TargetPhrase &targetPhrase = m_transOpt.GetTargetPhrase();
  return targetPhrase;
}
|
|
|
|
|
2013-07-23 04:32:25 +04:00
|
|
|
/**
 * Append the full output of this path to out: first everything produced by
 * the predecessors (recursively), then this hypothesis' own target phrase.
 */
void Hypothesis::GetOutputPhrase(Phrase &out) const
{
  if (m_prevHypo) {
    // earlier hypotheses first, so the words come out in order
    m_prevHypo->GetOutputPhrase(out);
  }

  const TargetPhrase &ownPhrase = GetCurrTargetPhrase();
  out.Append(ownPhrase);
}
|
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
// NOTE(review): presumably expands to the definition of Hypothesis::ToString()
// in terms of operator<< below -- confirm against the macro's definition.
TO_STRING_BODY(Hypothesis)
|
2011-02-24 16:14:42 +03:00
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
// friend
|
2010-07-18 02:29:06 +04:00
|
|
|
ostream& operator<<(ostream& out, const Hypothesis& hypo)
|
2011-02-24 16:14:42 +03:00
|
|
|
{
|
|
|
|
hypo.ToStream(out);
|
|
|
|
// words bitmap
|
|
|
|
out << "[" << hypo.m_sourceCompleted << "] ";
|
|
|
|
|
|
|
|
// scores
|
|
|
|
out << " [total=" << hypo.GetTotalScore() << "]";
|
|
|
|
out << " " << hypo.GetScoreBreakdown();
|
|
|
|
|
|
|
|
// alignment
|
2012-10-19 18:10:10 +04:00
|
|
|
out << " " << hypo.GetCurrTargetPhrase().GetAlignNonTerm();
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
const Hypothesis *prevHypo = hypo.GetPrevHypo();
|
|
|
|
if (prevHypo)
|
|
|
|
out << endl << *prevHypo;
|
|
|
|
*/
|
|
|
|
|
|
|
|
return out;
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
std::string Hypothesis::GetSourcePhraseStringRep(const vector<FactorType> factorsToPrint) const
|
2008-06-11 14:52:57 +04:00
|
|
|
{
|
2013-08-13 23:36:32 +04:00
|
|
|
return m_transOpt.GetInputPath().GetPhrase().GetStringRep(factorsToPrint);
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
2013-08-05 19:53:15 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
std::string Hypothesis::GetTargetPhraseStringRep(const vector<FactorType> factorsToPrint) const
|
2008-06-11 14:52:57 +04:00
|
|
|
{
|
2011-02-24 16:14:42 +03:00
|
|
|
if (!m_prevHypo) {
|
|
|
|
return "";
|
|
|
|
}
|
2013-08-05 19:53:15 +04:00
|
|
|
return GetCurrTargetPhrase().GetStringRep(factorsToPrint);
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
std::string Hypothesis::GetSourcePhraseStringRep() const
|
2008-06-11 14:52:57 +04:00
|
|
|
{
|
2011-02-24 16:14:42 +03:00
|
|
|
vector<FactorType> allFactors;
|
2013-02-08 23:28:19 +04:00
|
|
|
for(size_t i=0; i < MAX_NUM_FACTORS; i++) {
|
2011-02-24 16:14:42 +03:00
|
|
|
allFactors.push_back(i);
|
|
|
|
}
|
|
|
|
return GetSourcePhraseStringRep(allFactors);
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
2011-02-24 16:14:42 +03:00
|
|
|
std::string Hypothesis::GetTargetPhraseStringRep() const
|
2008-06-11 14:52:57 +04:00
|
|
|
{
|
2011-02-24 16:14:42 +03:00
|
|
|
vector<FactorType> allFactors;
|
2013-02-08 23:28:19 +04:00
|
|
|
for(size_t i=0; i < MAX_NUM_FACTORS; i++) {
|
2011-02-24 16:14:42 +03:00
|
|
|
allFactors.push_back(i);
|
|
|
|
}
|
|
|
|
return GetTargetPhraseStringRep(allFactors);
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
|
2014-12-28 16:07:18 +03:00
|
|
|
void Hypothesis::OutputAlignment(std::ostream &out) const
|
|
|
|
{
|
|
|
|
std::vector<const Hypothesis *> edges;
|
|
|
|
const Hypothesis *currentHypo = this;
|
|
|
|
while (currentHypo) {
|
|
|
|
edges.push_back(currentHypo);
|
|
|
|
currentHypo = currentHypo->GetPrevHypo();
|
|
|
|
}
|
|
|
|
|
|
|
|
OutputAlignment(out, edges);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Write the word alignment for a path given as a list of edges ordered from
 * the last hypothesis back to the first. Edges are visited in reverse, so
 * target offsets accumulate in output order. The line is terminated with
 * std::endl.
 */
void Hypothesis::OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
{
  typedef vector<const Hypothesis *>::const_reverse_iterator EdgeIter;

  size_t targetOffset = 0;
  for (EdgeIter it = edges.rbegin(); it != edges.rend(); ++it) {
    const Hypothesis &edge = **it;
    const TargetPhrase &tp = edge.GetCurrTargetPhrase();
    const size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();

    OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);

    // the next phrase starts after the words emitted by this one
    targetOffset += tp.GetSize();
  }

  // Removing std::endl here breaks -alignment-output-file, so stop doing that, please :)
  // Or fix it somewhere else.
  out << std::endl;
}
|
|
|
|
|
|
|
|
/**
 * Write every alignment point of ai as "<source>-<target> ", shifting the
 * source index by sourceOffset and the target index by targetOffset. Points
 * are written in the order returned by GetSortedAlignments().
 */
void Hypothesis::OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset)
{
  typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
  const AlignVec alignments = ai.GetSortedAlignments();

  for (size_t idx = 0; idx < alignments.size(); ++idx) {
    const std::pair<size_t,size_t> &point = *alignments[idx];
    out << (point.first + sourceOffset) << "-" << (point.second + targetOffset) << " ";
  }
}
|
|
|
|
|
2014-12-28 19:39:38 +03:00
|
|
|
/**
 * Fill map with the source phrase of every hypothesis on the path ending in
 * hypo, stored at the start position of its source range. A hypothesis
 * without a predecessor contributes nothing.
 *
 * \param map  slot per source position; must be large enough for every start
 *             position on the path
 * \param hypo last hypothesis of the path; must not be NULL
 */
void Hypothesis::OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
{
  const Hypothesis *prev = hypo->GetPrevHypo();
  if (!prev) {
    return;
  }

  // earlier hypotheses are recorded first
  OutputInput(map, prev);

  const size_t startPos = hypo->GetCurrSourceWordsRange().GetStartPos();
  map[startPos] = &hypo->GetTranslationOption().GetInputPath().GetPhrase();
}
|
|
|
|
|
|
|
|
void Hypothesis::OutputInput(std::ostream& os) const
|
|
|
|
{
|
|
|
|
size_t len = this->GetInput().GetSize();
|
|
|
|
std::vector<const Phrase*> inp_phrases(len, 0);
|
|
|
|
OutputInput(inp_phrases, this);
|
|
|
|
for (size_t i=0; i<len; ++i)
|
|
|
|
if (inp_phrases[i]) os << *inp_phrases[i];
|
|
|
|
}
|
|
|
|
|
2014-12-28 23:40:57 +03:00
|
|
|
void Hypothesis::OutputBestSurface(std::ostream &out, const std::vector<FactorType> &outputFactorOrder,
|
2015-01-14 14:07:42 +03:00
|
|
|
char reportSegmentation, bool reportAllFactors) const
|
2014-12-28 22:18:40 +03:00
|
|
|
{
|
2014-12-28 23:40:57 +03:00
|
|
|
if (m_prevHypo) {
|
2014-12-28 22:18:40 +03:00
|
|
|
// recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
|
2014-12-28 23:40:57 +03:00
|
|
|
m_prevHypo->OutputBestSurface(out, outputFactorOrder, reportSegmentation, reportAllFactors);
|
2014-12-28 22:18:40 +03:00
|
|
|
}
|
2014-12-28 23:40:57 +03:00
|
|
|
OutputSurface(out, *this, outputFactorOrder, reportSegmentation, reportAllFactors);
|
2014-12-28 22:18:40 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
/***
|
|
|
|
* print surface factor only for the given phrase
|
|
|
|
*/
|
|
|
|
void Hypothesis::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
|
2015-01-14 14:07:42 +03:00
|
|
|
char reportSegmentation, bool reportAllFactors) const
|
2014-12-28 22:18:40 +03:00
|
|
|
{
|
|
|
|
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
|
2015-01-14 14:07:42 +03:00
|
|
|
"Must specific at least 1 output factor");
|
2014-12-28 22:18:40 +03:00
|
|
|
const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
|
|
|
|
bool markUnknown = StaticData::Instance().GetMarkUnknown();
|
|
|
|
if (reportAllFactors == true) {
|
|
|
|
out << phrase;
|
|
|
|
} else {
|
|
|
|
FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
|
|
|
|
|
|
|
|
std::map<size_t, const Factor*> placeholders;
|
|
|
|
if (placeholderFactor != NOT_FOUND) {
|
|
|
|
// creates map of target position -> factor for placeholders
|
|
|
|
placeholders = GetPlaceholders(edge, placeholderFactor);
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t size = phrase.GetSize();
|
|
|
|
for (size_t pos = 0 ; pos < size ; pos++) {
|
|
|
|
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
|
|
|
|
|
|
|
|
if (placeholders.size()) {
|
|
|
|
// do placeholders
|
|
|
|
std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
|
|
|
|
if (iter != placeholders.end()) {
|
|
|
|
factor = iter->second;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
UTIL_THROW_IF2(factor == NULL,
|
2015-01-14 14:07:42 +03:00
|
|
|
"No factor 0 at position " << pos);
|
2014-12-28 22:18:40 +03:00
|
|
|
|
|
|
|
//preface surface form with UNK if marking unknowns
|
|
|
|
const Word &word = phrase.GetWord(pos);
|
|
|
|
if(markUnknown && word.IsOOV()) {
|
|
|
|
out << "UNK" << *factor;
|
|
|
|
} else {
|
|
|
|
out << *factor;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
|
|
|
|
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
|
|
|
|
UTIL_THROW_IF2(factor == NULL,
|
2015-01-14 14:07:42 +03:00
|
|
|
"No factor " << i << " at position " << pos);
|
2014-12-28 22:18:40 +03:00
|
|
|
|
|
|
|
out << "|" << *factor;
|
|
|
|
}
|
|
|
|
out << " ";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// trace ("report segmentation") option "-t" / "-tt"
|
|
|
|
if (reportSegmentation > 0 && phrase.GetSize() > 0) {
|
|
|
|
const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
|
|
|
|
const int sourceStart = sourceRange.GetStartPos();
|
|
|
|
const int sourceEnd = sourceRange.GetEndPos();
|
|
|
|
out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
|
|
|
|
if (reportSegmentation == 2) {
|
|
|
|
out << ",wa=";
|
|
|
|
const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
|
|
|
|
Hypothesis::OutputAlignment(out, ai, 0, 0);
|
|
|
|
out << ",total=";
|
|
|
|
out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
|
|
|
|
out << ",";
|
|
|
|
ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
|
|
|
|
scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
|
2014-12-30 16:23:30 +03:00
|
|
|
scoreBreakdown.OutputAllFeatureScores(out);
|
2014-12-28 22:18:40 +03:00
|
|
|
}
|
|
|
|
out << "| ";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::map<size_t, const Factor*> Hypothesis::GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
|
|
|
|
{
|
|
|
|
const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
|
|
|
|
const Phrase &inputPhrase = inputPath.GetPhrase();
|
|
|
|
|
|
|
|
std::map<size_t, const Factor*> ret;
|
|
|
|
|
|
|
|
for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
|
|
|
|
const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
|
|
|
|
if (factor) {
|
|
|
|
std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
|
|
|
|
UTIL_THROW_IF2(targetPos.size() != 1,
|
2015-01-14 14:07:42 +03:00
|
|
|
"Placeholder should be aligned to 1, and only 1, word");
|
2014-12-28 22:18:40 +03:00
|
|
|
ret[*targetPos.begin()] = factor;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
}
|
|
|
|
|