mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-28 22:45:50 +03:00
segfault creating nbest. Was using all hypos in coll instead of sorted and pruned hypos only
This commit is contained in:
parent
16044c176b
commit
051ce0b44e
@ -18,147 +18,145 @@ namespace Moses2
|
||||
{
|
||||
|
||||
HypothesisColl::HypothesisColl(const ManagerBase &mgr) :
|
||||
m_coll(MemPoolAllocator<const HypothesisBase*>(mgr.GetPool())), m_sortedHypos(
|
||||
NULL)
|
||||
m_coll(MemPoolAllocator<const HypothesisBase*>(mgr.GetPool())), m_sortedHypos(
|
||||
NULL)
|
||||
{
|
||||
}
|
||||
|
||||
void HypothesisColl::Add(
|
||||
const System &system,
|
||||
HypothesisBase *hypo,
|
||||
Recycler<HypothesisBase*> &hypoRecycle,
|
||||
ArcLists &arcLists)
|
||||
const System &system,
|
||||
HypothesisBase *hypo,
|
||||
Recycler<HypothesisBase*> &hypoRecycle,
|
||||
ArcLists &arcLists)
|
||||
{
|
||||
StackAdd added = Add(hypo);
|
||||
StackAdd added = Add(hypo);
|
||||
|
||||
size_t nbestSize = system.options.nbest.nbest_size;
|
||||
if (nbestSize) {
|
||||
arcLists.AddArc(added.added, hypo, added.other);
|
||||
}
|
||||
else {
|
||||
if (!added.added) {
|
||||
hypoRecycle.Recycle(hypo);
|
||||
size_t nbestSize = system.options.nbest.nbest_size;
|
||||
if (nbestSize) {
|
||||
arcLists.AddArc(added.added, hypo, added.other);
|
||||
}
|
||||
else if (added.other) {
|
||||
hypoRecycle.Recycle(added.other);
|
||||
else {
|
||||
if (!added.added) {
|
||||
hypoRecycle.Recycle(hypo);
|
||||
}
|
||||
else if (added.other) {
|
||||
hypoRecycle.Recycle(added.other);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
StackAdd HypothesisColl::Add(const HypothesisBase *hypo)
|
||||
{
|
||||
std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);
|
||||
std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);
|
||||
|
||||
// CHECK RECOMBINATION
|
||||
if (addRet.second) {
|
||||
// equiv hypo doesn't exists
|
||||
return StackAdd(true, NULL);
|
||||
}
|
||||
else {
|
||||
HypothesisBase *hypoExisting = const_cast<HypothesisBase*>(*addRet.first);
|
||||
if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
|
||||
// incoming hypo is better than the one we have
|
||||
const HypothesisBase * const &hypoExisting1 = *addRet.first;
|
||||
const HypothesisBase *&hypoExisting2 =
|
||||
const_cast<const HypothesisBase *&>(hypoExisting1);
|
||||
hypoExisting2 = hypo;
|
||||
// CHECK RECOMBINATION
|
||||
if (addRet.second) {
|
||||
// equiv hypo doesn't exists
|
||||
return StackAdd(true, NULL);
|
||||
}
|
||||
else {
|
||||
HypothesisBase *hypoExisting = const_cast<HypothesisBase*>(*addRet.first);
|
||||
if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
|
||||
// incoming hypo is better than the one we have
|
||||
const HypothesisBase * const &hypoExisting1 = *addRet.first;
|
||||
const HypothesisBase *&hypoExisting2 =
|
||||
const_cast<const HypothesisBase *&>(hypoExisting1);
|
||||
hypoExisting2 = hypo;
|
||||
|
||||
return StackAdd(true, hypoExisting);
|
||||
}
|
||||
else {
|
||||
// already storing the best hypo. discard incoming hypo
|
||||
return StackAdd(false, hypoExisting);
|
||||
}
|
||||
}
|
||||
return StackAdd(true, hypoExisting);
|
||||
}
|
||||
else {
|
||||
// already storing the best hypo. discard incoming hypo
|
||||
return StackAdd(false, hypoExisting);
|
||||
}
|
||||
}
|
||||
|
||||
assert(false);
|
||||
assert(false);
|
||||
}
|
||||
|
||||
const Hypotheses &HypothesisColl::GetSortedAndPruneHypos(
|
||||
const ManagerBase &mgr,
|
||||
ArcLists &arcLists) const
|
||||
const ManagerBase &mgr,
|
||||
ArcLists &arcLists) const
|
||||
{
|
||||
if (m_sortedHypos == NULL) {
|
||||
// create sortedHypos first
|
||||
MemPool &pool = mgr.GetPool();
|
||||
m_sortedHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool,
|
||||
m_coll.size());
|
||||
if (m_sortedHypos == NULL) {
|
||||
// create sortedHypos first
|
||||
MemPool &pool = mgr.GetPool();
|
||||
m_sortedHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool,
|
||||
m_coll.size());
|
||||
|
||||
size_t ind = 0;
|
||||
BOOST_FOREACH(const HypothesisBase *hypo, m_coll){
|
||||
(*m_sortedHypos)[ind] = hypo;
|
||||
++ind;
|
||||
}
|
||||
size_t ind = 0;
|
||||
BOOST_FOREACH(const HypothesisBase *hypo, m_coll){
|
||||
(*m_sortedHypos)[ind] = hypo;
|
||||
++ind;
|
||||
}
|
||||
|
||||
SortAndPruneHypos(mgr, arcLists);
|
||||
}
|
||||
SortAndPruneHypos(mgr, arcLists);
|
||||
}
|
||||
|
||||
return *m_sortedHypos;
|
||||
return *m_sortedHypos;
|
||||
}
|
||||
|
||||
void HypothesisColl::SortAndPruneHypos(const ManagerBase &mgr,
|
||||
ArcLists &arcLists) const
|
||||
ArcLists &arcLists) const
|
||||
{
|
||||
size_t stackSize = mgr.system.options.search.stack_size;
|
||||
Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle();
|
||||
size_t stackSize = mgr.system.options.search.stack_size;
|
||||
Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle();
|
||||
|
||||
/*
|
||||
cerr << "UNSORTED hypos:" << endl;
|
||||
for (size_t i = 0; i < hypos.size(); ++i) {
|
||||
const Hypothesis *hypo = hypos[i];
|
||||
cerr << *hypo << endl;
|
||||
/*
|
||||
cerr << "UNSORTED hypos: ";
|
||||
BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
|
||||
cerr << hypo << "(" << hypo->GetFutureScore() << ")" << " ";
|
||||
}
|
||||
cerr << endl;
|
||||
*/
|
||||
Hypotheses::iterator iterMiddle;
|
||||
iterMiddle =
|
||||
(stackSize == 0 || m_sortedHypos->size() < stackSize) ?
|
||||
m_sortedHypos->end() : m_sortedHypos->begin() + stackSize;
|
||||
*/
|
||||
Hypotheses::iterator iterMiddle;
|
||||
iterMiddle =
|
||||
(stackSize == 0 || m_sortedHypos->size() < stackSize) ?
|
||||
m_sortedHypos->end() : m_sortedHypos->begin() + stackSize;
|
||||
|
||||
std::partial_sort(m_sortedHypos->begin(), iterMiddle, m_sortedHypos->end(),
|
||||
HypothesisFutureScoreOrderer());
|
||||
std::partial_sort(m_sortedHypos->begin(), iterMiddle, m_sortedHypos->end(),
|
||||
HypothesisFutureScoreOrderer());
|
||||
|
||||
// prune
|
||||
if (stackSize && m_sortedHypos->size() > stackSize) {
|
||||
for (size_t i = stackSize; i < m_sortedHypos->size(); ++i) {
|
||||
HypothesisBase *hypo = const_cast<HypothesisBase*>((*m_sortedHypos)[i]);
|
||||
recycler.Recycle(hypo);
|
||||
// prune
|
||||
if (stackSize && m_sortedHypos->size() > stackSize) {
|
||||
for (size_t i = stackSize; i < m_sortedHypos->size(); ++i) {
|
||||
HypothesisBase *hypo = const_cast<HypothesisBase*>((*m_sortedHypos)[i]);
|
||||
recycler.Recycle(hypo);
|
||||
|
||||
// delete from arclist
|
||||
if (mgr.system.options.nbest.nbest_size) {
|
||||
arcLists.Delete(hypo);
|
||||
}
|
||||
}
|
||||
m_sortedHypos->resize(stackSize);
|
||||
}
|
||||
// delete from arclist
|
||||
if (mgr.system.options.nbest.nbest_size) {
|
||||
arcLists.Delete(hypo);
|
||||
}
|
||||
}
|
||||
m_sortedHypos->resize(stackSize);
|
||||
}
|
||||
|
||||
/*
|
||||
cerr << "sorted hypos:" << endl;
|
||||
for (size_t i = 0; i < hypos.size(); ++i) {
|
||||
const Hypothesis *hypo = hypos[i];
|
||||
cerr << hypo << " " << *hypo << endl;
|
||||
/*
|
||||
cerr << "sorted hypos: ";
|
||||
for (size_t i = 0; i < m_sortedHypos->size(); ++i) {
|
||||
const HypothesisBase *hypo = (*m_sortedHypos)[i];
|
||||
cerr << hypo << " ";
|
||||
}
|
||||
cerr << endl;
|
||||
*/
|
||||
|
||||
*/
|
||||
}
|
||||
|
||||
void HypothesisColl::Clear()
|
||||
{
|
||||
m_sortedHypos = NULL;
|
||||
m_coll.clear();
|
||||
m_sortedHypos = NULL;
|
||||
m_coll.clear();
|
||||
}
|
||||
|
||||
std::string HypothesisColl::Debug(const System &system) const
|
||||
{
|
||||
stringstream out;
|
||||
BOOST_FOREACH (const HypothesisBase *hypo, m_coll) {
|
||||
out << hypo->Debug(system);
|
||||
out << std::endl << std::endl;
|
||||
}
|
||||
stringstream out;
|
||||
BOOST_FOREACH (const HypothesisBase *hypo, m_coll) {
|
||||
out << hypo->Debug(system);
|
||||
out << std::endl << std::endl;
|
||||
}
|
||||
|
||||
return out.str();
|
||||
return out.str();
|
||||
}
|
||||
|
||||
} /* namespace Moses2 */
|
||||
|
@ -36,69 +36,70 @@ using namespace std;
|
||||
namespace Moses2
|
||||
{
|
||||
Manager::Manager(System &sys, const TranslationTask &task,
|
||||
const std::string &inputStr, long translationId) :
|
||||
ManagerBase(sys, task, inputStr, translationId)
|
||||
const std::string &inputStr, long translationId) :
|
||||
ManagerBase(sys, task, inputStr, translationId)
|
||||
,m_search(NULL)
|
||||
,m_bitmaps(NULL)
|
||||
{
|
||||
//cerr << translationId << " inputStr=" << inputStr << endl;
|
||||
//cerr << translationId << " inputStr=" << inputStr << endl;
|
||||
}
|
||||
|
||||
Manager::~Manager()
|
||||
{
|
||||
//cerr << "Start ~Manager " << this << endl;
|
||||
delete m_search;
|
||||
delete m_bitmaps;
|
||||
//cerr << "Finish ~Manager " << this << endl;
|
||||
//cerr << "Start ~Manager " << this << endl;
|
||||
delete m_search;
|
||||
delete m_bitmaps;
|
||||
//cerr << "Finish ~Manager " << this << endl;
|
||||
}
|
||||
|
||||
void Manager::Init()
|
||||
{
|
||||
// init pools etc
|
||||
InitPools();
|
||||
// init pools etc
|
||||
InitPools();
|
||||
|
||||
FactorCollection &vocab = system.GetVocab();
|
||||
m_input = Moses2::Sentence::CreateFromString(GetPool(), vocab, system, m_inputStr);
|
||||
FactorCollection &vocab = system.GetVocab();
|
||||
m_input = Moses2::Sentence::CreateFromString(GetPool(), vocab, system, m_inputStr);
|
||||
|
||||
m_bitmaps = new Bitmaps(GetPool());
|
||||
m_bitmaps = new Bitmaps(GetPool());
|
||||
|
||||
const PhraseTable &firstPt = *system.featureFunctions.m_phraseTables[0];
|
||||
m_initPhrase = new (GetPool().Allocate<TargetPhraseImpl>()) TargetPhraseImpl(
|
||||
GetPool(), firstPt, system, 0);
|
||||
const PhraseTable &firstPt = *system.featureFunctions.m_phraseTables[0];
|
||||
m_initPhrase = new (GetPool().Allocate<TargetPhraseImpl>()) TargetPhraseImpl(
|
||||
GetPool(), firstPt, system, 0);
|
||||
|
||||
const Sentence &sentence = static_cast<const Sentence&>(GetInput());
|
||||
const Sentence &sentence = static_cast<const Sentence&>(GetInput());
|
||||
cerr << "sentence=" << sentence.GetSize() << " " << sentence.Debug(system) << endl;
|
||||
|
||||
m_inputPaths.Init(sentence, *this);
|
||||
m_inputPaths.Init(sentence, *this);
|
||||
|
||||
// xml
|
||||
const UnknownWordPenalty *unkWP = system.featureFunctions.GetUnknownWordPenalty();
|
||||
UTIL_THROW_IF2(unkWP == NULL, "There must be a UnknownWordPenalty FF");
|
||||
unkWP->ProcessXML(*this, GetPool(), sentence, m_inputPaths);
|
||||
// xml
|
||||
const UnknownWordPenalty *unkWP = system.featureFunctions.GetUnknownWordPenalty();
|
||||
UTIL_THROW_IF2(unkWP == NULL, "There must be a UnknownWordPenalty FF");
|
||||
unkWP->ProcessXML(*this, GetPool(), sentence, m_inputPaths);
|
||||
|
||||
// lookup with every pt
|
||||
const std::vector<const PhraseTable*> &pts = system.mappings;
|
||||
for (size_t i = 0; i < pts.size(); ++i) {
|
||||
const PhraseTable &pt = *pts[i];
|
||||
//cerr << "Looking up from " << pt.GetName() << endl;
|
||||
pt.Lookup(*this, m_inputPaths);
|
||||
}
|
||||
//m_inputPaths.DeleteUnusedPaths();
|
||||
CalcFutureScore();
|
||||
// lookup with every pt
|
||||
const std::vector<const PhraseTable*> &pts = system.mappings;
|
||||
for (size_t i = 0; i < pts.size(); ++i) {
|
||||
const PhraseTable &pt = *pts[i];
|
||||
//cerr << "Looking up from " << pt.GetName() << endl;
|
||||
pt.Lookup(*this, m_inputPaths);
|
||||
}
|
||||
//m_inputPaths.DeleteUnusedPaths();
|
||||
CalcFutureScore();
|
||||
|
||||
m_bitmaps->Init(sentence.GetSize(), vector<bool>(0));
|
||||
m_bitmaps->Init(sentence.GetSize(), vector<bool>(0));
|
||||
|
||||
switch (system.options.search.algo) {
|
||||
case Normal:
|
||||
m_search = new NSNormal::Search(*this);
|
||||
break;
|
||||
case NormalBatch:
|
||||
m_search = new NSBatch::Search(*this);
|
||||
break;
|
||||
case CubePruning:
|
||||
case CubePruningMiniStack:
|
||||
m_search = new NSCubePruningMiniStack::Search(*this);
|
||||
break;
|
||||
/*
|
||||
switch (system.options.search.algo) {
|
||||
case Normal:
|
||||
m_search = new NSNormal::Search(*this);
|
||||
break;
|
||||
case NormalBatch:
|
||||
m_search = new NSBatch::Search(*this);
|
||||
break;
|
||||
case CubePruning:
|
||||
case CubePruningMiniStack:
|
||||
m_search = new NSCubePruningMiniStack::Search(*this);
|
||||
break;
|
||||
/*
|
||||
case CubePruningPerMiniStack:
|
||||
m_search = new NSCubePruningPerMiniStack::Search(*this);
|
||||
break;
|
||||
@ -111,154 +112,151 @@ void Manager::Init()
|
||||
case CubePruningBitmapStack:
|
||||
m_search = new NSCubePruningBitmapStack::Search(*this);
|
||||
break;
|
||||
*/
|
||||
default:
|
||||
cerr << "Unknown search algorithm" << endl;
|
||||
abort();
|
||||
}
|
||||
*/
|
||||
default:
|
||||
cerr << "Unknown search algorithm" << endl;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
void Manager::Decode()
|
||||
{
|
||||
//cerr << "Start Decode " << this << endl;
|
||||
//cerr << "Start Decode " << this << endl;
|
||||
|
||||
Init();
|
||||
m_search->Decode();
|
||||
Init();
|
||||
m_search->Decode();
|
||||
|
||||
//cerr << "Finished Decode " << this << endl;
|
||||
//cerr << "Finished Decode " << this << endl;
|
||||
}
|
||||
|
||||
void Manager::CalcFutureScore()
|
||||
{
|
||||
const Sentence &sentence = static_cast<const Sentence&>(GetInput());
|
||||
size_t size = sentence.GetSize();
|
||||
m_estimatedScores =
|
||||
new (GetPool().Allocate<EstimatedScores>()) EstimatedScores(GetPool(),
|
||||
size);
|
||||
m_estimatedScores->InitTriangle(-numeric_limits<SCORE>::infinity());
|
||||
const Sentence &sentence = static_cast<const Sentence&>(GetInput());
|
||||
size_t size = sentence.GetSize();
|
||||
m_estimatedScores =
|
||||
new (GetPool().Allocate<EstimatedScores>()) EstimatedScores(GetPool(),
|
||||
size);
|
||||
m_estimatedScores->InitTriangle(-numeric_limits<SCORE>::infinity());
|
||||
|
||||
// walk all the translation options and record the cheapest option for each span
|
||||
BOOST_FOREACH(const InputPathBase *path, m_inputPaths){
|
||||
const Range &range = path->range;
|
||||
SCORE bestScore = -numeric_limits<SCORE>::infinity();
|
||||
// walk all the translation options and record the cheapest option for each span
|
||||
BOOST_FOREACH(const InputPathBase *path, m_inputPaths){
|
||||
const Range &range = path->range;
|
||||
SCORE bestScore = -numeric_limits<SCORE>::infinity();
|
||||
|
||||
size_t numPt = system.mappings.size();
|
||||
for (size_t i = 0; i < numPt; ++i) {
|
||||
const TargetPhrases *tps = static_cast<const InputPath*>(path)->targetPhrases[i];
|
||||
if (tps) {
|
||||
BOOST_FOREACH(const TargetPhraseImpl *tp, *tps) {
|
||||
SCORE score = tp->GetFutureScore();
|
||||
if (score > bestScore) {
|
||||
bestScore = score;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
m_estimatedScores->SetValue(range.GetStartPos(), range.GetEndPos(), bestScore);
|
||||
}
|
||||
size_t numPt = system.mappings.size();
|
||||
for (size_t i = 0; i < numPt; ++i) {
|
||||
const TargetPhrases *tps = static_cast<const InputPath*>(path)->targetPhrases[i];
|
||||
if (tps) {
|
||||
BOOST_FOREACH(const TargetPhraseImpl *tp, *tps) {
|
||||
SCORE score = tp->GetFutureScore();
|
||||
if (score > bestScore) {
|
||||
bestScore = score;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
m_estimatedScores->SetValue(range.GetStartPos(), range.GetEndPos(), bestScore);
|
||||
}
|
||||
|
||||
// now fill all the cells in the strictly upper triangle
|
||||
// there is no way to modify the diagonal now, in the case
|
||||
// where no translation option covers a single-word span,
|
||||
// we leave the +inf in the matrix
|
||||
// like in chart parsing we want each cell to contain the highest score
|
||||
// of the full-span trOpt or the sum of scores of joining two smaller spans
|
||||
// now fill all the cells in the strictly upper triangle
|
||||
// there is no way to modify the diagonal now, in the case
|
||||
// where no translation option covers a single-word span,
|
||||
// we leave the +inf in the matrix
|
||||
// like in chart parsing we want each cell to contain the highest score
|
||||
// of the full-span trOpt or the sum of scores of joining two smaller spans
|
||||
|
||||
for (size_t colstart = 1; colstart < size; colstart++) {
|
||||
for (size_t diagshift = 0; diagshift < size - colstart; diagshift++) {
|
||||
size_t sPos = diagshift;
|
||||
size_t ePos = colstart + diagshift;
|
||||
for (size_t joinAt = sPos; joinAt < ePos; joinAt++) {
|
||||
float joinedScore = m_estimatedScores->GetValue(sPos, joinAt)
|
||||
+ m_estimatedScores->GetValue(joinAt + 1, ePos);
|
||||
// uncomment to see the cell filling scheme
|
||||
// TRACE_ERR("[" << sPos << "," << ePos << "] <-? ["
|
||||
// << sPos << "," << joinAt << "]+["
|
||||
// << joinAt+1 << "," << ePos << "] (colstart: "
|
||||
// << colstart << ", diagshift: " << diagshift << ")"
|
||||
// << endl);
|
||||
for (size_t colstart = 1; colstart < size; colstart++) {
|
||||
for (size_t diagshift = 0; diagshift < size - colstart; diagshift++) {
|
||||
size_t sPos = diagshift;
|
||||
size_t ePos = colstart + diagshift;
|
||||
for (size_t joinAt = sPos; joinAt < ePos; joinAt++) {
|
||||
float joinedScore = m_estimatedScores->GetValue(sPos, joinAt)
|
||||
+ m_estimatedScores->GetValue(joinAt + 1, ePos);
|
||||
// uncomment to see the cell filling scheme
|
||||
// TRACE_ERR("[" << sPos << "," << ePos << "] <-? ["
|
||||
// << sPos << "," << joinAt << "]+["
|
||||
// << joinAt+1 << "," << ePos << "] (colstart: "
|
||||
// << colstart << ", diagshift: " << diagshift << ")"
|
||||
// << endl);
|
||||
|
||||
if (joinedScore > m_estimatedScores->GetValue(sPos, ePos)) m_estimatedScores->SetValue(
|
||||
sPos, ePos, joinedScore);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (joinedScore > m_estimatedScores->GetValue(sPos, ePos)) m_estimatedScores->SetValue(
|
||||
sPos, ePos, joinedScore);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//cerr << "Square matrix:" << endl;
|
||||
//cerr << *m_estimatedScores << endl;
|
||||
//cerr << "Square matrix:" << endl;
|
||||
//cerr << *m_estimatedScores << endl;
|
||||
}
|
||||
|
||||
std::string Manager::OutputBest() const
|
||||
{
|
||||
stringstream out;
|
||||
const Hypothesis *bestHypo = m_search->GetBestHypothesis();
|
||||
if (bestHypo) {
|
||||
if (system.options.output.ReportHypoScore) {
|
||||
out << bestHypo->GetScores().GetTotalScore() << " ";
|
||||
}
|
||||
stringstream out;
|
||||
const Hypothesis *bestHypo = m_search->GetBestHypothesis();
|
||||
if (bestHypo) {
|
||||
if (system.options.output.ReportHypoScore) {
|
||||
out << bestHypo->GetScores().GetTotalScore() << " ";
|
||||
}
|
||||
|
||||
bestHypo->OutputToStream(out);
|
||||
//cerr << "BEST TRANSLATION: " << *bestHypo;
|
||||
}
|
||||
else {
|
||||
if (system.options.output.ReportHypoScore) {
|
||||
out << "0 ";
|
||||
}
|
||||
//cerr << "NO TRANSLATION " << m_input->GetTranslationId() << endl;
|
||||
}
|
||||
bestHypo->OutputToStream(out);
|
||||
//cerr << "BEST TRANSLATION: " << *bestHypo;
|
||||
}
|
||||
else {
|
||||
if (system.options.output.ReportHypoScore) {
|
||||
out << "0 ";
|
||||
}
|
||||
//cerr << "NO TRANSLATION " << m_input->GetTranslationId() << endl;
|
||||
}
|
||||
|
||||
return out.str();
|
||||
//cerr << endl;
|
||||
return out.str();
|
||||
//cerr << endl;
|
||||
}
|
||||
|
||||
std::string Manager::OutputNBest()
|
||||
{
|
||||
arcLists.Sort();
|
||||
arcLists.Sort();
|
||||
|
||||
set<string> distinctHypos;
|
||||
set<string> distinctHypos;
|
||||
|
||||
TrellisPaths<TrellisPath> contenders;
|
||||
//cerr << "START AddInitialTrellisPaths" << endl;
|
||||
m_search->AddInitialTrellisPaths(contenders);
|
||||
//cerr << "END AddInitialTrellisPaths" << endl;
|
||||
TrellisPaths<TrellisPath> contenders;
|
||||
m_search->AddInitialTrellisPaths(contenders);
|
||||
|
||||
long transId = GetTranslationId();
|
||||
long transId = GetTranslationId();
|
||||
|
||||
// MAIN LOOP
|
||||
stringstream out;
|
||||
size_t bestInd = 0;
|
||||
while (bestInd < system.options.nbest.nbest_size && !contenders.empty()) {
|
||||
//cerr << "bestInd=" << bestInd << endl;
|
||||
TrellisPath *path = contenders.Get();
|
||||
// MAIN LOOP
|
||||
stringstream out;
|
||||
size_t bestInd = 0;
|
||||
while (bestInd < system.options.nbest.nbest_size && !contenders.empty()) {
|
||||
//cerr << "bestInd=" << bestInd << endl;
|
||||
TrellisPath *path = contenders.Get();
|
||||
|
||||
bool ok = false;
|
||||
if (system.options.nbest.only_distinct) {
|
||||
string tgtPhrase = path->OutputTargetPhrase(system);
|
||||
//cerr << "tgtPhrase=" << tgtPhrase << endl;
|
||||
bool ok = false;
|
||||
if (system.options.nbest.only_distinct) {
|
||||
string tgtPhrase = path->OutputTargetPhrase(system);
|
||||
//cerr << "tgtPhrase=" << tgtPhrase << endl;
|
||||
|
||||
if (distinctHypos.insert(tgtPhrase).second) {
|
||||
ok = true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
ok = true;
|
||||
}
|
||||
if (distinctHypos.insert(tgtPhrase).second) {
|
||||
ok = true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
ok = true;
|
||||
}
|
||||
|
||||
if (ok) {
|
||||
++bestInd;
|
||||
if (ok) {
|
||||
++bestInd;
|
||||
out << transId << " |||";
|
||||
path->OutputToStream(out, system);
|
||||
out << "\n";
|
||||
}
|
||||
|
||||
out << transId << " |||";
|
||||
path->OutputToStream(out, system);
|
||||
out << "\n";
|
||||
}
|
||||
// create next paths
|
||||
path->CreateDeviantPaths(contenders, arcLists, GetPool(), system);
|
||||
|
||||
// create next paths
|
||||
path->CreateDeviantPaths(contenders, arcLists, GetPool(), system);
|
||||
delete path;
|
||||
}
|
||||
|
||||
delete path;
|
||||
}
|
||||
|
||||
return out.str();
|
||||
return out.str();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -29,133 +29,137 @@ Search::Search(Manager &mgr)
|
||||
:Moses2::Search(mgr)
|
||||
, m_stacks(mgr)
|
||||
{
|
||||
// TODO Auto-generated constructor stub
|
||||
// TODO Auto-generated constructor stub
|
||||
|
||||
}
|
||||
|
||||
Search::~Search()
|
||||
{
|
||||
// TODO Auto-generated destructor stub
|
||||
// TODO Auto-generated destructor stub
|
||||
}
|
||||
|
||||
void Search::Decode()
|
||||
{
|
||||
// init stacks
|
||||
const Sentence &sentence = static_cast<const Sentence&>(mgr.GetInput());
|
||||
m_stacks.Init(mgr, sentence.GetSize() + 1);
|
||||
// init stacks
|
||||
const Sentence &sentence = static_cast<const Sentence&>(mgr.GetInput());
|
||||
m_stacks.Init(mgr, sentence.GetSize() + 1);
|
||||
|
||||
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
|
||||
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(),
|
||||
initBitmap);
|
||||
initHypo->EmptyHypothesisState(mgr.GetInput());
|
||||
const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap();
|
||||
Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(),
|
||||
initBitmap);
|
||||
initHypo->EmptyHypothesisState(mgr.GetInput());
|
||||
|
||||
m_stacks.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists);
|
||||
m_stacks.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists);
|
||||
|
||||
for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
|
||||
Decode(stackInd);
|
||||
//cerr << m_stacks << endl;
|
||||
for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
|
||||
Decode(stackInd);
|
||||
//cerr << m_stacks << endl;
|
||||
|
||||
// delete stack to save mem
|
||||
if (stackInd < m_stacks.GetSize() - 1) {
|
||||
m_stacks.Delete(stackInd);
|
||||
}
|
||||
//cerr << m_stacks << endl;
|
||||
}
|
||||
// delete stack to save mem
|
||||
if (stackInd < m_stacks.GetSize() - 1) {
|
||||
m_stacks.Delete(stackInd);
|
||||
}
|
||||
//cerr << m_stacks << endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Search::Decode(size_t stackInd)
|
||||
{
|
||||
Stack &stack = m_stacks[stackInd];
|
||||
if (&stack == &m_stacks.Back()) {
|
||||
// last stack. don't do anythin
|
||||
return;
|
||||
}
|
||||
//cerr << "stackInd=" << stackInd << endl;
|
||||
Stack &stack = m_stacks[stackInd];
|
||||
if (&stack == &m_stacks.Back()) {
|
||||
// last stack. don't do anythin
|
||||
return;
|
||||
}
|
||||
|
||||
const Hypotheses &hypos = stack.GetSortedAndPruneHypos(mgr, mgr.arcLists);
|
||||
const Hypotheses &hypos = stack.GetSortedAndPruneHypos(mgr, mgr.arcLists);
|
||||
//cerr << "hypos=" << hypos.size() << endl;
|
||||
|
||||
const InputPaths &paths = mgr.GetInputPaths();
|
||||
const InputPaths &paths = mgr.GetInputPaths();
|
||||
|
||||
BOOST_FOREACH(const InputPathBase *path, paths){
|
||||
BOOST_FOREACH(const HypothesisBase *hypo, hypos) {
|
||||
Extend(*static_cast<const Hypothesis*>(hypo), *static_cast<const InputPath*>(path));
|
||||
}
|
||||
}
|
||||
BOOST_FOREACH(const InputPathBase *path, paths){
|
||||
BOOST_FOREACH(const HypothesisBase *hypo, hypos) {
|
||||
Extend(*static_cast<const Hypothesis*>(hypo), *static_cast<const InputPath*>(path));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Search::Extend(const Hypothesis &hypo, const InputPath &path)
|
||||
{
|
||||
const Bitmap &hypoBitmap = hypo.GetBitmap();
|
||||
const Range &hypoRange = hypo.GetInputPath().range;
|
||||
const Range &pathRange = path.range;
|
||||
const Bitmap &hypoBitmap = hypo.GetBitmap();
|
||||
const Range &hypoRange = hypo.GetInputPath().range;
|
||||
const Range &pathRange = path.range;
|
||||
|
||||
if (!CanExtend(hypoBitmap, hypoRange.GetEndPos(), pathRange)) {
|
||||
return;
|
||||
}
|
||||
if (!CanExtend(hypoBitmap, hypoRange.GetEndPos(), pathRange)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint();
|
||||
if (!reorderingConstraint.Check(hypoBitmap, pathRange.GetStartPos(), pathRange.GetEndPos())) {
|
||||
return;
|
||||
}
|
||||
const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint();
|
||||
if (!reorderingConstraint.Check(hypoBitmap, pathRange.GetStartPos(), pathRange.GetEndPos())) {
|
||||
return;
|
||||
}
|
||||
|
||||
// extend this hypo
|
||||
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
|
||||
//SCORE estimatedScore = mgr.GetEstimatedScores().CalcFutureScore2(bitmap, pathRange.GetStartPos(), pathRange.GetEndPos());
|
||||
SCORE estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
|
||||
// extend this hypo
|
||||
const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
|
||||
//SCORE estimatedScore = mgr.GetEstimatedScores().CalcFutureScore2(bitmap, pathRange.GetStartPos(), pathRange.GetEndPos());
|
||||
SCORE estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
|
||||
|
||||
size_t numPt = mgr.system.mappings.size();
|
||||
const TargetPhrases **tpsAllPt = path.targetPhrases;
|
||||
for (size_t i = 0; i < numPt; ++i) {
|
||||
const TargetPhrases *tps = tpsAllPt[i];
|
||||
if (tps) {
|
||||
Extend(hypo, *tps, path, newBitmap, estimatedScore);
|
||||
}
|
||||
}
|
||||
size_t numPt = mgr.system.mappings.size();
|
||||
const TargetPhrases **tpsAllPt = path.targetPhrases;
|
||||
for (size_t i = 0; i < numPt; ++i) {
|
||||
const TargetPhrases *tps = tpsAllPt[i];
|
||||
if (tps) {
|
||||
Extend(hypo, *tps, path, newBitmap, estimatedScore);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Search::Extend(const Hypothesis &hypo, const TargetPhrases &tps,
|
||||
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
|
||||
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
|
||||
{
|
||||
BOOST_FOREACH(const TargetPhraseImpl *tp, tps){
|
||||
Extend(hypo, *tp, path, newBitmap, estimatedScore);
|
||||
}
|
||||
BOOST_FOREACH(const TargetPhraseImpl *tp, tps){
|
||||
Extend(hypo, *tp, path, newBitmap, estimatedScore);
|
||||
}
|
||||
}
|
||||
|
||||
void Search::Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp,
|
||||
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
|
||||
const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore)
|
||||
{
|
||||
Hypothesis *newHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
newHypo->Init(mgr, hypo, path, tp, newBitmap, estimatedScore);
|
||||
newHypo->EvaluateWhenApplied();
|
||||
Hypothesis *newHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
|
||||
newHypo->Init(mgr, hypo, path, tp, newBitmap, estimatedScore);
|
||||
newHypo->EvaluateWhenApplied();
|
||||
|
||||
m_stacks.Add(newHypo, mgr.GetHypoRecycle(), mgr.arcLists);
|
||||
m_stacks.Add(newHypo, mgr.GetHypoRecycle(), mgr.arcLists);
|
||||
|
||||
//m_arcLists.AddArc(stackAdded.added, newHypo, stackAdded.other);
|
||||
//stack.Prune(mgr.GetHypoRecycle(), mgr.system.stackSize, mgr.system.stackSize * 2);
|
||||
//m_arcLists.AddArc(stackAdded.added, newHypo, stackAdded.other);
|
||||
//stack.Prune(mgr.GetHypoRecycle(), mgr.system.stackSize, mgr.system.stackSize * 2);
|
||||
|
||||
}
|
||||
|
||||
const Hypothesis *Search::GetBestHypothesis() const
|
||||
{
|
||||
const Stack &lastStack = m_stacks.Back();
|
||||
const Hypotheses &sortedHypos = lastStack.GetSortedAndPruneHypos(mgr,
|
||||
mgr.arcLists);
|
||||
const Stack &lastStack = m_stacks.Back();
|
||||
const Hypotheses &sortedHypos = lastStack.GetSortedAndPruneHypos(mgr,
|
||||
mgr.arcLists);
|
||||
|
||||
const Hypothesis *best = NULL;
|
||||
if (sortedHypos.size()) {
|
||||
best = static_cast<const Hypothesis*>(sortedHypos[0]);
|
||||
}
|
||||
return best;
|
||||
const Hypothesis *best = NULL;
|
||||
if (sortedHypos.size()) {
|
||||
best = static_cast<const Hypothesis*>(sortedHypos[0]);
|
||||
}
|
||||
return best;
|
||||
}
|
||||
|
||||
void Search::AddInitialTrellisPaths(TrellisPaths<TrellisPath> &paths) const
|
||||
{
|
||||
const Stack &lastStack = m_stacks.Back();
|
||||
BOOST_FOREACH(const HypothesisBase *hypoBase, lastStack){
|
||||
const Hypothesis *hypo = static_cast<const Hypothesis*>(hypoBase);
|
||||
TrellisPath *path = new TrellisPath(hypo, mgr.arcLists);
|
||||
paths.Add(path);
|
||||
}
|
||||
const Stack &lastStack = m_stacks.Back();
|
||||
const Hypotheses &hypos = lastStack.GetSortedAndPruneHypos(mgr, mgr.arcLists);
|
||||
|
||||
BOOST_FOREACH(const HypothesisBase *hypoBase, hypos){
|
||||
const Hypothesis *hypo = static_cast<const Hypothesis*>(hypoBase);
|
||||
TrellisPath *path = new TrellisPath(hypo, mgr.arcLists);
|
||||
paths.Add(path);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
Loading…
Reference in New Issue
Block a user