More efficient implementation of print all derivations

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2957 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
abarun 2010-03-08 15:28:40 +00:00
parent 8eeb1708ae
commit e09be2bfd5
2 changed files with 66 additions and 44 deletions

View File

@ -116,58 +116,78 @@ void Manager::PrintAllDerivations(long translationId ) const
if (sortedPureHypo.size() == 0)
return;
TrellisPathList contenders, uninspectedConts;
float remainingScore = 0;
vector<const TargetPhrase*> remainingPhrases;
// add all pure paths
vector<const Hypothesis*>::const_iterator iterBestHypo;
for (iterBestHypo = sortedPureHypo.begin()
; iterBestHypo != sortedPureHypo.end()
; ++iterBestHypo)
{
contenders.Add(new TrellisPath(*iterBestHypo));
}
// MAIN loop
while( !(contenders.GetSize() == 0 && uninspectedConts.GetSize() == 0) )
{
if (contenders.GetSize() > 0) {
// get next best from list of contenders
const TrellisPath *path = contenders.pop();
assert(path);
const std::vector<const Hypothesis *> &edges = path->GetEdges();
cerr << translationId << " ||| ";
// print the surface factor of the translation
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--)
{
const Hypothesis &edge = *edges[currEdge];
const Phrase& phrase = edge.GetCurrTargetPhrase();
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++)
{
const Factor *factor = phrase.GetFactor(pos, 0);
cerr << *factor;
cerr << " ";
}
}
cerr << "||| " << path->GetTotalScore();
cerr << endl;
// create deviations from current best
uninspectedConts.Add(const_cast<TrellisPath*>(path));
}
else {
const TrellisPath *path = uninspectedConts.pop();
assert(path);
path->CreateDeviantPaths(contenders);
}
}
printThisHypothesis(translationId, *iterBestHypo, remainingPhrases, remainingScore);
printDivergentHypothesis(translationId, *iterBestHypo, remainingPhrases, remainingScore);
}
}
void Manager::printDivergentHypothesis(long translationId, const Hypothesis* hypo, const vector <const TargetPhrase*> & remainingPhrases, float remainingScore ) const
{
//Backtrack from the predecessor
if (hypo->GetPrevHypo()->GetId() > 0) {
vector <const TargetPhrase*> followingPhrases;
followingPhrases.push_back(& (hypo->GetCurrTargetPhrase()));
///((Phrase) hypo->GetPrevHypo()->GetTargetPhrase());
followingPhrases.insert(followingPhrases.end()--, remainingPhrases.begin(), remainingPhrases.end());
printDivergentHypothesis(translationId, hypo->GetPrevHypo(), followingPhrases , remainingScore + hypo->GetScore() - hypo->GetPrevHypo()->GetScore());
}
//Process the arcs
const ArcList *pAL = hypo->GetArcList();
if (pAL) {
const ArcList &arcList = *pAL;
// every possible Arc to replace this edge
ArcList::const_iterator iterArc;
for (iterArc = arcList.begin() ; iterArc != arcList.end() ; ++iterArc)
{
const Hypothesis *loserHypo = *iterArc;
const Hypothesis* loserPrevHypo = loserHypo->GetPrevHypo();
float arcScore = loserHypo->GetScore() - loserPrevHypo->GetScore();
vector <const TargetPhrase* > followingPhrases;
followingPhrases.push_back(&(loserHypo->GetCurrTargetPhrase()));
followingPhrases.insert(followingPhrases.end()--, remainingPhrases.begin(), remainingPhrases.end());
printThisHypothesis(translationId, loserPrevHypo, followingPhrases, remainingScore + arcScore);
printDivergentHypothesis(translationId, loserPrevHypo, followingPhrases, remainingScore + arcScore);
}
}
}
void Manager::printThisHypothesis(long translationId, const Hypothesis* hypo, const vector <const TargetPhrase*> & remainingPhrases, float remainingScore ) const
{
cerr << translationId << " ||| ";
//Yield of this hypothesis
hypo->ToStream(cerr);
for (size_t p = 0; p < remainingPhrases.size(); ++p) {
const TargetPhrase * phrase = remainingPhrases[p];
size_t size = phrase->GetSize();
for (size_t pos = 0 ; pos < size ; pos++)
{
const Factor *factor = phrase->GetFactor(pos, 0);
cerr << *factor;
cerr << " ";
}
}
cerr << "||| " << hypo->GetScore() + remainingScore;
cerr << endl;
}
/**
* After decoding, the hypotheses in the stacks and additional arcs
* form a search graph that can be mined for n-best lists.

View File

@ -105,6 +105,8 @@ public:
const Hypothesis *GetActualBestHypothesis() const;
void CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct=0) const;
void PrintAllDerivations(long translationId) const;
void printDivergentHypothesis(long translationId, const Hypothesis* hypo, const std::vector <const TargetPhrase*> & remainingPhrases, float remainingScore ) const;
void printThisHypothesis(long translationId, const Hypothesis* hypo, const std::vector <const TargetPhrase* > & remainingPhrases, float remainingScore ) const;
void GetWordGraph(long translationId, std::ostream &outputWordGraphStream) const;
#ifdef HAVE_PROTOBUF
void SerializeSearchGraphPB(long translationId, std::ostream& outputStream) const;