mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 04:43:03 +03:00
ChartKBestExtractor: fix memory leak, clean-up code
This commit is contained in:
parent
00a2bd395a
commit
568685cb66
@ -32,52 +32,48 @@ namespace Moses
|
||||
|
||||
// Extract the k-best list from the search graph.
|
||||
void ChartKBestExtractor::Extract(
|
||||
const std::vector<const ChartHypothesis*> &topHypos, std::size_t k,
|
||||
const std::vector<const ChartHypothesis*> &topLevelHypos, std::size_t k,
|
||||
KBestVec &kBestList)
|
||||
{
|
||||
typedef std::vector<const ChartHypothesis*> HypoVec;
|
||||
|
||||
kBestList.clear();
|
||||
if (topHypos.empty()) {
|
||||
if (topLevelHypos.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Create a new top-level ChartHypothesis that has the best hypothesis as its
|
||||
// predecessor. This is the search hypergraph's target vertex.
|
||||
HypoVec::const_iterator iter = topHypos.begin();
|
||||
// Create a new ChartHypothesis object, supremeHypo, that has the best
|
||||
// top-level hypothesis as its predecessor and has the same score.
|
||||
std::vector<const ChartHypothesis*>::const_iterator p = topLevelHypos.begin();
|
||||
const ChartHypothesis &bestTopLevelHypo = **p;
|
||||
boost::scoped_ptr<ChartHypothesis> supremeHypo(
|
||||
new ChartHypothesis(**iter, *this));
|
||||
new ChartHypothesis(bestTopLevelHypo, *this));
|
||||
|
||||
// Do the same for each alternative top-level hypothesis, but add the new
|
||||
// ChartHypothesis objects as arcs from supremeHypo, as if they had been
|
||||
// recombined.
|
||||
float prevScore = (*iter)->GetTotalScore();
|
||||
for (++iter; iter != topHypos.end(); ++iter) {
|
||||
// Check that the first item in topHypos really was the best.
|
||||
UTIL_THROW_IF2((*iter)->GetTotalScore() <= prevScore,
|
||||
"top-level vertices are not correctly sorted");
|
||||
for (++p; p != topLevelHypos.end(); ++p) {
|
||||
// Check that the first item in topLevelHypos really was the best.
|
||||
UTIL_THROW_IF2((*p)->GetTotalScore() <= bestTopLevelHypo.GetTotalScore(),
|
||||
"top-level hypotheses are not correctly sorted");
|
||||
// Note: there's no need for a smart pointer here: supremeHypo will take
|
||||
// ownership of altHypo.
|
||||
ChartHypothesis *altHypo = new ChartHypothesis(**iter, *this);
|
||||
ChartHypothesis *altHypo = new ChartHypothesis(**p, *this);
|
||||
supremeHypo->AddArc(altHypo);
|
||||
}
|
||||
|
||||
// Create the target vertex corresponding to supremeHypo then generate
|
||||
// it's k-best list.
|
||||
boost::shared_ptr<Vertex> top = FindOrCreateVertex(*supremeHypo);
|
||||
LazyKthBest(*top, k, k);
|
||||
// Create the target vertex then lazily fill its k-best list.
|
||||
boost::shared_ptr<Vertex> targetVertex = FindOrCreateVertex(*supremeHypo);
|
||||
LazyKthBest(*targetVertex, k, k);
|
||||
|
||||
// Copy the k-best list from the target vertex, but drop the top edge from
|
||||
// each derivation.
|
||||
kBestList.reserve(top->kBestList.size());
|
||||
for (KBestVec::const_iterator p = top->kBestList.begin();
|
||||
p != top->kBestList.end(); ++p) {
|
||||
const Derivation &d = **p;
|
||||
assert(d.edge.tail.size() == 1); // d should have exactly one predecessor.
|
||||
assert(d.backPointers.size() == 1);
|
||||
std::size_t i = d.backPointers[0];
|
||||
boost::shared_ptr<Derivation> pred = d.edge.tail[0]->kBestList[i];
|
||||
kBestList.push_back(pred);
|
||||
kBestList.reserve(targetVertex->kBestList.size());
|
||||
for (std::vector<boost::weak_ptr<Derivation> >::const_iterator
|
||||
q = targetVertex->kBestList.begin();
|
||||
q != targetVertex->kBestList.end(); ++q) {
|
||||
const boost::shared_ptr<Derivation> d(*q);
|
||||
assert(d);
|
||||
assert(d->subderivations.size() == 1);
|
||||
kBestList.push_back(d->subderivations[0]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -96,8 +92,7 @@ Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
|
||||
const Word &word = phrase.GetWord(pos);
|
||||
if (word.IsNonTerminal()) {
|
||||
std::size_t nonTermInd = nonTermIndexMap[pos];
|
||||
const Derivation &subderivation =
|
||||
*d.edge.tail[nonTermInd]->kBestList[d.backPointers[nonTermInd]];
|
||||
const Derivation &subderivation = *d.subderivations[nonTermInd];
|
||||
Phrase subPhrase = GetOutputPhrase(subderivation);
|
||||
ret.Append(subPhrase);
|
||||
} else {
|
||||
@ -142,26 +137,6 @@ ChartKBestExtractor::UnweightedHyperarc ChartKBestExtractor::CreateEdge(
|
||||
return edge;
|
||||
}
|
||||
|
||||
void ChartKBestExtractor::GetCandidates(Vertex &v, std::size_t k)
|
||||
{
|
||||
// Create a derivation for v's best incoming edge.
|
||||
UnweightedHyperarc bestEdge = CreateEdge(v.hypothesis);
|
||||
boost::shared_ptr<Derivation> d(new Derivation(bestEdge));
|
||||
v.candidates.push(d);
|
||||
v.seen.insert(d);
|
||||
// Create derivations for the rest of v's incoming edges.
|
||||
const ChartArcList *arcList = v.hypothesis.GetArcList();
|
||||
if (arcList) {
|
||||
for (std::size_t i = 0; i < arcList->size(); ++i) {
|
||||
const ChartHypothesis &recombinedHypo = *(*arcList)[i];
|
||||
UnweightedHyperarc edge = CreateEdge(recombinedHypo);
|
||||
boost::shared_ptr<Derivation> d(new Derivation(edge));
|
||||
v.candidates.push(d);
|
||||
v.seen.insert(d);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Look for the vertex corresponding to a given ChartHypothesis, creating
|
||||
// a new one if necessary.
|
||||
boost::shared_ptr<ChartKBestExtractor::Vertex>
|
||||
@ -174,14 +149,51 @@ ChartKBestExtractor::FindOrCreateVertex(const ChartHypothesis &h)
|
||||
return sp; // Vertex was already in m_vertexMap.
|
||||
}
|
||||
sp.reset(new Vertex(h));
|
||||
// Create the 1-best derivation and add it to the vertex's kBestList.
|
||||
UnweightedHyperarc bestEdge;
|
||||
bestEdge.head = sp;
|
||||
const std::vector<const ChartHypothesis*> &prevHypos = h.GetPrevHypos();
|
||||
bestEdge.tail.resize(prevHypos.size());
|
||||
for (std::size_t i = 0; i < prevHypos.size(); ++i) {
|
||||
const ChartHypothesis *prevHypo = prevHypos[i];
|
||||
bestEdge.tail[i] = FindOrCreateVertex(*prevHypo);
|
||||
}
|
||||
boost::shared_ptr<Derivation> bestDerivation(new Derivation(bestEdge));
|
||||
std::pair<DerivationSet::iterator, bool> q =
|
||||
m_derivations.insert(bestDerivation);
|
||||
assert(q.second);
|
||||
sp->kBestList.push_back(bestDerivation);
|
||||
return sp;
|
||||
}
|
||||
|
||||
// Create the 1-best derivation for each edge in BS(v) (except the best one)
|
||||
// and add it to v's candidate queue.
|
||||
void ChartKBestExtractor::GetCandidates(Vertex &v, std::size_t k)
|
||||
{
|
||||
// Create derivations for all of v's incoming edges except the best. This
|
||||
// means everything in v.hypothesis.GetArcList() and not the edge defined
|
||||
// by v.hypothesis itself. The 1-best derivation for that edge will already
|
||||
// have been created.
|
||||
const ChartArcList *arcList = v.hypothesis.GetArcList();
|
||||
if (arcList) {
|
||||
for (std::size_t i = 0; i < arcList->size(); ++i) {
|
||||
const ChartHypothesis &recombinedHypo = *(*arcList)[i];
|
||||
boost::shared_ptr<Vertex> w = FindOrCreateVertex(recombinedHypo);
|
||||
assert(w->kBestList.size() == 1);
|
||||
v.candidates.push(w->kBestList[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Lazily fill v's k-best list.
|
||||
void ChartKBestExtractor::LazyKthBest(Vertex &v, std::size_t k,
|
||||
std::size_t globalK)
|
||||
{
|
||||
// If this is the first visit to vertex v then initialize the priority queue.
|
||||
if (v.visited == false) {
|
||||
// The 1-best derivation should already be in v's k-best list.
|
||||
assert(v.kBestList.size() == 1);
|
||||
// Initialize v's priority queue.
|
||||
GetCandidates(v, globalK);
|
||||
v.visited = true;
|
||||
}
|
||||
@ -191,49 +203,57 @@ void ChartKBestExtractor::LazyKthBest(Vertex &v, std::size_t k,
|
||||
if (!v.kBestList.empty()) {
|
||||
// Update the priority queue by adding the successors of the last
|
||||
// derivation (unless they've been seen before).
|
||||
const Derivation &d = *v.kBestList.back();
|
||||
LazyNext(v, d, globalK);
|
||||
boost::shared_ptr<Derivation> d(v.kBestList.back());
|
||||
LazyNext(v, *d, globalK);
|
||||
}
|
||||
// Check if there are any derivations left in the queue.
|
||||
if (v.candidates.empty()) {
|
||||
break;
|
||||
}
|
||||
// Get the next best derivation and delete it from the queue.
|
||||
boost::shared_ptr<Derivation> d = v.candidates.top();
|
||||
boost::weak_ptr<Derivation> d = v.candidates.top();
|
||||
v.candidates.pop();
|
||||
// Add it to the k-best list.
|
||||
v.kBestList.push_back(d);
|
||||
}
|
||||
}
|
||||
|
||||
// Create the neighbours of Derivation d and add them to v's candidate queue.
|
||||
void ChartKBestExtractor::LazyNext(Vertex &v, const Derivation &d,
|
||||
std::size_t globalK)
|
||||
{
|
||||
// Create the neighbours of Derivation d.
|
||||
for (std::size_t i = 0; i < d.backPointers.size(); ++i) {
|
||||
Vertex &predVertex = *d.edge.tail[i];
|
||||
// Ensure that predVertex's k-best list contains enough derivations.
|
||||
for (std::size_t i = 0; i < d.edge.tail.size(); ++i) {
|
||||
Vertex &pred = *d.edge.tail[i];
|
||||
// Ensure that pred's k-best list contains enough derivations.
|
||||
std::size_t k = d.backPointers[i] + 2;
|
||||
LazyKthBest(predVertex, k, globalK);
|
||||
if (predVertex.kBestList.size() < k) {
|
||||
// predVertex's derivations have been exhausted.
|
||||
LazyKthBest(pred, k, globalK);
|
||||
if (pred.kBestList.size() < k) {
|
||||
// pred's derivations have been exhausted.
|
||||
continue;
|
||||
}
|
||||
// Create the neighbour.
|
||||
boost::shared_ptr<Derivation> next(new Derivation(d, i));
|
||||
// Check if it has been created before.
|
||||
std::pair<Vertex::DerivationSet::iterator, bool> p = v.seen.insert(next);
|
||||
std::pair<DerivationSet::iterator, bool> p = m_derivations.insert(next);
|
||||
if (p.second) {
|
||||
v.candidates.push(next); // Haven't previously seen it.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct a Derivation corresponding to a ChartHypothesis.
|
||||
// Construct the 1-best Derivation that ends at edge e.
|
||||
ChartKBestExtractor::Derivation::Derivation(const UnweightedHyperarc &e)
|
||||
{
|
||||
edge = e;
|
||||
backPointers.resize(edge.tail.size(), 0);
|
||||
std::size_t arity = edge.tail.size();
|
||||
backPointers.resize(arity, 0);
|
||||
subderivations.reserve(arity);
|
||||
for (std::size_t i = 0; i < arity; ++i) {
|
||||
const Vertex &pred = *edge.tail[i];
|
||||
assert(pred.kBestList.size() == 1);
|
||||
boost::shared_ptr<Derivation> sub(pred.kBestList[0]);
|
||||
subderivations.push_back(sub);
|
||||
}
|
||||
scoreBreakdown = edge.head->hypothesis.GetScoreBreakdown();
|
||||
score = edge.head->hypothesis.GetTotalScore();
|
||||
}
|
||||
@ -244,14 +264,16 @@ ChartKBestExtractor::Derivation::Derivation(const Derivation &d, std::size_t i)
|
||||
edge.head = d.edge.head;
|
||||
edge.tail = d.edge.tail;
|
||||
backPointers = d.backPointers;
|
||||
subderivations = d.subderivations;
|
||||
std::size_t j = ++backPointers[i];
|
||||
scoreBreakdown = d.scoreBreakdown;
|
||||
// Deduct the score of the old subderivation.
|
||||
const Derivation &oldSubderivation = *(edge.tail[i]->kBestList[j-1]);
|
||||
scoreBreakdown.MinusEquals(oldSubderivation.scoreBreakdown);
|
||||
scoreBreakdown.MinusEquals(subderivations[i]->scoreBreakdown);
|
||||
// Update the subderivation pointer.
|
||||
boost::shared_ptr<Derivation> newSub(edge.tail[i]->kBestList[j]);
|
||||
subderivations[i] = newSub;
|
||||
// Add the score of the new subderivation.
|
||||
const Derivation &newSubderivation = *(edge.tail[i]->kBestList[j]);
|
||||
scoreBreakdown.PlusEquals(newSubderivation.scoreBreakdown);
|
||||
scoreBreakdown.PlusEquals(subderivations[i]->scoreBreakdown);
|
||||
score = scoreBreakdown.GetWeightedScore();
|
||||
}
|
||||
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "ScoreComponentCollection.h"
|
||||
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include <boost/weak_ptr.hpp>
|
||||
|
||||
#include <queue>
|
||||
#include <vector>
|
||||
@ -53,17 +54,46 @@ public:
|
||||
|
||||
UnweightedHyperarc edge;
|
||||
std::vector<std::size_t> backPointers;
|
||||
std::vector<boost::shared_ptr<Derivation> > subderivations;
|
||||
ScoreComponentCollection scoreBreakdown;
|
||||
float score;
|
||||
};
|
||||
|
||||
struct DerivationOrderer {
|
||||
bool operator()(const boost::shared_ptr<Derivation> &d1,
|
||||
const boost::shared_ptr<Derivation> &d2) const {
|
||||
return d1->score < d2->score;
|
||||
bool operator()(const boost::weak_ptr<Derivation> &d1,
|
||||
const boost::weak_ptr<Derivation> &d2) const {
|
||||
boost::shared_ptr<Derivation> s1(d1);
|
||||
boost::shared_ptr<Derivation> s2(d2);
|
||||
return s1->score < s2->score;
|
||||
}
|
||||
};
|
||||
|
||||
struct Vertex {
|
||||
typedef std::priority_queue<boost::weak_ptr<Derivation>,
|
||||
std::vector<boost::weak_ptr<Derivation> >,
|
||||
DerivationOrderer> DerivationQueue;
|
||||
|
||||
Vertex(const ChartHypothesis &h) : hypothesis(h), visited(false) {}
|
||||
|
||||
const ChartHypothesis &hypothesis;
|
||||
std::vector<boost::weak_ptr<Derivation> > kBestList;
|
||||
DerivationQueue candidates;
|
||||
bool visited;
|
||||
};
|
||||
|
||||
typedef std::vector<boost::shared_ptr<Derivation> > KBestVec;
|
||||
|
||||
// Extract the k-best list from the search hypergraph given the full, sorted
|
||||
// list of top-level vertices.
|
||||
void Extract(const std::vector<const ChartHypothesis*> &topHypos,
|
||||
std::size_t k, KBestVec &);
|
||||
|
||||
static Phrase GetOutputPhrase(const Derivation &);
|
||||
|
||||
private:
|
||||
typedef boost::unordered_map<const ChartHypothesis *,
|
||||
boost::shared_ptr<Vertex> > VertexMap;
|
||||
|
||||
struct DerivationHasher {
|
||||
std::size_t operator()(const boost::shared_ptr<Derivation> &d) const {
|
||||
std::size_t seed = 0;
|
||||
@ -83,36 +113,8 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
struct Vertex {
|
||||
typedef std::priority_queue<boost::shared_ptr<Derivation>,
|
||||
std::vector<boost::shared_ptr<Derivation> >,
|
||||
DerivationOrderer> DerivationQueue;
|
||||
|
||||
typedef boost::unordered_set<boost::shared_ptr<Derivation>,
|
||||
DerivationHasher,
|
||||
DerivationEqualityPred> DerivationSet;
|
||||
|
||||
Vertex(const ChartHypothesis &h) : hypothesis(h), visited(false) {}
|
||||
|
||||
const ChartHypothesis &hypothesis;
|
||||
std::vector<boost::shared_ptr<Derivation> > kBestList;
|
||||
DerivationQueue candidates;
|
||||
DerivationSet seen;
|
||||
bool visited;
|
||||
};
|
||||
|
||||
typedef std::vector<boost::shared_ptr<Derivation> > KBestVec;
|
||||
|
||||
// Extract the k-best list from the search hypergraph given the full, sorted
|
||||
// list of top-level vertices.
|
||||
void Extract(const std::vector<const ChartHypothesis*> &topHypos,
|
||||
std::size_t k, KBestVec &);
|
||||
|
||||
static Phrase GetOutputPhrase(const Derivation &);
|
||||
|
||||
private:
|
||||
typedef boost::unordered_map<const ChartHypothesis *,
|
||||
boost::shared_ptr<Vertex> > VertexMap;
|
||||
typedef boost::unordered_set<boost::shared_ptr<Derivation>, DerivationHasher,
|
||||
DerivationEqualityPred> DerivationSet;
|
||||
|
||||
UnweightedHyperarc CreateEdge(const ChartHypothesis &);
|
||||
boost::shared_ptr<Vertex> FindOrCreateVertex(const ChartHypothesis &);
|
||||
@ -121,6 +123,7 @@ private:
|
||||
void LazyNext(Vertex &, const Derivation &, std::size_t);
|
||||
|
||||
VertexMap m_vertexMap;
|
||||
DerivationSet m_derivations;
|
||||
};
|
||||
|
||||
} // namespace Moses
|
||||
|
Loading…
Reference in New Issue
Block a user