prune arcList & LatticePathCollection (contenders for n-best paths)

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1134 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
hieuhoang1972 2007-01-22 12:30:06 +00:00
parent ff538c2e2f
commit e1823a52ac
12 changed files with 121 additions and 42 deletions

View File

@ -133,7 +133,7 @@ int main(int argc, char* argv[])
{
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
LatticePathList nBestList;
manager.CalcNBest(nBestSize, nBestList,staticData.OnlyDistinctNBest());
manager.CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
ioStream->OutputNBestList(nBestList, source->GetTranslationId());
//RemoveAllInColl(nBestList);
}

View File

@ -265,6 +265,10 @@
RelativePath=".\src\LatticePath.cpp"
>
</File>
<File
RelativePath=".\src\LatticePathCollection.cpp"
>
</File>
<File
RelativePath=".\src\LexicalReordering.cpp"
>

View File

@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <iostream>
#include <limits>
#include <vector>
#include <algorithm>
#include "TranslationOption.h"
#include "TranslationOptionCollection.h"
#include "DummyScoreProducers.h"
@ -404,13 +405,40 @@ void Hypothesis::PrintHypothesis(const InputType &source, float /*weightDistorti
//PrintLMScores();
}
void Hypothesis::InitializeArcs()
void Hypothesis::CleanupArcList()
{
// point this hypo's main hypo to itself
SetWinningHypo(this);
if (!m_arcList) return;
/* keep only number of arcs we need to create all n-best paths.
* However, may not be enough if only unique candidates are needed,
* so we'll keep all of the arc list if we need a distinct n-best list
*/
const StaticData *staticData = StaticData::Instance();
size_t nBestSize = staticData->GetNBestSize();
bool distinctNBest = staticData->GetDistinctNBest();
if (!distinctNBest && m_arcList->size() > nBestSize)
{
nth_element(m_arcList->begin()
, m_arcList->begin() + nBestSize - 1
, m_arcList->end()
, CompareHypothesisTotalScore());
// delete bad ones
ObjectPool<Hypothesis> &pool = Hypothesis::GetObjectPool();
ArcList::iterator iter;
for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter)
{
Hypothesis *arc = *iter;
pool.freeObject(arc);
}
m_arcList->erase(m_arcList->begin() + nBestSize
, m_arcList->end());
}
// set all arc's main hypo variable to this hypo
ArcList::iterator iter = m_arcList->begin();
for (; iter != m_arcList->end() ; ++iter)

View File

@ -230,7 +230,7 @@ public:
}
void AddArc(Hypothesis *loserHypo);
void InitializeArcs();
void CleanupArcList();
//! returns a list alternative previous hypotheses (or NULL if n-best support is disabled)
inline const ArcList* GetArcList() const
@ -256,3 +256,13 @@ public:
};
std::ostream& operator<<(std::ostream& out, const Hypothesis& hypothesis);
/** Ordering predicate for Hypothesis pointers, used by sorting/partitioning
 *  algorithms: a hypothesis with a higher total score is ordered first.
 */
struct CompareHypothesisTotalScore
{
	//! \return true iff hypo1's total score is strictly greater than hypo2's
	bool operator()(const Hypothesis* hypo1, const Hypothesis* hypo2) const
	{
		return hypo2->GetTotalScore() < hypo1->GetTotalScore();
	}
};

View File

@ -205,26 +205,17 @@ const Hypothesis *HypothesisCollection::GetBestHypothesis() const
return NULL;
}
/** Sorting helper: orders Hypothesis pointers by descending total score.
 *  The comparison is strict (uses >), as required of a comparator passed
 *  to std::sort.
 */
struct HypothesisSortDescending
{
	//! \return true iff hypo1's total score is strictly greater than hypo2's
	// (plain bool: a const qualifier on a by-value return type has no effect)
	bool operator()(const Hypothesis* hypo1, const Hypothesis* hypo2) const
	{
		return hypo1->GetTotalScore() > hypo2->GetTotalScore();
	}
};
/** Return the hypotheses held in m_hypos as a vector, sorted by the
 *  comparator(s) below.
 *  \return a newly built vector of const Hypothesis pointers (returned by value)
 */
vector<const Hypothesis*> HypothesisCollection::GetSortedList() const
{
// reserve up front so copying m_hypos.size() pointers does not reallocate
vector<const Hypothesis*> ret; ret.reserve(m_hypos.size());
// append every hypothesis pointer from m_hypos to the end of ret
std::copy(m_hypos.begin(), m_hypos.end(), std::inserter(ret, ret.end()));
// NOTE(review): these two consecutive sort calls look like the removed/added
// pair of a diff hunk (old comparator name, then new one) - confirm that only
// one of them is meant to remain.
sort(ret.begin(), ret.end(), HypothesisSortDescending());
sort(ret.begin(), ret.end(), CompareHypothesisTotalScore());
return ret;
}
void HypothesisCollection::InitializeArcs()
void HypothesisCollection::CleanupArcList()
{
// only necessary if n-best calculations are enabled
if (!m_nBestIsEnabled) return;
@ -233,7 +224,7 @@ void HypothesisCollection::InitializeArcs()
for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter)
{
Hypothesis *mainHypo = *iter;
mainHypo->InitializeArcs();
mainHypo->CleanupArcList();
}
}

View File

@ -181,7 +181,7 @@ public:
/** make all arcs point to the equivalent hypothesis that contains them,
* i.e. update the doubly linked list between hypos & arcs
*/
void InitializeArcs();
void CleanupArcList();
TO_STRING();
};

View File

@ -0,0 +1,33 @@
#include "LatticePathCollection.h"
void LatticePathCollection::Prune(size_t newSize)
{
assert( m_collection.size() == m_uniquePath.size() );
if (m_collection.size() <= newSize)
return; // don't need to prune
CollectionType::reverse_iterator iterRev;
for (iterRev = m_collection.rbegin() ; iterRev != m_collection.rend() ; ++iterRev)
{
LatticePath *latticePath = *iterRev;
// delete path in m_uniquePath
m_uniquePath.erase(latticePath->GetEdges());
delete latticePath;
if (m_uniquePath.size() == newSize)
break;
}
// delete path in m_collection
CollectionType::iterator iter = m_collection.begin();
for (size_t i = 0 ; i < newSize ; ++i)
iter++;
m_collection.erase(iter, m_collection.end());
assert( m_collection.size() == m_uniquePath.size() );
}

View File

@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <set>
#include <iostream>
#include "LatticePath.h"
struct CompareLatticePathCollection
{
@ -32,21 +33,34 @@ struct CompareLatticePathCollection
}
};
/** priority queue used in Manager to store list of contenders for N-Best list.
* Stored in order of total score so that the best path can just be popped from the top
*/
class LatticePathCollection
{
friend std::ostream& operator<<(std::ostream&, const LatticePathCollection&);
protected:
std::multiset<LatticePath*, CompareLatticePathCollection> m_collection;
std::set< std::vector<const Hypothesis *> > m_uniquePath;
public:
// iters
// iterator types of the underlying multiset of LatticePath pointers
typedef std::multiset<LatticePath*, CompareLatticePathCollection>::iterator iterator;
typedef std::multiset<LatticePath*, CompareLatticePathCollection>::const_iterator const_iterator;
//! iterator to the first path in m_collection
iterator begin() { return m_collection.begin(); }
//! past-the-end iterator of m_collection
iterator end() { return m_collection.end(); }
//! const iterator to the first path in m_collection
const_iterator begin() const { return m_collection.begin(); }
//! const past-the-end iterator of m_collection
const_iterator end() const { return m_collection.end(); }
typedef std::multiset<LatticePath*, CompareLatticePathCollection> CollectionType;
CollectionType m_collection;
std::set< std::vector<const Hypothesis *> > m_uniquePath;
// not sure if really needed. does the partitioning algorithm create duplicate paths ?
public:
//iterator begin() { return m_collection.begin(); }
/** Detach and return the first path in m_collection.
 *  The path's edge list is removed from m_uniquePath as well. The path itself
 *  is not deleted here; the pointer is handed to the caller.
 *  \return the detached path, or NULL if the collection is empty
 */
LatticePath *pop()
{
	// dereferencing begin() of an empty container is undefined behaviour
	if (m_collection.empty())
		return NULL;

	LatticePath *top = *m_collection.begin();

	// Detach
	// delete from m_uniquePath as well
	const std::vector<const Hypothesis *> &edges = top->GetEdges();
	m_uniquePath.erase(edges);

	m_collection.erase(m_collection.begin());
	return top;
}
~LatticePathCollection()
{
@ -72,22 +86,15 @@ public:
{
return m_collection.size();
}
/** Remove the path referenced by iter from the collection without deleting it.
 *  Its edge list is removed from m_uniquePath first, then the entry itself is
 *  erased from m_collection.
 *  \param iter iterator into m_collection identifying the path to detach
 */
void Detach(const LatticePathCollection::iterator &iter)
{
	// keep the uniqueness index in step with the collection
	m_uniquePath.erase((*iter)->GetEdges());

	m_collection.erase(iter);
}
void Prune(size_t newSize);
};
inline std::ostream& operator<<(std::ostream& out, const LatticePathCollection& pathColl)
{
LatticePathCollection::const_iterator iter;
LatticePathCollection::CollectionType::const_iterator iter;
for (iter = pathColl.begin() ; iter != pathColl.end() ; ++iter)
for (iter = pathColl.m_collection.begin() ; iter != pathColl.m_collection.end() ; ++iter)
{
const LatticePath &path = **iter;
out << path << std::endl;

View File

@ -26,11 +26,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "LatticePath.h"
/** used to return n-best list of Lattice Paths from the Manager to the caller */
class LatticePathList
{
protected:
std::list<const LatticePath*> m_collection;
std::set< std::vector<const Hypothesis *> > m_uniquePath;
// not sure if really needed. does the partitioning algorithm create duplicate paths ?
public:
// iters
typedef std::list<const LatticePath*>::iterator iterator;

View File

@ -23,6 +23,7 @@ libmoses_a_SOURCES = \
LanguageModelSingleFactor.cpp \
LanguageModelSkip.cpp \
LatticePath.cpp \
LatticePathCollection.cpp \
LexicalReordering.cpp \
Manager.cpp \
md5.cpp \

View File

@ -92,7 +92,7 @@ void Manager::ProcessSentence()
VERBOSE(3,"processing hypothesis from next stack");
sourceHypoColl.PruneToSize(m_staticData.GetMaxHypoStackSize());
VERBOSE(3,std::endl);
sourceHypoColl.InitializeArcs();
sourceHypoColl.CleanupArcList();
// go through each hypothesis on the stack and try to expand it
HypothesisCollection::const_iterator iterHypo;
@ -324,7 +324,7 @@ void Manager::CalcNBest(size_t count, LatticePathList &ret,bool onlyDistinct) co
for (size_t iteration = 0 ; (onlyDistinct ? distinctHyps.size() : ret.GetSize()) < count && contenders.GetSize() > 0 && (iteration < count * 20) ; iteration++)
{
// get next best from list of contenders
LatticePath *path = *contenders.begin();
LatticePath *path = contenders.pop();
assert(path);
bool addPath = true;
if(onlyDistinct)
@ -342,7 +342,10 @@ void Manager::CalcNBest(size_t count, LatticePathList &ret,bool onlyDistinct) co
path->CreateDeviantPaths(contenders);
}
contenders.Detach(contenders.begin());
if(!onlyDistinct)
{
contenders.Prune(count);
}
}
}

View File

@ -297,7 +297,7 @@ public:
//! returns the stored word penalty producer pointer (m_wpProducer)
const WordPenaltyProducer *GetWordPenaltyProducer() const { return m_wpProducer; }
//! returns the m_useDistortionFutureCosts flag
bool UseDistortionFutureCosts() const {return m_useDistortionFutureCosts;}
//! returns the m_onlyDistinctNBest flag
bool OnlyDistinctNBest() const {return m_onlyDistinctNBest;}
//! returns the m_onlyDistinctNBest flag (same value as OnlyDistinctNBest())
bool GetDistinctNBest() const {return m_onlyDistinctNBest;}
//! returns the factor delimiter string (m_factorDelimiter) by const reference
const std::string& GetFactorDelimiter() const {return m_factorDelimiter;}
//! returns m_maxFactorIdx for the given direction, plus one
size_t GetMaxNumFactors(FactorDirection direction) const { return m_maxFactorIdx[(size_t)direction]+1; }
//! returns m_maxNumFactors
size_t GetMaxNumFactors() const { return m_maxNumFactors; }