mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 22:14:57 +03:00
prune arcList & LatticePathCollection (contenders for n-best paths)
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1134 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
ff538c2e2f
commit
e1823a52ac
@ -133,7 +133,7 @@ int main(int argc, char* argv[])
|
||||
{
|
||||
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
|
||||
LatticePathList nBestList;
|
||||
manager.CalcNBest(nBestSize, nBestList,staticData.OnlyDistinctNBest());
|
||||
manager.CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
|
||||
ioStream->OutputNBestList(nBestList, source->GetTranslationId());
|
||||
//RemoveAllInColl(nBestList);
|
||||
}
|
||||
|
@ -265,6 +265,10 @@
|
||||
RelativePath=".\src\LatticePath.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\LatticePathCollection.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\LexicalReordering.cpp"
|
||||
>
|
||||
|
@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include "TranslationOption.h"
|
||||
#include "TranslationOptionCollection.h"
|
||||
#include "DummyScoreProducers.h"
|
||||
@ -404,13 +405,40 @@ void Hypothesis::PrintHypothesis(const InputType &source, float /*weightDistorti
|
||||
//PrintLMScores();
|
||||
}
|
||||
|
||||
void Hypothesis::InitializeArcs()
|
||||
void Hypothesis::CleanupArcList()
|
||||
{
|
||||
// point this hypo's main hypo to itself
|
||||
SetWinningHypo(this);
|
||||
|
||||
if (!m_arcList) return;
|
||||
|
||||
/* keep only number of arcs we need to create all n-best paths.
|
||||
* However, may not be enough if only unique candidates are needed,
|
||||
* so we'll keep the whole arc list if a distinct n-best list is needed
|
||||
*/
|
||||
const StaticData *staticData = StaticData::Instance();
|
||||
size_t nBestSize = staticData->GetNBestSize();
|
||||
bool distinctNBest = staticData->GetDistinctNBest();
|
||||
|
||||
if (!distinctNBest && m_arcList->size() > nBestSize)
|
||||
{
|
||||
nth_element(m_arcList->begin()
|
||||
, m_arcList->begin() + nBestSize - 1
|
||||
, m_arcList->end()
|
||||
, CompareHypothesisTotalScore());
|
||||
|
||||
// delete bad ones
|
||||
ObjectPool<Hypothesis> &pool = Hypothesis::GetObjectPool();
|
||||
ArcList::iterator iter;
|
||||
for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter)
|
||||
{
|
||||
Hypothesis *arc = *iter;
|
||||
pool.freeObject(arc);
|
||||
}
|
||||
m_arcList->erase(m_arcList->begin() + nBestSize
|
||||
, m_arcList->end());
|
||||
}
|
||||
|
||||
// set all arc's main hypo variable to this hypo
|
||||
ArcList::iterator iter = m_arcList->begin();
|
||||
for (; iter != m_arcList->end() ; ++iter)
|
||||
|
@ -230,7 +230,7 @@ public:
|
||||
}
|
||||
|
||||
void AddArc(Hypothesis *loserHypo);
|
||||
void InitializeArcs();
|
||||
void CleanupArcList();
|
||||
|
||||
//! returns a list of alternative previous hypotheses (or NULL if n-best support is disabled)
|
||||
inline const ArcList* GetArcList() const
|
||||
@ -256,3 +256,13 @@ public:
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const Hypothesis& hypothesis);
|
||||
|
||||
// sorting helper
|
||||
struct CompareHypothesisTotalScore
|
||||
{
|
||||
bool operator()(const Hypothesis* hypo1, const Hypothesis* hypo2) const
|
||||
{
|
||||
return hypo1->GetTotalScore() > hypo2->GetTotalScore();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -205,26 +205,17 @@ const Hypothesis *HypothesisCollection::GetBestHypothesis() const
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// sorting helper
|
||||
struct HypothesisSortDescending
|
||||
{
|
||||
const bool operator()(const Hypothesis* hypo1, const Hypothesis* hypo2) const
|
||||
{
|
||||
return hypo1->GetTotalScore() > hypo2->GetTotalScore();
|
||||
}
|
||||
};
|
||||
|
||||
vector<const Hypothesis*> HypothesisCollection::GetSortedList() const
|
||||
{
|
||||
vector<const Hypothesis*> ret; ret.reserve(m_hypos.size());
|
||||
std::copy(m_hypos.begin(), m_hypos.end(), std::inserter(ret, ret.end()));
|
||||
sort(ret.begin(), ret.end(), HypothesisSortDescending());
|
||||
sort(ret.begin(), ret.end(), CompareHypothesisTotalScore());
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
void HypothesisCollection::InitializeArcs()
|
||||
void HypothesisCollection::CleanupArcList()
|
||||
{
|
||||
// only necessary if n-best calculations are enabled
|
||||
if (!m_nBestIsEnabled) return;
|
||||
@ -233,7 +224,7 @@ void HypothesisCollection::InitializeArcs()
|
||||
for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter)
|
||||
{
|
||||
Hypothesis *mainHypo = *iter;
|
||||
mainHypo->InitializeArcs();
|
||||
mainHypo->CleanupArcList();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -181,7 +181,7 @@ public:
|
||||
/** make all arcs in point to the equiv hypothesis that contains them.
|
||||
* I.e. update the doubly linked list between hypo & arcs
|
||||
*/
|
||||
void InitializeArcs();
|
||||
void CleanupArcList();
|
||||
|
||||
TO_STRING();
|
||||
};
|
||||
|
33
moses/src/LatticePathCollection.cpp
Normal file
33
moses/src/LatticePathCollection.cpp
Normal file
@ -0,0 +1,33 @@
|
||||
|
||||
#include "LatticePathCollection.h"
|
||||
|
||||
void LatticePathCollection::Prune(size_t newSize)
|
||||
{
|
||||
assert( m_collection.size() == m_uniquePath.size() );
|
||||
if (m_collection.size() <= newSize)
|
||||
return; // don't need to prune
|
||||
|
||||
CollectionType::reverse_iterator iterRev;
|
||||
for (iterRev = m_collection.rbegin() ; iterRev != m_collection.rend() ; ++iterRev)
|
||||
{
|
||||
LatticePath *latticePath = *iterRev;
|
||||
|
||||
// delete path in m_uniquePath
|
||||
m_uniquePath.erase(latticePath->GetEdges());
|
||||
|
||||
delete latticePath;
|
||||
if (m_uniquePath.size() == newSize)
|
||||
break;
|
||||
}
|
||||
|
||||
// delete path in m_collection
|
||||
CollectionType::iterator iter = m_collection.begin();
|
||||
for (size_t i = 0 ; i < newSize ; ++i)
|
||||
iter++;
|
||||
|
||||
m_collection.erase(iter, m_collection.end());
|
||||
|
||||
assert( m_collection.size() == m_uniquePath.size() );
|
||||
|
||||
}
|
||||
|
@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
#include <set>
|
||||
#include <iostream>
|
||||
#include "LatticePath.h"
|
||||
|
||||
struct CompareLatticePathCollection
|
||||
{
|
||||
@ -32,21 +33,34 @@ struct CompareLatticePathCollection
|
||||
}
|
||||
};
|
||||
|
||||
/** priority queue used in Manager to store list of contenders for N-Best list.
|
||||
* Stored in order of total score so that the best path can just be popped from the top
|
||||
*/
|
||||
class LatticePathCollection
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream&, const LatticePathCollection&);
|
||||
|
||||
protected:
|
||||
std::multiset<LatticePath*, CompareLatticePathCollection> m_collection;
|
||||
std::set< std::vector<const Hypothesis *> > m_uniquePath;
|
||||
|
||||
public:
|
||||
// iters
|
||||
typedef std::multiset<LatticePath*, CompareLatticePathCollection>::iterator iterator;
|
||||
typedef std::multiset<LatticePath*, CompareLatticePathCollection>::const_iterator const_iterator;
|
||||
|
||||
iterator begin() { return m_collection.begin(); }
|
||||
iterator end() { return m_collection.end(); }
|
||||
const_iterator begin() const { return m_collection.begin(); }
|
||||
const_iterator end() const { return m_collection.end(); }
|
||||
typedef std::multiset<LatticePath*, CompareLatticePathCollection> CollectionType;
|
||||
CollectionType m_collection;
|
||||
std::set< std::vector<const Hypothesis *> > m_uniquePath;
|
||||
// not sure if really needed. does the partitioning algorithm create duplicate paths ?
|
||||
|
||||
public:
|
||||
//iterator begin() { return m_collection.begin(); }
|
||||
/** Remove the first path of m_collection and hand it to the caller.
 *
 * The path's edge list is removed from m_uniquePath as well, so both
 * containers stay in step. The path object itself is not deleted here;
 * it is simply returned.
 *
 * \return the first path in m_collection, or a null pointer if the
 *         collection is empty
 */
LatticePath *pop()
{
	// dereferencing begin() of an empty multiset is undefined behaviour
	if (m_collection.empty())
		return 0;

	LatticePath *top = *m_collection.begin();

	// delete from m_uniquePath as well
	const std::vector<const Hypothesis *> &edges = top->GetEdges();
	m_uniquePath.erase(edges);

	m_collection.erase(m_collection.begin());

	return top;
}
|
||||
|
||||
~LatticePathCollection()
|
||||
{
|
||||
@ -72,22 +86,15 @@ public:
|
||||
{
|
||||
return m_collection.size();
|
||||
}
|
||||
void Detach(const LatticePathCollection::iterator &iter)
|
||||
{
|
||||
// delete from m_uniquePath as well
|
||||
const LatticePath *latticePath = *iter;
|
||||
const std::vector<const Hypothesis *> &edges = latticePath->GetEdges();
|
||||
m_uniquePath.erase(edges);
|
||||
|
||||
m_collection.erase(iter);
|
||||
}
|
||||
void Prune(size_t newSize);
|
||||
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& out, const LatticePathCollection& pathColl)
|
||||
{
|
||||
LatticePathCollection::const_iterator iter;
|
||||
LatticePathCollection::CollectionType::const_iterator iter;
|
||||
|
||||
for (iter = pathColl.begin() ; iter != pathColl.end() ; ++iter)
|
||||
for (iter = pathColl.m_collection.begin() ; iter != pathColl.m_collection.end() ; ++iter)
|
||||
{
|
||||
const LatticePath &path = **iter;
|
||||
out << path << std::endl;
|
||||
|
@ -26,11 +26,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "LatticePath.h"
|
||||
|
||||
|
||||
/** used to return n-best list of Lattice Paths from the Manager to the caller */
|
||||
class LatticePathList
|
||||
{
|
||||
protected:
|
||||
std::list<const LatticePath*> m_collection;
|
||||
std::set< std::vector<const Hypothesis *> > m_uniquePath;
|
||||
// not sure if really needed. does the partitioning algorithm create duplicate paths ?
|
||||
public:
|
||||
// iters
|
||||
typedef std::list<const LatticePath*>::iterator iterator;
|
||||
|
@ -23,6 +23,7 @@ libmoses_a_SOURCES = \
|
||||
LanguageModelSingleFactor.cpp \
|
||||
LanguageModelSkip.cpp \
|
||||
LatticePath.cpp \
|
||||
LatticePathCollection.cpp \
|
||||
LexicalReordering.cpp \
|
||||
Manager.cpp \
|
||||
md5.cpp \
|
||||
|
@ -92,7 +92,7 @@ void Manager::ProcessSentence()
|
||||
VERBOSE(3,"processing hypothesis from next stack");
|
||||
sourceHypoColl.PruneToSize(m_staticData.GetMaxHypoStackSize());
|
||||
VERBOSE(3,std::endl);
|
||||
sourceHypoColl.InitializeArcs();
|
||||
sourceHypoColl.CleanupArcList();
|
||||
|
||||
// go through each hypothesis on the stack and try to expand it
|
||||
HypothesisCollection::const_iterator iterHypo;
|
||||
@ -324,7 +324,7 @@ void Manager::CalcNBest(size_t count, LatticePathList &ret,bool onlyDistinct) co
|
||||
for (size_t iteration = 0 ; (onlyDistinct ? distinctHyps.size() : ret.GetSize()) < count && contenders.GetSize() > 0 && (iteration < count * 20) ; iteration++)
|
||||
{
|
||||
// get next best from list of contenders
|
||||
LatticePath *path = *contenders.begin();
|
||||
LatticePath *path = contenders.pop();
|
||||
assert(path);
|
||||
bool addPath = true;
|
||||
if(onlyDistinct)
|
||||
@ -342,7 +342,10 @@ void Manager::CalcNBest(size_t count, LatticePathList &ret,bool onlyDistinct) co
|
||||
path->CreateDeviantPaths(contenders);
|
||||
}
|
||||
|
||||
contenders.Detach(contenders.begin());
|
||||
if(!onlyDistinct)
|
||||
{
|
||||
contenders.Prune(count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -297,7 +297,7 @@ public:
|
||||
const WordPenaltyProducer *GetWordPenaltyProducer() const { return m_wpProducer; }
|
||||
|
||||
bool UseDistortionFutureCosts() const {return m_useDistortionFutureCosts;}
|
||||
bool OnlyDistinctNBest() const {return m_onlyDistinctNBest;}
|
||||
bool GetDistinctNBest() const {return m_onlyDistinctNBest;}
|
||||
const std::string& GetFactorDelimiter() const {return m_factorDelimiter;}
|
||||
size_t GetMaxNumFactors(FactorDirection direction) const { return m_maxFactorIdx[(size_t)direction]+1; }
|
||||
size_t GetMaxNumFactors() const { return m_maxNumFactors; }
|
||||
|
Loading…
Reference in New Issue
Block a user