add NSCubePruningPerBitmap

This commit is contained in:
Hieu Hoang 2016-01-11 14:57:32 +00:00
parent 2c78a26b74
commit 421e7c909c
9 changed files with 736 additions and 0 deletions

View File

@ -52,12 +52,19 @@ external-lib boost_serialization ;
Search/SearchNormalBatch.cpp
Search/Stack.cpp
Search/Stacks.cpp
Search/CubePruning/Misc.cpp
Search/CubePruning/Search.cpp
Search/CubePruning/Stack.cpp
Search/CubePruningPerBitmap/Misc.cpp
Search/CubePruningPerBitmap/Search.cpp
Search/CubePruningPerBitmap/Stacks.cpp
Search/CubePruningPerMiniStack/Misc.cpp
Search/CubePruningPerMiniStack/Search.cpp
Search/CubePruningPerMiniStack/Stacks.cpp
legacy/Bitmap.cpp
legacy/Bitmaps.cpp
legacy/Factor.cpp

View File

@ -0,0 +1,162 @@
/*
* CubePruning.cpp
*
* Created on: 27 Nov 2015
* Author: hieu
*/
#include "Misc.h"
#include "../CubePruning/Stack.h"
#include "../Manager.h"
#include "../../MemPool.h"
#include "../../System.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningPerBitmap
{
////////////////////////////////////////////////////////////////////////
QueueItem *QueueItem::Create(QueueItem *currItem,
Manager &mgr,
CubeEdge &edge,
size_t hypoIndex,
size_t tpIndex,
std::deque<QueueItem*> &queueItemRecycler)
{
QueueItem *ret;
if (currItem) {
// reuse incoming queue item to create new item
ret = currItem;
ret->Init(mgr, edge, hypoIndex, tpIndex);
}
else if (!queueItemRecycler.empty()) {
// use item from recycle bin
ret = queueItemRecycler.back();
ret->Init(mgr, edge, hypoIndex, tpIndex);
queueItemRecycler.pop_back();
}
else {
// create new item
ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex);
}
return ret;
}
QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
:edge(&edge)
,hypoIndex(hypoIndex)
,tpIndex(tpIndex)
{
CreateHypothesis(mgr);
}
void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex)
{
this->edge = &edge;
this->hypoIndex = hypoIndex;
this->tpIndex = tpIndex;
CreateHypothesis(mgr);
}
void QueueItem::CreateHypothesis(Manager &mgr)
{
const Hypothesis *prevHypo = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex];
const TargetPhrase &tp = edge->tps[tpIndex];
//cerr << "hypoIndex=" << hypoIndex << endl;
//cerr << "edge.hypos=" << edge.hypos.size() << endl;
//cerr << prevHypo << endl;
//cerr << *prevHypo << endl;
hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr);
hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore);
hypo->EvaluateWhenApplied();
}
////////////////////////////////////////////////////////////////////////
CubeEdge::CubeEdge(
Manager &mgr,
const NSCubePruning::MiniStack &miniStack,
const InputPath &path,
const TargetPhrases &tps,
const Bitmap &newBitmap)
:miniStack(miniStack)
,path(path)
,tps(tps)
,newBitmap(newBitmap)
{
estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap);
}
std::ostream& operator<<(std::ostream &out, const CubeEdge &obj)
{
out << obj.newBitmap;
return out;
}
bool
CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const
{
//UTIL_THROW_IF2(x >= (1<<17), "Error");
//UTIL_THROW_IF2(y >= (1<<17), "Error");
SeenPositionItem val(this, (x<<16) + y);
std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val);
return pairRet.second;
}
void CubeEdge::CreateFirst(Manager &mgr,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler)
{
if (miniStack.GetSortedAndPruneHypos(mgr).size()) {
assert(tps.GetSize());
QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler);
queue.push(item);
bool setSeen = SetSeenPosition(0, 0, seenPositions);
assert(setSeen);
}
}
void CubeEdge::CreateNext(Manager &mgr,
QueueItem *item,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler)
{
size_t hypoIndex = item->hypoIndex;
size_t tpIndex = item->tpIndex;
if (hypoIndex + 1 < miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) {
// reuse incoming queue item to create new item
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler);
assert(newItem == item);
queue.push(newItem);
item = NULL;
}
if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) {
QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler);
queue.push(newItem);
item = NULL;
}
if (item) {
// recycle unused queue item
queueItemRecycler.push_back(item);
}
}
}
}

View File

@ -0,0 +1,113 @@
/*
* CubePruning.h
*
* Created on: 27 Nov 2015
* Author: hieu
*/
#pragma once
#include <boost/pool/pool_alloc.hpp>
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include <vector>
#include <queue>
#include "../../legacy/Range.h"
#include "../Hypothesis.h"
#include "../../TypeDef.h"
#include "../../Vector.h"
#include "../CubePruning/Stack.h"
namespace Moses2
{
class Manager;
class InputPath;
class TargetPhrases;
class Bitmap;
namespace NSCubePruningPerBitmap
{
class CubeEdge;
///////////////////////////////////////////
class QueueItem
{
~QueueItem(); // NOT IMPLEMENTED. Use MemPool
public:
static QueueItem *Create(QueueItem *currItem,
Manager &mgr,
CubeEdge &edge,
size_t hypoIndex,
size_t tpIndex,
std::deque<QueueItem*> &queueItemRecycler);
QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex);
CubeEdge *edge;
size_t hypoIndex, tpIndex;
Hypothesis *hypo;
protected:
void CreateHypothesis(Manager &mgr);
};
///////////////////////////////////////////
class QueueItemOrderer
{
public:
bool operator()(QueueItem* itemA, QueueItem* itemB) const {
HypothesisFutureScoreOrderer orderer;
return !orderer(itemA->hypo, itemB->hypo);
}
};
///////////////////////////////////////////
class CubeEdge
{
friend std::ostream& operator<<(std::ostream &, const CubeEdge &);
public:
typedef std::priority_queue<QueueItem*,
std::vector<QueueItem*, MemPoolAllocator<QueueItem*> >,
QueueItemOrderer> Queue;
typedef std::pair<const CubeEdge*, int> SeenPositionItem;
typedef boost::unordered_set<SeenPositionItem,
boost::hash<SeenPositionItem>,
std::equal_to<SeenPositionItem>,
MemPoolAllocator<SeenPositionItem> > SeenPositions;
const NSCubePruning::MiniStack &miniStack;
const InputPath &path;
const TargetPhrases &tps;
const Bitmap &newBitmap;
SCORE estimatedScore;
CubeEdge(Manager &mgr,
const NSCubePruning::MiniStack &miniStack,
const InputPath &path,
const TargetPhrases &tps,
const Bitmap &newBitmap);
bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const;
void CreateFirst(Manager &mgr,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler);
void CreateNext(Manager &mgr,
QueueItem *item,
Queue &queue,
SeenPositions &seenPositions,
std::deque<QueueItem*> &queueItemRecycler);
protected:
};
}
}

View File

@ -0,0 +1,259 @@
/*
* Search.cpp
*
* Created on: 16 Nov 2015
* Author: hieu
*/
#include <boost/foreach.hpp>
#include "Search.h"
#include "../Manager.h"
#include "../Hypothesis.h"
#include "../../InputPaths.h"
#include "../../InputPath.h"
#include "../../System.h"
#include "../../Sentence.h"
#include "../../TranslationTask.h"
#include "../../legacy/Util2.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningPerBitmap
{
////////////////////////////////////////////////////////////////////////
Search::Search(Manager &mgr)
:Moses2::Search(mgr)
,m_stacks(mgr)
,m_cubeEdgeAlloc(mgr.GetPool())
,m_queue(QueueItemOrderer(),
std::vector<QueueItem*, MemPoolAllocator<QueueItem*> >(MemPoolAllocator<QueueItem*>(mgr.GetPool())) )
,m_seenPositions(MemPoolAllocator<CubeEdge::SeenPositionItem>(mgr.GetPool()))
{
}
Search::~Search()
{
}
void Search::Decode()
{
// init stacks
m_stacks.Init(m_mgr.GetInput().GetSize() + 1);
const Bitmap &initBitmap = m_mgr.GetBitmaps().GetInitialBitmap();
Hypothesis *initHypo = Hypothesis::Create(m_mgr.GetSystemPool(), m_mgr);
initHypo->Init(m_mgr, m_mgr.GetInputPaths().GetBlank(), m_mgr.GetInitPhrase(), initBitmap);
initHypo->EmptyHypothesisState(m_mgr.GetInput());
m_stacks.Add(initHypo, m_mgr.GetHypoRecycle());
for (size_t stackInd = 0; stackInd < m_stacks.GetSize() - 1; ++stackInd) {
CreateSearchGraph(stackInd);
}
for (size_t stackInd = 1; stackInd < m_stacks.GetSize(); ++stackInd) {
//cerr << "stackInd=" << stackInd << endl;
Decode(stackInd);
//cerr << m_stacks << endl;
}
//DebugCounts();
}
void Search::Decode(size_t stackInd)
{
NSCubePruning::Stack &stack = m_stacks[stackInd];
BOOST_FOREACH(NSCubePruning::Stack::Coll::value_type &val, stack.GetColl()) {
NSCubePruning::MiniStack &miniStack = *val.second;
Decode(miniStack);
}
}
void Search::Decode(NSCubePruning::MiniStack &miniStack)
{
Recycler<Hypothesis*> &hypoRecycler = m_mgr.GetHypoRecycle();
// reuse queue from previous stack. Clear it first
std::vector<QueueItem*, MemPoolAllocator<QueueItem*> > &container = Container(m_queue);
//cerr << "container=" << container.size() << endl;
BOOST_FOREACH(QueueItem *item, container) {
// recycle unused hypos from queue
Hypothesis *hypo = item->hypo;
hypoRecycler.Recycle(hypo);
// recycle queue item
m_queueItemRecycler.push_back(item);
}
container.clear();
m_seenPositions.clear();
//Prefetch(stackInd);
// add top hypo from every edge into queue
CubeEdges &edges = *m_cubeEdges[&miniStack];
BOOST_FOREACH(CubeEdge *edge, edges) {
//cerr << "edge=" << *edge << endl;
edge->CreateFirst(m_mgr, m_queue, m_seenPositions, m_queueItemRecycler);
}
size_t pops = 0;
while (!m_queue.empty() && pops < m_mgr.system.popLimit) {
// get best hypo from queue, add to stack
//cerr << "queue=" << queue.size() << endl;
QueueItem *item = m_queue.top();
m_queue.pop();
CubeEdge *edge = item->edge;
// prefetching
/*
Hypothesis::Prefetch(m_mgr); // next hypo in recycler
edge.Prefetch(m_mgr, item, m_queue, m_seenPositions); //next hypos of current item
QueueItem *itemNext = m_queue.top();
CubeEdge &edgeNext = itemNext->edge;
edgeNext.Prefetch(m_mgr, itemNext, m_queue, m_seenPositions); //next hypos of NEXT item
*/
// add hypo to stack
Hypothesis *hypo = item->hypo;
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
m_stacks.Add(hypo, hypoRecycler);
edge->CreateNext(m_mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
++pops;
}
/*
// create hypo from every edge. Increase diversity
while (!m_queue.empty()) {
QueueItem *item = m_queue.top();
m_queue.pop();
if (item->hypoIndex == 0 && item->tpIndex == 0) {
CubeEdge &edge = item->edge;
// add hypo to stack
Hypothesis *hypo = item->hypo;
//cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
m_stacks.Add(hypo, m_mgr.GetHypoRecycle());
}
}
*/
}
void Search::CreateSearchGraph(size_t stackInd)
{
NSCubePruning::Stack &stack = m_stacks[stackInd];
MemPool &pool = m_mgr.GetPool();
BOOST_FOREACH(const NSCubePruning::Stack::Coll::value_type &val, stack.GetColl()) {
const Bitmap &hypoBitmap = *val.first.first;
size_t hypoEndPos = val.first.second;
//cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
// create edges to next hypos from existing hypos
const InputPaths &paths = m_mgr.GetInputPaths();
BOOST_FOREACH(const InputPath &path, paths) {
const Range &pathRange = path.range;
//cerr << "pathRange=" << pathRange << endl;
if (!path.IsUsed()) {
continue;
}
if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
continue;
}
const Bitmap &newBitmap = m_mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
// sort hypo for a particular bitmap and hypoEndPos
const NSCubePruning::MiniStack &miniStack = *val.second;
// add cube edge
BOOST_FOREACH(const TargetPhrases *tps, path.targetPhrases) {
if (tps && tps->GetSize()) {
// create next mini stack
NSCubePruning::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange);
CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(m_mgr, miniStack, path, *tps, newBitmap);
CubeEdges *edges;
boost::unordered_map<NSCubePruning::MiniStack*, CubeEdges*>::iterator iter = m_cubeEdges.find(&nextMiniStack);
if (iter == m_cubeEdges.end()) {
edges = new (pool.Allocate<CubeEdges>()) CubeEdges(m_cubeEdgeAlloc);
m_cubeEdges[&nextMiniStack] = edges;
}
else {
edges = iter->second;
}
edges->push_back(edge);
}
}
}
}
}
const Hypothesis *Search::GetBestHypothesis() const
{
const NSCubePruning::Stack &lastStack = m_stacks.Back();
std::vector<const Hypothesis*> sortedHypos = lastStack.GetBestHypos(1);
const Hypothesis *best = NULL;
if (sortedHypos.size()) {
best = sortedHypos[0];
}
return best;
}
void Search::DebugCounts()
{
std::map<size_t, size_t> counts;
for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) {
//cerr << "stackInd=" << stackInd << endl;
const NSCubePruning::Stack &stack = m_stacks[stackInd];
BOOST_FOREACH(const NSCubePruning::Stack::Coll::value_type &val, stack.GetColl()) {
const NSCubePruning::MiniStack &miniStack = *val.second;
size_t count = miniStack.GetColl().size();
if (counts.find(count) == counts.end()) {
counts[count] = 0;
}
else {
++counts[count];
}
}
//cerr << m_stacks << endl;
}
std::map<size_t, size_t>::const_iterator iter;
for (iter = counts.begin(); iter != counts.end(); ++iter) {
cerr << iter->first << "=" << iter->second << " ";
}
cerr << endl;
}
}
}

View File

@ -0,0 +1,67 @@
/*
* Search.h
*
* Created on: 16 Nov 2015
* Author: hieu
*/
#pragma once
#include <boost/pool/pool_alloc.hpp>
#include <boost/unordered_map.hpp>
#include "../Search.h"
#include "Misc.h"
#include "Stacks.h"
#include "../../legacy/Range.h"
namespace Moses2
{
class Bitmap;
class Hypothesis;
class InputPath;
class TargetPhrases;
namespace NSCubePruning
{
class MiniStack;
}
namespace NSCubePruningPerBitmap
{
class Search : public Moses2::Search
{
public:
Search(Manager &mgr);
virtual ~Search();
virtual void Decode();
const Hypothesis *GetBestHypothesis() const;
protected:
Stacks m_stacks;
CubeEdge::Queue m_queue;
CubeEdge::SeenPositions m_seenPositions;
// CUBE PRUNING VARIABLES
// setup
MemPoolAllocator<CubeEdge*> m_cubeEdgeAlloc;
typedef std::vector<CubeEdge*, MemPoolAllocator<CubeEdge*> > CubeEdges;
boost::unordered_map<NSCubePruning::MiniStack*, CubeEdges*> m_cubeEdges;
std::deque<QueueItem*> m_queueItemRecycler;
// CUBE PRUNING
// decoding
void CreateSearchGraph(size_t stackInd);
void Decode(size_t stackInd);
void Decode(NSCubePruning::MiniStack &miniStack);
void DebugCounts();
};
}
}

View File

@ -0,0 +1,72 @@
/*
* Stacks.cpp
*
* Created on: 6 Nov 2015
* Author: hieu
*/
#include "Stacks.h"
#include "../../System.h"
#include "../Manager.h"
using namespace std;
namespace Moses2
{
namespace NSCubePruningPerBitmap
{
Stacks::Stacks(const Manager &mgr)
:m_mgr(mgr)
{
}
Stacks::~Stacks()
{
}
void Stacks::Init(size_t numStacks)
{
m_stacks.resize(numStacks);
for (size_t i = 0; i < m_stacks.size(); ++i) {
m_stacks[i] = new (m_mgr.GetPool().Allocate<NSCubePruning::Stack>()) NSCubePruning::Stack(m_mgr);
}
}
std::ostream& operator<<(std::ostream &out, const Stacks &obj)
{
for (size_t i = 0; i < obj.GetSize(); ++i) {
const NSCubePruning::Stack &stack = *obj.m_stacks[i];
out << stack.GetHypoSize() << " ";
}
return out;
}
void Stacks::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle)
{
size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered();
//cerr << "numWordsCovered=" << numWordsCovered << endl;
NSCubePruning::Stack &stack = *m_stacks[numWordsCovered];
stack.Add(hypo, hypoRecycle);
}
NSCubePruning::MiniStack &Stacks::GetMiniStack(const Bitmap &newBitmap, const Range &pathRange)
{
size_t numWordsCovered = newBitmap.GetNumWordsCovered();
//cerr << "numWordsCovered=" << numWordsCovered << endl;
NSCubePruning::Stack &stack = *m_stacks[numWordsCovered];
NSCubePruning::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos());
stack.GetMiniStack(key);
}
}
}

View File

@ -0,0 +1,51 @@
/*
* Stacks.h
*
* Created on: 6 Nov 2015
* Author: hieu
*/
#pragma once
#include <vector>
#include "../CubePruning/Stack.h"
#include "../../Recycler.h"
namespace Moses2
{
class Manager;
namespace NSCubePruningPerBitmap
{
class Stacks {
friend std::ostream& operator<<(std::ostream &, const Stacks &);
public:
Stacks(const Manager &mgr);
virtual ~Stacks();
void Init(size_t numStacks);
size_t GetSize() const
{ return m_stacks.size(); }
const NSCubePruning::Stack &Back() const
{ return *m_stacks.back(); }
NSCubePruning::Stack &operator[](size_t ind)
{ return *m_stacks[ind]; }
void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle);
NSCubePruning::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange);
protected:
const Manager &m_mgr;
std::vector<NSCubePruning::Stack*> m_stacks;
};
}
}

View File

@ -12,6 +12,7 @@
#include "SearchNormalBatch.h"
#include "CubePruning/Search.h"
#include "CubePruningPerMiniStack/Search.h"
#include "CubePruningPerBitmap/Search.h"
#include "../System.h"
#include "../TargetPhrases.h"
#include "../TargetPhrase.h"
@ -83,6 +84,9 @@ void Manager::Init()
case CubePruningPerMiniStack:
m_search = new NSCubePruningPerMiniStack::Search(*this);
break;
case CubePruningPerBitmap:
m_search = new NSCubePruningPerBitmap::Search(*this);
break;
default:
cerr << "Unknown search algorithm" << endl;
abort();

View File

@ -47,6 +47,7 @@ enum SearchAlgorithm {
SyntaxT2S_SCFG = 8,
SyntaxF2S = 9,
CubePruningPerMiniStack = 10,
CubePruningPerBitmap = 11,
DefaultSearchAlgorithm = 777 // means: use StaticData.m_searchAlgorithm
};