From d8dfda7de9898ca61084cf62d1b03411d9bdee3c Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Thu, 7 Jan 2016 18:43:55 +0000
Subject: [PATCH] added NSCubePruningPerMiniStack

---
NOTE(review): this copy of the patch was recovered from a whitespace-collapsed
extraction. Line breaks and indentation below are reconstructed from the
'+' / ' ' line markers, so context lines and hunk sizes should be checked
against the upstream commit before applying. Angle-bracket content also appears
to have been stripped (bare `#include`, `template` with no parameter list,
`std::vector >`); those lines are left as found rather than guessed.

 contrib/other-builds/moses2/Jamfile           |   1 +
 .../Search/CubePruningPerMiniStack/Search.cpp | 245 ++++++++++++++++++
 .../Search/CubePruningPerMiniStack/Search.h   |  59 +++++
 .../other-builds/moses2/Search/Manager.cpp    |   4 +
 contrib/other-builds/moses2/TypeDef.h         |   1 +
 5 files changed, 310 insertions(+)
 create mode 100644 contrib/other-builds/moses2/Search/CubePruningPerMiniStack/Search.cpp
 create mode 100644 contrib/other-builds/moses2/Search/CubePruningPerMiniStack/Search.h

diff --git a/contrib/other-builds/moses2/Jamfile b/contrib/other-builds/moses2/Jamfile
index 959eb1cf0..5625f50fe 100644
--- a/contrib/other-builds/moses2/Jamfile
+++ b/contrib/other-builds/moses2/Jamfile
@@ -55,6 +55,7 @@ external-lib boost_serialization ;
   Search/CubePruning/Misc.cpp
   Search/CubePruning/Search.cpp
   Search/CubePruning/Stack.cpp
+  Search/CubePruningPerMiniStack/Search.cpp
   legacy/Bitmap.cpp
   legacy/Bitmaps.cpp
   legacy/Factor.cpp
diff --git a/contrib/other-builds/moses2/Search/CubePruningPerMiniStack/Search.cpp b/contrib/other-builds/moses2/Search/CubePruningPerMiniStack/Search.cpp
new file mode 100644
index 000000000..fc5264a81
--- /dev/null
+++ b/contrib/other-builds/moses2/Search/CubePruningPerMiniStack/Search.cpp
@@ -0,0 +1,245 @@
+/*
+ * Search.cpp
+ *
+ * Created on: 16 Nov 2015
+ * Author: hieu
+ */
+#include
+#include "Search.h"
+#include "../CubePruning/Stack.h"
+#include "../Manager.h"
+#include "../Hypothesis.h"
+#include "../../InputPaths.h"
+#include "../../InputPath.h"
+#include "../../System.h"
+#include "../../Sentence.h"
+#include "../../TranslationTask.h"
+#include "../../legacy/Util2.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+namespace NSCubePruningPerMiniStack
+{
+
+////////////////////////////////////////////////////////////////////////
+Search::Search(Manager &mgr)
+:Moses2::Search(mgr)
+,m_stack(mgr)
+,m_cubeEdgeAlloc(mgr.GetPool())
+
+,m_queue(NSCubePruning::QueueItemOrderer(),
+    std::vector >(MemPoolAllocator(mgr.GetPool())) )
+
+,m_seenPositions(MemPoolAllocator(mgr.GetPool()))
+{
+}
+
+Search::~Search()
+{
+}
+
+void Search::Decode()
+{
+  // init cube edges
+  m_cubeEdges.resize(m_mgr.GetInput().GetSize() + 1);
+  for (size_t i = 0; i < m_cubeEdges.size(); ++i) {
+    m_cubeEdges[i] = new (m_mgr.GetPool().Allocate()) CubeEdges(m_cubeEdgeAlloc);
+  }
+
+  const Bitmap &initBitmap = m_mgr.GetBitmaps().GetInitialBitmap();
+  Hypothesis *initHypo = Hypothesis::Create(m_mgr.GetSystemPool(), m_mgr);
+  initHypo->Init(m_mgr, m_mgr.GetInputPaths().GetBlank(), m_mgr.GetInitPhrase(), initBitmap);
+  initHypo->EmptyHypothesisState(m_mgr.GetInput());
+
+  m_stack.Add(initHypo, m_mgr.GetHypoRecycle());
+  PostDecode(0);
+
+  for (size_t stackInd = 1; stackInd < m_mgr.GetInput().GetSize() + 1; ++stackInd) {
+    //cerr << "stackInd=" << stackInd << endl;
+    m_stack.Clear();
+    Decode(stackInd);
+    PostDecode(stackInd);
+
+    //cerr << m_stacks << endl;
+  }
+
+}
+
+// grab the underlying container of the priority queue
+/////////////////////////////////////////////////
+template
+  S& Container(priority_queue& q) {
+    struct HackedQueue : private priority_queue {
+      static S& Container(priority_queue& q) {
+        return q.*&HackedQueue::c;
+      }
+    };
+  return HackedQueue::Container(q);
+}
+/////////////////////////////////////////////////
+
+void Search::Decode(size_t stackInd)
+{
+  Recycler &hypoRecycler = m_mgr.GetHypoRecycle();
+
+  // reuse queue from previous stack. Clear it first
+  std::vector > &container = Container(m_queue);
+  //cerr << "container=" << container.size() << endl;
+  BOOST_FOREACH(NSCubePruning::QueueItem *item, container) {
+    // recycle unused hypos from queue
+    Hypothesis *hypo = item->hypo;
+    hypoRecycler.Recycle(hypo);
+
+    // recycle queue item
+    m_queueItemRecycler.push_back(item);
+  }
+  container.clear();
+
+  m_seenPositions.clear();
+
+  //Prefetch(stackInd);
+
+  // add top hypo from every edge into queue
+  CubeEdges &edges = *m_cubeEdges[stackInd];
+
+  BOOST_FOREACH(NSCubePruning::CubeEdge *edge, edges) {
+    //cerr << "edge=" << *edge << endl;
+    edge->CreateFirst(m_mgr, m_queue, m_seenPositions, m_queueItemRecycler);
+  }
+
+  size_t pops = 0;
+  while (!m_queue.empty() && pops < m_mgr.system.popLimit) {
+    // get best hypo from queue, add to stack
+    //cerr << "queue=" << queue.size() << endl;
+    NSCubePruning::QueueItem *item = m_queue.top();
+    m_queue.pop();
+
+    NSCubePruning::CubeEdge *edge = item->edge;
+
+    // prefetching
+    /*
+    Hypothesis::Prefetch(m_mgr); // next hypo in recycler
+    edge.Prefetch(m_mgr, item, m_queue, m_seenPositions); //next hypos of current item
+
+    QueueItem *itemNext = m_queue.top();
+    CubeEdge &edgeNext = itemNext->edge;
+    edgeNext.Prefetch(m_mgr, itemNext, m_queue, m_seenPositions); //next hypos of NEXT item
+    */
+
+    // add hypo to stack
+    Hypothesis *hypo = item->hypo;
+    //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
+    m_stack.Add(hypo, hypoRecycler);
+
+    edge->CreateNext(m_mgr, item, m_queue, m_seenPositions, m_queueItemRecycler);
+
+    ++pops;
+  }
+
+  /*
+  // create hypo from every edge. Increase diversity
+  while (!m_queue.empty()) {
+    QueueItem *item = m_queue.top();
+    m_queue.pop();
+
+    if (item->hypoIndex == 0 && item->tpIndex == 0) {
+      CubeEdge &edge = item->edge;
+
+      // add hypo to stack
+      Hypothesis *hypo = item->hypo;
+      //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl;
+      m_stacks.Add(hypo, m_mgr.GetHypoRecycle());
+    }
+  }
+  */
+}
+
+void Search::PostDecode(size_t stackInd)
+{
+  MemPool &pool = m_mgr.GetPool();
+
+  BOOST_FOREACH(const NSCubePruning::Stack::Coll::value_type &val, m_stack.GetColl()) {
+    const Bitmap &hypoBitmap = *val.first.first;
+    size_t hypoEndPos = val.first.second;
+    //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl;
+
+    // create edges to next hypos from existing hypos
+    const InputPaths &paths = m_mgr.GetInputPaths();
+
+    BOOST_FOREACH(const InputPath &path, paths) {
+      const Range &pathRange = path.range;
+      //cerr << "pathRange=" << pathRange << endl;
+
+      if (!path.IsUsed()) {
+        continue;
+      }
+      if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) {
+        continue;
+      }
+
+      const Bitmap &newBitmap = m_mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange);
+      size_t numWords = newBitmap.GetNumWordsCovered();
+
+      CubeEdges &edges = *m_cubeEdges[numWords];
+
+      // sort hypo for a particular bitmap and hypoEndPos
+      NSCubePruning::CubeEdge::Hypotheses &sortedHypos = val.second->GetSortedAndPruneHypos(m_mgr);
+
+      BOOST_FOREACH(const TargetPhrases *tps, path.targetPhrases) {
+        if (tps && tps->GetSize()) {
+          NSCubePruning::CubeEdge *edge = new (pool.Allocate()) NSCubePruning::CubeEdge(m_mgr, sortedHypos, path, *tps, newBitmap);
+          edges.push_back(edge);
+        }
+      }
+    }
+  }
+}
+
+const Hypothesis *Search::GetBestHypothesis() const
+{
+  std::vector sortedHypos = m_stack.GetBestHypos(1);
+
+  const Hypothesis *best = NULL;
+  if (sortedHypos.size()) {
+    best = sortedHypos[0];
+  }
+  return best;
+}
+
+void Search::Prefetch(size_t stackInd)
+{
+  CubeEdges &edges = *m_cubeEdges[stackInd];
+
+  BOOST_FOREACH(NSCubePruning::CubeEdge *edge, edges) {
+    __builtin_prefetch(edge);
+
+    BOOST_FOREACH(const Hypothesis *hypo, edge->hypos) {
+      __builtin_prefetch(hypo);
+
+      const TargetPhrase &tp = hypo->GetTargetPhrase();
+      __builtin_prefetch(&tp);
+
+    }
+
+    BOOST_FOREACH(const TargetPhrase *tp, edge->tps) {
+      __builtin_prefetch(tp);
+
+      size_t size = tp->GetSize();
+      for (size_t i = 0; i < size; ++i) {
+        const Word &word = (*tp)[i];
+        __builtin_prefetch(&word);
+      }
+    }
+
+  }
+}
+
+}
+
+}
+
+
diff --git a/contrib/other-builds/moses2/Search/CubePruningPerMiniStack/Search.h b/contrib/other-builds/moses2/Search/CubePruningPerMiniStack/Search.h
new file mode 100644
index 000000000..90493c43a
--- /dev/null
+++ b/contrib/other-builds/moses2/Search/CubePruningPerMiniStack/Search.h
@@ -0,0 +1,59 @@
+/*
+ * Search.h
+ *
+ * Created on: 16 Nov 2015
+ * Author: hieu
+ */
+
+#pragma once
+#include
+#include "../Search.h"
+#include "../CubePruning/Misc.h"
+#include "../CubePruning/Stack.h"
+#include "../../legacy/Range.h"
+
+namespace Moses2
+{
+
+class Bitmap;
+class Hypothesis;
+class InputPath;
+class TargetPhrases;
+
+namespace NSCubePruningPerMiniStack
+{
+
+class Search : public Moses2::Search
+{
+public:
+  Search(Manager &mgr);
+  virtual ~Search();
+
+  virtual void Decode();
+  const Hypothesis *GetBestHypothesis() const;
+
+protected:
+  NSCubePruning::Stack m_stack;
+
+  NSCubePruning::CubeEdge::Queue m_queue;
+  NSCubePruning::CubeEdge::SeenPositions m_seenPositions;
+
+  // CUBE PRUNING VARIABLES
+  // setup
+  MemPoolAllocator m_cubeEdgeAlloc;
+  typedef std::vector > CubeEdges;
+  std::vector m_cubeEdges;
+
+  std::deque m_queueItemRecycler;
+
+  // CUBE PRUNING
+  // decoding
+  void Decode(size_t stackInd);
+  void PostDecode(size_t stackInd);
+  void Prefetch(size_t stackInd);
+};
+
+}
+
+}
+
diff --git a/contrib/other-builds/moses2/Search/Manager.cpp b/contrib/other-builds/moses2/Search/Manager.cpp
index 9684bbfa4..da5b0423d 100644
--- a/contrib/other-builds/moses2/Search/Manager.cpp
+++ b/contrib/other-builds/moses2/Search/Manager.cpp
@@ -11,6 +11,7 @@
 #include "SearchNormal.h"
 #include "SearchNormalBatch.h"
 #include "CubePruning/Search.h"
+#include "CubePruningPerMiniStack/Search.h"
 #include "../System.h"
 #include "../TargetPhrases.h"
 #include "../TargetPhrase.h"
@@ -79,6 +80,9 @@ void Manager::Init()
   case CubePruning:
     m_search = new NSCubePruning::Search(*this);
     break;
+  case CubePruningPerMiniStack:
+    m_search = new NSCubePruningPerMiniStack::Search(*this);
+    break;
   default:
     cerr << "Unknown search algorithm" << endl;
     abort();
diff --git a/contrib/other-builds/moses2/TypeDef.h b/contrib/other-builds/moses2/TypeDef.h
index bc14646aa..1f9a41a2a 100644
--- a/contrib/other-builds/moses2/TypeDef.h
+++ b/contrib/other-builds/moses2/TypeDef.h
@@ -46,6 +46,7 @@ enum SearchAlgorithm {
   SyntaxT2S = 7,
   SyntaxT2S_SCFG = 8,
   SyntaxF2S = 9,
+  CubePruningPerMiniStack = 10,
   DefaultSearchAlgorithm = 777 // means: use StaticData.m_searchAlgorithm
 };