From d34b107b91c2b91ef2fc7ee070f32a85b36c222e Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Mon, 8 Jun 2015 14:00:31 +0100 Subject: [PATCH] Initial check-in. --- .../TranslationModel/UG/mm/ug_prep_phrases.h | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 moses/TranslationModel/UG/mm/ug_prep_phrases.h diff --git a/moses/TranslationModel/UG/mm/ug_prep_phrases.h b/moses/TranslationModel/UG/mm/ug_prep_phrases.h new file mode 100644 index 000000000..421d6f090 --- /dev/null +++ b/moses/TranslationModel/UG/mm/ug_prep_phrases.h @@ -0,0 +1,83 @@ +// -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; -*- +// Functions for multi-threaded pre-fetching of phrase table entries +// Author: Ulrich Germann + +#include "moses/TranslationModel/UG/generic/threading/ug_thread_pool.h" +#include "moses/thread_safe_container.h" +#include "ug_bitext.h" +#include "ug_lru_cache.h" + +namespace Moses { +namespace bitext { + +template // , typename BITEXT> +struct StatsCollector +{ + typedef lru_cache::LRU_Cache< uint64_t, pstats > hcache_t; + typedef ThreadSafeContainer > pcache_t; + typedef map > lcache_t; + iptr const> bitext; // underlying bitext + sampling_method method; // sampling method + size_t sample_size; // sample size + sptr bias; // sampling bias + hcache_t* hcache; // "history" cache + pcache_t* pcache; // permanent cache + size_t pcache_th; // threshold for adding items to pcache + sptr lcache; // local cache + ug::ThreadPool* tpool; // thread pool to run jobs on + + StatsCollector(iptr > xbitext, + sptr const xbias) + : method(ranked_sampling) + , sample_size(100) + , bias(xbias) + , hcache(NULL) + , pcache(NULL) + , pcache_th(10000) + , tpool(NULL) + { + bitext = xbitext; + } + + void + process(typename TSA::tree_iterator& m, + typename TSA::tree_iterator& r) + { + if (!lcache) lcache.reset(new lcache_t); + if (m.down()) + { + do + { + if (!r.extend(m.getToken(-1)->id())) continue; + this->process(m, r); + uint64_t pid = r.getPid(); + sptr stats; + if (hcache) stats = hcache->get(pid); + if (!stats && pcache) + { + sptr const* foo = pcache->get(pid); + if (foo) stats = *foo; + } + if (!stats) // need to sample + { + BitextSampler s(bitext.get(), r, bias, sample_size, method); + stats = s.stats(); + if (hcache) hcache->set(pid,stats); + if (pcache && r.ca() >= pcache_th) pcache->set(pid,stats); + if (tpool) tpool->add(s); + else s(); + } + (*lcache)[pid] = stats; + r.up(); + } + while (m.over()); + m.up(); + } + } +}; +} // end of namespace bitext +} // end of namespace Moses + +#if 0 +#endif + // r.up();