diff --git a/lm/CMakeLists.txt b/lm/CMakeLists.txt index 195fc730c..5fca22c71 100644 --- a/lm/CMakeLists.txt +++ b/lm/CMakeLists.txt @@ -71,7 +71,7 @@ foreach(exe ${EXE_LIST}) add_executable(${exe} ${exe}_main.cc $ $) # Link the executable against boost - target_link_libraries(${exe} ${Boost_LIBRARIES}) + target_link_libraries(${exe} ${Boost_LIBRARIES} pthread) # Group executables together set_target_properties(${exe} PROPERTIES FOLDER executables) @@ -104,7 +104,7 @@ if(BUILD_TESTING) set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK) # Link the executable against boost - target_link_libraries(${test} ${Boost_LIBRARIES}) + target_link_libraries(${test} ${Boost_LIBRARIES} pthread) # model_test requires an extra command line parameter if ("${test}" STREQUAL "model_test") diff --git a/lm/builder/CMakeLists.txt b/lm/builder/CMakeLists.txt index d84a7f7da..01b415da2 100644 --- a/lm/builder/CMakeLists.txt +++ b/lm/builder/CMakeLists.txt @@ -45,7 +45,7 @@ add_library(kenlm_builder OBJECT ${KENLM_BUILDER_SOURCE}) add_executable(lmplz lmplz_main.cc $ $ $ $) # Link the executable against boost -target_link_libraries(lmplz ${Boost_LIBRARIES}) +target_link_libraries(lmplz ${Boost_LIBRARIES} pthread) # Group executables together set_target_properties(lmplz PROPERTIES FOLDER executables) @@ -68,7 +68,7 @@ if(BUILD_TESTING) set_target_properties(${test} PROPERTIES COMPILE_FLAGS "-DBOOST_TEST_DYN_LINK -DBOOST_PROGRAM_OPTIONS_DYN_LINK") # Link the executable against boost - target_link_libraries(${test} ${Boost_LIBRARIES}) + target_link_libraries(${test} ${Boost_LIBRARIES} pthread) # Specify command arguments for how to run each unit test # diff --git a/lm/builder/interpolate.cc b/lm/builder/interpolate.cc index 6374bcf04..a62ef43d2 100644 --- a/lm/builder/interpolate.cc +++ b/lm/builder/interpolate.cc @@ -9,6 +9,7 @@ #include "util/fixed_array.hh" #include "util/murmur_hash.hh" +#include #include #include diff --git a/lm/builder/pipeline.cc b/lm/builder/pipeline.cc index 69972e278..64e30f724 100644 --- a/lm/builder/pipeline.cc +++ b/lm/builder/pipeline.cc @@ -191,6 +191,9 @@ class Master { chains_.clear(); std::cerr << "Chain sizes:"; for (std::size_t i = 0; i < config_.order; ++i) { + // Always have enough for at least one record. + // This was crashing if e.g. there was no 5-gram. + assignments[i] = std::max(assignments[i], block_count[i] * NGram::TotalSize(i + 1)); std::cerr << ' ' << (i+1) << ":" << assignments[i]; chains_.push_back(util::stream::ChainConfig(NGram::TotalSize(i + 1), block_count[i], assignments[i])); } diff --git a/lm/filter/CMakeLists.txt b/lm/filter/CMakeLists.txt index 4e791cef8..d4616cc32 100644 --- a/lm/filter/CMakeLists.txt +++ b/lm/filter/CMakeLists.txt @@ -52,7 +52,7 @@ foreach(exe ${EXE_LIST}) add_executable(${exe} ${exe}_main.cc $ $ $) # Link the executable against boost - target_link_libraries(${exe} ${Boost_LIBRARIES}) + target_link_libraries(${exe} ${Boost_LIBRARIES} pthread) # Group executables together set_target_properties(${exe} PROPERTIES FOLDER executables) diff --git a/lm/kenlm_benchmark_main.cc b/lm/kenlm_benchmark_main.cc index d8b659139..1704ec6c9 100644 --- a/lm/kenlm_benchmark_main.cc +++ b/lm/kenlm_benchmark_main.cc @@ -31,9 +31,7 @@ template void QueryFromBytes(const Model &model, int Width kEOS = model.GetVocabulary().EndSentence(); Width buf[4096]; float sum = 0.0; - while (true) { - std::size_t got = util::ReadOrEOF(fd_in, buf, sizeof(buf)); - if (!got) break; + while (std::size_t got = util::ReadOrEOF(fd_in, buf, sizeof(buf))) { UTIL_THROW_IF2(got % sizeof(Width), "File size not a multiple of vocab id size " << sizeof(Width)); got /= sizeof(Width); // Do even stuff first. diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt index c52cdbc06..6f7f5e99b 100644 --- a/util/CMakeLists.txt +++ b/util/CMakeLists.txt @@ -80,7 +80,7 @@ if(BUILD_TESTING) set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK) # Link the executable against boost - target_link_libraries(${test} ${Boost_LIBRARIES}) + target_link_libraries(${test} ${Boost_LIBRARIES} pthread) # file_piece_test requires an extra command line parameter if ("${test}" STREQUAL "file_piece_test") diff --git a/util/fixed_array.hh b/util/fixed_array.hh index 9083d39ee..c67e8ed95 100644 --- a/util/fixed_array.hh +++ b/util/fixed_array.hh @@ -11,10 +11,10 @@ namespace util { /** - * Defines a fixed-size collection. + * Defines an array with fixed maximum size. * - * Ever want an array of things by they don't have a default constructor or are - * non-copyable? FixedArray allows constructing one at a time. + * Ever want an array of things but they don't have a default constructor or + * are non-copyable? FixedArray allows constructing one at a time. */ template class FixedArray { public: diff --git a/util/probing_hash_table.hh b/util/probing_hash_table.hh index f32b64ea3..53fbbe996 100644 --- a/util/probing_hash_table.hh +++ b/util/probing_hash_table.hh @@ -26,6 +26,65 @@ struct IdentityHash { template T operator()(T arg) const { return arg; } }; +class DivMod { + public: + explicit DivMod(std::size_t buckets) : buckets_(buckets) {} + + static std::size_t RoundBuckets(std::size_t from) { + return from; + } + + template It Ideal(It begin, uint64_t hash) const { + return begin + (hash % buckets_); + } + + template void Next(BaseIt begin, BaseIt end, OutIt &it) const { + if (++it == end) it = begin; + } + + void Double() { + buckets_ *= 2; + } + + private: + std::size_t buckets_; +}; + +class Power2Mod { + public: + explicit Power2Mod(std::size_t buckets) { + UTIL_THROW_IF(!buckets || (((buckets - 1) & buckets)), ProbingSizeException, "Size " << buckets << " is not a power of 2."); + mask_ = buckets - 1; + } + + // Round up to next power of 2. + static std::size_t RoundBuckets(std::size_t from) { + --from; + from |= from >> 1; + from |= from >> 2; + from |= from >> 4; + from |= from >> 8; + from |= from >> 16; + from |= from >> 32; + return from + 1; + } + + template It Ideal(It begin, uint64_t hash) const { + return begin + (hash & mask_); + } + + template void Next(BaseIt begin, BaseIt /*end*/, OutIt &it) const { + it = begin + ((it - begin + 1) & mask_); + } + + void Double() { + mask_ = (mask_ << 1) | 1; + } + + private: + std::size_t mask_; +}; + template class AutoProbing; /* Non-standard hash table @@ -36,7 +95,7 @@ template class AutoProbing; * Uses linear probing to find value. * Only insert and lookup operations. */ -template > class ProbingHashTable { +template , class ModT = DivMod> class ProbingHashTable { public: typedef EntryT Entry; typedef typename Entry::Key Key; @@ -44,14 +103,15 @@ template (multiplier * static_cast(entries))); + uint64_t buckets = Mod::RoundBuckets(std::max(entries + 1, static_cast(multiplier * static_cast(entries)))); return buckets * sizeof(Entry); } // Must be assigned to later. - ProbingHashTable() : entries_(0) + ProbingHashTable() : mod_(1), entries_(0) #ifdef DEBUG , initialized_(false) #endif @@ -59,11 +119,12 @@ template (start)), - buckets_(allocated / sizeof(Entry)), - end_(begin_ + buckets_), + end_(begin_ + allocated / sizeof(Entry)), + buckets_(end_ - begin_), invalid_(invalid), hash_(hash_func), equal_(equal_func), + mod_(end_ - begin_), entries_(0) #ifdef DEBUG , initialized_(true) @@ -75,6 +136,13 @@ template MutableIterator Insert(const T &t) { #ifdef DEBUG assert(initialized_); @@ -83,12 +151,12 @@ template bool FindOrInsert(const T &t, MutableIterator &out) { #ifdef DEBUG assert(initialized_); #endif - for (MutableIterator i = Ideal(t.GetKey());;) { + for (MutableIterator i = Ideal(t.GetKey());;mod_.Next(begin_, end_, i)) { Key got(i->GetKey()); if (equal_(got, t.GetKey())) { out = i; return true; } if (equal_(got, invalid_)) { @@ -97,7 +165,6 @@ template GetKey()); if (equal_(got, key)) { out = i; return true; } if (equal_(got, invalid_)) return false; - if (++i == end_) i = begin_; } } // Like UnsafeMutableFind, but the key must be there. template MutableIterator UnsafeMutableMustFind(const Key key) { - for (MutableIterator i(Ideal(key));;) { + for (MutableIterator i(Ideal(key));; mod_.Next(begin_, end_, i)) { Key got(i->GetKey()); if (equal_(got, key)) { return i; } assert(!equal_(got, invalid_)); - if (++i == end_) i = begin_; } } - - template bool Find(const Key key, ConstIterator &out) const { + // Iterator is both input and output. + template bool FindFromIdeal(const Key key, ConstIterator &i) const { #ifdef DEBUG assert(initialized_); #endif - for (ConstIterator i(Ideal(key));;) { + for (;; mod_.Next(begin_, end_, i)) { Key got(i->GetKey()); - if (equal_(got, key)) { out = i; return true; } + if (equal_(got, key)) return true; if (equal_(got, invalid_)) return false; - if (++i == end_) i = begin_; } } + template bool Find(const Key key, ConstIterator &out) const { + out = Ideal(key); + return FindFromIdeal(key, out); + } + // Like Find but we're sure it must be there. template ConstIterator MustFind(const Key key) const { - for (ConstIterator i(Ideal(key));;) { + for (ConstIterator i(Ideal(key));; mod_.Next(begin_, end_, i)) { Key got(i->GetKey()); if (equal_(got, key)) { return i; } assert(!equal_(got, invalid_)); - if (++i == end_) i = begin_; } } @@ -174,6 +242,7 @@ template ; - MutableIterator Ideal(const Key key) { - return begin_ + (hash_(key) % buckets_); - } - ConstIterator Ideal(const Key key) const { - return begin_ + (hash_(key) % buckets_); - } - template MutableIterator UncheckedInsert(const T &t) { - for (MutableIterator i(Ideal(t.GetKey()));;) { + for (MutableIterator i(Ideal(t.GetKey()));; mod_.Next(begin_, end_, i)) { if (equal_(i->GetKey(), invalid_)) { *i = t; return i; } - if (++i == end_) { i = begin_; } } } MutableIterator begin_; - std::size_t buckets_; MutableIterator end_; + std::size_t buckets_; Key invalid_; Hash hash_; Equal equal_; + Mod mod_; + std::size_t entries_; #ifdef DEBUG bool initialized_; @@ -259,7 +322,7 @@ template > class AutoProbing { private: - typedef ProbingHashTable Backend; + typedef ProbingHashTable Backend; public: static std::size_t MemUsage(std::size_t size, float multiplier = 1.5) { return Backend::Size(size, multiplier); @@ -272,7 +335,7 @@ template -#include - #include namespace util { @@ -16,34 +14,143 @@ struct Entry { Key GetKey() const { return key; } }; -typedef util::ProbingHashTable Table; +// I don't care if this doesn't run on Windows. Empirically /dev/urandom was faster than boost::random's Mersenne Twister. +class URandom { + public: + URandom() : + it_(buf_ + 1024), end_(buf_ + 1024), + file_(util::OpenReadOrThrow("/dev/urandom")) {} -void Test(uint64_t entries, uint64_t lookups, float multiplier = 1.5) { - std::size_t size = Table::Size(entries, multiplier); + uint64_t Get() { + if (it_ == end_) { + it_ = buf_; + util::ReadOrThrow(file_.get(), buf_, sizeof(buf_)); + it_ = buf_; + } + return *it_++; + } + + void Batch(uint64_t *begin, uint64_t *end) { + util::ReadOrThrow(file_.get(), begin, (end - begin) * sizeof(uint64_t)); + } + + private: + uint64_t buf_[1024]; + uint64_t *it_, *end_; + + util::scoped_fd file_; +}; + +struct PrefetchEntry { + uint64_t key; + const Entry *pointer; +}; + +const std::size_t kPrefetchSize = 4; +template class PrefetchQueue { + public: + explicit PrefetchQueue(Table &table) : table_(table), cur_(0), twiddle_(false) { + for (PrefetchEntry *i = entries_; i != entries_ + kPrefetchSize; ++i) + i->pointer = NULL; + } + + void Add(uint64_t key) { + if (Cur().pointer) { + twiddle_ ^= table_.FindFromIdeal(Cur().key, Cur().pointer); + } + Cur().key = key; + Cur().pointer = table_.Ideal(key); + __builtin_prefetch(Cur().pointer, 0, 0); + Next(); + } + + bool Drain() { + if (Cur().pointer) { + for (PrefetchEntry *i = &Cur(); i < entries_ + kPrefetchSize; ++i) { + twiddle_ ^= table_.FindFromIdeal(i->key, i->pointer); + } + } + for (PrefetchEntry *i = entries_; i < &Cur(); ++i) { + twiddle_ ^= table_.FindFromIdeal(i->key, i->pointer); + } + return twiddle_; + } + + private: + PrefetchEntry &Cur() { return entries_[cur_]; } + void Next() { + ++cur_; + cur_ = cur_ % kPrefetchSize; + } + + Table &table_; + PrefetchEntry entries_[kPrefetchSize]; + std::size_t cur_; + + bool twiddle_; + + PrefetchQueue(const PrefetchQueue&); + void operator=(const PrefetchQueue&); +}; + +/*template class Immediate { + public: + + private: + Table &table_; +};*/ + +std::size_t Size(uint64_t entries, float multiplier = 1.5) { + typedef util::ProbingHashTable, Power2Mod> Table; + // Always round up to power of 2 for fair comparison. + return Power2Mod::RoundBuckets(Table::Size(entries, multiplier) / sizeof(Entry)) * sizeof(Entry); +} + +template bool Test(URandom &rn, uint64_t entries, const uint64_t *const queries_begin, const uint64_t *const queries_end, float multiplier = 1.5) { + typedef util::ProbingHashTable, Mod> Table; + std::size_t size = Size(entries, multiplier); scoped_malloc backing(util::CallocOrThrow(size)); Table table(backing.get(), size); - boost::random::mt19937 gen; - boost::random::uniform_int_distribution<> dist(std::numeric_limits::min(), std::numeric_limits::max()); + double start = UserTime(); for (uint64_t i = 0; i < entries; ++i) { Entry entry; - entry.key = dist(gen); + entry.key = rn.Get(); table.Insert(entry); } - double inserted = UserTime(); - bool meaningless = true; - for (uint64_t i = 0; i < lookups; ++i) { - Table::ConstIterator it; - meaningless ^= table.Find(dist(gen), it); + double inserted = UserTime() - start; + double before_lookup = UserTime(); + PrefetchQueue queue(table); + for (const uint64_t *i = queries_begin; i != queries_end; ++i) { + queue.Add(*i); +/* typename Table::ConstIterator it; + meaningless ^= table.Find(*i, it);*/ } - std::cout << meaningless << ' ' << entries << ' ' << multiplier << ' ' << (inserted - start) << ' ' << (UserTime() - inserted) / static_cast(lookups) << std::endl; + bool meaningless = queue.Drain(); + std::cout << entries << ' ' << size << ' ' << (inserted / static_cast(entries)) << ' ' << (UserTime() - before_lookup) / static_cast(queries_end - queries_begin) << '\n'; + return meaningless; +} + +template bool TestRun(uint64_t lookups = 20000000, float multiplier = 1.5) { + URandom rn; + util::scoped_malloc queries(util::CallocOrThrow(lookups * sizeof(uint64_t))); + rn.Batch(static_cast(queries.get()), static_cast(queries.get()) + lookups); + uint64_t physical_mem_limit = util::GuessPhysicalMemory() / 2; + bool meaningless = true; + for (uint64_t i = 4; Size(i / multiplier) < physical_mem_limit; i *= 4) { + meaningless ^= util::Test(rn, i / multiplier, static_cast(queries.get()), static_cast(queries.get()) + lookups, multiplier); + } + return meaningless; } } // namespace } // namespace util int main() { - for (uint64_t i = 1; i <= 10000000ULL; i *= 10) { - util::Test(i, 10000000); - } + bool meaningless = false; + std::cout << "#Integer division\n"; + meaningless ^= util::TestRun(); + std::cout << "#Masking\n"; + meaningless ^= util::TestRun(); + std::cerr << "Meaningless: " << meaningless << '\n'; } diff --git a/util/stream/CMakeLists.txt b/util/stream/CMakeLists.txt index d3eddbe5c..3e47f73e6 100644 --- a/util/stream/CMakeLists.txt +++ b/util/stream/CMakeLists.txt @@ -55,7 +55,7 @@ if(BUILD_TESTING) set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK) # Link the executable against boost - target_link_libraries(${test} ${Boost_LIBRARIES}) + target_link_libraries(${test} ${Boost_LIBRARIES} pthread) # Specify command arguments for how to run each unit test # diff --git a/util/stream/rewindable_stream.cc b/util/stream/rewindable_stream.cc index 2867bf8ab..726e2a72d 100644 --- a/util/stream/rewindable_stream.cc +++ b/util/stream/rewindable_stream.cc @@ -1,6 +1,8 @@ #include "util/stream/rewindable_stream.hh" #include "util/pcqueue.hh" +#include + namespace util { namespace stream {