diff --git a/lm/binary_format.cc b/lm/binary_format.cc index 2b34a778a..802943f57 100644 --- a/lm/binary_format.cc +++ b/lm/binary_format.cc @@ -169,8 +169,7 @@ void *BinaryFormat::SetupJustVocab(std::size_t memory_size, uint8_t order) { vocab_size_ = memory_size; if (!write_mmap_) { header_size_ = 0; - util::MapAnonymous(memory_size, memory_vocab_); - util::AdviseHugePages(memory_vocab_.get(), memory_size); + util::HugeMalloc(memory_size, true, memory_vocab_); return reinterpret_cast(memory_vocab_.get()); } header_size_ = TotalHeaderSize(order); @@ -181,16 +180,16 @@ void *BinaryFormat::SetupJustVocab(std::size_t memory_size, uint8_t order) { switch (write_method_) { case Config::WRITE_MMAP: mapping_.reset(util::MapZeroedWrite(file_.get(), total), total, util::scoped_memory::MMAP_ALLOCATED); + util::AdviseHugePages(vocab_base, total); vocab_base = mapping_.get(); break; case Config::WRITE_AFTER: util::ResizeOrThrow(file_.get(), 0); - util::MapAnonymous(total, memory_vocab_); + util::HugeMalloc(total, true, memory_vocab_); vocab_base = memory_vocab_.get(); break; } strncpy(reinterpret_cast(vocab_base), kMagicIncomplete, header_size_); - util::AdviseHugePages(vocab_base, total); return reinterpret_cast(vocab_base) + header_size_; } @@ -200,7 +199,7 @@ void *BinaryFormat::GrowForSearch(std::size_t memory_size, std::size_t vocab_pad std::size_t new_size = header_size_ + vocab_size_ + vocab_pad_ + memory_size; vocab_string_offset_ = new_size; if (!write_mmap_ || write_method_ == Config::WRITE_AFTER) { - util::MapAnonymous(memory_size, memory_search_); + util::HugeMalloc(memory_size, true, memory_search_); assert(header_size_ == 0 || write_mmap_); vocab_base = reinterpret_cast(memory_vocab_.get()) + header_size_; util::AdviseHugePages(memory_search_.get(), memory_size); diff --git a/lm/builder/corpus_count.cc b/lm/builder/corpus_count.cc index 04815d805..0414c2261 100644 --- a/lm/builder/corpus_count.cc +++ b/lm/builder/corpus_count.cc @@ -5,7 +5,7 @@ #include "lm/lm_exception.hh" #include "lm/vocab.hh" #include "lm/word_index.hh" -#include "util/fake_ofstream.hh" +#include "util/file_stream.hh" #include "util/file.hh" #include "util/file_piece.hh" #include "util/murmur_hash.hh" diff --git a/lm/builder/debug_print.hh b/lm/builder/debug_print.hh index 193a6892c..4b9f306d8 100644 --- a/lm/builder/debug_print.hh +++ b/lm/builder/debug_print.hh @@ -4,21 +4,21 @@ #include "lm/builder/payload.hh" #include "lm/common/print.hh" #include "lm/common/ngram_stream.hh" -#include "util/fake_ofstream.hh" +#include "util/file_stream.hh" #include "util/file.hh" #include namespace lm { namespace builder { // Not defined, only specialized. -template void PrintPayload(util::FakeOFStream &to, const BuildingPayload &payload); -template <> inline void PrintPayload(util::FakeOFStream &to, const BuildingPayload &payload) { +template void PrintPayload(util::FileStream &to, const BuildingPayload &payload); +template <> inline void PrintPayload(util::FileStream &to, const BuildingPayload &payload) { to << payload.count; } -template <> inline void PrintPayload(util::FakeOFStream &to, const BuildingPayload &payload) { +template <> inline void PrintPayload(util::FileStream &to, const BuildingPayload &payload) { to << log10(payload.uninterp.prob) << ' ' << log10(payload.uninterp.gamma); } -template <> inline void PrintPayload(util::FakeOFStream &to, const BuildingPayload &payload) { +template <> inline void PrintPayload(util::FileStream &to, const BuildingPayload &payload) { to << payload.complete.prob << ' ' << payload.complete.backoff; } @@ -36,7 +36,7 @@ template class Print { void Run(const util::stream::ChainPositions &chains) { util::scoped_fd fd(to_); - util::FakeOFStream out(to_); + util::FileStream out(to_); NGramStreams streams(chains); for (NGramStream *s = streams.begin(); s != streams.end(); ++s) { DumpStream(*s, out); @@ -45,13 +45,13 @@ template class Print { void Run(const util::stream::ChainPosition &position) { util::scoped_fd fd(to_); - util::FakeOFStream out(to_); + util::FileStream out(to_); NGramStream stream(position); DumpStream(stream, out); } private: - void DumpStream(NGramStream &stream, util::FakeOFStream &to) { + void DumpStream(NGramStream &stream, util::FileStream &to) { for (; stream; ++stream) { PrintPayload(to, stream->Value()); for (const WordIndex *w = stream->begin(); w != stream->end(); ++w) { diff --git a/lm/builder/dump_counts_main.cc b/lm/builder/dump_counts_main.cc index a4c9478b6..26078d0e7 100644 --- a/lm/builder/dump_counts_main.cc +++ b/lm/builder/dump_counts_main.cc @@ -30,7 +30,7 @@ int main(int argc, char *argv[]) { UTIL_THROW_IF(*i >= vocab.Size(), util::Exception, "Vocab ID " << *i << " is larger than the vocab file's maximum of " << vocab.Size() << ". Are you sure you have the right order and vocab file for these counts?"); std::cout << vocab.Lookup(*i) << ' '; } - // TODO don't use std::cout because it is slow. Add fast uint64_t printing support to FakeOFStream. + // TODO don't use std::cout because it is slow. Add fast uint64_t printing support to FileStream. std::cout << *reinterpret_cast(words + order) << '\n'; } } diff --git a/lm/builder/interpolate.cc b/lm/builder/interpolate.cc index 7dcb3623c..a62ef43d2 100644 --- a/lm/builder/interpolate.cc +++ b/lm/builder/interpolate.cc @@ -12,7 +12,6 @@ #include #include #include -#include namespace lm { namespace builder { namespace { diff --git a/lm/builder/output.cc b/lm/builder/output.cc index c92283ac6..604fa22e6 100644 --- a/lm/builder/output.cc +++ b/lm/builder/output.cc @@ -2,7 +2,7 @@ #include "lm/common/model_buffer.hh" #include "lm/common/print.hh" -#include "util/fake_ofstream.hh" +#include "util/file_stream.hh" #include "util/stream/multi_stream.hh" #include @@ -41,7 +41,7 @@ void Output::Apply(HookType hook_type, util::stream::Chains &chains) { void PrintHook::Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) { if (verbose_header_) { - util::FakeOFStream out(file_.get(), 50); + util::FileStream out(file_.get(), 50); out << "# Input file: " << info.input_file << '\n'; out << "# Token count: " << info.token_count << '\n'; out << "# Smoothing: Modified Kneser-Ney" << '\n'; diff --git a/lm/common/model_buffer.cc b/lm/common/model_buffer.cc index 431d4ae4c..ae9b08c4f 100644 --- a/lm/common/model_buffer.cc +++ b/lm/common/model_buffer.cc @@ -1,6 +1,6 @@ #include "lm/common/model_buffer.hh" #include "util/exception.hh" -#include "util/fake_ofstream.hh" +#include "util/file_stream.hh" #include "util/file.hh" #include "util/file_piece.hh" #include "util/stream/io.hh" @@ -68,7 +68,7 @@ void ModelBuffer::Sink(util::stream::Chains &chains, const std::vector } if (keep_buffer_) { util::scoped_fd metadata(util::CreateOrThrow((file_base_ + ".kenlm_intermediate").c_str())); - util::FakeOFStream meta(metadata.get(), 200); + util::FileStream meta(metadata.get(), 200); meta << kMetadataHeader << "\nCounts"; for (std::vector::const_iterator i = counts_.begin(); i != counts_.end(); ++i) { meta << ' ' << *i; diff --git a/lm/common/print.cc b/lm/common/print.cc index cd2a80260..518b62f51 100644 --- a/lm/common/print.cc +++ b/lm/common/print.cc @@ -1,7 +1,7 @@ #include "lm/common/print.hh" #include "lm/common/ngram_stream.hh" -#include "util/fake_ofstream.hh" +#include "util/file_stream.hh" #include "util/file.hh" #include "util/mmap.hh" #include "util/scoped.hh" @@ -24,7 +24,7 @@ VocabReconstitute::VocabReconstitute(int fd) { } namespace { -template void PrintLead(const VocabReconstitute &vocab, ProxyStream &stream, util::FakeOFStream &out) { +template void PrintLead(const VocabReconstitute &vocab, ProxyStream &stream, util::FileStream &out) { out << stream->Value().prob << '\t' << vocab.Lookup(*stream->begin()); for (const WordIndex *i = stream->begin() + 1; i != stream->end(); ++i) { out << ' ' << vocab.Lookup(*i); @@ -34,7 +34,7 @@ template void PrintLead(const VocabReconstitute &vocab, ProxyStr void PrintARPA::Run(const util::stream::ChainPositions &positions) { VocabReconstitute vocab(vocab_fd_); - util::FakeOFStream out(out_fd_); + util::FileStream out(out_fd_); out << "\\data\\\n"; for (size_t i = 0; i < positions.size(); ++i) { out << "ngram " << (i+1) << '=' << counts_[i] << '\n'; diff --git a/lm/filter/arpa_io.cc b/lm/filter/arpa_io.cc index 92c821aaf..2cae60f9a 100644 --- a/lm/filter/arpa_io.cc +++ b/lm/filter/arpa_io.cc @@ -1,5 +1,6 @@ #include "lm/filter/arpa_io.hh" #include "util/file_piece.hh" +#include "util/string_stream.hh" #include #include @@ -22,14 +23,8 @@ ARPAInputException::ARPAInputException(const StringPiece &message, const StringP ARPAInputException::~ARPAInputException() throw() {} -ARPAOutputException::ARPAOutputException(const char *message, const std::string &file_name) throw() { - *this << message << " in file " << file_name; -} - -ARPAOutputException::~ARPAOutputException() throw() {} - // Seeking is the responsibility of the caller. -void WriteCounts(std::ostream &out, const std::vector &number) { +template void WriteCounts(Stream &out, const std::vector &number) { out << "\n\\data\\\n"; for (unsigned int i = 0; i < number.size(); ++i) { out << "ngram " << i+1 << "=" << number[i] << '\n'; @@ -38,9 +33,10 @@ void WriteCounts(std::ostream &out, const std::vector &number) { } size_t SizeNeededForCounts(const std::vector &number) { - std::ostringstream buf; - WriteCounts(buf, number); - return buf.tellp(); + std::string buf; + util::StringStream stream(buf); + WriteCounts(stream, number); + return buf.size(); } bool IsEntirelyWhiteSpace(const StringPiece &line) { @@ -50,44 +46,21 @@ bool IsEntirelyWhiteSpace(const StringPiece &line) { return true; } -ARPAOutput::ARPAOutput(const char *name, size_t buffer_size) : file_name_(name), buffer_(new char[buffer_size]) { - try { - file_.exceptions(std::ostream::eofbit | std::ostream::failbit | std::ostream::badbit); - if (!file_.rdbuf()->pubsetbuf(buffer_.get(), buffer_size)) { - std::cerr << "Warning: could not enlarge buffer for " << name << std::endl; - buffer_.reset(); - } - file_.open(name, std::ios::out | std::ios::binary); - } catch (const std::ios_base::failure &f) { - throw ARPAOutputException("Opening", file_name_); - } -} +ARPAOutput::ARPAOutput(const char *name, size_t buffer_size) + : file_backing_(util::CreateOrThrow(name)), file_(file_backing_.get(), buffer_size) {} void ARPAOutput::ReserveForCounts(std::streampos reserve) { - try { - for (std::streampos i = 0; i < reserve; i += std::streampos(1)) { - file_ << '\n'; - } - } catch (const std::ios_base::failure &f) { - throw ARPAOutputException("Writing blanks to reserve space for counts to ", file_name_); + for (std::streampos i = 0; i < reserve; i += std::streampos(1)) { + file_ << '\n'; } } void ARPAOutput::BeginLength(unsigned int length) { - fast_counter_ = 0; - try { - file_ << '\\' << length << "-grams:" << '\n'; - } catch (const std::ios_base::failure &f) { - throw ARPAOutputException("Writing n-gram header to ", file_name_); - } + file_ << '\\' << length << "-grams:" << '\n'; } void ARPAOutput::EndLength(unsigned int length) { - try { - file_ << '\n'; - } catch (const std::ios_base::failure &f) { - throw ARPAOutputException("Writing blank at end of count list to ", file_name_); - } + file_ << '\n'; if (length > counts_.size()) { counts_.resize(length); } @@ -95,14 +68,10 @@ void ARPAOutput::EndLength(unsigned int length) { } void ARPAOutput::Finish() { - try { - file_ << "\\end\\\n"; - file_.seekp(0); - WriteCounts(file_, counts_); - file_ << std::flush; - } catch (const std::ios_base::failure &f) { - throw ARPAOutputException("Finishing including writing counts at beginning to ", file_name_); - } + file_ << "\\end\\\n"; + file_.seekp(0); + WriteCounts(file_, counts_); + file_.flush(); } } // namespace lm diff --git a/lm/filter/arpa_io.hh b/lm/filter/arpa_io.hh index 0f6c8be75..7489270db 100644 --- a/lm/filter/arpa_io.hh +++ b/lm/filter/arpa_io.hh @@ -4,6 +4,7 @@ */ #include "lm/read_arpa.hh" #include "util/exception.hh" +#include "util/file_stream.hh" #include "util/string_piece.hh" #include "util/tokenize_piece.hh" @@ -28,17 +29,6 @@ class ARPAInputException : public util::Exception { virtual ~ARPAInputException() throw(); }; -class ARPAOutputException : public util::ErrnoException { - public: - ARPAOutputException(const char *prefix, const std::string &file_name) throw(); - virtual ~ARPAOutputException() throw(); - - const std::string &File() const throw() { return file_name_; } - - private: - const std::string file_name_; -}; - // Handling for the counts of n-grams at the beginning of ARPA files. size_t SizeNeededForCounts(const std::vector &number); @@ -55,11 +45,7 @@ class ARPAOutput : boost::noncopyable { void BeginLength(unsigned int length); void AddNGram(const StringPiece &line) { - try { - file_ << line << '\n'; - } catch (const std::ios_base::failure &f) { - throw ARPAOutputException("Writing an n-gram", file_name_); - } + file_ << line << '\n'; ++fast_counter_; } @@ -76,9 +62,8 @@ class ARPAOutput : boost::noncopyable { void Finish(); private: - const std::string file_name_; - boost::scoped_array buffer_; - std::fstream file_; + util::scoped_fd file_backing_; + util::FileStream file_; size_t fast_counter_; std::vector counts_; }; diff --git a/lm/filter/count_io.hh b/lm/filter/count_io.hh index 02eb78baa..1af6676c4 100644 --- a/lm/filter/count_io.hh +++ b/lm/filter/count_io.hh @@ -5,7 +5,7 @@ #include #include -#include "util/fake_ofstream.hh" +#include "util/file_stream.hh" #include "util/file.hh" #include "util/file_piece.hh" @@ -28,7 +28,7 @@ class CountOutput : boost::noncopyable { } private: - util::FakeOFStream file_; + util::FileStream file_; }; class CountBatch { diff --git a/lm/filter/phrase_table_vocab_main.cc b/lm/filter/phrase_table_vocab_main.cc index e8a8d0265..9ffa35f93 100644 --- a/lm/filter/phrase_table_vocab_main.cc +++ b/lm/filter/phrase_table_vocab_main.cc @@ -1,4 +1,4 @@ -#include "util/fake_ofstream.hh" +#include "util/file_stream.hh" #include "util/file_piece.hh" #include "util/murmur_hash.hh" #include "util/pool.hh" @@ -68,7 +68,7 @@ class TargetWords { } void Print() const { - util::FakeOFStream out(1); + util::FileStream out(1); for (std::vector >::const_iterator i = vocab_.begin(); i != vocab_.end(); ++i) { for (boost::unordered_set::const_iterator j = i->begin(); j != i->end(); ++j) { out << *j << ' '; diff --git a/lm/kenlm_benchmark_main.cc b/lm/kenlm_benchmark_main.cc index 1704ec6c9..c9ee16525 100644 --- a/lm/kenlm_benchmark_main.cc +++ b/lm/kenlm_benchmark_main.cc @@ -1,5 +1,5 @@ #include "lm/model.hh" -#include "util/fake_ofstream.hh" +#include "util/file_stream.hh" #include "util/file.hh" #include "util/file_piece.hh" #include "util/usage.hh" @@ -10,7 +10,7 @@ namespace { template void ConvertToBytes(const Model &model, int fd_in) { util::FilePiece in(fd_in); - util::FakeOFStream out(1); + util::FileStream out(1); Width width; StringPiece word; const Width end_sentence = (Width)model.GetVocabulary().EndSentence(); @@ -30,10 +30,19 @@ template void QueryFromBytes(const Model &model, int const lm::ngram::State *next_state = begin_state; Width kEOS = model.GetVocabulary().EndSentence(); Width buf[4096]; - float sum = 0.0; + + uint64_t completed = 0; + double loaded = util::CPUTime(); + + std::cout << "CPU_to_load: " << loaded << std::endl; + + // Numerical precision: batch sums. + double total = 0.0; while (std::size_t got = util::ReadOrEOF(fd_in, buf, sizeof(buf))) { + float sum = 0.0; UTIL_THROW_IF2(got % sizeof(Width), "File size not a multiple of vocab id size " << sizeof(Width)); got /= sizeof(Width); + completed += got; // Do even stuff first. const Width *even_end = buf + (got & ~1); // Alternating states @@ -49,8 +58,13 @@ template void QueryFromBytes(const Model &model, int sum += model.FullScore(*next_state, *i, state[2]).prob; next_state = (*i++ == kEOS) ? begin_state : &state[2]; } + total += sum; } - std::cout << "Sum is " << sum << std::endl; + double after = util::CPUTime(); + std::cerr << "Probability sum is " << total << std::endl; + std::cout << "Queries: " << completed << std::endl; + std::cout << "CPU_excluding_load: " << (after - loaded) << "\nCPU_per_query: " << ((after - loaded) / static_cast(completed)) << std::endl; + std::cout << "RSSMax: " << util::RSSMax() << std::endl; } template void DispatchFunction(const Model &model, bool query) { @@ -62,7 +76,10 @@ template void DispatchFunction(const Model &model, bo } template void DispatchWidth(const char *file, bool query) { - Model model(file); + lm::ngram::Config config; + config.load_method = util::READ; + std::cerr << "Using load_method = READ." << std::endl; + Model model(file, config); lm::WordIndex bound = model.GetVocabulary().Bound(); if (bound <= 256) { DispatchFunction(model, query); @@ -116,11 +133,10 @@ int main(int argc, char *argv[]) { << argv[0] << " vocab $model <$text >$text.vocab\n" << "#Ensure files are in RAM.\n" << "cat $text.vocab $model >/dev/null\n" - << "#Timed query against the model, including loading.\n" - << "time " << argv[0] << " query $model <$text.vocab\n"; + << "#Timed query against the model.\n" + << argv[0] << " query $model <$text.vocab\n"; return 1; } Dispatch(argv[2], !strcmp(argv[1], "query")); - util::PrintUsage(std::cerr); return 0; } diff --git a/lm/ngram_query.hh b/lm/ngram_query.hh index b19c5aa4f..4430841c9 100644 --- a/lm/ngram_query.hh +++ b/lm/ngram_query.hh @@ -3,7 +3,7 @@ #include "lm/enumerate_vocab.hh" #include "lm/model.hh" -#include "util/fake_ofstream.hh" +#include "util/file_stream.hh" #include "util/file_piece.hh" #include "util/usage.hh" @@ -42,7 +42,7 @@ class QueryPrinter { } private: - util::FakeOFStream out_; + util::FileStream out_; bool print_word_; bool print_line_; bool print_summary_; diff --git a/lm/vocab.cc b/lm/vocab.cc index 5696e60b3..3d83e0452 100644 --- a/lm/vocab.cc +++ b/lm/vocab.cc @@ -6,7 +6,7 @@ #include "lm/config.hh" #include "lm/weights.hh" #include "util/exception.hh" -#include "util/fake_ofstream.hh" +#include "util/file_stream.hh" #include "util/file.hh" #include "util/joint_sort.hh" #include "util/murmur_hash.hh" @@ -182,7 +182,7 @@ void SortedVocabulary::ComputeRenumbering(WordIndex types, int from_words, int t std::sort(entries.begin(), entries.end()); // Write out new vocab file. { - util::FakeOFStream out(to_words); + util::FileStream out(to_words); out << "" << '\0'; for (std::vector::const_iterator i = entries.begin(); i != entries.end(); ++i) { out << i->str << '\0'; diff --git a/lm/vocab.hh b/lm/vocab.hh index b42566f23..59740e853 100644 --- a/lm/vocab.hh +++ b/lm/vocab.hh @@ -4,7 +4,7 @@ #include "lm/enumerate_vocab.hh" #include "lm/lm_exception.hh" #include "lm/virtual_interface.hh" -#include "util/fake_ofstream.hh" +#include "util/file_stream.hh" #include "util/murmur_hash.hh" #include "util/pool.hh" #include "util/probing_hash_table.hh" @@ -44,7 +44,7 @@ class ImmediateWriteWordsWrapper : public EnumerateVocab { private: EnumerateVocab *inner_; - util::FakeOFStream stream_; + util::FileStream stream_; }; // When the binary size isn't known yet. @@ -225,7 +225,7 @@ class WriteUniqueWords { } private: - util::FakeOFStream word_list_; + util::FileStream word_list_; }; class NoOpUniqueWords { diff --git a/util/exception.cc b/util/exception.cc index 588f5eae5..e644d2cb7 100644 --- a/util/exception.cc +++ b/util/exception.cc @@ -7,47 +7,41 @@ #include #include +#if defined(_WIN32) || defined(_WIN64) +#include +#include +#endif + namespace util { Exception::Exception() throw() {} Exception::~Exception() throw() {} -Exception::Exception(const Exception &from) : std::exception() { - stream_ << from.stream_.str(); -} - -Exception &Exception::operator=(const Exception &from) { - stream_ << from.stream_.str(); - return *this; -} - -const char *Exception::what() const throw() { - text_ = stream_.str(); - return text_.c_str(); -} - void Exception::SetLocation(const char *file, unsigned int line, const char *func, const char *child_name, const char *condition) { /* The child class might have set some text, but we want this to come first. * Another option would be passing this information to the constructor, but * then child classes would have to accept constructor arguments and pass * them down. */ - text_ = stream_.str(); - stream_.str(""); - stream_ << file << ':' << line; - if (func) stream_ << " in " << func << " threw "; + std::string old_text; + std::swap(old_text, what_); + StringStream stream(what_); + stream << file << ':' << line; + if (func) stream << " in " << func << " threw "; if (child_name) { - stream_ << child_name; + stream << child_name; } else { #ifdef __GXX_RTTI - stream_ << typeid(this).name(); + stream << typeid(this).name(); #else - stream_ << "an exception"; + stream << "an exception"; #endif } - if (condition) stream_ << " because `" << condition; - stream_ << "'.\n"; - stream_ << text_; + if (condition) { + stream << " because `" << condition << '\''; + } + stream << ".\n"; + stream << old_text; } namespace { @@ -95,4 +89,17 @@ ErrnoException::~ErrnoException() throw() {} OverflowException::OverflowException() throw() {} OverflowException::~OverflowException() throw() {} +#if defined(_WIN32) || defined(_WIN64) +WindowsException::WindowsException() throw() { + unsigned int last_error = GetLastError(); + char error_msg[256] = ""; + if (!FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, last_error, LANG_NEUTRAL, error_msg, sizeof(error_msg), NULL)) { + *this << "Windows error " << GetLastError() << " while formatting Windows error " << last_error << ". "; + } else { + *this << "Windows error " << last_error << ": " << error_msg; + } +} +WindowsException::~WindowsException() throw() {} +#endif + } // namespace util diff --git a/util/exception.hh b/util/exception.hh index d67a6f9fb..00207b242 100644 --- a/util/exception.hh +++ b/util/exception.hh @@ -1,12 +1,16 @@ #ifndef UTIL_EXCEPTION_H #define UTIL_EXCEPTION_H +#include "util/string_stream.hh" + #include #include -#include #include #include +// TODO(hieu) delete this +#include + namespace util { template typename Except::template ExceptionTag::Identity operator<<(Except &e, const Data &data); @@ -16,11 +20,7 @@ class Exception : public std::exception { Exception() throw(); virtual ~Exception() throw(); - Exception(const Exception &from); - Exception &operator=(const Exception &from); - - // Not threadsafe, but probably doesn't matter. FWIW, Boost's exception guidance implies that what() isn't threadsafe. - const char *what() const throw(); + const char *what() const throw() { return what_.c_str(); } // For use by the UTIL_THROW macros. void SetLocation( @@ -38,8 +38,7 @@ class Exception : public std::exception { typedef T Identity; }; - std::stringstream stream_; - mutable std::string text_; + std::string what_; }; /* This implements the normal operator<< for Exception and all its children. @@ -47,7 +46,12 @@ class Exception : public std::exception { * boost::enable_if. */ template typename Except::template ExceptionTag::Identity operator<<(Except &e, const Data &data) { - e.stream_ << data; + // TODO(hieu): change this to + // StringStream(e.what_) << data; + + std::stringstream moses_hack; + moses_hack << data; + e.what_ += moses_hack.str(); return e; } @@ -149,6 +153,15 @@ inline std::size_t CheckOverflow(uint64_t value) { return CheckOverflowInternal(value); } +#if defined(_WIN32) || defined(_WIN64) +/* Thrown for Windows specific operations. */ +class WindowsException : public Exception { + public: + WindowsException() throw(); + ~WindowsException() throw(); +}; +#endif + } // namespace util #endif // UTIL_EXCEPTION_H diff --git a/util/fake_ofstream.hh b/util/fake_ofstream.hh deleted file mode 100644 index d35bf0d83..000000000 --- a/util/fake_ofstream.hh +++ /dev/null @@ -1,137 +0,0 @@ -/* Like std::ofstream but without being incredibly slow. Backed by a raw fd. - * Supports most of the built-in types except for void* and long double. - */ -#ifndef UTIL_FAKE_OFSTREAM_H -#define UTIL_FAKE_OFSTREAM_H - -#include "util/file.hh" -#include "util/float_to_string.hh" -#include "util/integer_to_string.hh" -#include "util/scoped.hh" -#include "util/string_piece.hh" - -#include -#include - -#include - -namespace util { -class FakeOFStream { - public: - // Maximum over all ToString operations. - // static const std::size_t kMinBuf = 20; - // This was causing compile failures in debug, so now 20 is written directly. - // - // Does not take ownership of out. - // Allows default constructor, but must call SetFD. - explicit FakeOFStream(int out = -1, std::size_t buffer_size = 1048576) - : buf_(util::MallocOrThrow(std::max(buffer_size, (size_t)20))), - current_(static_cast(buf_.get())), - end_(current_ + std::max(buffer_size, (size_t)20)), - fd_(out) {} - - ~FakeOFStream() { - // Could have called Finish already - flush(); - } - - void SetFD(int to) { - flush(); - fd_ = to; - } - - FakeOFStream &write(const void *data, std::size_t length) { - if (UTIL_LIKELY(current_ + length <= end_)) { - std::memcpy(current_, data, length); - current_ += length; - return *this; - } - flush(); - if (current_ + length <= end_) { - std::memcpy(current_, data, length); - current_ += length; - } else { - util::WriteOrThrow(fd_, data, length); - } - return *this; - } - - // This also covers std::string and char* - FakeOFStream &operator<<(StringPiece str) { - return write(str.data(), str.size()); - } - - // For anything with ToStringBuf::kBytes, define operator<< using ToString. - // This includes uint64_t, int64_t, uint32_t, int32_t, uint16_t, int16_t, - // float, double - private: - template struct EnableIfKludge { - typedef FakeOFStream type; - }; - public: - template typename EnableIfKludge::kBytes>::type &operator<<(const T value) { - EnsureRemaining(ToStringBuf::kBytes); - current_ = ToString(value, current_); - assert(current_ <= end_); - return *this; - } - - FakeOFStream &operator<<(char c) { - EnsureRemaining(1); - *current_++ = c; - return *this; - } - - FakeOFStream &operator<<(unsigned char c) { - EnsureRemaining(1); - *current_++ = static_cast(c); - return *this; - } - - /* clang on OS X appears to consider std::size_t aka unsigned long distinct - * from uint64_t. So this function makes clang work. gcc considers - * uint64_t and std::size_t the same (on 64-bit) so this isn't necessary. - * But it does no harm since gcc sees it as a specialization of the - * EnableIfKludge template. - * Also, delegating to *this << static_cast(value) would loop - * indefinitely on gcc. - */ - FakeOFStream &operator<<(std::size_t value) { - EnsureRemaining(ToStringBuf::kBytes); - current_ = ToString(static_cast(value), current_); - return *this; - } - - // Note this does not sync. - void flush() { - if (current_ != buf_.get()) { - util::WriteOrThrow(fd_, buf_.get(), current_ - (char*)buf_.get()); - current_ = static_cast(buf_.get()); - } - } - - // Not necessary, but does assure the data is cleared. - void Finish() { - flush(); - buf_.reset(); - current_ = NULL; - util::FSyncOrThrow(fd_); - } - - private: - void EnsureRemaining(std::size_t amount) { - if (UTIL_UNLIKELY(current_ + amount > end_)) { - flush(); - assert(current_ + amount <= end_); - } - } - - util::scoped_malloc buf_; - char *current_, *end_; - - int fd_; -}; - -} // namespace - -#endif diff --git a/util/fake_ostream.hh b/util/fake_ostream.hh new file mode 100644 index 000000000..331742f6a --- /dev/null +++ b/util/fake_ostream.hh @@ -0,0 +1,128 @@ +#ifndef UTIL_FAKE_OSTREAM_H +#define UTIL_FAKE_OSTREAM_H + +#include "util/float_to_string.hh" +#include "util/integer_to_string.hh" +#include "util/string_piece.hh" + +#include +#include + +#include + +namespace util { + +/* Like std::ostream but without being incredibly slow. + * Supports most of the built-in types except for long double. + * + * The FakeOStream class is intended to be inherited from. The inherting class + * should provide: + * public: + * Derived &flush(); + * Derived &write(const void *data, std::size_t length); + * + * private: or protected: + * friend class FakeOStream; + * char *Ensure(std::size_t amount); + * void AdvanceTo(char *to); + * + * The Ensure function makes enough space for an in-place write and returns + * where to write. The AdvanceTo function happens after the write, saying how + * much was actually written. + * + * Precondition: + * amount <= kToStringMaxBytes for in-place writes. + */ +template class FakeOStream { + public: + FakeOStream() {} + + // This also covers std::string and char* + Derived &operator<<(StringPiece str) { + return C().write(str.data(), str.size()); + } + + // For anything with ToStringBuf::kBytes, define operator<< using ToString. + // This includes uint64_t, int64_t, uint32_t, int32_t, uint16_t, int16_t, + // float, double + private: + template struct EnableIfKludge { + typedef Derived type; + }; + public: + template typename EnableIfKludge::kBytes>::type &operator<<(const T value) { + return CallToString(value); + } + + /* clang on OS X appears to consider std::size_t aka unsigned long distinct + * from uint64_t. So this function makes clang work. gcc considers + * uint64_t and std::size_t the same (on 64-bit) so this isn't necessary. + * But it does no harm since gcc sees it as a specialization of the + * EnableIfKludge template. + * Also, delegating to *this << static_cast(value) would loop + * indefinitely on gcc. + */ + Derived &operator<<(std::size_t value) { return CoerceToString(value); } + + // union types will map to int, but don't pass the template magic above in gcc. + Derived &operator<<(int value) { return CoerceToString(value); } + + // gcc considers these distinct from uint64_t + Derived &operator<<(unsigned long long value) { return CoerceToString(value); } + Derived &operator<<(signed long long value) { return CoerceToString(value); } + + // Character types that get copied as bytes instead of displayed as integers. + Derived &operator<<(char val) { return put(val); } + Derived &operator<<(signed char val) { return put(static_cast(val)); } + Derived &operator<<(unsigned char val) { return put(static_cast(val)); } + + // This is here to catch all the other pointer types. + Derived &operator<<(const void *value) { return CallToString(value); } + // This is here because the above line also catches const char*. + Derived &operator<<(const char *value) { return *this << StringPiece(value); } + Derived &operator<<(char *value) { return *this << StringPiece(value); } + + Derived &put(char val) { + char *c = C().Ensure(1); + *c = val; + C().AdvanceTo(++c); + return C(); + } + + char widen(char val) const { return val; } + + private: + // References to derived class for convenience. + Derived &C() { + return *static_cast(this); + } + + const Derived &C() const { + return *static_cast(this); + } + + template ::is_signed> struct Coerce {}; + + template struct Coerce { typedef uint16_t To; }; + template struct Coerce { typedef uint32_t To; }; + template struct Coerce { typedef uint64_t To; }; + + template struct Coerce { typedef int16_t To; }; + template struct Coerce { typedef int32_t To; }; + template struct Coerce { typedef int64_t To; }; + + template Derived &CoerceToString(const From value) { + return CallToString(static_cast::To>(value)); + } + + // This is separate to prevent an infinite loop if the compiler considers + // types the same (i.e. gcc std::size_t and uint64_t or uint32_t). + template Derived &CallToString(const T value) { + C().AdvanceTo(ToString(value, C().Ensure(ToStringBuf::kBytes))); + return C(); + } +}; + +} // namespace + +#endif // UTIL_FAKE_OSTREAM_H diff --git a/util/file.cc b/util/file.cc index be272f9bc..e8976bc10 100644 --- a/util/file.cc +++ b/util/file.cc @@ -147,17 +147,33 @@ std::size_t GuardLarge(std::size_t size) { } } +#if defined(_WIN32) || defined(_WIN64) +namespace { +const std::size_t kMaxDWORD = static_cast(4294967295UL); +} // namespace +#endif + std::size_t PartialRead(int fd, void *to, std::size_t amount) { #if defined(_WIN32) || defined(_WIN64) - int ret = _read(fd, to, GuardLarge(amount)); + DWORD ret; + HANDLE file_handle = reinterpret_cast(_get_osfhandle(fd)); + DWORD larger_size = static_cast(std::min(kMaxDWORD, amount)); + DWORD smaller_size = 28672; // Received reports that 31346 worked but higher values did not. This rounds down to the nearest multiple of 4096, the page size. + if (!ReadFile(file_handle, to, larger_size, &ret, NULL)) + { + DWORD last_error = GetLastError(); + if (last_error != ERROR_NOT_ENOUGH_MEMORY || !ReadFile(file_handle, to, smaller_size, &ret, NULL)) { + UTIL_THROW(WindowsException, "Windows error in ReadFile."); + } + } #else errno = 0; ssize_t ret; do { ret = read(fd, to, GuardLarge(amount)); } while (ret == -1 && errno == EINTR); -#endif UTIL_THROW_IF_ARG(ret < 0, FDException, (fd), "while reading " << amount << " bytes"); +#endif return static_cast(ret); } @@ -212,12 +228,6 @@ void WriteOrThrow(FILE *to, const void *data, std::size_t size) { UTIL_THROW_IF(1 != std::fwrite(data, size, 1, to), ErrnoException, "Short write; requested size " << size); } -#if defined(_WIN32) || defined(_WIN64) -namespace { -const std::size_t kMaxDWORD = static_cast(4294967295UL); -} // namespace -#endif - void ErsatzPRead(int fd, void *to_void, std::size_t size, uint64_t off) { uint8_t *to = static_cast(to_void); while (size) { @@ -230,7 +240,7 @@ void ErsatzPRead(int fd, void *to_void, std::size_t size, uint64_t off) { memset(&overlapped, 0, sizeof(OVERLAPPED)); overlapped.Offset = static_cast(off); overlapped.OffsetHigh = static_cast(off >> 32); - UTIL_THROW_IF(!ReadFile((HANDLE)_get_osfhandle(fd), to, reading, &ret, &overlapped), Exception, "ReadFile failed for offset " << off); + UTIL_THROW_IF(!ReadFile((HANDLE)_get_osfhandle(fd), to, reading, &ret, &overlapped), WindowsException, "ReadFile failed for offset " << off); #else ssize_t ret; errno = 0; diff --git a/util/file_piece.cc b/util/file_piece.cc index 7017942e6..0a4d3a9da 100644 --- a/util/file_piece.cc +++ b/util/file_piece.cc @@ -56,7 +56,7 @@ FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buf InitializeNoRead("istream", min_buffer); fallback_to_read_ = true; - data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED); + HugeMalloc(default_map_size_, false, data_); position_ = data_.begin(); position_end_ = position_; @@ -282,7 +282,7 @@ void FilePiece::TransitionToRead() { assert(!fallback_to_read_); fallback_to_read_ = true; data_.reset(); - data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED); + HugeMalloc(default_map_size_, false, data_); position_ = data_.begin(); position_end_ = position_; @@ -313,8 +313,7 @@ void FilePiece::ReadShift() { // Buffer too small. std::size_t valid_length = position_end_ - position_; default_map_size_ *= 2; - data_.call_realloc(default_map_size_); - UTIL_THROW_IF(!data_.get(), ErrnoException, "realloc failed for " << default_map_size_); + HugeRealloc(default_map_size_, false, data_); position_ = data_.begin(); position_end_ = position_ + valid_length; } else { diff --git a/util/file_piece_test.cc b/util/file_piece_test.cc index 11e2ab3aa..d03cd312d 100644 --- a/util/file_piece_test.cc +++ b/util/file_piece_test.cc @@ -1,7 +1,7 @@ // Tests might fail if you have creative characters in your path. Sue me. #include "util/file_piece.hh" -#include "util/fake_ofstream.hh" +#include "util/file_stream.hh" #include "util/file.hh" #include "util/scoped.hh" @@ -138,7 +138,7 @@ BOOST_AUTO_TEST_CASE(Numbers) { scoped_fd file(MakeTemp(FileLocation())); const float floating = 3.2; { - util::FakeOFStream writing(file.get()); + util::FileStream writing(file.get()); writing << "94389483984398493890287 " << floating << " 5"; } SeekOrThrow(file.get(), 0); diff --git a/util/file_stream.hh b/util/file_stream.hh new file mode 100644 index 000000000..ae9ad5aa7 --- /dev/null +++ b/util/file_stream.hh @@ -0,0 +1,89 @@ +/* Like std::ofstream but without being incredibly slow. Backed by a raw fd. + * Supports most of the built-in types except for long double. + */ +#ifndef UTIL_FILE_STREAM_H +#define UTIL_FILE_STREAM_H + +#include "util/fake_ostream.hh" +#include "util/file.hh" +#include "util/scoped.hh" + +#include +#include + +#include + +namespace util { + +class FileStream : public FakeOStream { + public: + FileStream(int out = -1, std::size_t buffer_size = 8192) + : buf_(util::MallocOrThrow(std::max(buffer_size, kToStringMaxBytes))), + current_(static_cast(buf_.get())), + end_(current_ + std::max(buffer_size, kToStringMaxBytes)), + fd_(out) {} + + ~FileStream() { + flush(); + } + + void SetFD(int to) { + flush(); + fd_ = to; + } + + FileStream &flush() { + if (current_ != buf_.get()) { + util::WriteOrThrow(fd_, buf_.get(), current_ - (char*)buf_.get()); + current_ = static_cast(buf_.get()); + } + return *this; + } + + // For writes of arbitrary size. + FileStream &write(const void *data, std::size_t length) { + if (UTIL_LIKELY(current_ + length <= end_)) { + std::memcpy(current_, data, length); + current_ += length; + return *this; + } + flush(); + if (current_ + length <= end_) { + std::memcpy(current_, data, length); + current_ += length; + } else { + util::WriteOrThrow(fd_, data, length); + } + return *this; + } + + FileStream &seekp(uint64_t to) { + util::SeekOrThrow(fd_, to); + return *this; + } + + protected: + friend class FakeOStream; + // For writes directly to buffer guaranteed to have amount < buffer size. + char *Ensure(std::size_t amount) { + if (UTIL_UNLIKELY(current_ + amount > end_)) { + flush(); + assert(current_ + amount <= end_); + } + return current_; + } + + void AdvanceTo(char *to) { + current_ = to; + assert(current_ <= end_); + } + + private: + util::scoped_malloc buf_; + char *current_, *end_; + int fd_; +}; + +} // namespace + +#endif diff --git a/util/integer_to_string.cc b/util/integer_to_string.cc index 6b8766119..5150d7973 100644 --- a/util/integer_to_string.cc +++ b/util/integer_to_string.cc @@ -1,3 +1,4 @@ +#include /* Fast integer to string conversion. Source: https://github.com/miloyip/itoa-benchmark Local modifications: @@ -637,4 +638,28 @@ char *ToString(uint16_t value, char *to) { return ToString((uint32_t)value, to); } +// void * to string. This hasn't been optimized at all really. +namespace { +const char kHexDigits[] = "0123456789abcdef"; +} // namespace + +char *ToString(const void *v, char *to) { + // Apparently it's 0, not 0x0. + if (!v) { + *to++ = '0'; + return to; + } + + *to++ = '0'; + *to++ = 'x'; + uintptr_t value = reinterpret_cast(v); + uint8_t shift = sizeof(void*) * 8 - 4; + for (; !(value >> shift); shift -= 4) {} + for (; ; shift -= 4) { + *to++ = kHexDigits[(value >> shift) & 0xf]; + if (!shift) break; + } + return to; +} + } // namespace util diff --git a/util/integer_to_string.hh b/util/integer_to_string.hh index 0d975b14e..9ac25bd78 100644 --- a/util/integer_to_string.hh +++ b/util/integer_to_string.hh @@ -18,6 +18,8 @@ char *ToString(int64_t value, char *to); char *ToString(uint16_t value, char *to); char *ToString(int16_t value, char *to); +char *ToString(const void *value, char *to); + inline char *ToString(bool value, char *to) { *to++ = '0' + value; return to; @@ -51,6 +53,14 @@ template <> struct ToStringBuf { enum { kBytes = 20 }; }; +template <> struct ToStringBuf { + // Either 18 on 64-bit or 10 on 32-bit. + enum { kBytes = sizeof(const void*) * 2 + 2 }; +}; + +// Maximum over this and float. +enum { kToStringMaxBytes = 20 }; + } // namespace util #endif // UTIL_INTEGER_TO_STRING_H diff --git a/util/integer_to_string_test.cc b/util/integer_to_string_test.cc index ded1ecec7..d090f64a8 100644 --- a/util/integer_to_string_test.cc +++ b/util/integer_to_string_test.cc @@ -21,9 +21,9 @@ template void TestValue(const T value) { template void TestCorners() { TestValue(std::numeric_limits::min()); TestValue(std::numeric_limits::max()); - TestValue(static_cast(0)); - TestValue(static_cast(-1)); - TestValue(static_cast(1)); + TestValue((T)0); + TestValue((T)-1); + TestValue((T)1); } BOOST_AUTO_TEST_CASE(Corners) { @@ -33,6 +33,7 @@ BOOST_AUTO_TEST_CASE(Corners) { TestCorners(); TestCorners(); TestCorners(); + TestCorners(); } template void TestAll() { @@ -62,4 +63,14 @@ BOOST_AUTO_TEST_CASE(Tens) { Test10s(); } +BOOST_AUTO_TEST_CASE(Pointers) { + for (uintptr_t i = 1; i < std::numeric_limits::max() / 10; i *= 10) { + TestValue((const void*)i); + } + for (uintptr_t i = 0; i < 256; ++i) { + TestValue((const void*)i); + TestValue((const void*)(i + 0xf00)); + } +} + }} // namespaces diff --git a/util/mmap.cc b/util/mmap.cc index 7dcb57ba3..b70fd7573 100644 --- a/util/mmap.cc +++ b/util/mmap.cc @@ -27,7 +27,7 @@ namespace util { -long SizePage() { +std::size_t SizePage() { #if defined(_WIN32) || defined(_WIN64) SYSTEM_INFO si; GetSystemInfo(&si); @@ -37,22 +37,6 @@ long SizePage() { #endif } -void SyncOrThrow(void *start, size_t length) { -#if defined(_WIN32) || defined(_WIN64) - UTIL_THROW_IF(!::FlushViewOfFile(start, length), ErrnoException, "Failed to sync mmap"); -#else - UTIL_THROW_IF(length && msync(start, length, MS_SYNC), ErrnoException, "Failed to sync mmap"); -#endif -} - -void UnmapOrThrow(void *start, size_t length) { -#if defined(_WIN32) || defined(_WIN64) - UTIL_THROW_IF(!::UnmapViewOfFile(start), ErrnoException, "Failed to unmap a file"); -#else - UTIL_THROW_IF(munmap(start, length), ErrnoException, "munmap failed"); -#endif -} - scoped_mmap::~scoped_mmap() { if (data_ != (void*)-1) { try { @@ -66,14 +50,24 @@ scoped_mmap::~scoped_mmap() { } } +namespace { +template T RoundUpPow2(T value, T mult) { + return ((value - 1) & ~(mult - 1)) + mult; +} +} // namespace + +scoped_memory::scoped_memory(std::size_t size, bool zeroed) : data_(NULL), size_(0), source_(NONE_ALLOCATED) { + HugeMalloc(size, zeroed, *this); +} + void scoped_memory::reset(void *data, std::size_t size, Alloc source) { switch(source_) { + case MMAP_ROUND_UP_ALLOCATED: + scoped_mmap(data_, RoundUpPow2(size_, (std::size_t)SizePage())); + break; case MMAP_ALLOCATED: scoped_mmap(data_, size_); break; - case ARRAY_ALLOCATED: - delete [] reinterpret_cast(data_); - break; case MALLOC_ALLOCATED: free(data_); break; @@ -85,7 +79,7 @@ void scoped_memory::reset(void *data, std::size_t size, Alloc source) { source_ = source; } -void scoped_memory::call_realloc(std::size_t size) { +/*void scoped_memory::call_realloc(std::size_t size) { assert(source_ == MALLOC_ALLOCATED || source_ == NONE_ALLOCATED); void *new_data = realloc(data_, size); if (!new_data) { @@ -95,7 +89,17 @@ void scoped_memory::call_realloc(std::size_t size) { size_ = size; source_ = MALLOC_ALLOCATED; } -} +}*/ + +const int kFileFlags = +#if defined(_WIN32) || defined(_WIN64) + 0 // MapOrThrow ignores flags on windows +#elif defined(MAP_FILE) + MAP_FILE | MAP_SHARED +#else + MAP_SHARED +#endif + ; void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset) { #ifdef MAP_POPULATE // Linux specific @@ -126,15 +130,168 @@ void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int return ret; } -const int kFileFlags = +void SyncOrThrow(void *start, size_t length) { #if defined(_WIN32) || defined(_WIN64) - 0 // MapOrThrow ignores flags on windows -#elif defined(MAP_FILE) - MAP_FILE | MAP_SHARED + UTIL_THROW_IF(!::FlushViewOfFile(start, length), ErrnoException, "Failed to sync mmap"); #else - MAP_SHARED + UTIL_THROW_IF(length && msync(start, length, MS_SYNC), ErrnoException, "Failed to sync mmap"); #endif - ; +} + +void UnmapOrThrow(void *start, size_t length) { +#if defined(_WIN32) || defined(_WIN64) + UTIL_THROW_IF(!::UnmapViewOfFile(start), ErrnoException, "Failed to unmap a file"); +#else + UTIL_THROW_IF(munmap(start, length), ErrnoException, "munmap failed"); +#endif +} + +// Linux huge pages. +#ifdef __linux__ + +namespace { + +bool AnonymousMap(std::size_t size, int flags, bool populate, util::scoped_memory &to) { + if (populate) flags |= MAP_POPULATE; + void *ret = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | flags, -1, 0); + if (ret == MAP_FAILED) return false; + to.reset(ret, size, scoped_memory::MMAP_ALLOCATED); + return true; +} + +bool TryHuge(std::size_t size, uint8_t alignment_bits, bool populate, util::scoped_memory &to) { + // Don't bother with these cases. + if (size < (1ULL << alignment_bits) || (1ULL << alignment_bits) < SizePage()) + return false; + + // First try: Linux >= 3.8 with manually configured hugetlb pages available. +#ifdef MAP_HUGE_SHIFT + if (AnonymousMap(size, MAP_HUGETLB | (alignment_bits << MAP_HUGE_SHIFT), populate, to)) + return true; +#endif + + // Second try: manually configured hugetlb pages exist, but kernel too old to + // pick size or not available. This might pick the wrong size huge pages, + // but the sysadmin must have made them available in the first place. + if (AnonymousMap(size, MAP_HUGETLB, populate, to)) + return true; + + // Third try: align to a multiple of the huge page size by overallocating. + // I feel bad about doing this, but it's also how posix_memalign is + // implemented. And the memory is virtual. + + // Round up requested size to multiple of page size. This will allow the pages after to be munmapped. + std::size_t size_up = RoundUpPow2(size, SizePage()); + + std::size_t ask = size_up + (1 << alignment_bits) - SizePage(); + // Don't populate because this is asking for more than we will use. + scoped_mmap larger(mmap(NULL, ask, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), ask); + if (larger.get() == MAP_FAILED) return false; + + // Throw out pages before the alignment point. + uintptr_t base = reinterpret_cast(larger.get()); + // Round up to next multiple of alignment. + uintptr_t rounded_up = RoundUpPow2(base, static_cast(1) << alignment_bits); + if (base != rounded_up) { + // If this throws an exception (which it shouldn't) then we want to unmap the whole thing by keeping it in larger. + UnmapOrThrow(larger.get(), rounded_up - base); + larger.steal(); + larger.reset(reinterpret_cast(rounded_up), ask - (rounded_up - base)); + } + + // Throw out pages after the requested size. + assert(larger.size() >= size_up); + if (larger.size() > size_up) { + // This is where we assume size_up is a multiple of page size. + UnmapOrThrow(static_cast(larger.get()) + size_up, larger.size() - size_up); + larger.reset(larger.steal(), size_up); + } + madvise(larger.get(), size_up, MADV_HUGEPAGE); + to.reset(larger.steal(), size, scoped_memory::MMAP_ROUND_UP_ALLOCATED); + return true; +} + +} // namespace + +#endif + +void HugeMalloc(std::size_t size, bool zeroed, scoped_memory &to) { + to.reset(); +#ifdef __linux__ + // TODO: architectures/page sizes other than 2^21 and 2^30. + // Attempt 1 GB pages. + // If the user asked for zeroed memory, assume they want it populated. + if (size >= (1ULL << 30) && TryHuge(size, 30, zeroed, to)) + return; + // Attempt 2 MB pages. + if (size >= (1ULL << 21) && TryHuge(size, 21, zeroed, to)) + return; +#endif // __linux__ + // Non-linux will always do this, as will small allocations on Linux. + to.reset(zeroed ? calloc(1, size) : malloc(size), size, scoped_memory::MALLOC_ALLOCATED); + UTIL_THROW_IF(!to.get(), ErrnoException, "Failed to allocate " << size << " bytes"); +} + +#ifdef __linux__ +const std::size_t kTransitionHuge = std::max(1ULL << 21, SizePage()); +#endif // __linux__ + +void HugeRealloc(std::size_t to, bool zero_new, scoped_memory &mem) { + if (!to) { + mem.reset(); + return; + } + std::size_t from_size = mem.size(); + switch (mem.source()) { + case scoped_memory::NONE_ALLOCATED: + HugeMalloc(to, zero_new, mem); + return; +#ifdef __linux__ + case scoped_memory::MMAP_ROUND_UP_ALLOCATED: + // for mremap's benefit. + from_size = RoundUpPow2(from_size, SizePage()); + case scoped_memory::MMAP_ALLOCATED: + // Downsizing below barrier? + if (to <= SizePage()) { + scoped_malloc replacement(malloc(to)); + memcpy(replacement.get(), mem.get(), std::min(to, mem.size())); + if (zero_new && to > mem.size()) + memset(static_cast(replacement.get()) + mem.size(), 0, to - mem.size()); + mem.reset(replacement.release(), to, scoped_memory::MALLOC_ALLOCATED); + } else { + void *new_addr = mremap(mem.get(), from_size, to, MREMAP_MAYMOVE); + UTIL_THROW_IF(!new_addr, ErrnoException, "Failed to mremap from " << from_size << " to " << to); + mem.steal(); + mem.reset(new_addr, to, scoped_memory::MMAP_ALLOCATED); + } + return; +#endif // __linux__ + case scoped_memory::MALLOC_ALLOCATED: +#ifdef __linux__ + // Transition larger allocations to huge pages, but don't keep trying if we're still malloc allocated. + if (to >= kTransitionHuge && mem.size() < kTransitionHuge) { + scoped_memory replacement; + HugeMalloc(to, zero_new, replacement); + memcpy(replacement.get(), mem.get(), mem.size()); + // This can't throw. + mem.reset(replacement.get(), replacement.size(), replacement.source()); + replacement.steal(); + return; + } +#endif // __linux__ + { + void *new_addr = std::realloc(mem.get(), to); + UTIL_THROW_IF(!new_addr, ErrnoException, "realloc to " << to << " bytes failed."); + if (zero_new && to > mem.size()) + memset(static_cast(new_addr) + mem.size(), 0, to - mem.size()); + mem.steal(); + mem.reset(new_addr, to, scoped_memory::MALLOC_ALLOCATED); + } + return; + default: + UTIL_THROW(Exception, "HugeRealloc called with type " << mem.source()); + } +} void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out) { switch (method) { @@ -151,33 +308,17 @@ void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scope case POPULATE_OR_READ: #endif case READ: - out.reset(MallocOrThrow(size), size, scoped_memory::MALLOC_ALLOCATED); + HugeMalloc(size, false, out); SeekOrThrow(fd, offset); ReadOrThrow(fd, out.get(), size); break; case PARALLEL_READ: - out.reset(MallocOrThrow(size), size, scoped_memory::MALLOC_ALLOCATED); + HugeMalloc(size, false, out); ParallelRead(fd, out.get(), size, offset); break; } } -// Allocates zeroed memory in to. -void MapAnonymous(std::size_t size, util::scoped_memory &to) { - to.reset(); -#if defined(_WIN32) || defined(_WIN64) - to.reset(calloc(1, size), size, scoped_memory::MALLOC_ALLOCATED); -#else - to.reset(MapOrThrow(size, true, -# if defined(MAP_ANONYMOUS) - MAP_ANONYMOUS | MAP_PRIVATE // Linux -# else - MAP_ANON | MAP_PRIVATE // BSD -# endif - , false, -1, 0), size, scoped_memory::MMAP_ALLOCATED); -#endif -} - void *MapZeroedWrite(int fd, std::size_t size) { ResizeOrThrow(fd, 0); ResizeOrThrow(fd, size); diff --git a/util/mmap.hh b/util/mmap.hh index 9ac604975..b474dc75b 100644 --- a/util/mmap.hh +++ b/util/mmap.hh @@ -12,7 +12,7 @@ namespace util { class scoped_fd; -long SizePage(); +std::size_t SizePage(); // (void*)-1 is MAP_FAILED; this is done to avoid including the mmap header here. class scoped_mmap { @@ -37,6 +37,13 @@ class scoped_mmap { reset((void*)-1, 0); } + void *steal() { + void *ret = data_; + data_ = (void*)-1; + size_ = 0; + return ret; + } + private: void *data_; std::size_t size_; @@ -51,13 +58,21 @@ class scoped_mmap { */ class scoped_memory { public: - typedef enum {MMAP_ALLOCATED, ARRAY_ALLOCATED, MALLOC_ALLOCATED, NONE_ALLOCATED} Alloc; + typedef enum { + MMAP_ROUND_UP_ALLOCATED, // The size was rounded up to a multiple of page size. Do the same before munmap. + MMAP_ALLOCATED, // munmap + MALLOC_ALLOCATED, // free + NONE_ALLOCATED // nothing here! + } Alloc; scoped_memory(void *data, std::size_t size, Alloc source) : data_(data), size_(size), source_(source) {} scoped_memory() : data_(NULL), size_(0), source_(NONE_ALLOCATED) {} + // Calls HugeMalloc + scoped_memory(std::size_t to, bool zero_new); + ~scoped_memory() { reset(); } void *get() const { return data_; } @@ -71,9 +86,13 @@ class scoped_memory { void reset(void *data, std::size_t size, Alloc from); - // realloc allows the current data to escape hence the need for this call - // If realloc fails, destroys the original too and get() returns NULL. - void call_realloc(std::size_t to); + void *steal() { + void *ret = data_; + data_ = NULL; + size_ = 0; + source_ = NONE_ALLOCATED; + return ret; + } private: void *data_; @@ -85,6 +104,30 @@ class scoped_memory { scoped_memory &operator=(const scoped_memory &); }; +extern const int kFileFlags; + +// Cross-platform, error-checking wrapper for mmap(). +void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset = 0); + +// msync wrapper +void SyncOrThrow(void *start, size_t length); + +// Cross-platform, error-checking wrapper for munmap(). +void UnmapOrThrow(void *start, size_t length); + +// Allocate memory, promising that all/vast majority of it will be used. Tries +// hard to use huge pages on Linux. +// If you want zeroed memory, pass zeroed = true. +void HugeMalloc(std::size_t size, bool zeroed, scoped_memory &to); + +// Reallocates memory ala realloc but with option to zero the new memory. +// On Linux, the memory can come from anonymous mmap or malloc/calloc. +// On non-Linux, only malloc/calloc is supported. +// +// To summarize, any memory from HugeMalloc or HugeRealloc can be resized with +// this. +void HugeRealloc(std::size_t size, bool new_zeroed, scoped_memory &mem); + typedef enum { // mmap with no prepopulate LAZY, @@ -98,25 +141,12 @@ typedef enum { PARALLEL_READ, } LoadMethod; -extern const int kFileFlags; - -// Cross-platform, error-checking wrapper for mmap(). -void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset = 0); - -// Cross-platform, error-checking wrapper for munmap(). -void UnmapOrThrow(void *start, size_t length); - void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out); -void MapAnonymous(std::size_t size, scoped_memory &to); - // Open file name with mmap of size bytes, all of which are initially zero. void *MapZeroedWrite(int fd, std::size_t size); void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file); -// msync wrapper -void SyncOrThrow(void *start, size_t length); - // Forward rolling memory map with no overlap. class Rolling { public: diff --git a/util/pool.cc b/util/pool.cc index e0e4c61f6..246417c16 100644 --- a/util/pool.cc +++ b/util/pool.cc @@ -4,6 +4,8 @@ #include +#include + namespace util { Pool::Pool() { diff --git a/util/probing_hash_table.hh b/util/probing_hash_table.hh index 53fbbe996..d17016a4f 100644 --- a/util/probing_hash_table.hh +++ b/util/probing_hash_table.hh @@ -2,7 +2,7 @@ #define UTIL_PROBING_HASH_TABLE_H #include "util/exception.hh" -#include "util/scoped.hh" +#include "util/mmap.hh" #include #include @@ -336,9 +336,11 @@ template (backend_.buckets_ - 1, backend_.buckets_ * 0.9); + if (!KeyIsRawZero(invalid)) { + Clear(); + } } // Assumes that the key is unique. Multiple insertions won't cause a failure, just inconsistent lookup. @@ -379,16 +381,23 @@ template (backend_.buckets_ - 1, backend_.buckets_ * 0.9); + } + + bool KeyIsRawZero(const Key &key) { + for (const uint8_t *i = reinterpret_cast(&key); i < reinterpret_cast(&key) + sizeof(Key); ++i) { + if (*i) return false; + } + return true; } std::size_t allocated_; - util::scoped_malloc mem_; + util::scoped_memory mem_; Backend backend_; std::size_t threshold_; }; diff --git a/util/probing_hash_table_benchmark_main.cc b/util/probing_hash_table_benchmark_main.cc index c5129480f..583d21f5e 100644 --- a/util/probing_hash_table_benchmark_main.cc +++ b/util/probing_hash_table_benchmark_main.cc @@ -1,6 +1,6 @@ #include "util/file.hh" #include "util/probing_hash_table.hh" -#include "util/scoped.hh" +#include "util/mmap.hh" #include "util/usage.hh" #include @@ -46,11 +46,12 @@ struct PrefetchEntry { const Entry *pointer; }; -const std::size_t kPrefetchSize = 4; -template class PrefetchQueue { +template class PrefetchQueue { public: + typedef TableT Table; + explicit PrefetchQueue(Table &table) : table_(table), cur_(0), twiddle_(false) { - for (PrefetchEntry *i = entries_; i != entries_ + kPrefetchSize; ++i) + for (PrefetchEntry *i = entries_; i != entries_ + PrefetchSize; ++i) i->pointer = NULL; } @@ -66,7 +67,7 @@ template class PrefetchQueue { bool Drain() { if (Cur().pointer) { - for (PrefetchEntry *i = &Cur(); i < entries_ + kPrefetchSize; ++i) { + for (PrefetchEntry *i = &Cur(); i < entries_ + PrefetchSize; ++i) { twiddle_ ^= table_.FindFromIdeal(i->key, i->pointer); } } @@ -80,11 +81,11 @@ template class PrefetchQueue { PrefetchEntry &Cur() { return entries_[cur_]; } void Next() { ++cur_; - cur_ = cur_ % kPrefetchSize; + cur_ = cur_ % PrefetchSize; } Table &table_; - PrefetchEntry entries_[kPrefetchSize]; + PrefetchEntry entries_[PrefetchSize]; std::size_t cur_; bool twiddle_; @@ -93,12 +94,23 @@ template class PrefetchQueue { void operator=(const PrefetchQueue&); }; -/*template class Immediate { +template class Immediate { public: + typedef TableT Table; + + explicit Immediate(Table &table) : table_(table), twiddle_(false) {} + + void Add(uint64_t key) { + typename Table::ConstIterator it; + twiddle_ ^= table_.Find(key, it); + } + + bool Drain() const { return twiddle_; } private: Table &table_; -};*/ + bool twiddle_; +}; std::size_t Size(uint64_t entries, float multiplier = 1.5) { typedef util::ProbingHashTable, Power2Mod> Table; @@ -106,39 +118,54 @@ std::size_t Size(uint64_t entries, float multiplier = 1.5) { return Power2Mod::RoundBuckets(Table::Size(entries, multiplier) / sizeof(Entry)) * sizeof(Entry); } -template bool Test(URandom &rn, uint64_t entries, const uint64_t *const queries_begin, const uint64_t *const queries_end, float multiplier = 1.5) { - typedef util::ProbingHashTable, Mod> Table; +template bool Test(URandom &rn, uint64_t entries, const uint64_t *const queries_begin, const uint64_t *const queries_end, bool ordinary_malloc, float multiplier = 1.5) { std::size_t size = Size(entries, multiplier); - scoped_malloc backing(util::CallocOrThrow(size)); - Table table(backing.get(), size); + scoped_memory backing; + if (ordinary_malloc) { + backing.reset(util::CallocOrThrow(size), size, scoped_memory::MALLOC_ALLOCATED); + } else { + util::HugeMalloc(size, true, backing); + } + typename Queue::Table table(backing.get(), size); - double start = UserTime(); + double start = CPUTime(); for (uint64_t i = 0; i < entries; ++i) { Entry entry; entry.key = rn.Get(); table.Insert(entry); } - double inserted = UserTime() - start; - double before_lookup = UserTime(); - PrefetchQueue queue(table); + double inserted = CPUTime() - start; + double before_lookup = CPUTime(); + Queue queue(table); for (const uint64_t *i = queries_begin; i != queries_end; ++i) { queue.Add(*i); -/* typename Table::ConstIterator it; - meaningless ^= table.Find(*i, it);*/ } bool meaningless = queue.Drain(); - std::cout << entries << ' ' << size << ' ' << (inserted / static_cast(entries)) << ' ' << (UserTime() - before_lookup) / static_cast(queries_end - queries_begin) << '\n'; + std::cout << ' ' << (inserted / static_cast(entries)) << ' ' << (CPUTime() - before_lookup) / static_cast(queries_end - queries_begin) << std::flush; return meaningless; } -template bool TestRun(uint64_t lookups = 20000000, float multiplier = 1.5) { +bool TestRun(uint64_t lookups = 20000000, float multiplier = 1.5) { URandom rn; - util::scoped_malloc queries(util::CallocOrThrow(lookups * sizeof(uint64_t))); + util::scoped_memory queries; + HugeMalloc(lookups * sizeof(uint64_t), true, queries); rn.Batch(static_cast(queries.get()), static_cast(queries.get()) + lookups); uint64_t physical_mem_limit = util::GuessPhysicalMemory() / 2; bool meaningless = true; for (uint64_t i = 4; Size(i / multiplier) < physical_mem_limit; i *= 4) { - meaningless ^= util::Test(rn, i / multiplier, static_cast(queries.get()), static_cast(queries.get()) + lookups, multiplier); + std::cout << static_cast(i / multiplier) << ' ' << Size(i / multiplier); + typedef util::ProbingHashTable, Power2Mod> Table; + typedef util::ProbingHashTable, DivMod> TableDiv; + const uint64_t *const queries_begin = static_cast(queries.get()); + meaningless ^= util::Test >(rn, i / multiplier, queries_begin, queries_begin + lookups, true, multiplier); + meaningless ^= util::Test >(rn, i / multiplier, queries_begin, queries_begin + lookups, true, multiplier); + meaningless ^= util::Test >(rn, i / multiplier, queries_begin, queries_begin + lookups, true, multiplier); + meaningless ^= util::Test >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier); + meaningless ^= util::Test >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier); + meaningless ^= util::Test >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier); + meaningless ^= util::Test >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier); + meaningless ^= util::Test >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier); + std::cout << std::endl; } return meaningless; } @@ -148,9 +175,7 @@ template bool TestRun(uint64_t lookups = 20000000, float multiplier int main() { bool meaningless = false; - std::cout << "#Integer division\n"; - meaningless ^= util::TestRun(); - std::cout << "#Masking\n"; - meaningless ^= util::TestRun(); + std::cout << "#CPU time\n"; + meaningless ^= util::TestRun(); std::cerr << "Meaningless: " << meaningless << '\n'; } diff --git a/util/scoped.cc b/util/scoped.cc index 84f4344b7..817aa2424 100644 --- a/util/scoped.cc +++ b/util/scoped.cc @@ -27,7 +27,7 @@ void *MallocOrThrow(std::size_t requested) { } void *CallocOrThrow(std::size_t requested) { - return InspectAddr(std::calloc(1, requested), requested, "calloc"); + return InspectAddr(std::calloc(requested, 1), requested, "calloc"); } void scoped_malloc::call_realloc(std::size_t requested) { diff --git a/util/stream/rewindable_stream.cc b/util/stream/rewindable_stream.cc index 9421b25c3..726e2a72d 100644 --- a/util/stream/rewindable_stream.cc +++ b/util/stream/rewindable_stream.cc @@ -1,6 +1,5 @@ #include "util/stream/rewindable_stream.hh" #include "util/pcqueue.hh" -#include #include diff --git a/util/string_stream.hh b/util/string_stream.hh new file mode 100644 index 000000000..730403d70 --- /dev/null +++ b/util/string_stream.hh @@ -0,0 +1,44 @@ +#ifndef UTIL_STRING_STREAM_H +#define UTIL_STRING_STREAM_H + +#include "util/fake_ostream.hh" + +#include +#include + +namespace util { + +class StringStream : public FakeOStream { + public: + // Semantics: appends to string. Remember to clear first! + explicit StringStream(std::string &out) + : out_(out) {} + + StringStream &flush() { return *this; } + + StringStream &write(const void *data, std::size_t length) { + out_.append(static_cast(data), length); + return *this; + } + + protected: + friend class FakeOStream; + char *Ensure(std::size_t amount) { + std::size_t current = out_.size(); + out_.resize(out_.size() + amount); + return &out_[current]; + } + + void AdvanceTo(char *to) { + assert(to <= &*out_.end()); + assert(to >= &*out_.begin()); + out_.resize(to - &*out_.begin()); + } + + private: + std::string &out_; +}; + +} // namespace + +#endif // UTIL_STRING_STREAM_H diff --git a/util/string_stream_test.cc b/util/string_stream_test.cc new file mode 100644 index 000000000..7d25711c2 --- /dev/null +++ b/util/string_stream_test.cc @@ -0,0 +1,57 @@ +#define BOOST_LEXICAL_CAST_ASSUME_C_LOCALE +#define BOOST_TEST_MODULE FakeOStreamTest + +#include "util/string_stream.hh" +#include +#include + +#include +#include + +namespace util { namespace { + +template void TestEqual(const T value) { + std::string str; + StringStream(str) << value; + BOOST_CHECK_EQUAL(boost::lexical_cast(value), str); +} + +template void TestCorners() { + TestEqual(std::numeric_limits::max()); + TestEqual(std::numeric_limits::min()); + TestEqual(static_cast(0)); + TestEqual(static_cast(-1)); + TestEqual(static_cast(1)); +} + +BOOST_AUTO_TEST_CASE(Integer) { + TestCorners(); + TestCorners(); + TestCorners(); + + TestCorners(); + TestCorners(); + TestCorners(); + + TestCorners(); + TestCorners(); + TestCorners(); + + TestCorners(); + TestCorners(); + TestCorners(); + + TestCorners(); + TestCorners(); + TestCorners(); + + TestCorners(); +} + +enum TinyEnum { EnumValue }; + +BOOST_AUTO_TEST_CASE(EnumCase) { + TestEqual(EnumValue); +} + +}} // namespaces diff --git a/util/usage.cc b/util/usage.cc index 5f66b17d2..7965e9645 100644 --- a/util/usage.cc +++ b/util/usage.cc @@ -135,14 +135,26 @@ double WallTime() { return Subtract(GetWall(), kRecordStart.Started()); } -double UserTime() { -#if !defined(_WIN32) && !defined(_WIN64) +double CPUTime() { +#if defined(_WIN32) || defined(_WIN64) + return 0.0; +#else struct rusage usage; if (getrusage(RUSAGE_SELF, &usage)) return 0.0; - return DoubleSec(usage.ru_utime); + return DoubleSec(usage.ru_utime) + DoubleSec(usage.ru_stime); +#endif +} + +uint64_t RSSMax() { +#if defined(_WIN32) || defined(_WIN64) + return 0; +#else + struct rusage usage; + if (getrusage(RUSAGE_SELF, &usage)) + return 0; + return static_cast(usage.ru_maxrss) * 1024; #endif - return 0.0; } void PrintUsage(std::ostream &out) { @@ -274,6 +286,7 @@ template uint64_t ParseNum(const std::string &arg) { return static_cast(static_cast(value) * static_cast(mem) / 100.0); } + if (after == "k") after == "K"; std::string units("bKMGTPEZY"); std::string::size_type index = units.find(after[0]); UTIL_THROW_IF_ARG(index == std::string::npos, SizeParseError, (arg), "the allowed suffixes are " << units << "%."); diff --git a/util/usage.hh b/util/usage.hh index dff81b59d..2f1b3e992 100644 --- a/util/usage.hh +++ b/util/usage.hh @@ -9,7 +9,11 @@ namespace util { // Time in seconds since process started. Zero on unsupported platforms. double WallTime(); -double UserTime(); +// User + system time. +double CPUTime(); + +// Resident usage in bytes. +uint64_t RSSMax(); void PrintUsage(std::ostream &to);