KenLM a590a3a4dadf516a1cff28c8f1c06aa89766f519 including StringStream

TODO: kill istream
This commit is contained in:
Kenneth Heafield 2015-09-29 16:58:02 +01:00
parent 82527fc8b2
commit ea8e19f286
38 changed files with 850 additions and 403 deletions

View File

@ -169,8 +169,7 @@ void *BinaryFormat::SetupJustVocab(std::size_t memory_size, uint8_t order) {
vocab_size_ = memory_size;
if (!write_mmap_) {
header_size_ = 0;
util::MapAnonymous(memory_size, memory_vocab_);
util::AdviseHugePages(memory_vocab_.get(), memory_size);
util::HugeMalloc(memory_size, true, memory_vocab_);
return reinterpret_cast<uint8_t*>(memory_vocab_.get());
}
header_size_ = TotalHeaderSize(order);
@ -181,16 +180,16 @@ void *BinaryFormat::SetupJustVocab(std::size_t memory_size, uint8_t order) {
switch (write_method_) {
case Config::WRITE_MMAP:
mapping_.reset(util::MapZeroedWrite(file_.get(), total), total, util::scoped_memory::MMAP_ALLOCATED);
util::AdviseHugePages(vocab_base, total);
vocab_base = mapping_.get();
break;
case Config::WRITE_AFTER:
util::ResizeOrThrow(file_.get(), 0);
util::MapAnonymous(total, memory_vocab_);
util::HugeMalloc(total, true, memory_vocab_);
vocab_base = memory_vocab_.get();
break;
}
strncpy(reinterpret_cast<char*>(vocab_base), kMagicIncomplete, header_size_);
util::AdviseHugePages(vocab_base, total);
return reinterpret_cast<uint8_t*>(vocab_base) + header_size_;
}
@ -200,7 +199,7 @@ void *BinaryFormat::GrowForSearch(std::size_t memory_size, std::size_t vocab_pad
std::size_t new_size = header_size_ + vocab_size_ + vocab_pad_ + memory_size;
vocab_string_offset_ = new_size;
if (!write_mmap_ || write_method_ == Config::WRITE_AFTER) {
util::MapAnonymous(memory_size, memory_search_);
util::HugeMalloc(memory_size, true, memory_search_);
assert(header_size_ == 0 || write_mmap_);
vocab_base = reinterpret_cast<uint8_t*>(memory_vocab_.get()) + header_size_;
util::AdviseHugePages(memory_search_.get(), memory_size);

View File

@ -5,7 +5,7 @@
#include "lm/lm_exception.hh"
#include "lm/vocab.hh"
#include "lm/word_index.hh"
#include "util/fake_ofstream.hh"
#include "util/file_stream.hh"
#include "util/file.hh"
#include "util/file_piece.hh"
#include "util/murmur_hash.hh"

View File

@ -4,21 +4,21 @@
#include "lm/builder/payload.hh"
#include "lm/common/print.hh"
#include "lm/common/ngram_stream.hh"
#include "util/fake_ofstream.hh"
#include "util/file_stream.hh"
#include "util/file.hh"
#include <boost/lexical_cast.hpp>
namespace lm { namespace builder {
// Not defined, only specialized.
template <class T> void PrintPayload(util::FakeOFStream &to, const BuildingPayload &payload);
template <> inline void PrintPayload<uint64_t>(util::FakeOFStream &to, const BuildingPayload &payload) {
template <class T> void PrintPayload(util::FileStream &to, const BuildingPayload &payload);
template <> inline void PrintPayload<uint64_t>(util::FileStream &to, const BuildingPayload &payload) {
to << payload.count;
}
template <> inline void PrintPayload<Uninterpolated>(util::FakeOFStream &to, const BuildingPayload &payload) {
template <> inline void PrintPayload<Uninterpolated>(util::FileStream &to, const BuildingPayload &payload) {
to << log10(payload.uninterp.prob) << ' ' << log10(payload.uninterp.gamma);
}
template <> inline void PrintPayload<ProbBackoff>(util::FakeOFStream &to, const BuildingPayload &payload) {
template <> inline void PrintPayload<ProbBackoff>(util::FileStream &to, const BuildingPayload &payload) {
to << payload.complete.prob << ' ' << payload.complete.backoff;
}
@ -36,7 +36,7 @@ template <class V> class Print {
void Run(const util::stream::ChainPositions &chains) {
util::scoped_fd fd(to_);
util::FakeOFStream out(to_);
util::FileStream out(to_);
NGramStreams<BuildingPayload> streams(chains);
for (NGramStream<BuildingPayload> *s = streams.begin(); s != streams.end(); ++s) {
DumpStream(*s, out);
@ -45,13 +45,13 @@ template <class V> class Print {
void Run(const util::stream::ChainPosition &position) {
util::scoped_fd fd(to_);
util::FakeOFStream out(to_);
util::FileStream out(to_);
NGramStream<BuildingPayload> stream(position);
DumpStream(stream, out);
}
private:
void DumpStream(NGramStream<BuildingPayload> &stream, util::FakeOFStream &to) {
void DumpStream(NGramStream<BuildingPayload> &stream, util::FileStream &to) {
for (; stream; ++stream) {
PrintPayload<V>(to, stream->Value());
for (const WordIndex *w = stream->begin(); w != stream->end(); ++w) {

View File

@ -30,7 +30,7 @@ int main(int argc, char *argv[]) {
UTIL_THROW_IF(*i >= vocab.Size(), util::Exception, "Vocab ID " << *i << " is larger than the vocab file's maximum of " << vocab.Size() << ". Are you sure you have the right order and vocab file for these counts?");
std::cout << vocab.Lookup(*i) << ' ';
}
// TODO don't use std::cout because it is slow. Add fast uint64_t printing support to FakeOFStream.
// TODO don't use std::cout because it is slow. Add fast uint64_t printing support to FileStream.
std::cout << *reinterpret_cast<const uint64_t*>(words + order) << '\n';
}
}

View File

@ -12,7 +12,6 @@
#include <iostream>
#include <cassert>
#include <cmath>
#include <iostream>
namespace lm { namespace builder {
namespace {

View File

@ -2,7 +2,7 @@
#include "lm/common/model_buffer.hh"
#include "lm/common/print.hh"
#include "util/fake_ofstream.hh"
#include "util/file_stream.hh"
#include "util/stream/multi_stream.hh"
#include <iostream>
@ -41,7 +41,7 @@ void Output::Apply(HookType hook_type, util::stream::Chains &chains) {
void PrintHook::Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) {
if (verbose_header_) {
util::FakeOFStream out(file_.get(), 50);
util::FileStream out(file_.get(), 50);
out << "# Input file: " << info.input_file << '\n';
out << "# Token count: " << info.token_count << '\n';
out << "# Smoothing: Modified Kneser-Ney" << '\n';

View File

@ -1,6 +1,6 @@
#include "lm/common/model_buffer.hh"
#include "util/exception.hh"
#include "util/fake_ofstream.hh"
#include "util/file_stream.hh"
#include "util/file.hh"
#include "util/file_piece.hh"
#include "util/stream/io.hh"
@ -68,7 +68,7 @@ void ModelBuffer::Sink(util::stream::Chains &chains, const std::vector<uint64_t>
}
if (keep_buffer_) {
util::scoped_fd metadata(util::CreateOrThrow((file_base_ + ".kenlm_intermediate").c_str()));
util::FakeOFStream meta(metadata.get(), 200);
util::FileStream meta(metadata.get(), 200);
meta << kMetadataHeader << "\nCounts";
for (std::vector<uint64_t>::const_iterator i = counts_.begin(); i != counts_.end(); ++i) {
meta << ' ' << *i;

View File

@ -1,7 +1,7 @@
#include "lm/common/print.hh"
#include "lm/common/ngram_stream.hh"
#include "util/fake_ofstream.hh"
#include "util/file_stream.hh"
#include "util/file.hh"
#include "util/mmap.hh"
#include "util/scoped.hh"
@ -24,7 +24,7 @@ VocabReconstitute::VocabReconstitute(int fd) {
}
namespace {
template <class Payload> void PrintLead(const VocabReconstitute &vocab, ProxyStream<Payload> &stream, util::FakeOFStream &out) {
template <class Payload> void PrintLead(const VocabReconstitute &vocab, ProxyStream<Payload> &stream, util::FileStream &out) {
out << stream->Value().prob << '\t' << vocab.Lookup(*stream->begin());
for (const WordIndex *i = stream->begin() + 1; i != stream->end(); ++i) {
out << ' ' << vocab.Lookup(*i);
@ -34,7 +34,7 @@ template <class Payload> void PrintLead(const VocabReconstitute &vocab, ProxyStr
void PrintARPA::Run(const util::stream::ChainPositions &positions) {
VocabReconstitute vocab(vocab_fd_);
util::FakeOFStream out(out_fd_);
util::FileStream out(out_fd_);
out << "\\data\\\n";
for (size_t i = 0; i < positions.size(); ++i) {
out << "ngram " << (i+1) << '=' << counts_[i] << '\n';

View File

@ -1,5 +1,6 @@
#include "lm/filter/arpa_io.hh"
#include "util/file_piece.hh"
#include "util/string_stream.hh"
#include <iostream>
#include <ostream>
@ -22,14 +23,8 @@ ARPAInputException::ARPAInputException(const StringPiece &message, const StringP
ARPAInputException::~ARPAInputException() throw() {}
ARPAOutputException::ARPAOutputException(const char *message, const std::string &file_name) throw() {
*this << message << " in file " << file_name;
}
ARPAOutputException::~ARPAOutputException() throw() {}
// Seeking is the responsibility of the caller.
void WriteCounts(std::ostream &out, const std::vector<uint64_t> &number) {
template <class Stream> void WriteCounts(Stream &out, const std::vector<uint64_t> &number) {
out << "\n\\data\\\n";
for (unsigned int i = 0; i < number.size(); ++i) {
out << "ngram " << i+1 << "=" << number[i] << '\n';
@ -38,9 +33,10 @@ void WriteCounts(std::ostream &out, const std::vector<uint64_t> &number) {
}
size_t SizeNeededForCounts(const std::vector<uint64_t> &number) {
std::ostringstream buf;
WriteCounts(buf, number);
return buf.tellp();
std::string buf;
util::StringStream stream(buf);
WriteCounts(stream, number);
return buf.size();
}
bool IsEntirelyWhiteSpace(const StringPiece &line) {
@ -50,44 +46,21 @@ bool IsEntirelyWhiteSpace(const StringPiece &line) {
return true;
}
ARPAOutput::ARPAOutput(const char *name, size_t buffer_size) : file_name_(name), buffer_(new char[buffer_size]) {
try {
file_.exceptions(std::ostream::eofbit | std::ostream::failbit | std::ostream::badbit);
if (!file_.rdbuf()->pubsetbuf(buffer_.get(), buffer_size)) {
std::cerr << "Warning: could not enlarge buffer for " << name << std::endl;
buffer_.reset();
}
file_.open(name, std::ios::out | std::ios::binary);
} catch (const std::ios_base::failure &f) {
throw ARPAOutputException("Opening", file_name_);
}
}
ARPAOutput::ARPAOutput(const char *name, size_t buffer_size)
: file_backing_(util::CreateOrThrow(name)), file_(file_backing_.get(), buffer_size) {}
void ARPAOutput::ReserveForCounts(std::streampos reserve) {
try {
for (std::streampos i = 0; i < reserve; i += std::streampos(1)) {
file_ << '\n';
}
} catch (const std::ios_base::failure &f) {
throw ARPAOutputException("Writing blanks to reserve space for counts to ", file_name_);
for (std::streampos i = 0; i < reserve; i += std::streampos(1)) {
file_ << '\n';
}
}
void ARPAOutput::BeginLength(unsigned int length) {
fast_counter_ = 0;
try {
file_ << '\\' << length << "-grams:" << '\n';
} catch (const std::ios_base::failure &f) {
throw ARPAOutputException("Writing n-gram header to ", file_name_);
}
file_ << '\\' << length << "-grams:" << '\n';
}
void ARPAOutput::EndLength(unsigned int length) {
try {
file_ << '\n';
} catch (const std::ios_base::failure &f) {
throw ARPAOutputException("Writing blank at end of count list to ", file_name_);
}
file_ << '\n';
if (length > counts_.size()) {
counts_.resize(length);
}
@ -95,14 +68,10 @@ void ARPAOutput::EndLength(unsigned int length) {
}
void ARPAOutput::Finish() {
try {
file_ << "\\end\\\n";
file_.seekp(0);
WriteCounts(file_, counts_);
file_ << std::flush;
} catch (const std::ios_base::failure &f) {
throw ARPAOutputException("Finishing including writing counts at beginning to ", file_name_);
}
file_ << "\\end\\\n";
file_.seekp(0);
WriteCounts(file_, counts_);
file_.flush();
}
} // namespace lm

View File

@ -4,6 +4,7 @@
*/
#include "lm/read_arpa.hh"
#include "util/exception.hh"
#include "util/file_stream.hh"
#include "util/string_piece.hh"
#include "util/tokenize_piece.hh"
@ -28,17 +29,6 @@ class ARPAInputException : public util::Exception {
virtual ~ARPAInputException() throw();
};
class ARPAOutputException : public util::ErrnoException {
public:
ARPAOutputException(const char *prefix, const std::string &file_name) throw();
virtual ~ARPAOutputException() throw();
const std::string &File() const throw() { return file_name_; }
private:
const std::string file_name_;
};
// Handling for the counts of n-grams at the beginning of ARPA files.
size_t SizeNeededForCounts(const std::vector<uint64_t> &number);
@ -55,11 +45,7 @@ class ARPAOutput : boost::noncopyable {
void BeginLength(unsigned int length);
void AddNGram(const StringPiece &line) {
try {
file_ << line << '\n';
} catch (const std::ios_base::failure &f) {
throw ARPAOutputException("Writing an n-gram", file_name_);
}
file_ << line << '\n';
++fast_counter_;
}
@ -76,9 +62,8 @@ class ARPAOutput : boost::noncopyable {
void Finish();
private:
const std::string file_name_;
boost::scoped_array<char> buffer_;
std::fstream file_;
util::scoped_fd file_backing_;
util::FileStream file_;
size_t fast_counter_;
std::vector<uint64_t> counts_;
};

View File

@ -5,7 +5,7 @@
#include <iostream>
#include <string>
#include "util/fake_ofstream.hh"
#include "util/file_stream.hh"
#include "util/file.hh"
#include "util/file_piece.hh"
@ -28,7 +28,7 @@ class CountOutput : boost::noncopyable {
}
private:
util::FakeOFStream file_;
util::FileStream file_;
};
class CountBatch {

View File

@ -1,4 +1,4 @@
#include "util/fake_ofstream.hh"
#include "util/file_stream.hh"
#include "util/file_piece.hh"
#include "util/murmur_hash.hh"
#include "util/pool.hh"
@ -68,7 +68,7 @@ class TargetWords {
}
void Print() const {
util::FakeOFStream out(1);
util::FileStream out(1);
for (std::vector<boost::unordered_set<const char *> >::const_iterator i = vocab_.begin(); i != vocab_.end(); ++i) {
for (boost::unordered_set<const char *>::const_iterator j = i->begin(); j != i->end(); ++j) {
out << *j << ' ';

View File

@ -1,5 +1,5 @@
#include "lm/model.hh"
#include "util/fake_ofstream.hh"
#include "util/file_stream.hh"
#include "util/file.hh"
#include "util/file_piece.hh"
#include "util/usage.hh"
@ -10,7 +10,7 @@ namespace {
template <class Model, class Width> void ConvertToBytes(const Model &model, int fd_in) {
util::FilePiece in(fd_in);
util::FakeOFStream out(1);
util::FileStream out(1);
Width width;
StringPiece word;
const Width end_sentence = (Width)model.GetVocabulary().EndSentence();
@ -30,10 +30,19 @@ template <class Model, class Width> void QueryFromBytes(const Model &model, int
const lm::ngram::State *next_state = begin_state;
Width kEOS = model.GetVocabulary().EndSentence();
Width buf[4096];
float sum = 0.0;
uint64_t completed = 0;
double loaded = util::CPUTime();
std::cout << "CPU_to_load: " << loaded << std::endl;
// Numerical precision: batch sums.
double total = 0.0;
while (std::size_t got = util::ReadOrEOF(fd_in, buf, sizeof(buf))) {
float sum = 0.0;
UTIL_THROW_IF2(got % sizeof(Width), "File size not a multiple of vocab id size " << sizeof(Width));
got /= sizeof(Width);
completed += got;
// Do even stuff first.
const Width *even_end = buf + (got & ~1);
// Alternating states
@ -49,8 +58,13 @@ template <class Model, class Width> void QueryFromBytes(const Model &model, int
sum += model.FullScore(*next_state, *i, state[2]).prob;
next_state = (*i++ == kEOS) ? begin_state : &state[2];
}
total += sum;
}
std::cout << "Sum is " << sum << std::endl;
double after = util::CPUTime();
std::cerr << "Probability sum is " << total << std::endl;
std::cout << "Queries: " << completed << std::endl;
std::cout << "CPU_excluding_load: " << (after - loaded) << "\nCPU_per_query: " << ((after - loaded) / static_cast<double>(completed)) << std::endl;
std::cout << "RSSMax: " << util::RSSMax() << std::endl;
}
template <class Model, class Width> void DispatchFunction(const Model &model, bool query) {
@ -62,7 +76,10 @@ template <class Model, class Width> void DispatchFunction(const Model &model, bo
}
template <class Model> void DispatchWidth(const char *file, bool query) {
Model model(file);
lm::ngram::Config config;
config.load_method = util::READ;
std::cerr << "Using load_method = READ." << std::endl;
Model model(file, config);
lm::WordIndex bound = model.GetVocabulary().Bound();
if (bound <= 256) {
DispatchFunction<Model, uint8_t>(model, query);
@ -116,11 +133,10 @@ int main(int argc, char *argv[]) {
<< argv[0] << " vocab $model <$text >$text.vocab\n"
<< "#Ensure files are in RAM.\n"
<< "cat $text.vocab $model >/dev/null\n"
<< "#Timed query against the model, including loading.\n"
<< "time " << argv[0] << " query $model <$text.vocab\n";
<< "#Timed query against the model.\n"
<< argv[0] << " query $model <$text.vocab\n";
return 1;
}
Dispatch(argv[2], !strcmp(argv[1], "query"));
util::PrintUsage(std::cerr);
return 0;
}

View File

@ -3,7 +3,7 @@
#include "lm/enumerate_vocab.hh"
#include "lm/model.hh"
#include "util/fake_ofstream.hh"
#include "util/file_stream.hh"
#include "util/file_piece.hh"
#include "util/usage.hh"
@ -42,7 +42,7 @@ class QueryPrinter {
}
private:
util::FakeOFStream out_;
util::FileStream out_;
bool print_word_;
bool print_line_;
bool print_summary_;

View File

@ -6,7 +6,7 @@
#include "lm/config.hh"
#include "lm/weights.hh"
#include "util/exception.hh"
#include "util/fake_ofstream.hh"
#include "util/file_stream.hh"
#include "util/file.hh"
#include "util/joint_sort.hh"
#include "util/murmur_hash.hh"
@ -182,7 +182,7 @@ void SortedVocabulary::ComputeRenumbering(WordIndex types, int from_words, int t
std::sort(entries.begin(), entries.end());
// Write out new vocab file.
{
util::FakeOFStream out(to_words);
util::FileStream out(to_words);
out << "<unk>" << '\0';
for (std::vector<RenumberEntry>::const_iterator i = entries.begin(); i != entries.end(); ++i) {
out << i->str << '\0';

View File

@ -4,7 +4,7 @@
#include "lm/enumerate_vocab.hh"
#include "lm/lm_exception.hh"
#include "lm/virtual_interface.hh"
#include "util/fake_ofstream.hh"
#include "util/file_stream.hh"
#include "util/murmur_hash.hh"
#include "util/pool.hh"
#include "util/probing_hash_table.hh"
@ -44,7 +44,7 @@ class ImmediateWriteWordsWrapper : public EnumerateVocab {
private:
EnumerateVocab *inner_;
util::FakeOFStream stream_;
util::FileStream stream_;
};
// When the binary size isn't known yet.
@ -225,7 +225,7 @@ class WriteUniqueWords {
}
private:
util::FakeOFStream word_list_;
util::FileStream word_list_;
};
class NoOpUniqueWords {

View File

@ -7,47 +7,41 @@
#include <cerrno>
#include <cstring>
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#include <io.h>
#endif
namespace util {
Exception::Exception() throw() {}
Exception::~Exception() throw() {}
Exception::Exception(const Exception &from) : std::exception() {
stream_ << from.stream_.str();
}
Exception &Exception::operator=(const Exception &from) {
stream_ << from.stream_.str();
return *this;
}
const char *Exception::what() const throw() {
text_ = stream_.str();
return text_.c_str();
}
void Exception::SetLocation(const char *file, unsigned int line, const char *func, const char *child_name, const char *condition) {
/* The child class might have set some text, but we want this to come first.
* Another option would be passing this information to the constructor, but
* then child classes would have to accept constructor arguments and pass
* them down.
*/
text_ = stream_.str();
stream_.str("");
stream_ << file << ':' << line;
if (func) stream_ << " in " << func << " threw ";
std::string old_text;
std::swap(old_text, what_);
StringStream stream(what_);
stream << file << ':' << line;
if (func) stream << " in " << func << " threw ";
if (child_name) {
stream_ << child_name;
stream << child_name;
} else {
#ifdef __GXX_RTTI
stream_ << typeid(this).name();
stream << typeid(this).name();
#else
stream_ << "an exception";
stream << "an exception";
#endif
}
if (condition) stream_ << " because `" << condition;
stream_ << "'.\n";
stream_ << text_;
if (condition) {
stream << " because `" << condition << '\'';
}
stream << ".\n";
stream << old_text;
}
namespace {
@ -95,4 +89,17 @@ ErrnoException::~ErrnoException() throw() {}
OverflowException::OverflowException() throw() {}
OverflowException::~OverflowException() throw() {}
#if defined(_WIN32) || defined(_WIN64)
// Builds the exception text from the calling thread's current Windows error.
// On success the text is "Windows error <code>: <system message>".  If the
// system message cannot be retrieved, the text reports both the original code
// and the code of the formatting failure.
WindowsException::WindowsException() throw() {
  // Read the error code first: any later API call may overwrite the thread's
  // last-error value.
  unsigned int last_error = GetLastError();
  char error_msg[256] = "";
  if (FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, last_error, LANG_NEUTRAL, error_msg, sizeof(error_msg), NULL)) {
    *this << "Windows error " << last_error << ": " << error_msg;
  } else {
    // Capture the formatting failure before any stream operation runs, so the
    // reported code is the one FormatMessageA actually left behind.
    const unsigned int format_error = GetLastError();
    *this << "Windows error " << format_error << " while formatting Windows error " << last_error << ". ";
  }
}
WindowsException::~WindowsException() throw() {}
#endif
} // namespace util

View File

@ -1,12 +1,16 @@
#ifndef UTIL_EXCEPTION_H
#define UTIL_EXCEPTION_H
#include "util/string_stream.hh"
#include <exception>
#include <limits>
#include <sstream>
#include <string>
#include <stdint.h>
// TODO(hieu) delete this
#include <sstream>
namespace util {
template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
@ -16,11 +20,7 @@ class Exception : public std::exception {
Exception() throw();
virtual ~Exception() throw();
Exception(const Exception &from);
Exception &operator=(const Exception &from);
// Not threadsafe, but probably doesn't matter. FWIW, Boost's exception guidance implies that what() isn't threadsafe.
const char *what() const throw();
const char *what() const throw() { return what_.c_str(); }
// For use by the UTIL_THROW macros.
void SetLocation(
@ -38,8 +38,7 @@ class Exception : public std::exception {
typedef T Identity;
};
std::stringstream stream_;
mutable std::string text_;
std::string what_;
};
/* This implements the normal operator<< for Exception and all its children.
@ -47,7 +46,12 @@ class Exception : public std::exception {
* boost::enable_if.
*/
template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data) {
e.stream_ << data;
// TODO(hieu): change this to
// StringStream(e.what_) << data;
std::stringstream moses_hack;
moses_hack << data;
e.what_ += moses_hack.str();
return e;
}
@ -149,6 +153,15 @@ inline std::size_t CheckOverflow(uint64_t value) {
return CheckOverflowInternal<sizeof(std::size_t)>(value);
}
#if defined(_WIN32) || defined(_WIN64)
/* Thrown for Windows specific operations.
 * Only declared when _WIN32 or _WIN64 is defined.
 */
class WindowsException : public Exception {
public:
// Takes no arguments; the definition is not in this header.
WindowsException() throw();
// Declared non-throwing, like the constructor.
~WindowsException() throw();
};
#endif
} // namespace util
#endif // UTIL_EXCEPTION_H

View File

@ -1,137 +0,0 @@
/* Like std::ofstream but without being incredibly slow. Backed by a raw fd.
* Supports most of the built-in types except for void* and long double.
*/
#ifndef UTIL_FAKE_OFSTREAM_H
#define UTIL_FAKE_OFSTREAM_H
#include "util/file.hh"
#include "util/float_to_string.hh"
#include "util/integer_to_string.hh"
#include "util/scoped.hh"
#include "util/string_piece.hh"
#include <cassert>
#include <cstring>
#include <stdint.h>
namespace util {
/* Buffered text writer on top of a raw file descriptor.
 *
 * Bytes accumulate in an internal malloc'd buffer and are handed to
 * util::WriteOrThrow when the buffer fills, on flush(), on SetFD() and on
 * destruction.  The descriptor is never closed by this class.
 */
class FakeOFStream {
  public:
    // Maximum over all ToString operations.
    // static const std::size_t kMinBuf = 20;
    // This was causing compile failures in debug, so now 20 is written directly.
    //
    // Does not take ownership of out.
    // Allows default constructor, but must call SetFD.
    // buffer_size is raised to at least 20 bytes so any single in-place
    // number conversion fits.
    explicit FakeOFStream(int out = -1, std::size_t buffer_size = 1048576)
      : buf_(util::MallocOrThrow(std::max(buffer_size, static_cast<std::size_t>(20)))),
        current_(static_cast<char*>(buf_.get())),
        end_(current_ + std::max(buffer_size, static_cast<std::size_t>(20))),
        fd_(out) {}

    ~FakeOFStream() {
      // Could have called Finish already
      flush();
    }

    // Flushes pending bytes to the old descriptor, then switches to the new one.
    void SetFD(int to) {
      flush();
      fd_ = to;
    }

    // Appends length raw bytes.  Data that does not fit even in an empty
    // buffer bypasses the buffer and is written straight to the descriptor.
    FakeOFStream &write(const void *data, std::size_t length) {
      // Compare sizes instead of forming current_ + length, which would be an
      // out-of-range pointer (and could wrap) for large lengths.
      if (UTIL_LIKELY(length <= Remaining())) {
        std::memcpy(current_, data, length);
        current_ += length;
        return *this;
      }
      flush();
      if (length <= Remaining()) {
        std::memcpy(current_, data, length);
        current_ += length;
      } else {
        util::WriteOrThrow(fd_, data, length);
      }
      return *this;
    }

    // This also covers std::string and char*
    FakeOFStream &operator<<(StringPiece str) {
      return write(str.data(), str.size());
    }

    // For anything with ToStringBuf<T>::kBytes, define operator<< using ToString.
    // This includes uint64_t, int64_t, uint32_t, int32_t, uint16_t, int16_t,
    // float, double
  private:
    template <int Arg> struct EnableIfKludge {
      typedef FakeOFStream type;
    };

  public:
    template <class T> typename EnableIfKludge<ToStringBuf<T>::kBytes>::type &operator<<(const T value) {
      EnsureRemaining(ToStringBuf<T>::kBytes);
      current_ = ToString(value, current_);
      assert(current_ <= end_);
      return *this;
    }

    // Characters are copied as bytes rather than printed as integers.
    FakeOFStream &operator<<(char c) {
      EnsureRemaining(1);
      *current_++ = c;
      return *this;
    }

    FakeOFStream &operator<<(unsigned char c) {
      EnsureRemaining(1);
      *current_++ = static_cast<char>(c);
      return *this;
    }

    /* clang on OS X appears to consider std::size_t aka unsigned long distinct
     * from uint64_t.  So this function makes clang work.  gcc considers
     * uint64_t and std::size_t the same (on 64-bit) so this isn't necessary.
     * But it does no harm since gcc sees it as a specialization of the
     * EnableIfKludge template.
     * Also, delegating to *this << static_cast<uint64_t>(value) would loop
     * indefinitely on gcc.
     */
    FakeOFStream &operator<<(std::size_t value) {
      EnsureRemaining(ToStringBuf<uint64_t>::kBytes);
      current_ = ToString(static_cast<uint64_t>(value), current_);
      // Same bounds check as the templated overload above.
      assert(current_ <= end_);
      return *this;
    }

    // Note this does not sync.
    void flush() {
      char *const begin = static_cast<char*>(buf_.get());
      if (current_ != begin) {
        util::WriteOrThrow(fd_, begin, current_ - begin);
        current_ = begin;
      }
    }

    // Not necessary, but does assure the data is cleared.
    void Finish() {
      flush();
      buf_.reset();
      current_ = NULL;
      // Do not leave end_ pointing into the released buffer.
      end_ = NULL;
      util::FSyncOrThrow(fd_);
    }

  private:
    // Number of bytes that can still be appended before a flush is needed.
    std::size_t Remaining() const {
      return static_cast<std::size_t>(end_ - current_);
    }

    // Guarantees room for an in-place write of amount bytes, flushing if needed.
    void EnsureRemaining(std::size_t amount) {
      if (UTIL_UNLIKELY(amount > Remaining())) {
        flush();
        assert(amount <= Remaining());
      }
    }

    util::scoped_malloc buf_;
    char *current_, *end_;

    int fd_;
};
} // namespace
#endif

128
util/fake_ostream.hh Normal file
View File

@ -0,0 +1,128 @@
#ifndef UTIL_FAKE_OSTREAM_H
#define UTIL_FAKE_OSTREAM_H
#include "util/float_to_string.hh"
#include "util/integer_to_string.hh"
#include "util/string_piece.hh"
#include <cassert>
#include <limits>
#include <stdint.h>
namespace util {
/* Like std::ostream but without being incredibly slow.
* Supports most of the built-in types except for long double.
*
* The FakeOStream class is intended to be inherited from. The inherting class
* should provide:
* public:
* Derived &flush();
* Derived &write(const void *data, std::size_t length);
*
* private: or protected:
* friend class FakeOStream;
* char *Ensure(std::size_t amount);
* void AdvanceTo(char *to);
*
* The Ensure function makes enough space for an in-place write and returns
* where to write. The AdvanceTo function happens after the write, saying how
* much was actually written.
*
* Precondition:
* amount <= kToStringMaxBytes for in-place writes.
*/
template <class Derived> class FakeOStream {
public:
FakeOStream() {}
// This also covers std::string and char*
Derived &operator<<(StringPiece str) {
return C().write(str.data(), str.size());
}
// For anything with ToStringBuf<T>::kBytes, define operator<< using ToString.
// This includes uint64_t, int64_t, uint32_t, int32_t, uint16_t, int16_t,
// float, double
private:
template <int Arg> struct EnableIfKludge {
typedef Derived type;
};
public:
template <class T> typename EnableIfKludge<ToStringBuf<T>::kBytes>::type &operator<<(const T value) {
return CallToString(value);
}
/* clang on OS X appears to consider std::size_t aka unsigned long distinct
* from uint64_t. So this function makes clang work. gcc considers
* uint64_t and std::size_t the same (on 64-bit) so this isn't necessary.
* But it does no harm since gcc sees it as a specialization of the
* EnableIfKludge template.
* Also, delegating to *this << static_cast<uint64_t>(value) would loop
* indefinitely on gcc.
*/
Derived &operator<<(std::size_t value) { return CoerceToString(value); }
// union types will map to int, but don't pass the template magic above in gcc.
Derived &operator<<(int value) { return CoerceToString(value); }
// gcc considers these distinct from uint64_t
Derived &operator<<(unsigned long long value) { return CoerceToString(value); }
Derived &operator<<(signed long long value) { return CoerceToString(value); }
// Character types that get copied as bytes instead of displayed as integers.
Derived &operator<<(char val) { return put(val); }
Derived &operator<<(signed char val) { return put(static_cast<char>(val)); }
Derived &operator<<(unsigned char val) { return put(static_cast<char>(val)); }
// This is here to catch all the other pointer types.
Derived &operator<<(const void *value) { return CallToString(value); }
// This is here because the above line also catches const char*.
Derived &operator<<(const char *value) { return *this << StringPiece(value); }
Derived &operator<<(char *value) { return *this << StringPiece(value); }
Derived &put(char val) {
char *c = C().Ensure(1);
*c = val;
C().AdvanceTo(++c);
return C();
}
char widen(char val) const { return val; }
private:
// References to derived class for convenience.
Derived &C() {
return *static_cast<Derived*>(this);
}
const Derived &C() const {
return *static_cast<const Derived*>(this);
}
template <class From, unsigned Length = sizeof(From), bool Signed = std::numeric_limits<From>::is_signed> struct Coerce {};
template <class From> struct Coerce<From, 2, false> { typedef uint16_t To; };
template <class From> struct Coerce<From, 4, false> { typedef uint32_t To; };
template <class From> struct Coerce<From, 8, false> { typedef uint64_t To; };
template <class From> struct Coerce<From, 2, true> { typedef int16_t To; };
template <class From> struct Coerce<From, 4, true> { typedef int32_t To; };
template <class From> struct Coerce<From, 8, true> { typedef int64_t To; };
template <class From> Derived &CoerceToString(const From value) {
return CallToString(static_cast<typename Coerce<From>::To>(value));
}
// This is separate to prevent an infinite loop if the compiler considers
// types the same (i.e. gcc std::size_t and uint64_t or uint32_t).
template <class T> Derived &CallToString(const T value) {
C().AdvanceTo(ToString(value, C().Ensure(ToStringBuf<T>::kBytes)));
return C();
}
};
} // namespace
#endif // UTIL_FAKE_OSTREAM_H

View File

@ -147,17 +147,33 @@ std::size_t GuardLarge(std::size_t size) {
}
}
#if defined(_WIN32) || defined(_WIN64)
namespace {
const std::size_t kMaxDWORD = static_cast<std::size_t>(4294967295UL);
} // namespace
#endif
std::size_t PartialRead(int fd, void *to, std::size_t amount) {
#if defined(_WIN32) || defined(_WIN64)
int ret = _read(fd, to, GuardLarge(amount));
DWORD ret;
HANDLE file_handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
DWORD larger_size = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, amount));
DWORD smaller_size = 28672; // Received reports that 31346 worked but higher values did not. This rounds down to the nearest multiple of 4096, the page size.
if (!ReadFile(file_handle, to, larger_size, &ret, NULL))
{
DWORD last_error = GetLastError();
if (last_error != ERROR_NOT_ENOUGH_MEMORY || !ReadFile(file_handle, to, smaller_size, &ret, NULL)) {
UTIL_THROW(WindowsException, "Windows error in ReadFile.");
}
}
#else
errno = 0;
ssize_t ret;
do {
ret = read(fd, to, GuardLarge(amount));
} while (ret == -1 && errno == EINTR);
#endif
UTIL_THROW_IF_ARG(ret < 0, FDException, (fd), "while reading " << amount << " bytes");
#endif
return static_cast<std::size_t>(ret);
}
@ -212,12 +228,6 @@ void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
UTIL_THROW_IF(1 != std::fwrite(data, size, 1, to), ErrnoException, "Short write; requested size " << size);
}
#if defined(_WIN32) || defined(_WIN64)
namespace {
const std::size_t kMaxDWORD = static_cast<std::size_t>(4294967295UL);
} // namespace
#endif
void ErsatzPRead(int fd, void *to_void, std::size_t size, uint64_t off) {
uint8_t *to = static_cast<uint8_t*>(to_void);
while (size) {
@ -230,7 +240,7 @@ void ErsatzPRead(int fd, void *to_void, std::size_t size, uint64_t off) {
memset(&overlapped, 0, sizeof(OVERLAPPED));
overlapped.Offset = static_cast<DWORD>(off);
overlapped.OffsetHigh = static_cast<DWORD>(off >> 32);
UTIL_THROW_IF(!ReadFile((HANDLE)_get_osfhandle(fd), to, reading, &ret, &overlapped), Exception, "ReadFile failed for offset " << off);
UTIL_THROW_IF(!ReadFile((HANDLE)_get_osfhandle(fd), to, reading, &ret, &overlapped), WindowsException, "ReadFile failed for offset " << off);
#else
ssize_t ret;
errno = 0;

View File

@ -56,7 +56,7 @@ FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buf
InitializeNoRead("istream", min_buffer);
fallback_to_read_ = true;
data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED);
HugeMalloc(default_map_size_, false, data_);
position_ = data_.begin();
position_end_ = position_;
@ -282,7 +282,7 @@ void FilePiece::TransitionToRead() {
assert(!fallback_to_read_);
fallback_to_read_ = true;
data_.reset();
data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED);
HugeMalloc(default_map_size_, false, data_);
position_ = data_.begin();
position_end_ = position_;
@ -313,8 +313,7 @@ void FilePiece::ReadShift() {
// Buffer too small.
std::size_t valid_length = position_end_ - position_;
default_map_size_ *= 2;
data_.call_realloc(default_map_size_);
UTIL_THROW_IF(!data_.get(), ErrnoException, "realloc failed for " << default_map_size_);
HugeRealloc(default_map_size_, false, data_);
position_ = data_.begin();
position_end_ = position_ + valid_length;
} else {

View File

@ -1,7 +1,7 @@
// Tests might fail if you have creative characters in your path. Sue me.
#include "util/file_piece.hh"
#include "util/fake_ofstream.hh"
#include "util/file_stream.hh"
#include "util/file.hh"
#include "util/scoped.hh"
@ -138,7 +138,7 @@ BOOST_AUTO_TEST_CASE(Numbers) {
scoped_fd file(MakeTemp(FileLocation()));
const float floating = 3.2;
{
util::FakeOFStream writing(file.get());
util::FileStream writing(file.get());
writing << "94389483984398493890287 " << floating << " 5";
}
SeekOrThrow(file.get(), 0);

89
util/file_stream.hh Normal file
View File

@ -0,0 +1,89 @@
/* Like std::ofstream but without being incredibly slow. Backed by a raw fd.
* Supports most of the built-in types except for long double.
*/
#ifndef UTIL_FILE_STREAM_H
#define UTIL_FILE_STREAM_H
#include "util/fake_ostream.hh"
#include "util/file.hh"
#include "util/scoped.hh"
#include <cassert>
#include <cstring>
#include <stdint.h>
namespace util {
/* Buffered output stream that writes to a raw file descriptor.
 *
 * Bytes accumulate in a heap buffer of max(buffer_size, kToStringMaxBytes)
 * bytes and are handed to util::WriteOrThrow when the buffer fills, on
 * flush(), on SetFD(), on seekp() and on destruction.
 *
 * NOTE(review): the destructor calls flush(), which writes through
 * util::WriteOrThrow.  If that throws while data is still buffered (for
 * example the default fd of -1), the exception leaves the destructor.  Call
 * flush() explicitly first if the error has to be handled.
 */
class FileStream : public FakeOStream<FileStream> {
  public:
    // out: file descriptor to write to; -1 means "not set yet", see SetFD.
    // buffer_size: requested capacity, raised to kToStringMaxBytes if smaller
    // so a single formatted number always fits.
    FileStream(int out = -1, std::size_t buffer_size = 8192)
      : buf_(util::MallocOrThrow(std::max<std::size_t>(buffer_size, kToStringMaxBytes))),
        current_(static_cast<char*>(buf_.get())),
        end_(current_ + std::max<std::size_t>(buffer_size, kToStringMaxBytes)),
        fd_(out) {}

    ~FileStream() {
      flush();
    }

    // Switch to another descriptor.  Pending bytes go to the old one first.
    void SetFD(int to) {
      flush();
      fd_ = to;
    }

    // Write out everything buffered so far and mark the buffer empty.
    FileStream &flush() {
      char *const begin = static_cast<char*>(buf_.get());
      if (current_ != begin) {
        util::WriteOrThrow(fd_, begin, current_ - begin);
        current_ = begin;
      }
      return *this;
    }

    // For writes of arbitrary size.
    FileStream &write(const void *data, std::size_t length) {
      // Compare sizes rather than computing current_ + length, which would
      // form a pointer past the end of the buffer for large lengths.
      if (UTIL_LIKELY(length <= Available())) {
        std::memcpy(current_, data, length);
        current_ += length;
        return *this;
      }
      flush();
      if (length <= Available()) {
        std::memcpy(current_, data, length);
        current_ += length;
      } else {
        // Larger than the whole buffer: bypass it.  The buffer is empty after
        // the flush above, so output order is preserved.
        util::WriteOrThrow(fd_, data, length);
      }
      return *this;
    }

    // Move the descriptor's file position.  Buffered bytes are flushed first
    // so they land at the old position rather than after the seek.
    FileStream &seekp(uint64_t to) {
      flush();
      util::SeekOrThrow(fd_, to);
      return *this;
    }

  protected:
    friend class FakeOStream<FileStream>;

    // For writes directly to buffer guaranteed to have amount < buffer size.
    // Returns where the caller may write at least amount bytes.
    char *Ensure(std::size_t amount) {
      if (UTIL_UNLIKELY(amount > Available())) {
        flush();
        assert(amount <= Available());
      }
      return current_;
    }

    // Commit bytes written after Ensure(): to is one past the last byte.
    void AdvanceTo(char *to) {
      current_ = to;
      assert(current_ <= end_);
    }

  private:
    // Bytes still free in the buffer.
    std::size_t Available() const {
      return static_cast<std::size_t>(end_ - current_);
    }

    util::scoped_malloc buf_;
    char *current_, *end_;
    int fd_;
};
} // namespace
#endif

View File

@ -1,3 +1,4 @@
#include <iostream>
/* Fast integer to string conversion.
Source: https://github.com/miloyip/itoa-benchmark
Local modifications:
@ -637,4 +638,28 @@ char *ToString(uint16_t value, char *to) {
return ToString((uint32_t)value, to);
}
// void * to string. This hasn't been optimized at all really.
namespace {
// Lowercase digits indexed by nibble value.
const char kHexDigits[] = "0123456789abcdef";
} // namespace

// Formats a pointer as lowercase hex with a "0x" prefix and no leading zeros.
// A null pointer is written as the single character "0" (not "0x0").
// Writes at `to`, adds no terminator, and returns one past the last character
// written.  This path has not been tuned for speed.
char *ToString(const void *v, char *to) {
  // Both spellings begin with '0'.
  *to++ = '0';
  if (!v) return to;
  *to++ = 'x';
  const uintptr_t bits = reinterpret_cast<uintptr_t>(v);
  // Position, counted from the least significant end, of the highest hex
  // digit that is not a leading zero.  bits is non-zero here, so this stops.
  int digit = static_cast<int>(sizeof(void*) * 2) - 1;
  while ((bits >> (digit * 4)) == 0) --digit;
  for (; digit >= 0; --digit) {
    *to++ = kHexDigits[(bits >> (digit * 4)) & 0xf];
  }
  return to;
}
} // namespace util

View File

@ -18,6 +18,8 @@ char *ToString(int64_t value, char *to);
char *ToString(uint16_t value, char *to);
char *ToString(int16_t value, char *to);
char *ToString(const void *value, char *to);
inline char *ToString(bool value, char *to) {
*to++ = '0' + value;
return to;
@ -51,6 +53,14 @@ template <> struct ToStringBuf<int64_t> {
enum { kBytes = 20 };
};
// Buffer bound for ToString(const void*): the "0x" prefix plus two hex digits
// for every byte of a pointer.
template <> struct ToStringBuf<const void*> {
// Either 18 on 64-bit or 10 on 32-bit.
enum { kBytes = sizeof(const void*) * 2 + 2 };
};
// Maximum over this and float.
// Stream buffers use this as a lower bound on their size so that one
// formatted value always fits.
// NOTE(review): must be kept >= every ToStringBuf<T>::kBytes by hand.
enum { kToStringMaxBytes = 20 };
} // namespace util
#endif // UTIL_INTEGER_TO_STRING_H

View File

@ -21,9 +21,9 @@ template <class T> void TestValue(const T value) {
template <class T> void TestCorners() {
TestValue(std::numeric_limits<T>::min());
TestValue(std::numeric_limits<T>::max());
TestValue(static_cast<T>(0));
TestValue(static_cast<T>(-1));
TestValue(static_cast<T>(1));
TestValue((T)0);
TestValue((T)-1);
TestValue((T)1);
}
BOOST_AUTO_TEST_CASE(Corners) {
@ -33,6 +33,7 @@ BOOST_AUTO_TEST_CASE(Corners) {
TestCorners<int16_t>();
TestCorners<int32_t>();
TestCorners<int64_t>();
TestCorners<const void*>();
}
template <class T> void TestAll() {
@ -62,4 +63,14 @@ BOOST_AUTO_TEST_CASE(Tens) {
Test10s<int32_t>();
}
// Exercises pointer formatting through TestValue on a spread of addresses.
BOOST_AUTO_TEST_CASE(Pointers) {
// Powers of ten: 1, 10, 100, ... stopping before i *= 10 could overflow uintptr_t.
for (uintptr_t i = 1; i < std::numeric_limits<uintptr_t>::max() / 10; i *= 10) {
TestValue((const void*)i);
}
// Every value of the low byte, both alone (includes the null pointer at i == 0)
// and offset by 0xf00 so that a third hex digit is present.
for (uintptr_t i = 0; i < 256; ++i) {
TestValue((const void*)i);
TestValue((const void*)(i + 0xf00));
}
}
}} // namespaces

View File

@ -27,7 +27,7 @@
namespace util {
long SizePage() {
std::size_t SizePage() {
#if defined(_WIN32) || defined(_WIN64)
SYSTEM_INFO si;
GetSystemInfo(&si);
@ -37,22 +37,6 @@ long SizePage() {
#endif
}
void SyncOrThrow(void *start, size_t length) {
#if defined(_WIN32) || defined(_WIN64)
UTIL_THROW_IF(!::FlushViewOfFile(start, length), ErrnoException, "Failed to sync mmap");
#else
UTIL_THROW_IF(length && msync(start, length, MS_SYNC), ErrnoException, "Failed to sync mmap");
#endif
}
void UnmapOrThrow(void *start, size_t length) {
#if defined(_WIN32) || defined(_WIN64)
UTIL_THROW_IF(!::UnmapViewOfFile(start), ErrnoException, "Failed to unmap a file");
#else
UTIL_THROW_IF(munmap(start, length), ErrnoException, "munmap failed");
#endif
}
scoped_mmap::~scoped_mmap() {
if (data_ != (void*)-1) {
try {
@ -66,14 +50,24 @@ scoped_mmap::~scoped_mmap() {
}
}
namespace {
// Rounds value up to the nearest multiple of mult.  The masking only works
// when mult is a power of two.  A value that is already a multiple is returned
// unchanged, and 0 maps to 0 for unsigned T.
template <class T> T RoundUpPow2(T value, T mult) {
  const T low_bits = mult - 1;
  // Largest multiple of mult strictly below value ...
  const T below = (value - 1) & ~low_bits;
  // ... so one step up is the smallest multiple that is >= value.
  return below + mult;
}
} // namespace
// Allocates size bytes through HugeMalloc; the memory is zero-filled when
// zeroed is true.  The members are first set to the empty state
// (NONE_ALLOCATED) because HugeMalloc begins by calling reset() on its target.
scoped_memory::scoped_memory(std::size_t size, bool zeroed) : data_(NULL), size_(0), source_(NONE_ALLOCATED) {
HugeMalloc(size, zeroed, *this);
}
void scoped_memory::reset(void *data, std::size_t size, Alloc source) {
switch(source_) {
case MMAP_ROUND_UP_ALLOCATED:
scoped_mmap(data_, RoundUpPow2(size_, (std::size_t)SizePage()));
break;
case MMAP_ALLOCATED:
scoped_mmap(data_, size_);
break;
case ARRAY_ALLOCATED:
delete [] reinterpret_cast<char*>(data_);
break;
case MALLOC_ALLOCATED:
free(data_);
break;
@ -85,7 +79,7 @@ void scoped_memory::reset(void *data, std::size_t size, Alloc source) {
source_ = source;
}
void scoped_memory::call_realloc(std::size_t size) {
/*void scoped_memory::call_realloc(std::size_t size) {
assert(source_ == MALLOC_ALLOCATED || source_ == NONE_ALLOCATED);
void *new_data = realloc(data_, size);
if (!new_data) {
@ -95,7 +89,17 @@ void scoped_memory::call_realloc(std::size_t size) {
size_ = size;
source_ = MALLOC_ALLOCATED;
}
}
}*/
// mmap flags for a shared mapping of a file.  MAP_FILE is added where the
// platform defines it; on Windows the value is a placeholder.
const int kFileFlags =
#if defined(_WIN32) || defined(_WIN64)
0 // MapOrThrow ignores flags on windows
#elif defined(MAP_FILE)
MAP_FILE | MAP_SHARED
#else
MAP_SHARED
#endif
;
void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset) {
#ifdef MAP_POPULATE // Linux specific
@ -126,15 +130,168 @@ void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int
return ret;
}
const int kFileFlags =
void SyncOrThrow(void *start, size_t length) {
#if defined(_WIN32) || defined(_WIN64)
0 // MapOrThrow ignores flags on windows
#elif defined(MAP_FILE)
MAP_FILE | MAP_SHARED
UTIL_THROW_IF(!::FlushViewOfFile(start, length), ErrnoException, "Failed to sync mmap");
#else
MAP_SHARED
UTIL_THROW_IF(length && msync(start, length, MS_SYNC), ErrnoException, "Failed to sync mmap");
#endif
;
}
// Releases a mapping that begins at start, throwing ErrnoException if the
// operating system call reports failure.  length is passed to munmap; the
// Windows call takes only the start address.
void UnmapOrThrow(void *start, size_t length) {
#if !defined(_WIN32) && !defined(_WIN64)
  UTIL_THROW_IF(munmap(start, length), ErrnoException, "munmap failed");
#else
  UTIL_THROW_IF(!::UnmapViewOfFile(start), ErrnoException, "Failed to unmap a file");
#endif
}
// Linux huge pages.
#ifdef __linux__
namespace {
// Creates a private anonymous read/write mapping of size bytes.  flags is
// OR-ed into the mmap flags and MAP_POPULATE is added when populate is set.
// On success the mapping is stored in `to` as MMAP_ALLOCATED and true is
// returned.  On failure `to` is left alone and false is returned.
bool AnonymousMap(std::size_t size, int flags, bool populate, util::scoped_memory &to) {
  const int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | flags | (populate ? MAP_POPULATE : 0);
  void *const mapped = mmap(NULL, size, PROT_READ | PROT_WRITE, map_flags, -1, 0);
  if (mapped == MAP_FAILED) {
    return false;
  }
  to.reset(mapped, size, scoped_memory::MMAP_ALLOCATED);
  return true;
}
// Attempts to obtain size bytes backed by huge pages of 2^alignment_bits bytes.
// Returns true with the mapping stored in `to`, or false if the request is too
// small for that page size or none of the mmap attempts succeeded.
// populate is forwarded to AnonymousMap (MAP_POPULATE) for the hugetlb attempts
// only; the over-allocating fallback never prefaults.
// NOTE(review): the hugetlb attempts record the mapping as MMAP_ALLOCATED with
// the caller's size, which need not be a multiple of the huge page size.
// Confirm that a later munmap with that length behaves as intended.
bool TryHuge(std::size_t size, uint8_t alignment_bits, bool populate, util::scoped_memory &to) {
// Don't bother with these cases.
if (size < (1ULL << alignment_bits) || (1ULL << alignment_bits) < SizePage())
return false;
// First try: Linux >= 3.8 with manually configured hugetlb pages available.
#ifdef MAP_HUGE_SHIFT
if (AnonymousMap(size, MAP_HUGETLB | (alignment_bits << MAP_HUGE_SHIFT), populate, to))
return true;
#endif
// Second try: manually configured hugetlb pages exist, but kernel too old to
// pick size or not available. This might pick the wrong size huge pages,
// but the sysadmin must have made them available in the first place.
if (AnonymousMap(size, MAP_HUGETLB, populate, to))
return true;
// Third try: align to a multiple of the huge page size by overallocating.
// I feel bad about doing this, but it's also how posix_memalign is
// implemented. And the memory is virtual.
// Round up requested size to multiple of page size. This will allow the pages after to be munmapped.
std::size_t size_up = RoundUpPow2(size, SizePage());
// Enough slack that an aligned run of size_up bytes exists wherever the kernel places the mapping.
// NOTE(review): (1 << alignment_bits) is an int shift; fine for 21 and 30, would overflow from 31 up.
std::size_t ask = size_up + (1 << alignment_bits) - SizePage();
// Don't populate because this is asking for more than we will use.
scoped_mmap larger(mmap(NULL, ask, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), ask);
if (larger.get() == MAP_FAILED) return false;
// Throw out pages before the alignment point.
uintptr_t base = reinterpret_cast<uintptr_t>(larger.get());
// Round up to next multiple of alignment.
uintptr_t rounded_up = RoundUpPow2(base, static_cast<uintptr_t>(1) << alignment_bits);
if (base != rounded_up) {
// If this throws an exception (which it shouldn't) then we want to unmap the whole thing by keeping it in larger.
UnmapOrThrow(larger.get(), rounded_up - base);
// The head is gone; drop the stale pointer without unmapping, then track only the aligned remainder.
larger.steal();
larger.reset(reinterpret_cast<void*>(rounded_up), ask - (rounded_up - base));
}
// Throw out pages after the requested size.
assert(larger.size() >= size_up);
if (larger.size() > size_up) {
// This is where we assume size_up is a multiple of page size.
UnmapOrThrow(static_cast<uint8_t*>(larger.get()) + size_up, larger.size() - size_up);
larger.reset(larger.steal(), size_up);
}
// Advisory only: the return value is ignored, so the mapping is still handed out if the hint is refused.
madvise(larger.get(), size_up, MADV_HUGEPAGE);
// Record the caller's size, not size_up; MMAP_ROUND_UP_ALLOCATED makes the owner round up again before unmapping.
to.reset(larger.steal(), size, scoped_memory::MMAP_ROUND_UP_ALLOCATED);
return true;
}
} // namespace
#endif
// Allocates size bytes into `to`, releasing whatever `to` held before.
// On Linux, huge-page backed mappings are attempted first, largest page size
// first; anything that cannot be satisfied that way (and every request on
// other platforms) comes from calloc/malloc.  zeroed selects zero-filled
// memory and, for the huge page attempts, is also taken as a request to
// populate the mapping.  Throws ErrnoException if the final allocation fails.
void HugeMalloc(std::size_t size, bool zeroed, scoped_memory &to) {
  to.reset();
#ifdef __linux__
  // TODO: architectures/page sizes other than 2^21 and 2^30.
  // 1 GB pages first, then 2 MB pages.
  static const uint8_t kHugeBits[] = {30, 21};
  const uint8_t *const bits_end = kHugeBits + sizeof(kHugeBits) / sizeof(kHugeBits[0]);
  for (const uint8_t *bits = kHugeBits; bits != bits_end; ++bits) {
    // If the user asked for zeroed memory, assume they want it populated.
    if (size >= (1ULL << *bits) && TryHuge(size, *bits, zeroed, to))
      return;
  }
#endif // __linux__
  // Non-linux will always do this, as will small allocations on Linux.
  void *const plain = zeroed ? calloc(1, size) : malloc(size);
  to.reset(plain, size, scoped_memory::MALLOC_ALLOCATED);
  UTIL_THROW_IF(!to.get(), ErrnoException, "Failed to allocate " << size << " bytes");
}
#ifdef __linux__
// Size at which a malloc-backed buffer is moved over to HugeMalloc when it
// grows: 2 MB, or the page size should that be larger.
// NOTE(review): initialized by calling SizePage() during static initialization.
const std::size_t kTransitionHuge = std::max<std::size_t>(1ULL << 21, SizePage());
#endif // __linux__
void HugeRealloc(std::size_t to, bool zero_new, scoped_memory &mem) {
if (!to) {
mem.reset();
return;
}
std::size_t from_size = mem.size();
switch (mem.source()) {
case scoped_memory::NONE_ALLOCATED:
HugeMalloc(to, zero_new, mem);
return;
#ifdef __linux__
case scoped_memory::MMAP_ROUND_UP_ALLOCATED:
// for mremap's benefit.
from_size = RoundUpPow2(from_size, SizePage());
case scoped_memory::MMAP_ALLOCATED:
// Downsizing below barrier?
if (to <= SizePage()) {
scoped_malloc replacement(malloc(to));
memcpy(replacement.get(), mem.get(), std::min(to, mem.size()));
if (zero_new && to > mem.size())
memset(static_cast<uint8_t*>(replacement.get()) + mem.size(), 0, to - mem.size());
mem.reset(replacement.release(), to, scoped_memory::MALLOC_ALLOCATED);
} else {
void *new_addr = mremap(mem.get(), from_size, to, MREMAP_MAYMOVE);
UTIL_THROW_IF(!new_addr, ErrnoException, "Failed to mremap from " << from_size << " to " << to);
mem.steal();
mem.reset(new_addr, to, scoped_memory::MMAP_ALLOCATED);
}
return;
#endif // __linux__
case scoped_memory::MALLOC_ALLOCATED:
#ifdef __linux__
// Transition larger allocations to huge pages, but don't keep trying if we're still malloc allocated.
if (to >= kTransitionHuge && mem.size() < kTransitionHuge) {
scoped_memory replacement;
HugeMalloc(to, zero_new, replacement);
memcpy(replacement.get(), mem.get(), mem.size());
// This can't throw.
mem.reset(replacement.get(), replacement.size(), replacement.source());
replacement.steal();
return;
}
#endif // __linux__
{
void *new_addr = std::realloc(mem.get(), to);
UTIL_THROW_IF(!new_addr, ErrnoException, "realloc to " << to << " bytes failed.");
if (zero_new && to > mem.size())
memset(static_cast<uint8_t*>(new_addr) + mem.size(), 0, to - mem.size());
mem.steal();
mem.reset(new_addr, to, scoped_memory::MALLOC_ALLOCATED);
}
return;
default:
UTIL_THROW(Exception, "HugeRealloc called with type " << mem.source());
}
}
void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out) {
switch (method) {
@ -151,33 +308,17 @@ void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scope
case POPULATE_OR_READ:
#endif
case READ:
out.reset(MallocOrThrow(size), size, scoped_memory::MALLOC_ALLOCATED);
HugeMalloc(size, false, out);
SeekOrThrow(fd, offset);
ReadOrThrow(fd, out.get(), size);
break;
case PARALLEL_READ:
out.reset(MallocOrThrow(size), size, scoped_memory::MALLOC_ALLOCATED);
HugeMalloc(size, false, out);
ParallelRead(fd, out.get(), size, offset);
break;
}
}
// Allocates zeroed memory in to.
void MapAnonymous(std::size_t size, util::scoped_memory &to) {
to.reset();
#if defined(_WIN32) || defined(_WIN64)
to.reset(calloc(1, size), size, scoped_memory::MALLOC_ALLOCATED);
#else
to.reset(MapOrThrow(size, true,
# if defined(MAP_ANONYMOUS)
MAP_ANONYMOUS | MAP_PRIVATE // Linux
# else
MAP_ANON | MAP_PRIVATE // BSD
# endif
, false, -1, 0), size, scoped_memory::MMAP_ALLOCATED);
#endif
}
void *MapZeroedWrite(int fd, std::size_t size) {
ResizeOrThrow(fd, 0);
ResizeOrThrow(fd, size);

View File

@ -12,7 +12,7 @@ namespace util {
class scoped_fd;
long SizePage();
std::size_t SizePage();
// (void*)-1 is MAP_FAILED; this is done to avoid including the mmap header here.
class scoped_mmap {
@ -37,6 +37,13 @@ class scoped_mmap {
reset((void*)-1, 0);
}
// Gives up the mapping without unmapping it and returns its address.  The
// object is left in the empty state: (void*)-1 with size 0.
void *steal() {
  void *const released = data_;
  size_ = 0;
  data_ = (void*)-1;
  return released;
}
private:
void *data_;
std::size_t size_;
@ -51,13 +58,21 @@ class scoped_mmap {
*/
class scoped_memory {
public:
typedef enum {MMAP_ALLOCATED, ARRAY_ALLOCATED, MALLOC_ALLOCATED, NONE_ALLOCATED} Alloc;
typedef enum {
MMAP_ROUND_UP_ALLOCATED, // The size was rounded up to a multiple of page size. Do the same before munmap.
MMAP_ALLOCATED, // munmap
MALLOC_ALLOCATED, // free
NONE_ALLOCATED // nothing here!
} Alloc;
scoped_memory(void *data, std::size_t size, Alloc source)
: data_(data), size_(size), source_(source) {}
scoped_memory() : data_(NULL), size_(0), source_(NONE_ALLOCATED) {}
// Calls HugeMalloc
scoped_memory(std::size_t to, bool zero_new);
~scoped_memory() { reset(); }
void *get() const { return data_; }
@ -71,9 +86,13 @@ class scoped_memory {
void reset(void *data, std::size_t size, Alloc from);
// realloc allows the current data to escape hence the need for this call
// If realloc fails, destroys the original too and get() returns NULL.
void call_realloc(std::size_t to);
// Gives up the memory without freeing it and returns its address.  The object
// is left empty: NULL, size 0, NONE_ALLOCATED.
void *steal() {
  void *const released = data_;
  source_ = NONE_ALLOCATED;
  size_ = 0;
  data_ = NULL;
  return released;
}
private:
void *data_;
@ -85,6 +104,30 @@ class scoped_memory {
scoped_memory &operator=(const scoped_memory &);
};
extern const int kFileFlags;
// Cross-platform, error-checking wrapper for mmap().
void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset = 0);
// msync wrapper
void SyncOrThrow(void *start, size_t length);
// Cross-platform, error-checking wrapper for munmap().
void UnmapOrThrow(void *start, size_t length);
// Allocate memory, promising that all/vast majority of it will be used. Tries
// hard to use huge pages on Linux.
// If you want zeroed memory, pass zeroed = true.
void HugeMalloc(std::size_t size, bool zeroed, scoped_memory &to);
// Reallocates memory ala realloc but with option to zero the new memory.
// On Linux, the memory can come from anonymous mmap or malloc/calloc.
// On non-Linux, only malloc/calloc is supported.
//
// To summarize, any memory from HugeMalloc or HugeRealloc can be resized with
// this.
void HugeRealloc(std::size_t size, bool new_zeroed, scoped_memory &mem);
typedef enum {
// mmap with no prepopulate
LAZY,
@ -98,25 +141,12 @@ typedef enum {
PARALLEL_READ,
} LoadMethod;
extern const int kFileFlags;
// Cross-platform, error-checking wrapper for mmap().
void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset = 0);
// Cross-platform, error-checking wrapper for munmap().
void UnmapOrThrow(void *start, size_t length);
void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out);
void MapAnonymous(std::size_t size, scoped_memory &to);
// Open file name with mmap of size bytes, all of which are initially zero.
void *MapZeroedWrite(int fd, std::size_t size);
void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file);
// msync wrapper
void SyncOrThrow(void *start, size_t length);
// Forward rolling memory map with no overlap.
class Rolling {
public:

View File

@ -4,6 +4,8 @@
#include <cstdlib>
#include <algorithm>
namespace util {
Pool::Pool() {

View File

@ -2,7 +2,7 @@
#define UTIL_PROBING_HASH_TABLE_H
#include "util/exception.hh"
#include "util/scoped.hh"
#include "util/mmap.hh"
#include <algorithm>
#include <cstddef>
@ -336,9 +336,11 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
typedef EqualT Equal;
AutoProbing(std::size_t initial_size = 5, const Key &invalid = Key(), const Hash &hash_func = Hash(), const Equal &equal_func = Equal()) :
allocated_(Backend::Size(initial_size, 1.5)), mem_(util::MallocOrThrow(allocated_)), backend_(mem_.get(), allocated_, invalid, hash_func, equal_func) {
threshold_ = initial_size * 1.2;
Clear();
allocated_(Backend::Size(initial_size, 1.2)), mem_(allocated_, KeyIsRawZero(invalid)), backend_(mem_.get(), allocated_, invalid, hash_func, equal_func) {
threshold_ = std::min<std::size_t>(backend_.buckets_ - 1, backend_.buckets_ * 0.9);
if (!KeyIsRawZero(invalid)) {
Clear();
}
}
// Assumes that the key is unique. Multiple insertions won't cause a failure, just inconsistent lookup.
@ -379,16 +381,23 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
private:
void DoubleIfNeeded() {
if (Size() < threshold_)
if (UTIL_LIKELY(Size() < threshold_))
return;
mem_.call_realloc(backend_.DoubleTo());
HugeRealloc(backend_.DoubleTo(), KeyIsRawZero(backend_.invalid_), mem_);
allocated_ = backend_.DoubleTo();
backend_.Double(mem_.get());
threshold_ *= 2;
backend_.Double(mem_.get(), !KeyIsRawZero(backend_.invalid_));
threshold_ = std::min<std::size_t>(backend_.buckets_ - 1, backend_.buckets_ * 0.9);
}
// True when every byte of key's object representation is zero, i.e. memory
// that is already zero-filled holds this key without further initialization.
bool KeyIsRawZero(const Key &key) {
  const uint8_t *const first = reinterpret_cast<const uint8_t*>(&key);
  const uint8_t *const last = first + sizeof(Key);
  for (const uint8_t *byte = first; byte != last; ++byte) {
    if (*byte != 0) return false;
  }
  return true;
}
std::size_t allocated_;
util::scoped_malloc mem_;
util::scoped_memory mem_;
Backend backend_;
std::size_t threshold_;
};

View File

@ -1,6 +1,6 @@
#include "util/file.hh"
#include "util/probing_hash_table.hh"
#include "util/scoped.hh"
#include "util/mmap.hh"
#include "util/usage.hh"
#include <iostream>
@ -46,11 +46,12 @@ struct PrefetchEntry {
const Entry *pointer;
};
const std::size_t kPrefetchSize = 4;
template <class Table> class PrefetchQueue {
template <class TableT, unsigned PrefetchSize> class PrefetchQueue {
public:
typedef TableT Table;
explicit PrefetchQueue(Table &table) : table_(table), cur_(0), twiddle_(false) {
for (PrefetchEntry *i = entries_; i != entries_ + kPrefetchSize; ++i)
for (PrefetchEntry *i = entries_; i != entries_ + PrefetchSize; ++i)
i->pointer = NULL;
}
@ -66,7 +67,7 @@ template <class Table> class PrefetchQueue {
bool Drain() {
if (Cur().pointer) {
for (PrefetchEntry *i = &Cur(); i < entries_ + kPrefetchSize; ++i) {
for (PrefetchEntry *i = &Cur(); i < entries_ + PrefetchSize; ++i) {
twiddle_ ^= table_.FindFromIdeal(i->key, i->pointer);
}
}
@ -80,11 +81,11 @@ template <class Table> class PrefetchQueue {
PrefetchEntry &Cur() { return entries_[cur_]; }
void Next() {
++cur_;
cur_ = cur_ % kPrefetchSize;
cur_ = cur_ % PrefetchSize;
}
Table &table_;
PrefetchEntry entries_[kPrefetchSize];
PrefetchEntry entries_[PrefetchSize];
std::size_t cur_;
bool twiddle_;
@ -93,12 +94,23 @@ template <class Table> class PrefetchQueue {
void operator=(const PrefetchQueue&);
};
/*template <class Table> class Immediate {
template <class TableT> class Immediate {
public:
typedef TableT Table;
explicit Immediate(Table &table) : table_(table), twiddle_(false) {}
void Add(uint64_t key) {
typename Table::ConstIterator it;
twiddle_ ^= table_.Find(key, it);
}
bool Drain() const { return twiddle_; }
private:
Table &table_;
};*/
bool twiddle_;
};
std::size_t Size(uint64_t entries, float multiplier = 1.5) {
typedef util::ProbingHashTable<Entry, util::IdentityHash, std::equal_to<Entry::Key>, Power2Mod> Table;
@ -106,39 +118,54 @@ std::size_t Size(uint64_t entries, float multiplier = 1.5) {
return Power2Mod::RoundBuckets(Table::Size(entries, multiplier) / sizeof(Entry)) * sizeof(Entry);
}
template <class Mod> bool Test(URandom &rn, uint64_t entries, const uint64_t *const queries_begin, const uint64_t *const queries_end, float multiplier = 1.5) {
typedef util::ProbingHashTable<Entry, util::IdentityHash, std::equal_to<Entry::Key>, Mod> Table;
template <class Queue> bool Test(URandom &rn, uint64_t entries, const uint64_t *const queries_begin, const uint64_t *const queries_end, bool ordinary_malloc, float multiplier = 1.5) {
std::size_t size = Size(entries, multiplier);
scoped_malloc backing(util::CallocOrThrow(size));
Table table(backing.get(), size);
scoped_memory backing;
if (ordinary_malloc) {
backing.reset(util::CallocOrThrow(size), size, scoped_memory::MALLOC_ALLOCATED);
} else {
util::HugeMalloc(size, true, backing);
}
typename Queue::Table table(backing.get(), size);
double start = UserTime();
double start = CPUTime();
for (uint64_t i = 0; i < entries; ++i) {
Entry entry;
entry.key = rn.Get();
table.Insert(entry);
}
double inserted = UserTime() - start;
double before_lookup = UserTime();
PrefetchQueue<Table> queue(table);
double inserted = CPUTime() - start;
double before_lookup = CPUTime();
Queue queue(table);
for (const uint64_t *i = queries_begin; i != queries_end; ++i) {
queue.Add(*i);
/* typename Table::ConstIterator it;
meaningless ^= table.Find(*i, it);*/
}
bool meaningless = queue.Drain();
std::cout << entries << ' ' << size << ' ' << (inserted / static_cast<double>(entries)) << ' ' << (UserTime() - before_lookup) / static_cast<double>(queries_end - queries_begin) << '\n';
std::cout << ' ' << (inserted / static_cast<double>(entries)) << ' ' << (CPUTime() - before_lookup) / static_cast<double>(queries_end - queries_begin) << std::flush;
return meaningless;
}
template <class Mod> bool TestRun(uint64_t lookups = 20000000, float multiplier = 1.5) {
bool TestRun(uint64_t lookups = 20000000, float multiplier = 1.5) {
URandom rn;
util::scoped_malloc queries(util::CallocOrThrow(lookups * sizeof(uint64_t)));
util::scoped_memory queries;
HugeMalloc(lookups * sizeof(uint64_t), true, queries);
rn.Batch(static_cast<uint64_t*>(queries.get()), static_cast<uint64_t*>(queries.get()) + lookups);
uint64_t physical_mem_limit = util::GuessPhysicalMemory() / 2;
bool meaningless = true;
for (uint64_t i = 4; Size(i / multiplier) < physical_mem_limit; i *= 4) {
meaningless ^= util::Test<Mod>(rn, i / multiplier, static_cast<const uint64_t*>(queries.get()), static_cast<const uint64_t*>(queries.get()) + lookups, multiplier);
std::cout << static_cast<std::size_t>(i / multiplier) << ' ' << Size(i / multiplier);
typedef util::ProbingHashTable<Entry, util::IdentityHash, std::equal_to<Entry::Key>, Power2Mod> Table;
typedef util::ProbingHashTable<Entry, util::IdentityHash, std::equal_to<Entry::Key>, DivMod> TableDiv;
const uint64_t *const queries_begin = static_cast<const uint64_t*>(queries.get());
meaningless ^= util::Test<Immediate<TableDiv> >(rn, i / multiplier, queries_begin, queries_begin + lookups, true, multiplier);
meaningless ^= util::Test<Immediate<Table> >(rn, i / multiplier, queries_begin, queries_begin + lookups, true, multiplier);
meaningless ^= util::Test<PrefetchQueue<Table, 4> >(rn, i / multiplier, queries_begin, queries_begin + lookups, true, multiplier);
meaningless ^= util::Test<Immediate<Table> >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier);
meaningless ^= util::Test<PrefetchQueue<Table, 2> >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier);
meaningless ^= util::Test<PrefetchQueue<Table, 4> >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier);
meaningless ^= util::Test<PrefetchQueue<Table, 8> >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier);
meaningless ^= util::Test<PrefetchQueue<Table, 16> >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier);
std::cout << std::endl;
}
return meaningless;
}
@ -148,9 +175,7 @@ template <class Mod> bool TestRun(uint64_t lookups = 20000000, float multiplier
int main() {
bool meaningless = false;
std::cout << "#Integer division\n";
meaningless ^= util::TestRun<util::DivMod>();
std::cout << "#Masking\n";
meaningless ^= util::TestRun<util::Power2Mod>();
std::cout << "#CPU time\n";
meaningless ^= util::TestRun();
std::cerr << "Meaningless: " << meaningless << '\n';
}

View File

@ -27,7 +27,7 @@ void *MallocOrThrow(std::size_t requested) {
}
void *CallocOrThrow(std::size_t requested) {
return InspectAddr(std::calloc(1, requested), requested, "calloc");
return InspectAddr(std::calloc(requested, 1), requested, "calloc");
}
void scoped_malloc::call_realloc(std::size_t requested) {

View File

@ -1,6 +1,5 @@
#include "util/stream/rewindable_stream.hh"
#include "util/pcqueue.hh"
#include <iostream>
#include <iostream>

44
util/string_stream.hh Normal file
View File

@ -0,0 +1,44 @@
#ifndef UTIL_STRING_STREAM_H
#define UTIL_STRING_STREAM_H
#include "util/fake_ostream.hh"
#include <cassert>
#include <string>
namespace util {
/* Output stream that appends to a std::string owned by the caller.
 * The string is held by reference and must outlive the stream.
 */
class StringStream : public FakeOStream<StringStream> {
  public:
    // Semantics: appends to string.  Remember to clear first!
    explicit StringStream(std::string &out)
      : out_(out) {}

    // Nothing is buffered outside the string, so there is nothing to flush.
    StringStream &flush() { return *this; }

    // Append length raw bytes.
    StringStream &write(const void *data, std::size_t length) {
      out_.append(static_cast<const char*>(data), length);
      return *this;
    }

  protected:
    friend class FakeOStream<StringStream>;

    // Grow the string by amount bytes and return a pointer to the first new
    // byte.  The caller writes there and then calls AdvanceTo to trim whatever
    // it did not use.
    char *Ensure(std::size_t amount) {
      std::size_t current = out_.size();
      out_.resize(out_.size() + amount);
      return &out_[current];
    }

    // Shrink the string so that it ends at to, which must point into (or one
    // past) the current contents.
    void AdvanceTo(char *to) {
      // Use data() rather than dereferencing begin()/end(): dereferencing the
      // end iterator is undefined and trips checked iterators.
      const char *const base = out_.data();
      assert(to <= base + out_.size());
      assert(to >= base);
      out_.resize(to - base);
    }

  private:
    std::string &out_;
};
} // namespace
#endif // UTIL_STRING_STREAM_H

View File

@ -0,0 +1,57 @@
#define BOOST_LEXICAL_CAST_ASSUME_C_LOCALE
#define BOOST_TEST_MODULE FakeOStreamTest
#include "util/string_stream.hh"
#include <boost/test/unit_test.hpp>
#include <boost/lexical_cast.hpp>
#include <cstddef>
#include <limits>
namespace util { namespace {
// Formats value with StringStream and checks that the text matches what
// boost::lexical_cast<std::string> produces for the same value.
template <class T> void TestEqual(const T value) {
std::string str;
// The temporary stream appends straight into str.
StringStream(str) << value;
BOOST_CHECK_EQUAL(boost::lexical_cast<std::string>(value), str);
}
// Runs TestEqual on the boundary values of T: the largest and smallest
// representable values, then 0, -1 converted to T, and 1.
template <class T> void TestCorners() {
  typedef std::numeric_limits<T> Limits;
  const T zero = static_cast<T>(0);
  const T minus_one = static_cast<T>(-1);
  const T one = static_cast<T>(1);
  TestEqual(Limits::max());
  TestEqual(Limits::min());
  TestEqual(zero);
  TestEqual(minus_one);
  TestEqual(one);
}
// Boundary-value checks for every built-in integer type, spelled with and
// without explicit signedness, plus std::size_t.
BOOST_AUTO_TEST_CASE(Integer) {
TestCorners<char>();
TestCorners<signed char>();
TestCorners<unsigned char>();
TestCorners<short>();
TestCorners<signed short>();
TestCorners<unsigned short>();
TestCorners<int>();
TestCorners<unsigned int>();
TestCorners<signed int>();
TestCorners<long>();
TestCorners<unsigned long>();
TestCorners<signed long>();
TestCorners<long long>();
TestCorners<unsigned long long>();
TestCorners<signed long long>();
TestCorners<std::size_t>();
}
// An unscoped enum with a single enumerator, used to check that enum values
// can be streamed and match boost::lexical_cast's output.
enum TinyEnum { EnumValue };
BOOST_AUTO_TEST_CASE(EnumCase) {
TestEqual(EnumValue);
}
}} // namespaces

View File

@ -135,14 +135,26 @@ double WallTime() {
return Subtract(GetWall(), kRecordStart.Started());
}
double UserTime() {
#if !defined(_WIN32) && !defined(_WIN64)
double CPUTime() {
#if defined(_WIN32) || defined(_WIN64)
return 0.0;
#else
struct rusage usage;
if (getrusage(RUSAGE_SELF, &usage))
return 0.0;
return DoubleSec(usage.ru_utime);
return DoubleSec(usage.ru_utime) + DoubleSec(usage.ru_stime);
#endif
}
uint64_t RSSMax() {
#if defined(_WIN32) || defined(_WIN64)
return 0;
#else
struct rusage usage;
if (getrusage(RUSAGE_SELF, &usage))
return 0;
return static_cast<uint64_t>(usage.ru_maxrss) * 1024;
#endif
return 0.0;
}
void PrintUsage(std::ostream &out) {
@ -274,6 +286,7 @@ template <class Num> uint64_t ParseNum(const std::string &arg) {
return static_cast<uint64_t>(static_cast<double>(value) * static_cast<double>(mem) / 100.0);
}
if (after == "k") after == "K";
std::string units("bKMGTPEZY");
std::string::size_type index = units.find(after[0]);
UTIL_THROW_IF_ARG(index == std::string::npos, SizeParseError, (arg), "the allowed suffixes are " << units << "%.");

View File

@ -9,7 +9,11 @@ namespace util {
// Time in seconds since process started. Zero on unsupported platforms.
double WallTime();
double UserTime();
// User + system time.
double CPUTime();
// Resident usage in bytes.
uint64_t RSSMax();
void PrintUsage(std::ostream &to);