mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 13:23:25 +03:00
KenLM 014bced6
This commit is contained in:
parent
8af1d6a017
commit
a7731ce432
@ -1,6 +1,7 @@
|
||||
#include "lm/read_arpa.hh"
|
||||
|
||||
#include "lm/blank.hh"
|
||||
#include "util/file.hh"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
|
@ -65,13 +65,13 @@ class PartialViewProxy {
|
||||
|
||||
typedef util::ProxyIterator<PartialViewProxy> PartialIter;
|
||||
|
||||
FILE *DiskFlush(const void *mem_begin, const void *mem_end, const util::TempMaker &maker) {
|
||||
util::scoped_fd file(maker.Make());
|
||||
FILE *DiskFlush(const void *mem_begin, const void *mem_end, const std::string &temp_prefix) {
|
||||
util::scoped_fd file(util::MakeTemp(temp_prefix));
|
||||
util::WriteOrThrow(file.get(), mem_begin, (uint8_t*)mem_end - (uint8_t*)mem_begin);
|
||||
return util::FDOpenOrThrow(file);
|
||||
}
|
||||
|
||||
FILE *WriteContextFile(uint8_t *begin, uint8_t *end, const util::TempMaker &maker, std::size_t entry_size, unsigned char order) {
|
||||
FILE *WriteContextFile(uint8_t *begin, uint8_t *end, const std::string &temp_prefix, std::size_t entry_size, unsigned char order) {
|
||||
const size_t context_size = sizeof(WordIndex) * (order - 1);
|
||||
// Sort just the contexts using the same memory.
|
||||
PartialIter context_begin(PartialViewProxy(begin + sizeof(WordIndex), entry_size, context_size));
|
||||
@ -84,7 +84,7 @@ FILE *WriteContextFile(uint8_t *begin, uint8_t *end, const util::TempMaker &make
|
||||
#endif
|
||||
(context_begin, context_end, util::SizedCompare<EntryCompare, PartialViewProxy>(EntryCompare(order - 1)));
|
||||
|
||||
util::scoped_FILE out(maker.MakeFile());
|
||||
util::scoped_FILE out(util::FMakeTemp(temp_prefix));
|
||||
|
||||
// Write out to file and uniqueify at the same time. Could have used unique_copy if there was an appropriate OutputIterator.
|
||||
if (context_begin == context_end) return out.release();
|
||||
@ -114,12 +114,12 @@ struct FirstCombine {
|
||||
}
|
||||
};
|
||||
|
||||
template <class Combine> FILE *MergeSortedFiles(FILE *first_file, FILE *second_file, const util::TempMaker &maker, std::size_t weights_size, unsigned char order, const Combine &combine) {
|
||||
template <class Combine> FILE *MergeSortedFiles(FILE *first_file, FILE *second_file, const std::string &temp_prefix, std::size_t weights_size, unsigned char order, const Combine &combine) {
|
||||
std::size_t entry_size = sizeof(WordIndex) * order + weights_size;
|
||||
RecordReader first, second;
|
||||
first.Init(first_file, entry_size);
|
||||
second.Init(second_file, entry_size);
|
||||
util::scoped_FILE out_file(maker.MakeFile());
|
||||
util::scoped_FILE out_file(util::FMakeTemp(temp_prefix));
|
||||
EntryCompare less(order);
|
||||
while (first && second) {
|
||||
if (less(first.Data(), second.Data())) {
|
||||
@ -177,9 +177,8 @@ void RecordReader::Rewind() {
|
||||
}
|
||||
|
||||
SortedFiles::SortedFiles(const Config &config, util::FilePiece &f, std::vector<uint64_t> &counts, size_t buffer, const std::string &file_prefix, SortedVocabulary &vocab) {
|
||||
util::TempMaker maker(file_prefix);
|
||||
PositiveProbWarn warn(config.positive_log_probability);
|
||||
unigram_.reset(maker.Make());
|
||||
unigram_.reset(util::MakeTemp(file_prefix));
|
||||
{
|
||||
// In case <unk> appears.
|
||||
size_t size_out = (counts[0] + 1) * sizeof(ProbBackoff);
|
||||
@ -202,7 +201,7 @@ SortedFiles::SortedFiles(const Config &config, util::FilePiece &f, std::vector<u
|
||||
if (!mem.get()) UTIL_THROW(util::ErrnoException, "malloc failed for sort buffer size " << buffer);
|
||||
|
||||
for (unsigned char order = 2; order <= counts.size(); ++order) {
|
||||
ConvertToSorted(f, vocab, counts, maker, order, warn, mem.get(), buffer);
|
||||
ConvertToSorted(f, vocab, counts, file_prefix, order, warn, mem.get(), buffer);
|
||||
}
|
||||
ReadEnd(f);
|
||||
}
|
||||
@ -227,7 +226,7 @@ class Closer {
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void SortedFiles::ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vocab, const std::vector<uint64_t> &counts, const util::TempMaker &maker, unsigned char order, PositiveProbWarn &warn, void *mem, std::size_t mem_size) {
|
||||
void SortedFiles::ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vocab, const std::vector<uint64_t> &counts, const std::string &file_prefix, unsigned char order, PositiveProbWarn &warn, void *mem, std::size_t mem_size) {
|
||||
ReadNGramHeader(f, order);
|
||||
const size_t count = counts[order - 1];
|
||||
// Size of weights. Does it include backoff?
|
||||
@ -261,8 +260,8 @@ void SortedFiles::ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vo
|
||||
std::sort
|
||||
#endif
|
||||
(NGramIter(proxy_begin), NGramIter(proxy_end), util::SizedCompare<EntryCompare>(EntryCompare(order)));
|
||||
files.push_back(DiskFlush(begin, out_end, maker));
|
||||
contexts.push_back(WriteContextFile(begin, out_end, maker, entry_size, order));
|
||||
files.push_back(DiskFlush(begin, out_end, file_prefix));
|
||||
contexts.push_back(WriteContextFile(begin, out_end, file_prefix, entry_size, order));
|
||||
|
||||
done += (out_end - begin) / entry_size;
|
||||
}
|
||||
@ -270,10 +269,10 @@ void SortedFiles::ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vo
|
||||
// All individual files created. Merge them.
|
||||
|
||||
while (files.size() > 1) {
|
||||
files.push_back(MergeSortedFiles(files[0], files[1], maker, weights_size, order, ThrowCombine()));
|
||||
files.push_back(MergeSortedFiles(files[0], files[1], file_prefix, weights_size, order, ThrowCombine()));
|
||||
files_closer.PopFront();
|
||||
files_closer.PopFront();
|
||||
contexts.push_back(MergeSortedFiles(contexts[0], contexts[1], maker, 0, order - 1, FirstCombine()));
|
||||
contexts.push_back(MergeSortedFiles(contexts[0], contexts[1], file_prefix, 0, order - 1, FirstCombine()));
|
||||
contexts_closer.PopFront();
|
||||
contexts_closer.PopFront();
|
||||
}
|
||||
|
@ -18,7 +18,6 @@
|
||||
|
||||
namespace util {
|
||||
class FilePiece;
|
||||
class TempMaker;
|
||||
} // namespace util
|
||||
|
||||
namespace lm {
|
||||
@ -101,7 +100,7 @@ class SortedFiles {
|
||||
}
|
||||
|
||||
private:
|
||||
void ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vocab, const std::vector<uint64_t> &counts, const util::TempMaker &maker, unsigned char order, PositiveProbWarn &warn, void *mem, std::size_t mem_size);
|
||||
void ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vocab, const std::vector<uint64_t> &counts, const std::string &prefix, unsigned char order, PositiveProbWarn &warn, void *mem, std::size_t mem_size);
|
||||
|
||||
util::scoped_fd unigram_;
|
||||
|
||||
|
@ -79,11 +79,6 @@ ErrnoException::ErrnoException() throw() : errno_(errno) {
|
||||
|
||||
ErrnoException::~ErrnoException() throw() {}
|
||||
|
||||
EndOfFileException::EndOfFileException() throw() {
|
||||
*this << "End of file";
|
||||
}
|
||||
EndOfFileException::~EndOfFileException() throw() {}
|
||||
|
||||
OverflowException::OverflowException() throw() {}
|
||||
OverflowException::~OverflowException() throw() {}
|
||||
|
||||
|
@ -44,7 +44,7 @@ class Exception : public std::exception {
|
||||
};
|
||||
|
||||
/* This implements the normal operator<< for Exception and all its children.
|
||||
* SNIFAE means it only applies to Exception. Think of this as an ersatz
|
||||
* SFINAE means it only applies to Exception. Think of this as an ersatz
|
||||
* boost::enable_if.
|
||||
*/
|
||||
template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data) {
|
||||
@ -62,30 +62,26 @@ template <class Except, class Data> typename Except::template ExceptionTag<Excep
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define UTIL_SET_LOCATION(UTIL_e, child, condition) do { \
|
||||
(UTIL_e).SetLocation(__FILE__, __LINE__, UTIL_FUNC_NAME, (child), (condition)); \
|
||||
} while (0)
|
||||
|
||||
/* Create an instance of Exception, add the message Modify, and throw it.
|
||||
* Modify is appended to the what() message and can contain << for ostream
|
||||
* operations.
|
||||
*
|
||||
* do .. while kludge to swallow trailing ; character
|
||||
* http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html .
|
||||
* Arg can be a constructor argument to the exception.
|
||||
*/
|
||||
#define UTIL_THROW(Exception, Modify) do { \
|
||||
Exception UTIL_e; \
|
||||
UTIL_SET_LOCATION(UTIL_e, #Exception, NULL); \
|
||||
#define UTIL_THROW_BACKEND(Condition, Exception, Arg, Modify) do { \
|
||||
Exception UTIL_e Arg; \
|
||||
UTIL_e.SetLocation(__FILE__, __LINE__, UTIL_FUNC_NAME, #Exception, Condition); \
|
||||
UTIL_e << Modify; \
|
||||
throw UTIL_e; \
|
||||
} while (0)
|
||||
|
||||
#define UTIL_THROW_VAR(Var, Modify) do { \
|
||||
Exception &UTIL_e = (Var); \
|
||||
UTIL_SET_LOCATION(UTIL_e, NULL, NULL); \
|
||||
UTIL_e << Modify; \
|
||||
throw UTIL_e; \
|
||||
} while (0)
|
||||
#define UTIL_THROW_ARG(Exception, Arg, Modify) \
|
||||
UTIL_THROW_BACKEND(NULL, Exception, Arg, Modify)
|
||||
|
||||
#define UTIL_THROW(Exception, Modify) \
|
||||
UTIL_THROW_BACKEND(NULL, Exception, , Modify);
|
||||
|
||||
#if __GNUC__ >= 3
|
||||
#define UTIL_UNLIKELY(x) __builtin_expect (!!(x), 0)
|
||||
@ -93,15 +89,16 @@ template <class Except, class Data> typename Except::template ExceptionTag<Excep
|
||||
#define UTIL_UNLIKELY(x) (x)
|
||||
#endif
|
||||
|
||||
#define UTIL_THROW_IF(Condition, Exception, Modify) do { \
|
||||
#define UTIL_THROW_IF_ARG(Condition, Exception, Arg, Modify) do { \
|
||||
if (UTIL_UNLIKELY(Condition)) { \
|
||||
Exception UTIL_e; \
|
||||
UTIL_SET_LOCATION(UTIL_e, #Exception, #Condition); \
|
||||
UTIL_e << Modify; \
|
||||
throw UTIL_e; \
|
||||
UTIL_THROW_BACKEND(#Condition, Exception, Arg, Modify); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define UTIL_THROW_IF(Condition, Exception, Modify) \
|
||||
UTIL_THROW_IF_ARG(Condition, Exception, , Modify)
|
||||
|
||||
// Exception that records errno and adds it to the message.
|
||||
class ErrnoException : public Exception {
|
||||
public:
|
||||
ErrnoException() throw();
|
||||
@ -114,12 +111,7 @@ class ErrnoException : public Exception {
|
||||
int errno_;
|
||||
};
|
||||
|
||||
class EndOfFileException : public Exception {
|
||||
public:
|
||||
EndOfFileException() throw();
|
||||
~EndOfFileException() throw();
|
||||
};
|
||||
|
||||
// Utilities for overflow checking.
|
||||
class OverflowException : public Exception {
|
||||
public:
|
||||
OverflowException() throw();
|
||||
|
151
util/file.cc
151
util/file.cc
@ -7,9 +7,11 @@
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
@ -40,6 +42,18 @@ scoped_FILE::~scoped_FILE() {
|
||||
}
|
||||
}
|
||||
|
||||
// Note that ErrnoException records errno before NameFromFD is called.
|
||||
FDException::FDException(int fd) throw() : fd_(fd), name_guess_(NameFromFD(fd)) {
|
||||
*this << "in " << name_guess_ << ' ';
|
||||
}
|
||||
|
||||
FDException::~FDException() throw() {}
|
||||
|
||||
EndOfFileException::EndOfFileException() throw() {
|
||||
*this << "End of file";
|
||||
}
|
||||
EndOfFileException::~EndOfFileException() throw() {}
|
||||
|
||||
int OpenReadOrThrow(const char *name) {
|
||||
int ret;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
@ -78,8 +92,14 @@ uint64_t SizeFile(int fd) {
|
||||
#endif
|
||||
}
|
||||
|
||||
uint64_t SizeOrThrow(int fd) {
|
||||
uint64_t ret = SizeFile(fd);
|
||||
UTIL_THROW_IF_ARG(ret == kBadSize, FDException, (fd), "Failed to size");
|
||||
return ret;
|
||||
}
|
||||
|
||||
void ResizeOrThrow(int fd, uint64_t to) {
|
||||
UTIL_THROW_IF(
|
||||
UTIL_THROW_IF_ARG(
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
_chsize_s
|
||||
#elif defined(OS_ANDROID)
|
||||
@ -87,7 +107,7 @@ void ResizeOrThrow(int fd, uint64_t to) {
|
||||
#else
|
||||
ftruncate
|
||||
#endif
|
||||
(fd, to), ErrnoException, "Resizing to " << to << " bytes failed");
|
||||
(fd, to), FDException, (fd), "while resizing to " << to << " bytes");
|
||||
}
|
||||
|
||||
std::size_t PartialRead(int fd, void *to, std::size_t amount) {
|
||||
@ -95,9 +115,13 @@ std::size_t PartialRead(int fd, void *to, std::size_t amount) {
|
||||
amount = min(static_cast<std::size_t>(INT_MAX), amount);
|
||||
int ret = _read(fd, to, amount);
|
||||
#else
|
||||
ssize_t ret = read(fd, to, amount);
|
||||
errno = 0;
|
||||
ssize_t ret;
|
||||
do {
|
||||
ret = read(fd, to, amount);
|
||||
} while (ret == -1 && errno == EINTR);
|
||||
#endif
|
||||
UTIL_THROW_IF(ret < 0, ErrnoException, "Reading " << amount << " from fd " << fd << " failed.");
|
||||
UTIL_THROW_IF_ARG(ret < 0, FDException, (fd), "while reading " << amount << " bytes");
|
||||
return static_cast<std::size_t>(ret);
|
||||
}
|
||||
|
||||
@ -105,7 +129,7 @@ void ReadOrThrow(int fd, void *to_void, std::size_t amount) {
|
||||
uint8_t *to = static_cast<uint8_t*>(to_void);
|
||||
while (amount) {
|
||||
std::size_t ret = PartialRead(fd, to, amount);
|
||||
UTIL_THROW_IF(ret == 0, EndOfFileException, " in fd " << fd << " but there should be " << amount << " more bytes to read.");
|
||||
UTIL_THROW_IF(ret == 0, EndOfFileException, " in " << NameFromFD(fd) << " but there should be " << amount << " more bytes to read.");
|
||||
amount -= ret;
|
||||
to += ret;
|
||||
}
|
||||
@ -123,29 +147,59 @@ std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) {
|
||||
return amount;
|
||||
}
|
||||
|
||||
void PReadOrThrow(int fd, void *to_void, std::size_t size, uint64_t off) {
|
||||
uint8_t *to = static_cast<uint8_t*>(to_void);
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
UTIL_THROW(Exception, "TODO: PReadOrThrow for windows using ReadFile http://stackoverflow.com/questions/766477/are-there-equivalents-to-pread-on-different-platforms");
|
||||
#else
|
||||
for (;size ;) {
|
||||
ssize_t ret;
|
||||
errno = 0;
|
||||
do {
|
||||
#ifdef OS_ANDROID
|
||||
ret = pread64(fd, to, size, off);
|
||||
#else
|
||||
ret = pread(fd, to, size, off);
|
||||
#endif
|
||||
} while (ret == -1 && errno == EINTR);
|
||||
if (ret <= 0) {
|
||||
UTIL_THROW_IF(ret == 0, EndOfFileException, " for reading " << size << " bytes at " << off << " from " << NameFromFD(fd));
|
||||
UTIL_THROW_ARG(FDException, (fd), "while reading " << size << " bytes at offset " << off);
|
||||
}
|
||||
size -= ret;
|
||||
off += ret;
|
||||
to += ret;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
|
||||
const uint8_t *data = static_cast<const uint8_t*>(data_void);
|
||||
while (size) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
int ret = write(fd, data, min(static_cast<std::size_t>(INT_MAX), size));
|
||||
#else
|
||||
ssize_t ret = write(fd, data, size);
|
||||
errno = 0;
|
||||
ssize_t ret;
|
||||
do {
|
||||
ret = write(fd, data, size);
|
||||
} while (ret == -1 && errno == EINTR);
|
||||
#endif
|
||||
if (ret < 1) UTIL_THROW(util::ErrnoException, "Write failed");
|
||||
UTIL_THROW_IF_ARG(ret < 1, FDException, (fd), "while writing " << size << " bytes");
|
||||
data += ret;
|
||||
size -= ret;
|
||||
}
|
||||
}
|
||||
|
||||
void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
|
||||
assert(size);
|
||||
UTIL_THROW_IF(1 != std::fwrite(data, size, 1, to), util::ErrnoException, "Short write; requested size " << size);
|
||||
if (!size) return;
|
||||
UTIL_THROW_IF(1 != std::fwrite(data, size, 1, to), ErrnoException, "Short write; requested size " << size);
|
||||
}
|
||||
|
||||
void FSyncOrThrow(int fd) {
|
||||
// Apparently windows doesn't have fsync?
|
||||
#if !defined(_WIN32) && !defined(_WIN64)
|
||||
UTIL_THROW_IF(-1 == fsync(fd), ErrnoException, "Sync of " << fd << " failed.");
|
||||
UTIL_THROW_IF_ARG(-1 == fsync(fd), FDException, (fd), "Syncing");
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -164,7 +218,7 @@ typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
|
||||
|
||||
// Can't we all just get along?
|
||||
void InternalSeek(int fd, int64_t off, int whence) {
|
||||
UTIL_THROW_IF(
|
||||
UTIL_THROW_IF_ARG(
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
(__int64)-1 == _lseeki64(fd, off, whence),
|
||||
#elif defined(OS_ANDROID)
|
||||
@ -172,7 +226,7 @@ void InternalSeek(int fd, int64_t off, int whence) {
|
||||
#else
|
||||
(off_t)-1 == lseek(fd, off, whence),
|
||||
#endif
|
||||
ErrnoException, "Seek failed");
|
||||
FDException, (fd), "while seeking to " << off << " whence " << whence);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
@ -190,22 +244,18 @@ void SeekEnd(int fd) {
|
||||
|
||||
std::FILE *FDOpenOrThrow(scoped_fd &file) {
|
||||
std::FILE *ret = fdopen(file.get(), "r+b");
|
||||
if (!ret) UTIL_THROW(util::ErrnoException, "Could not fdopen descriptor " << file.get());
|
||||
UTIL_THROW_IF_ARG(!ret, FDException, (file.get()), "Could not fdopen for write");
|
||||
file.release();
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::FILE *FDOpenReadOrThrow(scoped_fd &file) {
|
||||
std::FILE *ret = fdopen(file.get(), "rb");
|
||||
if (!ret) UTIL_THROW(util::ErrnoException, "Could not fdopen descriptor " << file.get());
|
||||
UTIL_THROW_IF_ARG(!ret, FDException, (file.get()), "Could not fdopen for read");
|
||||
file.release();
|
||||
return ret;
|
||||
}
|
||||
|
||||
TempMaker::TempMaker(const std::string &prefix) : base_(prefix) {
|
||||
base_ += "XXXXXX";
|
||||
}
|
||||
|
||||
// Sigh. Windows temporary file creation is full of race conditions.
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
/* mkstemp extracted from libc/sysdeps/posix/tempname.c. Copyright
|
||||
@ -322,23 +372,76 @@ int
|
||||
mkstemp_and_unlink(char *tmpl) {
|
||||
int ret = mkstemp(tmpl);
|
||||
if (ret != -1) {
|
||||
UTIL_THROW_IF(unlink(tmpl), util::ErrnoException, "Failed to delete " << tmpl);
|
||||
UTIL_THROW_IF(unlink(tmpl), ErrnoException, "while deleting delete " << tmpl);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
int TempMaker::Make() const {
|
||||
std::string name(base_);
|
||||
int MakeTemp(const std::string &base) {
|
||||
std::string name(base);
|
||||
name += "XXXXXX";
|
||||
name.push_back(0);
|
||||
int ret;
|
||||
UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(&name[0])), util::ErrnoException, "Failed to make a temporary based on " << base_);
|
||||
UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(&name[0])), ErrnoException, "while making a temporary based on " << base);
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::FILE *TempMaker::MakeFile() const {
|
||||
util::scoped_fd file(Make());
|
||||
std::FILE *FMakeTemp(const std::string &base) {
|
||||
util::scoped_fd file(MakeTemp(base));
|
||||
return FDOpenOrThrow(file);
|
||||
}
|
||||
|
||||
int DupOrThrow(int fd) {
|
||||
int ret = dup(fd);
|
||||
UTIL_THROW_IF_ARG(ret == -1, FDException, (fd), "in duplicating the file descriptor");
|
||||
return ret;
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Try to name things but be willing to fail too.
|
||||
bool TryName(int fd, std::string &out) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
return false;
|
||||
#else
|
||||
std::string name("/proc/self/fd/");
|
||||
std::ostringstream convert;
|
||||
convert << fd;
|
||||
name += convert.str();
|
||||
|
||||
struct stat sb;
|
||||
if (-1 == lstat(name.c_str(), &sb))
|
||||
return false;
|
||||
out.resize(sb.st_size + 1);
|
||||
ssize_t ret = readlink(name.c_str(), &out[0], sb.st_size + 1);
|
||||
if (-1 == ret)
|
||||
return false;
|
||||
if (ret > sb.st_size) {
|
||||
// Increased in size?!
|
||||
return false;
|
||||
}
|
||||
out.resize(ret);
|
||||
// Don't use the non-file names.
|
||||
if (!out.empty() && out[0] != '/')
|
||||
return false;
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
} // namespace
|
||||
|
||||
std::string NameFromFD(int fd) {
|
||||
std::string ret;
|
||||
if (TryName(fd, ret)) return ret;
|
||||
switch (fd) {
|
||||
case 0: return "stdin";
|
||||
case 1: return "stdout";
|
||||
case 2: return "stderr";
|
||||
}
|
||||
ret = "fd ";
|
||||
std::ostringstream convert;
|
||||
convert << fd;
|
||||
ret += convert.str();
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace util
|
||||
|
52
util/file.hh
52
util/file.hh
@ -1,6 +1,8 @@
|
||||
#ifndef UTIL_FILE__
|
||||
#define UTIL_FILE__
|
||||
|
||||
#include "util/exception.hh"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
@ -17,7 +19,7 @@ class scoped_fd {
|
||||
|
||||
~scoped_fd();
|
||||
|
||||
void reset(int to) {
|
||||
void reset(int to = -1) {
|
||||
scoped_fd other(fd_);
|
||||
fd_ = to;
|
||||
}
|
||||
@ -63,6 +65,32 @@ class scoped_FILE {
|
||||
std::FILE *file_;
|
||||
};
|
||||
|
||||
/* Thrown for any operation where the fd is known. */
|
||||
class FDException : public ErrnoException {
|
||||
public:
|
||||
explicit FDException(int fd) throw();
|
||||
|
||||
virtual ~FDException() throw();
|
||||
|
||||
// This may no longer be valid if the exception was thrown past open.
|
||||
int FD() const { return fd_; }
|
||||
|
||||
// Guess from NameFromFD.
|
||||
const std::string &NameGuess() const { return name_guess_; }
|
||||
|
||||
private:
|
||||
int fd_;
|
||||
|
||||
std::string name_guess_;
|
||||
};
|
||||
|
||||
// End of file reached.
|
||||
class EndOfFileException : public Exception {
|
||||
public:
|
||||
EndOfFileException() throw();
|
||||
~EndOfFileException() throw();
|
||||
};
|
||||
|
||||
// Open for read only.
|
||||
int OpenReadOrThrow(const char *name);
|
||||
// Create file if it doesn't exist, truncate if it does. Opened for write.
|
||||
@ -71,12 +99,15 @@ int CreateOrThrow(const char *name);
|
||||
// Return value for SizeFile when it can't size properly.
|
||||
const uint64_t kBadSize = (uint64_t)-1;
|
||||
uint64_t SizeFile(int fd);
|
||||
uint64_t SizeOrThrow(int fd);
|
||||
|
||||
void ResizeOrThrow(int fd, uint64_t to);
|
||||
|
||||
std::size_t PartialRead(int fd, void *to, std::size_t size);
|
||||
void ReadOrThrow(int fd, void *to, std::size_t size);
|
||||
std::size_t ReadOrEOF(int fd, void *to_void, std::size_t size);
|
||||
// Positioned: unix only for now.
|
||||
void PReadOrThrow(int fd, void *to, std::size_t size, uint64_t off);
|
||||
|
||||
void WriteOrThrow(int fd, const void *data_void, std::size_t size);
|
||||
void WriteOrThrow(FILE *to, const void *data, std::size_t size);
|
||||
@ -91,17 +122,18 @@ void SeekEnd(int fd);
|
||||
std::FILE *FDOpenOrThrow(scoped_fd &file);
|
||||
std::FILE *FDOpenReadOrThrow(scoped_fd &file);
|
||||
|
||||
class TempMaker {
|
||||
public:
|
||||
explicit TempMaker(const std::string &prefix);
|
||||
// Temporary files
|
||||
int MakeTemp(const std::string &prefix);
|
||||
std::FILE *FMakeTemp(const std::string &prefix);
|
||||
|
||||
// These will already be unlinked for you.
|
||||
int Make() const;
|
||||
std::FILE *MakeFile() const;
|
||||
// dup an fd.
|
||||
int DupOrThrow(int fd);
|
||||
|
||||
private:
|
||||
std::string base_;
|
||||
};
|
||||
/* Attempt get file name from fd. This won't always work (i.e. on Windows or
|
||||
* a pipe). The file might have been renamed. It's intended for diagnostics
|
||||
* and logging only.
|
||||
*/
|
||||
std::string NameFromFD(int fd);
|
||||
|
||||
} // namespace util
|
||||
|
||||
|
@ -34,10 +34,17 @@ FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t
|
||||
Initialize(name, show_progress, min_buffer);
|
||||
}
|
||||
|
||||
FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) :
|
||||
namespace {
|
||||
std::string NamePossiblyFind(int fd, const char *name) {
|
||||
if (name) return name;
|
||||
return NameFromFD(fd);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) :
|
||||
file_(fd), total_size_(SizeFile(file_.get())), page_(SizePage()),
|
||||
progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) {
|
||||
Initialize(name, show_progress, min_buffer);
|
||||
progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + NamePossiblyFind(fd, name)) {
|
||||
Initialize(NamePossiblyFind(fd, name).c_str(), show_progress, min_buffer);
|
||||
}
|
||||
|
||||
FilePiece::~FilePiece() {}
|
||||
|
@ -29,7 +29,7 @@ class FilePiece {
|
||||
// 1 MB default.
|
||||
explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
|
||||
// Takes ownership of fd. name is used for messages.
|
||||
explicit FilePiece(int fd, const char *name, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
|
||||
explicit FilePiece(int fd, const char *name = NULL, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
|
||||
|
||||
~FilePiece();
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
// Tests might fail if you have creative characters in your path. Sue me.
|
||||
#include "util/file_piece.hh"
|
||||
|
||||
#include "util/file.hh"
|
||||
#include "util/scoped.hh"
|
||||
|
||||
#define BOOST_TEST_MODULE FilePieceTest
|
||||
|
@ -23,7 +23,7 @@ class scoped_malloc {
|
||||
|
||||
void call_realloc(std::size_t to) {
|
||||
void *ret;
|
||||
UTIL_THROW_IF(!(ret = std::realloc(p_, to)) && to, util::ErrnoException, "realloc to " << to << " bytes failed.");
|
||||
UTIL_THROW_IF(!(ret = std::realloc(p_, to)) && to, ErrnoException, "realloc to " << to << " bytes failed.");
|
||||
p_ = ret;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user