Add a multi-threaded zlib compressor (compress.cc) and use it for
compressed output sections.

Section contents are split into 1 MiB shards that are deflated in
parallel and concatenated into a single zlib stream. CompressedSection
now keeps the ELF compression header and the compressed payload
separately and writes both of them in copy_buf().

Note: the blob id on the "index" line of compress.cc is the one of the
originally posted revision of this patch.

diff --git a/Makefile b/Makefile
index f549f234..c23c96ea 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,7 @@ LDFLAGS=-L$(TBB_LIBDIR) -Wl,-rpath=$(TBB_LIBDIR) \
 LIBS=-lcrypto -pthread -ltbb -lmimalloc -lz -lxxhash -ldl
 OBJS=main.o object_file.o input_sections.o output_chunks.o mapfile.o perf.o \
   linker_script.o archive_file.o output_file.o subprocess.o gc_sections.o \
-  icf.o symbols.o cmdline.o filepath.o glob.o passes.o tar.o \
+  icf.o symbols.o cmdline.o filepath.o glob.o passes.o tar.o compress.o \
   arch_x86_64.o arch_i386.o
 
 DEBUG ?= 0
diff --git a/compress.cc b/compress.cc
new file mode 100644
index 00000000..09ed03d5
--- /dev/null
+++ b/compress.cc
@@ -0,0 +1,125 @@
+// This file implements a multi-threaded zlib compression routine.
+//
+// The input is split into fixed-size shards, and each shard is
+// compressed independently into a raw deflate stream (i.e. without a
+// zlib header or trailer). Multiple raw deflate streams can be merged
+// just by concatenation as long as they are terminated with
+// Z_SYNC_FLUSH, which aligns the output on a byte boundary. The
+// concatenated shards are then wrapped with a zlib header, a final
+// empty deflate block and an Adler-32 checksum of the whole input.
+
+#include "mold.h"
+
+#include <tbb/parallel_for.h>
+#include <zlib.h>
+
+static constexpr i64 SHARD_SIZE = 1024 * 1024;
+
+// Splits `input` into consecutive pieces of SHARD_SIZE bytes. The last
+// piece may be shorter. Returns an empty vector for an empty input.
+static std::vector<std::string_view> split(std::string_view input) {
+  std::vector<std::string_view> shards;
+
+  while (input.size() >= SHARD_SIZE) {
+    shards.push_back(input.substr(0, SHARD_SIZE));
+    input = input.substr(SHARD_SIZE);
+  }
+  if (!input.empty())
+    shards.push_back(input);
+  return shards;
+}
+
+// Compresses `input` into a raw deflate stream terminated with
+// Z_SYNC_FLUSH and returns the compressed bytes.
+static std::vector<u8> do_compress(std::string_view input) {
+  // Initialize zlib stream. Since debug info is generally compressed
+  // pretty well, we chose compression level 3. The negative windowBits
+  // value (-15) makes zlib produce a raw deflate stream.
+  z_stream strm;
+  strm.zalloc = Z_NULL;
+  strm.zfree = Z_NULL;
+  strm.opaque = Z_NULL;
+  int r = deflateInit2(&strm, 3, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
+  assert(r == Z_OK);
+
+  // Set an input buffer
+  strm.avail_in = input.size();
+  strm.next_in = (u8 *)input.data();
+
+  // Set an output buffer. deflateBound() returns an upper bound
+  // on the compression size. +16 for Z_SYNC_FLUSH.
+  std::vector<u8> buf(deflateBound(&strm, strm.avail_in) + 16);
+
+  strm.avail_out = buf.size();
+  strm.next_out = buf.data();
+
+  r = deflate(&strm, Z_SYNC_FLUSH);
+  assert(r == Z_OK);
+  assert(strm.avail_out > 0);
+
+  buf.resize(buf.size() - strm.avail_out);
+  deflateEnd(&strm);
+  return buf;
+}
+
+// Compresses `input`. Shards are compressed in parallel, and the
+// Adler-32 checksum of the whole input is computed along the way.
+Compress::Compress(std::string_view input) {
+  std::vector<std::string_view> inputs = split(input);
+  std::vector<u64> adlers(inputs.size());
+  shards.resize(inputs.size());
+
+  // Compress each shard
+  tbb::parallel_for((i64)0, (i64)inputs.size(), [&](i64 i) {
+    adlers[i] = adler32(1, (u8 *)inputs[i].data(), inputs[i].size());
+    shards[i] = do_compress(inputs[i]);
+  });
+
+  // Combine checksums. We start with 1, the Adler-32 checksum of an
+  // empty string, instead of adlers[0] so that an empty input, for
+  // which there is no shard at all, does not cause an out-of-bounds
+  // access.
+  checksum = 1;
+  for (i64 i = 0; i < (i64)inputs.size(); i++)
+    checksum = adler32_combine(checksum, adlers[i], inputs[i].size());
+}
+
+// Returns the size of the zlib stream that write_to() produces.
+i64 Compress::size() const {
+  i64 size = 2;    // +2 for header
+  for (const std::vector<u8> &shard : shards)
+    size += shard.size();
+  return size + 6; // +6 for trailer and checksum
+}
+
+// Writes the complete zlib stream to `buf`, which must have room for
+// at least size() bytes.
+void Compress::write_to(u8 *buf) {
+  // Write a zlib-format header
+  buf[0] = 0x78;
+  buf[1] = 0x9c;
+
+  // Compute where each shard goes. A running sum is used instead of
+  // an unconditional write to offsets[0] so that having no shard at
+  // all is handled correctly.
+  std::vector<i64> offsets(shards.size());
+  i64 offset = 2; // +2 for header
+  for (i64 i = 0; i < (i64)shards.size(); i++) {
+    offsets[i] = offset;
+    offset += shards[i].size();
+  }
+
+  // Copy compressed data
+  tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) {
+    memcpy(&buf[offsets[i]], shards[i].data(), shards[i].size());
+  });
+
+  // Write a trailer, which is an empty final deflate block with
+  // fixed Huffman codes.
+  u8 *end = buf + size();
+  end[-6] = 3;
+  end[-5] = 0;
+
+  // Write a checksum
+  write32be(end - 4, checksum);
+}
diff --git a/mold.h b/mold.h
index 25cfe30e..9c4f6b66 100644
--- a/mold.h
+++ b/mold.h
@@ -53,6 +53,7 @@ template <typename E> class Symbol;
 template <typename E> struct Context;
 template <typename E> struct FdeRecord;
 template <typename E> struct CieRecord;
+class Compress;
 class TarFile;
 
 template <typename E> void cleanup();
@@ -812,7 +813,8 @@ public:
   void copy_buf(Context<E> &ctx) override;
 
 private:
-  std::unique_ptr<u8[]> contents;
+  ElfChdr<E> chdr = {};
+  std::unique_ptr<Compress> contents;
 };
 
 template <typename E>
@@ -1241,6 +1243,21 @@ template <typename E>
 void parse_nonpositional_args(Context<E> &ctx,
                               std::vector<std::string_view> &remaining);
 
+//
+// compress.cc
+//
+
+class Compress {
+public:
+  Compress(std::string_view input);
+  void write_to(u8 *buf);
+  i64 size() const;
+
+private:
+  std::vector<std::vector<u8>> shards;
+  u64 checksum = 0;
+};
+
 //
 // tar.cc
 //
@@ -2127,3 +2144,17 @@ template <typename T, typename U>
 inline void sort(T &vec, U less) {
   std::stable_sort(vec.begin(), vec.end(), less);
 }
+
+inline u64 read64be(u8 *buf) {
+  return ((u64)buf[0] << 56) | ((u64)buf[1] << 48) |
+         ((u64)buf[2] << 40) | ((u64)buf[3] << 32) |
+         ((u64)buf[4] << 24) | ((u64)buf[5] << 16) |
+         ((u64)buf[6] << 8) | (u64)buf[7];
+}
+
+inline void write32be(u8 *buf, u32 val) {
+  buf[0] = val >> 24;
+  buf[1] = val >> 16;
+  buf[2] = val >> 8;
+  buf[3] = val;
+}
diff --git a/object_file.cc b/object_file.cc
index 4bd71d34..70abaf5f 100644
--- a/object_file.cc
+++ b/object_file.cc
@@ -181,13 +181,6 @@ u32 ObjectFile<E>::read_note_gnu_property(Context<E> &ctx,
   return ret;
 }
 
-static u64 read64be(u8 *buf) {
-  return ((u64)buf[0] << 56) | ((u64)buf[1] << 48) |
-         ((u64)buf[2] << 40) | ((u64)buf[3] << 32) |
-         ((u64)buf[4] << 24) | ((u64)buf[5] << 16) |
-         ((u64)buf[6] << 8) | (u64)buf[7];
-}
-
 template <typename E>
 std::pair<std::string_view, const ElfShdr<E> *>
 ObjectFile<E>::uncompress_contents(Context<E> &ctx, const ElfShdr<E> &shdr,
diff --git a/output_chunks.cc b/output_chunks.cc
index 122262d0..2c3526ea 100644
--- a/output_chunks.cc
+++ b/output_chunks.cc
@@ -1634,26 +1634,23 @@ CompressedSection<E>::CompressedSection(Context<E> &ctx, OutputChunk<E> &chunk)
   std::unique_ptr<u8[]> buf(new u8[chunk.shdr.sh_size]);
   chunk.write_to(ctx, buf.get());
 
-  ElfChdr<E> hdr = {};
-  hdr.ch_type = ELFCOMPRESS_ZLIB;
-  hdr.ch_size = chunk.shdr.sh_size;
-  hdr.ch_addralign = chunk.shdr.sh_addralign;
+  chdr.ch_type = ELFCOMPRESS_ZLIB;
+  chdr.ch_size = chunk.shdr.sh_size;
+  chdr.ch_addralign = chunk.shdr.sh_addralign;
 
-  unsigned long size = compressBound(chunk.shdr.sh_size);
-  contents.reset(new u8[sizeof(hdr) + size]);
-  memcpy(contents.get(), &hdr, sizeof(hdr));
-  int res = compress2(contents.get() + sizeof(hdr), &size, buf.get(),
-                      chunk.shdr.sh_size, Z_DEFAULT_COMPRESSION);
+  contents.reset(new Compress({(char *)buf.get(), chunk.shdr.sh_size}));
 
   this->shdr = chunk.shdr;
   this->shdr.sh_flags |= SHF_COMPRESSED;
   this->shdr.sh_addralign = 1;
-  this->shdr.sh_size = sizeof(hdr) + size;
+  this->shdr.sh_size = sizeof(chdr) + contents->size();
 }
 
 template <typename E>
 void CompressedSection<E>::copy_buf(Context<E> &ctx) {
-  memcpy(ctx.buf + this->shdr.sh_offset, contents.get(), this->shdr.sh_size);
+  u8 *base = ctx.buf + this->shdr.sh_offset;
+  memcpy(base, &chdr, sizeof(chdr));
+  contents->write_to(base + sizeof(chdr));
 }
 
 template <typename E>