1
1
mirror of https://github.com/rui314/mold.git synced 2024-09-21 09:57:18 +03:00

Compress debug sections in parallel

This commit is contained in:
Rui Ueyama 2021-05-06 22:33:49 +09:00
parent 13003d2479
commit d93ad61a60
5 changed files with 143 additions and 20 deletions

View File

@ -14,7 +14,7 @@ LDFLAGS=-L$(TBB_LIBDIR) -Wl,-rpath=$(TBB_LIBDIR) \
LIBS=-lcrypto -pthread -ltbb -lmimalloc -lz -lxxhash -ldl
OBJS=main.o object_file.o input_sections.o output_chunks.o mapfile.o perf.o \
linker_script.o archive_file.o output_file.o subprocess.o gc_sections.o \
icf.o symbols.o cmdline.o filepath.o glob.o passes.o tar.o \
icf.o symbols.o cmdline.o filepath.o glob.o passes.o tar.o compress.o \
arch_x86_64.o arch_i386.o
DEBUG ?= 0

102
compress.cc Normal file
View File

@ -0,0 +1,102 @@
// This file implements a multi-threaded zlib compression routine.
//
// Multiple pieces of raw deflate-format compressed data (i.e. without
// the zlib header and trailer) can be merged just by concatenation as
// long as each piece is terminated with Z_SYNC_FLUSH.
#include "mold.h"
#include <tbb/parallel_for_each.h>
#include <zlib.h>
static constexpr i64 SHARD_SIZE = 1024 * 1024;
static std::vector<std::string_view> split(std::string_view input) {
std::vector<std::string_view> shards;
while (input.size() >= SHARD_SIZE) {
shards.push_back(input.substr(0, SHARD_SIZE));
input = input.substr(SHARD_SIZE);
}
if (!input.empty())
shards.push_back(input);
return shards;
}
static std::vector<u8> do_compress(std::string_view input) {
// Initialize zlib stream. Since debug info is generally compressed
// pretty well, we chose compression level 3.
z_stream strm;
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
int r = deflateInit2(&strm, 3, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
assert(r == Z_OK);
// Set an input buffer
strm.avail_in = input.size();
strm.next_in = (u8 *)input.data();
// Set an output buffer. deflateBound() returns an upper bound
// on the compression size. +16 for Z_SYNC_FLUSH.
std::vector<u8> buf(deflateBound(&strm, strm.avail_in) + 16);
strm.avail_out = buf.size();
strm.next_out = buf.data();
r = deflate(&strm, Z_SYNC_FLUSH);
assert(r == Z_OK);
assert(strm.avail_out > 0);
buf.resize(buf.size() - strm.avail_out);
deflateEnd(&strm);
return buf;
}
// Compresses `input`. The input is split into shards, each shard is
// compressed and checksummed in parallel, and the per-shard Adler-32
// values are then combined into the checksum of the whole input.
//
// An empty `input` yields no shards; the checksum is then the Adler-32
// value of empty data.
Compress::Compress(std::string_view input) {
  std::vector<std::string_view> inputs = split(input);
  std::vector<u64> adlers(inputs.size());
  shards.resize(inputs.size());

  // Compress each shard
  tbb::parallel_for((i64)0, (i64)inputs.size(), [&](i64 i) {
    adlers[i] = adler32(1, (u8 *)inputs[i].data(), inputs[i].size());
    shards[i] = do_compress(inputs[i]);
  });

  // Combine checksums. Start from the Adler-32 initial value (the
  // checksum of zero bytes) and fold in every shard, so that nothing
  // is read from `adlers` when there are no shards at all. Combining
  // the initial value with a shard's checksum yields that shard's
  // checksum unchanged, so the result for non-empty input is the same
  // as seeding with adlers[0].
  checksum = adler32(0L, Z_NULL, 0);
  for (i64 i = 0; i < (i64)inputs.size(); i++)
    checksum = adler32_combine(checksum, adlers[i], inputs[i].size());
}
// Returns the total number of bytes write_to() produces: 2 bytes of
// header, all compressed shards, and 6 bytes of trailer and checksum.
i64 Compress::size() const {
  constexpr i64 header_size = 2;
  constexpr i64 trailer_size = 6; // trailer and checksum

  i64 total = header_size;
  for (i64 i = 0; i < (i64)shards.size(); i++)
    total += shards[i].size();
  return total + trailer_size;
}
// Writes the complete compressed stream to `buf`, which must have room
// for size() bytes. The layout is: 2-byte zlib header, the compressed
// shards back to back, a 2-byte terminating block, and the 4-byte
// big-endian checksum.
void Compress::write_to(u8 *buf) {
  // Write a zlib-format header
  buf[0] = 0x78;
  buf[1] = 0x9c;

  // Compute the position of each shard with a running cursor. Unlike
  // seeding offsets[0] directly, this is also valid when there are no
  // shards (empty input).
  std::vector<i64> offsets(shards.size());
  i64 pos = 2; // +2 for header
  for (i64 i = 0; i < (i64)shards.size(); i++) {
    offsets[i] = pos;
    pos += shards[i].size();
  }

  // Copy compressed data
  tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) {
    memcpy(&buf[offsets[i]], shards[i].data(), shards[i].size());
  });

  // Write a trailer right after the last shard: an empty final deflate
  // block that terminates the stream. `pos` equals size() - 6 here.
  u8 *trailer = buf + pos;
  trailer[0] = 3;
  trailer[1] = 0;

  // Write a checksum
  write32be(trailer + 2, checksum);
}

33
mold.h
View File

@ -53,6 +53,7 @@ template <typename E> class Symbol;
template <typename E> struct Context;
template <typename E> struct FdeRecord;
template <typename E> struct CieRecord;
class Compress;
class TarFile;
template <typename E> void cleanup();
@ -812,7 +813,8 @@ public:
void copy_buf(Context<E> &ctx) override;
private:
std::unique_ptr<u8[]> contents;
ElfChdr<E> chdr = {};
std::unique_ptr<Compress> contents;
};
template <typename E>
@ -1241,6 +1243,21 @@ template <typename E>
void parse_nonpositional_args(Context<E> &ctx,
std::vector<std::string_view> &remaining);
//
// compress.cc
//
// Compresses a byte string into zlib format using multiple threads.
// The constructor does the compression; size() and write_to() are
// then used to emit the result into a caller-provided buffer.
class Compress {
public:
// Compresses `input` and keeps the compressed shards in memory.
Compress(std::string_view input);
// Writes the header, all compressed shards, the trailer and the
// checksum to `buf`. `buf` must have room for size() bytes.
void write_to(u8 *buf);
// Returns the number of bytes write_to() writes.
i64 size() const;
private:
// Compressed pieces of the input, in input order.
std::vector<std::vector<u8>> shards;
// Adler-32 checksum of the whole uncompressed input.
u64 checksum = 0;
};
//
// tar.cc
//
@ -2127,3 +2144,17 @@ template <typename T, typename U>
inline void sort(T &vec, U less) {
std::stable_sort(vec.begin(), vec.end(), less);
}
// Reads the 8 bytes at `buf` as a big-endian 64-bit unsigned integer
// (buf[0] is the most significant byte).
inline u64 read64be(u8 *buf) {
  u64 val = 0;
  for (int i = 0; i < 8; i++)
    val = (val << 8) | (u64)buf[i];
  return val;
}
// Stores `val` into the 4 bytes at `buf` in big-endian order
// (most significant byte first).
inline void write32be(u8 *buf, u32 val) {
  for (int i = 0; i < 4; i++)
    buf[i] = val >> (24 - 8 * i);
}

View File

@ -181,13 +181,6 @@ u32 ObjectFile<E>::read_note_gnu_property(Context<E> &ctx,
return ret;
}
// Decodes a big-endian 64-bit unsigned integer from the 8 bytes
// starting at `buf`.
static u64 read64be(u8 *buf) {
  // Assemble the upper and lower 32 bits separately, then join them.
  u64 hi = ((u64)buf[0] << 24) | ((u64)buf[1] << 16) |
           ((u64)buf[2] << 8) | (u64)buf[3];
  u64 lo = ((u64)buf[4] << 24) | ((u64)buf[5] << 16) |
           ((u64)buf[6] << 8) | (u64)buf[7];
  return (hi << 32) | lo;
}
template <typename E>
std::pair<std::string_view, const ElfShdr<E> *>
ObjectFile<E>::uncompress_contents(Context<E> &ctx, const ElfShdr<E> &shdr,

View File

@ -1634,26 +1634,23 @@ CompressedSection<E>::CompressedSection(Context<E> &ctx, OutputChunk<E> &chunk)
std::unique_ptr<u8[]> buf(new u8[chunk.shdr.sh_size]);
chunk.write_to(ctx, buf.get());
ElfChdr<E> hdr = {};
hdr.ch_type = ELFCOMPRESS_ZLIB;
hdr.ch_size = chunk.shdr.sh_size;
hdr.ch_addralign = chunk.shdr.sh_addralign;
chdr.ch_type = ELFCOMPRESS_ZLIB;
chdr.ch_size = chunk.shdr.sh_size;
chdr.ch_addralign = chunk.shdr.sh_addralign;
unsigned long size = compressBound(chunk.shdr.sh_size);
contents.reset(new u8[sizeof(hdr) + size]);
memcpy(contents.get(), &hdr, sizeof(hdr));
int res = compress2(contents.get() + sizeof(hdr), &size, buf.get(),
chunk.shdr.sh_size, Z_DEFAULT_COMPRESSION);
contents.reset(new Compress({(char *)buf.get(), chunk.shdr.sh_size}));
this->shdr = chunk.shdr;
this->shdr.sh_flags |= SHF_COMPRESSED;
this->shdr.sh_addralign = 1;
this->shdr.sh_size = sizeof(hdr) + size;
this->shdr.sh_size = sizeof(chdr) + contents->size();
}
template <typename E>
void CompressedSection<E>::copy_buf(Context<E> &ctx) {
memcpy(ctx.buf + this->shdr.sh_offset, contents.get(), this->shdr.sh_size);
u8 *base = ctx.buf + this->shdr.sh_offset;
memcpy(base, &chdr, sizeof(chdr));
contents->write_to(base + sizeof(chdr));
}
template <typename E>