mirror of
https://github.com/rui314/mold.git
synced 2024-10-26 13:10:46 +03:00
Compare commits
11 Commits
7000503664
...
6c305541e6
Author | SHA1 | Date | |
---|---|---|---|
|
6c305541e6 | ||
|
18da5b654e | ||
|
97a1e218c5 | ||
|
f9e4cb1a7f | ||
|
60760a892a | ||
|
596ffa959a | ||
|
cd3b817f13 | ||
|
19de40fed4 | ||
|
e78e12b15b | ||
|
5b4377842b | ||
|
9ab4ab14ab |
@ -364,6 +364,7 @@ endforeach()
|
||||
# Add other non-template source files.
|
||||
target_sources(mold PRIVATE
|
||||
common/compress.cc
|
||||
common/crc32.cc
|
||||
common/demangle.cc
|
||||
common/filepath.cc
|
||||
common/glob.cc
|
||||
|
@ -155,7 +155,7 @@ example, `gcc` is used as the linker driver. Use the `-fuse-ld` option if your
|
||||
GCC is recent enough to recognize this option.
|
||||
|
||||
If you want to use mold for all projects, add the above snippet to
|
||||
`~/.config/config.nims`.
|
||||
`~/.config/nim/config.nims`.
|
||||
|
||||
</details>
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include "integers.h"
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <bit>
|
||||
#include <bitset>
|
||||
@ -899,6 +900,13 @@ std::optional<std::string_view> demangle_rust(std::string_view name);
|
||||
void acquire_global_lock();
|
||||
void release_global_lock();
|
||||
|
||||
//
|
||||
// crc32.cc
|
||||
//
|
||||
|
||||
u32 compute_crc32(u32 crc, u8 *buf, i64 len);
|
||||
std::vector<u8> crc32_solve(u32 current, u32 desired);
|
||||
|
||||
//
|
||||
// compress.cc
|
||||
//
|
||||
|
60
common/crc32.cc
Normal file
60
common/crc32.cc
Normal file
@ -0,0 +1,60 @@
|
||||
#include "common.h"
|
||||
|
||||
#include <tbb/parallel_for_each.h>
|
||||
#include <zlib.h>
|
||||
|
||||
namespace mold {
|
||||
|
||||
// This function "forges" a CRC. That is, given the current and a desired
|
||||
// CRC32 value, crc32_solve() returns a binary blob to add to the end of
|
||||
// the original data to yield the desired CRC. Trailing garbage is ignored
|
||||
// by many binary file formats, so you can create a file with a desired
|
||||
// CRC using crc32_solve(). We need it for --separate-debug-file.
|
||||
std::vector<u8> crc32_solve(u32 current, u32 desired) {
  constexpr u32 poly = 0xedb88320;

  // Start from the register value that finalizes to `desired` and run
  // the CRC shift register backwards by 32 bits. Each round multiplies
  // the value by X^-1 modulo the CRC polynomial, where X is the
  // polynomial indeterminate: rotate left by one and, if the bit that
  // wrapped around into position 0 is set, fold in the polynomial.
  u32 x = ~desired;

  for (i64 round = 0; round < 32; round++) {
    x = std::rotl(x, 1);
    if (x & 1)
      x ^= poly << 1;
  }

  // Cancel out the register value that corresponds to `current`.
  x ^= ~current;

  // Emit the 32-bit result least-significant byte first.
  return {static_cast<u8>(x), static_cast<u8>(x >> 8),
          static_cast<u8>(x >> 16), static_cast<u8>(x >> 24)};
}
|
||||
|
||||
// Compute a CRC for given data in parallel
|
||||
u32 compute_crc32(u32 crc, u8 *buf, i64 len) {
  // A contiguous slice of the input along with its standalone CRC.
  struct Shard {
    u8 *buf;
    i64 len;
    u32 crc;
  };

  constexpr i64 shard_size = 1024 * 1024; // 1 MiB

  // Carve the input into shard_size slices; only the last slice may be
  // shorter. A non-positive len yields no slices at all.
  std::vector<Shard> shards;

  for (i64 pos = 0; pos < len;) {
    i64 n = std::min(len - pos, shard_size);
    shards.push_back({buf + pos, n, 0});
    pos += n;
  }

  // Checksum every slice independently, each starting from a zero CRC.
  tbb::parallel_for_each(shards.begin(), shards.end(), [](Shard &s) {
    s.crc = crc32_z(0, s.buf, s.len);
  });

  // Fold the per-slice checksums into the caller's running CRC in
  // input order.
  u32 result = crc;
  for (const Shard &s : shards)
    result = crc32_combine(result, s.crc, s.len);
  return result;
}
|
||||
|
||||
} // namespace mold
|
@ -85,6 +85,8 @@ Options:
|
||||
--defsym=SYMBOL=VALUE Define a symbol alias
|
||||
--demangle Demangle C++ symbols in log messages (default)
|
||||
--no-demangle
|
||||
--detach Create separate debug info file in the background (default)
|
||||
--no-detach
|
||||
--enable-new-dtags Emit DT_RUNPATH for --rpath (default)
|
||||
--disable-new-dtags Emit DT_RPATH for --rpath
|
||||
--execute-only Make executable segments unreadable
|
||||
@ -143,6 +145,8 @@ Options:
|
||||
--rpath-link DIR Ignored
|
||||
--run COMMAND ARG... Run COMMAND with mold as /usr/bin/ld
|
||||
--section-start=SECTION=ADDR Set address for section
|
||||
--separate-debug-file[=FILE] Separate debug info to the specified file
|
||||
--no-separate-debug-file
|
||||
--shared, --Bshareable Create a shared library
|
||||
--shuffle-sections[=SEED] Randomize the output by shuffling input sections
|
||||
--sort-common Ignored
|
||||
@ -526,6 +530,7 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
|
||||
std::optional<SeparateCodeKind> z_separate_code;
|
||||
std::optional<bool> report_undefined;
|
||||
std::optional<bool> z_relro;
|
||||
std::optional<std::string> separate_debug_file;
|
||||
std::optional<u64> shuffle_sections_seed;
|
||||
std::unordered_set<std::string_view> rpaths;
|
||||
|
||||
@ -756,6 +761,10 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
|
||||
ctx.arg.demangle = true;
|
||||
} else if (read_flag("no-demangle")) {
|
||||
ctx.arg.demangle = false;
|
||||
} else if (read_flag("detach")) {
|
||||
ctx.arg.detach = true;
|
||||
} else if (read_flag("no-detach")) {
|
||||
ctx.arg.detach = false;
|
||||
} else if (read_flag("default-symver")) {
|
||||
ctx.arg.default_symver = true;
|
||||
} else if (read_flag("noinhibit-exec")) {
|
||||
@ -1003,6 +1012,12 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
|
||||
ctx.arg.z_origin = true;
|
||||
} else if (read_z_flag("nodefaultlib")) {
|
||||
ctx.arg.z_nodefaultlib = true;
|
||||
} else if (read_eq("separate-debug-file")) {
|
||||
separate_debug_file = arg;
|
||||
} else if (read_flag("separate-debug-file")) {
|
||||
separate_debug_file = "";
|
||||
} else if (read_flag("no-separate-debug-file")) {
|
||||
separate_debug_file.reset();
|
||||
} else if (read_z_flag("separate-loadable-segments")) {
|
||||
z_separate_code = SEPARATE_LOADABLE_SEGMENTS;
|
||||
} else if (read_z_flag("separate-code")) {
|
||||
@ -1394,9 +1409,20 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
|
||||
ctx.default_version = VER_NDX_LAST_RESERVED + 1;
|
||||
}
|
||||
|
||||
if (separate_debug_file) {
|
||||
if (separate_debug_file->empty())
|
||||
ctx.arg.separate_debug_file = ctx.arg.output + ".dbg";
|
||||
else
|
||||
ctx.arg.separate_debug_file = *separate_debug_file;
|
||||
}
|
||||
|
||||
if (ctx.arg.shared && warn_shared_textrel)
|
||||
ctx.arg.warn_textrel = true;
|
||||
|
||||
// We don't want the background process to write to stdout
|
||||
if (ctx.arg.stats || ctx.arg.perf)
|
||||
ctx.arg.detach = false;
|
||||
|
||||
ctx.arg.undefined.push_back(ctx.arg.entry);
|
||||
|
||||
for (i64 i = 0; i < ctx.arg.defsyms.size(); i++) {
|
||||
|
27
elf/main.cc
27
elf/main.cc
@ -559,14 +559,17 @@ int elf_main(int argc, char **argv) {
|
||||
// Compute the is_weak bit for each imported symbol.
|
||||
compute_imported_symbol_weakness(ctx);
|
||||
|
||||
// Compute sizes of output sections while assigning offsets
|
||||
// within an output section to input sections.
|
||||
compute_section_sizes(ctx);
|
||||
|
||||
// Sort sections by section attributes so that we'll have to
|
||||
// create as few segments as possible.
|
||||
sort_output_sections(ctx);
|
||||
|
||||
if (!ctx.arg.separate_debug_file.empty())
|
||||
separate_debug_sections(ctx);
|
||||
|
||||
// Compute sizes of output sections while assigning offsets
|
||||
// within an output section to input sections.
|
||||
compute_section_sizes(ctx);
|
||||
|
||||
// If --packed_dyn_relocs=relr was given, base relocations are stored
|
||||
// to a .relr.dyn section in a compressed form. Construct a compressed
|
||||
// relocations now so that we can fix section sizes and file layout.
|
||||
@ -653,17 +656,18 @@ int elf_main(int argc, char **argv) {
|
||||
// .note.gnu.build-id section contains a cryptographic hash of the
|
||||
// entire output file. Now that we wrote everything except build-id,
|
||||
// we can compute it.
|
||||
if (ctx.buildid) {
|
||||
compute_build_id(ctx);
|
||||
ctx.buildid->copy_buf(ctx);
|
||||
}
|
||||
if (ctx.buildid)
|
||||
write_build_id(ctx);
|
||||
|
||||
// .gdb_index's contents cannot be constructed before applying
|
||||
// relocations to other debug sections. We have relocated debug
|
||||
// sections now, so write the .gdb_index section.
|
||||
if (ctx.gdb_index)
|
||||
if (ctx.gdb_index && ctx.arg.separate_debug_file.empty())
|
||||
write_gdb_index(ctx);
|
||||
|
||||
if (!ctx.arg.separate_debug_file.empty())
|
||||
write_gnu_debuglink(ctx);
|
||||
|
||||
t_copy.stop();
|
||||
ctx.checkpoint();
|
||||
|
||||
@ -682,6 +686,9 @@ int elf_main(int argc, char **argv) {
|
||||
if (ctx.arg.print_map)
|
||||
print_map(ctx);
|
||||
|
||||
if (!ctx.arg.separate_debug_file.empty())
|
||||
write_separate_debug_file(ctx);
|
||||
|
||||
// Show stats numbers
|
||||
if (ctx.arg.stats)
|
||||
show_stats(ctx);
|
||||
@ -692,9 +699,7 @@ int elf_main(int argc, char **argv) {
|
||||
std::cout << std::flush;
|
||||
std::cerr << std::flush;
|
||||
|
||||
if (ctx.arg.fork)
|
||||
notify_parent();
|
||||
|
||||
release_global_lock();
|
||||
|
||||
if (ctx.arg.quick_exit)
|
||||
|
33
elf/mold.h
33
elf/mold.h
@ -373,7 +373,7 @@ public:
|
||||
virtual ~Chunk() = default;
|
||||
virtual bool is_header() { return false; }
|
||||
virtual OutputSection<E> *to_osec() { return nullptr; }
|
||||
virtual MergedSection<E> *to_merged_section() { return nullptr; }
|
||||
virtual void compute_section_size(Context<E> &ctx) {}
|
||||
virtual i64 get_reldyn_size(Context<E> &ctx) const { return 0; }
|
||||
virtual void construct_relr(Context<E> &ctx) {}
|
||||
virtual void copy_buf(Context<E> &ctx) {}
|
||||
@ -480,6 +480,7 @@ public:
|
||||
}
|
||||
|
||||
OutputSection<E> *to_osec() override { return this; }
|
||||
void compute_section_size(Context<E> &ctx) override;
|
||||
void construct_relr(Context<E> &ctx) override;
|
||||
void copy_buf(Context<E> &ctx) override;
|
||||
void write_to(Context<E> &ctx, u8 *buf) override;
|
||||
@ -806,9 +807,8 @@ public:
|
||||
SectionFragment<E> *insert(Context<E> &ctx, std::string_view data,
|
||||
u64 hash, i64 p2align);
|
||||
|
||||
MergedSection<E> *to_merged_section() override { return this; }
|
||||
void resolve(Context<E> &ctx);
|
||||
void assign_offsets(Context<E> &ctx);
|
||||
void compute_section_size(Context<E> &ctx) override;
|
||||
void copy_buf(Context<E> &ctx) override;
|
||||
void write_to(Context<E> &ctx, u8 *buf) override;
|
||||
void print_stats(Context<E> &ctx);
|
||||
@ -993,6 +993,22 @@ private:
|
||||
std::map<u32, u32> properties;
|
||||
};
|
||||
|
||||
template <typename E>
|
||||
class GnuDebuglinkSection : public Chunk<E> {
|
||||
public:
|
||||
GnuDebuglinkSection() {
|
||||
this->name = ".gnu_debuglink";
|
||||
this->shdr.sh_type = SHT_PROGBITS;
|
||||
this->shdr.sh_addralign = 4;
|
||||
}
|
||||
|
||||
void update_shdr(Context<E> &ctx) override;
|
||||
void copy_buf(Context<E> &ctx) override;
|
||||
|
||||
std::string filename;
|
||||
u32 crc32 = 0;
|
||||
};
|
||||
|
||||
template <typename E>
|
||||
class GdbIndexSection : public Chunk<E> {
|
||||
public:
|
||||
@ -1439,11 +1455,14 @@ template <typename E> void apply_version_script(Context<E> &);
|
||||
template <typename E> void parse_symbol_version(Context<E> &);
|
||||
template <typename E> void compute_import_export(Context<E> &);
|
||||
template <typename E> void compute_address_significance(Context<E> &);
|
||||
template <typename E> void separate_debug_sections(Context<E> &);
|
||||
template <typename E> void compute_section_headers(Context<E> &);
|
||||
template <typename E> i64 set_osec_offsets(Context<E> &);
|
||||
template <typename E> void fix_synthetic_symbols(Context<E> &);
|
||||
template <typename E> i64 compress_debug_sections(Context<E> &);
|
||||
template <typename E> void compute_build_id(Context<E> &);
|
||||
template <typename E> void write_build_id(Context<E> &);
|
||||
template <typename E> void write_gnu_debuglink(Context<E> &);
|
||||
template <typename E> void write_separate_debug_file(Context<E> &ctx);
|
||||
template <typename E> void write_dependency_file(Context<E> &);
|
||||
template <typename E> void show_stats(Context<E> &);
|
||||
|
||||
@ -1721,6 +1740,7 @@ struct Context {
|
||||
bool color_diagnostics = false;
|
||||
bool default_symver = false;
|
||||
bool demangle = true;
|
||||
bool detach = true;
|
||||
bool discard_all = false;
|
||||
bool discard_locals = false;
|
||||
bool eh_frame_hdr = true;
|
||||
@ -1807,6 +1827,7 @@ struct Context {
|
||||
std::string package_metadata;
|
||||
std::string plugin;
|
||||
std::string rpaths;
|
||||
std::string separate_debug_file;
|
||||
std::string soname;
|
||||
std::string sysroot;
|
||||
std::unique_ptr<std::unordered_set<std::string_view>> retain_symbols_file;
|
||||
@ -1885,6 +1906,9 @@ struct Context {
|
||||
|
||||
tbb::concurrent_hash_map<Symbol<E> *, std::vector<std::string>> undef_errors;
|
||||
|
||||
// For --separate-debug-file
|
||||
std::vector<Chunk<E> *> debug_chunks;
|
||||
|
||||
// Output chunks
|
||||
OutputEhdr<E> *ehdr = nullptr;
|
||||
OutputShdr<E> *shdr = nullptr;
|
||||
@ -1900,6 +1924,7 @@ struct Context {
|
||||
DynstrSection<E> *dynstr = nullptr;
|
||||
HashSection<E> *hash = nullptr;
|
||||
GnuHashSection<E> *gnu_hash = nullptr;
|
||||
GnuDebuglinkSection<E> *gnu_debuglink = nullptr;
|
||||
ShstrtabSection<E> *shstrtab = nullptr;
|
||||
PltSection<E> *plt = nullptr;
|
||||
PltGotSection<E> *pltgot = nullptr;
|
||||
|
@ -865,6 +865,84 @@ void DynamicSection<E>::copy_buf(Context<E> &ctx) {
|
||||
write_vector(ctx.buf + this->shdr.sh_offset, contents);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
|
||||
std::span<T> span(input);
|
||||
std::vector<std::span<T>> vec;
|
||||
|
||||
while (span.size() >= unit) {
|
||||
vec.push_back(span.subspan(0, unit));
|
||||
span = span.subspan(unit);
|
||||
}
|
||||
if (!span.empty())
|
||||
vec.push_back(span);
|
||||
return vec;
|
||||
}
|
||||
|
||||
|
||||
// Assign offsets to OutputSection members
|
||||
template <typename E>
|
||||
void OutputSection<E>::compute_section_size(Context<E> &ctx) {
|
||||
ElfShdr<E> &shdr = this->shdr;
|
||||
|
||||
// On most RISC systems, we need to create so-called "range extension
|
||||
// thunks" to extend branch instructions reach, as their jump
|
||||
// instructions' reach is limited. create_range_extension_thunks()
|
||||
// computes the size of the section while inserting thunks.
|
||||
if constexpr (needs_thunk<E>) {
|
||||
if ((shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable) {
|
||||
create_range_extension_thunks(ctx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Since one output section may contain millions of input sections,
|
||||
// we first split input sections into groups and assign offsets to
|
||||
// groups.
|
||||
struct Group {
|
||||
std::span<InputSection<E> *> members;
|
||||
i64 size = 0;
|
||||
i64 p2align = 0;
|
||||
i64 offset = 0;
|
||||
};
|
||||
|
||||
std::span<InputSection<E> *> mem = members;
|
||||
std::vector<Group> groups;
|
||||
constexpr i64 group_size = 10000;
|
||||
|
||||
while (!mem.empty()) {
|
||||
i64 sz = std::min<i64>(group_size, mem.size());
|
||||
groups.push_back({mem.subspan(0, sz)});
|
||||
mem = mem.subspan(sz);
|
||||
}
|
||||
|
||||
tbb::parallel_for_each(groups, [](Group &group) {
|
||||
for (InputSection<E> *isec : group.members) {
|
||||
group.size = align_to(group.size, 1 << isec->p2align) + isec->sh_size;
|
||||
group.p2align = std::max<i64>(group.p2align, isec->p2align);
|
||||
}
|
||||
});
|
||||
|
||||
shdr.sh_size = 0;
|
||||
|
||||
for (i64 i = 0; i < groups.size(); i++) {
|
||||
shdr.sh_size = align_to(shdr.sh_size, 1 << groups[i].p2align);
|
||||
groups[i].offset = shdr.sh_size;
|
||||
shdr.sh_size += groups[i].size;
|
||||
shdr.sh_addralign = std::max<u32>(shdr.sh_addralign, 1 << groups[i].p2align);
|
||||
}
|
||||
|
||||
// Assign offsets to input sections.
|
||||
tbb::parallel_for_each(groups, [](Group &group) {
|
||||
i64 offset = group.offset;
|
||||
for (InputSection<E> *isec : group.members) {
|
||||
offset = align_to(offset, 1 << isec->p2align);
|
||||
isec->offset = offset;
|
||||
offset += isec->sh_size;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void OutputSection<E>::copy_buf(Context<E> &ctx) {
|
||||
if (this->shdr.sh_type != SHT_NOBITS)
|
||||
@ -1621,10 +1699,14 @@ ElfSym<E> to_output_esym(Context<E> &ctx, Symbol<E> &sym, u32 st_name,
|
||||
};
|
||||
|
||||
i64 shndx = -1;
|
||||
InputSection<E> *isec = sym.get_input_section();
|
||||
|
||||
if (sym.has_copyrel) {
|
||||
// Symbol in .copyrel
|
||||
shndx = sym.is_copyrel_readonly ? ctx.copyrel_relro->shndx : ctx.copyrel->shndx;
|
||||
esym.st_value = sym.get_addr(ctx);
|
||||
} else if (sym.file->is_dso || sym.esym().is_undef()) {
|
||||
// Undefined symbol in a DSO
|
||||
esym.st_shndx = SHN_UNDEF;
|
||||
esym.st_size = 0;
|
||||
if (sym.is_canonical)
|
||||
@ -1637,7 +1719,7 @@ ElfSym<E> to_output_esym(Context<E> &ctx, Symbol<E> &sym, u32 st_name,
|
||||
// Section fragment
|
||||
shndx = frag->output_section.shndx;
|
||||
esym.st_value = sym.get_addr(ctx);
|
||||
} else if (!sym.get_input_section()) {
|
||||
} else if (!isec) {
|
||||
// Absolute symbol
|
||||
esym.st_shndx = SHN_ABS;
|
||||
esym.st_value = sym.get_addr(ctx);
|
||||
@ -1651,7 +1733,25 @@ ElfSym<E> to_output_esym(Context<E> &ctx, Symbol<E> &sym, u32 st_name,
|
||||
esym.st_type = STT_FUNC;
|
||||
esym.st_visibility = sym.visibility;
|
||||
esym.st_value = sym.get_plt_addr(ctx);
|
||||
} else if (!isec->output_section) {
|
||||
// Symbol in a mergeable non-SHF_ALLOC section, such as .debug_str
|
||||
assert(!(isec->shdr().sh_flags & SHF_ALLOC));
|
||||
assert(isec->shdr().sh_flags & SHF_MERGE);
|
||||
assert(!sym.file->is_dso);
|
||||
|
||||
ObjectFile<E> *file = (ObjectFile<E> *)sym.file;
|
||||
MergeableSection<E> *m =
|
||||
file->mergeable_sections[file->get_shndx(sym.esym())].get();
|
||||
|
||||
SectionFragment<E> *frag;
|
||||
i64 frag_addend;
|
||||
std::tie(frag, frag_addend) = m->get_fragment(sym.esym().st_value);
|
||||
|
||||
shndx = m->parent.shndx;
|
||||
esym.st_visibility = sym.visibility;
|
||||
esym.st_value = frag->get_addr(ctx) + frag_addend;
|
||||
} else {
|
||||
// Symbol in a regular section
|
||||
shndx = get_st_shndx(sym);
|
||||
esym.st_visibility = sym.visibility;
|
||||
esym.st_value = sym.get_addr(ctx, NO_PLT);
|
||||
@ -1987,7 +2087,7 @@ void MergedSection<E>::resolve(Context<E> &ctx) {
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void MergedSection<E>::assign_offsets(Context<E> &ctx) {
|
||||
void MergedSection<E>::compute_section_size(Context<E> &ctx) {
|
||||
if (!resolved)
|
||||
resolve(ctx);
|
||||
|
||||
@ -2848,6 +2948,20 @@ void ComdatGroupSection<E>::copy_buf(Context<E> &ctx) {
|
||||
*buf++ = chunk->shndx;
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void GnuDebuglinkSection<E>::update_shdr(Context<E> &ctx) {
|
||||
filename = std::filesystem::path(ctx.arg.separate_debug_file).filename().string();
|
||||
this->shdr.sh_size = align_to(filename.size() + 1, 4) + 4;
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void GnuDebuglinkSection<E>::copy_buf(Context<E> &ctx) {
|
||||
u8 *buf = ctx.buf + this->shdr.sh_offset;
|
||||
memset(buf, 0, this->shdr.sh_size);
|
||||
write_string(buf, filename);
|
||||
*(U32<E> *)(buf + this->shdr.sh_size - 4) = crc32;
|
||||
}
|
||||
|
||||
using E = MOLD_TARGET;
|
||||
|
||||
template class Chunk<E>;
|
||||
@ -2886,6 +3000,7 @@ template class GdbIndexSection<E>;
|
||||
template class CompressedSection<E>;
|
||||
template class RelocSection<E>;
|
||||
template class ComdatGroupSection<E>;
|
||||
template class GnuDebuglinkSection<E>;
|
||||
|
||||
template OutputSection<E> *find_section(Context<E> &, u32);
|
||||
template OutputSection<E> *find_section(Context<E> &, std::string_view);
|
||||
|
265
elf/passes.cc
265
elf/passes.cc
@ -156,6 +156,8 @@ void create_synthetic_sections(Context<E> &ctx) {
|
||||
ctx.verdef = push(new VerdefSection<E>);
|
||||
if (ctx.arg.emit_relocs)
|
||||
ctx.eh_frame_reloc = push(new EhFrameRelocSection<E>);
|
||||
if (!ctx.arg.separate_debug_file.empty())
|
||||
ctx.gnu_debuglink = push(new GnuDebuglinkSection<E>);
|
||||
|
||||
if (ctx.arg.shared || !ctx.dsos.empty() || ctx.arg.pie) {
|
||||
ctx.dynamic = push(new DynamicSection<E>(ctx));
|
||||
@ -466,20 +468,6 @@ static std::string get_cmdline_args(Context<E> &ctx) {
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
|
||||
std::span<T> span(input);
|
||||
std::vector<std::span<T>> vec;
|
||||
|
||||
while (span.size() >= unit) {
|
||||
vec.push_back(span.subspan(0, unit));
|
||||
span = span.subspan(unit);
|
||||
}
|
||||
if (!span.empty())
|
||||
vec.push_back(span);
|
||||
return vec;
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
static bool has_ctors_and_init_array(Context<E> &ctx) {
|
||||
bool x = false;
|
||||
@ -1351,84 +1339,24 @@ template <typename E>
|
||||
void compute_section_sizes(Context<E> &ctx) {
|
||||
Timer t(ctx, "compute_section_sizes");
|
||||
|
||||
struct Group {
|
||||
i64 size = 0;
|
||||
i64 p2align = 0;
|
||||
i64 offset = 0;
|
||||
std::span<InputSection<E> *> members;
|
||||
};
|
||||
|
||||
// Assign offsets to OutputSection members
|
||||
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
|
||||
OutputSection<E> *osec = chunk->to_osec();
|
||||
if (!osec)
|
||||
return;
|
||||
|
||||
// This pattern will be processed in the next loop.
|
||||
if constexpr (needs_thunk<E>)
|
||||
if ((osec->shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable)
|
||||
return;
|
||||
|
||||
// Since one output section may contain millions of input sections,
|
||||
// we first split input sections into groups and assign offsets to
|
||||
// groups.
|
||||
std::vector<Group> groups;
|
||||
constexpr i64 group_size = 10000;
|
||||
|
||||
for (std::span<InputSection<E> *> span : split(osec->members, group_size))
|
||||
groups.push_back(Group{.members = span});
|
||||
|
||||
tbb::parallel_for_each(groups, [](Group &group) {
|
||||
for (InputSection<E> *isec : group.members) {
|
||||
group.size = align_to(group.size, 1 << isec->p2align) + isec->sh_size;
|
||||
group.p2align = std::max<i64>(group.p2align, isec->p2align);
|
||||
}
|
||||
});
|
||||
|
||||
ElfShdr<E> &shdr = osec->shdr;
|
||||
shdr.sh_size = 0;
|
||||
|
||||
for (i64 i = 0; i < groups.size(); i++) {
|
||||
shdr.sh_size = align_to(shdr.sh_size, 1 << groups[i].p2align);
|
||||
groups[i].offset = shdr.sh_size;
|
||||
shdr.sh_size += groups[i].size;
|
||||
shdr.sh_addralign = std::max<u32>(shdr.sh_addralign, 1 << groups[i].p2align);
|
||||
}
|
||||
|
||||
// Assign offsets to input sections.
|
||||
tbb::parallel_for_each(groups, [](Group &group) {
|
||||
i64 offset = group.offset;
|
||||
for (InputSection<E> *isec : group.members) {
|
||||
offset = align_to(offset, 1 << isec->p2align);
|
||||
isec->offset = offset;
|
||||
offset += isec->sh_size;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
// Assign offsets to MergedSection members
|
||||
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
|
||||
if (MergedSection<E> *sec = chunk->to_merged_section())
|
||||
sec->assign_offsets(ctx);
|
||||
});
|
||||
|
||||
// On ARM32 or ARM64, we may need to create so-called "range extension
|
||||
// thunks" to extend branch instructions reach, as they can jump only
|
||||
// to ±16 MiB or ±128 MiB, respectively.
|
||||
//
|
||||
// In the following loop, we compute the sizes of sections while
|
||||
// inserting thunks. This pass cannot be parallelized. That is,
|
||||
// create_range_extension_thunks is parallelized internally, but the
|
||||
// function itself is not thread-safe.
|
||||
if constexpr (needs_thunk<E>) {
|
||||
Timer t(ctx, "create_range_extension_thunks");
|
||||
|
||||
if (!ctx.arg.relocatable)
|
||||
// Chunk<E>::compute_section_size may obtain a global lock to create
|
||||
// range extension thunks. I don't know why, but using parallel_for
|
||||
// loop both inside and outside of the lock may cause a deadlock. It
|
||||
// might be a bug in TBB. For now, I'll avoid using parallel_for_each
|
||||
// here.
|
||||
for (Chunk<E> *chunk : ctx.chunks)
|
||||
if (OutputSection<E> *osec = chunk->to_osec())
|
||||
if (osec->shdr.sh_flags & SHF_EXECINSTR)
|
||||
osec->create_range_extension_thunks(ctx);
|
||||
if (chunk->shdr.sh_flags & SHF_EXECINSTR)
|
||||
chunk->compute_section_size(ctx);
|
||||
|
||||
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
|
||||
if (!(chunk->shdr.sh_flags & SHF_EXECINSTR))
|
||||
chunk->compute_section_size(ctx);
|
||||
});
|
||||
} else {
|
||||
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
|
||||
chunk->compute_section_size(ctx);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@ -2676,6 +2604,24 @@ static i64 set_file_offsets(Context<E> &ctx) {
|
||||
return fileoff;
|
||||
}
|
||||
|
||||
// Remove debug sections from ctx.chunks and save them to ctx.debug_chunks.
|
||||
// This is for --separate-debug-file.
|
||||
template <typename E>
|
||||
void separate_debug_sections(Context<E> &ctx) {
|
||||
auto is_debug_section = [&](Chunk<E> *chunk) {
|
||||
if (chunk->shdr.sh_flags & SHF_ALLOC)
|
||||
return false;
|
||||
return chunk == ctx.gdb_index || chunk == ctx.symtab || chunk == ctx.strtab ||
|
||||
chunk->name.starts_with(".debug_");
|
||||
};
|
||||
|
||||
auto mid = std::stable_partition(ctx.chunks.begin(), ctx.chunks.end(),
|
||||
is_debug_section);
|
||||
|
||||
ctx.debug_chunks = {ctx.chunks.begin(), mid};
|
||||
ctx.chunks.erase(ctx.chunks.begin(), mid);
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void compute_section_headers(Context<E> &ctx) {
|
||||
// Update sh_size for each chunk.
|
||||
@ -3006,23 +2952,34 @@ static void blake3_hash(u8 *buf, i64 size, u8 *out) {
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void compute_build_id(Context<E> &ctx) {
|
||||
Timer t(ctx, "compute_build_id");
|
||||
std::vector<std::span<u8>> get_shards(Context<E> &ctx) {
|
||||
constexpr i64 shard_size = 4 * 1024 * 1024; // 4 MiB
|
||||
std::span<u8> buf = {ctx.buf, (size_t)ctx.output_file->filesize};
|
||||
std::vector<std::span<u8>> vec;
|
||||
|
||||
while (!buf.empty()) {
|
||||
i64 sz = std::min<i64>(shard_size, buf.size());
|
||||
vec.push_back(buf.subspan(0, sz));
|
||||
buf = buf.subspan(sz);
|
||||
}
|
||||
return vec;
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void write_build_id(Context<E> &ctx) {
|
||||
Timer t(ctx, "write_build_id");
|
||||
|
||||
switch (ctx.arg.build_id.kind) {
|
||||
case BuildId::HEX:
|
||||
ctx.buildid->contents = ctx.arg.build_id.value;
|
||||
break;
|
||||
case BuildId::HASH: {
|
||||
i64 shard_size = 4 * 1024 * 1024;
|
||||
i64 filesize = ctx.output_file->filesize;
|
||||
i64 num_shards = align_to(filesize, shard_size) / shard_size;
|
||||
std::vector<u8> shards(num_shards * BLAKE3_OUT_LEN);
|
||||
std::vector<std::span<u8>> shards = get_shards(ctx);
|
||||
std::vector<u8> hashes(shards.size() * BLAKE3_OUT_LEN);
|
||||
|
||||
tbb::parallel_for((i64)0, num_shards, [&](i64 i) {
|
||||
u8 *begin = ctx.buf + shard_size * i;
|
||||
u8 *end = (i == num_shards - 1) ? ctx.buf + filesize : begin + shard_size;
|
||||
blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN);
|
||||
tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) {
|
||||
blake3_hash(shards[i].data(), shards[i].size(),
|
||||
hashes.data() + i * BLAKE3_OUT_LEN);
|
||||
|
||||
#ifdef HAVE_MADVISE
|
||||
// Make the kernel page out the file contents we've just written
|
||||
@ -3033,7 +2990,7 @@ void compute_build_id(Context<E> &ctx) {
|
||||
});
|
||||
|
||||
u8 buf[BLAKE3_OUT_LEN];
|
||||
blake3_hash(shards.data(), shards.size(), buf);
|
||||
blake3_hash(hashes.data(), hashes.size(), buf);
|
||||
|
||||
assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN);
|
||||
ctx.buildid->contents = {buf, buf + ctx.arg.build_id.size()};
|
||||
@ -3052,8 +3009,109 @@ void compute_build_id(Context<E> &ctx) {
|
||||
default:
|
||||
unreachable();
|
||||
}
|
||||
|
||||
ctx.buildid->copy_buf(ctx);
|
||||
}
|
||||
|
||||
// A .gnu_debuglink section contains a filename and a CRC32 checksum of a
|
||||
// debug info file. When we are writing a .gnu_debuglink, we don't know
|
||||
// its CRC32 checksum because we haven't created a debug info file. So we
|
||||
// write a dummy value instead.
|
||||
//
|
||||
// We can't choose a random value as a dummy value for build
|
||||
// reproducibility. We also don't want to write a fixed value for all
|
||||
// files because the CRC checksum is in this section to prevent using
|
||||
// the wrong file when debugging. gdb rejects a debug info file if its CRC
|
||||
// doesn't match with the one in .gnu_debuglink.
|
||||
//
|
||||
// Therefore, we'll try to make our CRC checksum as unique as possible.
|
||||
// We'll remember that checksum, and after creating a debug info file, add
|
||||
// a few bytes of garbage at the end of it so that the debug info file's
|
||||
// CRC checksum becomes the one that we have precomputed.
|
||||
template <typename E>
|
||||
void write_gnu_debuglink(Context<E> &ctx) {
|
||||
Timer t(ctx, "write_gnu_debuglink");
|
||||
u32 crc32;
|
||||
|
||||
if (ctx.buildid) {
|
||||
crc32 = compute_crc32(0, ctx.buildid->contents.data(),
|
||||
ctx.buildid->contents.size());
|
||||
} else {
|
||||
std::vector<std::span<u8>> shards = get_shards(ctx);
|
||||
std::vector<U64<E>> hashes(shards.size());
|
||||
|
||||
tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) {
|
||||
hashes[i] = hash_string({(char *)shards[i].data(), shards[i].size()});
|
||||
});
|
||||
crc32 = compute_crc32(0, (u8 *)hashes.data(), hashes.size() * 8);
|
||||
}
|
||||
|
||||
ctx.gnu_debuglink->crc32 = crc32;
|
||||
ctx.gnu_debuglink->copy_buf(ctx);
|
||||
}
|
||||
|
||||
// Write a separate debug file. This function is called after we finish
|
||||
// writing to the usual output file.
|
||||
template <typename E>
|
||||
void write_separate_debug_file(Context<E> &ctx) {
|
||||
Timer t(ctx, "write_separate_debug_file");
|
||||
|
||||
// We want to write to the debug info file in background so that the
|
||||
// user doesn't have to wait for it to complete.
|
||||
if (ctx.arg.detach)
|
||||
notify_parent();
|
||||
|
||||
// A debug info file contains all sections as the original file, though
|
||||
// most of them can be empty as if they were bss sections. We convert
|
||||
// real sections into dummy sections here.
|
||||
for (i64 i = 0; i < ctx.chunks.size(); i++) {
|
||||
Chunk<E> *chunk = ctx.chunks[i];
|
||||
if (chunk != ctx.ehdr && chunk != ctx.shdr && chunk != ctx.shstrtab &&
|
||||
chunk->shdr.sh_type != SHT_NOTE) {
|
||||
Chunk<E> *sec = new OutputSection<E>(chunk->name, SHT_NULL);
|
||||
sec->shdr = chunk->shdr;
|
||||
sec->shdr.sh_type = SHT_NOBITS;
|
||||
|
||||
ctx.chunks[i] = sec;
|
||||
ctx.chunk_pool.emplace_back(sec);
|
||||
}
|
||||
}
|
||||
|
||||
// Restore debug info sections that had been set aside while we were
|
||||
// creating the main file.
|
||||
tbb::parallel_for_each(ctx.debug_chunks, [&](Chunk<E> *chunk) {
|
||||
chunk->compute_section_size(ctx);
|
||||
});
|
||||
|
||||
append(ctx.chunks, ctx.debug_chunks);
|
||||
|
||||
// Write to the debug info file as if it were a regular output file.
|
||||
compute_section_headers(ctx);
|
||||
i64 filesize = set_osec_offsets(ctx);
|
||||
|
||||
ctx.output_file =
|
||||
OutputFile<Context<E>>::open(ctx, ctx.arg.separate_debug_file,
|
||||
filesize, 0666);
|
||||
ctx.buf = ctx.output_file->buf;
|
||||
|
||||
copy_chunks(ctx);
|
||||
|
||||
if (ctx.gdb_index)
|
||||
write_gdb_index(ctx);
|
||||
|
||||
// Reverse-compute a CRC32 value so that the CRC32 checksum embedded to
|
||||
// the .gnu_debuglink section in the main executable matches with the
|
||||
// debug info file's CRC32 checksum.
|
||||
u32 crc = compute_crc32(0, ctx.buf, filesize);
|
||||
|
||||
std::vector<u8> &buf2 = ctx.output_file->buf2;
|
||||
if (!buf2.empty())
|
||||
crc = compute_crc32(crc, buf2.data(), buf2.size());
|
||||
|
||||
std::vector<u8> trailer = crc32_solve(crc, ctx.gnu_debuglink->crc32);
|
||||
append(ctx.output_file->buf2, trailer);
|
||||
ctx.output_file->close(ctx);
|
||||
}
|
||||
|
||||
// Write Makefile-style dependency rules to a file specified by
|
||||
// --dependency-file. This is analogous to the compiler's -M flag.
|
||||
@ -3188,11 +3246,14 @@ template void apply_version_script(Context<E> &);
|
||||
template void parse_symbol_version(Context<E> &);
|
||||
template void compute_import_export(Context<E> &);
|
||||
template void compute_address_significance(Context<E> &);
|
||||
template void separate_debug_sections(Context<E> &);
|
||||
template void compute_section_headers(Context<E> &);
|
||||
template i64 set_osec_offsets(Context<E> &);
|
||||
template void fix_synthetic_symbols(Context<E> &);
|
||||
template i64 compress_debug_sections(Context<E> &);
|
||||
template void compute_build_id(Context<E> &);
|
||||
template void write_build_id(Context<E> &);
|
||||
template void write_gnu_debuglink(Context<E> &);
|
||||
template void write_separate_debug_file(Context<E> &);
|
||||
template void write_dependency_file(Context<E> &);
|
||||
template void show_stats(Context<E> &);
|
||||
|
||||
|
@ -60,6 +60,7 @@ void notify_parent() {
|
||||
char buf[] = {1};
|
||||
[[maybe_unused]] int n = write(pipe_write_fd, buf, 1);
|
||||
assert(n == 1);
|
||||
pipe_write_fd = -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -170,6 +170,10 @@ static void scan_rels(Context<E> &ctx, InputSection<E> &isec,
|
||||
|
||||
template <>
|
||||
void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
|
||||
// This function is not thread-safe because it mutates symbols' members
|
||||
static std::mutex mu;
|
||||
std::scoped_lock lock(mu);
|
||||
|
||||
std::span<InputSection<E> *> m = members;
|
||||
if (m.empty())
|
||||
return;
|
||||
@ -247,10 +251,8 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
|
||||
|
||||
// Scan relocations between B and C to collect symbols that need
|
||||
// entries in the new thunk.
|
||||
tbb::parallel_for_each(m.begin() + b, m.begin() + c,
|
||||
[&](InputSection<E> *isec) {
|
||||
scan_rels(ctx, *isec, *thunk, thunk_idx);
|
||||
});
|
||||
for (i64 i = b; i < c; i++)
|
||||
scan_rels(ctx, *m[i], *thunk, thunk_idx);
|
||||
|
||||
// Now that we know the number of symbols in the thunk, we can compute
|
||||
// the thunk's size.
|
||||
@ -270,16 +272,15 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
|
||||
}
|
||||
|
||||
// Scan relocations again to fix symbol offsets in the last thunk.
|
||||
tbb::parallel_for_each(m.begin() + b, m.begin() + c,
|
||||
[&](InputSection<E> *isec) {
|
||||
std::span<Symbol<E> *> syms = isec->file.symbols;
|
||||
std::span<const ElfRel<E>> rels = isec->get_rels(ctx);
|
||||
std::span<ThunkRef> thunk_refs = isec->extra.thunk_refs;
|
||||
for (i64 i = b; i < c; i++) {
|
||||
std::span<Symbol<E> *> syms = m[i]->file.symbols;
|
||||
std::span<const ElfRel<E>> rels = m[i]->get_rels(ctx);
|
||||
std::span<ThunkRef> thunk_refs = m[i]->extra.thunk_refs;
|
||||
|
||||
for (i64 i = 0; i < rels.size(); i++)
|
||||
if (thunk_refs[i].thunk_idx == thunk_idx)
|
||||
thunk_refs[i].sym_idx = syms[rels[i].r_sym]->extra.thunk_sym_idx;
|
||||
});
|
||||
for (i64 j = 0; j < rels.size(); j++)
|
||||
if (thunk_refs[j].thunk_idx == thunk_idx)
|
||||
thunk_refs[j].sym_idx = syms[rels[j].r_sym]->extra.thunk_sym_idx;
|
||||
}
|
||||
|
||||
// Move B forward to point to the beginning of the next batch.
|
||||
b = c;
|
||||
|
26
test/elf/separate-debug-file.sh
Executable file
26
test/elf/separate-debug-file.sh
Executable file
@ -0,0 +1,26 @@
|
||||
#!/bin/bash
. $(dirname $0)/common.inc

# Skip if on_qemu succeeds or if either gdb or flock is not available.
on_qemu && skip
for tool in gdb flock; do
  command -v $tool >& /dev/null || skip
done

cat <<EOF > $t/a.c
#include <stdio.h>
int main() {
printf("Hello world\n");
}
EOF

# Link with --separate-debug-file; the executable must contain a
# .gnu_debuglink section.
$CC -c -o $t/a.o $t/a.c -g
$CC -B. -o $t/exe1 $t/a.o -Wl,--separate-debug-file
readelf -SW $t/exe1 | grep -Fq .gnu_debuglink

# Same check, this time with --no-build-id as well.
$CC -c -o $t/a.o $t/a.c -g
$CC -B. -o $t/exe2 $t/a.o -Wl,--separate-debug-file -Wl,--no-build-id
readelf -SW $t/exe2 | grep -Fq .gnu_debuglink

# NOTE(review): presumably gives the debug info files time to be written
# before gdb looks for them -- confirm.
sleep 1

# gdb must be able to list main's source for both executables.
for exe in $t/exe1 $t/exe2; do
  gdb $exe -ex 'list main' -ex 'quit' | grep -Fq printf
done
|
@ -37,5 +37,5 @@ grep -Eq '.note.baz\s+NOTE.+000008 00 A 0 0 8' $t/log
|
||||
grep -Eq '.note.nonalloc\s+NOTE.+000008 00 0 0 1' $t/log
|
||||
|
||||
readelf --segments $t/exe > $t/log
|
||||
grep -Fq '01 .note.baz .note.foo .note.bar' $t/log
|
||||
grep -Fq '01 .note.bar .note.baz .note.foo' $t/log
|
||||
! grep -q 'NOTE.*0x0000000000000000 0x0000000000000000' $t/log || false
|
||||
|
@ -29,4 +29,4 @@ EOF
|
||||
./mold -o $t/exe $t/a.o $t/b.o $t/c.o $t/d.o
|
||||
|
||||
readelf --segments $t/exe > $t/log
|
||||
grep -Fq '01 .note.a .note.c .note.b' $t/log
|
||||
grep -Fq '01 .note.a .note.b .note.c' $t/log
|
||||
|
Loading…
Reference in New Issue
Block a user