1
1
mirror of https://github.com/rui314/mold.git synced 2024-08-16 00:10:55 +03:00

Compare commits

...

11 Commits

Author SHA1 Message Date
Felix Yan
96da0dc23e
Merge 46c4b826aa into 18da5b654e 2024-07-09 05:47:42 +01:00
Rui Ueyama
18da5b654e Add --no-detach to write to a separate debug file in the foreground
--detach is the default.
2024-07-09 12:06:41 +09:00
Rui Ueyama
97a1e218c5 Simplify crc32_solve()
The code was originally written by Pete Cawley
https://gist.github.com/corsix/bdfc8f2f1dc0f28de39f74de9bf4f060
2024-07-09 10:15:26 +09:00
Rui Ueyama
f9e4cb1a7f Add a missing #include 2024-07-08 10:43:40 +09:00
Rui Ueyama
60760a892a Attempt to fix CI 2024-07-08 09:59:07 +09:00
Rui Ueyama
596ffa959a Add --separate-debug-info
This option is to separate debug info to a different file. The debug
info file's filename is stored to the main output file's .gnu_debuglink
section. gdb can read the section contents and follow the link to
find debug info in another file.

Fixes https://github.com/rui314/mold/issues/1294
2024-07-08 09:28:32 +09:00
Rui Ueyama
cd3b817f13 Make notify_parent idempotent 2024-07-07 22:49:35 +09:00
Rui Ueyama
19de40fed4 Refactor 2024-07-07 22:45:17 +09:00
Rui Ueyama
e78e12b15b Refactor 2024-07-07 22:24:42 +09:00
Rui Ueyama
5b4377842b Fix CI 2024-07-07 21:50:53 +09:00
Felix Yan
46c4b826aa
Correct typos in docs/bugs.md 2022-08-22 12:25:28 +03:00
14 changed files with 466 additions and 137 deletions

View File

@ -364,6 +364,7 @@ endforeach()
# Add other non-template source files.
target_sources(mold PRIVATE
common/compress.cc
common/crc32.cc
common/demangle.cc
common/filepath.cc
common/glob.cc

View File

@ -2,6 +2,7 @@
#include "integers.h"
#include <array>
#include <atomic>
#include <bit>
#include <bitset>
@ -899,6 +900,13 @@ std::optional<std::string_view> demangle_rust(std::string_view name);
void acquire_global_lock();
void release_global_lock();
//
// crc32.cc
//
u32 compute_crc32(u32 crc, u8 *buf, i64 len);
std::vector<u8> crc32_solve(u32 current, u32 desired);
//
// compress.cc
//

60
common/crc32.cc Normal file
View File

@ -0,0 +1,60 @@
#include "common.h"
#include <tbb/parallel_for_each.h>
#include <zlib.h>
namespace mold {
// crc32_solve() "forges" a CRC. Given the CRC32 of some existing data
// (`current`) and the CRC32 we would like that data to have (`desired`),
// it returns a 4-byte blob that, when appended to the end of the data,
// makes the CRC32 of the whole thing equal to `desired`. Many binary
// file formats ignore trailing garbage, so this lets us create a file
// with a CRC of our choosing. We need it for --separate-debug-file.
std::vector<u8> crc32_solve(u32 current, u32 desired) {
  constexpr u32 poly = 0xedb88320;

  // Each iteration computes x = (x * x^-1) mod poly.
  u32 x = ~desired;
  for (i64 i = 0; i < 32; i++) {
    x = std::rotl(x, 1);
    if (x & 1)
      x ^= poly << 1;
  }

  x ^= ~current;

  // Emit the 32-bit result least-significant byte first.
  return {(u8)x, (u8)(x >> 8), (u8)(x >> 16), (u8)(x >> 24)};
}
// Compute the CRC32 of `len` bytes starting at `buf` in parallel,
// continuing from the running checksum `crc`. The buffer is cut into
// 1 MiB shards whose CRCs are computed independently and then folded
// into `crc` in order with crc32_combine().
u32 compute_crc32(u32 crc, u8 *buf, i64 len) {
  struct Shard {
    u8 *buf;
    i64 len;
    u32 crc;
  };

  constexpr i64 shard_size = 1024 * 1024; // 1 MiB

  // Slice the input; only the last shard may be shorter than shard_size.
  std::vector<Shard> shards;
  for (i64 off = 0; off < len; off += shard_size)
    shards.push_back({buf + off, std::min(len - off, shard_size), 0});

  // Each shard's CRC starts from zero so that shards are independent.
  tbb::parallel_for_each(shards.begin(), shards.end(), [](Shard &s) {
    s.crc = crc32_z(0, s.buf, s.len);
  });

  // Stitch the per-shard checksums together from first to last.
  for (Shard &s : shards)
    crc = crc32_combine(crc, s.crc, s.len);
  return crc;
}
} // namespace mold

View File

@ -88,9 +88,9 @@ Problem: A statically-linked "hello world" program crashes after
reading a thread-local variable.
Investigation: Thread-local variables are very different from other
types of varaibles because there may be more than one instance of the
types of variables because there may be more than one instance of the
same variable in memory. Each thread has its copy of thread-local
varaibles. `%fs` segment register points the end of the variable area
variables. `%fs` segment register points the end of the variable area
for the current thread, and the variables are accessed as an offset
from `%fs`.

View File

@ -85,6 +85,8 @@ Options:
--defsym=SYMBOL=VALUE Define a symbol alias
--demangle Demangle C++ symbols in log messages (default)
--no-demangle
--detach Create separate debug info file in the background (default)
--no-detach
--enable-new-dtags Emit DT_RUNPATH for --rpath (default)
--disable-new-dtags Emit DT_RPATH for --rpath
--execute-only Make executable segments unreadable
@ -143,6 +145,8 @@ Options:
--rpath-link DIR Ignored
--run COMMAND ARG... Run COMMAND with mold as /usr/bin/ld
--section-start=SECTION=ADDR Set address for section
--separate-debug-file[=FILE] Separate debug info to the specified file
--no-separate-debug-file
--shared, --Bshareable Create a shared library
--shuffle-sections[=SEED] Randomize the output by shuffling input sections
--sort-common Ignored
@ -526,6 +530,7 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
std::optional<SeparateCodeKind> z_separate_code;
std::optional<bool> report_undefined;
std::optional<bool> z_relro;
std::optional<std::string> separate_debug_file;
std::optional<u64> shuffle_sections_seed;
std::unordered_set<std::string_view> rpaths;
@ -756,6 +761,10 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
ctx.arg.demangle = true;
} else if (read_flag("no-demangle")) {
ctx.arg.demangle = false;
} else if (read_flag("detach")) {
ctx.arg.detach = true;
} else if (read_flag("no-detach")) {
ctx.arg.detach = false;
} else if (read_flag("default-symver")) {
ctx.arg.default_symver = true;
} else if (read_flag("noinhibit-exec")) {
@ -1003,6 +1012,12 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
ctx.arg.z_origin = true;
} else if (read_z_flag("nodefaultlib")) {
ctx.arg.z_nodefaultlib = true;
} else if (read_eq("separate-debug-file")) {
separate_debug_file = arg;
} else if (read_flag("separate-debug-file")) {
separate_debug_file = "";
} else if (read_flag("no-separate-debug-file")) {
separate_debug_file.reset();
} else if (read_z_flag("separate-loadable-segments")) {
z_separate_code = SEPARATE_LOADABLE_SEGMENTS;
} else if (read_z_flag("separate-code")) {
@ -1394,9 +1409,20 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
ctx.default_version = VER_NDX_LAST_RESERVED + 1;
}
if (separate_debug_file) {
if (separate_debug_file->empty())
ctx.arg.separate_debug_file = ctx.arg.output + ".dbg";
else
ctx.arg.separate_debug_file = *separate_debug_file;
}
if (ctx.arg.shared && warn_shared_textrel)
ctx.arg.warn_textrel = true;
// We don't want the background process to write to stdout
if (ctx.arg.stats || ctx.arg.perf)
ctx.arg.detach = false;
ctx.arg.undefined.push_back(ctx.arg.entry);
for (i64 i = 0; i < ctx.arg.defsyms.size(); i++) {

View File

@ -559,14 +559,17 @@ int elf_main(int argc, char **argv) {
// Compute the is_weak bit for each imported symbol.
compute_imported_symbol_weakness(ctx);
// Compute sizes of output sections while assigning offsets
// within an output section to input sections.
compute_section_sizes(ctx);
// Sort sections by section attributes so that we'll have to
// create as few segments as possible.
sort_output_sections(ctx);
if (!ctx.arg.separate_debug_file.empty())
separate_debug_sections(ctx);
// Compute sizes of output sections while assigning offsets
// within an output section to input sections.
compute_section_sizes(ctx);
// If --packed_dyn_relocs=relr was given, base relocations are stored
// to a .relr.dyn section in a compressed form. Construct a compressed
// relocations now so that we can fix section sizes and file layout.
@ -653,17 +656,18 @@ int elf_main(int argc, char **argv) {
// .note.gnu.build-id section contains a cryptographic hash of the
// entire output file. Now that we wrote everything except build-id,
// we can compute it.
if (ctx.buildid) {
compute_build_id(ctx);
ctx.buildid->copy_buf(ctx);
}
if (ctx.buildid)
write_build_id(ctx);
// .gdb_index's contents cannot be constructed before applying
// relocations to other debug sections. We have relocated debug
// sections now, so write the .gdb_index section.
if (ctx.gdb_index)
if (ctx.gdb_index && ctx.arg.separate_debug_file.empty())
write_gdb_index(ctx);
if (!ctx.arg.separate_debug_file.empty())
write_gnu_debuglink(ctx);
t_copy.stop();
ctx.checkpoint();
@ -682,6 +686,9 @@ int elf_main(int argc, char **argv) {
if (ctx.arg.print_map)
print_map(ctx);
if (!ctx.arg.separate_debug_file.empty())
write_separate_debug_file(ctx);
// Show stats numbers
if (ctx.arg.stats)
show_stats(ctx);
@ -692,9 +699,7 @@ int elf_main(int argc, char **argv) {
std::cout << std::flush;
std::cerr << std::flush;
if (ctx.arg.fork)
notify_parent();
notify_parent();
release_global_lock();
if (ctx.arg.quick_exit)

View File

@ -373,7 +373,7 @@ public:
virtual ~Chunk() = default;
virtual bool is_header() { return false; }
virtual OutputSection<E> *to_osec() { return nullptr; }
virtual MergedSection<E> *to_merged_section() { return nullptr; }
virtual void compute_section_size(Context<E> &ctx) {}
virtual i64 get_reldyn_size(Context<E> &ctx) const { return 0; }
virtual void construct_relr(Context<E> &ctx) {}
virtual void copy_buf(Context<E> &ctx) {}
@ -480,6 +480,7 @@ public:
}
OutputSection<E> *to_osec() override { return this; }
void compute_section_size(Context<E> &ctx) override;
void construct_relr(Context<E> &ctx) override;
void copy_buf(Context<E> &ctx) override;
void write_to(Context<E> &ctx, u8 *buf) override;
@ -806,9 +807,8 @@ public:
SectionFragment<E> *insert(Context<E> &ctx, std::string_view data,
u64 hash, i64 p2align);
MergedSection<E> *to_merged_section() override { return this; }
void resolve(Context<E> &ctx);
void assign_offsets(Context<E> &ctx);
void compute_section_size(Context<E> &ctx) override;
void copy_buf(Context<E> &ctx) override;
void write_to(Context<E> &ctx, u8 *buf) override;
void print_stats(Context<E> &ctx);
@ -993,6 +993,22 @@ private:
std::map<u32, u32> properties;
};
// Output chunk for the .gnu_debuglink section, which records the name
// of a separate debug info file and a CRC32 checksum for it.
template <typename E>
class GnuDebuglinkSection : public Chunk<E> {
public:
  GnuDebuglinkSection() {
    this->name = ".gnu_debuglink";
    this->shdr.sh_type = SHT_PROGBITS;
    this->shdr.sh_addralign = 4;
  }

  // Sets `filename` and the section size.
  void update_shdr(Context<E> &ctx) override;

  // Writes `filename` and `crc32` to the output buffer.
  void copy_buf(Context<E> &ctx) override;

  // Filename part of --separate-debug-file; filled in by update_shdr().
  std::string filename;

  // Checksum written to the last four bytes of the section.
  u32 crc32 = 0;
};
template <typename E>
class GdbIndexSection : public Chunk<E> {
public:
@ -1439,11 +1455,14 @@ template <typename E> void apply_version_script(Context<E> &);
template <typename E> void parse_symbol_version(Context<E> &);
template <typename E> void compute_import_export(Context<E> &);
template <typename E> void compute_address_significance(Context<E> &);
template <typename E> void separate_debug_sections(Context<E> &);
template <typename E> void compute_section_headers(Context<E> &);
template <typename E> i64 set_osec_offsets(Context<E> &);
template <typename E> void fix_synthetic_symbols(Context<E> &);
template <typename E> i64 compress_debug_sections(Context<E> &);
template <typename E> void compute_build_id(Context<E> &);
template <typename E> void write_build_id(Context<E> &);
template <typename E> void write_gnu_debuglink(Context<E> &);
template <typename E> void write_separate_debug_file(Context<E> &ctx);
template <typename E> void write_dependency_file(Context<E> &);
template <typename E> void show_stats(Context<E> &);
@ -1721,6 +1740,7 @@ struct Context {
bool color_diagnostics = false;
bool default_symver = false;
bool demangle = true;
bool detach = true;
bool discard_all = false;
bool discard_locals = false;
bool eh_frame_hdr = true;
@ -1807,6 +1827,7 @@ struct Context {
std::string package_metadata;
std::string plugin;
std::string rpaths;
std::string separate_debug_file;
std::string soname;
std::string sysroot;
std::unique_ptr<std::unordered_set<std::string_view>> retain_symbols_file;
@ -1885,6 +1906,9 @@ struct Context {
tbb::concurrent_hash_map<Symbol<E> *, std::vector<std::string>> undef_errors;
// For --separate-debug-file
std::vector<Chunk<E> *> debug_chunks;
// Output chunks
OutputEhdr<E> *ehdr = nullptr;
OutputShdr<E> *shdr = nullptr;
@ -1900,6 +1924,7 @@ struct Context {
DynstrSection<E> *dynstr = nullptr;
HashSection<E> *hash = nullptr;
GnuHashSection<E> *gnu_hash = nullptr;
GnuDebuglinkSection<E> *gnu_debuglink = nullptr;
ShstrtabSection<E> *shstrtab = nullptr;
PltSection<E> *plt = nullptr;
PltGotSection<E> *pltgot = nullptr;

View File

@ -865,6 +865,84 @@ void DynamicSection<E>::copy_buf(Context<E> &ctx) {
write_vector(ctx.buf + this->shdr.sh_offset, contents);
}
// Split `input` into consecutive spans of `unit` elements each. If the
// number of elements is not a multiple of `unit`, the leftover elements
// form one final, shorter span. The spans refer to `input`'s storage.
template <typename T>
static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
  std::vector<std::span<T>> vec;
  std::span<T> rest(input);

  for (; rest.size() >= unit; rest = rest.subspan(unit))
    vec.push_back(rest.subspan(0, unit));

  if (!rest.empty())
    vec.push_back(rest);
  return vec;
}
// Compute the size and alignment of this output section and assign an
// offset within the section to each of its member input sections.
template <typename E>
void OutputSection<E>::compute_section_size(Context<E> &ctx) {
  ElfShdr<E> &shdr = this->shdr;

  // Branch instructions on most RISC targets have a limited reach, so
  // executable sections may need "range extension thunks" there.
  // create_range_extension_thunks() inserts them and computes the
  // section size as it goes, so there is nothing left to do afterwards.
  if constexpr (needs_thunk<E>) {
    if ((shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable) {
      create_range_extension_thunks(ctx);
      return;
    }
  }

  // An output section may have millions of members. To parallelize the
  // work, members are processed in fixed-size groups: we first compute
  // each group's size, then lay out the groups, and finally lay out the
  // members within each group.
  struct Group {
    std::span<InputSection<E> *> members;
    i64 size = 0;
    i64 p2align = 0;
    i64 offset = 0;
  };

  constexpr i64 group_size = 10000;
  std::vector<Group> groups;

  for (std::span<InputSection<E> *> rest = members; !rest.empty();) {
    i64 n = std::min<i64>(group_size, rest.size());
    groups.push_back({rest.subspan(0, n)});
    rest = rest.subspan(n);
  }

  // Compute the size and the maximum alignment of each group.
  tbb::parallel_for_each(groups, [](Group &g) {
    for (InputSection<E> *isec : g.members) {
      g.size = align_to(g.size, 1 << isec->p2align) + isec->sh_size;
      g.p2align = std::max<i64>(g.p2align, isec->p2align);
    }
  });

  // Place the groups one after another within the section.
  shdr.sh_size = 0;

  for (Group &g : groups) {
    shdr.sh_size = align_to(shdr.sh_size, 1 << g.p2align);
    g.offset = shdr.sh_size;
    shdr.sh_size += g.size;
    shdr.sh_addralign = std::max<u32>(shdr.sh_addralign, 1 << g.p2align);
  }

  // Assign offsets to input sections, starting at their group's offset.
  tbb::parallel_for_each(groups, [](Group &g) {
    i64 pos = g.offset;
    for (InputSection<E> *isec : g.members) {
      pos = align_to(pos, 1 << isec->p2align);
      isec->offset = pos;
      pos += isec->sh_size;
    }
  });
}
template <typename E>
void OutputSection<E>::copy_buf(Context<E> &ctx) {
if (this->shdr.sh_type != SHT_NOBITS)
@ -1621,10 +1699,14 @@ ElfSym<E> to_output_esym(Context<E> &ctx, Symbol<E> &sym, u32 st_name,
};
i64 shndx = -1;
InputSection<E> *isec = sym.get_input_section();
if (sym.has_copyrel) {
// Symbol in .copyrel
shndx = sym.is_copyrel_readonly ? ctx.copyrel_relro->shndx : ctx.copyrel->shndx;
esym.st_value = sym.get_addr(ctx);
} else if (sym.file->is_dso || sym.esym().is_undef()) {
// Undefined symbol in a DSO
esym.st_shndx = SHN_UNDEF;
esym.st_size = 0;
if (sym.is_canonical)
@ -1637,7 +1719,7 @@ ElfSym<E> to_output_esym(Context<E> &ctx, Symbol<E> &sym, u32 st_name,
// Section fragment
shndx = frag->output_section.shndx;
esym.st_value = sym.get_addr(ctx);
} else if (!sym.get_input_section()) {
} else if (!isec) {
// Absolute symbol
esym.st_shndx = SHN_ABS;
esym.st_value = sym.get_addr(ctx);
@ -1651,7 +1733,25 @@ ElfSym<E> to_output_esym(Context<E> &ctx, Symbol<E> &sym, u32 st_name,
esym.st_type = STT_FUNC;
esym.st_visibility = sym.visibility;
esym.st_value = sym.get_plt_addr(ctx);
} else if (!isec->output_section) {
// Symbol in a mergeable non-SHF_ALLOC section, such as .debug_str
assert(!(isec->shdr().sh_flags & SHF_ALLOC));
assert(isec->shdr().sh_flags & SHF_MERGE);
assert(!sym.file->is_dso);
ObjectFile<E> *file = (ObjectFile<E> *)sym.file;
MergeableSection<E> *m =
file->mergeable_sections[file->get_shndx(sym.esym())].get();
SectionFragment<E> *frag;
i64 frag_addend;
std::tie(frag, frag_addend) = m->get_fragment(sym.esym().st_value);
shndx = m->parent.shndx;
esym.st_visibility = sym.visibility;
esym.st_value = frag->get_addr(ctx) + frag_addend;
} else {
// Symbol in a regular section
shndx = get_st_shndx(sym);
esym.st_visibility = sym.visibility;
esym.st_value = sym.get_addr(ctx, NO_PLT);
@ -1987,7 +2087,7 @@ void MergedSection<E>::resolve(Context<E> &ctx) {
}
template <typename E>
void MergedSection<E>::assign_offsets(Context<E> &ctx) {
void MergedSection<E>::compute_section_size(Context<E> &ctx) {
if (!resolved)
resolve(ctx);
@ -2848,6 +2948,20 @@ void ComdatGroupSection<E>::copy_buf(Context<E> &ctx) {
*buf++ = chunk->shndx;
}
// Remember the filename part of the --separate-debug-file path and set
// the section size accordingly.
template <typename E>
void GnuDebuglinkSection<E>::update_shdr(Context<E> &ctx) {
  std::filesystem::path path(ctx.arg.separate_debug_file);
  filename = path.filename().string();

  // The section consists of the filename and one more byte (for a
  // terminator), rounded up to a multiple of 4, followed by a 4-byte
  // CRC32 value.
  this->shdr.sh_size = align_to(filename.size() + 1, 4) + 4;
}
// Write the section contents: the filename at the beginning and the
// CRC32 value in the last four bytes. Everything in between is zero.
template <typename E>
void GnuDebuglinkSection<E>::copy_buf(Context<E> &ctx) {
  u8 *buf = ctx.buf + this->shdr.sh_offset;

  // Clear the whole section first so that the bytes between the
  // filename and the checksum are zero.
  memset(buf, 0, this->shdr.sh_size);
  write_string(buf, filename);

  U32<E> *crc_field = (U32<E> *)(buf + this->shdr.sh_size - 4);
  *crc_field = crc32;
}
using E = MOLD_TARGET;
template class Chunk<E>;
@ -2886,6 +3000,7 @@ template class GdbIndexSection<E>;
template class CompressedSection<E>;
template class RelocSection<E>;
template class ComdatGroupSection<E>;
template class GnuDebuglinkSection<E>;
template OutputSection<E> *find_section(Context<E> &, u32);
template OutputSection<E> *find_section(Context<E> &, std::string_view);

View File

@ -156,6 +156,8 @@ void create_synthetic_sections(Context<E> &ctx) {
ctx.verdef = push(new VerdefSection<E>);
if (ctx.arg.emit_relocs)
ctx.eh_frame_reloc = push(new EhFrameRelocSection<E>);
if (!ctx.arg.separate_debug_file.empty())
ctx.gnu_debuglink = push(new GnuDebuglinkSection<E>);
if (ctx.arg.shared || !ctx.dsos.empty() || ctx.arg.pie) {
ctx.dynamic = push(new DynamicSection<E>(ctx));
@ -466,20 +468,6 @@ static std::string get_cmdline_args(Context<E> &ctx) {
return ss.str();
}
template <typename T>
static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
std::span<T> span(input);
std::vector<std::span<T>> vec;
while (span.size() >= unit) {
vec.push_back(span.subspan(0, unit));
span = span.subspan(unit);
}
if (!span.empty())
vec.push_back(span);
return vec;
}
template <typename E>
static bool has_ctors_and_init_array(Context<E> &ctx) {
bool x = false;
@ -1351,84 +1339,24 @@ template <typename E>
void compute_section_sizes(Context<E> &ctx) {
Timer t(ctx, "compute_section_sizes");
struct Group {
i64 size = 0;
i64 p2align = 0;
i64 offset = 0;
std::span<InputSection<E> *> members;
};
// Assign offsets to OutputSection members
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
OutputSection<E> *osec = chunk->to_osec();
if (!osec)
return;
// This pattern will be processed in the next loop.
if constexpr (needs_thunk<E>)
if ((osec->shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable)
return;
// Since one output section may contain millions of input sections,
// we first split input sections into groups and assign offsets to
// groups.
std::vector<Group> groups;
constexpr i64 group_size = 10000;
for (std::span<InputSection<E> *> span : split(osec->members, group_size))
groups.push_back(Group{.members = span});
tbb::parallel_for_each(groups, [](Group &group) {
for (InputSection<E> *isec : group.members) {
group.size = align_to(group.size, 1 << isec->p2align) + isec->sh_size;
group.p2align = std::max<i64>(group.p2align, isec->p2align);
}
});
ElfShdr<E> &shdr = osec->shdr;
shdr.sh_size = 0;
for (i64 i = 0; i < groups.size(); i++) {
shdr.sh_size = align_to(shdr.sh_size, 1 << groups[i].p2align);
groups[i].offset = shdr.sh_size;
shdr.sh_size += groups[i].size;
shdr.sh_addralign = std::max<u32>(shdr.sh_addralign, 1 << groups[i].p2align);
}
// Assign offsets to input sections.
tbb::parallel_for_each(groups, [](Group &group) {
i64 offset = group.offset;
for (InputSection<E> *isec : group.members) {
offset = align_to(offset, 1 << isec->p2align);
isec->offset = offset;
offset += isec->sh_size;
}
});
});
// Assign offsets to MergedSection members
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
if (MergedSection<E> *sec = chunk->to_merged_section())
sec->assign_offsets(ctx);
});
// On ARM32 or ARM64, we may need to create so-called "range extension
// thunks" to extend branch instructions reach, as they can jump only
// to ±16 MiB or ±128 MiB, respecitvely.
//
// In the following loop, We compute the sizes of sections while
// inserting thunks. This pass cannot be parallelized. That is,
// create_range_extension_thunks is parallelized internally, but the
// function itself is not thread-safe.
if constexpr (needs_thunk<E>) {
Timer t(ctx, "create_range_extension_thunks");
// Chunk<E>::compute_section_size may obtain a global lock to create
// range extension thunks. I don't know why, but using parallel_for
// loop both inside and outside of the lock may cause a deadlock. It
// might be a bug in TBB. For now, I'll avoid using parallel_for_each
// here.
for (Chunk<E> *chunk : ctx.chunks)
if (chunk->shdr.sh_flags & SHF_EXECINSTR)
chunk->compute_section_size(ctx);
if (!ctx.arg.relocatable)
for (Chunk<E> *chunk : ctx.chunks)
if (OutputSection<E> *osec = chunk->to_osec())
if (osec->shdr.sh_flags & SHF_EXECINSTR)
osec->create_range_extension_thunks(ctx);
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
if (!(chunk->shdr.sh_flags & SHF_EXECINSTR))
chunk->compute_section_size(ctx);
});
} else {
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
chunk->compute_section_size(ctx);
});
}
}
@ -2676,6 +2604,24 @@ static i64 set_file_offsets(Context<E> &ctx) {
return fileoff;
}
// For --separate-debug-file: move debug sections out of ctx.chunks and
// keep them in ctx.debug_chunks instead. The relative order of chunks is
// preserved in both lists.
template <typename E>
void separate_debug_sections(Context<E> &ctx) {
  // A chunk is set aside if it is not SHF_ALLOC and is either .gdb_index,
  // the symbol table, its string table, or a ".debug_*" section.
  auto is_debug_section = [&](Chunk<E> *chunk) {
    bool is_alloc = chunk->shdr.sh_flags & SHF_ALLOC;
    return !is_alloc &&
           (chunk == ctx.gdb_index || chunk == ctx.symtab ||
            chunk == ctx.strtab || chunk->name.starts_with(".debug_"));
  };

  // Gather the debug chunks at the front, then cut them off.
  auto mid = std::stable_partition(ctx.chunks.begin(), ctx.chunks.end(),
                                   is_debug_section);

  ctx.debug_chunks.assign(ctx.chunks.begin(), mid);
  ctx.chunks.erase(ctx.chunks.begin(), mid);
}
template <typename E>
void compute_section_headers(Context<E> &ctx) {
// Update sh_size for each chunk.
@ -3006,23 +2952,34 @@ static void blake3_hash(u8 *buf, i64 size, u8 *out) {
}
template <typename E>
void compute_build_id(Context<E> &ctx) {
Timer t(ctx, "compute_build_id");
// Returns views over the output file buffer (ctx.buf, of
// ctx.output_file->filesize bytes) cut into consecutive 4 MiB pieces.
// Only the last piece may be shorter.
std::vector<std::span<u8>> get_shards(Context<E> &ctx) {
  constexpr i64 shard_size = 4 * 1024 * 1024; // 4 MiB

  std::vector<std::span<u8>> shards;
  std::span<u8> rest = {ctx.buf, (size_t)ctx.output_file->filesize};

  for (; !rest.empty();) {
    i64 n = std::min<i64>(shard_size, rest.size());
    shards.push_back(rest.subspan(0, n));
    rest = rest.subspan(n);
  }
  return shards;
}
template <typename E>
void write_build_id(Context<E> &ctx) {
Timer t(ctx, "write_build_id");
switch (ctx.arg.build_id.kind) {
case BuildId::HEX:
ctx.buildid->contents = ctx.arg.build_id.value;
break;
case BuildId::HASH: {
i64 shard_size = 4 * 1024 * 1024;
i64 filesize = ctx.output_file->filesize;
i64 num_shards = align_to(filesize, shard_size) / shard_size;
std::vector<u8> shards(num_shards * BLAKE3_OUT_LEN);
std::vector<std::span<u8>> shards = get_shards(ctx);
std::vector<u8> hashes(shards.size() * BLAKE3_OUT_LEN);
tbb::parallel_for((i64)0, num_shards, [&](i64 i) {
u8 *begin = ctx.buf + shard_size * i;
u8 *end = (i == num_shards - 1) ? ctx.buf + filesize : begin + shard_size;
blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN);
tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) {
blake3_hash(shards[i].data(), shards[i].size(),
hashes.data() + i * BLAKE3_OUT_LEN);
#ifdef HAVE_MADVISE
// Make the kernel page out the file contents we've just written
@ -3033,7 +2990,7 @@ void compute_build_id(Context<E> &ctx) {
});
u8 buf[BLAKE3_OUT_LEN];
blake3_hash(shards.data(), shards.size(), buf);
blake3_hash(hashes.data(), hashes.size(), buf);
assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN);
ctx.buildid->contents = {buf, buf + ctx.arg.build_id.size()};
@ -3052,8 +3009,109 @@ void compute_build_id(Context<E> &ctx) {
default:
unreachable();
}
ctx.buildid->copy_buf(ctx);
}
// A .gnu_debuglink section contains the filename and the CRC32 checksum
// of a debug info file. At the time we write .gnu_debuglink, the debug
// info file has not been created yet, so its real checksum is unknown.
// We write a value of our own choosing instead.
//
// That value can't be random, because that would break build
// reproducibility. It shouldn't be the same for all files either: the
// checksum is there to prevent a wrong file from being used for
// debugging, and gdb rejects a debug info file if its CRC doesn't match
// the one in .gnu_debuglink.
//
// Therefore, we derive a checksum that is as unique as possible from
// the main output: from the build-id if there is one, or otherwise from
// hashes of the output file contents. We remember it, and after creating
// the debug info file we append a few bytes of garbage to it so that its
// CRC32 checksum becomes the one that we have precomputed.
template <typename E>
void write_gnu_debuglink(Context<E> &ctx) {
  Timer t(ctx, "write_gnu_debuglink");

  auto compute_checksum = [&]() -> u32 {
    if (ctx.buildid)
      return compute_crc32(0, ctx.buildid->contents.data(),
                           ctx.buildid->contents.size());

    // No build-id. Hash each shard of the output file and compute a
    // CRC32 over the array of hashes.
    std::vector<std::span<u8>> shards = get_shards(ctx);
    std::vector<U64<E>> hashes(shards.size());

    tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) {
      hashes[i] = hash_string({(char *)shards[i].data(), shards[i].size()});
    });
    return compute_crc32(0, (u8 *)hashes.data(), hashes.size() * 8);
  };

  ctx.gnu_debuglink->crc32 = compute_checksum();
  ctx.gnu_debuglink->copy_buf(ctx);
}
// Write a separate debug file. This function is called after we finish
// writing to the usual output file.
//
// This function rewrites ctx.chunks in place, and replaces
// ctx.output_file and ctx.buf with the debug info file and its buffer.
// After it returns, ctx no longer describes the main output file.
template <typename E>
void write_separate_debug_file(Context<E> &ctx) {
Timer t(ctx, "write_separate_debug_file");
// We want to write to the debug info file in background so that the
// user doesn't have to wait for it to complete.
if (ctx.arg.detach)
notify_parent();
// A debug info file contains all sections as the original file, though
// most of them can be empty as if they were bss sections. We convert
// real sections into dummy sections here.
//
// The ELF header, the section header table, .shstrtab and SHT_NOTE
// sections are kept as they are. Every other chunk is replaced with a
// new OutputSection that has the same name and section header except
// that its type is SHT_NOBITS.
for (i64 i = 0; i < ctx.chunks.size(); i++) {
Chunk<E> *chunk = ctx.chunks[i];
if (chunk != ctx.ehdr && chunk != ctx.shdr && chunk != ctx.shstrtab &&
chunk->shdr.sh_type != SHT_NOTE) {
Chunk<E> *sec = new OutputSection<E>(chunk->name, SHT_NULL);
sec->shdr = chunk->shdr;
sec->shdr.sh_type = SHT_NOBITS;
ctx.chunks[i] = sec;
// NOTE(review): presumably chunk_pool takes ownership of `sec` --
// confirm against the declaration of Context::chunk_pool.
ctx.chunk_pool.emplace_back(sec);
}
}
// Restore debug info sections that had been set aside while we were
// creating the main file. They were removed from ctx.chunks before
// section sizes were computed for the main file, so compute their
// sizes now.
tbb::parallel_for_each(ctx.debug_chunks, [&](Chunk<E> *chunk) {
chunk->compute_section_size(ctx);
});
append(ctx.chunks, ctx.debug_chunks);
// Write to the debug info file as if it were a regular output file.
compute_section_headers(ctx);
i64 filesize = set_osec_offsets(ctx);
ctx.output_file =
OutputFile<Context<E>>::open(ctx, ctx.arg.separate_debug_file,
filesize, 0666);
ctx.buf = ctx.output_file->buf;
copy_chunks(ctx);
// .gdb_index is not written to the main file when a separate debug
// file is requested, so write it here.
if (ctx.gdb_index)
write_gdb_index(ctx);
// Reverse-compute a CRC32 value so that the CRC32 checksum embedded to
// the .gnu_debuglink section in the main executable matches with the
// debug info file's CRC32 checksum.
u32 crc = compute_crc32(0, ctx.buf, filesize);
// NOTE(review): buf2 looks like extra data that follows the main buffer
// in the file -- confirm against OutputFile. If it is non-empty, it is
// included in the checksum.
std::vector<u8> &buf2 = ctx.output_file->buf2;
if (!buf2.empty())
crc = compute_crc32(crc, buf2.data(), buf2.size());
// The trailer produced by crc32_solve() is appended after everything
// else.
std::vector<u8> trailer = crc32_solve(crc, ctx.gnu_debuglink->crc32);
append(ctx.output_file->buf2, trailer);
ctx.output_file->close(ctx);
}
// Write Makefile-style dependency rules to a file specified by
// --dependency-file. This is analogous to the compiler's -M flag.
@ -3188,11 +3246,14 @@ template void apply_version_script(Context<E> &);
template void parse_symbol_version(Context<E> &);
template void compute_import_export(Context<E> &);
template void compute_address_significance(Context<E> &);
template void separate_debug_sections(Context<E> &);
template void compute_section_headers(Context<E> &);
template i64 set_osec_offsets(Context<E> &);
template void fix_synthetic_symbols(Context<E> &);
template i64 compress_debug_sections(Context<E> &);
template void compute_build_id(Context<E> &);
template void write_build_id(Context<E> &);
template void write_gnu_debuglink(Context<E> &);
template void write_separate_debug_file(Context<E> &);
template void write_dependency_file(Context<E> &);
template void show_stats(Context<E> &);

View File

@ -60,6 +60,7 @@ void notify_parent() {
char buf[] = {1};
[[maybe_unused]] int n = write(pipe_write_fd, buf, 1);
assert(n == 1);
pipe_write_fd = -1;
}
#endif

View File

@ -170,6 +170,10 @@ static void scan_rels(Context<E> &ctx, InputSection<E> &isec,
template <>
void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
// This function is not thread-safe because it mutates symbols' members
static std::mutex mu;
std::scoped_lock lock(mu);
std::span<InputSection<E> *> m = members;
if (m.empty())
return;
@ -247,10 +251,8 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
// Scan relocations between B and C to collect symbols that need
// entries in the new thunk.
tbb::parallel_for_each(m.begin() + b, m.begin() + c,
[&](InputSection<E> *isec) {
scan_rels(ctx, *isec, *thunk, thunk_idx);
});
for (i64 i = b; i < c; i++)
scan_rels(ctx, *m[i], *thunk, thunk_idx);
// Now that we know the number of symbols in the thunk, we can compute
// the thunk's size.
@ -270,16 +272,15 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
}
// Scan relocations again to fix symbol offsets in the last thunk.
tbb::parallel_for_each(m.begin() + b, m.begin() + c,
[&](InputSection<E> *isec) {
std::span<Symbol<E> *> syms = isec->file.symbols;
std::span<const ElfRel<E>> rels = isec->get_rels(ctx);
std::span<ThunkRef> thunk_refs = isec->extra.thunk_refs;
for (i64 i = b; i < c; i++) {
std::span<Symbol<E> *> syms = m[i]->file.symbols;
std::span<const ElfRel<E>> rels = m[i]->get_rels(ctx);
std::span<ThunkRef> thunk_refs = m[i]->extra.thunk_refs;
for (i64 i = 0; i < rels.size(); i++)
if (thunk_refs[i].thunk_idx == thunk_idx)
thunk_refs[i].sym_idx = syms[rels[i].r_sym]->extra.thunk_sym_idx;
});
for (i64 j = 0; j < rels.size(); j++)
if (thunk_refs[j].thunk_idx == thunk_idx)
thunk_refs[j].sym_idx = syms[rels[j].r_sym]->extra.thunk_sym_idx;
}
// Move B forward to point to the beginning of the next batch.
b = c;

26
test/elf/separate-debug-file.sh Executable file
View File

@ -0,0 +1,26 @@
#!/bin/bash
# Test for --separate-debug-file. It links a program with the option,
# checks that the executable has a .gnu_debuglink section, and checks
# that gdb can list the source of main, both with and without a build-id.
. $(dirname $0)/common.inc
# Skip on qemu and when the required tools are not available.
on_qemu && skip
command -v gdb >& /dev/null || skip
# NOTE(review): flock is required here but is not used anywhere below --
# confirm whether this check is still needed.
command -v flock >& /dev/null || skip
cat <<EOF > $t/a.c
#include <stdio.h>
int main() {
printf("Hello world\n");
}
EOF
# Case 1: default settings.
$CC -c -o $t/a.o $t/a.c -g
$CC -B. -o $t/exe1 $t/a.o -Wl,--separate-debug-file
readelf -SW $t/exe1 | grep -Fq .gnu_debuglink
# Case 2: the same program linked with --no-build-id.
$CC -c -o $t/a.o $t/a.c -g
$CC -B. -o $t/exe2 $t/a.o -Wl,--separate-debug-file -Wl,--no-build-id
readelf -SW $t/exe2 | grep -Fq .gnu_debuglink
# --detach is the default, so the debug info files are written in the
# background and may not be complete yet when the linker returns.
# NOTE(review): a fixed delay may be flaky on a slow or loaded machine.
sleep 1
# gdb prints the source of main only if it finds the debug info.
gdb $t/exe1 -ex 'list main' -ex 'quit' | grep -Fq printf
gdb $t/exe2 -ex 'list main' -ex 'quit' | grep -Fq printf

View File

@ -37,5 +37,5 @@ grep -Eq '.note.baz\s+NOTE.+000008 00 A 0 0 8' $t/log
grep -Eq '.note.nonalloc\s+NOTE.+000008 00 0 0 1' $t/log
readelf --segments $t/exe > $t/log
grep -Fq '01 .note.baz .note.foo .note.bar' $t/log
grep -Fq '01 .note.bar .note.baz .note.foo' $t/log
! grep -q 'NOTE.*0x0000000000000000 0x0000000000000000' $t/log || false

View File

@ -29,4 +29,4 @@ EOF
./mold -o $t/exe $t/a.o $t/b.o $t/c.o $t/d.o
readelf --segments $t/exe > $t/log
grep -Fq '01 .note.a .note.c .note.b' $t/log
grep -Fq '01 .note.a .note.b .note.c' $t/log