1
1
mirror of https://github.com/rui314/mold.git synced 2024-08-16 00:10:55 +03:00

Compare commits

...

11 Commits

Author SHA1 Message Date
Jake Leahy
6c305541e6
Merge 9ab4ab14ab into 18da5b654e 2024-07-09 05:47:42 +01:00
Rui Ueyama
18da5b654e Add --no-detach to write to a separate debug file in the foreground
--detach is the default.
2024-07-09 12:06:41 +09:00
Rui Ueyama
97a1e218c5 Simplify crc32_solve()
The code was originally written by Pete Cawley
https://gist.github.com/corsix/bdfc8f2f1dc0f28de39f74de9bf4f060
2024-07-09 10:15:26 +09:00
Rui Ueyama
f9e4cb1a7f Add a missing #include 2024-07-08 10:43:40 +09:00
Rui Ueyama
60760a892a Attempt to fix CI 2024-07-08 09:59:07 +09:00
Rui Ueyama
596ffa959a Add --separate-debug-info
This option is to separate debug info to a different file. The debug
info file's filename is stored to the main output file's .gnu_debuglink
section. gdb can read the section contents and follow the link to
find debug info in another file.

Fixes https://github.com/rui314/mold/issues/1294
2024-07-08 09:28:32 +09:00
Rui Ueyama
cd3b817f13 Make notify_parent idempotent 2024-07-07 22:49:35 +09:00
Rui Ueyama
19de40fed4 Refactor 2024-07-07 22:45:17 +09:00
Rui Ueyama
e78e12b15b Refactor 2024-07-07 22:24:42 +09:00
Rui Ueyama
5b4377842b Fix CI 2024-07-07 21:50:53 +09:00
Jake Leahy
9ab4ab14ab
Correct the location of the global config
Correct path is listed [here](https://nim-lang.org/docs/nimc.html#compiler-usage-configuration-files)
2023-06-19 17:47:26 +10:00
14 changed files with 465 additions and 136 deletions

View File

@ -364,6 +364,7 @@ endforeach()
# Add other non-template source files.
target_sources(mold PRIVATE
common/compress.cc
common/crc32.cc
common/demangle.cc
common/filepath.cc
common/glob.cc

View File

@ -155,7 +155,7 @@ example, `gcc` is used as the linker driver. Use the `-fuse-ld` option if your
GCC is recent enough to recognize this option.
If you want to use mold for all projects, add the above snippet to
`~/.config/config.nims`.
`~/.config/nim/config.nims`.
</details>

View File

@ -2,6 +2,7 @@
#include "integers.h"
#include <array>
#include <atomic>
#include <bit>
#include <bitset>
@ -899,6 +900,13 @@ std::optional<std::string_view> demangle_rust(std::string_view name);
void acquire_global_lock();
void release_global_lock();
//
// crc32.cc
//
u32 compute_crc32(u32 crc, u8 *buf, i64 len);
std::vector<u8> crc32_solve(u32 current, u32 desired);
//
// compress.cc
//

60
common/crc32.cc Normal file
View File

@ -0,0 +1,60 @@
#include "common.h"
#include <tbb/parallel_for_each.h>
#include <zlib.h>
namespace mold {
// This function "forges" a CRC. Given the CRC32 of some existing data
// (`current`) and the CRC32 we want that data to have (`desired`),
// crc32_solve() returns a 4-byte blob that yields the desired CRC when
// it is added to the end of the original data. Trailing garbage is
// ignored by many binary file formats, so you can create a file with a
// desired CRC using crc32_solve(). We need it for --separate-debug-file.
std::vector<u8> crc32_solve(u32 current, u32 desired) {
  constexpr u32 poly = 0xedb88320;

  // Each iteration multiplies v by x^-1 modulo poly.
  u32 v = ~desired;
  for (i64 step = 0; step < 32; step++) {
    v = std::rotl(v, 1);
    if (v & 1)
      v ^= poly << 1;
  }
  v ^= ~current;

  // Emit the result one byte at a time, least significant byte first.
  std::vector<u8> out;
  for (i64 shift = 0; shift < 32; shift += 8)
    out.push_back((u8)(v >> shift));
  return out;
}
// Compute a CRC for given data in parallel. The data is cut into
// fixed-size shards, each shard's CRC is computed independently, and the
// per-shard CRCs are then folded into `crc` in order.
u32 compute_crc32(u32 crc, u8 *buf, i64 len) {
  struct Shard {
    u8 *buf;
    i64 len;
    u32 crc;
  };

  constexpr i64 shard_size = 1024 * 1024; // 1 MiB

  std::vector<Shard> shards;
  for (i64 pos = 0; pos < len; pos += shard_size)
    shards.push_back({buf + pos, std::min(len - pos, shard_size), 0});

  tbb::parallel_for_each(shards, [](Shard &s) {
    s.crc = crc32_z(0, s.buf, s.len);
  });

  for (const Shard &s : shards)
    crc = crc32_combine(crc, s.crc, s.len);
  return crc;
}
} // namespace mold

View File

@ -85,6 +85,8 @@ Options:
--defsym=SYMBOL=VALUE Define a symbol alias
--demangle Demangle C++ symbols in log messages (default)
--no-demangle
--detach Create separate debug info file in the background (default)
--no-detach
--enable-new-dtags Emit DT_RUNPATH for --rpath (default)
--disable-new-dtags Emit DT_RPATH for --rpath
--execute-only Make executable segments unreadable
@ -143,6 +145,8 @@ Options:
--rpath-link DIR Ignored
--run COMMAND ARG... Run COMMAND with mold as /usr/bin/ld
--section-start=SECTION=ADDR Set address for section
--separate-debug-file[=FILE] Separate debug info to the specified file
--no-separate-debug-file
--shared, --Bshareable Create a shared library
--shuffle-sections[=SEED] Randomize the output by shuffling input sections
--sort-common Ignored
@ -526,6 +530,7 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
std::optional<SeparateCodeKind> z_separate_code;
std::optional<bool> report_undefined;
std::optional<bool> z_relro;
std::optional<std::string> separate_debug_file;
std::optional<u64> shuffle_sections_seed;
std::unordered_set<std::string_view> rpaths;
@ -756,6 +761,10 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
ctx.arg.demangle = true;
} else if (read_flag("no-demangle")) {
ctx.arg.demangle = false;
} else if (read_flag("detach")) {
ctx.arg.detach = true;
} else if (read_flag("no-detach")) {
ctx.arg.detach = false;
} else if (read_flag("default-symver")) {
ctx.arg.default_symver = true;
} else if (read_flag("noinhibit-exec")) {
@ -1003,6 +1012,12 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
ctx.arg.z_origin = true;
} else if (read_z_flag("nodefaultlib")) {
ctx.arg.z_nodefaultlib = true;
} else if (read_eq("separate-debug-file")) {
separate_debug_file = arg;
} else if (read_flag("separate-debug-file")) {
separate_debug_file = "";
} else if (read_flag("no-separate-debug-file")) {
separate_debug_file.reset();
} else if (read_z_flag("separate-loadable-segments")) {
z_separate_code = SEPARATE_LOADABLE_SEGMENTS;
} else if (read_z_flag("separate-code")) {
@ -1394,9 +1409,20 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
ctx.default_version = VER_NDX_LAST_RESERVED + 1;
}
if (separate_debug_file) {
if (separate_debug_file->empty())
ctx.arg.separate_debug_file = ctx.arg.output + ".dbg";
else
ctx.arg.separate_debug_file = *separate_debug_file;
}
if (ctx.arg.shared && warn_shared_textrel)
ctx.arg.warn_textrel = true;
// We don't want the background process to write to stdout
if (ctx.arg.stats || ctx.arg.perf)
ctx.arg.detach = false;
ctx.arg.undefined.push_back(ctx.arg.entry);
for (i64 i = 0; i < ctx.arg.defsyms.size(); i++) {

View File

@ -559,14 +559,17 @@ int elf_main(int argc, char **argv) {
// Compute the is_weak bit for each imported symbol.
compute_imported_symbol_weakness(ctx);
// Compute sizes of output sections while assigning offsets
// within an output section to input sections.
compute_section_sizes(ctx);
// Sort sections by section attributes so that we'll have to
// create as few segments as possible.
sort_output_sections(ctx);
if (!ctx.arg.separate_debug_file.empty())
separate_debug_sections(ctx);
// Compute sizes of output sections while assigning offsets
// within an output section to input sections.
compute_section_sizes(ctx);
// If --packed_dyn_relocs=relr was given, base relocations are stored
// to a .relr.dyn section in a compressed form. Construct a compressed
// relocations now so that we can fix section sizes and file layout.
@ -653,17 +656,18 @@ int elf_main(int argc, char **argv) {
// .note.gnu.build-id section contains a cryptographic hash of the
// entire output file. Now that we wrote everything except build-id,
// we can compute it.
if (ctx.buildid) {
compute_build_id(ctx);
ctx.buildid->copy_buf(ctx);
}
if (ctx.buildid)
write_build_id(ctx);
// .gdb_index's contents cannot be constructed before applying
// relocations to other debug sections. We have relocated debug
// sections now, so write the .gdb_index section.
if (ctx.gdb_index)
if (ctx.gdb_index && ctx.arg.separate_debug_file.empty())
write_gdb_index(ctx);
if (!ctx.arg.separate_debug_file.empty())
write_gnu_debuglink(ctx);
t_copy.stop();
ctx.checkpoint();
@ -682,6 +686,9 @@ int elf_main(int argc, char **argv) {
if (ctx.arg.print_map)
print_map(ctx);
if (!ctx.arg.separate_debug_file.empty())
write_separate_debug_file(ctx);
// Show stats numbers
if (ctx.arg.stats)
show_stats(ctx);
@ -692,9 +699,7 @@ int elf_main(int argc, char **argv) {
std::cout << std::flush;
std::cerr << std::flush;
if (ctx.arg.fork)
notify_parent();
notify_parent();
release_global_lock();
if (ctx.arg.quick_exit)

View File

@ -373,7 +373,7 @@ public:
virtual ~Chunk() = default;
virtual bool is_header() { return false; }
virtual OutputSection<E> *to_osec() { return nullptr; }
virtual MergedSection<E> *to_merged_section() { return nullptr; }
virtual void compute_section_size(Context<E> &ctx) {}
virtual i64 get_reldyn_size(Context<E> &ctx) const { return 0; }
virtual void construct_relr(Context<E> &ctx) {}
virtual void copy_buf(Context<E> &ctx) {}
@ -480,6 +480,7 @@ public:
}
OutputSection<E> *to_osec() override { return this; }
void compute_section_size(Context<E> &ctx) override;
void construct_relr(Context<E> &ctx) override;
void copy_buf(Context<E> &ctx) override;
void write_to(Context<E> &ctx, u8 *buf) override;
@ -806,9 +807,8 @@ public:
SectionFragment<E> *insert(Context<E> &ctx, std::string_view data,
u64 hash, i64 p2align);
MergedSection<E> *to_merged_section() override { return this; }
void resolve(Context<E> &ctx);
void assign_offsets(Context<E> &ctx);
void compute_section_size(Context<E> &ctx) override;
void copy_buf(Context<E> &ctx) override;
void write_to(Context<E> &ctx, u8 *buf) override;
void print_stats(Context<E> &ctx);
@ -993,6 +993,22 @@ private:
std::map<u32, u32> properties;
};
// Chunk for the .gnu_debuglink section. It holds the file name of the
// separate debug info file and a CRC32 value; update_shdr() sizes the
// section and copy_buf() writes both into the output buffer.
template <typename E>
class GnuDebuglinkSection : public Chunk<E> {
public:
  GnuDebuglinkSection() {
    this->shdr.sh_addralign = 4;
    this->shdr.sh_type = SHT_PROGBITS;
    this->name = ".gnu_debuglink";
  }

  // Sets `filename` from ctx.arg.separate_debug_file and computes sh_size.
  void update_shdr(Context<E> &ctx) override;

  // Writes `filename` and `crc32` to this section's place in ctx.buf.
  void copy_buf(Context<E> &ctx) override;

  // File name component of the debug info file path
  std::string filename;

  // Value stored in the last four bytes of the section
  u32 crc32 = 0;
};
template <typename E>
class GdbIndexSection : public Chunk<E> {
public:
@ -1439,11 +1455,14 @@ template <typename E> void apply_version_script(Context<E> &);
template <typename E> void parse_symbol_version(Context<E> &);
template <typename E> void compute_import_export(Context<E> &);
template <typename E> void compute_address_significance(Context<E> &);
template <typename E> void separate_debug_sections(Context<E> &);
template <typename E> void compute_section_headers(Context<E> &);
template <typename E> i64 set_osec_offsets(Context<E> &);
template <typename E> void fix_synthetic_symbols(Context<E> &);
template <typename E> i64 compress_debug_sections(Context<E> &);
template <typename E> void compute_build_id(Context<E> &);
template <typename E> void write_build_id(Context<E> &);
template <typename E> void write_gnu_debuglink(Context<E> &);
template <typename E> void write_separate_debug_file(Context<E> &ctx);
template <typename E> void write_dependency_file(Context<E> &);
template <typename E> void show_stats(Context<E> &);
@ -1721,6 +1740,7 @@ struct Context {
bool color_diagnostics = false;
bool default_symver = false;
bool demangle = true;
bool detach = true;
bool discard_all = false;
bool discard_locals = false;
bool eh_frame_hdr = true;
@ -1807,6 +1827,7 @@ struct Context {
std::string package_metadata;
std::string plugin;
std::string rpaths;
std::string separate_debug_file;
std::string soname;
std::string sysroot;
std::unique_ptr<std::unordered_set<std::string_view>> retain_symbols_file;
@ -1885,6 +1906,9 @@ struct Context {
tbb::concurrent_hash_map<Symbol<E> *, std::vector<std::string>> undef_errors;
// For --separate-debug-file
std::vector<Chunk<E> *> debug_chunks;
// Output chunks
OutputEhdr<E> *ehdr = nullptr;
OutputShdr<E> *shdr = nullptr;
@ -1900,6 +1924,7 @@ struct Context {
DynstrSection<E> *dynstr = nullptr;
HashSection<E> *hash = nullptr;
GnuHashSection<E> *gnu_hash = nullptr;
GnuDebuglinkSection<E> *gnu_debuglink = nullptr;
ShstrtabSection<E> *shstrtab = nullptr;
PltSection<E> *plt = nullptr;
PltGotSection<E> *pltgot = nullptr;

View File

@ -865,6 +865,84 @@ void DynamicSection<E>::copy_buf(Context<E> &ctx) {
write_vector(ctx.buf + this->shdr.sh_offset, contents);
}
// Split `input` into consecutive spans of `unit` elements each. If
// elements are left over at the end, they form a final, shorter span.
// The returned spans refer to `input`'s storage; nothing is copied.
template <typename T>
static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
  std::vector<std::span<T>> pieces;
  std::span<T> rest(input);

  for (; rest.size() >= unit; rest = rest.subspan(unit))
    pieces.push_back(rest.first(unit));

  if (rest.size() > 0)
    pieces.push_back(rest);
  return pieces;
}
// Compute the size of this output section and assign an offset within
// the section to each of its member input sections.
template <typename E>
void OutputSection<E>::compute_section_size(Context<E> &ctx) {
  ElfShdr<E> &hdr = this->shdr;

  // On most RISC systems, jump instructions have a limited reach, so we
  // need to create so-called "range extension thunks" to extend it.
  // create_range_extension_thunks() computes the size of the section
  // while inserting thunks, so we are done once it returns.
  if constexpr (needs_thunk<E>) {
    if (!ctx.arg.relocatable && (hdr.sh_flags & SHF_EXECINSTR)) {
      create_range_extension_thunks(ctx);
      return;
    }
  }

  // One output section may contain millions of input sections, so we
  // first split input sections into groups and assign offsets to groups.
  struct Group {
    std::span<InputSection<E> *> members;
    i64 size = 0;
    i64 p2align = 0;
    i64 offset = 0;
  };

  constexpr i64 group_size = 10000;
  std::span<InputSection<E> *> all = members;
  std::vector<Group> groups;

  for (i64 pos = 0; pos < (i64)all.size(); pos += group_size) {
    i64 n = std::min<i64>(group_size, all.size() - pos);
    groups.push_back({all.subspan(pos, n)});
  }

  // Compute the size and the alignment of each group.
  tbb::parallel_for_each(groups, [](Group &g) {
    for (InputSection<E> *isec : g.members) {
      g.size = align_to(g.size, 1 << isec->p2align) + isec->sh_size;
      g.p2align = std::max<i64>(g.p2align, isec->p2align);
    }
  });

  // Lay out groups one after another. This also gives us the section
  // size and the section alignment.
  hdr.sh_size = 0;

  for (Group &g : groups) {
    hdr.sh_size = align_to(hdr.sh_size, 1 << g.p2align);
    g.offset = hdr.sh_size;
    hdr.sh_size += g.size;
    hdr.sh_addralign = std::max<u32>(hdr.sh_addralign, 1 << g.p2align);
  }

  // Assign offsets to input sections, starting at each group's offset.
  tbb::parallel_for_each(groups, [](Group &g) {
    i64 pos = g.offset;
    for (InputSection<E> *isec : g.members) {
      pos = align_to(pos, 1 << isec->p2align);
      isec->offset = pos;
      pos += isec->sh_size;
    }
  });
}
template <typename E>
void OutputSection<E>::copy_buf(Context<E> &ctx) {
if (this->shdr.sh_type != SHT_NOBITS)
@ -1621,10 +1699,14 @@ ElfSym<E> to_output_esym(Context<E> &ctx, Symbol<E> &sym, u32 st_name,
};
i64 shndx = -1;
InputSection<E> *isec = sym.get_input_section();
if (sym.has_copyrel) {
// Symbol in .copyrel
shndx = sym.is_copyrel_readonly ? ctx.copyrel_relro->shndx : ctx.copyrel->shndx;
esym.st_value = sym.get_addr(ctx);
} else if (sym.file->is_dso || sym.esym().is_undef()) {
// Undefined symbol in a DSO
esym.st_shndx = SHN_UNDEF;
esym.st_size = 0;
if (sym.is_canonical)
@ -1637,7 +1719,7 @@ ElfSym<E> to_output_esym(Context<E> &ctx, Symbol<E> &sym, u32 st_name,
// Section fragment
shndx = frag->output_section.shndx;
esym.st_value = sym.get_addr(ctx);
} else if (!sym.get_input_section()) {
} else if (!isec) {
// Absolute symbol
esym.st_shndx = SHN_ABS;
esym.st_value = sym.get_addr(ctx);
@ -1651,7 +1733,25 @@ ElfSym<E> to_output_esym(Context<E> &ctx, Symbol<E> &sym, u32 st_name,
esym.st_type = STT_FUNC;
esym.st_visibility = sym.visibility;
esym.st_value = sym.get_plt_addr(ctx);
} else if (!isec->output_section) {
// Symbol in a mergeable non-SHF_ALLOC section, such as .debug_str
assert(!(isec->shdr().sh_flags & SHF_ALLOC));
assert(isec->shdr().sh_flags & SHF_MERGE);
assert(!sym.file->is_dso);
ObjectFile<E> *file = (ObjectFile<E> *)sym.file;
MergeableSection<E> *m =
file->mergeable_sections[file->get_shndx(sym.esym())].get();
SectionFragment<E> *frag;
i64 frag_addend;
std::tie(frag, frag_addend) = m->get_fragment(sym.esym().st_value);
shndx = m->parent.shndx;
esym.st_visibility = sym.visibility;
esym.st_value = frag->get_addr(ctx) + frag_addend;
} else {
// Symbol in a regular section
shndx = get_st_shndx(sym);
esym.st_visibility = sym.visibility;
esym.st_value = sym.get_addr(ctx, NO_PLT);
@ -1987,7 +2087,7 @@ void MergedSection<E>::resolve(Context<E> &ctx) {
}
template <typename E>
void MergedSection<E>::assign_offsets(Context<E> &ctx) {
void MergedSection<E>::compute_section_size(Context<E> &ctx) {
if (!resolved)
resolve(ctx);
@ -2848,6 +2948,20 @@ void ComdatGroupSection<E>::copy_buf(Context<E> &ctx) {
*buf++ = chunk->shndx;
}
// Remember the file name component of the debug info file path and set
// the section size accordingly.
template <typename E>
void GnuDebuglinkSection<E>::update_shdr(Context<E> &ctx) {
  std::filesystem::path path(ctx.arg.separate_debug_file);
  filename = path.filename().string();

  // The file name plus one extra byte, aligned to 4 bytes, followed by
  // four more bytes in which copy_buf() stores the CRC32 value.
  i64 name_size = align_to(filename.size() + 1, 4);
  this->shdr.sh_size = name_size + 4;
}
// Write the section contents: the file name at the beginning and the
// CRC32 value in the last four bytes.
template <typename E>
void GnuDebuglinkSection<E>::copy_buf(Context<E> &ctx) {
  u8 *base = ctx.buf + this->shdr.sh_offset;
  u8 *crc_loc = base + this->shdr.sh_size - 4;

  // Clear the whole section first so that bytes not written below are zero.
  memset(base, 0, this->shdr.sh_size);
  write_string(base, filename);
  *(U32<E> *)crc_loc = crc32;
}
using E = MOLD_TARGET;
template class Chunk<E>;
@ -2886,6 +3000,7 @@ template class GdbIndexSection<E>;
template class CompressedSection<E>;
template class RelocSection<E>;
template class ComdatGroupSection<E>;
template class GnuDebuglinkSection<E>;
template OutputSection<E> *find_section(Context<E> &, u32);
template OutputSection<E> *find_section(Context<E> &, std::string_view);

View File

@ -156,6 +156,8 @@ void create_synthetic_sections(Context<E> &ctx) {
ctx.verdef = push(new VerdefSection<E>);
if (ctx.arg.emit_relocs)
ctx.eh_frame_reloc = push(new EhFrameRelocSection<E>);
if (!ctx.arg.separate_debug_file.empty())
ctx.gnu_debuglink = push(new GnuDebuglinkSection<E>);
if (ctx.arg.shared || !ctx.dsos.empty() || ctx.arg.pie) {
ctx.dynamic = push(new DynamicSection<E>(ctx));
@ -466,20 +468,6 @@ static std::string get_cmdline_args(Context<E> &ctx) {
return ss.str();
}
template <typename T>
static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
std::span<T> span(input);
std::vector<std::span<T>> vec;
while (span.size() >= unit) {
vec.push_back(span.subspan(0, unit));
span = span.subspan(unit);
}
if (!span.empty())
vec.push_back(span);
return vec;
}
template <typename E>
static bool has_ctors_and_init_array(Context<E> &ctx) {
bool x = false;
@ -1351,84 +1339,24 @@ template <typename E>
void compute_section_sizes(Context<E> &ctx) {
Timer t(ctx, "compute_section_sizes");
struct Group {
i64 size = 0;
i64 p2align = 0;
i64 offset = 0;
std::span<InputSection<E> *> members;
};
// Assign offsets to OutputSection members
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
OutputSection<E> *osec = chunk->to_osec();
if (!osec)
return;
// This pattern will be processed in the next loop.
if constexpr (needs_thunk<E>)
if ((osec->shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable)
return;
// Since one output section may contain millions of input sections,
// we first split input sections into groups and assign offsets to
// groups.
std::vector<Group> groups;
constexpr i64 group_size = 10000;
for (std::span<InputSection<E> *> span : split(osec->members, group_size))
groups.push_back(Group{.members = span});
tbb::parallel_for_each(groups, [](Group &group) {
for (InputSection<E> *isec : group.members) {
group.size = align_to(group.size, 1 << isec->p2align) + isec->sh_size;
group.p2align = std::max<i64>(group.p2align, isec->p2align);
}
});
ElfShdr<E> &shdr = osec->shdr;
shdr.sh_size = 0;
for (i64 i = 0; i < groups.size(); i++) {
shdr.sh_size = align_to(shdr.sh_size, 1 << groups[i].p2align);
groups[i].offset = shdr.sh_size;
shdr.sh_size += groups[i].size;
shdr.sh_addralign = std::max<u32>(shdr.sh_addralign, 1 << groups[i].p2align);
}
// Assign offsets to input sections.
tbb::parallel_for_each(groups, [](Group &group) {
i64 offset = group.offset;
for (InputSection<E> *isec : group.members) {
offset = align_to(offset, 1 << isec->p2align);
isec->offset = offset;
offset += isec->sh_size;
}
});
});
// Assign offsets to MergedSection members
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
if (MergedSection<E> *sec = chunk->to_merged_section())
sec->assign_offsets(ctx);
});
// On ARM32 or ARM64, we may need to create so-called "range extension
// thunks" to extend branch instructions reach, as they can jump only
// to ±16 MiB or ±128 MiB, respectively.
//
// In the following loop, We compute the sizes of sections while
// inserting thunks. This pass cannot be parallelized. That is,
// create_range_extension_thunks is parallelized internally, but the
// function itself is not thread-safe.
if constexpr (needs_thunk<E>) {
Timer t(ctx, "create_range_extension_thunks");
// Chunk<E>::compute_section_size may obtain a global lock to create
// range extension thunks. I don't know why, but using parallel_for
// loop both inside and outside of the lock may cause a deadlock. It
// might be a bug in TBB. For now, I'll avoid using parallel_for_each
// here.
for (Chunk<E> *chunk : ctx.chunks)
if (chunk->shdr.sh_flags & SHF_EXECINSTR)
chunk->compute_section_size(ctx);
if (!ctx.arg.relocatable)
for (Chunk<E> *chunk : ctx.chunks)
if (OutputSection<E> *osec = chunk->to_osec())
if (osec->shdr.sh_flags & SHF_EXECINSTR)
osec->create_range_extension_thunks(ctx);
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
if (!(chunk->shdr.sh_flags & SHF_EXECINSTR))
chunk->compute_section_size(ctx);
});
} else {
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
chunk->compute_section_size(ctx);
});
}
}
@ -2676,6 +2604,24 @@ static i64 set_file_offsets(Context<E> &ctx) {
return fileoff;
}
// Move debug sections from ctx.chunks to ctx.debug_chunks, keeping the
// relative order of chunks in both lists. This is for
// --separate-debug-file.
template <typename E>
void separate_debug_sections(Context<E> &ctx) {
  // A chunk is set aside if it does not have SHF_ALLOC and is either
  // ctx.gdb_index, ctx.symtab, ctx.strtab or a section whose name begins
  // with ".debug_".
  auto is_debug_section = [&](Chunk<E> *chunk) {
    if (chunk->shdr.sh_flags & SHF_ALLOC)
      return false;
    if (chunk == ctx.gdb_index || chunk == ctx.symtab || chunk == ctx.strtab)
      return true;
    return chunk->name.starts_with(".debug_");
  };

  auto &chunks = ctx.chunks;
  auto mid = std::stable_partition(chunks.begin(), chunks.end(),
                                   is_debug_section);

  ctx.debug_chunks.assign(chunks.begin(), mid);
  chunks.erase(chunks.begin(), mid);
}
template <typename E>
void compute_section_headers(Context<E> &ctx) {
// Update sh_size for each chunk.
@ -3006,23 +2952,34 @@ static void blake3_hash(u8 *buf, i64 size, u8 *out) {
}
template <typename E>
void compute_build_id(Context<E> &ctx) {
Timer t(ctx, "compute_build_id");
std::vector<std::span<u8>> get_shards(Context<E> &ctx) {
constexpr i64 shard_size = 4 * 1024 * 1024; // 4 MiB
std::span<u8> buf = {ctx.buf, (size_t)ctx.output_file->filesize};
std::vector<std::span<u8>> vec;
while (!buf.empty()) {
i64 sz = std::min<i64>(shard_size, buf.size());
vec.push_back(buf.subspan(0, sz));
buf = buf.subspan(sz);
}
return vec;
}
template <typename E>
void write_build_id(Context<E> &ctx) {
Timer t(ctx, "write_build_id");
switch (ctx.arg.build_id.kind) {
case BuildId::HEX:
ctx.buildid->contents = ctx.arg.build_id.value;
break;
case BuildId::HASH: {
i64 shard_size = 4 * 1024 * 1024;
i64 filesize = ctx.output_file->filesize;
i64 num_shards = align_to(filesize, shard_size) / shard_size;
std::vector<u8> shards(num_shards * BLAKE3_OUT_LEN);
std::vector<std::span<u8>> shards = get_shards(ctx);
std::vector<u8> hashes(shards.size() * BLAKE3_OUT_LEN);
tbb::parallel_for((i64)0, num_shards, [&](i64 i) {
u8 *begin = ctx.buf + shard_size * i;
u8 *end = (i == num_shards - 1) ? ctx.buf + filesize : begin + shard_size;
blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN);
tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) {
blake3_hash(shards[i].data(), shards[i].size(),
hashes.data() + i * BLAKE3_OUT_LEN);
#ifdef HAVE_MADVISE
// Make the kernel page out the file contents we've just written
@ -3033,7 +2990,7 @@ void compute_build_id(Context<E> &ctx) {
});
u8 buf[BLAKE3_OUT_LEN];
blake3_hash(shards.data(), shards.size(), buf);
blake3_hash(hashes.data(), hashes.size(), buf);
assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN);
ctx.buildid->contents = {buf, buf + ctx.arg.build_id.size()};
@ -3052,8 +3009,109 @@ void compute_build_id(Context<E> &ctx) {
default:
unreachable();
}
ctx.buildid->copy_buf(ctx);
}
// A .gnu_debuglink section contains a filename and a CRC32 checksum of a
// debug info file. When we write .gnu_debuglink, we don't know the
// checksum yet because the debug info file hasn't been created. So we
// write a value of our own choosing instead.
//
// That value can't be random, because builds must be reproducible. It
// shouldn't be the same for all files either: the checksum is there to
// prevent a wrong file from being used for debugging, and gdb rejects a
// debug info file if its CRC doesn't match the one in .gnu_debuglink.
//
// Therefore, we try to make the checksum as unique as possible. We
// remember it, and after creating the debug info file, we add a few bytes
// of garbage at the end of that file so that its CRC32 checksum becomes
// the one we have precomputed.
template <typename E>
void write_gnu_debuglink(Context<E> &ctx) {
  Timer t(ctx, "write_gnu_debuglink");

  auto compute_checksum = [&]() -> u32 {
    // If we have a build-id, derive the checksum from its contents.
    if (ctx.buildid)
      return compute_crc32(0, ctx.buildid->contents.data(),
                           ctx.buildid->contents.size());

    // Otherwise, hash the output file shard by shard and derive the
    // checksum from the hash values.
    std::vector<std::span<u8>> shards = get_shards(ctx);
    std::vector<U64<E>> hashes(shards.size());

    tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) {
      std::span<u8> shard = shards[i];
      hashes[i] = hash_string({(char *)shard.data(), shard.size()});
    });
    return compute_crc32(0, (u8 *)hashes.data(), hashes.size() * 8);
  };

  ctx.gnu_debuglink->crc32 = compute_checksum();
  ctx.gnu_debuglink->copy_buf(ctx);
}
// Write a separate debug file. This function is called after we finish
// writing to the usual output file.
template <typename E>
void write_separate_debug_file(Context<E> &ctx) {
  Timer t(ctx, "write_separate_debug_file");

  // We want to write the debug info file in the background so that the
  // user doesn't have to wait for it to complete. In detached mode, we
  // therefore notify the parent before doing the actual work.
  if (ctx.arg.detach)
    notify_parent();

  // A debug info file contains all sections of the original file, though
  // most of them can be empty as if they were bss sections. Everything
  // except ctx.ehdr, ctx.shdr, ctx.shstrtab and SHT_NOTE chunks is
  // replaced with a dummy SHT_NOBITS section with the same header.
  for (Chunk<E> *&slot : ctx.chunks) {
    bool keep = slot == ctx.ehdr || slot == ctx.shdr ||
                slot == ctx.shstrtab || slot->shdr.sh_type == SHT_NOTE;
    if (keep)
      continue;

    Chunk<E> *orig = slot;
    Chunk<E> *dummy = new OutputSection<E>(orig->name, SHT_NULL);
    dummy->shdr = orig->shdr;
    dummy->shdr.sh_type = SHT_NOBITS;
    slot = dummy;
    ctx.chunk_pool.emplace_back(dummy);
  }

  // Bring back the debug info sections that had been set aside while we
  // were creating the main file.
  tbb::parallel_for_each(ctx.debug_chunks, [&](Chunk<E> *chunk) {
    chunk->compute_section_size(ctx);
  });

  append(ctx.chunks, ctx.debug_chunks);

  // From here on, write the debug info file as if it were a regular
  // output file.
  compute_section_headers(ctx);
  i64 filesize = set_osec_offsets(ctx);

  ctx.output_file =
    OutputFile<Context<E>>::open(ctx, ctx.arg.separate_debug_file,
                                 filesize, 0666);
  ctx.buf = ctx.output_file->buf;

  copy_chunks(ctx);

  if (ctx.gdb_index)
    write_gdb_index(ctx);

  // Reverse-compute a trailer so that the file's CRC32 checksum matches
  // the value embedded in the main executable's .gnu_debuglink section.
  // The checksum covers the main buffer and then buf2, if it has any data.
  std::vector<u8> &tail = ctx.output_file->buf2;

  u32 crc = compute_crc32(0, ctx.buf, filesize);
  if (!tail.empty())
    crc = compute_crc32(crc, tail.data(), tail.size());

  std::vector<u8> trailer = crc32_solve(crc, ctx.gnu_debuglink->crc32);
  append(tail, trailer);
  ctx.output_file->close(ctx);
}
// Write Makefile-style dependency rules to a file specified by
// --dependency-file. This is analogous to the compiler's -M flag.
@ -3188,11 +3246,14 @@ template void apply_version_script(Context<E> &);
template void parse_symbol_version(Context<E> &);
template void compute_import_export(Context<E> &);
template void compute_address_significance(Context<E> &);
template void separate_debug_sections(Context<E> &);
template void compute_section_headers(Context<E> &);
template i64 set_osec_offsets(Context<E> &);
template void fix_synthetic_symbols(Context<E> &);
template i64 compress_debug_sections(Context<E> &);
template void compute_build_id(Context<E> &);
template void write_build_id(Context<E> &);
template void write_gnu_debuglink(Context<E> &);
template void write_separate_debug_file(Context<E> &);
template void write_dependency_file(Context<E> &);
template void show_stats(Context<E> &);

View File

@ -60,6 +60,7 @@ void notify_parent() {
char buf[] = {1};
[[maybe_unused]] int n = write(pipe_write_fd, buf, 1);
assert(n == 1);
pipe_write_fd = -1;
}
#endif

View File

@ -170,6 +170,10 @@ static void scan_rels(Context<E> &ctx, InputSection<E> &isec,
template <>
void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
// This function is not thread-safe because it mutates symbols' members
static std::mutex mu;
std::scoped_lock lock(mu);
std::span<InputSection<E> *> m = members;
if (m.empty())
return;
@ -247,10 +251,8 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
// Scan relocations between B and C to collect symbols that need
// entries in the new thunk.
tbb::parallel_for_each(m.begin() + b, m.begin() + c,
[&](InputSection<E> *isec) {
scan_rels(ctx, *isec, *thunk, thunk_idx);
});
for (i64 i = b; i < c; i++)
scan_rels(ctx, *m[i], *thunk, thunk_idx);
// Now that we know the number of symbols in the thunk, we can compute
// the thunk's size.
@ -270,16 +272,15 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
}
// Scan relocations again to fix symbol offsets in the last thunk.
tbb::parallel_for_each(m.begin() + b, m.begin() + c,
[&](InputSection<E> *isec) {
std::span<Symbol<E> *> syms = isec->file.symbols;
std::span<const ElfRel<E>> rels = isec->get_rels(ctx);
std::span<ThunkRef> thunk_refs = isec->extra.thunk_refs;
for (i64 i = b; i < c; i++) {
std::span<Symbol<E> *> syms = m[i]->file.symbols;
std::span<const ElfRel<E>> rels = m[i]->get_rels(ctx);
std::span<ThunkRef> thunk_refs = m[i]->extra.thunk_refs;
for (i64 i = 0; i < rels.size(); i++)
if (thunk_refs[i].thunk_idx == thunk_idx)
thunk_refs[i].sym_idx = syms[rels[i].r_sym]->extra.thunk_sym_idx;
});
for (i64 j = 0; j < rels.size(); j++)
if (thunk_refs[j].thunk_idx == thunk_idx)
thunk_refs[j].sym_idx = syms[rels[j].r_sym]->extra.thunk_sym_idx;
}
// Move B forward to point to the beginning of the next batch.
b = c;

26
test/elf/separate-debug-file.sh Executable file
View File

@ -0,0 +1,26 @@
#!/bin/bash
. $(dirname $0)/common.inc

# Test for --separate-debug-file: the linked executable must contain a
# .gnu_debuglink section, and gdb must be able to show the source of
# `main`, both with a build-id (exe1) and with --no-build-id (exe2).

on_qemu && skip
command -v gdb >& /dev/null || skip
command -v flock >& /dev/null || skip

cat <<EOF > $t/a.c
#include <stdio.h>
int main() {
printf("Hello world\n");
}
EOF

$CC -c -o $t/a.o $t/a.c -g

# By default (--detach), the debug info file is written in the
# background, so it may still be incomplete when the linker command
# returns. Pass --no-detach so that the file is fully written before we
# run gdb, instead of sleeping for an arbitrary amount of time.
$CC -B. -o $t/exe1 $t/a.o -Wl,--separate-debug-file -Wl,--no-detach
readelf -SW $t/exe1 | grep -Fq .gnu_debuglink

$CC -B. -o $t/exe2 $t/a.o -Wl,--separate-debug-file -Wl,--no-detach \
  -Wl,--no-build-id
readelf -SW $t/exe2 | grep -Fq .gnu_debuglink

gdb $t/exe1 -ex 'list main' -ex 'quit' | grep -Fq printf
gdb $t/exe2 -ex 'list main' -ex 'quit' | grep -Fq printf

View File

@ -37,5 +37,5 @@ grep -Eq '.note.baz\s+NOTE.+000008 00 A 0 0 8' $t/log
grep -Eq '.note.nonalloc\s+NOTE.+000008 00 0 0 1' $t/log
readelf --segments $t/exe > $t/log
grep -Fq '01 .note.baz .note.foo .note.bar' $t/log
grep -Fq '01 .note.bar .note.baz .note.foo' $t/log
! grep -q 'NOTE.*0x0000000000000000 0x0000000000000000' $t/log || false

View File

@ -29,4 +29,4 @@ EOF
./mold -o $t/exe $t/a.o $t/b.o $t/c.o $t/d.o
readelf --segments $t/exe > $t/log
grep -Fq '01 .note.a .note.c .note.b' $t/log
grep -Fq '01 .note.a .note.b .note.c' $t/log