mirror of https://github.com/rui314/mold.git synced 2024-10-05 17:17:40 +03:00

Compare commits


4 Commits

Author      SHA1        Message                        Date
Rui Ueyama  fa2632cf7f  Make notify_parent idempotent  2024-07-07 14:36:28 +09:00
Rui Ueyama  6ce5a7845b  Refactor                       2024-07-07 14:36:28 +09:00
Rui Ueyama  d4cd52ca49  Refactor                       2024-07-07 14:36:28 +09:00
Rui Ueyama  8e3679e4e5  Refactor                       2024-07-06 13:11:09 +09:00
9 changed files with 300 additions and 266 deletions

View File

@ -677,6 +677,29 @@ void ObjectFile<E>::sort_relocations(Context<E> &ctx) {
}
}
template <typename E>
void ObjectFile<E>::convert_mergeable_sections(Context<E> &ctx) {
// Convert InputSections to MergeableSections
for (i64 i = 0; i < this->sections.size(); i++) {
InputSection<E> *isec = this->sections[i].get();
if (!isec || isec->sh_size == 0 || isec->relsec_idx != -1)
continue;
const ElfShdr<E> &shdr = isec->shdr();
if (!(shdr.sh_flags & SHF_MERGE))
continue;
MergedSection<E> *parent =
MergedSection<E>::get_instance(ctx, isec->name(), shdr);
if (parent) {
this->mergeable_sections[i] =
std::make_unique<MergeableSection<E>>(ctx, *parent, this->sections[i]);
this->sections[i] = nullptr;
}
}
}
// Usually a section is an atomic unit of inclusion or exclusion.
// The linker doesn't care about its contents. However, if a section is a
// mergeable section (a section with the SHF_MERGE bit set), the linker is
@ -713,54 +736,17 @@ void ObjectFile<E>::sort_relocations(Context<E> &ctx) {
// section piece in a section, but it doesn't do so for any other types
// of symbols.
//
// In mold, we attach symbols to section pieces. If a relocation refers
// to a section symbol, and that symbol's section is a mergeable one,
// we create a new dummy symbol for a section piece and redirect the
// relocation to this new symbol. If a non-section symbol refers to a
// section piece, the section piece is attached to the symbol.
// Section garbage collection and Identical Code Folding work on graphs
// where sections or section pieces are vertices and relocations are
// edges. To make it easy to handle them, we rewrite symbols and
// relocations so that each non-absolute symbol always refers to either
// a non-mergeable section or a section piece.
//
// We do that only for SHF_ALLOC sections because GC and ICF work only
// on memory-allocated sections. Non-memory-allocated mergeable sections
// are not handled here for performance reasons.
template <typename E>
void ObjectFile<E>::initialize_mergeable_sections(Context<E> &ctx) {
// Convert InputSections to MergeableSections
for (i64 i = 0; i < this->sections.size(); i++) {
InputSection<E> *isec = this->sections[i].get();
if (!isec || isec->sh_size == 0 || isec->relsec_idx != -1)
continue;
MergedSection<E> *parent =
MergedSection<E>::get_instance(ctx, isec->name(), isec->shdr());
if (parent) {
this->mergeable_sections[i] =
std::make_unique<MergeableSection<E>>(ctx, *parent, this->sections[i]);
this->sections[i] = nullptr;
}
}
// Split section contents
for (std::unique_ptr<MergeableSection<E>> &sec : mergeable_sections)
if (sec)
sec->split_contents(ctx);
}
template <typename E>
void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
for (std::unique_ptr<MergeableSection<E>> &m : mergeable_sections) {
if (!m)
continue;
m->fragments.reserve(m->frag_offsets.size());
for (i64 i = 0; i < m->frag_offsets.size(); i++) {
SectionFragment<E> *frag =
m->parent.insert(ctx, m->get_contents(i), m->hashes[i], m->p2align);
m->fragments.push_back(frag);
}
// Reclaim memory as we'll never use this vector again
m->hashes.clear();
m->hashes.shrink_to_fit();
}
void ObjectFile<E>::reattach_section_pieces(Context<E> &ctx) {
// Attach section pieces to symbols.
for (i64 i = 1; i < this->elf_syms.size(); i++) {
Symbol<E> &sym = *this->symbols[i];
@ -769,8 +755,9 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
if (esym.is_abs() || esym.is_common() || esym.is_undef())
continue;
std::unique_ptr<MergeableSection<E>> &m = mergeable_sections[get_shndx(esym)];
if (!m || m->fragments.empty())
i64 shndx = get_shndx(esym);
std::unique_ptr<MergeableSection<E>> &m = mergeable_sections[shndx];
if (!m || !m->parent.resolved)
continue;
SectionFragment<E> *frag;
@ -785,17 +772,16 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
}
// Compute the size of frag_syms.
std::vector<InputSection<E> *> vec;
for (std::unique_ptr<InputSection<E>> &isec : sections)
if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC))
vec.push_back(isec.get());
i64 nfrag_syms = 0;
for (InputSection<E> *isec : vec)
for (ElfRel<E> &r : isec->get_rels(ctx))
if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)])
nfrag_syms++;
for (std::unique_ptr<InputSection<E>> &isec : sections)
if (isec)
for (ElfRel<E> &r : isec->get_rels(ctx))
if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
esym.st_type == STT_SECTION)
if (std::unique_ptr<MergeableSection<E>> &m =
mergeable_sections[get_shndx(esym)])
if (m->parent.resolved)
nfrag_syms++;
this->frag_syms.resize(nfrag_syms);
@ -803,34 +789,38 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
// create a new dummy non-section symbol and redirect the relocation
// to the newly created symbol.
i64 idx = 0;
for (InputSection<E> *isec : vec) {
for (ElfRel<E> &r : isec->get_rels(ctx)) {
const ElfSym<E> &esym = this->elf_syms[r.r_sym];
if (esym.st_type != STT_SECTION)
continue;
for (std::unique_ptr<InputSection<E>> &isec : sections) {
if (isec) {
for (ElfRel<E> &r : isec->get_rels(ctx)) {
const ElfSym<E> &esym = this->elf_syms[r.r_sym];
if (esym.st_type != STT_SECTION)
continue;
std::unique_ptr<MergeableSection<E>> &m = mergeable_sections[get_shndx(esym)];
if (!m)
continue;
std::unique_ptr<MergeableSection<E>> &m =
mergeable_sections[get_shndx(esym)];
i64 r_addend = get_addend(*isec, r);
if (!m || !m->parent.resolved)
continue;
SectionFragment<E> *frag;
i64 in_frag_offset;
std::tie(frag, in_frag_offset) = m->get_fragment(esym.st_value + r_addend);
i64 r_addend = get_addend(*isec, r);
if (!frag)
Fatal(ctx) << *this << ": bad relocation at " << r.r_sym;
SectionFragment<E> *frag;
i64 in_frag_offset;
std::tie(frag, in_frag_offset) = m->get_fragment(esym.st_value + r_addend);
Symbol<E> &sym = this->frag_syms[idx];
sym.file = this;
sym.set_name("<fragment>");
sym.sym_idx = r.r_sym;
sym.visibility = STV_HIDDEN;
sym.set_frag(frag);
sym.value = in_frag_offset - r_addend;
r.r_sym = this->elf_syms.size() + idx;
idx++;
if (!frag)
Fatal(ctx) << *this << ": bad relocation at " << r.r_sym;
Symbol<E> &sym = this->frag_syms[idx];
sym.file = this;
sym.set_name("<fragment>");
sym.sym_idx = r.r_sym;
sym.visibility = STV_HIDDEN;
sym.set_frag(frag);
sym.value = in_frag_offset - r_addend;
r.r_sym = this->elf_syms.size() + idx;
idx++;
}
}
}
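
The comment and loop above redirect relocations that target section symbols in mergeable sections to per-piece symbols. The lookup that makes this workable is an offset-to-piece mapping over the sorted start offsets of the pieces. Below is a minimal, self-contained sketch of that idea only; the names find_piece, Piece, and piece_offsets are illustrative and not mold's actual API.

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

struct Piece {};  // stand-in for SectionFragment<E>

// Map an offset inside a mergeable input section to (piece, offset within
// piece). Assumes offset >= 0, piece_offsets[0] == 0, and piece_offsets
// sorted in ascending order.
std::pair<Piece *, int64_t>
find_piece(const std::vector<uint32_t> &piece_offsets,
           std::vector<Piece *> &pieces, int64_t offset) {
  // upper_bound returns the first piece starting *after* offset, so the
  // element before it is the piece that contains offset.
  auto it = std::upper_bound(piece_offsets.begin(), piece_offsets.end(), offset);
  int64_t idx = it - piece_offsets.begin() - 1;
  return {pieces[idx], offset - piece_offsets[idx]};
}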

View File

@ -557,6 +557,9 @@ MergeableSection<E>::MergeableSection(Context<E> &ctx, MergedSection<E> &parent,
std::unique_ptr<InputSection<E>> &isec)
: parent(parent), section(std::move(isec)), p2align(section->p2align) {
section->uncompress(ctx);
std::scoped_lock lock(parent.mu);
parent.members.push_back(this);
}
static size_t find_null(std::string_view data, i64 pos, i64 entsize) {
@ -630,6 +633,17 @@ void MergeableSection<E>::split_contents(Context<E> &ctx) {
counter += frag_offsets.size();
}
template <typename E>
void MergeableSection<E>::resolve_contents(Context<E> &ctx) {
fragments.reserve(frag_offsets.size());
for (i64 i = 0; i < frag_offsets.size(); i++)
fragments.push_back(parent.insert(ctx, get_contents(i), hashes[i], p2align));
// Reclaim memory as we'll never use this vector again
hashes.clear();
hashes.shrink_to_fit();
}
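
split_contents() and resolve_contents() form a two-phase scheme: first compute piece boundaries and hashes per input section, then insert each piece into the parent's concurrent map so identical byte sequences collapse into one fragment. A minimal single-threaded sketch of that deduplication effect, with a plain std::unordered_map standing in for mold's ConcurrentMap (illustrative only):

#include <cstdint>
#include <string_view>
#include <unordered_map>

struct Fragment { int64_t offset = -1; };

// Identical byte sequences map to the same Fragment, which is what lets
// "foo\0" coming from two different object files occupy one slot in the
// output section.
Fragment *insert_piece(std::unordered_map<std::string_view, Fragment> &map,
                       std::string_view data) {
  return &map.try_emplace(data).first->second;
}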
using E = MOLD_TARGET;
template bool cie_equals(const CieRecord<E> &, const CieRecord<E> &);

View File

@ -427,10 +427,7 @@ int elf_main(int argc, char **argv) {
kill_eh_frame_sections(ctx);
// Split mergeable section contents into section pieces.
split_section_pieces(ctx);
// Resolve mergeable section pieces to merge them.
resolve_section_pieces(ctx);
create_merged_sections(ctx);
// Handle --relocatable. Since the linker's behavior is quite different
// from the normal one when the option is given, the logic is implemented
@ -464,9 +461,6 @@ int elf_main(int argc, char **argv) {
if (ctx.arg.icf)
icf_sections(ctx);
// Compute sizes of sections containing mergeable strings.
compute_merged_section_sizes(ctx);
// Create linker-synthesized sections such as .got or .plt.
create_synthetic_sections(ctx);
@ -659,10 +653,8 @@ int elf_main(int argc, char **argv) {
// .note.gnu.build-id section contains a cryptographic hash of the
// entire output file. Now that we have written everything except the
// build-id, we can compute it.
if (ctx.buildid) {
compute_build_id(ctx);
ctx.buildid->copy_buf(ctx);
}
if (ctx.buildid)
write_build_id(ctx);
// .gdb_index's contents cannot be constructed before applying
// relocations to other debug sections. We have relocated debug

View File

@ -373,6 +373,7 @@ public:
virtual ~Chunk() = default;
virtual bool is_header() { return false; }
virtual OutputSection<E> *to_osec() { return nullptr; }
virtual void compute_section_size(Context<E> &ctx) {}
virtual i64 get_reldyn_size(Context<E> &ctx) const { return 0; }
virtual void construct_relr(Context<E> &ctx) {}
virtual void copy_buf(Context<E> &ctx) {}
@ -479,6 +480,7 @@ public:
}
OutputSection<E> *to_osec() override { return this; }
void compute_section_size(Context<E> &ctx) override;
void construct_relr(Context<E> &ctx) override;
void copy_buf(Context<E> &ctx) override;
void write_to(Context<E> &ctx, u8 *buf) override;
@ -805,13 +807,18 @@ public:
SectionFragment<E> *insert(Context<E> &ctx, std::string_view data,
u64 hash, i64 p2align);
void assign_offsets(Context<E> &ctx);
void resolve(Context<E> &ctx);
void compute_section_size(Context<E> &ctx) override;
void copy_buf(Context<E> &ctx) override;
void write_to(Context<E> &ctx, u8 *buf) override;
void print_stats(Context<E> &ctx);
std::vector<MergeableSection<E> *> members;
std::mutex mu;
ConcurrentMap<SectionFragment<E>> map;
HyperLogLog estimator;
bool resolved = false;
private:
MergedSection(std::string_view name, i64 flags, i64 type, i64 entsize);
@ -1095,14 +1102,17 @@ public:
std::unique_ptr<InputSection<E>> &isec);
void split_contents(Context<E> &ctx);
void resolve_contents(Context<E> &ctx);
std::pair<SectionFragment<E> *, i64> get_fragment(i64 offset);
std::string_view get_contents(i64 idx);
MergedSection<E> &parent;
std::vector<SectionFragment<E> *> fragments;
private:
std::unique_ptr<InputSection<E>> section;
std::vector<u32> frag_offsets;
std::vector<u32> hashes;
std::vector<SectionFragment<E> *> fragments;
u8 p2align = 0;
};
@ -1195,8 +1205,8 @@ public:
void parse(Context<E> &ctx);
void initialize_symbols(Context<E> &ctx);
void initialize_mergeable_sections(Context<E> &ctx);
void resolve_section_pieces(Context<E> &ctx);
void convert_mergeable_sections(Context<E> &ctx);
void reattach_section_pieces(Context<E> &ctx);
void resolve_symbols(Context<E> &ctx) override;
void mark_live_objects(Context<E> &ctx,
std::function<void(InputFile<E> *)> feeder) override;
@ -1400,10 +1410,8 @@ template <typename E> void create_synthetic_sections(Context<E> &);
template <typename E> void set_file_priority(Context<E> &);
template <typename E> void resolve_symbols(Context<E> &);
template <typename E> void kill_eh_frame_sections(Context<E> &);
template <typename E> void split_section_pieces(Context<E> &);
template <typename E> void resolve_section_pieces(Context<E> &);
template <typename E> void create_merged_sections(Context<E> &);
template <typename E> void convert_common_symbols(Context<E> &);
template <typename E> void compute_merged_section_sizes(Context<E> &);
template <typename E> void create_output_sections(Context<E> &);
template <typename E> void add_synthetic_symbols(Context<E> &);
template <typename E> void apply_section_align(Context<E> &);
@ -1435,7 +1443,7 @@ template <typename E> void compute_section_headers(Context<E> &);
template <typename E> i64 set_osec_offsets(Context<E> &);
template <typename E> void fix_synthetic_symbols(Context<E> &);
template <typename E> i64 compress_debug_sections(Context<E> &);
template <typename E> void compute_build_id(Context<E> &);
template <typename E> void write_build_id(Context<E> &);
template <typename E> void write_dependency_file(Context<E> &);
template <typename E> void show_stats(Context<E> &);
@ -2353,7 +2361,7 @@ InputSection<E>::get_fragment(Context<E> &ctx, const ElfRel<E> &rel) {
assert(!(shdr().sh_flags & SHF_ALLOC));
const ElfSym<E> &esym = file.elf_syms[rel.r_sym];
if (esym.st_type == STT_SECTION)
if (!esym.is_abs() && !esym.is_common() && !esym.is_undef())
if (std::unique_ptr<MergeableSection<E>> &m =
file.mergeable_sections[file.get_shndx(esym)])
return m->get_fragment(esym.st_value + get_addend(*this, rel));

View File

@ -865,6 +865,84 @@ void DynamicSection<E>::copy_buf(Context<E> &ctx) {
write_vector(ctx.buf + this->shdr.sh_offset, contents);
}
template <typename T>
static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
std::span<T> span(input);
std::vector<std::span<T>> vec;
while (span.size() >= unit) {
vec.push_back(span.subspan(0, unit));
span = span.subspan(unit);
}
if (!span.empty())
vec.push_back(span);
return vec;
}
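
A usage sketch for the split() helper above, assuming it is visible in the same translation unit: a 25-element vector with unit = 10 yields three spans of sizes 10, 10, and 5, all aliasing the original vector's storage.

#include <cassert>
#include <numeric>
#include <span>
#include <vector>

static void split_example() {
  std::vector<int> v(25);
  std::iota(v.begin(), v.end(), 0);
  std::vector<std::span<int>> parts = split(v, 10);
  assert(parts.size() == 3);
  assert(parts[0].size() == 10 && parts[1].size() == 10 && parts[2].size() == 5);
}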
// Assign offsets to OutputSection members
template <typename E>
void OutputSection<E>::compute_section_size(Context<E> &ctx) {
ElfShdr<E> &shdr = this->shdr;
// On most RISC systems, we need to create so-called "range extension
// thunks" to extend the reach of branch instructions, as their jump
// range is limited. create_range_extension_thunks() computes the size
// of the section while inserting thunks.
if constexpr (needs_thunk<E>) {
if ((shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable) {
create_range_extension_thunks(ctx);
return;
}
}
// Since one output section may contain millions of input sections,
// we first split input sections into groups and assign offsets to
// groups.
struct Group {
std::span<InputSection<E> *> members;
i64 size = 0;
i64 p2align = 0;
i64 offset = 0;
};
std::span<InputSection<E> *> mem = members;
std::vector<Group> groups;
constexpr i64 group_size = 10000;
while (!mem.empty()) {
i64 sz = std::min<i64>(group_size, mem.size());
groups.push_back({mem.subspan(0, sz)});
mem = mem.subspan(sz);
}
tbb::parallel_for_each(groups, [](Group &group) {
for (InputSection<E> *isec : group.members) {
group.size = align_to(group.size, 1 << isec->p2align) + isec->sh_size;
group.p2align = std::max<i64>(group.p2align, isec->p2align);
}
});
shdr.sh_size = 0;
for (i64 i = 0; i < groups.size(); i++) {
shdr.sh_size = align_to(shdr.sh_size, 1 << groups[i].p2align);
groups[i].offset = shdr.sh_size;
shdr.sh_size += groups[i].size;
shdr.sh_addralign = std::max<u32>(shdr.sh_addralign, 1 << groups[i].p2align);
}
// Assign offsets to input sections.
tbb::parallel_for_each(groups, [](Group &group) {
i64 offset = group.offset;
for (InputSection<E> *isec : group.members) {
offset = align_to(offset, 1 << isec->p2align);
isec->offset = offset;
offset += isec->sh_size;
}
});
}
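
As a worked example of the two-level offset assignment above (all numbers made up): group sizes and maximum alignments are computed in parallel, group base offsets are then assigned in a short serial pass, and member offsets are filled in from each group's base in parallel again.

#include <cstdint>
#include <cstdio>

static int64_t align_up(int64_t v, int64_t a) { return (v + a - 1) & ~(a - 1); }

int main() {
  // Pretend group 0 holds sections totaling 0x1234 bytes (16-byte alignment)
  // and group 1 holds 0x80 bytes (8-byte alignment).
  int64_t sizes[2]  = {0x1234, 0x80};
  int64_t aligns[2] = {16, 8};
  int64_t sh_size = 0;

  for (int i = 0; i < 2; i++) {
    sh_size = align_up(sh_size, aligns[i]);            // serial pass over groups
    std::printf("group %d base offset: %#llx\n", i, (long long)sh_size);
    sh_size += sizes[i];
  }
  std::printf("sh_size: %#llx\n", (long long)sh_size);  // 0x12b8
}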
template <typename E>
void OutputSection<E>::copy_buf(Context<E> &ctx) {
if (this->shdr.sh_type != SHT_NOBITS)
@ -1971,7 +2049,26 @@ MergedSection<E>::insert(Context<E> &ctx, std::string_view data, u64 hash,
}
template <typename E>
void MergedSection<E>::assign_offsets(Context<E> &ctx) {
void MergedSection<E>::resolve(Context<E> &ctx) {
tbb::parallel_for_each(members, [&](MergeableSection<E> *sec) {
sec->split_contents(ctx);
});
// We aim for a 2/3 occupancy ratio
map.resize(estimator.get_cardinality() * 3 / 2);
tbb::parallel_for_each(members, [&](MergeableSection<E> *sec) {
sec->resolve_contents(ctx);
});
resolved = true;
}
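
The map.resize() call above sizes the fragment hash table from a HyperLogLog cardinality estimate so that the expected load factor lands around 2/3; a quick arithmetic sanity check with a made-up estimate:

#include <cassert>
#include <cstdint>

int main() {
  int64_t estimated_pieces = 1'200'000;          // HyperLogLog estimate (made up)
  int64_t nbuckets = estimated_pieces * 3 / 2;   // 1'800'000 buckets
  assert(nbuckets == 1'800'000);
  // Expected load factor: 1'200'000 / 1'800'000 = 2/3.
}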
template <typename E>
void MergedSection<E>::compute_section_size(Context<E> &ctx) {
if (!resolved)
resolve(ctx);
std::vector<i64> sizes(map.NUM_SHARDS);
Atomic<i64> alignment = 1;

View File

@ -402,25 +402,50 @@ void kill_eh_frame_sections(Context<E> &ctx) {
}
template <typename E>
void split_section_pieces(Context<E> &ctx) {
Timer t(ctx, "split_section_pieces");
void create_merged_sections(Context<E> &ctx) {
Timer t(ctx, "create_merged_sections");
// Convert InputSections to MergeableSections.
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
file->convert_mergeable_sections(ctx);
});
tbb::parallel_for_each(ctx.merged_sections,
[&](std::unique_ptr<MergedSection<E>> &sec) {
if (sec->shdr.sh_flags & SHF_ALLOC)
sec->resolve(ctx);
});
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
file->initialize_mergeable_sections(ctx);
file->reattach_section_pieces(ctx);
});
}
template <typename E>
void resolve_section_pieces(Context<E> &ctx) {
Timer t(ctx, "resolve_section_pieces");
// Add strings to .comment
if (!ctx.arg.oformat_binary) {
ElfShdr<E> shdr = {};
shdr.sh_type = SHT_PROGBITS;
shdr.sh_flags = SHF_MERGE | SHF_STRINGS;
// We aim for a 2/3 occupancy ratio
for (std::unique_ptr<MergedSection<E>> &sec : ctx.merged_sections)
sec->map.resize(sec->estimator.get_cardinality() * 3 / 2);
MergedSection<E> *sec = MergedSection<E>::get_instance(ctx, ".comment", shdr);
if (!sec->resolved) {
sec->map.resize(4096);
sec->resolved = true;
}
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
file->resolve_section_pieces(ctx);
});
auto add = [&](std::string str) {
std::string_view buf = save_string(ctx, str);
std::string_view data(buf.data(), buf.size() + 1);
sec->insert(ctx, data, hash_string(data), 0);
};
// Add an identification string to .comment.
add(get_mold_version());
// Embed command line arguments for debugging.
char *env = getenv("MOLD_DEBUG");
if (env && env[0])
add("mold command line: " + get_cmdline_args(ctx));
}
}
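
A note on the add() lambda above: because .comment is an SHF_MERGE | SHF_STRINGS section, each inserted entry is a NUL-terminated byte string deduplicated by content, so the std::string_view handed to insert() deliberately covers size() + 1 bytes to include the terminator. A minimal sketch of that detail (make_comment_entry is an illustrative name, not mold's API):

#include <string>
#include <string_view>

// `stored` must outlive the returned view (mold uses save_string(ctx, ...)
// for that). Reading data()[size()] is well-defined for std::string and
// yields the '\0' terminator, so the view spans size() + 1 bytes.
std::string_view make_comment_entry(const std::string &stored) {
  return std::string_view(stored.data(), stored.size() + 1);
}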
template <typename E>
@ -441,55 +466,6 @@ static std::string get_cmdline_args(Context<E> &ctx) {
return ss.str();
}
template <typename E>
void add_comment_string(Context<E> &ctx, std::string str) {
ElfShdr<E> shdr = {};
shdr.sh_type = SHT_PROGBITS;
shdr.sh_flags = SHF_MERGE | SHF_STRINGS;
shdr.sh_entsize = 1;
shdr.sh_addralign = 1;
MergedSection<E> *sec = MergedSection<E>::get_instance(ctx, ".comment", shdr);
if (sec->map.nbuckets == 0)
sec->map.resize(4096);
std::string_view buf = save_string(ctx, str);
std::string_view data(buf.data(), buf.size() + 1);
sec->insert(ctx, data, hash_string(data), 0);
}
template <typename E>
void compute_merged_section_sizes(Context<E> &ctx) {
Timer t(ctx, "compute_merged_section_sizes");
// Add an identification string to .comment.
if (!ctx.arg.oformat_binary)
add_comment_string(ctx, get_mold_version());
// Embed command line arguments for debugging.
if (char *env = getenv("MOLD_DEBUG"); env && env[0])
add_comment_string(ctx, "mold command line: " + get_cmdline_args(ctx));
tbb::parallel_for_each(ctx.merged_sections,
[&](std::unique_ptr<MergedSection<E>> &sec) {
sec->assign_offsets(ctx);
});
}
template <typename T>
static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
std::span<T> span(input);
std::vector<std::span<T>> vec;
while (span.size() >= unit) {
vec.push_back(span.subspan(0, unit));
span = span.subspan(unit);
}
if (!span.empty())
vec.push_back(span);
return vec;
}
template <typename E>
static bool has_ctors_and_init_array(Context<E> &ctx) {
bool x = false;
@ -729,8 +705,7 @@ void create_output_sections(Context<E> &ctx) {
// Add output sections and mergeable sections to ctx.chunks
for (std::unique_ptr<MergedSection<E>> &osec : ctx.merged_sections)
if (osec->shdr.sh_size)
chunks.push_back(osec.get());
chunks.push_back(osec.get());
// Sections are added to the section lists in an arbitrary order
// because they are created in parallel. Sort them to make the
@ -1362,76 +1337,24 @@ template <typename E>
void compute_section_sizes(Context<E> &ctx) {
Timer t(ctx, "compute_section_sizes");
struct Group {
i64 size = 0;
i64 p2align = 0;
i64 offset = 0;
std::span<InputSection<E> *> members;
};
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
OutputSection<E> *osec = chunk->to_osec();
if (!osec)
return;
// This pattern will be processed in the next loop.
if constexpr (needs_thunk<E>)
if ((osec->shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable)
return;
// Since one output section may contain millions of input sections,
// we first split input sections into groups and assign offsets to
// groups.
std::vector<Group> groups;
constexpr i64 group_size = 10000;
for (std::span<InputSection<E> *> span : split(osec->members, group_size))
groups.push_back(Group{.members = span});
tbb::parallel_for_each(groups, [](Group &group) {
for (InputSection<E> *isec : group.members) {
group.size = align_to(group.size, 1 << isec->p2align) + isec->sh_size;
group.p2align = std::max<i64>(group.p2align, isec->p2align);
}
});
ElfShdr<E> &shdr = osec->shdr;
shdr.sh_size = 0;
for (i64 i = 0; i < groups.size(); i++) {
shdr.sh_size = align_to(shdr.sh_size, 1 << groups[i].p2align);
groups[i].offset = shdr.sh_size;
shdr.sh_size += groups[i].size;
shdr.sh_addralign = std::max<u32>(shdr.sh_addralign, 1 << groups[i].p2align);
}
// Assign offsets to input sections.
tbb::parallel_for_each(groups, [](Group &group) {
i64 offset = group.offset;
for (InputSection<E> *isec : group.members) {
offset = align_to(offset, 1 << isec->p2align);
isec->offset = offset;
offset += isec->sh_size;
}
});
});
// On ARM32 or ARM64, we may need to create so-called "range extension
// thunks" to extend the reach of branch instructions, as they can jump
// only to ±16 MiB or ±128 MiB, respectively.
//
// In the following loop, we compute the sizes of sections while
// inserting thunks. This pass cannot be fully parallelized:
// create_range_extension_thunks is parallelized internally, but the
// function itself is not thread-safe.
if constexpr (needs_thunk<E>) {
Timer t2(ctx, "create_range_extension_thunks");
// Chunk<E>::compute_section_size may obtain a global lock to create
// range extension thunks. I don't know why, but using a parallel_for
// loop both inside and outside of the lock may cause a deadlock. It
// might be a bug in TBB. For now, I'll avoid using parallel_for_each
// here.
for (Chunk<E> *chunk : ctx.chunks)
if (chunk->shdr.sh_flags & SHF_EXECINSTR)
chunk->compute_section_size(ctx);
if (!ctx.arg.relocatable)
for (Chunk<E> *chunk : ctx.chunks)
if (OutputSection<E> *osec = chunk->to_osec())
if (osec->shdr.sh_flags & SHF_EXECINSTR)
osec->create_range_extension_thunks(ctx);
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
if (!(chunk->shdr.sh_flags & SHF_EXECINSTR))
chunk->compute_section_size(ctx);
});
} else {
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
chunk->compute_section_size(ctx);
});
}
}
@ -3009,23 +2932,34 @@ static void blake3_hash(u8 *buf, i64 size, u8 *out) {
}
template <typename E>
void compute_build_id(Context<E> &ctx) {
Timer t(ctx, "compute_build_id");
std::vector<std::span<u8>> get_shards(Context<E> &ctx) {
constexpr i64 shard_size = 4 * 1024 * 1024; // 4 MiB
std::span<u8> buf = {ctx.buf, (size_t)ctx.output_file->filesize};
std::vector<std::span<u8>> vec;
while (!buf.empty()) {
i64 sz = std::min<i64>(shard_size, buf.size());
vec.push_back(buf.subspan(0, sz));
buf = buf.subspan(sz);
}
return vec;
}
template <typename E>
void write_build_id(Context<E> &ctx) {
Timer t(ctx, "write_build_id");
switch (ctx.arg.build_id.kind) {
case BuildId::HEX:
ctx.buildid->contents = ctx.arg.build_id.value;
break;
case BuildId::HASH: {
i64 shard_size = 4 * 1024 * 1024;
i64 filesize = ctx.output_file->filesize;
i64 num_shards = align_to(filesize, shard_size) / shard_size;
std::vector<u8> shards(num_shards * BLAKE3_OUT_LEN);
std::vector<std::span<u8>> shards = get_shards(ctx);
std::vector<u8> hashes(shards.size() * BLAKE3_OUT_LEN);
tbb::parallel_for((i64)0, num_shards, [&](i64 i) {
u8 *begin = ctx.buf + shard_size * i;
u8 *end = (i == num_shards - 1) ? ctx.buf + filesize : begin + shard_size;
blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN);
tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) {
blake3_hash(shards[i].data(), shards[i].size(),
hashes.data() + i * BLAKE3_OUT_LEN);
#ifdef HAVE_MADVISE
// Make the kernel page out the file contents we've just written
@ -3036,7 +2970,7 @@ void compute_build_id(Context<E> &ctx) {
});
u8 buf[BLAKE3_OUT_LEN];
blake3_hash(shards.data(), shards.size(), buf);
blake3_hash(hashes.data(), hashes.size(), buf);
assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN);
ctx.buildid->contents = {buf, buf + ctx.arg.build_id.size()};
@ -3055,8 +2989,9 @@ void compute_build_id(Context<E> &ctx) {
default:
unreachable();
}
}
ctx.buildid->copy_buf(ctx);
}
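
write_build_id() above hashes the output in two levels: each 4 MiB shard is hashed in parallel, then the concatenation of the per-shard digests is hashed once more to produce the final build-id. A serial sketch of that scheme, assuming the official BLAKE3 C API (blake3_hasher_init/update/finalize):

#include <blake3.h>
#include <cstdint>
#include <span>
#include <vector>

static void hash_one(std::span<const uint8_t> in, uint8_t *out) {
  blake3_hasher h;
  blake3_hasher_init(&h);
  blake3_hasher_update(&h, in.data(), in.size());
  blake3_hasher_finalize(&h, out, BLAKE3_OUT_LEN);
}

// Hash each shard, then hash the concatenated digests. mold runs the first
// level under tbb::parallel_for; this sketch keeps it serial for clarity.
static void tree_hash(std::span<const std::span<const uint8_t>> shards,
                      uint8_t out[BLAKE3_OUT_LEN]) {
  std::vector<uint8_t> digests(shards.size() * BLAKE3_OUT_LEN);
  for (size_t i = 0; i < shards.size(); i++)
    hash_one(shards[i], digests.data() + i * BLAKE3_OUT_LEN);
  hash_one(digests, out);
}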
// Write Makefile-style dependency rules to a file specified by
// --dependency-file. This is analogous to the compiler's -M flag.
@ -3162,10 +3097,8 @@ template void apply_exclude_libs(Context<E> &);
template void create_synthetic_sections(Context<E> &);
template void resolve_symbols(Context<E> &);
template void kill_eh_frame_sections(Context<E> &);
template void split_section_pieces(Context<E> &);
template void resolve_section_pieces(Context<E> &);
template void create_merged_sections(Context<E> &);
template void convert_common_symbols(Context<E> &);
template void compute_merged_section_sizes(Context<E> &);
template void create_output_sections(Context<E> &);
template void add_synthetic_symbols(Context<E> &);
template void check_cet_errors(Context<E> &);
@ -3197,7 +3130,7 @@ template void compute_section_headers(Context<E> &);
template i64 set_osec_offsets(Context<E> &);
template void fix_synthetic_symbols(Context<E> &);
template i64 compress_debug_sections(Context<E> &);
template void compute_build_id(Context<E> &);
template void write_build_id(Context<E> &);
template void write_dependency_file(Context<E> &);
template void show_stats(Context<E> &);

View File

@ -148,8 +148,6 @@ static u64 r_set_osec_offsets(Context<E> &ctx) {
template <typename E>
void combine_objects(Context<E> &ctx) {
compute_merged_section_sizes(ctx);
create_output_sections(ctx);
r_create_synthetic_sections(ctx);

View File

@ -60,6 +60,7 @@ void notify_parent() {
char buf[] = {1};
[[maybe_unused]] int n = write(pipe_write_fd, buf, 1);
assert(n == 1);
pipe_write_fd = -1;
}
#endif
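
The added pipe_write_fd = -1 is what makes notify_parent idempotent: after the first successful notification the fd is invalidated, so later calls become no-ops. A minimal sketch of the resulting pattern, assuming the function guards on the fd before writing (the guard itself is outside the visible hunk, and notify_parent_sketch is an illustrative name):

#include <unistd.h>

static int pipe_write_fd = -1;

static void notify_parent_sketch() {
  if (pipe_write_fd == -1)
    return;                        // already notified (or no parent to notify)
  char buf[] = {1};
  [[maybe_unused]] ssize_t n = write(pipe_write_fd, buf, 1);
  pipe_write_fd = -1;              // subsequent calls are no-ops
}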

View File

@ -170,6 +170,10 @@ static void scan_rels(Context<E> &ctx, InputSection<E> &isec,
template <>
void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
// This function is not thread-safe because it mutates symbols' members
static std::mutex mu;
std::scoped_lock lock(mu);
std::span<InputSection<E> *> m = members;
if (m.empty())
return;
@ -247,10 +251,8 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
// Scan relocations between B and C to collect symbols that need
// entries in the new thunk.
tbb::parallel_for_each(m.begin() + b, m.begin() + c,
[&](InputSection<E> *isec) {
scan_rels(ctx, *isec, *thunk, thunk_idx);
});
for (i64 i = b; i < c; i++)
scan_rels(ctx, *m[i], *thunk, thunk_idx);
// Now that we know the number of symbols in the thunk, we can compute
// the thunk's size.
@ -270,16 +272,15 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
}
// Scan relocations again to fix symbol offsets in the last thunk.
tbb::parallel_for_each(m.begin() + b, m.begin() + c,
[&](InputSection<E> *isec) {
std::span<Symbol<E> *> syms = isec->file.symbols;
std::span<const ElfRel<E>> rels = isec->get_rels(ctx);
std::span<ThunkRef> thunk_refs = isec->extra.thunk_refs;
for (i64 i = b; i < c; i++) {
std::span<Symbol<E> *> syms = m[i]->file.symbols;
std::span<const ElfRel<E>> rels = m[i]->get_rels(ctx);
std::span<ThunkRef> thunk_refs = m[i]->extra.thunk_refs;
for (i64 i = 0; i < rels.size(); i++)
if (thunk_refs[i].thunk_idx == thunk_idx)
thunk_refs[i].sym_idx = syms[rels[i].r_sym]->extra.thunk_sym_idx;
});
for (i64 j = 0; j < rels.size(); j++)
if (thunk_refs[j].thunk_idx == thunk_idx)
thunk_refs[j].sym_idx = syms[rels[j].r_sym]->extra.thunk_sym_idx;
}
// Move B forward to point to the beginning of the next batch.
b = c;