mirror of
https://github.com/rui314/mold.git
synced 2024-10-05 17:17:40 +03:00
Compare commits
4 Commits
7089304046
...
fa2632cf7f
Author | SHA1 | Date | |
---|---|---|---|
|
fa2632cf7f | ||
|
6ce5a7845b | ||
|
d4cd52ca49 | ||
|
8e3679e4e5 |
@ -677,6 +677,29 @@ void ObjectFile<E>::sort_relocations(Context<E> &ctx) {
|
||||
}
|
||||
}
|
||||
|
||||
// Replaces the mergeable input sections of this object file with
// MergeableSection wrappers.
//
// A slot in `sections` is converted only if it is non-null, has a non-zero
// sh_size, has relsec_idx == -1, has SHF_MERGE set in its section header
// flags, and MergedSection::get_instance() returns a non-null parent for
// it. On conversion the new wrapper is stored at the same index in
// `mergeable_sections` and the `sections` slot is reset to nullptr.
template <typename E>
|
||||
void ObjectFile<E>::convert_mergeable_sections(Context<E> &ctx) {
|
||||
// Convert InputSections to MergeableSections
|
||||
for (i64 i = 0; i < this->sections.size(); i++) {
|
||||
InputSection<E> *isec = this->sections[i].get();
|
||||
// Skip empty slots, zero-sized sections and sections whose relsec_idx
// is set (presumably sections with relocations attached -- confirm).
if (!isec || isec->sh_size == 0 || isec->relsec_idx != -1)
|
||||
continue;
|
||||
|
||||
const ElfShdr<E> &shdr = isec->shdr();
|
||||
// Only sections flagged SHF_MERGE are candidates.
if (!(shdr.sh_flags & SHF_MERGE))
|
||||
continue;
|
||||
|
||||
// Look up the merged section for this name/header. A null result
// leaves the input section untouched.
MergedSection<E> *parent =
|
||||
MergedSection<E>::get_instance(ctx, isec->name(), shdr);
|
||||
|
||||
if (parent) {
|
||||
// The unique_ptr slot is passed by reference to the wrapper's
// constructor; the slot is then cleared explicitly.
this->mergeable_sections[i] =
|
||||
std::make_unique<MergeableSection<E>>(ctx, *parent, this->sections[i]);
|
||||
this->sections[i] = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Usually a section is an atomic unit of inclusion or exclusion.
|
||||
// Linker doesn't care about its contents. However, if a section is a
|
||||
// mergeable section (a section with SHF_MERGE bit set), the linker is
|
||||
@ -713,54 +736,17 @@ void ObjectFile<E>::sort_relocations(Context<E> &ctx) {
|
||||
// section piece in a section, but it doesn't do for any other types
|
||||
// of symbols.
|
||||
//
|
||||
// In mold, we attach symbols to section pieces. If a relocation refers
|
||||
// to a section symbol, and that symbol's section is a mergeable one,
|
||||
// we create a new dummy symbol for a section piece and redirect the
|
||||
// relocation to this new symbol. If a non-section symbol refers to a
|
||||
// section piece, the section piece is attached to the symbol.
|
||||
// Section garbage collection and Identical Code Folding work on graphs
|
||||
// where sections or section pieces are vertices and relocations are
|
||||
// edges. To make it easy to handle them, we rewrite symbols and
|
||||
// relocations so that each non-absolute symbol always refers to either
|
||||
// a non-mergeable section or a section piece.
|
||||
//
|
||||
// We do that only for SHF_ALLOC sections because GC and ICF work only
|
||||
// on memory-allocated sections. Non-memory-allocated mergeable sections
|
||||
// are not handled here for performance reasons.
|
||||
template <typename E>
|
||||
void ObjectFile<E>::initialize_mergeable_sections(Context<E> &ctx) {
|
||||
// Convert InputSections to MergeableSections
|
||||
for (i64 i = 0; i < this->sections.size(); i++) {
|
||||
InputSection<E> *isec = this->sections[i].get();
|
||||
if (!isec || isec->sh_size == 0 || isec->relsec_idx != -1)
|
||||
continue;
|
||||
|
||||
MergedSection<E> *parent =
|
||||
MergedSection<E>::get_instance(ctx, isec->name(), isec->shdr());
|
||||
|
||||
if (parent) {
|
||||
this->mergeable_sections[i] =
|
||||
std::make_unique<MergeableSection<E>>(ctx, *parent, this->sections[i]);
|
||||
this->sections[i] = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Split section contents
|
||||
for (std::unique_ptr<MergeableSection<E>> &sec : mergeable_sections)
|
||||
if (sec)
|
||||
sec->split_contents(ctx);
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
|
||||
for (std::unique_ptr<MergeableSection<E>> &m : mergeable_sections) {
|
||||
if (!m)
|
||||
continue;
|
||||
|
||||
m->fragments.reserve(m->frag_offsets.size());
|
||||
|
||||
for (i64 i = 0; i < m->frag_offsets.size(); i++) {
|
||||
SectionFragment<E> *frag =
|
||||
m->parent.insert(ctx, m->get_contents(i), m->hashes[i], m->p2align);
|
||||
m->fragments.push_back(frag);
|
||||
}
|
||||
|
||||
// Reclaim memory as we'll never use this vector again
|
||||
m->hashes.clear();
|
||||
m->hashes.shrink_to_fit();
|
||||
}
|
||||
|
||||
void ObjectFile<E>::reattach_section_pieces(Context<E> &ctx) {
|
||||
// Attach section pieces to symbols.
|
||||
for (i64 i = 1; i < this->elf_syms.size(); i++) {
|
||||
Symbol<E> &sym = *this->symbols[i];
|
||||
@ -769,8 +755,9 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
|
||||
if (esym.is_abs() || esym.is_common() || esym.is_undef())
|
||||
continue;
|
||||
|
||||
std::unique_ptr<MergeableSection<E>> &m = mergeable_sections[get_shndx(esym)];
|
||||
if (!m || m->fragments.empty())
|
||||
i64 shndx = get_shndx(esym);
|
||||
std::unique_ptr<MergeableSection<E>> &m = mergeable_sections[shndx];
|
||||
if (!m || !m->parent.resolved)
|
||||
continue;
|
||||
|
||||
SectionFragment<E> *frag;
|
||||
@ -785,17 +772,16 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
|
||||
}
|
||||
|
||||
// Compute the size of frag_syms.
|
||||
std::vector<InputSection<E> *> vec;
|
||||
for (std::unique_ptr<InputSection<E>> &isec : sections)
|
||||
if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC))
|
||||
vec.push_back(isec.get());
|
||||
|
||||
i64 nfrag_syms = 0;
|
||||
for (InputSection<E> *isec : vec)
|
||||
for (ElfRel<E> &r : isec->get_rels(ctx))
|
||||
if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
|
||||
esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)])
|
||||
nfrag_syms++;
|
||||
for (std::unique_ptr<InputSection<E>> &isec : sections)
|
||||
if (isec)
|
||||
for (ElfRel<E> &r : isec->get_rels(ctx))
|
||||
if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
|
||||
esym.st_type == STT_SECTION)
|
||||
if (std::unique_ptr<MergeableSection<E>> &m =
|
||||
mergeable_sections[get_shndx(esym)])
|
||||
if (m->parent.resolved)
|
||||
nfrag_syms++;
|
||||
|
||||
this->frag_syms.resize(nfrag_syms);
|
||||
|
||||
@ -803,34 +789,38 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
|
||||
// create a new dummy non-section symbol and redirect the relocation
|
||||
// to the newly created symbol.
|
||||
i64 idx = 0;
|
||||
for (InputSection<E> *isec : vec) {
|
||||
for (ElfRel<E> &r : isec->get_rels(ctx)) {
|
||||
const ElfSym<E> &esym = this->elf_syms[r.r_sym];
|
||||
if (esym.st_type != STT_SECTION)
|
||||
continue;
|
||||
for (std::unique_ptr<InputSection<E>> &isec : sections) {
|
||||
if (isec) {
|
||||
for (ElfRel<E> &r : isec->get_rels(ctx)) {
|
||||
const ElfSym<E> &esym = this->elf_syms[r.r_sym];
|
||||
if (esym.st_type != STT_SECTION)
|
||||
continue;
|
||||
|
||||
std::unique_ptr<MergeableSection<E>> &m = mergeable_sections[get_shndx(esym)];
|
||||
if (!m)
|
||||
continue;
|
||||
std::unique_ptr<MergeableSection<E>> &m =
|
||||
mergeable_sections[get_shndx(esym)];
|
||||
|
||||
i64 r_addend = get_addend(*isec, r);
|
||||
if (!m || !m->parent.resolved)
|
||||
continue;
|
||||
|
||||
SectionFragment<E> *frag;
|
||||
i64 in_frag_offset;
|
||||
std::tie(frag, in_frag_offset) = m->get_fragment(esym.st_value + r_addend);
|
||||
i64 r_addend = get_addend(*isec, r);
|
||||
|
||||
if (!frag)
|
||||
Fatal(ctx) << *this << ": bad relocation at " << r.r_sym;
|
||||
SectionFragment<E> *frag;
|
||||
i64 in_frag_offset;
|
||||
std::tie(frag, in_frag_offset) = m->get_fragment(esym.st_value + r_addend);
|
||||
|
||||
Symbol<E> &sym = this->frag_syms[idx];
|
||||
sym.file = this;
|
||||
sym.set_name("<fragment>");
|
||||
sym.sym_idx = r.r_sym;
|
||||
sym.visibility = STV_HIDDEN;
|
||||
sym.set_frag(frag);
|
||||
sym.value = in_frag_offset - r_addend;
|
||||
r.r_sym = this->elf_syms.size() + idx;
|
||||
idx++;
|
||||
if (!frag)
|
||||
Fatal(ctx) << *this << ": bad relocation at " << r.r_sym;
|
||||
|
||||
Symbol<E> &sym = this->frag_syms[idx];
|
||||
sym.file = this;
|
||||
sym.set_name("<fragment>");
|
||||
sym.sym_idx = r.r_sym;
|
||||
sym.visibility = STV_HIDDEN;
|
||||
sym.set_frag(frag);
|
||||
sym.value = in_frag_offset - r_addend;
|
||||
r.r_sym = this->elf_syms.size() + idx;
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -557,6 +557,9 @@ MergeableSection<E>::MergeableSection(Context<E> &ctx, MergedSection<E> &parent,
|
||||
std::unique_ptr<InputSection<E>> &isec)
|
||||
: parent(parent), section(std::move(isec)), p2align(section->p2align) {
|
||||
section->uncompress(ctx);
|
||||
|
||||
std::scoped_lock lock(parent.mu);
|
||||
parent.members.push_back(this);
|
||||
}
|
||||
|
||||
static size_t find_null(std::string_view data, i64 pos, i64 entsize) {
|
||||
@ -630,6 +633,17 @@ void MergeableSection<E>::split_contents(Context<E> &ctx) {
|
||||
counter += frag_offsets.size();
|
||||
}
|
||||
|
||||
// Inserts every piece of this section into the parent MergedSection and
// records the returned SectionFragment pointers in `fragments`, one per
// entry of `frag_offsets` and in the same order.
//
// Reads `frag_offsets` and `hashes`, so both must already be populated
// (presumably by split_contents() -- confirm against its definition).
template <typename E>
|
||||
void MergeableSection<E>::resolve_contents(Context<E> &ctx) {
|
||||
// Reserve up front so the push_back calls below do not reallocate.
fragments.reserve(frag_offsets.size());
|
||||
for (i64 i = 0; i < frag_offsets.size(); i++)
|
||||
fragments.push_back(parent.insert(ctx, get_contents(i), hashes[i], p2align));
|
||||
|
||||
// Reclaim memory as we'll never use this vector again
|
||||
hashes.clear();
|
||||
hashes.shrink_to_fit();
|
||||
}
|
||||
|
||||
using E = MOLD_TARGET;
|
||||
|
||||
template bool cie_equals(const CieRecord<E> &, const CieRecord<E> &);
|
||||
|
14
elf/main.cc
14
elf/main.cc
@ -427,10 +427,7 @@ int elf_main(int argc, char **argv) {
|
||||
kill_eh_frame_sections(ctx);
|
||||
|
||||
// Split mergeable section contents into section pieces.
|
||||
split_section_pieces(ctx);
|
||||
|
||||
// Resolve mergeable section pieces to merge them.
|
||||
resolve_section_pieces(ctx);
|
||||
create_merged_sections(ctx);
|
||||
|
||||
// Handle --relocatable. Since the linker's behavior is quite different
|
||||
// from the normal one when the option is given, the logic is implemented
|
||||
@ -464,9 +461,6 @@ int elf_main(int argc, char **argv) {
|
||||
if (ctx.arg.icf)
|
||||
icf_sections(ctx);
|
||||
|
||||
// Compute sizes of sections containing mergeable strings.
|
||||
compute_merged_section_sizes(ctx);
|
||||
|
||||
// Create linker-synthesized sections such as .got or .plt.
|
||||
create_synthetic_sections(ctx);
|
||||
|
||||
@ -659,10 +653,8 @@ int elf_main(int argc, char **argv) {
|
||||
// .note.gnu.build-id section contains a cryptographic hash of the
|
||||
// entire output file. Now that we wrote everything except build-id,
|
||||
// we can compute it.
|
||||
if (ctx.buildid) {
|
||||
compute_build_id(ctx);
|
||||
ctx.buildid->copy_buf(ctx);
|
||||
}
|
||||
if (ctx.buildid)
|
||||
write_build_id(ctx);
|
||||
|
||||
// .gdb_index's contents cannot be constructed before applying
|
||||
// relocations to other debug sections. We have relocated debug
|
||||
|
26
elf/mold.h
26
elf/mold.h
@ -373,6 +373,7 @@ public:
|
||||
virtual ~Chunk() = default;
|
||||
virtual bool is_header() { return false; }
|
||||
virtual OutputSection<E> *to_osec() { return nullptr; }
|
||||
virtual void compute_section_size(Context<E> &ctx) {}
|
||||
virtual i64 get_reldyn_size(Context<E> &ctx) const { return 0; }
|
||||
virtual void construct_relr(Context<E> &ctx) {}
|
||||
virtual void copy_buf(Context<E> &ctx) {}
|
||||
@ -479,6 +480,7 @@ public:
|
||||
}
|
||||
|
||||
OutputSection<E> *to_osec() override { return this; }
|
||||
void compute_section_size(Context<E> &ctx) override;
|
||||
void construct_relr(Context<E> &ctx) override;
|
||||
void copy_buf(Context<E> &ctx) override;
|
||||
void write_to(Context<E> &ctx, u8 *buf) override;
|
||||
@ -805,13 +807,18 @@ public:
|
||||
SectionFragment<E> *insert(Context<E> &ctx, std::string_view data,
|
||||
u64 hash, i64 p2align);
|
||||
|
||||
void assign_offsets(Context<E> &ctx);
|
||||
void resolve(Context<E> &ctx);
|
||||
void compute_section_size(Context<E> &ctx) override;
|
||||
void copy_buf(Context<E> &ctx) override;
|
||||
void write_to(Context<E> &ctx, u8 *buf) override;
|
||||
void print_stats(Context<E> &ctx);
|
||||
|
||||
std::vector<MergeableSection<E> *> members;
|
||||
std::mutex mu;
|
||||
|
||||
ConcurrentMap<SectionFragment<E>> map;
|
||||
HyperLogLog estimator;
|
||||
bool resolved = false;
|
||||
|
||||
private:
|
||||
MergedSection(std::string_view name, i64 flags, i64 type, i64 entsize);
|
||||
@ -1095,14 +1102,17 @@ public:
|
||||
std::unique_ptr<InputSection<E>> &isec);
|
||||
|
||||
void split_contents(Context<E> &ctx);
|
||||
void resolve_contents(Context<E> &ctx);
|
||||
std::pair<SectionFragment<E> *, i64> get_fragment(i64 offset);
|
||||
std::string_view get_contents(i64 idx);
|
||||
|
||||
MergedSection<E> &parent;
|
||||
std::vector<SectionFragment<E> *> fragments;
|
||||
|
||||
private:
|
||||
std::unique_ptr<InputSection<E>> section;
|
||||
std::vector<u32> frag_offsets;
|
||||
std::vector<u32> hashes;
|
||||
std::vector<SectionFragment<E> *> fragments;
|
||||
u8 p2align = 0;
|
||||
};
|
||||
|
||||
@ -1195,8 +1205,8 @@ public:
|
||||
|
||||
void parse(Context<E> &ctx);
|
||||
void initialize_symbols(Context<E> &ctx);
|
||||
void initialize_mergeable_sections(Context<E> &ctx);
|
||||
void resolve_section_pieces(Context<E> &ctx);
|
||||
void convert_mergeable_sections(Context<E> &ctx);
|
||||
void reattach_section_pieces(Context<E> &ctx);
|
||||
void resolve_symbols(Context<E> &ctx) override;
|
||||
void mark_live_objects(Context<E> &ctx,
|
||||
std::function<void(InputFile<E> *)> feeder) override;
|
||||
@ -1400,10 +1410,8 @@ template <typename E> void create_synthetic_sections(Context<E> &);
|
||||
template <typename E> void set_file_priority(Context<E> &);
|
||||
template <typename E> void resolve_symbols(Context<E> &);
|
||||
template <typename E> void kill_eh_frame_sections(Context<E> &);
|
||||
template <typename E> void split_section_pieces(Context<E> &);
|
||||
template <typename E> void resolve_section_pieces(Context<E> &);
|
||||
template <typename E> void create_merged_sections(Context<E> &);
|
||||
template <typename E> void convert_common_symbols(Context<E> &);
|
||||
template <typename E> void compute_merged_section_sizes(Context<E> &);
|
||||
template <typename E> void create_output_sections(Context<E> &);
|
||||
template <typename E> void add_synthetic_symbols(Context<E> &);
|
||||
template <typename E> void apply_section_align(Context<E> &);
|
||||
@ -1435,7 +1443,7 @@ template <typename E> void compute_section_headers(Context<E> &);
|
||||
template <typename E> i64 set_osec_offsets(Context<E> &);
|
||||
template <typename E> void fix_synthetic_symbols(Context<E> &);
|
||||
template <typename E> i64 compress_debug_sections(Context<E> &);
|
||||
template <typename E> void compute_build_id(Context<E> &);
|
||||
template <typename E> void write_build_id(Context<E> &);
|
||||
template <typename E> void write_dependency_file(Context<E> &);
|
||||
template <typename E> void show_stats(Context<E> &);
|
||||
|
||||
@ -2353,7 +2361,7 @@ InputSection<E>::get_fragment(Context<E> &ctx, const ElfRel<E> &rel) {
|
||||
assert(!(shdr().sh_flags & SHF_ALLOC));
|
||||
|
||||
const ElfSym<E> &esym = file.elf_syms[rel.r_sym];
|
||||
if (esym.st_type == STT_SECTION)
|
||||
if (!esym.is_abs() && !esym.is_common() && !esym.is_undef())
|
||||
if (std::unique_ptr<MergeableSection<E>> &m =
|
||||
file.mergeable_sections[file.get_shndx(esym)])
|
||||
return m->get_fragment(esym.st_value + get_addend(*this, rel));
|
||||
|
@ -865,6 +865,84 @@ void DynamicSection<E>::copy_buf(Context<E> &ctx) {
|
||||
write_vector(ctx.buf + this->shdr.sh_offset, contents);
|
||||
}
|
||||
|
||||
// Splits `input` into consecutive non-owning spans of `unit` elements each.
// If the element count is not a multiple of `unit`, the last span holds the
// remaining elements. An empty `input` yields an empty result.
//
// The returned spans refer to `input`'s storage; they do not copy elements.
//
// NOTE(review): `unit` must be positive. With unit == 0 the loop condition
// stays true and the span never shrinks, so the loop does not terminate.
template <typename T>
|
||||
static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
|
||||
std::span<T> span(input);
|
||||
std::vector<std::span<T>> vec;
|
||||
|
||||
// Peel off full-sized chunks from the front.
while (span.size() >= unit) {
|
||||
vec.push_back(span.subspan(0, unit));
|
||||
span = span.subspan(unit);
|
||||
}
|
||||
// Whatever is left is shorter than `unit`; keep it as the final chunk.
if (!span.empty())
|
||||
vec.push_back(span);
|
||||
return vec;
|
||||
}
|
||||
|
||||
|
||||
// Assign offsets to OutputSection members
|
||||
// Computes this output section's sh_size and sh_addralign and assigns an
// `offset` to each member input section.
//
// For targets where needs_thunk<E> is true, an executable section in a
// non-relocatable link is handed to create_range_extension_thunks()
// instead, and this function returns immediately afterwards.
template <typename E>
|
||||
void OutputSection<E>::compute_section_size(Context<E> &ctx) {
|
||||
ElfShdr<E> &shdr = this->shdr;
|
||||
|
||||
// On most RISC systems, we need to create so-called "range extension
|
||||
// thunks" to extend branch instructions reach, as their jump
|
||||
// instructions' reach is limited. create_range_extension_thunks()
|
||||
// computes the size of the section while inserting thunks.
|
||||
if constexpr (needs_thunk<E>) {
|
||||
if ((shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable) {
|
||||
create_range_extension_thunks(ctx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Since one output section may contain millions of input sections,
|
||||
// we first split input sections into groups and assign offsets to
|
||||
// groups.
|
||||
struct Group {
|
||||
std::span<InputSection<E> *> members;
|
||||
i64 size = 0;
|
||||
i64 p2align = 0;
|
||||
i64 offset = 0;
|
||||
};
|
||||
|
||||
std::span<InputSection<E> *> mem = members;
|
||||
std::vector<Group> groups;
|
||||
constexpr i64 group_size = 10000;
|
||||
|
||||
// Carve `members` into consecutive groups of at most group_size entries.
while (!mem.empty()) {
|
||||
i64 sz = std::min<i64>(group_size, mem.size());
|
||||
groups.push_back({mem.subspan(0, sz)});
|
||||
mem = mem.subspan(sz);
|
||||
}
|
||||
|
||||
// Pass 1 (parallel over groups): compute each group's size as if it
// started at offset 0, and its largest member alignment.
tbb::parallel_for_each(groups, [](Group &group) {
|
||||
for (InputSection<E> *isec : group.members) {
|
||||
group.size = align_to(group.size, 1 << isec->p2align) + isec->sh_size;
|
||||
group.p2align = std::max<i64>(group.p2align, isec->p2align);
|
||||
}
|
||||
});
|
||||
|
||||
shdr.sh_size = 0;
|
||||
|
||||
// Pass 2 (serial): lay the groups out one after another. Each group
// starts at a multiple of its largest member alignment, so the sizes
// computed relative to 0 in pass 1 remain valid at the real offset.
for (i64 i = 0; i < groups.size(); i++) {
|
||||
shdr.sh_size = align_to(shdr.sh_size, 1 << groups[i].p2align);
|
||||
groups[i].offset = shdr.sh_size;
|
||||
shdr.sh_size += groups[i].size;
|
||||
shdr.sh_addralign = std::max<u32>(shdr.sh_addralign, 1 << groups[i].p2align);
|
||||
}
|
||||
|
||||
// Assign offsets to input sections.
|
||||
tbb::parallel_for_each(groups, [](Group &group) {
|
||||
i64 offset = group.offset;
|
||||
for (InputSection<E> *isec : group.members) {
|
||||
offset = align_to(offset, 1 << isec->p2align);
|
||||
isec->offset = offset;
|
||||
offset += isec->sh_size;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void OutputSection<E>::copy_buf(Context<E> &ctx) {
|
||||
if (this->shdr.sh_type != SHT_NOBITS)
|
||||
@ -1971,7 +2049,26 @@ MergedSection<E>::insert(Context<E> &ctx, std::string_view data, u64 hash,
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void MergedSection<E>::assign_offsets(Context<E> &ctx) {
|
||||
void MergedSection<E>::resolve(Context<E> &ctx) {
|
||||
tbb::parallel_for_each(members, [&](MergeableSection<E> *sec) {
|
||||
sec->split_contents(ctx);
|
||||
});
|
||||
|
||||
// We aim for a 2/3 occupancy ratio
|
||||
map.resize(estimator.get_cardinality() * 3 / 2);
|
||||
|
||||
tbb::parallel_for_each(members, [&](MergeableSection<E> *sec) {
|
||||
sec->resolve_contents(ctx);
|
||||
});
|
||||
|
||||
resolved = true;
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void MergedSection<E>::compute_section_size(Context<E> &ctx) {
|
||||
if (!resolved)
|
||||
resolve(ctx);
|
||||
|
||||
std::vector<i64> sizes(map.NUM_SHARDS);
|
||||
Atomic<i64> alignment = 1;
|
||||
|
||||
|
229
elf/passes.cc
229
elf/passes.cc
@ -402,25 +402,50 @@ void kill_eh_frame_sections(Context<E> &ctx) {
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void split_section_pieces(Context<E> &ctx) {
|
||||
Timer t(ctx, "split_section_pieces");
|
||||
void create_merged_sections(Context<E> &ctx) {
|
||||
Timer t(ctx, "create_merged_sections");
|
||||
|
||||
// Convert InputSections to MergeableSections.
|
||||
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
|
||||
file->convert_mergeable_sections(ctx);
|
||||
});
|
||||
|
||||
tbb::parallel_for_each(ctx.merged_sections,
|
||||
[&](std::unique_ptr<MergedSection<E>> &sec) {
|
||||
if (sec->shdr.sh_flags & SHF_ALLOC)
|
||||
sec->resolve(ctx);
|
||||
});
|
||||
|
||||
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
|
||||
file->initialize_mergeable_sections(ctx);
|
||||
file->reattach_section_pieces(ctx);
|
||||
});
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void resolve_section_pieces(Context<E> &ctx) {
|
||||
Timer t(ctx, "resolve_section_pieces");
|
||||
// Add strings to .comment
|
||||
if (!ctx.arg.oformat_binary) {
|
||||
ElfShdr<E> shdr = {};
|
||||
shdr.sh_type = SHT_PROGBITS;
|
||||
shdr.sh_flags = SHF_MERGE | SHF_STRINGS;
|
||||
|
||||
// We aim for a 2/3 occupancy ratio
|
||||
for (std::unique_ptr<MergedSection<E>> &sec : ctx.merged_sections)
|
||||
sec->map.resize(sec->estimator.get_cardinality() * 3 / 2);
|
||||
MergedSection<E> *sec = MergedSection<E>::get_instance(ctx, ".comment", shdr);
|
||||
if (!sec->resolved) {
|
||||
sec->map.resize(4096);
|
||||
sec->resolved = true;
|
||||
}
|
||||
|
||||
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
|
||||
file->resolve_section_pieces(ctx);
|
||||
});
|
||||
auto add = [&](std::string str) {
|
||||
std::string_view buf = save_string(ctx, str);
|
||||
std::string_view data(buf.data(), buf.size() + 1);
|
||||
sec->insert(ctx, data, hash_string(data), 0);
|
||||
};
|
||||
|
||||
// Add an identification string to .comment.
|
||||
add(get_mold_version());
|
||||
|
||||
// Embed command line arguments for debugging.
|
||||
char *env = getenv("MOLD_DEBUG");
|
||||
if (env && env[0])
|
||||
add("mold command line: " + get_cmdline_args(ctx));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
@ -441,55 +466,6 @@ static std::string get_cmdline_args(Context<E> &ctx) {
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
// Inserts `str` into the ".comment" merged section.
//
// The section is looked up via MergedSection::get_instance() using a
// header with SHT_PROGBITS, SHF_MERGE | SHF_STRINGS, entsize 1 and
// alignment 1. If the section's map has no buckets yet, it is sized to
// 4096 before the insert.
template <typename E>
|
||||
void add_comment_string(Context<E> &ctx, std::string str) {
|
||||
ElfShdr<E> shdr = {};
|
||||
shdr.sh_type = SHT_PROGBITS;
|
||||
shdr.sh_flags = SHF_MERGE | SHF_STRINGS;
|
||||
shdr.sh_entsize = 1;
|
||||
shdr.sh_addralign = 1;
|
||||
|
||||
MergedSection<E> *sec = MergedSection<E>::get_instance(ctx, ".comment", shdr);
|
||||
if (sec->map.nbuckets == 0)
|
||||
sec->map.resize(4096);
|
||||
|
||||
std::string_view buf = save_string(ctx, str);
|
||||
// The inserted view is one byte longer than the string so that it covers
// the byte following it (assumes save_string() returns a NUL-terminated
// copy -- confirm).
std::string_view data(buf.data(), buf.size() + 1);
|
||||
sec->insert(ctx, data, hash_string(data), 0);
|
||||
}
|
||||
|
||||
// Adds the linker's identification strings to .comment and then calls
// assign_offsets() on every merged section in parallel.
//
// The version string is added unless ctx.arg.oformat_binary is set. The
// command line is added whenever the MOLD_DEBUG environment variable is
// set to a non-empty value; that check does not look at oformat_binary.
template <typename E>
|
||||
void compute_merged_section_sizes(Context<E> &ctx) {
|
||||
Timer t(ctx, "compute_merged_section_sizes");
|
||||
|
||||
// Add an identification string to .comment.
|
||||
if (!ctx.arg.oformat_binary)
|
||||
add_comment_string(ctx, get_mold_version());
|
||||
|
||||
// Embed command line arguments for debugging.
|
||||
if (char *env = getenv("MOLD_DEBUG"); env && env[0])
|
||||
add_comment_string(ctx, "mold command line: " + get_cmdline_args(ctx));
|
||||
|
||||
tbb::parallel_for_each(ctx.merged_sections,
|
||||
[&](std::unique_ptr<MergedSection<E>> &sec) {
|
||||
sec->assign_offsets(ctx);
|
||||
});
|
||||
}
|
||||
|
||||
// Splits `input` into consecutive non-owning spans of `unit` elements each.
// If the element count is not a multiple of `unit`, the last span holds the
// remaining elements. An empty `input` yields an empty result.
//
// The returned spans refer to `input`'s storage; they do not copy elements.
//
// NOTE(review): `unit` must be positive. With unit == 0 the loop condition
// stays true and the span never shrinks, so the loop does not terminate.
template <typename T>
|
||||
static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
|
||||
std::span<T> span(input);
|
||||
std::vector<std::span<T>> vec;
|
||||
|
||||
// Peel off full-sized chunks from the front.
while (span.size() >= unit) {
|
||||
vec.push_back(span.subspan(0, unit));
|
||||
span = span.subspan(unit);
|
||||
}
|
||||
// Whatever is left is shorter than `unit`; keep it as the final chunk.
if (!span.empty())
|
||||
vec.push_back(span);
|
||||
return vec;
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
static bool has_ctors_and_init_array(Context<E> &ctx) {
|
||||
bool x = false;
|
||||
@ -729,8 +705,7 @@ void create_output_sections(Context<E> &ctx) {
|
||||
|
||||
// Add output sections and mergeable sections to ctx.chunks
|
||||
for (std::unique_ptr<MergedSection<E>> &osec : ctx.merged_sections)
|
||||
if (osec->shdr.sh_size)
|
||||
chunks.push_back(osec.get());
|
||||
chunks.push_back(osec.get());
|
||||
|
||||
// Sections are added to the section lists in an arbitrary order
|
||||
// because they are created in parallel. Sort them to make the
|
||||
@ -1362,76 +1337,24 @@ template <typename E>
|
||||
void compute_section_sizes(Context<E> &ctx) {
|
||||
Timer t(ctx, "compute_section_sizes");
|
||||
|
||||
struct Group {
|
||||
i64 size = 0;
|
||||
i64 p2align = 0;
|
||||
i64 offset = 0;
|
||||
std::span<InputSection<E> *> members;
|
||||
};
|
||||
|
||||
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
|
||||
OutputSection<E> *osec = chunk->to_osec();
|
||||
if (!osec)
|
||||
return;
|
||||
|
||||
// This pattern will be processed in the next loop.
|
||||
if constexpr (needs_thunk<E>)
|
||||
if ((osec->shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable)
|
||||
return;
|
||||
|
||||
// Since one output section may contain millions of input sections,
|
||||
// we first split input sections into groups and assign offsets to
|
||||
// groups.
|
||||
std::vector<Group> groups;
|
||||
constexpr i64 group_size = 10000;
|
||||
|
||||
for (std::span<InputSection<E> *> span : split(osec->members, group_size))
|
||||
groups.push_back(Group{.members = span});
|
||||
|
||||
tbb::parallel_for_each(groups, [](Group &group) {
|
||||
for (InputSection<E> *isec : group.members) {
|
||||
group.size = align_to(group.size, 1 << isec->p2align) + isec->sh_size;
|
||||
group.p2align = std::max<i64>(group.p2align, isec->p2align);
|
||||
}
|
||||
});
|
||||
|
||||
ElfShdr<E> &shdr = osec->shdr;
|
||||
shdr.sh_size = 0;
|
||||
|
||||
for (i64 i = 0; i < groups.size(); i++) {
|
||||
shdr.sh_size = align_to(shdr.sh_size, 1 << groups[i].p2align);
|
||||
groups[i].offset = shdr.sh_size;
|
||||
shdr.sh_size += groups[i].size;
|
||||
shdr.sh_addralign = std::max<u32>(shdr.sh_addralign, 1 << groups[i].p2align);
|
||||
}
|
||||
|
||||
// Assign offsets to input sections.
|
||||
tbb::parallel_for_each(groups, [](Group &group) {
|
||||
i64 offset = group.offset;
|
||||
for (InputSection<E> *isec : group.members) {
|
||||
offset = align_to(offset, 1 << isec->p2align);
|
||||
isec->offset = offset;
|
||||
offset += isec->sh_size;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// On ARM32 or ARM64, we may need to create so-called "range extension
|
||||
// thunks" to extend branch instructions reach, as they can jump only
|
||||
// to ±16 MiB or ±128 MiB, respectively.
|
||||
//
|
||||
// In the following loop, we compute the sizes of sections while
|
||||
// inserting thunks. This pass cannot be parallelized. That is,
|
||||
// create_range_extension_thunks is parallelized internally, but the
|
||||
// function itself is not thread-safe.
|
||||
if constexpr (needs_thunk<E>) {
|
||||
Timer t2(ctx, "create_range_extension_thunks");
|
||||
// Chunk<E>::compute_section_size may obtain a global lock to create
|
||||
// range extension thunks. I don't know why, but using parallel_for
|
||||
// loop both inside and outside of the lock may cause a deadlock. It
|
||||
// might be a bug in TBB. For now, I'll avoid using parallel_for_each
|
||||
// here.
|
||||
for (Chunk<E> *chunk : ctx.chunks)
|
||||
if (chunk->shdr.sh_flags & SHF_EXECINSTR)
|
||||
chunk->compute_section_size(ctx);
|
||||
|
||||
if (!ctx.arg.relocatable)
|
||||
for (Chunk<E> *chunk : ctx.chunks)
|
||||
if (OutputSection<E> *osec = chunk->to_osec())
|
||||
if (osec->shdr.sh_flags & SHF_EXECINSTR)
|
||||
osec->create_range_extension_thunks(ctx);
|
||||
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
|
||||
if (!(chunk->shdr.sh_flags & SHF_EXECINSTR))
|
||||
chunk->compute_section_size(ctx);
|
||||
});
|
||||
} else {
|
||||
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
|
||||
chunk->compute_section_size(ctx);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@ -3009,23 +2932,34 @@ static void blake3_hash(u8 *buf, i64 size, u8 *out) {
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void compute_build_id(Context<E> &ctx) {
|
||||
Timer t(ctx, "compute_build_id");
|
||||
std::vector<std::span<u8>> get_shards(Context<E> &ctx) {
|
||||
constexpr i64 shard_size = 4 * 1024 * 1024; // 4 MiB
|
||||
std::span<u8> buf = {ctx.buf, (size_t)ctx.output_file->filesize};
|
||||
std::vector<std::span<u8>> vec;
|
||||
|
||||
while (!buf.empty()) {
|
||||
i64 sz = std::min<i64>(shard_size, buf.size());
|
||||
vec.push_back(buf.subspan(0, sz));
|
||||
buf = buf.subspan(sz);
|
||||
}
|
||||
return vec;
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void write_build_id(Context<E> &ctx) {
|
||||
Timer t(ctx, "write_build_id");
|
||||
|
||||
switch (ctx.arg.build_id.kind) {
|
||||
case BuildId::HEX:
|
||||
ctx.buildid->contents = ctx.arg.build_id.value;
|
||||
break;
|
||||
case BuildId::HASH: {
|
||||
i64 shard_size = 4 * 1024 * 1024;
|
||||
i64 filesize = ctx.output_file->filesize;
|
||||
i64 num_shards = align_to(filesize, shard_size) / shard_size;
|
||||
std::vector<u8> shards(num_shards * BLAKE3_OUT_LEN);
|
||||
std::vector<std::span<u8>> shards = get_shards(ctx);
|
||||
std::vector<u8> hashes(shards.size() * BLAKE3_OUT_LEN);
|
||||
|
||||
tbb::parallel_for((i64)0, num_shards, [&](i64 i) {
|
||||
u8 *begin = ctx.buf + shard_size * i;
|
||||
u8 *end = (i == num_shards - 1) ? ctx.buf + filesize : begin + shard_size;
|
||||
blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN);
|
||||
tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) {
|
||||
blake3_hash(shards[i].data(), shards[i].size(),
|
||||
hashes.data() + i * BLAKE3_OUT_LEN);
|
||||
|
||||
#ifdef HAVE_MADVISE
|
||||
// Make the kernel page out the file contents we've just written
|
||||
@ -3036,7 +2970,7 @@ void compute_build_id(Context<E> &ctx) {
|
||||
});
|
||||
|
||||
u8 buf[BLAKE3_OUT_LEN];
|
||||
blake3_hash(shards.data(), shards.size(), buf);
|
||||
blake3_hash(hashes.data(), hashes.size(), buf);
|
||||
|
||||
assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN);
|
||||
ctx.buildid->contents = {buf, buf + ctx.arg.build_id.size()};
|
||||
@ -3055,8 +2989,9 @@ void compute_build_id(Context<E> &ctx) {
|
||||
default:
|
||||
unreachable();
|
||||
}
|
||||
}
|
||||
|
||||
ctx.buildid->copy_buf(ctx);
|
||||
}
|
||||
|
||||
// Write Makefile-style dependency rules to a file specified by
|
||||
// --dependency-file. This is analogous to the compiler's -M flag.
|
||||
@ -3162,10 +3097,8 @@ template void apply_exclude_libs(Context<E> &);
|
||||
template void create_synthetic_sections(Context<E> &);
|
||||
template void resolve_symbols(Context<E> &);
|
||||
template void kill_eh_frame_sections(Context<E> &);
|
||||
template void split_section_pieces(Context<E> &);
|
||||
template void resolve_section_pieces(Context<E> &);
|
||||
template void create_merged_sections(Context<E> &);
|
||||
template void convert_common_symbols(Context<E> &);
|
||||
template void compute_merged_section_sizes(Context<E> &);
|
||||
template void create_output_sections(Context<E> &);
|
||||
template void add_synthetic_symbols(Context<E> &);
|
||||
template void check_cet_errors(Context<E> &);
|
||||
@ -3197,7 +3130,7 @@ template void compute_section_headers(Context<E> &);
|
||||
template i64 set_osec_offsets(Context<E> &);
|
||||
template void fix_synthetic_symbols(Context<E> &);
|
||||
template i64 compress_debug_sections(Context<E> &);
|
||||
template void compute_build_id(Context<E> &);
|
||||
template void write_build_id(Context<E> &);
|
||||
template void write_dependency_file(Context<E> &);
|
||||
template void show_stats(Context<E> &);
|
||||
|
||||
|
@ -148,8 +148,6 @@ static u64 r_set_osec_offsets(Context<E> &ctx) {
|
||||
|
||||
template <typename E>
|
||||
void combine_objects(Context<E> &ctx) {
|
||||
compute_merged_section_sizes(ctx);
|
||||
|
||||
create_output_sections(ctx);
|
||||
|
||||
r_create_synthetic_sections(ctx);
|
||||
|
@ -60,6 +60,7 @@ void notify_parent() {
|
||||
char buf[] = {1};
|
||||
[[maybe_unused]] int n = write(pipe_write_fd, buf, 1);
|
||||
assert(n == 1);
|
||||
pipe_write_fd = -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -170,6 +170,10 @@ static void scan_rels(Context<E> &ctx, InputSection<E> &isec,
|
||||
|
||||
template <>
|
||||
void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
|
||||
// This function is not thread-safe because it mutates symbols' members
|
||||
static std::mutex mu;
|
||||
std::scoped_lock lock(mu);
|
||||
|
||||
std::span<InputSection<E> *> m = members;
|
||||
if (m.empty())
|
||||
return;
|
||||
@ -247,10 +251,8 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
|
||||
|
||||
// Scan relocations between B and C to collect symbols that need
|
||||
// entries in the new thunk.
|
||||
tbb::parallel_for_each(m.begin() + b, m.begin() + c,
|
||||
[&](InputSection<E> *isec) {
|
||||
scan_rels(ctx, *isec, *thunk, thunk_idx);
|
||||
});
|
||||
for (i64 i = b; i < c; i++)
|
||||
scan_rels(ctx, *m[i], *thunk, thunk_idx);
|
||||
|
||||
// Now that we know the number of symbols in the thunk, we can compute
|
||||
// the thunk's size.
|
||||
@ -270,16 +272,15 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
|
||||
}
|
||||
|
||||
// Scan relocations again to fix symbol offsets in the last thunk.
|
||||
tbb::parallel_for_each(m.begin() + b, m.begin() + c,
|
||||
[&](InputSection<E> *isec) {
|
||||
std::span<Symbol<E> *> syms = isec->file.symbols;
|
||||
std::span<const ElfRel<E>> rels = isec->get_rels(ctx);
|
||||
std::span<ThunkRef> thunk_refs = isec->extra.thunk_refs;
|
||||
for (i64 i = b; i < c; i++) {
|
||||
std::span<Symbol<E> *> syms = m[i]->file.symbols;
|
||||
std::span<const ElfRel<E>> rels = m[i]->get_rels(ctx);
|
||||
std::span<ThunkRef> thunk_refs = m[i]->extra.thunk_refs;
|
||||
|
||||
for (i64 i = 0; i < rels.size(); i++)
|
||||
if (thunk_refs[i].thunk_idx == thunk_idx)
|
||||
thunk_refs[i].sym_idx = syms[rels[i].r_sym]->extra.thunk_sym_idx;
|
||||
});
|
||||
for (i64 j = 0; j < rels.size(); j++)
|
||||
if (thunk_refs[j].thunk_idx == thunk_idx)
|
||||
thunk_refs[j].sym_idx = syms[rels[j].r_sym]->extra.thunk_sym_idx;
|
||||
}
|
||||
|
||||
// Move B forward to point to the beginning of the next batch.
|
||||
b = c;
|
||||
|
Loading…
Reference in New Issue
Block a user