1
1
mirror of https://github.com/rui314/mold.git synced 2024-10-05 17:17:40 +03:00
This commit is contained in:
Rui Ueyama 2024-07-05 17:17:54 +09:00
parent 7089304046
commit 8e3679e4e5
7 changed files with 170 additions and 152 deletions

View File

@ -677,6 +677,29 @@ void ObjectFile<E>::sort_relocations(Context<E> &ctx) {
}
}
// Replaces each eligible SHF_MERGE input section of this file with a
// MergeableSection stored at the same index in `mergeable_sections`.
//
// A section is left untouched if it is absent, has sh_size == 0, has
// relsec_idx != -1, lacks the SHF_MERGE flag, or if
// MergedSection::get_instance() yields no parent for it.
template <typename E>
void ObjectFile<E>::convert_mergeable_sections(Context<E> &ctx) {
  for (i64 idx = 0; idx < this->sections.size(); idx++) {
    InputSection<E> *sec = this->sections[idx].get();
    if (!sec)
      continue;
    if (sec->sh_size == 0 || sec->relsec_idx != -1)
      continue;

    const ElfShdr<E> &hdr = sec->shdr();
    if ((hdr.sh_flags & SHF_MERGE) == 0)
      continue;

    MergedSection<E> *parent =
      MergedSection<E>::get_instance(ctx, sec->name(), hdr);
    if (!parent)
      continue;

    // The MergeableSection takes over the InputSection; the slot in
    // `sections` is then explicitly reset to null.
    this->mergeable_sections[idx] =
      std::make_unique<MergeableSection<E>>(ctx, *parent, this->sections[idx]);
    this->sections[idx] = nullptr;
  }
}
// Usually a section is an atomic unit of inclusion or exclusion.
// Linker doesn't care about its contents. However, if a section is a
// mergeable section (a section with SHF_MERGE bit set), the linker is
@ -713,54 +736,17 @@ void ObjectFile<E>::sort_relocations(Context<E> &ctx) {
// section piece in a section, but it doesn't do for any other types
// of symbols.
//
// In mold, we attach symbols to section pieces. If a relocation refers
// to a section symbol, and that symbol's section is a mergeable one,
// we create a new dummy symbol for a section piece and redirect the
// relocation to this new symbol. If a non-section symbol refers to a
// section piece, the section piece is attached to the symbol.
// Section garbage collection and Identical Code Folding work on graphs
// where sections or section pieces are vertices and relocations are
// edges. To make it easy to handle them, we rewrite symbols and
// relocations so that each non-absolute symbol always refers to either
// a non-mergeable section or a section piece.
//
// We do that only for SHF_ALLOC sections because GC and ICF work only
// on memory-allocated sections. Non-memory-allocated mergeable sections
// are not handled here for performance reasons.
// Turns this file's eligible input sections into MergeableSections and
// then splits the contents of every MergeableSection the file owns.
template <typename E>
void ObjectFile<E>::initialize_mergeable_sections(Context<E> &ctx) {
  // Step 1: hand each eligible InputSection over to a MergeableSection.
  // Absent sections, empty sections and sections with relsec_idx != -1
  // are skipped, as are sections for which get_instance() yields no parent.
  for (i64 idx = 0; idx < this->sections.size(); idx++) {
    InputSection<E> *sec = this->sections[idx].get();
    if (!sec)
      continue;
    if (sec->sh_size == 0 || sec->relsec_idx != -1)
      continue;

    MergedSection<E> *parent =
      MergedSection<E>::get_instance(ctx, sec->name(), sec->shdr());
    if (!parent)
      continue;

    this->mergeable_sections[idx] =
      std::make_unique<MergeableSection<E>>(ctx, *parent, this->sections[idx]);
    this->sections[idx] = nullptr;
  }

  // Step 2: split the contents of every mergeable section.
  for (std::unique_ptr<MergeableSection<E>> &msec : mergeable_sections) {
    if (!msec)
      continue;
    msec->split_contents(ctx);
  }
}
template <typename E>
void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
for (std::unique_ptr<MergeableSection<E>> &m : mergeable_sections) {
if (!m)
continue;
m->fragments.reserve(m->frag_offsets.size());
for (i64 i = 0; i < m->frag_offsets.size(); i++) {
SectionFragment<E> *frag =
m->parent.insert(ctx, m->get_contents(i), m->hashes[i], m->p2align);
m->fragments.push_back(frag);
}
// Reclaim memory as we'll never use this vector again
m->hashes.clear();
m->hashes.shrink_to_fit();
}
void ObjectFile<E>::reattach_section_pieces(Context<E> &ctx) {
// Attach section pieces to symbols.
for (i64 i = 1; i < this->elf_syms.size(); i++) {
Symbol<E> &sym = *this->symbols[i];
@ -769,8 +755,9 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
if (esym.is_abs() || esym.is_common() || esym.is_undef())
continue;
std::unique_ptr<MergeableSection<E>> &m = mergeable_sections[get_shndx(esym)];
if (!m || m->fragments.empty())
i64 shndx = get_shndx(esym);
std::unique_ptr<MergeableSection<E>> &m = mergeable_sections[shndx];
if (!m || !m->parent.resolved)
continue;
SectionFragment<E> *frag;
@ -785,17 +772,16 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
}
// Compute the size of frag_syms.
std::vector<InputSection<E> *> vec;
for (std::unique_ptr<InputSection<E>> &isec : sections)
if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC))
vec.push_back(isec.get());
i64 nfrag_syms = 0;
for (InputSection<E> *isec : vec)
for (ElfRel<E> &r : isec->get_rels(ctx))
if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)])
nfrag_syms++;
for (std::unique_ptr<InputSection<E>> &isec : sections)
if (isec)
for (ElfRel<E> &r : isec->get_rels(ctx))
if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
esym.st_type == STT_SECTION)
if (std::unique_ptr<MergeableSection<E>> &m =
mergeable_sections[get_shndx(esym)])
if (m->parent.resolved)
nfrag_syms++;
this->frag_syms.resize(nfrag_syms);
@ -803,34 +789,38 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
// create a new dummy non-section symbol and redirect the relocation
// to the newly created symbol.
i64 idx = 0;
for (InputSection<E> *isec : vec) {
for (ElfRel<E> &r : isec->get_rels(ctx)) {
const ElfSym<E> &esym = this->elf_syms[r.r_sym];
if (esym.st_type != STT_SECTION)
continue;
for (std::unique_ptr<InputSection<E>> &isec : sections) {
if (isec) {
for (ElfRel<E> &r : isec->get_rels(ctx)) {
const ElfSym<E> &esym = this->elf_syms[r.r_sym];
if (esym.st_type != STT_SECTION)
continue;
std::unique_ptr<MergeableSection<E>> &m = mergeable_sections[get_shndx(esym)];
if (!m)
continue;
std::unique_ptr<MergeableSection<E>> &m =
mergeable_sections[get_shndx(esym)];
i64 r_addend = get_addend(*isec, r);
if (!m || !m->parent.resolved)
continue;
SectionFragment<E> *frag;
i64 in_frag_offset;
std::tie(frag, in_frag_offset) = m->get_fragment(esym.st_value + r_addend);
i64 r_addend = get_addend(*isec, r);
if (!frag)
Fatal(ctx) << *this << ": bad relocation at " << r.r_sym;
SectionFragment<E> *frag;
i64 in_frag_offset;
std::tie(frag, in_frag_offset) = m->get_fragment(esym.st_value + r_addend);
Symbol<E> &sym = this->frag_syms[idx];
sym.file = this;
sym.set_name("<fragment>");
sym.sym_idx = r.r_sym;
sym.visibility = STV_HIDDEN;
sym.set_frag(frag);
sym.value = in_frag_offset - r_addend;
r.r_sym = this->elf_syms.size() + idx;
idx++;
if (!frag)
Fatal(ctx) << *this << ": bad relocation at " << r.r_sym;
Symbol<E> &sym = this->frag_syms[idx];
sym.file = this;
sym.set_name("<fragment>");
sym.sym_idx = r.r_sym;
sym.visibility = STV_HIDDEN;
sym.set_frag(frag);
sym.value = in_frag_offset - r_addend;
r.r_sym = this->elf_syms.size() + idx;
idx++;
}
}
}

View File

@ -557,6 +557,9 @@ MergeableSection<E>::MergeableSection(Context<E> &ctx, MergedSection<E> &parent,
std::unique_ptr<InputSection<E>> &isec)
: parent(parent), section(std::move(isec)), p2align(section->p2align) {
section->uncompress(ctx);
std::scoped_lock lock(parent.mu);
parent.members.push_back(this);
}
static size_t find_null(std::string_view data, i64 pos, i64 entsize) {
@ -630,6 +633,17 @@ void MergeableSection<E>::split_contents(Context<E> &ctx) {
counter += frag_offsets.size();
}
// Inserts every piece of this section into the parent MergedSection and
// records the resulting SectionFragment pointers in `fragments`, in the
// same order as `frag_offsets`.
template <typename E>
void MergeableSection<E>::resolve_contents(Context<E> &ctx) {
  fragments.reserve(frag_offsets.size());

  for (i64 idx = 0; idx < frag_offsets.size(); idx++) {
    SectionFragment<E> *frag =
      parent.insert(ctx, get_contents(idx), hashes[idx], p2align);
    fragments.push_back(frag);
  }

  // The hashes are not needed past this point, so give their memory back.
  hashes.clear();
  hashes.shrink_to_fit();
}
using E = MOLD_TARGET;
template bool cie_equals(const CieRecord<E> &, const CieRecord<E> &);

View File

@ -427,10 +427,7 @@ int elf_main(int argc, char **argv) {
kill_eh_frame_sections(ctx);
// Split mergeable section contents into section pieces.
split_section_pieces(ctx);
// Resolve mergeable section pieces to merge them.
resolve_section_pieces(ctx);
create_merged_sections(ctx);
// Handle --relocatable. Since the linker's behavior is quite different
// from the normal one when the option is given, the logic is implemented
@ -464,9 +461,6 @@ int elf_main(int argc, char **argv) {
if (ctx.arg.icf)
icf_sections(ctx);
// Compute sizes of sections containing mergeable strings.
compute_merged_section_sizes(ctx);
// Create linker-synthesized sections such as .got or .plt.
create_synthetic_sections(ctx);

View File

@ -373,6 +373,7 @@ public:
virtual ~Chunk() = default;
virtual bool is_header() { return false; }
virtual OutputSection<E> *to_osec() { return nullptr; }
virtual MergedSection<E> *to_merged_section() { return nullptr; }
virtual i64 get_reldyn_size(Context<E> &ctx) const { return 0; }
virtual void construct_relr(Context<E> &ctx) {}
virtual void copy_buf(Context<E> &ctx) {}
@ -805,13 +806,19 @@ public:
SectionFragment<E> *insert(Context<E> &ctx, std::string_view data,
u64 hash, i64 p2align);
MergedSection<E> *to_merged_section() override { return this; }
void resolve(Context<E> &ctx);
void assign_offsets(Context<E> &ctx);
void copy_buf(Context<E> &ctx) override;
void write_to(Context<E> &ctx, u8 *buf) override;
void print_stats(Context<E> &ctx);
std::vector<MergeableSection<E> *> members;
std::mutex mu;
ConcurrentMap<SectionFragment<E>> map;
HyperLogLog estimator;
bool resolved = false;
private:
MergedSection(std::string_view name, i64 flags, i64 type, i64 entsize);
@ -1095,14 +1102,17 @@ public:
std::unique_ptr<InputSection<E>> &isec);
void split_contents(Context<E> &ctx);
void resolve_contents(Context<E> &ctx);
std::pair<SectionFragment<E> *, i64> get_fragment(i64 offset);
std::string_view get_contents(i64 idx);
MergedSection<E> &parent;
std::vector<SectionFragment<E> *> fragments;
private:
std::unique_ptr<InputSection<E>> section;
std::vector<u32> frag_offsets;
std::vector<u32> hashes;
std::vector<SectionFragment<E> *> fragments;
u8 p2align = 0;
};
@ -1195,8 +1205,8 @@ public:
void parse(Context<E> &ctx);
void initialize_symbols(Context<E> &ctx);
void initialize_mergeable_sections(Context<E> &ctx);
void resolve_section_pieces(Context<E> &ctx);
void convert_mergeable_sections(Context<E> &ctx);
void reattach_section_pieces(Context<E> &ctx);
void resolve_symbols(Context<E> &ctx) override;
void mark_live_objects(Context<E> &ctx,
std::function<void(InputFile<E> *)> feeder) override;
@ -1400,10 +1410,8 @@ template <typename E> void create_synthetic_sections(Context<E> &);
template <typename E> void set_file_priority(Context<E> &);
template <typename E> void resolve_symbols(Context<E> &);
template <typename E> void kill_eh_frame_sections(Context<E> &);
template <typename E> void split_section_pieces(Context<E> &);
template <typename E> void resolve_section_pieces(Context<E> &);
template <typename E> void create_merged_sections(Context<E> &);
template <typename E> void convert_common_symbols(Context<E> &);
template <typename E> void compute_merged_section_sizes(Context<E> &);
template <typename E> void create_output_sections(Context<E> &);
template <typename E> void add_synthetic_symbols(Context<E> &);
template <typename E> void apply_section_align(Context<E> &);
@ -2353,7 +2361,7 @@ InputSection<E>::get_fragment(Context<E> &ctx, const ElfRel<E> &rel) {
assert(!(shdr().sh_flags & SHF_ALLOC));
const ElfSym<E> &esym = file.elf_syms[rel.r_sym];
if (esym.st_type == STT_SECTION)
if (!esym.is_abs() && !esym.is_common() && !esym.is_undef())
if (std::unique_ptr<MergeableSection<E>> &m =
file.mergeable_sections[file.get_shndx(esym)])
return m->get_fragment(esym.st_value + get_addend(*this, rel));

View File

@ -1970,8 +1970,27 @@ MergedSection<E>::insert(Context<E> &ctx, std::string_view data, u64 hash,
return frag;
}
// Resolves all member sections of this merged section: splits each
// member, sizes the fragment map, inserts each member's pieces into it,
// and finally marks the section as resolved.
template <typename E>
void MergedSection<E>::resolve(Context<E> &ctx) {
  // Phase 1: split every member section, in parallel.
  auto split_member = [&](MergeableSection<E> *member) {
    member->split_contents(ctx);
  };
  tbb::parallel_for_each(members, split_member);

  // Size the map at 3/2 of the estimated cardinality, i.e. aim for an
  // occupation ratio of about 2/3.
  map.resize(estimator.get_cardinality() * 3 / 2);

  // Phase 2: insert every member's pieces into the map, in parallel.
  auto resolve_member = [&](MergeableSection<E> *member) {
    member->resolve_contents(ctx);
  };
  tbb::parallel_for_each(members, resolve_member);

  resolved = true;
}
template <typename E>
void MergedSection<E>::assign_offsets(Context<E> &ctx) {
if (!resolved)
resolve(ctx);
std::vector<i64> sizes(map.NUM_SHARDS);
Atomic<i64> alignment = 1;

View File

@ -402,25 +402,50 @@ void kill_eh_frame_sections(Context<E> &ctx) {
}
template <typename E>
void split_section_pieces(Context<E> &ctx) {
Timer t(ctx, "split_section_pieces");
void create_merged_sections(Context<E> &ctx) {
Timer t(ctx, "create_merged_sections");
// Convert InputSections to MergeableSections.
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
file->convert_mergeable_sections(ctx);
});
tbb::parallel_for_each(ctx.merged_sections,
[&](std::unique_ptr<MergedSection<E>> &sec) {
if (sec->shdr.sh_flags & SHF_ALLOC)
sec->resolve(ctx);
});
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
file->initialize_mergeable_sections(ctx);
file->reattach_section_pieces(ctx);
});
}
template <typename E>
void resolve_section_pieces(Context<E> &ctx) {
Timer t(ctx, "resolve_section_pieces");
// Add strings to .comment
if (!ctx.arg.oformat_binary) {
ElfShdr<E> shdr = {};
shdr.sh_type = SHT_PROGBITS;
shdr.sh_flags = SHF_MERGE | SHF_STRINGS;
// We aim 2/3 occupation ratio
for (std::unique_ptr<MergedSection<E>> &sec : ctx.merged_sections)
sec->map.resize(sec->estimator.get_cardinality() * 3 / 2);
MergedSection<E> *sec = MergedSection<E>::get_instance(ctx, ".comment", shdr);
if (!sec->resolved) {
sec->map.resize(4096);
sec->resolved = true;
}
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
file->resolve_section_pieces(ctx);
});
auto add = [&](std::string str) {
std::string_view buf = save_string(ctx, str);
std::string_view data(buf.data(), buf.size() + 1);
sec->insert(ctx, data, hash_string(data), 0);
};
// Add an identification string to .comment.
add(get_mold_version());
// Embed command line arguments for debugging.
char *env = getenv("MOLD_DEBUG");
if (env && env[0])
add("mold command line: " + get_cmdline_args(ctx));
}
}
template <typename E>
@ -441,41 +466,6 @@ static std::string get_cmdline_args(Context<E> &ctx) {
return ss.str();
}
// Inserts `str` into the merged ".comment" section, which is looked up
// (via get_instance) as a PROGBITS, SHF_MERGE | SHF_STRINGS section with
// entry size 1 and alignment 1.
template <typename E>
void add_comment_string(Context<E> &ctx, std::string str) {
  ElfShdr<E> hdr = {};
  hdr.sh_type = SHT_PROGBITS;
  hdr.sh_flags = SHF_MERGE | SHF_STRINGS;
  hdr.sh_entsize = 1;
  hdr.sh_addralign = 1;

  MergedSection<E> *comment =
    MergedSection<E>::get_instance(ctx, ".comment", hdr);

  // Give the map a fixed capacity if it has no buckets yet.
  const bool map_is_unsized = (comment->map.nbuckets == 0);
  if (map_is_unsized)
    comment->map.resize(4096);

  // The inserted view is one byte longer than the saved string;
  // presumably that byte is the terminating NUL -- confirm against
  // save_string().
  std::string_view saved = save_string(ctx, str);
  std::string_view piece{saved.data(), saved.size() + 1};
  comment->insert(ctx, piece, hash_string(piece), 0);
}
// Adds the linker's identification strings to .comment and then assigns
// offsets within every merged section.
template <typename E>
void compute_merged_section_sizes(Context<E> &ctx) {
  Timer t(ctx, "compute_merged_section_sizes");

  // The version string is only added when the output format is not binary.
  if (!ctx.arg.oformat_binary)
    add_comment_string(ctx, get_mold_version());

  // When MOLD_DEBUG is set to a non-empty value, also embed the command
  // line arguments for debugging.
  char *debug_env = getenv("MOLD_DEBUG");
  if (debug_env && debug_env[0])
    add_comment_string(ctx, "mold command line: " + get_cmdline_args(ctx));

  auto assign = [&](std::unique_ptr<MergedSection<E>> &sec) {
    sec->assign_offsets(ctx);
  };
  tbb::parallel_for_each(ctx.merged_sections, assign);
}
template <typename T>
static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
std::span<T> span(input);
@ -729,8 +719,7 @@ void create_output_sections(Context<E> &ctx) {
// Add output sections and mergeable sections to ctx.chunks
for (std::unique_ptr<MergedSection<E>> &osec : ctx.merged_sections)
if (osec->shdr.sh_size)
chunks.push_back(osec.get());
chunks.push_back(osec.get());
// Sections are added to the section lists in an arbitrary order
// because they are created in parallel. Sort them to make the
@ -1369,6 +1358,7 @@ void compute_section_sizes(Context<E> &ctx) {
std::span<InputSection<E> *> members;
};
// Assign offsets to OutputSection members
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
OutputSection<E> *osec = chunk->to_osec();
if (!osec)
@ -1416,6 +1406,13 @@ void compute_section_sizes(Context<E> &ctx) {
});
});
// Assign offsets to MergedSection members
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
if (MergedSection<E> *sec = chunk->to_merged_section())
sec->assign_offsets(ctx);
});
// On ARM32 or ARM64, we may need to create so-called "range extension
// thunks" to extend branch instructions reach, as they can jump only
// to ±16 MiB or ±128 MiB, respectively.
@ -1425,7 +1422,7 @@ void compute_section_sizes(Context<E> &ctx) {
// create_range_extension_thunks is parallelized internally, but the
// function itself is not thread-safe.
if constexpr (needs_thunk<E>) {
Timer t2(ctx, "create_range_extension_thunks");
Timer t(ctx, "create_range_extension_thunks");
if (!ctx.arg.relocatable)
for (Chunk<E> *chunk : ctx.chunks)
@ -3162,10 +3159,8 @@ template void apply_exclude_libs(Context<E> &);
template void create_synthetic_sections(Context<E> &);
template void resolve_symbols(Context<E> &);
template void kill_eh_frame_sections(Context<E> &);
template void split_section_pieces(Context<E> &);
template void resolve_section_pieces(Context<E> &);
template void create_merged_sections(Context<E> &);
template void convert_common_symbols(Context<E> &);
template void compute_merged_section_sizes(Context<E> &);
template void create_output_sections(Context<E> &);
template void add_synthetic_symbols(Context<E> &);
template void check_cet_errors(Context<E> &);

View File

@ -148,8 +148,6 @@ static u64 r_set_osec_offsets(Context<E> &ctx) {
template <typename E>
void combine_objects(Context<E> &ctx) {
compute_merged_section_sizes(ctx);
create_output_sections(ctx);
r_create_synthetic_sections(ctx);