mirror of
https://github.com/rui314/mold.git
synced 2024-10-05 17:17:40 +03:00
Refactor
This commit is contained in:
parent
8e3679e4e5
commit
d4cd52ca49
@ -373,7 +373,7 @@ public:
|
||||
// Virtual so that a derived chunk can be destroyed through a Chunk pointer.
virtual ~Chunk() = default;
|
||||
// The base implementation answers false.
// NOTE(review): presumably only header-like chunks override this -- confirm.
virtual bool is_header() { return false; }
|
||||
// Downcast hook: yields nullptr here; OutputSection<E> overrides it to
// return itself.
virtual OutputSection<E> *to_osec() { return nullptr; }
|
||||
// Downcast hook: yields nullptr here; MergedSection<E> overrides it to
// return itself.
virtual MergedSection<E> *to_merged_section() { return nullptr; }
|
||||
// Hook for computing this chunk's size; does nothing by default.
// OutputSection<E> and MergedSection<E> declare overrides.
virtual void compute_section_size(Context<E> &ctx) {}
|
||||
// The base implementation reports 0.
// NOTE(review): presumably the space this chunk needs in the dynamic
// relocation section -- confirm against the overrides.
virtual i64 get_reldyn_size(Context<E> &ctx) const { return 0; }
|
||||
// Does nothing by default; OutputSection<E> declares an override.
virtual void construct_relr(Context<E> &ctx) {}
|
||||
// Does nothing by default; OutputSection<E> and MergedSection<E> declare
// overrides.
virtual void copy_buf(Context<E> &ctx) {}
|
||||
@ -480,6 +480,7 @@ public:
|
||||
}
|
||||
|
||||
// Returns this object, replacing the nullptr produced by the overridden
// base-class to_osec().
OutputSection<E> *to_osec() override { return this; }
|
||||
void compute_section_size(Context<E> &ctx) override;
|
||||
void construct_relr(Context<E> &ctx) override;
|
||||
void copy_buf(Context<E> &ctx) override;
|
||||
void write_to(Context<E> &ctx, u8 *buf) override;
|
||||
@ -806,9 +807,8 @@ public:
|
||||
SectionFragment<E> *insert(Context<E> &ctx, std::string_view data,
|
||||
u64 hash, i64 p2align);
|
||||
|
||||
// Returns this object, replacing the nullptr produced by the overridden
// base-class to_merged_section().
MergedSection<E> *to_merged_section() override { return this; }
|
||||
void resolve(Context<E> &ctx);
|
||||
void assign_offsets(Context<E> &ctx);
|
||||
void compute_section_size(Context<E> &ctx) override;
|
||||
void copy_buf(Context<E> &ctx) override;
|
||||
void write_to(Context<E> &ctx, u8 *buf) override;
|
||||
void print_stats(Context<E> &ctx);
|
||||
|
@ -865,6 +865,84 @@ void DynamicSection<E>::copy_buf(Context<E> &ctx) {
|
||||
write_vector(ctx.buf + this->shdr.sh_offset, contents);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
|
||||
std::span<T> span(input);
|
||||
std::vector<std::span<T>> vec;
|
||||
|
||||
while (span.size() >= unit) {
|
||||
vec.push_back(span.subspan(0, unit));
|
||||
span = span.subspan(unit);
|
||||
}
|
||||
if (!span.empty())
|
||||
vec.push_back(span);
|
||||
return vec;
|
||||
}
|
||||
|
||||
|
||||
// Assign offsets to OutputSection members
|
||||
template <typename E>
|
||||
void OutputSection<E>::compute_section_size(Context<E> &ctx) {
|
||||
ElfShdr<E> &shdr = this->shdr;
|
||||
|
||||
// On most RISC systems, we need to create so-called "range extension
|
||||
// thunks" to extend branch instructions reach, as their jump
|
||||
// instructions' reach is limited. create_range_extension_thunks()
|
||||
// computes the size of the section while inserting thunks.
|
||||
if constexpr (needs_thunk<E>) {
|
||||
if ((shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable) {
|
||||
create_range_extension_thunks(ctx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Since one output section may contain millions of input sections,
|
||||
// we first split input sections into groups and assign offsets to
|
||||
// groups.
|
||||
struct Group {
|
||||
std::span<InputSection<E> *> members;
|
||||
i64 size = 0;
|
||||
i64 p2align = 0;
|
||||
i64 offset = 0;
|
||||
};
|
||||
|
||||
std::span<InputSection<E> *> mem = members;
|
||||
std::vector<Group> groups;
|
||||
constexpr i64 group_size = 10000;
|
||||
|
||||
while (!mem.empty()) {
|
||||
i64 sz = std::min<i64>(group_size, mem.size());
|
||||
groups.push_back({mem.subspan(0, sz)});
|
||||
mem = mem.subspan(sz);
|
||||
}
|
||||
|
||||
tbb::parallel_for_each(groups, [](Group &group) {
|
||||
for (InputSection<E> *isec : group.members) {
|
||||
group.size = align_to(group.size, 1 << isec->p2align) + isec->sh_size;
|
||||
group.p2align = std::max<i64>(group.p2align, isec->p2align);
|
||||
}
|
||||
});
|
||||
|
||||
shdr.sh_size = 0;
|
||||
|
||||
for (i64 i = 0; i < groups.size(); i++) {
|
||||
shdr.sh_size = align_to(shdr.sh_size, 1 << groups[i].p2align);
|
||||
groups[i].offset = shdr.sh_size;
|
||||
shdr.sh_size += groups[i].size;
|
||||
shdr.sh_addralign = std::max<u32>(shdr.sh_addralign, 1 << groups[i].p2align);
|
||||
}
|
||||
|
||||
// Assign offsets to input sections.
|
||||
tbb::parallel_for_each(groups, [](Group &group) {
|
||||
i64 offset = group.offset;
|
||||
for (InputSection<E> *isec : group.members) {
|
||||
offset = align_to(offset, 1 << isec->p2align);
|
||||
isec->offset = offset;
|
||||
offset += isec->sh_size;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void OutputSection<E>::copy_buf(Context<E> &ctx) {
|
||||
if (this->shdr.sh_type != SHT_NOBITS)
|
||||
@ -1987,7 +2065,7 @@ void MergedSection<E>::resolve(Context<E> &ctx) {
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
void MergedSection<E>::assign_offsets(Context<E> &ctx) {
|
||||
void MergedSection<E>::compute_section_size(Context<E> &ctx) {
|
||||
if (!resolved)
|
||||
resolve(ctx);
|
||||
|
||||
|
106
elf/passes.cc
106
elf/passes.cc
@ -466,20 +466,6 @@ static std::string get_cmdline_args(Context<E> &ctx) {
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
|
||||
std::span<T> span(input);
|
||||
std::vector<std::span<T>> vec;
|
||||
|
||||
while (span.size() >= unit) {
|
||||
vec.push_back(span.subspan(0, unit));
|
||||
span = span.subspan(unit);
|
||||
}
|
||||
if (!span.empty())
|
||||
vec.push_back(span);
|
||||
return vec;
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
static bool has_ctors_and_init_array(Context<E> &ctx) {
|
||||
bool x = false;
|
||||
@ -1351,84 +1337,24 @@ template <typename E>
|
||||
void compute_section_sizes(Context<E> &ctx) {
|
||||
Timer t(ctx, "compute_section_sizes");
|
||||
|
||||
struct Group {
|
||||
i64 size = 0;
|
||||
i64 p2align = 0;
|
||||
i64 offset = 0;
|
||||
std::span<InputSection<E> *> members;
|
||||
};
|
||||
|
||||
// Assign offsets to OutputSection members
|
||||
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
|
||||
OutputSection<E> *osec = chunk->to_osec();
|
||||
if (!osec)
|
||||
return;
|
||||
|
||||
// This pattern will be processed in the next loop.
|
||||
if constexpr (needs_thunk<E>)
|
||||
if ((osec->shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable)
|
||||
return;
|
||||
|
||||
// Since one output section may contain millions of input sections,
|
||||
// we first split input sections into groups and assign offsets to
|
||||
// groups.
|
||||
std::vector<Group> groups;
|
||||
constexpr i64 group_size = 10000;
|
||||
|
||||
for (std::span<InputSection<E> *> span : split(osec->members, group_size))
|
||||
groups.push_back(Group{.members = span});
|
||||
|
||||
tbb::parallel_for_each(groups, [](Group &group) {
|
||||
for (InputSection<E> *isec : group.members) {
|
||||
group.size = align_to(group.size, 1 << isec->p2align) + isec->sh_size;
|
||||
group.p2align = std::max<i64>(group.p2align, isec->p2align);
|
||||
}
|
||||
});
|
||||
|
||||
ElfShdr<E> &shdr = osec->shdr;
|
||||
shdr.sh_size = 0;
|
||||
|
||||
for (i64 i = 0; i < groups.size(); i++) {
|
||||
shdr.sh_size = align_to(shdr.sh_size, 1 << groups[i].p2align);
|
||||
groups[i].offset = shdr.sh_size;
|
||||
shdr.sh_size += groups[i].size;
|
||||
shdr.sh_addralign = std::max<u32>(shdr.sh_addralign, 1 << groups[i].p2align);
|
||||
}
|
||||
|
||||
// Assign offsets to input sections.
|
||||
tbb::parallel_for_each(groups, [](Group &group) {
|
||||
i64 offset = group.offset;
|
||||
for (InputSection<E> *isec : group.members) {
|
||||
offset = align_to(offset, 1 << isec->p2align);
|
||||
isec->offset = offset;
|
||||
offset += isec->sh_size;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
// Assign offsets to MergedSection members
|
||||
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
|
||||
if (MergedSection<E> *sec = chunk->to_merged_section())
|
||||
sec->assign_offsets(ctx);
|
||||
});
|
||||
|
||||
// On ARM32 or ARM64, we may need to create so-called "range extension
|
||||
// thunks" to extend branch instructions reach, as they can jump only
|
||||
// to ±16 MiB or ±128 MiB, respectively.
|
||||
//
|
||||
// In the following loop, we compute the sizes of sections while
|
||||
// inserting thunks. This pass cannot be parallelized. That is,
|
||||
// create_range_extension_thunks is parallelized internally, but the
|
||||
// function itself is not thread-safe.
|
||||
if constexpr (needs_thunk<E>) {
|
||||
Timer t(ctx, "create_range_extension_thunks");
|
||||
// Chunk<E>::compute_section_size may obtain a global lock to create
|
||||
// range extension thunks. I don't know why, but using parallel_for
|
||||
// loop both inside and outside of the lock may cause a deadlock. It
|
||||
// might be a bug in TBB. For now, I'll avoid using parallel_for_each
|
||||
// here.
|
||||
for (Chunk<E> *chunk : ctx.chunks)
|
||||
if (chunk->shdr.sh_flags & SHF_EXECINSTR)
|
||||
chunk->compute_section_size(ctx);
|
||||
|
||||
if (!ctx.arg.relocatable)
|
||||
for (Chunk<E> *chunk : ctx.chunks)
|
||||
if (OutputSection<E> *osec = chunk->to_osec())
|
||||
if (osec->shdr.sh_flags & SHF_EXECINSTR)
|
||||
osec->create_range_extension_thunks(ctx);
|
||||
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
|
||||
if (!(chunk->shdr.sh_flags & SHF_EXECINSTR))
|
||||
chunk->compute_section_size(ctx);
|
||||
});
|
||||
} else {
|
||||
tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
|
||||
chunk->compute_section_size(ctx);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -170,6 +170,10 @@ static void scan_rels(Context<E> &ctx, InputSection<E> &isec,
|
||||
|
||||
template <>
|
||||
void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
|
||||
// This function is not thread-safe because it mutates symbols' members
|
||||
static std::mutex mu;
|
||||
std::scoped_lock lock(mu);
|
||||
|
||||
std::span<InputSection<E> *> m = members;
|
||||
if (m.empty())
|
||||
return;
|
||||
@ -247,10 +251,8 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
|
||||
|
||||
// Scan relocations between B and C to collect symbols that need
|
||||
// entries in the new thunk.
|
||||
tbb::parallel_for_each(m.begin() + b, m.begin() + c,
|
||||
[&](InputSection<E> *isec) {
|
||||
scan_rels(ctx, *isec, *thunk, thunk_idx);
|
||||
});
|
||||
for (i64 i = b; i < c; i++)
|
||||
scan_rels(ctx, *m[i], *thunk, thunk_idx);
|
||||
|
||||
// Now that we know the number of symbols in the thunk, we can compute
|
||||
// the thunk's size.
|
||||
@ -270,16 +272,15 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
|
||||
}
|
||||
|
||||
// Scan relocations again to fix symbol offsets in the last thunk.
|
||||
tbb::parallel_for_each(m.begin() + b, m.begin() + c,
|
||||
[&](InputSection<E> *isec) {
|
||||
std::span<Symbol<E> *> syms = isec->file.symbols;
|
||||
std::span<const ElfRel<E>> rels = isec->get_rels(ctx);
|
||||
std::span<ThunkRef> thunk_refs = isec->extra.thunk_refs;
|
||||
for (i64 i = b; i < c; i++) {
|
||||
std::span<Symbol<E> *> syms = m[i]->file.symbols;
|
||||
std::span<const ElfRel<E>> rels = m[i]->get_rels(ctx);
|
||||
std::span<ThunkRef> thunk_refs = m[i]->extra.thunk_refs;
|
||||
|
||||
for (i64 i = 0; i < rels.size(); i++)
|
||||
if (thunk_refs[i].thunk_idx == thunk_idx)
|
||||
thunk_refs[i].sym_idx = syms[rels[i].r_sym]->extra.thunk_sym_idx;
|
||||
});
|
||||
for (i64 j = 0; j < rels.size(); j++)
|
||||
if (thunk_refs[j].thunk_idx == thunk_idx)
|
||||
thunk_refs[j].sym_idx = syms[rels[j].r_sym]->extra.thunk_sym_idx;
|
||||
}
|
||||
|
||||
// Move B forward to point to the beginning of the next batch.
|
||||
b = c;
|
||||
|
Loading…
Reference in New Issue
Block a user