1
1
mirror of https://github.com/rui314/mold.git synced 2024-09-11 13:06:59 +03:00

Use relocations to identify address-taken functions

This commit is contained in:
Rui Ueyama 2023-10-01 15:29:04 +09:00
parent 0eea8f4015
commit a994d71fa1
8 changed files with 188 additions and 76 deletions

View File

@ -1848,6 +1848,44 @@ template <typename E> concept is_ppc = is_ppc64<E> || is_ppc32<E>;
template <typename E> concept is_sparc = is_sparc64<E>;
template <typename E> concept is_loongarch = is_loongarch64<E> || is_loongarch32<E>;
// Returns true if a given relocation is of type used for direct
// function call.
template <typename E>
inline bool is_func_call_rel(const ElfRel<E> &r) {
u32 ty = r.r_type;
if constexpr (is_x86_64<E>) {
return ty == R_X86_64_PLT32 || ty == R_X86_64_PLTOFF64;
} else if constexpr (is_i386<E>) {
return ty == R_386_PLT32;
} else if constexpr (is_arm64<E>) {
return ty == R_AARCH64_JUMP26 || ty == R_AARCH64_CALL26;
} else if constexpr (is_arm32<E>) {
return ty == R_ARM_JUMP24 || ty == R_ARM_THM_JUMP24 ||
ty == R_ARM_CALL || ty == R_ARM_THM_CALL ||
ty == R_ARM_PLT32;
} else if constexpr (is_riscv<E>) {
return ty == R_RISCV_CALL || ty == R_RISCV_CALL_PLT;
} else if constexpr (is_ppc32<E>) {
return ty == R_PPC_REL24 || ty == R_PPC_PLTREL24 || ty == R_PPC_LOCAL24PC;
} else if constexpr (is_ppc64<E>) {
return ty == R_PPC64_REL24 || ty == R_PPC64_REL24_NOTOC;
} else if constexpr (is_s390x<E>) {
return ty == R_390_PLT32DBL;
} else if constexpr (is_sparc64<E>) {
return ty == R_SPARC_WPLT30 || ty == R_SPARC_WDISP30;
} else if constexpr (is_m68k<E>) {
return ty == R_68K_PLT32;
} else if constexpr (is_sh4<E>) {
return ty == R_SH_PLT32;
} else if constexpr (is_alpha<E>) {
return false;
} else {
static_assert(is_loongarch<E>);
return ty == R_LARCH_B26;
}
}
struct X86_64 {
static constexpr std::string_view target_name = "x86_64";
static constexpr bool is_64 = true;

View File

// NOTE(review): this span looks like a diff hunk rendered without +/-
// markers, so statements from the previous and the revised body of
// is_eligible() appear interleaved (two independent `return` paths follow
// each other below) -- confirm against the repository before relying on it.
//
// is_eligible() decides whether input section `isec` may take part in
// identical code folding.
@ -115,21 +115,19 @@ static bool is_eligible(Context<E> &ctx, InputSection<E> &isec) {
const ElfShdr<E> &shdr = isec.shdr();
std::string_view name = isec.name();
// Predicate form: each property of the section is named individually and
// the verdict is the single conjunction returned further down.
bool is_alloc = (shdr.sh_flags & SHF_ALLOC);
bool is_exec = (shdr.sh_flags & SHF_EXECINSTR) ||
ctx.arg.ignore_data_address_equality;
bool is_relro = (name == ".data.rel.ro" ||
name.starts_with(".data.rel.ro."));
bool is_readonly = !(shdr.sh_flags & SHF_WRITE) || is_relro;
bool is_bss = (shdr.sh_type == SHT_NOBITS);
bool is_empty = (shdr.sh_size == 0);
bool is_init = (shdr.sh_type == SHT_INIT_ARRAY || name == ".init");
bool is_fini = (shdr.sh_type == SHT_FINI_ARRAY || name == ".fini");
bool is_enumerable = is_c_identifier(name);
bool is_addr_taken = !ctx.arg.icf_all && isec.address_significant;
// Guard form: empty, non-allocated, SHT_NOBITS and C-identifier-named
// sections are rejected up front.
if (shdr.sh_size == 0 || !(shdr.sh_flags & SHF_ALLOC) ||
shdr.sh_type == SHT_NOBITS || is_c_identifier(name))
return false;
return is_alloc && is_exec && is_readonly && !is_bss && !is_empty &&
!is_init && !is_fini && !is_enumerable && !is_addr_taken;
// Executable sections: accepted when ctx.arg.icf_all is set or the section
// is not address-taken, and the section is named neither ".init" nor ".fini".
if (shdr.sh_flags & SHF_EXECINSTR) {
return (ctx.arg.icf_all || !isec.address_taken) &&
name != ".init" && name != ".fini";
} else {
// Non-executable sections: accepted when
// ctx.arg.ignore_data_address_equality is set or the section is not
// address-taken, and the section is either not writable or belongs to an
// output section flagged is_relro.
bool is_readonly = !(shdr.sh_flags & SHF_WRITE);
bool is_relro = isec.output_section && isec.output_section->is_relro;
return (ctx.arg.ignore_data_address_equality || !isec.address_taken) &&
(is_readonly || is_relro);
}
}
static Digest digest_final(blake3_hasher *hasher) {

View File

@ -866,33 +866,6 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
this->symbols.push_back(&sym);
}
// Sets the address_significant bit on input sections of this file.
//
// A section is flagged when a symbol defined by this file and living in
// that section is either listed in the file's .llvm_addrsig section,
// or the file has no .llvm_addrsig section at all, or the symbol is
// exported.
template <typename E>
void ObjectFile<E>::mark_addrsig(Context<E> &ctx) {
  // Walk the .llvm_addrsig payload, if present: a sequence of
  // ULEB-encoded indices into this file's symbol table.
  if (llvm_addrsig) {
    u8 *pos = (u8 *)llvm_addrsig->contents.data();
    u8 *limit = pos + llvm_addrsig->contents.size();

    while (pos != limit) {
      Symbol<E> &listed = *this->symbols[read_uleb(&pos)];
      if (listed.file != this)
        continue;
      if (InputSection<E> *isec = listed.get_input_section())
        isec->address_significant = true;
    }
  }

  // A symbol's address is also treated as significant when
  //
  // 1. no address-significance information exists for it, or
  // 2. it can be referenced from the outside in an address-significant
  //    manner.
  for (Symbol<E> *sym : this->symbols) {
    if (sym->file != this)
      continue;

    InputSection<E> *isec = sym->get_input_section();
    if (!isec)
      continue;

    if (!llvm_addrsig || sym->is_exported)
      isec->address_significant = true;
  }
}
template <typename E>
void ObjectFile<E>::parse(Context<E> &ctx) {
sections.resize(this->elf_sections.size());

View File

@ -496,17 +496,15 @@ int elf_main(int argc, char **argv) {
// Set is_imported and is_exported bits for each symbol.
compute_import_export(ctx);
// Read address-significant section information.
if (ctx.arg.icf && !ctx.arg.icf_all)
mark_addrsig(ctx);
// Garbage-collect unreachable sections.
if (ctx.arg.gc_sections)
gc_sections(ctx);
// Merge identical read-only sections.
if (ctx.arg.icf)
if (ctx.arg.icf) {
compute_address_significance(ctx);
icf_sections(ctx);
}
// Compute sizes of sections containing mergeable strings.
compute_merged_section_sizes(ctx);

View File

@ -263,12 +263,14 @@ public:
u32 relsec_idx = -1;
u32 reldyn_offset = 0;
bool uncompressed = false;
// For COMDAT de-duplication and garbage collection
std::atomic_bool is_alive = true;
u8 p2align = 0;
bool address_significant : 1 = false;
bool uncompressed : 1 = false;
// For ICF
Atomic<bool> address_taken = false;
// For garbage collection
Atomic<bool> is_visited = false;
@ -1210,7 +1212,6 @@ public:
void mark_live_objects(Context<E> &ctx,
std::function<void(InputFile<E> *)> feeder) override;
void convert_undefined_weak_symbols(Context<E> &ctx);
void mark_addrsig(Context<E> &ctx);
void scan_relocations(Context<E> &ctx);
void convert_common_symbols(Context<E> &ctx);
void compute_symtab_size(Context<E> &ctx);
@ -1440,7 +1441,7 @@ template <typename E> void copy_chunks(Context<E> &);
template <typename E> void apply_version_script(Context<E> &);
template <typename E> void parse_symbol_version(Context<E> &);
template <typename E> void compute_import_export(Context<E> &);
template <typename E> void mark_addrsig(Context<E> &);
template <typename E> void compute_address_significance(Context<E> &);
template <typename E> void clear_padding(Context<E> &);
template <typename E> void compute_section_headers(Context<E> &);
template <typename E> i64 set_osec_offsets(Context<E> &);

View File

@ -1672,12 +1672,85 @@ void compute_import_export(Context<E> &ctx) {
});
}
// Compute the "address-taken" bit for each input section.
//
// As a space-saving optimization, we want to merge two read-only objects
// into a single object if their contents are equivalent. That
// optimization is called the Identical Code Folding or ICF.
//
// A catch is that comparing object contents is not enough to determine if
// two objects can be merged safely; we need to take care of pointer
// equivalence.
//
// In C/C++, two pointers are equivalent if and only if they are taken for
// the same object. Merging two objects into a single object can break
// this assumption because two distinctive pointers would become
// equivalent as a result of merging. We can still merge one object with
// another if no pointer to the object was taken in code, because without
// a pointer, comparing its address becomes moot.
//
// In mold, each input section has an "address-taken" bit. If there is a
// pointer-taking reference to the object, it's set to true. At the ICF
// stage, we merge only objects whose addresses were not taken.
//
// For functions, address-taking relocations are separated from
// non-address-taking ones. For example, x86-64 uses R_X86_64_PLT32 for
// direct function calls (e.g., "call foo" to call the function foo) while
// R_X86_64_PC32 or R_X86_64_GOT32 are used for pointer-taking operations.
//
// Unfortunately, for data, we can't distinguish between address-taking
// relocations and non-address-taking ones. LLVM generates an "address
// significance" table in the ".llvm_addrsig" section to mark symbols
// whose addresses are taken in code. If that table is available, we use
// that information in this function. Otherwise, we conservatively assume
// that all data items are address-taken.
template <typename E>
void mark_addrsig(Context<E> &ctx) {
Timer t(ctx, "mark_addrsig");
void compute_address_significance(Context<E> &ctx) {
Timer t(ctx, "compute_address_significance");
// Flip address-taken bit for executable sections first.
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
file->mark_addrsig(ctx);
for (std::unique_ptr<InputSection<E>> &src : file->sections)
if (src && src->is_alive && (src->shdr().sh_flags & SHF_ALLOC))
for (const ElfRel<E> &r : src->get_rels(ctx))
if (!is_func_call_rel(r))
if (InputSection<E> *dst = file->symbols[r.r_sym]->get_input_section())
if (!dst->address_taken && (dst->shdr().sh_flags & SHF_EXECINSTR))
dst->address_taken = true;
});
auto mark = [](Symbol<E> *sym) {
if (sym)
if (InputSection<E> *isec = sym->get_input_section())
isec->address_taken = true;
};
// Some symbols' pointer values are leaked to the dynamic section.
mark(get_symbol(ctx, ctx.arg.entry));
mark(get_symbol(ctx, ctx.arg.init));
mark(get_symbol(ctx, ctx.arg.fini));
// Exported symbols are conservatively considered address-taken.
if (ctx.dynsym)
for (Symbol<E> *sym : ctx.dynsym->symbols)
if (sym->is_imported)
mark(sym);
// Handle data objects.
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
if (InputSection<E> *sec = file->llvm_addrsig.get()) {
u8 *p = (u8 *)sec->contents.data();
u8 *end = p + sec->contents.size();
while (p != end) {
Symbol<E> *sym = file->symbols[read_uleb(&p)];
if (InputSection<E> *isec = sym->get_input_section())
isec->address_taken = true;
}
} else {
for (std::unique_ptr<InputSection<E>> &isec : file->sections)
if (isec && !(isec->shdr().sh_flags & SHF_EXECINSTR))
isec->address_taken = true;
}
});
}
@ -2622,7 +2695,7 @@ template void create_output_symtab(Context<E> &);
template void apply_version_script(Context<E> &);
template void parse_symbol_version(Context<E> &);
template void compute_import_export(Context<E> &);
template void mark_addrsig(Context<E> &);
template void compute_address_significance(Context<E> &);
template void clear_padding(Context<E> &);
template void compute_section_headers(Context<E> &);
template i64 set_osec_offsets(Context<E> &);

View File

@ -68,27 +68,6 @@ static constexpr i64 max_thunk_size = 102400;
static_assert(max_thunk_size / E::thunk_size < INT16_MAX);
// Returns true if a given relocation is of type used for function calls.
template <typename E>
static bool needs_thunk_rel(const ElfRel<E> &r) {
u32 ty = r.r_type;
if constexpr (is_arm64<E>) {
return ty == R_AARCH64_JUMP26 || ty == R_AARCH64_CALL26;
} else if constexpr (is_arm32<E>) {
return ty == R_ARM_JUMP24 || ty == R_ARM_THM_JUMP24 ||
ty == R_ARM_CALL || ty == R_ARM_THM_CALL ||
ty == R_ARM_PLT32;
} else if constexpr (is_ppc32<E>) {
return ty == R_PPC_REL24 || ty == R_PPC_PLTREL24 || ty == R_PPC_LOCAL24PC;
} else if constexpr (is_ppc64<E>) {
return ty == R_PPC64_REL24 || ty == R_PPC64_REL24_NOTOC;
} else {
static_assert(is_loongarch<E>);
return ty == R_LARCH_B26;
}
}
template <typename E>
static bool is_reachable(Context<E> &ctx, InputSection<E> &isec,
Symbol<E> &sym, const ElfRel<E> &rel) {
@ -157,7 +136,7 @@ static void scan_rels(Context<E> &ctx, InputSection<E> &isec,
for (i64 i = 0; i < rels.size(); i++) {
const ElfRel<E> &rel = rels[i];
if (!needs_thunk_rel(rel))
if (!is_func_call_rel(rel))
continue;
// Skip if the symbol is undefined. apply_reloc() will report an error.

52
test/elf/icf-safe.sh Normal file
View File

@ -0,0 +1,52 @@
#!/bin/bash
# Exercises linking with -icf=safe on three functions whose bodies are
# identical, once with their addresses compared and once with plain calls.
. $(dirname $0)/common.inc
# a.o: bar() plus foo1/foo2/foo3, which all have the same body. Built with
# -ffunction-sections/-fdata-sections.
cat <<EOF | $GCC -c -o $t/a.o -ffunction-sections -fdata-sections -xc -
int bar() {
return 5;
}
int foo1(int x) {
return bar() + x;
}
int foo2(int x) {
return bar() + x;
}
int foo3(int x) {
return bar() + x;
}
EOF
# b.o: main() compares the addresses of foo1/foo2 and foo2/foo3 and prints
# the two comparison results.
cat <<EOF | $CC -c -o $t/b.o -ffunction-sections -fdata-sections -xc -
#include <stdio.h>
int foo1();
int foo2();
int foo3();
int main() {
printf("%d %d\n", foo1 == foo2, foo2 == foo3);
}
EOF
# The program must print "0 0", i.e. the three functions still compare
# unequal after linking with -icf=safe.
$CC -B. -o $t/exe1 -Wl,-icf=safe $t/a.o $t/b.o
$QEMU $t/exe1 | grep -q '^0 0$'
# c.o: main() only calls foo1/foo2/foo3; no function pointer appears in it.
cat <<EOF | $GCC -c -o $t/c.o -ffunction-sections -fdata-sections -xc -
int foo1();
int foo2();
int foo3();
int main() {
foo1();
foo2();
foo3();
}
EOF
# NOTE(review): $t/log2 receives the standard output of exe2, and the main()
# in c.o writes nothing, so the grep below has nothing to match and the
# negated check passes regardless of what the linker did. Presumably the
# check was meant to inspect something else (symbol table or linker
# diagnostics) -- confirm the intent.
$CC -B. -o $t/exe2 -Wl,-icf=safe $t/a.o $t/c.o
$QEMU $t/exe2 > $t/log2
! grep foo2 $t/log2 || false