1
1
mirror of https://github.com/rui314/mold.git synced 2024-09-11 13:06:59 +03:00

Do not emit text relocations for IFUNC symbols in PDEs

IFUNC symbols are resolved at process startup by executing the function
that the symbol points to. This is used to select the "best" function at
runtime; for instance, the runtime may choose a faster version of memcpy
that uses SIMD instructions if they are available on the current system.

Thus, an IFUNC symbol has two addresses: the initial address (or the
resolver's address) and the resolved address, which is the return value of
the resolver.

In position-independent executables (PIEs), function pointers are loaded
from the GOT indirectly, and symbols are not directly referenced. In such
executables, the initial value of the GOT slot for an IFUNC symbol
contains the resolver address, and this is overwritten at runtime to the
resolved address upon process startup. When user code takes a pointer to
an IFUNC, it always reads the resolved address from GOT.

In contrast, position-dependent executables (PDEs) may have instructions
that directly refer to an IFUNC symbol, such as movabs on x86-64. The GOT
entry for an IFUNC holds the resolved address, so any direct reference
must also produce the resolved address to maintain pointer equality.
(C/C++ standards require that two pointers must be equal if and only if
they are taken for the same symbol.)

Previously, we emitted text relocations to modify instruction operands.
However, text relocations are undesirable and not always reliable. For
example, on ARM64, multiple instructions are used to materialize a
symbol's address, and it's not feasible to issue a dynamic relocation to
alter those instructions since the dynamic loader generally can only
modify 32-bit or 64-bit words.

In this commit, I have adopted a different strategy. An IFUNC symbol now
occupies two consecutive GOT slots in a PDE. The first slot holds the
symbol's PLT address, and the second slot holds the resolved address. The
PLT address is consistently used as the symbol's address throughout the
process, while the second slot is used only by the PLT entry to jump to
the resolved address.

This method ensures pointer equality without the need to emit text
relocations for IFUNC symbols in PDEs.
This commit is contained in:
Rui Ueyama 2023-11-01 15:36:40 +09:00
parent 9516da1751
commit 4cdfc7e72c
16 changed files with 152 additions and 20 deletions

View File

@ -225,7 +225,7 @@ void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
template <>
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
memcpy(buf, plt_entry, sizeof(plt_entry));
*(ul32 *)(buf + 12) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 12;
*(ul32 *)(buf + 12) = sym.get_got_pltgot_addr(ctx) - sym.get_plt_addr(ctx) - 12;
}
// ARM does not use .eh_frame for exception handling. Instead, it uses

View File

@ -93,7 +93,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
0xd503'201f, // nop
};
u64 got = sym.get_got_addr(ctx);
u64 got = sym.get_got_pltgot_addr(ctx);
u64 plt = sym.get_plt_addr(ctx);
memcpy(buf, insn, sizeof(insn));

View File

@ -164,7 +164,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // (padding)
};
memcpy(buf, insn, sizeof(insn));
*(ul32 *)(buf + 6) = sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr;
*(ul32 *)(buf + 6) = sym.get_got_pltgot_addr(ctx) - ctx.got->shdr.sh_addr;
} else {
static const u8 insn[] = {
0xf3, 0x0f, 0x1e, 0xfb, // endbr32
@ -172,7 +172,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // (padding)
};
memcpy(buf, insn, sizeof(insn));
*(ul32 *)(buf + 6) = sym.get_got_addr(ctx);
*(ul32 *)(buf + 6) = sym.get_got_pltgot_addr(ctx);
}
}

View File

@ -172,7 +172,7 @@ void write_plt_entry<E>(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
template <>
void write_pltgot_entry<E>(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
u64 got = sym.get_got_addr(ctx);
u64 got = sym.get_got_pltgot_addr(ctx);
u64 plt = sym.get_plt_addr(ctx);
memcpy(buf, E::is_64 ? plt_entry_64 : plt_entry_32, E::plt_size);

View File

@ -52,7 +52,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
};
memcpy(buf, insn, sizeof(insn));
*(ub32 *)(buf + 4) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 2;
*(ub32 *)(buf + 4) = sym.get_got_pltgot_addr(ctx) - sym.get_plt_addr(ctx) - 2;
}
template <>

View File

@ -120,7 +120,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
memcpy(buf, plt_entry, sizeof(plt_entry));
ub32 *loc = (ub32 *)buf;
i64 offset = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 8;
i64 offset = sym.get_got_pltgot_addr(ctx) - sym.get_plt_addr(ctx) - 8;
loc[4] |= higha(offset);
loc[5] |= lo(offset);
}

View File

@ -198,7 +198,7 @@ void write_plt_entry<E>(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
template <>
void write_pltgot_entry<E>(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
u64 got = sym.get_got_addr(ctx);
u64 got = sym.get_got_pltgot_addr(ctx);
u64 plt = sym.get_plt_addr(ctx);
memcpy(buf, E::is_64 ? plt_entry_64 : plt_entry_32, E::plt_size);

View File

@ -90,7 +90,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
};
memcpy(buf, insn, sizeof(insn));
*(ub32 *)(buf + 2) = (sym.get_got_addr(ctx) - sym.get_plt_addr(ctx)) >> 1;
*(ub32 *)(buf + 2) = (sym.get_got_pltgot_addr(ctx) - sym.get_plt_addr(ctx)) >> 1;
}
template <>

View File

@ -169,7 +169,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
static_assert(sizeof(insn) == E::pltgot_size);
memcpy(buf, insn, sizeof(insn));
*(ul32 *)(buf + 8) = sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr;
*(ul32 *)(buf + 8) = sym.get_got_pltgot_addr(ctx) - ctx.got->shdr.sh_addr;
} else {
static const u8 insn[] = {
0x01, 0xd0, // mov.l 1f, r0
@ -181,7 +181,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
static_assert(sizeof(insn) == E::pltgot_size);
memcpy(buf, insn, sizeof(insn));
*(ul32 *)(buf + 8) = sym.get_got_addr(ctx);
*(ul32 *)(buf + 8) = sym.get_got_pltgot_addr(ctx);
}
}

View File

@ -115,7 +115,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
};
memcpy(buf, entry, sizeof(entry));
*(ub64 *)(buf + 24) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 4;
*(ub64 *)(buf + 24) = sym.get_got_pltgot_addr(ctx) - sym.get_plt_addr(ctx) - 4;
}
template <>

View File

@ -98,7 +98,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
};
memcpy(buf, insn, sizeof(insn));
*(ul32 *)(buf + 2) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 6;
*(ul32 *)(buf + 2) = sym.get_got_pltgot_addr(ctx) - sym.get_plt_addr(ctx) - 6;
}
template <>

View File

@ -7,7 +7,8 @@
namespace mold::elf {
typedef enum {
NONE, ERROR, COPYREL, DYN_COPYREL, PLT, CPLT, DYN_CPLT, DYNREL, BASEREL, IFUNC,
NONE, ERROR, COPYREL, DYN_COPYREL, PLT, CPLT, DYN_CPLT, DYNREL,
BASEREL, IFUNC_DYNREL,
} Action;
template <typename E>
@ -203,7 +204,7 @@ static void scan_rel(Context<E> &ctx, InputSection<E> &isec, Symbol<E> &sym,
if (!isec.is_relr_reloc(ctx, rel))
isec.file.num_dynrel++;
break;
case IFUNC:
case IFUNC_DYNREL:
// Create an IRELATIVE relocation for a GNU ifunc symbol.
//
// We usually create an IRELATIVE relocation in .got for each ifunc.
@ -273,7 +274,7 @@ static Action get_absrel_action(Context<E> &ctx, Symbol<E> &sym) {
template <typename E>
static Action get_dyn_absrel_action(Context<E> &ctx, Symbol<E> &sym) {
if (sym.is_ifunc())
return IFUNC;
return ctx.arg.pic ? IFUNC_DYNREL : NONE;
// This is a decision table for absolute relocations for the pointer
// size data (e.g. R_X86_64_64). Unlike the absrel_table, we can emit
@ -291,7 +292,7 @@ static Action get_dyn_absrel_action(Context<E> &ctx, Symbol<E> &sym) {
template <typename E>
static Action get_ppc64_toc_action(Context<E> &ctx, Symbol<E> &sym) {
if (sym.is_ifunc())
return IFUNC;
return IFUNC_DYNREL;
// As a special case, we do not create copy relocations nor canonical
// PLTs for .toc sections. PPC64's .toc is a compiler-generated
@ -406,7 +407,7 @@ static void apply_absrel(Context<E> &ctx, InputSection<E> &isec,
case DYNREL:
emit_abs_dynrel();
break;
case IFUNC:
case IFUNC_DYNREL:
if constexpr (supports_ifunc<E>) {
u64 addr = sym.get_addr(ctx, NO_PLT) + A;
*dynrel++ = ElfRel<E>(P, E::R_IRELATIVE, 0, addr);

View File

@ -1940,6 +1940,7 @@ public:
u64 get_tlsdesc_addr(Context<E> &ctx) const;
u64 get_plt_addr(Context<E> &ctx) const;
u64 get_opd_addr(Context<E> &ctx) const;
u64 get_got_pltgot_addr(Context<E> &ctx) const;
void set_got_idx(Context<E> &ctx, i32 idx);
void set_gottp_idx(Context<E> &ctx, i32 idx);
@ -1973,6 +1974,7 @@ public:
bool is_relative() const { return !is_absolute(); }
bool is_local(Context<E> &ctx) const;
bool is_ifunc() const { return get_type() == STT_GNU_IFUNC; }
bool is_pde_ifunc(Context<E> &ctx) const;
bool is_remaining_undef_weak() const;
bool is_pcrel_linktime_const(Context<E> &ctx) const;
@ -2574,6 +2576,25 @@ inline u64 Symbol<E>::get_opd_addr(Context<E> &ctx) const {
get_opd_idx(ctx) * PPC64OpdSection::ENTRY_SIZE;
}
template <typename E>
inline u64 Symbol<E>::get_got_pltgot_addr(Context<E> &ctx) const {
  // Returns the address of the GOT slot that this symbol's PLT entry
  // should load its jump target (the resolved address) from.
  //
  // In a position-dependent executable (PDE), an ifunc symbol owns two
  // adjacent GOT slots: the first stores the symbol's PLT address and
  // the second stores the resolved address. The PLT entry serves as
  // the symbol's address in a PDE, much like a canonical PLT, so the
  // PLT entry itself has to read the second slot.
  //
  // Note that this function is not used for PPC64. On PPC64, symbols
  // are always accessed through the TOC table regardless of the
  // -fno-PIE setting, so those psABIs don't need canonical PLTs either.
  const u64 slot_offset = is_pde_ifunc(ctx) ? sizeof(Word<E>) : 0;
  return get_got_addr(ctx) + slot_offset;
}
template <typename E>
inline void Symbol<E>::set_got_idx(Context<E> &ctx, i32 idx) {
assert(aux_idx != -1);
@ -2702,6 +2723,12 @@ inline bool Symbol<E>::is_local(Context<E> &ctx) const {
return !is_imported && !is_exported;
}
template <typename E>
inline bool Symbol<E>::is_pde_ifunc(Context<E> &ctx) const {
  // Returns true if this is an ifunc that uses two GOT slots, i.e. an
  // ifunc symbol in a non-PIC output on a target other than PPC64.
  if (!is_ifunc())
    return false;
  if (ctx.arg.pic)
    return false;
  return !is_ppc64<E>;
}
// A remaining weak undefined symbol is promoted to a dynamic symbol
// in a DSO and resolved to 0 in an executable. This function returns
// true if it's the latter.

View File

@ -1082,7 +1082,14 @@ void OutputSection<E>::populate_symtab(Context<E> &ctx) {
template <typename E>
void GotSection<E>::add_got_symbol(Context<E> &ctx, Symbol<E> *sym) {
sym->set_got_idx(ctx, this->shdr.sh_size / sizeof(Word<E>));
this->shdr.sh_size += sizeof(Word<E>);
// An IFUNC symbol uses two GOT slots in a position-dependent
// executable.
if (sym->is_pde_ifunc(ctx))
this->shdr.sh_size += sizeof(Word<E>) * 2;
else
this->shdr.sh_size += sizeof(Word<E>);
got_syms.push_back(sym);
}
@ -1176,7 +1183,12 @@ static std::vector<GotEntry<E>> get_got_entries(Context<E> &ctx) {
// IFUNC always needs to be fixed up by the dynamic linker.
if constexpr (supports_ifunc<E>) {
if (sym->is_ifunc()) {
add({idx, sym->get_addr(ctx, NO_PLT), E::R_IRELATIVE});
if (sym->is_pde_ifunc(ctx)) {
add({idx, sym->get_plt_addr(ctx)});
add({idx + 1, sym->get_addr(ctx, NO_PLT), E::R_IRELATIVE});
} else {
add({idx, sym->get_addr(ctx, NO_PLT), E::R_IRELATIVE});
}
continue;
}
}
@ -1656,8 +1668,15 @@ ElfSym<E> to_output_esym(Context<E> &ctx, Symbol<E> &sym, u32 st_name,
esym.st_shndx = SHN_ABS;
esym.st_value = sym.get_addr(ctx);
} else if (sym.get_type() == STT_TLS) {
// TLS symbol
shndx = get_st_shndx(sym);
esym.st_value = sym.get_addr(ctx) - ctx.tls_begin;
} else if (sym.is_pde_ifunc(ctx)) {
// IFUNC symbol in PDE that uses two GOT slots
shndx = get_st_shndx(sym);
esym.st_type = STT_FUNC;
esym.st_visibility = sym.visibility;
esym.st_value = sym.get_addr(ctx);
} else {
shndx = get_st_shndx(sym);
esym.st_visibility = sym.visibility;

View File

@ -0,0 +1,33 @@
#!/bin/bash
# Checks that the address of an ifunc symbol taken directly in a
# non-PIE executable equals the address of the same symbol as obtained
# by a shared library linked into that executable.
. $(dirname $0)/common.inc
# Skip the test when common.inc reports that ifunc is unsupported.
supports_ifunc || skip
# a.o (PIC): get_foo() returns the address of the external symbol foo.
cat <<EOF | $CC -c -fPIC -o $t/a.o -xc -
typedef void Func();
void foo();
Func *get_foo() { return foo; }
EOF
# Link a.o into the shared library b.so.
$CC -B. -shared -o $t/b.so $t/a.o
# c.o (compiled with -fno-PIE): defines foo as an ifunc resolved by
# resolve_foo() to real_foo, prints foo's address as seen by main and
# as returned by get_foo(), then calls foo.
cat <<EOF | $CC -c -fno-PIE -o $t/c.o -xc -
#include <stdio.h>
typedef void Func();
__attribute__((ifunc("resolve_foo"))) void foo(void);
void real_foo(void) { printf("foo "); }
Func *resolve_foo() { return real_foo; }
Func *get_foo();
int main() {
printf("%p %p\n", foo, get_foo());
foo();
printf("\n");
}
EOF
# Link the executable with -no-pie against b.so.
$CC -B. -o $t/exe1 $t/c.o $t/b.so -no-pie
# The two printed pointers must be identical: the backreference \1
# requires the second field to repeat the first.
$QEMU $t/exe1 | grep -Eq '^(\S+) \1'

View File

@ -0,0 +1,52 @@
#!/bin/bash
# Checks that ifunc symbol addresses agree between -fno-PIE and -fPIC
# object files linked into one non-PIE executable, and that the
# resulting executable has no TEXTREL entry in its dynamic section.
. $(dirname $0)/common.inc
# Skip the test when common.inc reports that ifunc is unsupported.
supports_ifunc || skip
# a.o (compiled with -fno-PIE): defines two ifuncs, foo and bar, whose
# resolvers return real_foo and real_bar respectively.
cat <<EOF | $CC -c -fno-PIE -o $t/a.o -xc -
#include <stdio.h>
typedef void Func();
__attribute__((ifunc("resolve_foo"))) void foo(void);
void real_foo(void) { printf("foo "); }
Func *resolve_foo() { return real_foo; }
__attribute__((ifunc("resolve_bar"))) void bar(void);
void real_bar(void) { printf("bar "); }
Func *resolve_bar() { return real_bar; }
EOF
# b.o (PIC): get_foo()/get_bar() return the addresses of foo and bar.
cat <<EOF | $CC -c -fPIC -o $t/b.o -xc -
typedef void Func();
void foo();
void bar();
Func *get_foo() { return foo; }
Func *get_bar() { return bar; }
EOF
# c.o (compiled with -fno-PIE): prints the address of each symbol as
# seen by main next to the one returned by the getter in b.o, then
# calls both functions.
cat <<EOF | $CC -c -fno-PIE -o $t/c.o -xc -
#include <stdio.h>
typedef void Func();
void foo();
void bar();
Func *get_foo();
Func *get_bar();
int main() {
printf("%p %p %p %p\n", foo, get_foo(), bar, get_bar());
foo();
bar();
printf("\n");
}
EOF
# Link all three objects into a single executable with -no-pie.
$CC -B. -o $t/exe1 $t/a.o $t/b.o $t/c.o -no-pie
# Fields 1 and 2 (foo) must match each other, and so must fields 3
# and 4 (bar); the backreferences \1 and \2 enforce this.
$QEMU $t/exe1 | grep -Eq '^(\S+) \1 (\S+) \2'
# Dump the dynamic section and fail if it mentions TEXTREL.
readelf --dynamic $t/exe1 > $t/log1
! grep -q TEXTREL $t/log1 || false