diff --git a/elf/arch-arm64.cc b/elf/arch-arm64.cc index 46312bd3..95e4093a 100644 --- a/elf/arch-arm64.cc +++ b/elf/arch-arm64.cc @@ -48,7 +48,7 @@ static void write_plt_header(Context &ctx, u8 *buf) { } static void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - u8 *ent = buf + ctx.plt_hdr_size + sym.get_plt_idx(ctx) * ctx.plt_size; + u8 *ent = buf + E::plt_hdr_size + sym.get_plt_idx(ctx) * E::plt_size; static const u32 data[] = { 0x90000010, // adrp x16, .got.plt[n] diff --git a/elf/arch-i386.cc b/elf/arch-i386.cc index 86526fc8..cb960446 100644 --- a/elf/arch-i386.cc +++ b/elf/arch-i386.cc @@ -28,7 +28,7 @@ static void write_plt_header(Context &ctx, u8 *buf) { static void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym, i64 idx) { - u8 *ent = buf + ctx.plt_hdr_size + sym.get_plt_idx(ctx) * ctx.plt_size; + u8 *ent = buf + E::plt_hdr_size + sym.get_plt_idx(ctx) * E::plt_size; if (ctx.arg.pic) { static const u8 data[] = { diff --git a/elf/arch-riscv64.cc b/elf/arch-riscv64.cc index 30dec271..1589f1a1 100644 --- a/elf/arch-riscv64.cc +++ b/elf/arch-riscv64.cc @@ -106,8 +106,8 @@ static void write_plt_header(Context &ctx) { } static void write_plt_entry(Context &ctx, Symbol &sym) { - u8 *ent = ctx.buf + ctx.plt->shdr.sh_offset + ctx.plt_hdr_size + - sym.get_plt_idx(ctx) * ctx.plt_size; + u8 *ent = ctx.buf + ctx.plt->shdr.sh_offset + E::plt_hdr_size + + sym.get_plt_idx(ctx) * E::plt_size; static const u32 data[] = { 0x00000e17, // auipc t3, %pcrel_hi(function@.got.plt) diff --git a/elf/arch-x86-64.cc b/elf/arch-x86-64.cc index 06fde383..b05efd73 100644 --- a/elf/arch-x86-64.cc +++ b/elf/arch-x86-64.cc @@ -4,38 +4,22 @@ namespace mold::elf { using E = X86_64; -// The compact PLT format is used when `-z now` is given. If the flag -// is given, all PLT symbols are resolved eagerly on startup, so we -// can omit code for lazy symbol resolution from PLT in that case. 
-static void write_compact_plt(Context &ctx) { - u8 *buf = ctx.buf + ctx.plt->shdr.sh_offset; - - static const u8 data[] = { - 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT - 0x66, 0x90, // nop - }; - - for (Symbol *sym : ctx.plt->symbols) { - u8 *ent = buf + sym->get_plt_idx(ctx) * ctx.plt_size; - memcpy(ent, data, sizeof(data)); - *(ul32 *)(ent + 2) = sym->get_gotplt_addr(ctx) - sym->get_plt_addr(ctx) - 6; - } -} - -// The IBTPLT is a security-enhanced version of the regular PLT. -// It uses Indirect Branch Tracking (IBT) feature which is part of -// Intel Control-Flow Enforcement (CET). +// This is a security-enhanced version of the regular PLT. The PLT +// header and each PLT entry start with endbr64 for Intel's +// control-flow enforcement security mechanism. // -// Note that our IBTPLT instruction sequence is different from the one -// used in GNU ld. GNU's IBTPLT implementation uses two separate -// sections (.plt and .plt.sec) in which one PLT entry takes 32 bytes -// in total. Our PLT consists of just .plt and each entry is 16 bytes -// long. +// Note that our IBT-enabled PLT instruction sequence is different +// from the one used in GNU ld. GNU's IBTPLT implementation uses two +// separate sections (.plt and .plt.sec) in which one PLT entry takes +// 32 bytes in total. Our IBTPLT consists of just .plt and each entry +// is 16 bytes long. // -// Our PLT entry clobbers r11, but that's fine because the resolver -// function (_dl_runtime_resolve) does not preserve r11 anyway. -static void write_ibtplt(Context &ctx) { - u8 *buf = ctx.buf + ctx.plt->shdr.sh_offset; +// Our PLT entry clobbers %r11, but that's fine because the resolver +// function (_dl_runtime_resolve) clobbers %r11 anyway. 
+template <> +void PltSection::copy_buf(Context &ctx) { + u8 *buf = ctx.buf + this->shdr.sh_offset; + memset(buf, 0xcc, this->shdr.sh_size); // Write PLT header static const u8 plt0[] = { @@ -43,15 +27,11 @@ static void write_ibtplt(Context &ctx) { 0x41, 0x53, // push %r11 0xff, 0x35, 0, 0, 0, 0, // push GOTPLT+8(%rip) 0xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+16(%rip) - 0x0f, 0x1f, 0x40, 0x00, // nop - 0x0f, 0x1f, 0x40, 0x00, // nop - 0x0f, 0x1f, 0x40, 0x00, // nop - 0x66, 0x90, // nop }; memcpy(buf, plt0, sizeof(plt0)); - *(ul32 *)(buf + 8) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 4; - *(ul32 *)(buf + 14) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 2; + *(ul32 *)(buf + 8) = ctx.gotplt->shdr.sh_addr - this->shdr.sh_addr - 4; + *(ul32 *)(buf + 14) = ctx.gotplt->shdr.sh_addr - this->shdr.sh_addr - 2; // Write PLT entries i64 relplt_idx = 0; @@ -62,70 +42,28 @@ static void write_ibtplt(Context &ctx) { 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOTPLT }; - for (Symbol *sym : ctx.plt->symbols) { - u8 *ent = buf + ctx.plt_hdr_size + sym->get_plt_idx(ctx) * ctx.plt_size; + for (Symbol *sym : symbols) { + u8 *ent = buf + E::plt_hdr_size + sym->get_plt_idx(ctx) * E::plt_size; memcpy(ent, data, sizeof(data)); *(ul32 *)(ent + 6) = relplt_idx++; *(ul32 *)(ent + 12) = sym->get_gotplt_addr(ctx) - sym->get_plt_addr(ctx) - 16; } } -// The regular PLT. 
-static void write_plt(Context &ctx) { - u8 *buf = ctx.buf + ctx.plt->shdr.sh_offset; - - // Write PLT header - static const u8 plt0[] = { - 0xff, 0x35, 0, 0, 0, 0, // pushq GOTPLT+8(%rip) - 0xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+16(%rip) - 0x0f, 0x1f, 0x40, 0x00, // nop - }; - - memcpy(buf, plt0, sizeof(plt0)); - *(ul32 *)(buf + 2) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr + 2; - *(ul32 *)(buf + 8) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr + 4; - - // Write PLT entries - i64 relplt_idx = 0; - - static const u8 data[] = { - 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOTPLT - 0x68, 0, 0, 0, 0, // push $index_in_relplt - 0xe9, 0, 0, 0, 0, // jmp PLT[0] - }; - - for (Symbol *sym : ctx.plt->symbols) { - u8 *ent = buf + ctx.plt_hdr_size + sym->get_plt_idx(ctx) * ctx.plt_size; - memcpy(ent, data, sizeof(data)); - *(ul32 *)(ent + 2) = sym->get_gotplt_addr(ctx) - sym->get_plt_addr(ctx) - 6; - *(ul32 *)(ent + 7) = relplt_idx++; - *(ul32 *)(ent + 12) = ctx.plt->shdr.sh_addr - sym->get_plt_addr(ctx) - 16; - } -} - -template <> -void PltSection::copy_buf(Context &ctx) { - if (ctx.arg.z_now) - write_compact_plt(ctx); - else if (ctx.arg.z_ibtplt) - write_ibtplt(ctx); - else - write_plt(ctx); -} - template <> void PltGotSection::copy_buf(Context &ctx) { u8 *buf = ctx.buf + this->shdr.sh_offset; + memset(buf, 0xcc, this->shdr.sh_size); static const u8 data[] = { + 0xf3, 0x0f, 0x1e, 0xfa, // endbr64 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT - 0x66, 0x90, // nop }; for (Symbol *sym : symbols) { u8 *ent = buf + sym->get_pltgot_idx(ctx) * E::pltgot_size; memcpy(ent, data, sizeof(data)); - *(ul32 *)(ent + 2) = sym->get_got_addr(ctx) - sym->get_plt_addr(ctx) - 6; + *(ul32 *)(ent + 6) = sym->get_got_addr(ctx) - sym->get_plt_addr(ctx) - 10; } } diff --git a/elf/cmdline.cc b/elf/cmdline.cc index 8fef27d5..5e8a5428 100644 --- a/elf/cmdline.cc +++ b/elf/cmdline.cc @@ -317,28 +317,6 @@ parse_defsym_value(Context &ctx, std::string_view s) { return get_symbol(ctx, s); } -// Returns a 
PLT header size and a PLT entry size. -template -static std::pair get_plt_size(Context &ctx) { - if constexpr (std::is_same_v) { - if (ctx.arg.z_now) - return {0, 8}; - if (ctx.arg.z_ibtplt) - return {32, 16}; - return {16, 16}; - } - - if constexpr (std::is_same_v) - return {16, 16}; - if constexpr (std::is_same_v) - return {32, 16}; - if constexpr (std::is_same_v) - return {32, 16}; - if constexpr (std::is_same_v) - return {32, 16}; - unreachable(); -} - template std::vector parse_nonpositional_args(Context &ctx) { std::span args = ctx.cmdline_args; @@ -717,9 +695,7 @@ std::vector parse_nonpositional_args(Context &ctx) { ctx.arg.z_interpose = true; } else if (read_z_flag("ibt")) { ctx.arg.z_ibt = true; - ctx.arg.z_ibtplt = true; } else if (read_z_flag("ibtplt")) { - ctx.arg.z_ibtplt = true; } else if (read_z_flag("muldefs")) { ctx.arg.allow_multiple_definition = true; } else if (read_z_flag("keep-text-section-prefix")) { @@ -1046,8 +1022,6 @@ std::vector parse_nonpositional_args(Context &ctx) { if (ctx.arg.shared && warn_shared_textrel) ctx.arg.warn_textrel = true; - std::tie(ctx.plt_hdr_size, ctx.plt_size) = get_plt_size(ctx); - ctx.arg.undefined.push_back(ctx.arg.entry); // TLSDESC relocs must be always relaxed for statically-linked diff --git a/elf/elf.h b/elf/elf.h index 1bc8137e..240044a8 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -1359,7 +1359,9 @@ struct X86_64 { static constexpr u32 word_size = 8; static constexpr u32 page_size = 4096; static constexpr u32 e_machine = EM_X86_64; - static constexpr u32 pltgot_size = 8; + static constexpr u32 plt_hdr_size = 32; + static constexpr u32 plt_size = 16; + static constexpr u32 pltgot_size = 16; static constexpr bool is_rel = false; static constexpr bool supports_tlsdesc = true; }; @@ -1390,6 +1392,8 @@ struct I386 { static constexpr u32 word_size = 4; static constexpr u32 page_size = 4096; static constexpr u32 e_machine = EM_386; + static constexpr u32 plt_hdr_size = 16; + static constexpr u32 plt_size = 16; 
static constexpr u32 pltgot_size = 8; static constexpr bool is_rel = true; static constexpr bool supports_tlsdesc = true; @@ -1421,6 +1425,8 @@ struct ARM64 { static constexpr u32 word_size = 8; static constexpr u32 page_size = 65536; static constexpr u32 e_machine = EM_AARCH64; + static constexpr u32 plt_hdr_size = 32; + static constexpr u32 plt_size = 16; static constexpr u32 pltgot_size = 16; static constexpr bool is_rel = false; static constexpr bool supports_tlsdesc = true; @@ -1452,6 +1458,8 @@ struct ARM32 { static constexpr u32 word_size = 4; static constexpr u32 page_size = 4096; static constexpr u32 e_machine = EM_ARM; + static constexpr u32 plt_hdr_size = 32; + static constexpr u32 plt_size = 16; static constexpr u32 pltgot_size = 16; static constexpr bool is_rel = true; static constexpr bool supports_tlsdesc = true; @@ -1482,6 +1490,8 @@ struct RISCV64 { static constexpr u32 word_size = 8; static constexpr u32 page_size = 4096; static constexpr u32 e_machine = EM_RISCV; + static constexpr u32 plt_hdr_size = 32; + static constexpr u32 plt_size = 16; static constexpr u32 pltgot_size = 16; static constexpr bool is_rel = false; static constexpr bool supports_tlsdesc = false; diff --git a/elf/mold.h b/elf/mold.h index 0dd2d495..edf46d69 100644 --- a/elf/mold.h +++ b/elf/mold.h @@ -1524,7 +1524,6 @@ struct Context { bool z_execstack = false; bool z_execstack_if_needed = false; bool z_ibt = false; - bool z_ibtplt = false; bool z_initfirst = false; bool z_interpose = false; bool z_keep_text_section_prefix = false; @@ -1575,8 +1574,6 @@ struct Context { std::vector version_patterns; u16 default_version = VER_NDX_GLOBAL; i64 page_size = -1; - i64 plt_hdr_size = -1; - i64 plt_size = -1; // Reader context bool as_needed = false; @@ -2389,7 +2386,7 @@ inline u64 Symbol::get_tlsdesc_addr(Context &ctx) const { template inline u64 Symbol::get_plt_addr(Context &ctx) const { if (i32 idx = get_plt_idx(ctx); idx != -1) - return ctx.plt->shdr.sh_addr + ctx.plt_hdr_size + 
idx * ctx.plt_size; + return ctx.plt->shdr.sh_addr + E::plt_hdr_size + idx * E::plt_size; return ctx.pltgot->shdr.sh_addr + get_pltgot_idx(ctx) * E::pltgot_size; } diff --git a/elf/output-chunks.cc b/elf/output-chunks.cc index bc14b3ff..daeece49 100644 --- a/elf/output-chunks.cc +++ b/elf/output-chunks.cc @@ -1137,19 +1137,10 @@ void GotPltSection::copy_buf(Context &ctx) { buf[2] = 0; auto get_plt_resolver_addr = [&](Symbol &sym) -> u64 { - if constexpr (std::is_same_v || std::is_same_v || - std::is_same_v) - return ctx.plt->shdr.sh_addr; - - if constexpr (std::is_same_v) { - if (ctx.arg.z_ibtplt) - return ctx.plt->shdr.sh_addr; - return sym.get_plt_addr(ctx) + 6; - } - if constexpr (std::is_same_v) return sym.get_plt_addr(ctx) + 6; - unreachable(); + else + return ctx.plt->shdr.sh_addr; }; for (Symbol *sym : ctx.plt->symbols) @@ -1161,10 +1152,10 @@ void PltSection::add_symbol(Context &ctx, Symbol *sym) { assert(!sym->has_plt(ctx)); if (this->shdr.sh_size == 0) - this->shdr.sh_size = ctx.plt_hdr_size; + this->shdr.sh_size = E::plt_hdr_size; sym->set_plt_idx(ctx, symbols.size()); - this->shdr.sh_size += ctx.plt_size; + this->shdr.sh_size += E::plt_size; symbols.push_back(sym); sym->set_gotplt_idx(ctx, ctx.gotplt->shdr.sh_size / E::word_size); diff --git a/test/elf/gnu-linkonce.sh b/test/elf/gnu-linkonce.sh index bc5848bc..264d1dee 100755 --- a/test/elf/gnu-linkonce.sh +++ b/test/elf/gnu-linkonce.sh @@ -34,6 +34,6 @@ int main() {} EOF $CC -B. -o $t/exe $t/a.o $t/b.o $t/c.o -$OBJDUMP -d $t/exe | grep -A1 '<__x86.get_pc_thunk.bx>:' | grep -q puts +$OBJDUMP -d $t/exe | grep -A1 '<__x86.get_pc_thunk.bx>:' | fgrep -q .plt echo OK diff --git a/test/elf/pltgot.sh b/test/elf/pltgot.sh index bcfe4895..6c306699 100755 --- a/test/elf/pltgot.sh +++ b/test/elf/pltgot.sh @@ -36,6 +36,6 @@ EOF $OBJDUMP -d -j .plt.got $t/exe > $t/log -grep -Eq '1020:.*jmp.* ' $t/log +grep -Eq '1034:.*jmp.* ' $t/log echo OK