diff --git a/elf/arch-arm32.cc b/elf/arch-arm32.cc
index 53497a50..97dd82c7 100644
--- a/elf/arch-arm32.cc
+++ b/elf/arch-arm32.cc
@@ -272,11 +272,11 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
       *(ul32 *)loc = sym.get_gottp_addr(ctx) + A - P;
       continue;
     case R_ARM_TLS_LE32:
-      *(ul32 *)loc = S + A - ctx.tls_begin + 8;
+      *(ul32 *)loc = S + A - ctx.tls_begin + E::tls_offset;
       continue;
     case R_ARM_TLS_GOTDESC:
       if (sym.get_tlsdesc_idx(ctx) == -1)
-        *(ul32 *)loc = S - ctx.tls_begin + 8;
+        *(ul32 *)loc = S - ctx.tls_begin + E::tls_offset;
       else
         *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) + A - P - 6;
       continue;
diff --git a/elf/arch-arm64.cc b/elf/arch-arm64.cc
index 02d101b5..e9d0b9b0 100644
--- a/elf/arch-arm64.cc
+++ b/elf/arch-arm64.cc
@@ -279,14 +279,14 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
       *(ul32 *)loc |= bits(sym.get_gottp_addr(ctx) + A, 11, 3) << 10;
       continue;
     case R_AARCH64_TLSLE_ADD_TPREL_HI12: {
-      i64 val = S + A - ctx.tls_begin + 16;
+      i64 val = S + A - ctx.tls_begin + E::tls_offset;
       overflow_check(val, 0, (i64)1 << 24);
       *(ul32 *)loc |= bits(val, 23, 12) << 10;
       continue;
     }
     case R_AARCH64_TLSLE_ADD_TPREL_LO12:
     case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
-      *(ul32 *)loc |= bits(S + A - ctx.tls_begin + 16, 11, 0) << 10;
+      *(ul32 *)loc |= bits(S + A - ctx.tls_begin + E::tls_offset, 11, 0) << 10;
       continue;
     case R_AARCH64_TLSGD_ADR_PAGE21: {
       i64 val = page(sym.get_tlsgd_addr(ctx) + A) - page(P);
@@ -300,7 +300,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
     case R_AARCH64_TLSDESC_ADR_PAGE21: {
       if (ctx.relax_tlsdesc && !sym.is_imported) {
         // adrp x0, 0 -> movz x0, #tls_offset_hi, lsl #16
-        i64 val = (S + A - ctx.tls_begin + 16);
+        i64 val = (S + A - ctx.tls_begin + E::tls_offset);
         overflow_check(val, -((i64)1 << 32), (i64)1 << 32);
         *(ul32 *)loc = 0xd2a00000 | (bits(val, 32, 16) << 5);
       } else {
@@ -313,7 +313,7 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
     case R_AARCH64_TLSDESC_LD64_LO12:
       if (ctx.relax_tlsdesc && !sym.is_imported) {
         // ldr x2, [x0] -> movk x0, #tls_offset_lo
-        u32 offset_lo = (S + A - ctx.tls_begin + 16) & 0xffff;
+        u32 offset_lo = (S + A - ctx.tls_begin + E::tls_offset) & 0xffff;
         *(ul32 *)loc = 0xf2800000 | (offset_lo << 5);
       } else {
         *(ul32 *)loc |= bits(sym.get_tlsdesc_addr(ctx) + A, 11, 3) << 10;
diff --git a/elf/elf.h b/elf/elf.h
index 240044a8..093835c9 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -1428,6 +1428,7 @@ struct ARM64 {
   static constexpr u32 plt_hdr_size = 32;
   static constexpr u32 plt_size = 16;
   static constexpr u32 pltgot_size = 16;
+  static constexpr u32 tls_offset = 16;
   static constexpr bool is_rel = false;
   static constexpr bool supports_tlsdesc = true;
 };
@@ -1461,6 +1462,7 @@ struct ARM32 {
   static constexpr u32 plt_hdr_size = 32;
   static constexpr u32 plt_size = 16;
   static constexpr u32 pltgot_size = 16;
+  static constexpr u32 tls_offset = 8;
   static constexpr bool is_rel = true;
   static constexpr bool supports_tlsdesc = true;
 };
@@ -1493,6 +1495,7 @@ struct RISCV64 {
   static constexpr u32 plt_hdr_size = 32;
   static constexpr u32 plt_size = 16;
   static constexpr u32 pltgot_size = 16;
+  static constexpr u32 tls_offset = 0;
   static constexpr bool is_rel = false;
   static constexpr bool supports_tlsdesc = false;
 };
diff --git a/elf/output-chunks.cc b/elf/output-chunks.cc
index 37378f31..a5efc443 100644
--- a/elf/output-chunks.cc
+++ b/elf/output-chunks.cc
@@ -356,7 +356,7 @@ void RelDynSection<E>::update_shdr(Context<E> &ctx) {
 
 template <typename E>
 static ElfRel<E> reloc(u64 offset, u32 type, u32 sym, i64 addend = 0) {
-  if constexpr (std::is_same_v<E, I386> || std::is_same_v<E, ARM32>)
+  if constexpr (E::is_rel)
     return {(u32)offset, (u8)type, sym};
   else
     return {offset, type, sym, addend};
@@ -1077,17 +1077,18 @@ std::vector<GotEntry<E>> GotSection<E>::get_entries(Context<E> &ctx) const {
       continue;
     }
 
-    // Otherwise, we know the offset at link-time, so fill the GOT entry.
+    // Otherwise, we know the offset from the thread pointer (TP) at
+    // link-time, so we can fill the GOT entry directly.
+    //
+    // On x86, TP (%gs for 32-bit, %fs for 64-bit) points to the end of
+    // all thread-local variables for a historical reason, so the offset
+    // we calculate here will be negative. On other architectures, TP
+    // points to an optional padding whose size is architecture-dependent
+    // followed by thread-local variables.
     if constexpr (std::is_same_v<E, X86_64> || std::is_same_v<E, I386>)
       entries.push_back({idx, sym->get_addr(ctx) - ctx.tls_end});
-    else if constexpr (std::is_same_v<E, ARM32>)
-      entries.push_back({idx, sym->get_addr(ctx) - ctx.tls_begin + 8});
-    else if constexpr (std::is_same_v<E, ARM64>)
-      entries.push_back({idx, sym->get_addr(ctx) - ctx.tls_begin + 16});
-    else if constexpr (std::is_same_v<E, RISCV64>)
-      entries.push_back({idx, sym->get_addr(ctx) - ctx.tls_begin});
     else
-      unreachable();
+      entries.push_back({idx, sym->get_addr(ctx) - ctx.tls_begin + E::tls_offset});
   }
 
   if (tlsld_idx != -1)
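
The three tls_offset constants added to elf/elf.h encode where the thread pointer (TP) sits relative to the TLS segment on each target. The sketch below is illustrative only, not mold source: tprel, tprel_x86, and the sample addresses are hypothetical, and tls_begin/tls_end stand in for the linker's Context fields. It shows how the single per-target constant replaces the hard-coded +8/+16/+0 terms this patch removes:

#include <cstdint>
#include <cstdio>

using u32 = uint32_t;
using u64 = uint64_t;
using i64 = int64_t;

// Illustrative sketch, not mold source. These mirror the constants added
// to elf/elf.h: the distance from TP to the start of the TLS segment.
struct ARM64   { static constexpr u32 tls_offset = 16; }; // 16-byte TCB before TLS
struct ARM32   { static constexpr u32 tls_offset = 8; };  //  8-byte TCB before TLS
struct RISCV64 { static constexpr u32 tls_offset = 0; };  // TP points straight at TLS

// TP-relative offset on targets where TP sits just below the TLS segment
// ("variant I" in TLS ABI terms). This is the expression the patch factors
// out of every relocation handler: S + A - tls_begin + tls_offset.
template <typename E>
i64 tprel(u64 sym_addr, u64 tls_begin) {
  return (i64)(sym_addr - tls_begin) + E::tls_offset;
}

// On x86/x86-64 ("variant II"), TP points to the *end* of the TLS segment,
// so TP-relative offsets come out negative and no tls_offset is involved.
i64 tprel_x86(u64 sym_addr, u64 tls_end) {
  return (i64)(sym_addr - tls_end);
}

int main() {
  u64 tls_begin = 0x1000, tls_end = 0x1100;
  u64 var = tls_begin + 16; // a thread-local variable 16 bytes into the segment

  std::printf("arm64:   %+lld\n", (long long)tprel<ARM64>(var, tls_begin));   // +32
  std::printf("arm32:   %+lld\n", (long long)tprel<ARM32>(var, tls_begin));   // +24
  std::printf("riscv64: %+lld\n", (long long)tprel<RISCV64>(var, tls_begin)); // +16
  std::printf("x86-64:  %+lld\n", (long long)tprel_x86(var, tls_end));        // -240
}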
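
The reloc() change in elf/output-chunks.cc uses the same trick for a different target property: E::is_rel distinguishes REL targets (i386 and ARM32 here), whose dynamic relocations carry no explicit addend, from RELA targets, whose records do. Below is a hedged sketch of that distinction; make_reloc is hypothetical and the struct layouts follow the generic ELF spec rather than mold's ElfRel definitions:

#include <cstdint>

// Illustrative sketch, not mold source.
//
// REL: the addend is stored in the bytes being relocated, so a record is
// just an offset plus a symbol/type word (Elf32_Rel in the ELF spec).
struct Rel32 { uint32_t r_offset; uint32_t r_info; };

// RELA: the addend travels inside the relocation record (Elf64_Rela).
struct Rela64 { uint64_t r_offset; uint64_t r_info; int64_t r_addend; };

struct ARM32 { static constexpr bool is_rel = true; };
struct ARM64 { static constexpr bool is_rel = false; };

// One `if constexpr` on a per-target constant replaces the growing
// std::is_same_v<E, I386> || std::is_same_v<E, ARM32> list; a new REL
// target only has to set is_rel = true in its arch struct.
template <typename E>
auto make_reloc(uint64_t offset, uint64_t info, int64_t addend = 0) {
  if constexpr (E::is_rel)
    return Rel32{(uint32_t)offset, (uint32_t)info}; // addend applied in-place
  else
    return Rela64{offset, info, addend};
}

// The caller stays target-generic; the record layout is picked at compile time.
auto r1 = make_reloc<ARM32>(0x1000, 21);       // 8-byte REL record
auto r2 = make_reloc<ARM64>(0x2000, 1027, 16); // 24-byte RELA record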