mirror of
https://github.com/rui314/mold.git
synced 2024-09-20 01:18:53 +03:00
Implement range extension thunks for LoongArch
This commit is contained in:
parent
9441170d39
commit
3b5ccb9342
@ -590,7 +590,7 @@ void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
|
||||
u64 P = output_section.shdr.sh_addr + offset + i * E::thunk_size;
|
||||
|
||||
u8 *loc = buf + i * E::thunk_size;
|
||||
memcpy(loc , data, sizeof(data));
|
||||
memcpy(loc, data, sizeof(data));
|
||||
write_adrp(loc, page(S) - page(P));
|
||||
*(ul32 *)(loc + 4) |= bits(S, 11, 0) << 10;
|
||||
}
|
||||
|
@ -11,8 +11,12 @@
|
||||
// Linux, GCC, LLVM, etc.
|
||||
//
|
||||
// All instructions are 4 bytes long in LoongArch and aligned to 4-byte
|
||||
// boundaries. The psABI defines a few linker relaxations. We haven't
|
||||
// supported them yet, though.
|
||||
// boundaries. It has 32 general-purpose registers. Among these, $t0 - $t8
|
||||
// (aliases for $r12 - $r20) are temporary registers that we can use in
|
||||
// our PLT and range extension thunks.
|
||||
//
|
||||
// The psABI defines a few linker relaxations. We haven't supported them
|
||||
// yet.
|
||||
//
|
||||
// https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html
|
||||
|
||||
@ -33,7 +37,7 @@ static u64 hi20(u64 val, u64 pc) {
|
||||
// register with the following instructions:
|
||||
//
|
||||
// pcalau12i $rN, %hi20(sym)
|
||||
// addi.d $rN, $zero, %lo12(sym)
|
||||
// addi.d $rN, $rN, %lo12(sym)
|
||||
//
|
||||
// PCALAU12I materializes bits [63:12] by computing (pc + imm << 12)
|
||||
// and zero-clearing [11:0]. ADDI.D sign-extends its 12 bit immediate and
|
||||
@ -313,10 +317,13 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
|
||||
check_branch(S + A - P, -(1 << 22), 1 << 22);
|
||||
write_d5k16(loc, (S + A - P) >> 2);
|
||||
break;
|
||||
case R_LARCH_B26:
|
||||
check_branch(S + A - P, -(1 << 27), 1 << 27);
|
||||
write_d10k16(loc, (S + A - P) >> 2);
|
||||
case R_LARCH_B26: {
|
||||
i64 val = S + A - P;
|
||||
if (val < -(1 << 27) || (1 << 27) <= val)
|
||||
val = get_thunk_addr(i) + A - P;
|
||||
write_d10k16(loc, val >> 2);
|
||||
break;
|
||||
}
|
||||
case R_LARCH_ABS_HI20:
|
||||
write_j20(loc, (S + A) >> 12);
|
||||
break;
|
||||
@ -668,6 +675,30 @@ void InputSection<E>::scan_relocations(Context<E> &ctx) {
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
|
||||
u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset;
|
||||
|
||||
static const ul32 insn[] = {
|
||||
0x1a00'000c, // pcalau12i $t0, 0
|
||||
0x02c0'018c, // addi.d $t0, $t0, 0
|
||||
0x4c00'0180, // jirl $zero, $t0, 0
|
||||
0x0340'0000, // nop
|
||||
};
|
||||
|
||||
static_assert(E::thunk_size == sizeof(insn));
|
||||
|
||||
for (i64 i = 0; i < symbols.size(); i++) {
|
||||
u64 S = symbols[i]->get_addr(ctx);
|
||||
u64 P = output_section.shdr.sh_addr + offset + i * E::thunk_size;
|
||||
|
||||
u8 *loc = buf + i * E::thunk_size;
|
||||
memcpy(loc, insn, sizeof(insn));
|
||||
write_j20(loc, hi20(S, P) >> 12);
|
||||
write_k12(loc + 4, S);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace mold::elf
|
||||
|
||||
#endif
|
||||
|
@ -2377,6 +2377,8 @@ struct LOONGARCH64 {
|
||||
static constexpr u32 plt_hdr_size = 32;
|
||||
static constexpr u32 plt_size = 16;
|
||||
static constexpr u32 pltgot_size = 16;
|
||||
static constexpr u32 thunk_hdr_size = 0;
|
||||
static constexpr u32 thunk_size = 16;
|
||||
|
||||
static constexpr u32 R_COPY = R_LARCH_COPY;
|
||||
static constexpr u32 R_GLOB_DAT = R_LARCH_64;
|
||||
@ -2400,6 +2402,8 @@ struct LOONGARCH32 {
|
||||
static constexpr u32 plt_hdr_size = 32;
|
||||
static constexpr u32 plt_size = 16;
|
||||
static constexpr u32 pltgot_size = 16;
|
||||
static constexpr u32 thunk_hdr_size = 0;
|
||||
static constexpr u32 thunk_size = 16;
|
||||
|
||||
static constexpr u32 R_COPY = R_LARCH_COPY;
|
||||
static constexpr u32 R_GLOB_DAT = R_LARCH_32;
|
||||
|
@ -114,7 +114,7 @@ public:
|
||||
idx * E::thunk_size;
|
||||
}
|
||||
|
||||
static constexpr i64 alignment = 4;
|
||||
static constexpr i64 alignment = 16;
|
||||
|
||||
OutputSection<E> &output_section;
|
||||
i64 offset;
|
||||
|
@ -20,7 +20,8 @@
|
||||
// we don't need to try too hard to reduce thunk size to the absolute
|
||||
// minimum.
|
||||
|
||||
#if MOLD_ARM32 || MOLD_ARM64 || MOLD_PPC32 || MOLD_PPC64V1 || MOLD_PPC64V2
|
||||
#if MOLD_ARM32 || MOLD_ARM64 || MOLD_PPC32 || MOLD_PPC64V1 || MOLD_PPC64V2 || \
|
||||
MOLD_LOONGARCH64 || MOLD_LOONGARCH32
|
||||
|
||||
#include "mold.h"
|
||||
|
||||
@ -37,7 +38,9 @@ static consteval i64 max_distance() {
|
||||
// and therefore the least two bits are always zero. So the branch
|
||||
// operand is effectively 28 bits long. That means the branch range is
|
||||
// [-2^27, 2^27) or PC ± 128 MiB.
|
||||
if (is_arm64<E>)
|
||||
//
|
||||
// LoongArch's B and BL instructions also take a 26 bit immediate.
|
||||
if (is_arm64<E> || is_loongarch<E>)
|
||||
return 1 << 27;
|
||||
|
||||
// ARM32's Thumb branch has 24 bits immediate, and the instructions are
|
||||
@ -76,9 +79,11 @@ static bool needs_thunk_rel(const ElfRel<E> &r) {
|
||||
ty == R_ARM_PLT32;
|
||||
} else if constexpr (is_ppc32<E>) {
|
||||
return ty == R_PPC_REL24 || ty == R_PPC_PLTREL24 || ty == R_PPC_LOCAL24PC;
|
||||
} else {
|
||||
static_assert(is_ppc64<E>);
|
||||
} else if constexpr (is_ppc64<E>) {
|
||||
return ty == R_PPC64_REL24 || ty == R_PPC64_REL24_NOTOC;
|
||||
} else {
|
||||
static_assert(is_loongarch<E>);
|
||||
return ty == R_LARCH_B26;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8,9 +8,6 @@
|
||||
# It looks like SPARC's runtime can't handle PLT if it's too far from GOT.
|
||||
[ $MACHINE = sparc64 ] && skip
|
||||
|
||||
# The crt*.o files are compiled with B26 branches, which end up too far from the GOT.
|
||||
[[ $MACHINE = loongarch* ]] && skip
|
||||
|
||||
cat <<EOF > $t/a.c
|
||||
#include <stdio.h>
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user