1
1
mirror of https://github.com/rui314/mold.git synced 2024-09-20 01:18:53 +03:00

Align range extension thunk entry points to 16-byte boundaries

Quote from Arm Cortex-A75 Software Optimization Guide
(PJDOC-466751330-205) p.59:

  Consider aligning subroutine entry points and branch targets to
  16-byte boundaries, within the bounds of the code-density requirements
  of the program. This ensures that the subsequent fetch can maximize
  bandwidth following the taken branch by bringing in all useful
  instructions.
This commit is contained in:
Rui Ueyama 2023-08-22 16:11:22 +09:00
parent 576cd86ecb
commit 7e9fedd203
2 changed files with 7 additions and 7 deletions

View File

@@ -575,26 +575,26 @@ void InputSection<E>::scan_relocations(Context<E> &ctx) {
// Writes this thunk's entries into the output buffer: for every symbol in
// `symbols`, one copy of the instruction template below is placed at `buf`
// and then patched using that symbol's address.
//
// NOTE(review): this listing looks like a diff hunk whose +/- markers were
// lost, so the old and new versions of several lines appear side by side
// (each line using `data` / `sizeof(data)` is presumably the removed
// counterpart of the neighbouring `insn` / `E::thunk_size` line) — confirm
// against the repository before treating this as compilable code.
template <>
void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
// Instruction template for a single thunk entry. The adrp and add are
// emitted with placeholder immediates that the loop below fills in.
static const ul32 data[] = {
static const ul32 insn[] = {
0x9000'0010, // adrp x16, 0 # R_AARCH64_ADR_PREL_PG_HI21
0x9100'0210, // add x16, x16 # R_AARCH64_ADD_ABS_LO12_NC
0xd61f'0200, // br x16
0xd503'201f, // nop
};
// The template must be exactly as large as the per-entry size declared by
// the target (E::thunk_size).
static_assert(E::thunk_size == sizeof(data));
static_assert(E::thunk_size == sizeof(insn));
// buf: location of the first entry inside ctx.buf (section sh_offset plus
// this thunk's offset). P: the matching address (section sh_addr plus the
// same offset).
u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset;
u64 P = output_section.shdr.sh_addr + offset;
for (Symbol<E> *sym : symbols) {
// S: address of the symbol this entry jumps to.
u64 S = sym->get_addr(ctx);
// Start from a pristine copy of the template.
memcpy(buf, data, sizeof(data));
memcpy(buf, insn, E::thunk_size);
// Patch the first instruction (adrp) with the page difference between the
// target S and this entry's own address P.
write_adrp(buf, page(S) - page(P));
// OR bits(S, 11, 0) — presumably the low 12 bits of S — into the second
// instruction (add) starting at bit 10.
*(ul32 *)(buf + 4) |= bits(S, 11, 0) << 10;
// Advance both the write pointer and the address to the next entry.
buf += sizeof(data);
P += sizeof(data);
buf += E::thunk_size;
P += E::thunk_size;
}
}

View File

@@ -2000,7 +2000,7 @@ struct ARM64 {
static constexpr u32 plt_size = 16;
static constexpr u32 pltgot_size = 16;
static constexpr u32 thunk_hdr_size = 0;
static constexpr u32 thunk_size = 12;
static constexpr u32 thunk_size = 16;
static constexpr u32 R_COPY = R_AARCH64_COPY;
static constexpr u32 R_GLOB_DAT = R_AARCH64_GLOB_DAT;