mirror of
https://github.com/rui314/mold.git
synced 2024-09-20 17:39:56 +03:00
596 lines
18 KiB
C++
596 lines
18 KiB
C++
// ARM32 is a bit special from the linker's viewpoint because ARM
|
|
// processors support two different instruction encodings: Thumb and
|
|
// ARM (in a narrower sense). Thumb instructions are either 16 bits or
|
|
// 32 bits, while ARM instructions are all 32 bits. Feature-wise,
|
|
// thumb is a subset of ARM, so not all ARM instructions are
|
|
// representable in Thumb.
|
|
//
|
|
// ARM processors originally supported only ARM instructions. Thumb
|
|
// instructions were later added to increase code density.
|
|
//
|
|
// ARM processors run in either ARM mode or Thumb mode. The mode can
|
|
// be switched using BX (branch and mode exchange)-family instructions.
|
|
// We need to use such instructions to, for example, call a function
|
|
// encoded in Thumb from a function encoded in ARM. Sometimes, the
|
|
// linker even has to emit an interworking thunk code to switch from
|
|
// Thumb to ARM.
|
|
//
|
|
// ARM instructions are aligned to 4 byte boundaries. Thumb are to 2
|
|
// byte boundaries.
|
|
//
|
|
// You can distinguish Thumb functions from ARM functions by looking
|
|
// at the least significant bit (LSB) of its "address". If LSB is 0,
|
|
// it's ARM; otherwise, Thumb. LSB is not a part of its real address.
|
|
// For example, if a symbol `foo` is of type STT_FUNC and has value
|
|
// 0x2001, then `foo` is a function using Thumb instructions whose
|
|
// address is 0x2000 (not 0x2001).
|
|
|
|
#include "mold.h"
|
|
|
|
#include <tbb/parallel_for.h>
|
|
#include <tbb/parallel_for_each.h>
|
|
#include <tbb/parallel_sort.h>
|
|
|
|
namespace mold::elf {
|
|
|
|
using E = ARM32;
|
|
|
|
// Patch the 16-bit immediate of an ARM MOVW/MOVT-family instruction at
// `loc` with the low 16 bits of `val`. The encoding splits the immediate
// into imm4 (instruction bits 19-16) and imm12 (instruction bits 11-0).
static void write_mov_imm(u8 *loc, u32 val) {
  u32 hi4 = bits(val, 15, 12);
  u32 lo12 = bits(val, 11, 0);

  u32 insn = *(ul32 *)loc;
  insn &= 0xfff0f000;            // clear the immediate fields
  insn |= (hi4 << 16) | lo12;    // insert the new immediate
  *(ul32 *)loc = insn;
}
|
|
|
|
// Patch the 25-bit branch displacement of a Thumb-2 BL/BLX-family
// instruction pair at `loc` with `val`. The displacement is scattered
// across the two 16-bit halfwords as S, imm10, J1, J2 and imm11, where
// J1/J2 are XOR-folded with the sign bit (I1/I2) per the architecture
// manual's encoding.
static void write_thm_b_imm(u8 *loc, u32 val) {
  // https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/BL--BLX--immediate-
  u32 sign = bit(val, 24);
  u32 I1 = bit(val, 23);
  u32 I2 = bit(val, 22);
  u32 J1 = !I1 ^ sign;
  u32 J2 = !I2 ^ sign;
  u32 imm10 = bits(val, 21, 12);
  u32 imm11 = bits(val, 11, 1);   // bit 0 is dropped; targets are 2-byte aligned

  // First halfword: keep the opcode bits (mask 0xf800), insert S and imm10.
  *(ul16 *)loc = (*(ul16 *)loc & 0xf800) | (sign << 10) | imm10;
  // Second halfword: keep the opcode bits (mask 0xd000), insert J1, J2, imm11.
  *(ul16 *)(loc + 2) =
    (*(ul16 *)(loc + 2) & 0xd000) | (J1 << 13) | (J2 << 11) | imm11;
}
|
|
|
|
// Patch the 16-bit immediate of a Thumb-2 MOVW/MOVT-family instruction
// pair at `loc` with the low 16 bits of `val`. The immediate is scattered
// across the two halfwords as imm4, i, imm3 and imm8.
// https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/MOVT
static void write_thm_mov_imm(u8 *loc, u32 val) {
  u32 imm4 = bits(val, 15, 12);
  u32 i = bit(val, 11);
  u32 imm3 = bits(val, 10, 8);
  u32 imm8 = bits(val, 7, 0);

  ul16 *first = (ul16 *)loc;
  ul16 *second = (ul16 *)(loc + 2);

  // 0xfbf0 keeps the opcode bits of the first halfword; 0x8f00 keeps
  // those of the second. (Equivalent to the binary literals
  // 0b1111'1011'1111'0000 and 0b1000'1111'0000'0000.)
  *first = (*first & 0xfbf0) | (i << 10) | imm4;
  *second = (*second & 0x8f00) | (imm3 << 12) | imm8;
}
|
|
|
|
template <>
void PltSection<E>::copy_buf(Context<E> &ctx) {
  u8 *buf = ctx.buf + this->shdr.sh_offset;

  // PLT header: saves lr, loads the PC-relative displacement to .got.plt
  // from the literal word at offset 16, materializes the .got.plt address
  // in lr, then jumps through the resolver slot at .got.plt+8.
  static const u32 plt0[] = {
    0xe52de004, // push {lr}
    0xe59fe004, // ldr lr, 2f
    0xe08fe00e, // 1: add lr, pc, lr
    0xe5bef008, // ldr pc, [lr, #8]!
    0x00000000, // 2: .word .got.plt - 1b - 8
    0xe320f000, // nop
    0xe320f000, // nop
    0xe320f000, // nop
  };

  memcpy(buf, plt0, sizeof(plt0));
  // The `add` at offset 8 reads pc as (its address + 8) = section start + 16,
  // hence the -16 in the displacement stored at offset 16.
  *(ul32 *)(buf + 16) = ctx.gotplt->shdr.sh_addr - this->shdr.sh_addr - 16;

  // One fixed-size entry per PLT symbol: load the displacement word at
  // entry offset 12, add pc to form the symbol's .got.plt slot address,
  // and jump through it.
  for (Symbol<E> *sym : symbols) {
    static const u32 plt[] = {
      0xe59fc004, // 1: ldr ip, 2f
      0xe08cc00f, // add ip, ip, pc
      0xe59cf000, // ldr pc, [ip]
      0x00000000, // 2: .word sym@PLTGOT - 1b
    };

    u8 *ent = buf + sizeof(plt0) + sym->get_plt_idx(ctx) * sizeof(plt);
    memcpy(ent, plt, sizeof(plt));
    // The `add` at entry offset 4 reads pc as entry + 12, hence the -12.
    *(ul32 *)(ent + 12) = sym->get_gotplt_addr(ctx) - sym->get_plt_addr(ctx) - 12;
  }
}
|
|
|
|
template <>
|
|
void PltGotSection<E>::copy_buf(Context<E> &ctx) {
|
|
u8 *buf = ctx.buf + this->shdr.sh_offset;
|
|
|
|
for (Symbol<E> *sym : symbols) {
|
|
static const u32 plt[] = {
|
|
0xe59fc004, // 1: ldr ip, 2f
|
|
0xe08cc00f, // add ip, ip, pc
|
|
0xe59cf000, // ldr pc, [ip]
|
|
0x00000000, // 2: .word sym@GOT - 1b
|
|
};
|
|
|
|
u8 *ent = buf + sym->get_pltgot_idx(ctx) * sizeof(plt);
|
|
memcpy(ent, plt, sizeof(plt));
|
|
*(ul32 *)(ent + 12) = sym->get_got_addr(ctx) - sym->get_plt_addr(ctx) - 12;
|
|
}
|
|
}
|
|
|
|
// ARM does not use .eh_frame for exception handling. Instead, it uses
// .ARM.exidx and .ARM.extab (see sort_arm_exidx below). So this function
// is intentionally a no-op.
template <>
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
                                    u64 offset, u64 val) {}
|
|
|
|
// Apply relocations to a SHF_ALLOC section's contents in the output
// buffer. `base` points at this section's bytes in the output image.
// Dynamic relocations (for PIC/imported symbols) are emitted into the
// slot in .rel.dyn reserved for this section during scan_relocations.
template <>
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
  ElfRel<E> *dynrel = nullptr;
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  i64 frag_idx = 0;

  if (ctx.reldyn)
    dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
                           file.reldyn_offset + this->reldyn_offset);

  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_ARM_NONE)
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;

    // If the relocation target was merged into a section fragment
    // (e.g. a mergeable string), resolve against the fragment instead
    // of the symbol. rel_fragments entries are matched by relocation index.
    const SectionFragmentRef<E> *frag_ref = nullptr;
    if (rel_fragments && rel_fragments[frag_idx].idx == i)
      frag_ref = &rel_fragments[frag_idx++];

    // Standard psABI relocation operands:
    //   S   - address of the symbol (or fragment)
    //   A   - addend
    //   P   - address of the place being relocated
    //   T   - 1 if the target is Thumb code, 0 if ARM (the address LSB trick)
    //   G   - offset of the symbol's GOT slot from the GOT base
    //   GOT - address of the GOT
#define S (frag_ref ? frag_ref->frag->get_addr(ctx) : sym.get_addr(ctx))
#define A (frag_ref ? frag_ref->addend : this->get_addend(rel))
#define P (output_section->shdr.sh_addr + offset + rel.r_offset)
#define T (sym.get_addr(ctx) & 1)
#define G (sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr)
#define GOT ctx.got->shdr.sh_addr

    switch (rel.r_type) {
    case R_ARM_ABS32:
    case R_ARM_TARGET1:
      // Absolute word. For PIC output we need a dynamic relocation:
      // symbolic for imported symbols, RELATIVE (base-relative) otherwise.
      if (sym.is_absolute() || !ctx.arg.pic) {
        *(ul32 *)loc = S + A;
      } else if (sym.is_imported) {
        *dynrel++ = {P, R_ARM_ABS32, (u32)sym.get_dynsym_idx(ctx)};
      } else {
        if (!is_relr_reloc(ctx, rel))
          *dynrel++ = {P, R_ARM_RELATIVE, 0};
        *(ul32 *)loc = S + A;
      }
      continue;
    case R_ARM_REL32:
      *(ul32 *)loc = S + A - P;
      continue;
    case R_ARM_THM_CALL:
      // THM_CALL relocation refers either BL or BLX instruction.
      // They are different in only one bit. We need to use BL if
      // the jump target is Thumb. Otherwise, use BLX.
      if (sym.esym().is_undef_weak()) {
        // On ARM, calling a weak undefined symbol jumps to the
        // next instruction.
        write_thm_b_imm(loc, 4);
        *(ul16 *)(loc + 2) |= (1 << 12);  // rewrite with BL
      } else if (T) {
        write_thm_b_imm(loc, S + A - P);
        *(ul16 *)(loc + 2) |= (1 << 12);  // rewrite with BL
      } else {
        // BLX requires a 4-byte-aligned displacement.
        write_thm_b_imm(loc, align_to(S + A - P, 4));
        *(ul16 *)(loc + 2) &= ~(1 << 12); // rewrite with BLX
      }
      continue;
    case R_ARM_BASE_PREL:
      *(ul32 *)loc = GOT + A - P;
      continue;
    case R_ARM_GOT_PREL:
      *(ul32 *)loc = G + A - P;
      continue;
    case R_ARM_GOT_BREL:
      *(ul32 *)loc = G + A;
      continue;
    case R_ARM_TARGET2:
      // TARGET2 is treated here as GOT-relative (GOT + G is the address
      // of the symbol's GOT slot).
      *(ul32 *)loc = GOT + G + A - P;
      continue;
    case R_ARM_CALL:
    case R_ARM_JUMP24: {
      u32 val;

      if (sym.esym().is_undef_weak()) {
        // On ARM, calling a weak undefined symbol jumps to the
        // next instruction.
        val = 4;
      } else {
        val = S + A - P;
      }

      // Insert the displacement into the low 24 bits (word-scaled).
      *(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | ((val >> 2) & 0x00ff'ffff);
      continue;
    }
    case R_ARM_THM_JUMP11: {
      assert(T);
      u32 val = (S + A - P) >> 1;
      *(ul16 *)loc = (*(ul16 *)loc & 0xf800) | (val & 0x07ff);
      continue;
    }
    case R_ARM_THM_JUMP24:
      // Thumb B has no BX-style mode-switching form, so a branch to ARM
      // code is routed through a thunk in the .thumb_to_arm section.
      if (T) {
        write_thm_b_imm(loc, S + A - P);
      } else {
        assert(sym.extra.thumb_to_arm_thunk_idx != -1);
        u64 thunk_addr =
          ctx.thumb_to_arm->shdr.sh_addr +
          sym.extra.thumb_to_arm_thunk_idx * ThumbToArmSection::ENTRY_SIZE;
        write_thm_b_imm(loc, thunk_addr - P - 4);
      }
      continue;
    case R_ARM_MOVW_PREL_NC:
      // OR-ing T keeps the Thumb bit in the materialized address.
      write_mov_imm(loc, ((S + A) | T) - P);
      continue;
    case R_ARM_MOVW_ABS_NC:
      write_mov_imm(loc, (S + A) | T);
      continue;
    case R_ARM_THM_MOVW_PREL_NC:
      write_thm_mov_imm(loc, ((S + A) | T) - P);
      continue;
    case R_ARM_PREL31: {
      // 31-bit signed PC-relative value; the top bit of the word is kept.
      u32 val = S + A - P;
      *(ul32 *)loc = (*(ul32 *)loc & 0x8000'0000) | (val & 0x7fff'ffff);
      continue;
    }
    case R_ARM_THM_MOVW_ABS_NC:
      write_thm_mov_imm(loc, (S + A) | T);
      continue;
    case R_ARM_MOVT_PREL:
      write_mov_imm(loc, (S + A - P) >> 16);
      continue;
    case R_ARM_THM_MOVT_PREL:
      write_thm_mov_imm(loc, (S + A - P) >> 16);
      continue;
    case R_ARM_MOVT_ABS:
      write_mov_imm(loc, (S + A) >> 16);
      continue;
    case R_ARM_THM_MOVT_ABS:
      write_thm_mov_imm(loc, (S + A) >> 16);
      continue;
    case R_ARM_TLS_GD32:
      *(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - P;
      continue;
    case R_ARM_TLS_LDM32:
      *(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - P;
      continue;
    case R_ARM_TLS_LDO32:
      *(ul32 *)loc = S + A - ctx.tls_begin;
      continue;
    case R_ARM_TLS_IE32:
      *(ul32 *)loc = sym.get_gottp_addr(ctx) + A - P;
      continue;
    case R_ARM_TLS_LE32:
      // NOTE(review): the +8 presumably accounts for ARM's fixed TCB
      // header between the thread pointer and the TLS block — confirm
      // against the ARM TLS ABI.
      *(ul32 *)loc = S + A - ctx.tls_begin + 8;
      continue;
    case R_ARM_TLS_GOTDESC:
      // A tlsdesc index of -1 means the descriptor was relaxed away;
      // write the LE-style offset instead (same +8 as TLS_LE32 above).
      if (sym.get_tlsdesc_idx(ctx) == -1)
        *(ul32 *)loc = S - ctx.tls_begin + 8;
      else
        *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) + A - P - 6;
      continue;
    case R_ARM_THM_TLS_CALL:
      if (sym.get_tlsdesc_idx(ctx) == -1) {
        // BL -> NOP
        *(ul32 *)loc = 0x8000f3af;
      } else {
        u64 addr = ctx.tls_trampoline->shdr.sh_addr;
        write_thm_b_imm(loc, align_to(addr - P - 4, 4));
        *(ul16 *)(loc + 2) &= ~(1 << 12); // rewrite BL with BLX
      }
      continue;
    default:
      Error(ctx) << *this << ": unknown relocation: " << rel;
    }

#undef S
#undef A
#undef P
#undef T
#undef G
#undef GOT
  }
}
|
|
|
|
// Apply relocations to a non-SHF_ALLOC section (e.g. debug info). Such
// sections are not mapped at runtime, so only a small set of absolute
// relocation types is meaningful here, and dead references are replaced
// with tombstone values where available.
template <>
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_ARM_NONE)
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;

    if (!sym.file) {
      report_undef(ctx, file, sym);
      continue;
    }

    // Resolve against a merged section fragment if the target was merged.
    SectionFragment<E> *frag;
    i64 addend;
    std::tie(frag, addend) = get_fragment(ctx, rel);

    // S = symbol/fragment address, A = addend (psABI notation).
#define S (frag ? frag->get_addr(ctx) : sym.get_addr(ctx))
#define A (frag ? addend : this->get_addend(rel))

    switch (rel.r_type) {
    case R_ARM_ABS32:
      // A tombstone replaces references to discarded sections so that
      // e.g. debug info doesn't point at reused addresses. Fragments
      // are never discarded, so skip the check for them.
      if (!frag) {
        if (std::optional<u64> val = get_tombstone(sym)) {
          *(ul32 *)loc = *val;
          break;
        }
      }
      *(ul32 *)loc = S + A;
      break;
    case R_ARM_TLS_LDO32:
      if (std::optional<u64> val = get_tombstone(sym))
        *(ul32 *)loc = *val;
      else
        *(ul32 *)loc = S + A - ctx.tls_begin;
      break;
    default:
      Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
                 << rel;
      break;
    }

#undef S
#undef A
  }
}
|
|
|
|
// First pass over relocations: decide, per symbol, what auxiliary data
// structures (GOT/PLT/TLS slots, copy relocations, thunks, dynamic
// relocations) the output will need, by setting sym.flags. The actual
// values are written later by apply_reloc_alloc.
template <>
void InputSection<E>::scan_relocations(Context<E> &ctx) {
  assert(shdr().sh_flags & SHF_ALLOC);

  // Reserve this section's slice of .rel.dyn.
  this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  // Scan relocations
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_ARM_NONE)
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];

    if (!sym.file) {
      report_undef(ctx, file, sym);
      continue;
    }

    // An ifunc is always resolved through GOT/PLT regardless of how
    // it is referenced.
    if (sym.get_type() == STT_GNU_IFUNC) {
      sym.flags |= NEEDS_GOT;
      sym.flags |= NEEDS_PLT;
    }

    // Each table row is indexed by output kind (DSO/PIE/PDE) and each
    // column by how the symbol resolves; dispatch() picks the Action.
    switch (rel.r_type) {
    case R_ARM_ABS32:
    case R_ARM_MOVT_ABS:
    case R_ARM_THM_MOVT_ABS:
    case R_ARM_TARGET1: {
      Action table[][4] = {
        // Absolute  Local    Imported data  Imported code
        {  NONE,     BASEREL, DYNREL,        DYNREL },     // DSO
        {  NONE,     BASEREL, DYNREL,        DYNREL },     // PIE
        {  NONE,     NONE,    COPYREL,       CPLT },       // PDE
      };
      dispatch(ctx, table, i, rel, sym);
      break;
    }
    case R_ARM_REL32:
    case R_ARM_BASE_PREL:
      break;
    case R_ARM_THM_CALL: {
      Action table[][4] = {
        // Absolute  Local    Imported data  Imported code
        {  NONE,     NONE,    PLT,           PLT },        // DSO
        {  NONE,     NONE,    PLT,           PLT },        // PIE
        {  NONE,     NONE,    PLT,           PLT },        // PDE
      };
      dispatch(ctx, table, i, rel, sym);
      break;
    }
    case R_ARM_GOT_PREL:
    case R_ARM_GOT_BREL:
    case R_ARM_TARGET2:
      sym.flags |= NEEDS_GOT;
      break;
    case R_ARM_CALL:
    case R_ARM_JUMP24:
      if (sym.is_imported)
        sym.flags |= NEEDS_PLT;
      break;
    case R_ARM_THM_JUMP24:
      // A Thumb B to ARM code needs a mode-switching thunk (see
      // ThumbToArmSection). An even st_value means the target is ARM.
      if (sym.is_imported || sym.get_type() == STT_GNU_IFUNC)
        sym.flags |= NEEDS_PLT | NEEDS_THUMB_TO_ARM_THUNK;
      else if (sym.esym().st_value % 2 == 0)
        sym.flags |= NEEDS_THUMB_TO_ARM_THUNK;
      break;
    case R_ARM_MOVT_PREL:
    case R_ARM_THM_MOVT_PREL:
    case R_ARM_PREL31: {
      Action table[][4] = {
        // Absolute  Local    Imported data  Imported code
        {  ERROR,    NONE,    ERROR,         ERROR },      // DSO
        {  ERROR,    NONE,    COPYREL,       PLT },        // PIE
        {  NONE,     NONE,    COPYREL,       PLT },        // PDE
      };
      dispatch(ctx, table, i, rel, sym);
      break;
    }
    case R_ARM_TLS_GD32:
      sym.flags |= NEEDS_TLSGD;
      break;
    case R_ARM_TLS_LDM32:
      ctx.needs_tlsld = true;
      break;
    case R_ARM_TLS_IE32:
      sym.flags |= NEEDS_GOTTP;
      break;
    case R_ARM_TLS_GOTDESC:
      // A TLS descriptor is only needed if we cannot relax it to a
      // static model (relaxation is impossible for imported symbols).
      if (!ctx.relax_tlsdesc || sym.is_imported)
        sym.flags |= NEEDS_TLSDESC;
      break;
    case R_ARM_THM_JUMP11:
    case R_ARM_MOVW_PREL_NC:
    case R_ARM_MOVW_ABS_NC:
    case R_ARM_THM_MOVW_PREL_NC:
    case R_ARM_THM_MOVW_ABS_NC:
    case R_ARM_TLS_LDO32:
    case R_ARM_TLS_LE32:
    case R_ARM_THM_TLS_CALL:
      // These need no auxiliary data; values are computed in-place later.
      break;
    default:
      Error(ctx) << *this << ": unknown relocation: " << rel;
    }
  }
}
|
|
|
|
// If a function referenced by a Thumb B (branch) instruction is
|
|
// resolved to a non-Thumb function, we can't directly jump from the
|
|
// thumb function to the ARM function. We can't rewrite B with BX
|
|
// because there's no such BX instruction that takes an immediate.
|
|
// BX takes only a register.
|
|
//
|
|
// In order to support such branch, we insert a small piece of code to
|
|
// the resulting executable which switches the processor mode from
|
|
// Thumb to ARM. This section contains such code.
|
|
// Register `sym` as needing a Thumb-to-ARM interworking thunk. A thunk
// index of -1 means no thunk has been assigned yet; in that case assign
// the next slot and remember the symbol so copy_buf() can emit code for
// it. Calling this twice for the same symbol is a no-op.
void ThumbToArmSection::add_symbol(Context<E> &ctx, Symbol<E> *sym) {
  if (sym->extra.thumb_to_arm_thunk_idx != -1)
    return;

  sym->extra.thumb_to_arm_thunk_idx = symbols.size();
  symbols.push_back(sym);
}
|
|
|
|
// The section is simply one fixed-size thunk per registered symbol.
void ThumbToArmSection::update_shdr(Context<E> &ctx) {
  this->shdr.sh_size = symbols.size() * ENTRY_SIZE;
}
|
|
|
|
// Emit one interworking thunk per registered symbol. Each thunk
// materializes a PC-relative 32-bit displacement to the target into ip
// with a movw/movt pair, adds pc, and switches to ARM mode with `bx ip`.
void ThumbToArmSection::copy_buf(Context<E> &ctx) {
  u8 *buf = ctx.buf + this->shdr.sh_offset;
  i64 offset = 0;

  static u8 insn[] = {
    0x40, 0xf2, 0x00, 0x0c, // movw ip, $0
    0xc0, 0xf2, 0x00, 0x0c, // movt ip, $0
    0xfc, 0x44,             // add  ip, pc
    0x60, 0x47,             // bx   ip
  };

  static_assert(sizeof(insn) == ENTRY_SIZE);

  for (Symbol<E> *sym : symbols) {
    memcpy(buf + offset, insn, sizeof(insn));

    // Displacement from the pc value read by `add ip, pc` to the target.
    // NOTE(review): the -12 presumably reflects the Thumb pc read value
    // at the `add` (instruction offset 8 plus 4) — confirm against the
    // Thumb pc-read semantics.
    u32 val = sym->get_addr(ctx) - this->shdr.sh_addr - offset - 12;
    write_thm_mov_imm(buf + offset, val);           // low 16 bits via movw
    write_thm_mov_imm(buf + offset + 4, val >> 16); // high 16 bits via movt
    offset += sizeof(insn);
  }
}
|
|
|
|
// Emit the TLSDESC trampoline: add the descriptor offset (r0) to the
// return address register, load the resolver's second word, and jump
// through it.
void TlsTrampolineSection::copy_buf(Context<E> &ctx) {
  // Trampoline code for TLSDESC
  static const u32 trampoline[] = {
    0xe08e0000, // add r0, lr, r0
    0xe5901004, // ldr r1, [r0, #4]
    0xe12fff11, // bx r1
  };

  u8 *loc = ctx.buf + this->shdr.sh_offset;
  memcpy(loc, trampoline, sizeof(trampoline));
}
|
|
|
|
// ARM executables use an .ARM.exidx section to look up an exception
|
|
// handling record for the current instruction pointer. The table needs
|
|
// to be sorted by their addresses.
|
|
//
|
|
// Other targets use .eh_frame_hdr instead for the same purpose.
|
|
// I don't know why only ARM uses the different mechanism, but it's
|
|
// likely that it's due to some historical reason.
|
|
//
|
|
// This function sorts .ARM.exidx records.
|
|
// Sort the .ARM.exidx unwind-index records by the function address they
// cover, adjusting the self-relative fields for each record's new
// position. Run after section contents have been copied to ctx.buf.
void sort_arm_exidx(Context<E> &ctx) {
  Timer t(ctx, "sort_arm_exidx");

  auto find_exidx = [&]() -> OutputSection<E> * {
    for (std::unique_ptr<OutputSection<E>> &osec : ctx.output_sections)
      if (osec->shdr.sh_type == SHT_ARM_EXIDX)
        return osec.get();
    return nullptr;
  };

  OutputSection<E> *osec = find_exidx();
  if (!osec)
    return;

  // .ARM.exidx records consist of a signed 31-bit relative address
  // and a 32-bit value. The relative address indicates the start
  // address of a function that the record covers. The value is one of
  // the following:
  //
  // 1. CANTUNWIND indicating that there's no unwinding info for the function,
  // 2. a compact unwinding record encoded into a 32-bit value, or
  // 3. a 31-bit relative address which points to a larger record in
  //    the .ARM.extab section.
  //
  // CANTUNWIND is value 1. The most significant bit is set in (2) but not
  // in (3). So they can be distinguished just by looking at a value.
  struct Entry {
    ul32 addr;
    ul32 val;
  };

  if (osec->shdr.sh_size % sizeof(Entry))
    Fatal(ctx) << "invalid .ARM.exidx section size";

  Entry *begin = (Entry *)(ctx.buf + osec->shdr.sh_offset);
  Entry *end = (Entry *)(ctx.buf + osec->shdr.sh_offset + osec->shdr.sh_size);

  // Entry plus its original index, so the relative fields can be fixed
  // up after sorting moves a record to a new slot.
  struct Entry2 {
    u32 addr;
    u32 val;
    u32 idx;
  };

  // Read section contents
  std::vector<Entry2> vec;
  vec.reserve(end - begin);
  for (Entry *it = begin; it < end; it++)
    vec.push_back({it->addr, it->val, (u32)(it - begin)});

  // Sort the records by the absolute-within-section address they cover:
  // the stored addr is self-relative, so add back each record's own
  // byte offset (idx * sizeof(Entry)) before comparing.
  tbb::parallel_sort(vec.begin(), vec.end(), [](const Entry2 &a, const Entry2 &b) {
    return sign_extend(a.addr, 30) + a.idx * sizeof(Entry) <
           sign_extend(b.addr, 30) + b.idx * sizeof(Entry);
  });

  // Write back the sorted records while adjusting relative addresses:
  // a record that moved from slot idx to slot i must have (idx - i)
  // entries' worth of bytes added to each self-relative field.
  for (i64 i = 0; i < vec.size(); i++) {
    u32 offset = (vec[i].idx - i) * sizeof(Entry);
    begin[i].addr = 0x7fff'ffff & (sign_extend(vec[i].addr, 30) + offset);

    // Only case (3) above — an extab pointer — is self-relative and
    // needs adjusting; CANTUNWIND and compact records are copied as-is.
    const u32 EXIDX_CANTUNWIND = 1;
    if (vec[i].val == EXIDX_CANTUNWIND || (vec[i].val & 0x8000'0000))
      begin[i].val = vec[i].val;
    else
      begin[i].val = 0x7fff'ffff & (sign_extend(vec[i].val, 30) + offset);
  }
}
|
|
|
|
} // namespace mold::elf
|