mirror of
https://github.com/rui314/mold.git
synced 2024-10-05 00:57:08 +03:00
446c461201
This fixes a regression introduced in 92876820cb.
Until now, mold set bogus addresses to non-alloc sections and
as a result emitted corrupted debug info.
Fixes https://github.com/rui314/mold/issues/387
785 lines
24 KiB
C++
785 lines
24 KiB
C++
#include "mold.h"
|
|
|
|
#include <tbb/parallel_for.h>
|
|
#include <tbb/parallel_for_each.h>
|
|
|
|
namespace mold::elf {
|
|
|
|
using E = ARM64;
|
|
|
|
// Patches the 21-bit immediate of the ADR/ADRP instruction at `buf`.
// Bits [1:0] of `val` are stored in instruction bits [30:29] and bits
// [20:2] of `val` in instruction bits [23:5]. The remaining instruction
// bits (selected by the mask 0x9f00001f) are left untouched.
static void write_adr(u8 *buf, u64 val) {
  u32 &insn = *(u32 *)buf;
  u32 immlo = (val & 3) << 29;
  u32 immhi = (val & 0x1ffffc) << 3;
  insn = (insn & 0x9f00001f) | immhi | immlo;
}
|
|
|
|
// Returns [hi:lo] bits of val.
|
|
static u64 bits(u64 val, u64 hi, u64 lo) {
|
|
return (val >> lo) & (((u64)1 << (hi - lo + 1)) - 1);
|
|
}
|
|
|
|
// Clears the low 12 bits of val, i.e. rounds it down to a multiple
// of 0x1000.
static u64 page(u64 val) {
  constexpr u64 low_bits = 0xfff;
  return val & ~low_bits;
}
|
|
|
|
// Writes the PLT header to `buf`: the eight-instruction template below
// with the address of .got.plt + 16 patched into its adrp, ldr and add
// instructions.
static void write_plt_header(Context<E> &ctx, u8 *buf) {
  static const u32 insn[] = {
    0xa9bf7bf0, // stp  x16, x30, [sp,#-16]!
    0x90000010, // adrp x16, .got.plt[2]
    0xf9400211, // ldr  x17, [x16, .got.plt[2]]
    0x91000210, // add  x16, x16, .got.plt[2]
    0xd61f0220, // br   x17
    0xd503201f, // nop
    0xd503201f, // nop
    0xd503201f, // nop
  };

  const u64 gotplt_addr = ctx.gotplt->shdr.sh_addr + 16;
  const u64 plt_addr = ctx.plt->shdr.sh_addr;

  memcpy(buf, insn, sizeof(insn));

  // The adrp is the second instruction, so it sits at plt_addr + 4.
  const u64 page_delta = page(gotplt_addr) - page(plt_addr + 4);
  write_adr(buf + 4, bits(page_delta, 32, 12));

  u32 *words = (u32 *)buf;
  words[2] |= bits(gotplt_addr, 11, 3) << 10;
  words[3] |= (gotplt_addr & 0xfff) << 10;
}
|
|
|
|
// Writes the PLT entry for `sym`. `buf` points to the beginning of the
// PLT; the entry is located past the header at the symbol's PLT index.
// The template's adrp, ldr and add are patched with the address of the
// symbol's .got.plt slot.
static void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  static const u32 insn[] = {
    0x90000010, // adrp x16, .got.plt[n]
    0xf9400211, // ldr  x17, [x16, .got.plt[n]]
    0x91000210, // add  x16, x16, .got.plt[n]
    0xd61f0220, // br   x17
  };

  u8 *slot = buf + ctx.plt_hdr_size + sym.get_plt_idx(ctx) * ctx.plt_size;

  const u64 gotplt_addr = sym.get_gotplt_addr(ctx);
  const u64 plt_addr = sym.get_plt_addr(ctx);

  memcpy(slot, insn, sizeof(insn));

  const u64 page_delta = page(gotplt_addr) - page(plt_addr);
  write_adr(slot, bits(page_delta, 32, 12));

  u32 *words = (u32 *)slot;
  words[1] |= bits(gotplt_addr, 11, 3) << 10;
  words[2] |= (gotplt_addr & 0xfff) << 10;
}
|
|
|
|
template <>
|
|
void PltSection<E>::copy_buf(Context<E> &ctx) {
|
|
u8 *buf = ctx.buf + this->shdr.sh_offset;
|
|
write_plt_header(ctx, buf);
|
|
for (Symbol<E> *sym : symbols)
|
|
write_plt_entry(ctx, buf, *sym);
|
|
}
|
|
|
|
template <>
|
|
void PltGotSection<E>::copy_buf(Context<E> &ctx) {
|
|
u8 *buf = ctx.buf + this->shdr.sh_offset;
|
|
|
|
for (Symbol<E> *sym : symbols) {
|
|
u8 *ent = buf + sym->get_pltgot_idx(ctx) * ARM64::pltgot_size;
|
|
|
|
static const u32 data[] = {
|
|
0x90000010, // adrp x16, GOT[n]
|
|
0xf9400211, // ldr x17, [x16, GOT[n]]
|
|
0xd61f0220, // br x17
|
|
0xd503201f, // nop
|
|
};
|
|
|
|
u64 got = sym->get_got_addr(ctx);
|
|
u64 plt = sym->get_plt_addr(ctx);
|
|
|
|
memcpy(ent, data, sizeof(data));
|
|
write_adr(ent, bits(page(got) - page(plt), 32, 12));
|
|
*(u32 *)(ent + 4) |= bits(got, 11, 3) << 10;
|
|
}
|
|
}
|
|
|
|
template <>
|
|
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, ElfRel<E> &rel,
|
|
u64 offset, u64 val) {
|
|
u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
|
|
|
|
switch (rel.r_type) {
|
|
case R_AARCH64_ABS64:
|
|
*(u64 *)loc = val;
|
|
return;
|
|
case R_AARCH64_PREL32:
|
|
*(u32 *)loc = val - this->shdr.sh_addr - offset;
|
|
return;
|
|
case R_AARCH64_PREL64:
|
|
*(u64 *)loc = val - this->shdr.sh_addr - offset;
|
|
return;
|
|
}
|
|
Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
|
|
}
|
|
|
|
template <>
|
|
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
|
|
ElfRel<E> *dynrel = nullptr;
|
|
std::span<ElfRel<E>> rels = get_rels(ctx);
|
|
std::span<RangeExtensionRef> range_extn = get_range_extn();
|
|
|
|
i64 frag_idx = 0;
|
|
|
|
if (ctx.reldyn)
|
|
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
|
|
file.reldyn_offset + this->reldyn_offset);
|
|
|
|
for (i64 i = 0; i < rels.size(); i++) {
|
|
const ElfRel<E> &rel = rels[i];
|
|
if (rel.r_type == R_AARCH64_NONE)
|
|
continue;
|
|
|
|
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
u8 *loc = base + rel.r_offset;
|
|
|
|
const SectionFragmentRef<E> *frag_ref = nullptr;
|
|
if (rel_fragments && rel_fragments[frag_idx].idx == i)
|
|
frag_ref = &rel_fragments[frag_idx++];
|
|
|
|
auto overflow_check = [&](i64 val, i64 lo, i64 hi) {
|
|
if (val < lo || hi <= val)
|
|
Error(ctx) << *this << ": relocation " << rel << " against "
|
|
<< sym << " out of range: " << val << " is not in ["
|
|
<< lo << ", " << hi << ")";
|
|
};
|
|
|
|
#define S (frag_ref ? frag_ref->frag->get_addr(ctx) : sym.get_addr(ctx))
|
|
#define A (frag_ref ? frag_ref->addend : rel.r_addend)
|
|
#define P (output_section->shdr.sh_addr + offset + rel.r_offset)
|
|
#define G (sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr)
|
|
#define GOT ctx.got->shdr.sh_addr
|
|
|
|
switch (rel.r_type) {
|
|
case R_AARCH64_ABS64:
|
|
if (sym.is_absolute() || !ctx.arg.pic) {
|
|
*(u64 *)loc = S + A;
|
|
} else if (sym.is_imported) {
|
|
*dynrel++ = {P, R_AARCH64_ABS64, (u32)sym.get_dynsym_idx(ctx), A};
|
|
*(u64 *)loc = A;
|
|
} else {
|
|
if (!is_relr_reloc(ctx, rel))
|
|
*dynrel++ = {P, R_AARCH64_RELATIVE, 0, (i64)(S + A)};
|
|
*(u64 *)loc = S + A;
|
|
}
|
|
continue;
|
|
case R_AARCH64_LDST8_ABS_LO12_NC:
|
|
*(u32 *)loc |= bits(S + A, 11, 0) << 10;
|
|
continue;
|
|
case R_AARCH64_LDST16_ABS_LO12_NC:
|
|
*(u32 *)loc |= bits(S + A, 11, 1) << 10;
|
|
continue;
|
|
case R_AARCH64_LDST32_ABS_LO12_NC:
|
|
*(u32 *)loc |= bits(S + A, 11, 2) << 10;
|
|
continue;
|
|
case R_AARCH64_LDST64_ABS_LO12_NC:
|
|
*(u32 *)loc |= bits(S + A, 11, 3) << 10;
|
|
continue;
|
|
case R_AARCH64_LDST128_ABS_LO12_NC:
|
|
*(u32 *)loc |= bits(S + A, 11, 4) << 10;
|
|
continue;
|
|
case R_AARCH64_ADD_ABS_LO12_NC:
|
|
*(u32 *)loc |= bits(S + A, 11, 0) << 10;
|
|
continue;
|
|
case R_AARCH64_MOVW_UABS_G0_NC:
|
|
*(u32 *)loc |= bits(S + A, 15, 0) << 5;
|
|
continue;
|
|
case R_AARCH64_MOVW_UABS_G1_NC:
|
|
*(u32 *)loc |= bits(S + A, 31, 16) << 5;
|
|
continue;
|
|
case R_AARCH64_MOVW_UABS_G2_NC:
|
|
*(u32 *)loc |= bits(S + A, 47, 32) << 5;
|
|
continue;
|
|
case R_AARCH64_MOVW_UABS_G3:
|
|
*(u32 *)loc |= bits(S + A, 63, 48) << 5;
|
|
continue;
|
|
case R_AARCH64_ADR_GOT_PAGE: {
|
|
i64 val = page(G + GOT + A) - page(P);
|
|
overflow_check(val, -((i64)1 << 32), (i64)1 << 32);
|
|
write_adr(loc, bits(val, 32, 12));
|
|
continue;
|
|
}
|
|
case R_AARCH64_ADR_PREL_PG_HI21: {
|
|
i64 val = page(S + A) - page(P);
|
|
overflow_check(val, -((i64)1 << 32), (i64)1 << 32);
|
|
write_adr(loc, bits(val, 32, 12));
|
|
continue;
|
|
}
|
|
case R_AARCH64_ADR_PREL_LO21: {
|
|
i64 val = S + A - P;
|
|
overflow_check(val, -((i64)1 << 20), (i64)1 << 20);
|
|
write_adr(loc, val);
|
|
continue;
|
|
}
|
|
case R_AARCH64_CALL26:
|
|
case R_AARCH64_JUMP26: {
|
|
if (sym.esym().is_undef_weak()) {
|
|
// On ARM, calling an weak undefined symbol jumps to the
|
|
// next instruction.
|
|
*(u32 *)loc |= 1;
|
|
continue;
|
|
}
|
|
|
|
i64 lo = -(1 << 27);
|
|
i64 hi = 1 << 27;
|
|
i64 val = S + A - P;
|
|
|
|
if (val < lo || hi <= val) {
|
|
RangeExtensionRef ref = range_extn[i];
|
|
val = output_section->thunks[ref.thunk_idx]->get_addr(ref.sym_idx) + A - P;
|
|
assert(lo <= val && val < hi);
|
|
}
|
|
|
|
*(u32 *)loc |= (val >> 2) & 0x3ffffff;
|
|
continue;
|
|
}
|
|
case R_AARCH64_CONDBR19: {
|
|
i64 val = S + A - P;
|
|
overflow_check(val, -((i64)1 << 20), (i64)1 << 20);
|
|
*(u32 *)loc |= bits(val, 20, 2) << 5;
|
|
continue;
|
|
}
|
|
case R_AARCH64_PREL16: {
|
|
i64 val = S + A - P;
|
|
overflow_check(val, -((i64)1 << 15), (i64)1 << 15);
|
|
*(u16 *)loc = val;
|
|
continue;
|
|
}
|
|
case R_AARCH64_PREL32: {
|
|
i64 val = S + A - P;
|
|
overflow_check(val, -((i64)1 << 31), (i64)1 << 32);
|
|
*(u32 *)loc = val;
|
|
continue;
|
|
}
|
|
case R_AARCH64_PREL64:
|
|
*(u64 *)loc = S + A - P;
|
|
continue;
|
|
case R_AARCH64_LD64_GOT_LO12_NC:
|
|
*(u32 *)loc |= bits(G + GOT + A, 11, 3) << 10;
|
|
continue;
|
|
case R_AARCH64_LD64_GOTPAGE_LO15: {
|
|
i64 val = G + GOT + A - page(GOT);
|
|
overflow_check(val, 0, 1 << 15);
|
|
*(u32 *)loc |= bits(val, 14, 3) << 10;
|
|
continue;
|
|
}
|
|
case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: {
|
|
i64 val = page(sym.get_gottp_addr(ctx) + A) - page(P);
|
|
overflow_check(val, -((i64)1 << 32), (i64)1 << 32);
|
|
write_adr(loc, bits(val, 32, 12));
|
|
continue;
|
|
}
|
|
case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
|
|
*(u32 *)loc |= bits(sym.get_gottp_addr(ctx) + A, 11, 3) << 10;
|
|
continue;
|
|
case R_AARCH64_TLSLE_ADD_TPREL_HI12: {
|
|
i64 val = S + A - ctx.tls_begin + 16;
|
|
overflow_check(val, 0, (i64)1 << 24);
|
|
*(u32 *)loc |= bits(val, 23, 12) << 10;
|
|
continue;
|
|
}
|
|
case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
|
|
*(u32 *)loc |= bits(S + A - ctx.tls_begin + 16, 11, 0) << 10;
|
|
continue;
|
|
case R_AARCH64_TLSGD_ADR_PAGE21: {
|
|
i64 val = page(sym.get_tlsgd_addr(ctx) + A) - page(P);
|
|
overflow_check(val, -((i64)1 << 32), (i64)1 << 32);
|
|
write_adr(loc, bits(val, 32, 12));
|
|
continue;
|
|
}
|
|
case R_AARCH64_TLSGD_ADD_LO12_NC:
|
|
*(u32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A, 11, 0) << 10;
|
|
continue;
|
|
case R_AARCH64_TLSDESC_ADR_PAGE21: {
|
|
if (ctx.relax_tlsdesc && !sym.is_imported) {
|
|
// adrp x0, 0 -> movz x0, #tls_ofset_hi, lsl #16
|
|
i64 val = (S + A - ctx.tls_begin + 16);
|
|
overflow_check(val, -((i64)1 << 32), (i64)1 << 32);
|
|
*(u32 *)loc = 0xd2a00000 | (bits(val, 32, 16) << 5);
|
|
} else {
|
|
i64 val = page(sym.get_tlsdesc_addr(ctx) + A) - page(P);
|
|
overflow_check(val, -((i64)1 << 32), (i64)1 << 32);
|
|
write_adr(loc, bits(val, 32, 12));
|
|
}
|
|
continue;
|
|
}
|
|
case R_AARCH64_TLSDESC_LD64_LO12:
|
|
if (ctx.relax_tlsdesc && !sym.is_imported) {
|
|
// ldr x2, [x0] -> movk x0, #tls_ofset_lo
|
|
u32 offset_lo = (S + A - ctx.tls_begin + 16) & 0xffff;
|
|
*(u32 *)loc = 0xf2800000 | (offset_lo << 5);
|
|
} else {
|
|
*(u32 *)loc |= bits(sym.get_tlsdesc_addr(ctx) + A, 11, 3) << 10;
|
|
}
|
|
continue;
|
|
case R_AARCH64_TLSDESC_ADD_LO12:
|
|
if (ctx.relax_tlsdesc && !sym.is_imported) {
|
|
// add x0, x0, #0 -> nop
|
|
*(u32 *)loc = 0xd503201f;
|
|
} else {
|
|
*(u32 *)loc |= bits(sym.get_tlsdesc_addr(ctx) + A, 11, 0) << 10;
|
|
}
|
|
continue;
|
|
case R_AARCH64_TLSDESC_CALL:
|
|
if (ctx.relax_tlsdesc && !sym.is_imported) {
|
|
// blr x2 -> nop
|
|
*(u32 *)loc = 0xd503201f;
|
|
}
|
|
continue;
|
|
default:
|
|
unreachable();
|
|
}
|
|
|
|
#undef S
|
|
#undef A
|
|
#undef P
|
|
#undef G
|
|
#undef GOT
|
|
}
|
|
}
|
|
|
|
template <>
|
|
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
|
|
std::span<ElfRel<E>> rels = get_rels(ctx);
|
|
|
|
for (i64 i = 0; i < rels.size(); i++) {
|
|
const ElfRel<E> &rel = rels[i];
|
|
if (rel.r_type == R_AARCH64_NONE)
|
|
continue;
|
|
|
|
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
u8 *loc = base + rel.r_offset;
|
|
|
|
if (!sym.file) {
|
|
report_undef(ctx, file, sym);
|
|
continue;
|
|
}
|
|
|
|
SectionFragment<E> *frag;
|
|
i64 addend;
|
|
std::tie(frag, addend) = get_fragment(ctx, rel);
|
|
|
|
#define S (frag ? frag->get_addr(ctx) : sym.get_addr(ctx))
|
|
#define A (frag ? addend : rel.r_addend)
|
|
|
|
switch (rel.r_type) {
|
|
case R_AARCH64_ABS64:
|
|
*(u64 *)loc = S + A;
|
|
continue;
|
|
case R_AARCH64_ABS32:
|
|
*(u32 *)loc = S + A;
|
|
continue;
|
|
default:
|
|
Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
|
|
<< rel;
|
|
break;
|
|
}
|
|
|
|
#undef S
|
|
#undef A
|
|
}
|
|
}
|
|
|
|
template <>
|
|
void InputSection<E>::scan_relocations(Context<E> &ctx) {
|
|
assert(shdr().sh_flags & SHF_ALLOC);
|
|
|
|
this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
|
|
std::span<ElfRel<E>> rels = get_rels(ctx);
|
|
|
|
// Scan relocations
|
|
for (i64 i = 0; i < rels.size(); i++) {
|
|
const ElfRel<E> &rel = rels[i];
|
|
if (rel.r_type == R_AARCH64_NONE)
|
|
continue;
|
|
|
|
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
|
|
if (!sym.file) {
|
|
report_undef(ctx, file, sym);
|
|
continue;
|
|
}
|
|
|
|
if (sym.get_type() == STT_GNU_IFUNC) {
|
|
sym.flags |= NEEDS_GOT;
|
|
sym.flags |= NEEDS_PLT;
|
|
}
|
|
|
|
switch (rel.r_type) {
|
|
case R_AARCH64_ABS64: {
|
|
Action table[][4] = {
|
|
// Absolute Local Imported data Imported code
|
|
{ NONE, BASEREL, DYNREL, DYNREL }, // DSO
|
|
{ NONE, BASEREL, DYNREL, DYNREL }, // PIE
|
|
{ NONE, NONE, COPYREL, PLT }, // PDE
|
|
};
|
|
dispatch(ctx, table, i, rel, sym);
|
|
break;
|
|
}
|
|
case R_AARCH64_ADR_GOT_PAGE:
|
|
case R_AARCH64_LD64_GOT_LO12_NC:
|
|
case R_AARCH64_LD64_GOTPAGE_LO15:
|
|
sym.flags |= NEEDS_GOT;
|
|
break;
|
|
case R_AARCH64_CALL26:
|
|
case R_AARCH64_JUMP26:
|
|
if (sym.is_imported)
|
|
sym.flags |= NEEDS_PLT;
|
|
break;
|
|
case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
|
|
case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
|
|
sym.flags |= NEEDS_GOTTP;
|
|
break;
|
|
case R_AARCH64_ADR_PREL_PG_HI21: {
|
|
Action table[][4] = {
|
|
// Absolute Local Imported data Imported code
|
|
{ ERROR, NONE, ERROR, ERROR }, // DSO
|
|
{ ERROR, NONE, COPYREL, PLT }, // PIE
|
|
{ NONE, NONE, COPYREL, PLT }, // PDE
|
|
};
|
|
dispatch(ctx, table, i, rel, sym);
|
|
break;
|
|
}
|
|
case R_AARCH64_TLSGD_ADR_PAGE21:
|
|
sym.flags |= NEEDS_TLSGD;
|
|
break;
|
|
case R_AARCH64_TLSDESC_ADR_PAGE21:
|
|
case R_AARCH64_TLSDESC_LD64_LO12:
|
|
case R_AARCH64_TLSDESC_ADD_LO12:
|
|
if (!ctx.relax_tlsdesc || sym.is_imported)
|
|
sym.flags |= NEEDS_TLSDESC;
|
|
break;
|
|
case R_AARCH64_ADD_ABS_LO12_NC:
|
|
case R_AARCH64_ADR_PREL_LO21:
|
|
case R_AARCH64_CONDBR19:
|
|
case R_AARCH64_LDST16_ABS_LO12_NC:
|
|
case R_AARCH64_LDST32_ABS_LO12_NC:
|
|
case R_AARCH64_LDST64_ABS_LO12_NC:
|
|
case R_AARCH64_LDST128_ABS_LO12_NC:
|
|
case R_AARCH64_LDST8_ABS_LO12_NC:
|
|
case R_AARCH64_MOVW_UABS_G0_NC:
|
|
case R_AARCH64_MOVW_UABS_G1_NC:
|
|
case R_AARCH64_MOVW_UABS_G2_NC:
|
|
case R_AARCH64_MOVW_UABS_G3:
|
|
case R_AARCH64_PREL16:
|
|
case R_AARCH64_PREL32:
|
|
case R_AARCH64_PREL64:
|
|
case R_AARCH64_TLSLE_ADD_TPREL_HI12:
|
|
case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
|
|
case R_AARCH64_TLSGD_ADD_LO12_NC:
|
|
case R_AARCH64_TLSDESC_CALL:
|
|
break;
|
|
default:
|
|
Error(ctx) << *this << ": unknown relocation: " << rel;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Detaches every symbol of `thunk` from it: the symbol's thunk indices
// are set back to -1 and its NEEDS_THUNK flag is cleared.
static void reset_thunk(RangeExtensionThunk<E> &thunk) {
  for (i64 i = 0; i < thunk.symbols.size(); i++) {
    Symbol<E> &sym = *thunk.symbols[i];
    sym.extra.thunk_idx = -1;
    sym.extra.thunk_sym_idx = -1;
    sym.flags &= (u8)~NEEDS_THUNK;
  }
}
|
|
|
|
// Returns true if the branch described by `rel`, located in `isec`, can
// reach `sym` directly, i.e. if the displacement lies in
// [-(1 << 27), 1 << 27).
static bool is_reachable(Context<E> &ctx, Symbol<E> &sym,
                         InputSection<E> &isec, const ElfRel<E> &rel) {
  // An absolute symbol is conservatively treated as unreachable because
  // `shrink_sections` may increase the distance between a branch
  // instruction and an absolute symbol. Branching to an absolute
  // location is extremely rare in real code, though.
  if (sym.is_absolute())
    return false;

  // Distance between the relocated place and the branch destination.
  i64 target = sym.get_addr(ctx);
  i64 addend = rel.r_addend;
  i64 place = isec.get_addr() + rel.r_offset;
  i64 disp = target + addend - place;

  bool too_far_back = disp < -(1 << 27);
  bool too_far_ahead = disp >= (1 << 27);
  return !(too_far_back || too_far_ahead);
}
|
|
|
|
// We create a thunk no further than 100 MiB from any section.
|
|
static constexpr i64 MAX_DISTANCE = 100 * 1024 * 1024;
|
|
|
|
// We create a thunk for each 10 MiB input sections.
|
|
static constexpr i64 GROUP_SIZE = 10 * 1024 * 1024;
|
|
|
|
// Lays out the input sections of `osec` from the beginning to the end,
// inserting a range extension thunk after each batch of sections, and
// records for every CALL26/JUMP26 relocation whose destination is not
// directly reachable which thunk entry it should use. On return,
// osec.shdr.sh_size covers both the sections and the thunks.
static void create_thunks(Context<E> &ctx, OutputSection<E> &osec) {
  std::span<InputSection<E> *> isecs = osec.members;
  isecs[0]->offset = 0;

  // Give all other input sections a very large dummy offset so that
  // sections that have got real offsets are separated from the ones
  // without in the virtual address space.
  tbb::parallel_for((i64)1, (i64)isecs.size(), [&](i64 i) {
    isecs[i]->offset = 1 << 31;
  });

  // Progress is tracked by four monotonically increasing indices, which
  // always satisfy first_live <= group_begin <= group_end <= laid_out in
  // terms of the locations they refer to:
  //
  //  - first_live:  the oldest thunk whose symbols are still attached
  //  - group_begin: the first input section of the current group
  //  - group_end:   one past the last input section of the current group
  //  - laid_out:    one past the last input section with a real offset
  i64 first_live = 0;
  i64 group_begin = 0;
  i64 group_end = 0;
  i64 laid_out = 0;
  i64 cursor = 0;

  while (group_begin < isecs.size()) {
    // Assign offsets to sections as far as we can jump from the
    // beginning of the group.
    while (laid_out < isecs.size() &&
           cursor - isecs[group_begin]->offset < MAX_DISTANCE) {
      InputSection<E> &isec = *isecs[laid_out++];
      cursor = align_to(cursor, 1 << isec.p2align);
      isec.offset = cursor;
      cursor += isec.sh_size;
    }

    // Extend the group until it spans GROUP_SIZE.
    while (group_end < isecs.size() &&
           isecs[group_end]->offset - isecs[group_begin]->offset < GROUP_SIZE)
      group_end++;

    // Retire thunks that are too far back to be reached from the end of
    // the group.
    if (group_end > 0) {
      InputSection<E> &last = *isecs[group_end - 1];
      i64 group_limit = last.offset + last.sh_size;

      while (first_live < osec.thunks.size() &&
             osec.thunks[first_live]->offset < group_limit - MAX_DISTANCE)
        reset_thunk(*osec.thunks[first_live++]);
    }

    // Create a thunk for the current group and place it at the current
    // end of the layout.
    osec.thunks.emplace_back(new RangeExtensionThunk<E>{osec});

    RangeExtensionThunk<E> &thunk = *osec.thunks.back();
    thunk.thunk_idx = osec.thunks.size() - 1;
    thunk.offset = cursor;

    auto group_first = isecs.begin() + group_begin;
    auto group_last = isecs.begin() + group_end;

    // Scan the group's relocations to collect symbols that need thunks.
    tbb::parallel_for_each(group_first, group_last,
                           [&](InputSection<E> *isec) {
      std::span<ElfRel<E>> rels = isec->get_rels(ctx);
      std::vector<RangeExtensionRef> &refs = isec->get_range_extn();
      refs.resize(rels.size());

      for (i64 i = 0; i < rels.size(); i++) {
        const ElfRel<E> &rel = rels[i];
        bool is_branch = (rel.r_type == R_AARCH64_CALL26 ||
                          rel.r_type == R_AARCH64_JUMP26);
        if (!is_branch)
          continue;

        Symbol<E> &sym = *isec->file.symbols[rel.r_sym];

        // Nothing to do if the destination is within reach.
        if (is_reachable(ctx, sym, *isec, rel))
          continue;

        // If the symbol is already in another thunk, reuse it.
        if (sym.extra.thunk_idx != -1) {
          refs[i] = {sym.extra.thunk_idx, sym.extra.thunk_sym_idx};
          continue;
        }

        // Otherwise, refer to this thunk. The index within the thunk is
        // not known yet; it is filled in by the second scan below.
        refs[i] = {thunk.thunk_idx, -1};

        // Only the first thread to set NEEDS_THUNK adds the symbol.
        bool already_added = sym.flags.fetch_or(NEEDS_THUNK) & NEEDS_THUNK;
        if (!already_added) {
          std::scoped_lock lock(thunk.mu);
          thunk.symbols.push_back(&sym);
        }
      }
    });

    // Now that we know the number of symbols in the thunk, we can
    // account for its size.
    cursor += thunk.size();

    // Sort symbols added to the thunk to make the output deterministic.
    sort(thunk.symbols, [](Symbol<E> *x, Symbol<E> *y) { return *x < *y; });

    // Assign indices within the thunk to the symbols.
    for (i64 i = 0; i < thunk.symbols.size(); i++) {
      Symbol<E> *sym = thunk.symbols[i];
      sym->extra.thunk_idx = thunk.thunk_idx;
      sym->extra.thunk_sym_idx = i;
    }

    // Scan the relocations again to fill in the symbol indices of the
    // references to the thunk we have just created.
    tbb::parallel_for_each(group_first, group_last,
                           [&](InputSection<E> *isec) {
      std::span<ElfRel<E>> rels = isec->get_rels(ctx);
      std::span<RangeExtensionRef> refs = isec->get_range_extn();

      for (i64 i = 0; i < rels.size(); i++) {
        if (refs[i].thunk_idx != thunk.thunk_idx)
          continue;
        Symbol<E> &sym = *isec->file.symbols[rels[i].r_sym];
        refs[i].sym_idx = sym.extra.thunk_sym_idx;
      }
    });

    // The next group begins where this one ended.
    group_begin = group_end;
  }

  // Detach the symbols of all thunks that are still live.
  while (first_live < osec.thunks.size())
    reset_thunk(*osec.thunks[first_live++]);

  osec.shdr.sh_size = cursor;
}
|
|
|
|
// Removes thunk symbols that no relocation needs anymore. A thunk entry
// is kept only if at least one relocation that refers to it still
// cannot reach its destination directly. For each thunk, symbol_map[j]
// records the new index of the symbol that was previously at index j.
static void gc_thunk_symbols(Context<E> &ctx, OutputSection<E> &osec) {
  for (auto &thunk : osec.thunks) {
    const i64 num_syms = thunk->symbols.size();
    thunk->symbol_map.resize(num_syms);
    thunk->used.reset(new std::atomic_bool[num_syms]{});
  }

  // Mark referenced thunk symbols
  tbb::parallel_for_each(osec.members, [&](InputSection<E> *isec) {
    std::span<ElfRel<E>> rels = isec->get_rels(ctx);
    std::span<RangeExtensionRef> refs = isec->get_range_extn();

    for (i64 i = 0; i < rels.size(); i++) {
      RangeExtensionRef &ref = refs[i];

      if (ref.thunk_idx != -1) {
        Symbol<E> &sym = *isec->file.symbols[rels[i].r_sym];
        if (!is_reachable(ctx, sym, *isec, rels[i]))
          osec.thunks[ref.thunk_idx]->used[ref.sym_idx] = true;
      }
    }
  });

  // Remove unreferenced thunk symbols by compacting each symbol vector
  // in place.
  tbb::parallel_for_each(osec.thunks,
                         [&](std::unique_ptr<RangeExtensionThunk<E>> &thunk) {
    i64 kept = 0;
    for (i64 j = 0; j < thunk->symbols.size(); j++) {
      if (!thunk->used[j])
        continue;
      thunk->symbol_map[j] = kept;
      thunk->symbols[kept] = thunk->symbols[j];
      kept++;
    }
    thunk->symbols.resize(kept);
  });
}
|
|
|
|
// Reassigns offsets to the thunks and input sections of `osec`, keeping
// their relative order by previous offset, and sets the section size to
// the new total. The new size must not exceed the previous one.
static void shrink_section(Context<E> &ctx, OutputSection<E> &osec) {
  std::span<std::unique_ptr<RangeExtensionThunk<E>>> thunks = osec.thunks;
  std::span<InputSection<E> *> members = osec.members;

  i64 cursor = 0;

  // Places the first remaining thunk at the cursor.
  auto place_thunk = [&] {
    RangeExtensionThunk<E> &thunk = *thunks.front();
    thunk.offset = cursor;
    cursor += thunk.size();
    thunks = thunks.subspan(1);
  };

  // Places the first remaining input section at the aligned cursor.
  auto place_isec = [&] {
    InputSection<E> &isec = *members.front();
    cursor = align_to(cursor, 1 << isec.p2align);
    isec.offset = cursor;
    cursor += isec.sh_size;
    members = members.subspan(1);
  };

  // Merge the two sequences. A thunk goes first only if its previous
  // offset is strictly smaller than that of the next input section.
  while (!thunks.empty() || !members.empty()) {
    bool thunk_first =
      members.empty() ||
      (!thunks.empty() && thunks[0]->offset < members[0]->offset);

    if (thunk_first)
      place_thunk();
    else
      place_isec();
  }

  assert(cursor <= osec.shdr.sh_size);
  osec.shdr.sh_size = cursor;
}
|
|
|
|
// ARM64's call/jump instructions take 27 bits displacement, so they
|
|
// can refer only up to ±128 MiB. If a branch target is further than
|
|
// that, we need to let it branch to a linker-synthesized code
|
|
// sequence that construct a full 32 bit address in a register and
|
|
// jump there. That linker-synthesized code is called "thunk".
|
|
i64 create_range_extension_thunks(Context<E> &ctx) {
|
|
Timer t(ctx, "create_range_extension_thunks");
|
|
|
|
for (ObjectFile<E> *file : ctx.objs)
|
|
file->range_extn.resize(file->sections.size());
|
|
|
|
// First, we create thunks with a pessimistic assumption that all
|
|
// out-of-section relocations would need thunks. To do so, we start
|
|
// with an initial layout in which output sections are separated far
|
|
// apart.
|
|
for (i64 i = 0; Chunk<E> *chunk : ctx.chunks)
|
|
if (chunk->shdr.sh_flags & SHF_ALLOC)
|
|
chunk->shdr.sh_addr = i++ << 31;
|
|
|
|
std::vector<OutputSection<E> *> sections;
|
|
for (std::unique_ptr<OutputSection<E>> &osec : ctx.output_sections)
|
|
if (!osec->members.empty() && (osec->shdr.sh_flags & SHF_EXECINSTR))
|
|
sections.push_back(osec.get());
|
|
|
|
for (OutputSection<E> *osec : sections)
|
|
create_thunks(ctx, *osec);
|
|
|
|
// Recompute file layout.
|
|
set_osec_offsets(ctx);
|
|
|
|
// Based on the current file layout, remove thunk symbols that turned
|
|
// out to be unnecessary.
|
|
tbb::parallel_for_each(sections, [&](OutputSection<E> *osec) {
|
|
gc_thunk_symbols(ctx, *osec);
|
|
});
|
|
|
|
// Recompute output section sizes that contain thunks. New section
|
|
// sizes must be equal to or smaller than previous values, so all
|
|
// relocations that were previously reachable will still be reachable
|
|
// after this step.
|
|
for (OutputSection<E> *osec : sections)
|
|
shrink_section(ctx, *osec);
|
|
|
|
// Compute the final layout.
|
|
return set_osec_offsets(ctx);
|
|
}
|
|
|
|
// Writes this thunk to the output buffer. Each symbol gets an
// ENTRY_SIZE-byte entry that computes the symbol's address in x16 using
// adrp + add and branches to it.
void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
  static const u32 insn[] = {
    0x90000010, // adrp x16, 0   # R_AARCH64_ADR_PREL_PG_HI21
    0x91000210, // add  x16, x16 # R_AARCH64_ADD_ABS_LO12_NC
    0xd61f0200, // br   x16
  };

  static_assert(ENTRY_SIZE == sizeof(insn));

  // `loc` is where the current entry is written and `place` is the
  // address it will have; both advance by one entry per symbol.
  u8 *loc = ctx.buf + output_section.shdr.sh_offset + offset;
  u64 place = output_section.shdr.sh_addr + offset;

  for (Symbol<E> *sym : symbols) {
    const u64 target = sym->get_addr(ctx);

    memcpy(loc, insn, sizeof(insn));
    write_adr(loc, bits(page(target) - page(place), 32, 12));
    ((u32 *)loc)[1] |= bits(target, 11, 0) << 10;

    loc += ENTRY_SIZE;
    place += ENTRY_SIZE;
  }
}
|
|
|
|
// Writes the thunks of all output sections to the output buffer, in
// parallel over both sections and thunks.
void write_thunks(Context<E> &ctx) {
  Timer t(ctx, "write_thunks");

  auto emit_thunk = [&](std::unique_ptr<RangeExtensionThunk<E>> &thunk) {
    thunk->copy_buf(ctx);
  };

  auto emit_section = [&](std::unique_ptr<OutputSection<E>> &osec) {
    tbb::parallel_for_each(osec->thunks, emit_thunk);
  };

  tbb::parallel_for_each(ctx.output_sections, emit_section);
}
|
|
|
|
} // namespace mold::elf
|