#include "mold.h" namespace mold::elf { template <> void GotPltSection::copy_buf(Context &ctx) { u64 *buf = (u64 *)(ctx.buf + this->shdr.sh_offset); // The first slot of .got.plt points to _DYNAMIC, as requested by // the x86-64 psABI. The second and the third slots are reserved by // the psABI. buf[0] = ctx.dynamic ? ctx.dynamic->shdr.sh_addr : 0; buf[1] = 0; buf[2] = 0; for (Symbol *sym : ctx.plt->symbols) buf[sym->get_gotplt_idx(ctx)] = sym->get_plt_addr(ctx) + 6; } template <> void PltSection::copy_buf(Context &ctx) { u8 *buf = ctx.buf + this->shdr.sh_offset; // Write PLT header static const u8 plt0[] = { 0xff, 0x35, 0, 0, 0, 0, // pushq GOTPLT+8(%rip) 0xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+16(%rip) 0x0f, 0x1f, 0x40, 0x00, // nop }; memcpy(buf, plt0, sizeof(plt0)); *(u32 *)(buf + 2) = ctx.gotplt->shdr.sh_addr - this->shdr.sh_addr + 2; *(u32 *)(buf + 8) = ctx.gotplt->shdr.sh_addr - this->shdr.sh_addr + 4; // Write PLT entries i64 relplt_idx = 0; static const u8 data[] = { 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOTPLT 0x68, 0, 0, 0, 0, // push $index_in_relplt 0xe9, 0, 0, 0, 0, // jmp PLT[0] }; for (Symbol *sym : symbols) { u8 *ent = buf + sym->get_plt_idx(ctx) * X86_64::plt_size; memcpy(ent, data, sizeof(data)); *(u32 *)(ent + 2) = sym->get_gotplt_addr(ctx) - sym->get_plt_addr(ctx) - 6; *(u32 *)(ent + 7) = relplt_idx++; *(u32 *)(ent + 12) = this->shdr.sh_addr - sym->get_plt_addr(ctx) - 16; } } template <> void PltGotSection::copy_buf(Context &ctx) { u8 *buf = ctx.buf + this->shdr.sh_offset; static const u8 data[] = { 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT 0x66, 0x90, // nop }; for (Symbol *sym : symbols) { u8 *ent = buf + sym->get_pltgot_idx(ctx) * X86_64::pltgot_size; memcpy(ent, data, sizeof(data)); *(u32 *)(ent + 2) = sym->get_got_addr(ctx) - sym->get_plt_addr(ctx) - 6; } } template <> void EhFrameSection::apply_reloc(Context &ctx, ElfRel &rel, u64 loc, u64 val) { u8 *base = ctx.buf + this->shdr.sh_offset; switch (rel.r_type) { case R_X86_64_NONE: return; case R_X86_64_32: *(u32 *)(base + loc) = val; return; case R_X86_64_64: *(u64 *)(base + loc) = val; return; case R_X86_64_PC32: *(u32 *)(base + loc) = val - this->shdr.sh_addr - loc; return; case R_X86_64_PC64: *(u64 *)(base + loc) = val - this->shdr.sh_addr - loc; return; } unreachable(); } static u32 relax_gotpcrelx(u8 *loc) { switch ((loc[0] << 8) | loc[1]) { case 0xff15: return 0x90e8; // call *0(%rip) -> call 0 case 0xff25: return 0x90e9; // jmp *0(%rip) -> jmp 0 } return 0; } static u32 relax_rex_gotpcrelx(u8 *loc) { switch ((loc[0] << 16) | (loc[1] << 8) | loc[2]) { case 0x488b05: return 0x488d05; // mov 0(%rip), %rax -> lea 0(%rip), %rax case 0x488b0d: return 0x488d0d; // mov 0(%rip), %rcx -> lea 0(%rip), %rcx case 0x488b15: return 0x488d15; // mov 0(%rip), %rdx -> lea 0(%rip), %rdx case 0x488b1d: return 0x488d1d; // mov 0(%rip), %rbx -> lea 0(%rip), %rbx case 0x488b25: return 0x488d25; // mov 0(%rip), %rsp -> lea 0(%rip), %rsp case 0x488b2d: return 0x488d2d; // mov 0(%rip), %rbp -> lea 0(%rip), %rbp case 0x488b35: return 0x488d35; // mov 0(%rip), %rsi -> lea 0(%rip), %rsi case 0x488b3d: return 0x488d3d; // mov 0(%rip), %rdi -> lea 0(%rip), %rdi case 0x4c8b05: return 0x4c8d05; // mov 0(%rip), %r8 -> lea 0(%rip), %r8 case 0x4c8b0d: return 0x4c8d0d; // mov 0(%rip), %r9 -> lea 0(%rip), %r9 case 0x4c8b15: return 0x4c8d15; // mov 0(%rip), %r10 -> lea 0(%rip), %r10 case 0x4c8b1d: return 0x4c8d1d; // mov 0(%rip), %r11 -> lea 0(%rip), %r11 case 0x4c8b25: return 0x4c8d25; // mov 0(%rip), %r12 -> lea 0(%rip), %r12 case 0x4c8b2d: return 0x4c8d2d; // mov 0(%rip), %r13 -> lea 0(%rip), %r13 case 0x4c8b35: return 0x4c8d35; // mov 0(%rip), %r14 -> lea 0(%rip), %r14 case 0x4c8b3d: return 0x4c8d3d; // mov 0(%rip), %r15 -> lea 0(%rip), %r15 } return 0; } static u32 relax_gottpoff(u8 *loc) { switch ((loc[0] << 16) | (loc[1] << 8) | loc[2]) { case 0x488b05: return 0x48c7c0; // mov 0(%rip), %rax -> mov $0, %rax case 0x488b0d: return 0x48c7c1; // mov 0(%rip), %rcx -> mov $0, %rcx case 0x488b15: return 0x48c7c2; // mov 0(%rip), %rdx -> mov $0, %rdx case 0x488b1d: return 0x48c7c3; // mov 0(%rip), %rbx -> mov $0, %rbx case 0x488b25: return 0x48c7c4; // mov 0(%rip), %rsp -> mov $0, %rsp case 0x488b2d: return 0x48c7c5; // mov 0(%rip), %rbp -> mov $0, %rbp case 0x488b35: return 0x48c7c6; // mov 0(%rip), %rsi -> mov $0, %rsi case 0x488b3d: return 0x48c7c7; // mov 0(%rip), %rdi -> mov $0, %rdi case 0x4c8b05: return 0x49c7c0; // mov 0(%rip), %r8 -> mov $0, %r8 case 0x4c8b0d: return 0x49c7c1; // mov 0(%rip), %r9 -> mov $0, %r9 case 0x4c8b15: return 0x49c7c2; // mov 0(%rip), %r10 -> mov $0, %r10 case 0x4c8b1d: return 0x49c7c3; // mov 0(%rip), %r11 -> mov $0, %r11 case 0x4c8b25: return 0x49c7c4; // mov 0(%rip), %r12 -> mov $0, %r12 case 0x4c8b2d: return 0x49c7c5; // mov 0(%rip), %r13 -> mov $0, %r13 case 0x4c8b35: return 0x49c7c6; // mov 0(%rip), %r14 -> mov $0, %r14 case 0x4c8b3d: return 0x49c7c7; // mov 0(%rip), %r15 -> mov $0, %r15 } return 0; } // Apply relocations to SHF_ALLOC sections (i.e. sections that are // mapped to memory at runtime) based on the result of // scan_relocations(). template <> void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { ElfRel *dynrel = nullptr; std::span> rels = get_rels(ctx); i64 frag_idx = 0; if (ctx.reldyn) dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + file.reldyn_offset + this->reldyn_offset); for (i64 i = 0; i < rels.size(); i++) { const ElfRel &rel = rels[i]; if (rel.r_type == R_X86_64_NONE) continue; Symbol &sym = *file.symbols[rel.r_sym]; u8 *loc = base + rel.r_offset; const SectionFragmentRef *ref = nullptr; if (rel_fragments && rel_fragments[frag_idx].idx == i) ref = &rel_fragments[frag_idx++]; auto overflow_check = [&](i64 val, i64 lo, i64 hi) { if (val < lo || hi <= val) Error(ctx) << *this << ": relocation " << rel << " against " << sym << " out of range: " << val << " is not in [" << lo << ", " << hi << ")"; }; auto write8 = [&](u64 val) { overflow_check(val, 0, 1 << 8); *loc = val; }; auto write8s = [&](u64 val) { overflow_check(val, -(1 << 7), 1 << 7); *loc = val; }; auto write16 = [&](u64 val) { overflow_check(val, 0, 1 << 16); *(u16 *)loc = val; }; auto write16s = [&](u64 val) { overflow_check(val, -(1 << 15), 1 << 15); *(u16 *)loc = val; }; auto write32 = [&](u64 val) { overflow_check(val, 0, (i64)1 << 32); *(u32 *)loc = val; }; auto write32s = [&](u64 val) { overflow_check(val, -((i64)1 << 31), (i64)1 << 31); *(u32 *)loc = val; }; #define S (ref ? ref->frag->get_addr(ctx) : sym.get_addr(ctx)) #define A (ref ? ref->addend : rel.r_addend) #define P (output_section->shdr.sh_addr + offset + rel.r_offset) #define G (sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr) #define GOT ctx.got->shdr.sh_addr if (needs_dynrel[i]) { *dynrel++ = {P, R_X86_64_64, (u32)sym.get_dynsym_idx(ctx), A}; *(u64 *)loc = A; continue; } if (needs_baserel[i]) { *dynrel++ = {P, R_X86_64_RELATIVE, 0, (i64)(S + A)}; *(u64 *)loc = S + A; continue; } switch (rel.r_type) { case R_X86_64_8: write8(S + A); continue; case R_X86_64_16: write16(S + A); continue; case R_X86_64_32: write32(S + A); continue; case R_X86_64_32S: write32s(S + A); continue; case R_X86_64_64: *(u64 *)loc = S + A; continue; case R_X86_64_PC8: write8s(S + A - P); continue; case R_X86_64_PC16: write16s(S + A - P); continue; case R_X86_64_PC32: write32s(S + A - P); continue; case R_X86_64_PC64: *(u64 *)loc = S + A - P; continue; case R_X86_64_PLT32: write32s(S + A - P); continue; case R_X86_64_GOT32: write32s(G + A); continue; case R_X86_64_GOT64: *(u64 *)loc = G + A; continue; case R_X86_64_GOTPC32: write32s(GOT + A - P); continue; case R_X86_64_GOTPC64: *(u64 *)loc = GOT + A - P; continue; case R_X86_64_GOTPCREL: write32s(G + GOT + A - P); continue; case R_X86_64_GOTPCREL64: *(u64 *)loc = G + GOT + A - P; continue; case R_X86_64_GOTPCRELX: if (sym.get_got_idx(ctx) == -1) { u32 insn = relax_gotpcrelx(loc - 2); loc[-2] = insn >> 8; loc[-1] = insn; write32s(S + A - P); } else { write32s(G + GOT + A - P); } continue; case R_X86_64_REX_GOTPCRELX: if (sym.get_got_idx(ctx) == -1) { u32 insn = relax_rex_gotpcrelx(loc - 3); loc[-3] = insn >> 16; loc[-2] = insn >> 8; loc[-1] = insn; write32s(S + A - P); } else { write32s(G + GOT + A - P); } continue; case R_X86_64_TLSGD: if (sym.get_tlsgd_idx(ctx) == -1) { // Relax GD to LE static const u8 insn[] = { 0x64, 0x48, 0x8b, 0x04, 0x25, 0, 0, 0, 0, // mov %fs:0, %rax 0x48, 0x8d, 0x80, 0, 0, 0, 0, // lea 0(%rax), %rax }; memcpy(loc - 4, insn, sizeof(insn)); i64 val = S - ctx.tls_end + A + 4; overflow_check(val, -((i64)1 << 31), (i64)1 << 31); *(u32 *)(loc + 8) = val; i++; } else { write32s(sym.get_tlsgd_addr(ctx) + A - P); } continue; case R_X86_64_TLSLD: if (ctx.got->tlsld_idx == -1) { // Relax LD to LE static const u8 insn[] = { 0x66, 0x66, 0x66, // (padding) 0x64, 0x48, 0x8b, 0x04, 0x25, 0, 0, 0, 0, // mov %fs:0, %rax }; memcpy(loc - 3, insn, sizeof(insn)); i++; } else { write32s(ctx.got->get_tlsld_addr(ctx) + A - P); } continue; case R_X86_64_DTPOFF32: if (ctx.arg.relax && !ctx.arg.shared) write32s(S + A - ctx.tls_end); else write32s(S + A - ctx.tls_begin); continue; case R_X86_64_DTPOFF64: if (ctx.arg.relax && !ctx.arg.shared) *(u64 *)loc = S + A - ctx.tls_end; else *(u64 *)loc = S + A - ctx.tls_begin; continue; case R_X86_64_TPOFF32: write32s(S + A - ctx.tls_end); continue; case R_X86_64_TPOFF64: *(u64 *)loc = S + A - ctx.tls_end; continue; case R_X86_64_GOTTPOFF: if (sym.get_gottp_idx(ctx) == -1) { u32 insn = relax_gottpoff(loc - 3); loc[-3] = insn >> 16; loc[-2] = insn >> 8; loc[-1] = insn; write32s(S + A - ctx.tls_end + 4); } else { write32s(sym.get_gottp_addr(ctx) + A - P); } continue; case R_X86_64_GOTPC32_TLSDESC: if (sym.get_tlsdesc_idx(ctx) == -1) { static const u8 insn[] = { 0x48, 0xc7, 0xc0, 0, 0, 0, 0, // mov $0, %rax }; memcpy(loc - 3, insn, sizeof(insn)); write32s(S + A - ctx.tls_end + 4); } else { write32s(sym.get_tlsdesc_addr(ctx) + A - P); } continue; case R_X86_64_SIZE32: write32(sym.esym().st_size + A); continue; case R_X86_64_SIZE64: *(u64 *)loc = sym.esym().st_size + A; continue; case R_X86_64_TLSDESC_CALL: if (sym.get_tlsdesc_idx(ctx) == -1) { // call *(%rax) -> nop loc[0] = 0x66; loc[1] = 0x90; } continue; default: unreachable(); } #undef S #undef A #undef P #undef G #undef GOT } } // This function is responsible for applying relocations against // non-SHF_ALLOC sections (i.e. sections that are not mapped to memory // at runtime). // // Relocations against non-SHF_ALLOC sections are much easier to // handle than that against SHF_ALLOC sections. It is because, since // they are not mapped to memory, they don't contain any variable or // function and never need PLT or GOT. Non-SHF_ALLOC sections are // mostly debug info sections. // // Relocations against non-SHF_ALLOC sections are not scanned by // scan_relocations. template <> void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { std::span> rels = get_rels(ctx); i64 frag_idx = 0; for (i64 i = 0; i < rels.size(); i++) { const ElfRel &rel = rels[i]; if (rel.r_type == R_X86_64_NONE) continue; Symbol &sym = *file.symbols[rel.r_sym]; u8 *loc = base + rel.r_offset; if (!sym.file) { report_undef(ctx, sym); continue; } const SectionFragmentRef *ref = nullptr; if (rel_fragments && rel_fragments[frag_idx].idx == i) ref = &rel_fragments[frag_idx++]; auto overflow_check = [&](i64 val, i64 lo, i64 hi) { if (val < lo || hi <= val) Error(ctx) << *this << ": relocation " << rel << " against " << sym << " out of range: " << val << " is not in [" << lo << ", " << hi << ")"; }; auto write8 = [&](u64 val) { overflow_check(val, 0, 1 << 8); *loc = val; }; auto write8s = [&](u64 val) { overflow_check(val, -(1 << 7), 1 << 7); *loc = val; }; auto write16 = [&](u64 val) { overflow_check(val, 0, 1 << 16); *(u16 *)loc = val; }; auto write16s = [&](u64 val) { overflow_check(val, -(1 << 15), 1 << 15); *(u16 *)loc = val; }; auto write32 = [&](u64 val) { overflow_check(val, 0, (i64)1 << 32); *(u32 *)loc = val; }; auto write32s = [&](u64 val) { overflow_check(val, -((i64)1 << 31), (i64)1 << 31); *(u32 *)loc = val; }; #define S (ref ? ref->frag->get_addr(ctx) : sym.get_addr(ctx)) #define A (ref ? ref->addend : rel.r_addend) switch (rel.r_type) { case R_X86_64_8: write8(S + A); break; case R_X86_64_16: write16(S + A); break; case R_X86_64_32: write32(S + A); break; case R_X86_64_32S: write32s(S + A); break; case R_X86_64_64: *(u64 *)loc = S + A; break; case R_X86_64_DTPOFF32: write32s(S + A - ctx.tls_begin); break; case R_X86_64_DTPOFF64: *(u64 *)loc = S + A - ctx.tls_begin; break; case R_X86_64_SIZE32: write32(sym.esym().st_size + A); break; case R_X86_64_SIZE64: *(u64 *)loc = sym.esym().st_size + A; break; default: Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " << rel; break; } #undef S #undef A } } // Linker has to create data structures in an output file to apply // some type of relocations. For example, if a relocation refers a GOT // or a PLT entry of a symbol, linker has to create an entry in .got // or in .plt for that symbol. In order to fix the file layout, we // need to scan relocations. template <> void InputSection::scan_relocations(Context &ctx) { assert(shdr.sh_flags & SHF_ALLOC); this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); std::span> rels = get_rels(ctx); bool is_writable = (shdr.sh_flags & SHF_WRITE); // Scan relocations for (i64 i = 0; i < rels.size(); i++) { const ElfRel &rel = rels[i]; if (rel.r_type == R_X86_64_NONE) continue; Symbol &sym = *file.symbols[rel.r_sym]; u8 *loc = (u8 *)(contents.data() + rel.r_offset); if (!sym.file) { report_undef(ctx, sym); continue; } if (sym.get_type() == STT_GNU_IFUNC) { sym.flags |= NEEDS_GOT; sym.flags |= NEEDS_PLT; } switch (rel.r_type) { case R_X86_64_8: case R_X86_64_16: case R_X86_64_32: case R_X86_64_32S: { // Dynamic linker does not support 8, 16 or 32-bit dynamic // relocations for these types of relocations. We report an // error if we cannot relocate them even at load-time. Action table[][4] = { // Absolute Local Imported data Imported code { NONE, ERROR, ERROR, ERROR }, // DSO { NONE, ERROR, ERROR, ERROR }, // PIE { NONE, NONE, COPYREL, PLT }, // PDE }; dispatch(ctx, table, i, rel, sym); break; } case R_X86_64_64: { // Unlike the above, we can use R_X86_64_RELATIVE and R_86_64_64 // relocations. Action table[][4] = { // Absolute Local Imported data Imported code { NONE, BASEREL, DYNREL, DYNREL }, // DSO { NONE, BASEREL, DYNREL, DYNREL }, // PIE { NONE, NONE, COPYREL, PLT }, // PDE }; dispatch(ctx, table, i, rel, sym); break; } case R_X86_64_PC8: case R_X86_64_PC16: case R_X86_64_PC32: { Action table[][4] = { // Absolute Local Imported data Imported code { ERROR, NONE, ERROR, ERROR }, // DSO { ERROR, NONE, COPYREL, PLT }, // PIE { NONE, NONE, COPYREL, PLT }, // PDE }; dispatch(ctx, table, i, rel, sym); break; } case R_X86_64_PC64: { Action table[][4] = { // Absolute Local Imported data Imported code { BASEREL, NONE, ERROR, ERROR }, // DSO { BASEREL, NONE, COPYREL, PLT }, // PIE { NONE, NONE, COPYREL, PLT }, // PDE }; dispatch(ctx, table, i, rel, sym); break; } case R_X86_64_GOT32: case R_X86_64_GOT64: case R_X86_64_GOTPC32: case R_X86_64_GOTPC64: case R_X86_64_GOTPCREL: case R_X86_64_GOTPCREL64: sym.flags |= NEEDS_GOT; break; case R_X86_64_GOTPCRELX: { if (rel.r_addend != -4) Fatal(ctx) << *this << ": bad r_addend for R_X86_64_GOTPCRELX"; bool do_relax = ctx.arg.relax && !sym.is_imported && sym.is_relative(ctx) && relax_gotpcrelx(loc - 2); if (!do_relax) sym.flags |= NEEDS_GOT; break; } case R_X86_64_REX_GOTPCRELX: { if (rel.r_addend != -4) Fatal(ctx) << *this << ": bad r_addend for R_X86_64_REX_GOTPCRELX"; bool do_relax = ctx.arg.relax && !sym.is_imported && sym.is_relative(ctx) && relax_rex_gotpcrelx(loc - 3); if (!do_relax) sym.flags |= NEEDS_GOT; break; } case R_X86_64_PLT32: if (sym.is_imported) sym.flags |= NEEDS_PLT; break; case R_X86_64_TLSGD: if (i + 1 == rels.size()) Fatal(ctx) << *this << ": TLSGD reloc must be followed by PLT32 or GOTPCREL"; if (ctx.arg.relax && !ctx.arg.shared && !sym.is_imported) i++; else sym.flags |= NEEDS_TLSGD; break; case R_X86_64_TLSLD: if (i + 1 == rels.size()) Fatal(ctx) << *this << ": TLSLD reloc must be followed by PLT32 or GOTPCREL"; if (sym.is_imported) { // This should be a fatal error as well, but it looks like GCC 7 // wrongly creates such relocations against STB_GNU_UNIQUE symbols. // Other linkers don't have this error check. So, for bug-compatibility, // we only warn about it and create a (possibly corrupted) output file. Warn(ctx) << *this << ": TLSLD reloc refers external symbol: " << sym; } if (ctx.arg.relax && !ctx.arg.shared) i++; else sym.flags |= NEEDS_TLSLD; break; case R_X86_64_DTPOFF32: case R_X86_64_DTPOFF64: if (sym.is_imported) Fatal(ctx) << *this << ": DTPOFF reloc refers external symbol: " << sym; break; case R_X86_64_GOTTPOFF: { ctx.has_gottp_rel = true; bool do_relax = ctx.arg.relax && !ctx.arg.shared && !sym.is_imported && relax_gottpoff(loc - 3); if (!do_relax) sym.flags |= NEEDS_GOTTP; break; } case R_X86_64_GOTPC32_TLSDESC: { if (memcmp(loc - 3, "\x48\x8d\x05", 3)) Fatal(ctx) << *this << ": GOTPC32_TLSDESC relocation is used" << " against an invalid code sequence"; bool do_relax = ctx.relax_tlsdesc && !sym.is_imported; if (!do_relax) sym.flags |= NEEDS_TLSDESC; break; } case R_X86_64_TPOFF32: case R_X86_64_TPOFF64: case R_X86_64_SIZE32: case R_X86_64_SIZE64: case R_X86_64_TLSDESC_CALL: break; default: Error(ctx) << *this << ": unknown relocation: " << rel; } } } } // namespace mold::elf