1
1
mirror of https://github.com/rui314/mold.git synced 2024-10-04 08:37:28 +03:00

[Mach-O] Support LOH_ARM64_ADRP_LDR_GOT_LDR

This commit is contained in:
Rui Ueyama 2022-07-26 16:14:49 +08:00
parent d3d0c81bc6
commit 54399fe567
7 changed files with 171 additions and 1 deletions

View File

@ -467,4 +467,103 @@ void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
}
}
// Asserts that `val` lies in the half-open interval [start, start + size).
// This expands to a plain assert(), so it is a debugging aid rather than a
// runtime input check. Note that `start` is evaluated twice.
#define ASSERT_RANGE(val, start, size) \
assert((start) <= (val) && (val) < ((start) + (size)))
// On ARM, we need two or more instructions to materialize an address
// of an object in a register or jump to a function within PC ± 2GiB.
// However, if an object or a function is close enough to PC, a single
// instruction is sufficient to materialize its address.
//
// This function replaces such a redundant sequence of two or more
// instructions with a single instruction. We don't shrink a section, so
// the total number of instructions won't change by this relaxation,
// but replacing an instruction with a NOP generally increases
// performance since CPU has a special logic to skip a NOP instead of
// executing it.
//
// Locations of relaxable instructions are recorded in the data
// referenced by the LC_LINKER_OPTIMIZATION_HINT load command. That data
// is a sequence of ULEB-encoded integers: each record is a hint type,
// an argument count and then that many instruction addresses. A hint
// type of 0 terminates the sequence.
//
// Only LOH_ARM64_ADRP_LDR_GOT_LDR is acted upon for now; records of any
// other type are parsed and skipped. The function patches instructions
// in the output buffer (ctx.buf), so it must run after section contents
// have been copied there.
void apply_linker_optimization_hints(Context<E> &ctx) {
  Timer t(ctx, "apply_linker_optimization_hints");

  tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
    u8 *hints = file->get_linker_optimization_hints(ctx);
    if (!hints)
      return;

    for (;;) {
      i64 type = read_uleb(hints);
      if (type == 0)
        return;

      i64 nargs = read_uleb(hints);

      switch (type) {
      case LOH_ARM64_ADRP_LDR_GOT_LDR: {
        // This hint always refers to exactly three instructions. If the
        // record says otherwise, skip it as a whole so that we stay in
        // sync with the hint stream.
        if (nargs != 3) {
          for (i64 i = 0; i < nargs; i++)
            read_uleb(hints);
          break;
        }

        i64 addr1 = read_uleb(hints);
        i64 addr2 = read_uleb(hints);
        i64 addr3 = read_uleb(hints);

        Subsection<E> *subsec = file->find_subsection(ctx, addr1);
        if (!subsec || !subsec->is_alive)
          break;

        // All three instructions must belong to the same subsection.
        ASSERT_RANGE(addr2, subsec->input_addr, subsec->input_size);
        ASSERT_RANGE(addr3, subsec->input_addr, subsec->input_size);

        u8 *loc = ctx.buf + subsec->isec.osec.hdr.offset + subsec->output_offset;
        i64 offset1 = addr1 - subsec->input_addr;
        i64 offset2 = addr2 - subsec->input_addr;
        i64 offset3 = addr3 - subsec->input_addr;

        ul32 *loc1 = (ul32 *)(loc + offset1);
        ul32 *loc2 = (ul32 *)(loc + offset2);
        ul32 *loc3 = (ul32 *)(loc + offset3);

        // We expect the following instructions:
        //
        //   adrp reg1, _foo@GOTPAGE
        //   ldr  reg2, [reg1, _foo@GOTPAGEOFF]
        //   ldr  reg3, [reg2]
        assert((*loc1 & 0x9f00'0000) == 0x9000'0000);
        assert((*loc2 & 0xbfc0'0000) == 0xb940'0000);
        assert((*loc3 & 0xbfc0'0000) == 0xb940'0000);

        // Recompute the address of the GOT slot from the immediates that
        // have already been written into the ADRP and the first LDR.
        u64 got_addr = page(subsec->get_addr(ctx) + offset1) +
                       (bits(*loc1, 23, 5) << 14) + (bits(*loc1, 30, 29) << 12) +
                       (bits(*loc2, 21, 10) << 3);
        ASSERT_RANGE(got_addr, ctx.got.hdr.addr, ctx.got.hdr.size);

        u64 got_value = *(ul64 *)(ctx.buf + ctx.got.hdr.offset + got_addr -
                                  ctx.got.hdr.addr);

        // Take a copy of the final load before anything is overwritten;
        // its destination register and access width must be carried over
        // to the replacement instruction.
        u64 insn3 = *loc3;

        // A non-zero immediate in the final load means it does not read
        // from the address stored in the GOT slot itself, so a literal
        // load of `_foo` would not be equivalent.
        bool has_offset = bits(insn3, 21, 10) != 0;

        if (got_value && !has_offset) {
          // LDR (literal) is relative to the address of the instruction
          // itself, i.e. the third instruction, which is the one we keep.
          i64 disp = got_value - subsec->get_addr(ctx) - offset3;

          // The instruction encodes disp / 4 in a signed 19-bit field.
          if ((disp & 3) == 0 && disp == sign_extend(disp, 20)) {
            // If the GOT entry has already been filled, and its value is
            // within the range of LDR, we can convert to
            //
            //   nop
            //   nop
            //   ldr reg3, _foo
            //
            // Bit 30 selects between a 32-bit and a 64-bit load both in
            // the original instruction and in LDR (literal), and the
            // low 5 bits hold the destination register.
            *loc1 = 0xd503'201f;
            *loc2 = 0xd503'201f;
            *loc3 = 0x1800'0000 | (insn3 & 0x4000'0000) |
                    (bits(disp, 20, 2) << 5) | (insn3 & 0x0000'001f);
          }
        }
        break;
      }
      default:
        // Unsupported hint type; skip its arguments.
        for (i64 i = 0; i < nargs; i++)
          read_uleb(hints);
      }
    }
  });
}
} // namespace mold::macho

View File

@ -63,6 +63,7 @@ Options:
Allocate MAXPATHLEN byte padding after load commands
-help Report usage information
-hidden-l<LIB>
-ignore_optimization_hints Do not rewrite instructions as optimization
-install_name <NAME>
-l<LIB> Search for a given library
-lto_library <FILE> Ignored
@ -387,6 +388,8 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
} else if (read_joined("-hidden-l")) {
remaining.push_back("-hidden-l");
remaining.push_back(std::string(arg));
} else if (read_flag("-ignore_optimization_hints")) {
ctx.arg.ignore_optimization_hints = true;
} else if (read_arg("-install_name") || read_arg("-dylib_install_name")) {
ctx.arg.install_name = arg;
} else if (read_joined("-l")) {

View File

@ -398,6 +398,9 @@ std::vector<std::string> ObjectFile<E>::get_linker_options(Context<E> &ctx) {
template <typename E>
LoadCommand *ObjectFile<E>::find_load_command(Context<E> &ctx, u32 type) {
if (!this->mf)
return nullptr;
MachHeader &hdr = *(MachHeader *)this->mf->data;
u8 *p = this->mf->data + sizeof(hdr);
@ -778,6 +781,16 @@ void ObjectFile<E>::parse_lto_symbols(Context<E> &ctx) {
mach_syms = mach_syms2;
}
// Returns a pointer to the start of this file's linker optimization hint
// data, i.e. the bytes at the `dataoff` recorded in its
// LC_LINKER_OPTIMIZATION_HINT load command, or nullptr if the file has
// no such load command.
template <typename E>
u8 *ObjectFile<E>::get_linker_optimization_hints(Context<E> &ctx) {
  LoadCommand *lc = find_load_command(ctx, LC_LINKER_OPTIMIZATION_HINT);
  if (!lc)
    return nullptr;

  LinkEditDataCommand *hint_cmd = (LinkEditDataCommand *)lc;
  return this->mf->data + hint_cmd->dataoff;
}
template <typename E>
DylibFile<E>::DylibFile(Context<E> &ctx, MappedFile<Context<E>> *mf)
: InputFile<E>(mf) {

View File

@ -326,6 +326,15 @@ static constexpr u32 OBJC_IMAGE_SUPPORTS_COMPACTION = 1 << 4;
static constexpr u32 OBJC_IMAGE_IS_SIMULATED = 1 << 5;
static constexpr u32 OBJC_IMAGE_HAS_CATEGORY_CLASS_PROPERTIES = 1 << 6;
// Linker optimization hint (LOH) kinds for ARM64. These are the type
// codes that can appear in the data of an LC_LINKER_OPTIMIZATION_HINT
// load command. Each name presumably spells out the instruction
// sequence the hint refers to (e.g. ADRP followed by LDR).
static constexpr u32 LOH_ARM64_ADRP_ADRP = 1;
static constexpr u32 LOH_ARM64_ADRP_LDR = 2;
static constexpr u32 LOH_ARM64_ADRP_ADD_LDR = 3;
static constexpr u32 LOH_ARM64_ADRP_LDR_GOT_LDR = 4;
static constexpr u32 LOH_ARM64_ADRP_ADD_STR = 5;
static constexpr u32 LOH_ARM64_ADRP_LDR_GOT_STR = 6;
static constexpr u32 LOH_ARM64_ADRP_ADD = 7;
static constexpr u32 LOH_ARM64_ADRP_LDR_GOT = 8;
static constexpr u32 ARM64_RELOC_UNSIGNED = 0;
static constexpr u32 ARM64_RELOC_SUBTRACTOR = 1;
static constexpr u32 ARM64_RELOC_BRANCH26 = 2;

View File

@ -1047,6 +1047,10 @@ static int do_main(int argc, char **argv) {
copy_sections_to_output_file(ctx);
if constexpr (std::is_same_v<E, ARM64>)
if (!ctx.arg.ignore_optimization_hints)
apply_linker_optimization_hints(ctx);
if (ctx.code_sig)
ctx.code_sig->write_signature(ctx);
else if (ctx.arg.uuid == UUID_HASH)

View File

@ -124,6 +124,7 @@ public:
std::function<void(ObjectFile<E> *)> feeder);
void convert_common_symbols(Context<E> &ctx);
void check_duplicate_symbols(Context<E> &ctx);
u8 *get_linker_optimization_hints(Context<E> &ctx);
Relocation<E> read_reloc(Context<E> &ctx, const MachSection &hdr, MachRel r);
@ -807,6 +808,7 @@ void do_lto(Context<E> &ctx);
//
void create_range_extension_thunks(Context<ARM64> &ctx, OutputSection<ARM64> &osec);
void apply_linker_optimization_hints(Context<ARM64> &ctx);
//
// main.cc
@ -874,13 +876,14 @@ struct Context {
bool dynamic = true;
bool export_dynamic = false;
bool fatal_warnings = false;
bool ignore_optimization_hints = false;
bool mark_dead_strippable_dylib = false;
bool noinhibit_exec = false;
bool perf = false;
bool quick_exit = true;
bool search_paths_first = true;
bool stats = false;
bool trace = false;
bool mark_dead_strippable_dylib = false;
i64 arch = CPU_TYPE_ARM64;
i64 compatibility_version = 0;
i64 current_version = 0;

View File

@ -0,0 +1,39 @@
#!/bin/bash
# Smoke test for linker optimization hints: build a two-object program
# in which main() reads a global defined in the other object, link it
# with ./ld64, and check that the resulting executable still runs and
# prints the expected output.
export LC_ALL=C
set -e
CC="${TEST_CC:-cc}"
CXX="${TEST_CXX:-c++}"
GCC="${TEST_GCC:-gcc}"
GXX="${TEST_GXX:-g++}"
OBJDUMP="${OBJDUMP:-objdump}"
MACHINE="${MACHINE:-$(uname -m)}"
testname=$(basename "$0" .sh)
echo -n "Testing $testname ... "

# Per-test scratch directory. Expansions of $t are quoted below so that a
# machine or test name containing whitespace or glob characters cannot
# split or expand the path.
t="out/test/macho/$MACHINE/$testname"
mkdir -p "$t"

# $CC is intentionally left unquoted so that it may carry extra arguments.
cat <<EOF | $CC -o "$t/a.o" -c -xc - -O2
#include <stdio.h>
int foo = 0;
void hello() {
printf("Hello world\n");
}
EOF

# `foo` lives in a.o, so main() has to load it through an external
# reference (presumably the GOT-based sequence that the hint describes).
cat <<EOF | $CC -o "$t/b.o" -c -xc - -O2
void hello();
extern int foo;
int main() {
hello();
return foo;
}
EOF

clang --ld-path=./ld64 -o "$t/exe" "$t/a.o" "$t/b.o"
"$t/exe" | grep -q 'Hello world'

echo OK