1
1
mirror of https://github.com/rui314/mold.git synced 2024-10-26 13:10:46 +03:00

Compare commits

...

7 Commits

Author SHA1 Message Date
Christian Sattler
e60398ce18
Merge 24f818820b into 742ea87d41 2024-08-15 22:13:32 +08:00
Rui Ueyama
742ea87d41 Merge .ARM.exidx entries
This change should make our output for ARM32 a little bit smaller.
2024-08-15 17:15:45 +09:00
Rui Ueyama
fa546965ec Refactor 2024-08-15 14:11:55 +09:00
Rui Ueyama
f68657d201 Refactor 2024-08-15 13:34:41 +09:00
Rui Ueyama
23bc92c66f Emit PT_GNU_PROPERTY program header if .note.gnu.property is present
It looks like Linux kernel reads this program header to enable BTI on ARM64.
2024-08-15 12:57:45 +09:00
Rui Ueyama
c2956484ca Refactor 2024-08-15 12:26:29 +09:00
Christian Sattler
24f818820b
Fix typos in design.md 2021-12-26 14:37:35 +01:00
15 changed files with 173 additions and 97 deletions

View File

@ -362,7 +362,6 @@ target_sources(mold PRIVATE
lib/compress.cc
lib/crc32.cc
lib/demangle.cc
lib/elf.cc
lib/filepath.cc
lib/glob.cc
lib/hyperloglog.cc
@ -384,6 +383,7 @@ target_sources(mold PRIVATE
src/arch-sparc64.cc
src/arch-x86-64.cc
src/config.cc
src/elf.cc
third-party/rust-demangle/rust-demangle.c
)

View File

@ -163,7 +163,7 @@ tool.
(i.e. `_start`) or a few other root sections. In mold, we are using
multiple threads to mark sections concurrently.
- Similarly, BFD, gold an lld support Identical Comdat Folding (ICF)
- Similarly, BFD, gold and lld support Identical Comdat Folding (ICF)
as yet another size optimization. ICF merges two or more read-only
sections that happen to have the same contents and relocations.
To do that, we have to find isomorphic subgraphs from larger graphs.
@ -381,7 +381,7 @@ not plan to implement and why I turned them down.
fixing the final file layout.
The other reason to reject this idea is because there's a good
chance for this idea to have a negative impact on linker's overall
chance for this idea to have a negative impact on the linker's overall
performance. If we copy file contents before fixing the layout, we
can't apply relocations to them while copying because symbol
addresses are not available yet. If we fix the file layout first, we

View File

@ -26,7 +26,6 @@
#pragma once
#include "common.h"
#include "filetype.h"
namespace mold {
@ -76,7 +75,7 @@ struct ArHdr {
}
};
template <typename Context, typename MappedFile>
template <typename Context>
std::vector<MappedFile *>
read_thin_archive_members(Context &ctx, MappedFile *mf) {
u8 *begin = mf->data;
@ -124,7 +123,7 @@ read_thin_archive_members(Context &ctx, MappedFile *mf) {
return vec;
}
template <typename Context, typename MappedFile>
template <typename Context>
std::vector<MappedFile *> read_fat_archive_members(Context &ctx, MappedFile *mf) {
u8 *begin = mf->data;
u8 *data = begin + 8;
@ -162,16 +161,13 @@ std::vector<MappedFile *> read_fat_archive_members(Context &ctx, MappedFile *mf)
return vec;
}
template <typename Context, typename MappedFile>
template <typename Context>
std::vector<MappedFile *> read_archive_members(Context &ctx, MappedFile *mf) {
switch (get_file_type(ctx, mf)) {
case FileType::AR:
std::string_view str = mf->get_contents();
if (str.starts_with("!<arch>\n"))
return read_fat_archive_members(ctx, mf);
case FileType::THIN_AR:
return read_thin_archive_members(ctx, mf);
default:
unreachable();
}
assert(str.starts_with("!<thin>\n"));
return read_thin_archive_members(ctx, mf);
}
} // namespace mold

View File

@ -8,12 +8,12 @@
// big-endian SPARC machine to create a little-endian RV64 binary.
//
// 2. Even though data members in all ELF data structures are naturally
// aligned, they are not guaranteed to be aligned on memory. Because
// archive file (.a file) aligns each member only to a 2 byte boundary,
// anything larger than 2 bytes may be unaligned in an mmap'ed memory.
// Unaligned access is an undefined behavior in C/C++, so we shouldn't
// cast an arbitrary pointer to a uint32_t, for example, to read a
// 32 bit value.
// aligned, they are not guaranteed to be aligned on memory because of
// archive files. Archive files (.a files) align each member only to a
// 2 byte boundary, so anything larger than 2 bytes may be misaligned
// in an mmap'ed memory. Misaligned access is an undefined behavior in
// C/C++, so we shouldn't cast an arbitrary pointer to a uint32_t, for
// example, to read a 32 bit value.
//
// The data types defined in this file don't depend on host byte order and
// don't do unaligned access.

View File

@ -1,3 +1,5 @@
// This file contains functions to create a tar file.
#include "common.h"
#ifdef _WIN32

View File

@ -710,6 +710,45 @@ u64 get_eflags(Context<E> &ctx) {
return EF_ARM_EABI_VER5;
}
// Swaps the first chunk whose section type is SHT_ARM_EXIDX for a
// synthetic Arm32ExidxSection that wraps it. The new section is also
// recorded in ctx.extra.exidx and handed to ctx.chunk_pool. Only the
// first matching chunk is converted; if there is none, nothing happens.
void create_arm_exidx_section(Context<E> &ctx) {
  for (auto &slot : ctx.chunks) {
    if (slot->shdr.sh_type != SHT_ARM_EXIDX)
      continue;

    // Construct the wrapper from the original section before the slot
    // is overwritten with the wrapper itself.
    auto *exidx = new Arm32ExidxSection(ctx, *slot->to_osec());
    ctx.extra.exidx = exidx;
    slot = exidx;
    ctx.chunk_pool.emplace_back(exidx);
    return;
  }
}
// Builds the synthetic .ARM.exidx chunk on top of `osec`.
//
// Keeps a reference to `osec`, fills in this chunk's name, type, flags
// and alignment, sets sh_size to the size of the data returned by
// get_contents(), inherits `osec`'s sect_order, and finally marks every
// member input section of `osec` as not alive.
Arm32ExidxSection::Arm32ExidxSection(Context<ARM32> &ctx,
OutputSection<ARM32> &osec)
: output_section(osec) {
this->name = ".ARM.exidx";
this->shdr.sh_type = SHT_ARM_EXIDX;
this->shdr.sh_flags = SHF_ALLOC;
this->shdr.sh_addralign = 4;
// get_contents() reads and writes fields of the wrapped output section,
// so it is called only after output_section has been bound above.
this->shdr.sh_size = get_contents(ctx).size();
this->sect_order = osec.sect_order;
// NOTE(review): presumably the members are killed because this chunk now
// emits their data on their behalf -- confirm against the callers.
for (InputSection<E> *isec : osec.members)
isec->is_alive = false;
}
// Points this section's sh_link at the .text chunk, if there is one.
void Arm32ExidxSection::update_shdr(Context<E> &ctx) {
  Chunk<E> *text = find_chunk(ctx, ".text");
  if (!text)
    return;

  // The runtime ignores .ARM.exidx's sh_link, but binutils's strip
  // command expects it to hold the .text section index.
  this->shdr.sh_link = text->shndx;
}
// Writes the .ARM.exidx contents returned by get_contents() into the
// output buffer at this section's file offset.
void Arm32ExidxSection::copy_buf(Context<E> &ctx) {
  std::vector<u8> contents = get_contents(ctx);

  // The size stored in the section header must match what is about to be
  // written. This has to be a comparison: an assignment here would never
  // fail and would overwrite sh_size in debug builds only.
  assert(this->shdr.sh_size == contents.size());
  write_vector(ctx.buf + this->shdr.sh_offset, contents);
}
// ARM executables use an .ARM.exidx section to look up an exception
// handling record for the current instruction pointer. The table needs
// to be sorted by their addresses.
@ -718,13 +757,12 @@ u64 get_eflags(Context<E> &ctx) {
// I don't know why only ARM uses the different mechanism, but it's
// likely that it's due to some historical reason.
//
// This function sorts .ARM.exidx records.
void fixup_arm_exidx_section(Context<E> &ctx) {
Timer t(ctx, "fixup_arm_exidx_section");
// This function returns contents of .ARM.exidx.
std::vector<u8> Arm32ExidxSection::get_contents(Context<E> &ctx) {
std::vector<u8> buf(output_section.shdr.sh_size);
OutputSection<E> *osec = find_section(ctx, SHT_ARM_EXIDX);
if (!osec)
return;
output_section.shdr.sh_addr = this->shdr.sh_addr;
output_section.write_to(ctx, buf.data());
// .ARM.exidx records consist of a signed 31-bit relative address
// and a 32-bit value. The relative address indicates the start
@ -738,24 +776,24 @@ void fixup_arm_exidx_section(Context<E> &ctx) {
//
// CANTUNWIND is value 1. The most significant bit is set in (2) but
// not in (3). So we can distinguish them just by looking at a value.
const u32 EXIDX_CANTUNWIND = 1;
const u32 CANTUNWIND = 1;
struct Entry {
ul32 addr;
ul32 val;
};
if (osec->shdr.sh_size % sizeof(Entry))
if (buf.size() % sizeof(Entry))
Fatal(ctx) << "invalid .ARM.exidx section size";
Entry *ent = (Entry *)(ctx.buf + osec->shdr.sh_offset);
i64 num_entries = osec->shdr.sh_size / sizeof(Entry);
Entry *ent = (Entry *)buf.data();
i64 num_entries = buf.size() / sizeof(Entry);
// Entry's addresses are relative to themselves. In order to sort
// records by addresses, we first translate them so that the addresses
// records by address, we first translate them so that the addresses
// are relative to the beginning of the section.
auto is_relative = [](u32 val) {
return val != EXIDX_CANTUNWIND && !(val & 0x8000'0000);
return val != CANTUNWIND && !(val & 0x8000'0000);
};
tbb::parallel_for((i64)0, num_entries, [&](i64 i) {
@ -765,10 +803,21 @@ void fixup_arm_exidx_section(Context<E> &ctx) {
ent[i].val = 0x7fff'ffff & (ent[i].val + offset);
});
tbb::parallel_sort(ent, ent + num_entries, [](const Entry &a, const Entry &b) {
std::sort(ent, ent + num_entries, [](const Entry &a, const Entry &b) {
return a.addr < b.addr;
});
// Remove duplicate adjacent entries. That is, if two adjacent functions
// have the same compact unwind info or are both CANTUNWIND, we can
// merge them into a single range.
auto it = std::unique(ent, ent + num_entries,
[](const Entry &a, const Entry &b) {
return a.val == b.val;
});
num_entries = it - ent;
buf.resize(num_entries * sizeof(Entry));
// Make addresses relative to themselves.
tbb::parallel_for((i64)0, num_entries, [&](i64 i) {
i64 offset = sizeof(Entry) * i;
@ -777,14 +826,7 @@ void fixup_arm_exidx_section(Context<E> &ctx) {
ent[i].val = 0x7fff'ffff & (ent[i].val - offset);
});
// .ARM.exidx's sh_link should be set to the .text section index.
// Runtime doesn't care about it, but the binutils's strip command does.
if (ctx.shdr) {
if (Chunk<E> *text = find_section(ctx, ".text")) {
osec->shdr.sh_link = text->shndx;
ctx.shdr->copy_buf(ctx);
}
}
return buf;
}
} // namespace mold

View File

@ -1,6 +1,6 @@
#pragma once
#include "integers.h"
#include "../lib/integers.h"
#include <concepts>
#include <ostream>
@ -191,6 +191,7 @@ enum : u32 {
PT_GNU_EH_FRAME = 0x6474e550,
PT_GNU_STACK = 0x6474e551,
PT_GNU_RELRO = 0x6474e552,
PT_GNU_PROPERTY = 0x6474e553,
PT_OPENBSD_RANDOMIZE = 0x65a3dbe6,
PT_ARM_EXIDX = 0x70000001,
PT_RISCV_ATTRIBUTES = 0x70000003,

View File

@ -1,6 +1,6 @@
#pragma once
#include "common.h"
#include "../lib/common.h"
#include "elf.h"
namespace mold {
@ -17,8 +17,7 @@ enum class FileType {
LLVM_BITCODE,
};
template <typename MappedFile>
bool is_text_file(MappedFile *mf) {
inline bool is_text_file(MappedFile *mf) {
auto istext = [](char c) {
return isprint(c) || c == '\n' || c == '\t';
};
@ -28,8 +27,8 @@ bool is_text_file(MappedFile *mf) {
istext(data[2]) && istext(data[3]);
}
template <typename E, typename Context, typename MappedFile>
inline bool is_gcc_lto_obj(Context &ctx, MappedFile *mf) {
template <typename E>
inline bool is_gcc_lto_obj(MappedFile *mf, bool has_plugin) {
const char *data = mf->get_contents().data();
ElfEhdr<E> &ehdr = *(ElfEhdr<E> *)data;
ElfShdr<E> *sh_begin = (ElfShdr<E> *)(data + ehdr.e_shoff);
@ -46,7 +45,7 @@ inline bool is_gcc_lto_obj(Context &ctx, MappedFile *mf) {
// the LTO linker plugin is available and falls back as regular
// objects otherwise. GCC FAT LTO object can be identified by the
// presence of `.gcc.lto_.symtab` section.
if (!ctx.arg.plugin.empty()) {
if (has_plugin) {
std::string_view name = data + shdrs[shstrtab_idx].sh_offset + sec.sh_name;
if (name.starts_with(".gnu.lto_.symtab."))
return true;
@ -81,9 +80,10 @@ inline bool is_gcc_lto_obj(Context &ctx, MappedFile *mf) {
return false;
}
template <typename Context, typename MappedFile>
FileType get_file_type(Context &ctx, MappedFile *mf) {
template <typename E>
FileType get_file_type(Context<E> &ctx, MappedFile *mf) {
std::string_view data = mf->get_contents();
bool has_plugin = !ctx.arg.plugin.empty();
if (data.empty())
return FileType::EMPTY;
@ -96,10 +96,10 @@ FileType get_file_type(Context &ctx, MappedFile *mf) {
if (ehdr.e_type == ET_REL) {
if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
if (is_gcc_lto_obj<I386>(ctx, mf))
if (is_gcc_lto_obj<I386>(mf, has_plugin))
return FileType::GCC_LTO_OBJ;
} else {
if (is_gcc_lto_obj<X86_64>(ctx, mf))
if (is_gcc_lto_obj<X86_64>(mf, has_plugin))
return FileType::GCC_LTO_OBJ;
}
return FileType::ELF_OBJ;
@ -112,10 +112,10 @@ FileType get_file_type(Context &ctx, MappedFile *mf) {
if (ehdr.e_type == ET_REL) {
if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
if (is_gcc_lto_obj<M68K>(ctx, mf))
if (is_gcc_lto_obj<M68K>(mf, has_plugin))
return FileType::GCC_LTO_OBJ;
} else {
if (is_gcc_lto_obj<SPARC64>(ctx, mf))
if (is_gcc_lto_obj<SPARC64>(mf, has_plugin))
return FileType::GCC_LTO_OBJ;
}
return FileType::ELF_OBJ;
@ -140,23 +140,23 @@ FileType get_file_type(Context &ctx, MappedFile *mf) {
return FileType::UNKNOWN;
}
inline std::string filetype_to_string(FileType type) {
switch (type) {
case FileType::UNKNOWN: return "UNKNOWN";
case FileType::EMPTY: return "EMPTY";
case FileType::ELF_OBJ: return "ELF_OBJ";
case FileType::ELF_DSO: return "ELF_DSO";
case FileType::AR: return "AR";
case FileType::THIN_AR: return "THIN_AR";
case FileType::TEXT: return "TEXT";
case FileType::GCC_LTO_OBJ: return "GCC_LTO_OBJ";
case FileType::LLVM_BITCODE: return "LLVM_BITCODE";
}
return "UNKNOWN";
}
inline std::ostream &operator<<(std::ostream &out, FileType type) {
out << filetype_to_string(type);
auto to_string = [&] {
switch (type) {
case FileType::UNKNOWN: return "UNKNOWN";
case FileType::EMPTY: return "EMPTY";
case FileType::ELF_OBJ: return "ELF_OBJ";
case FileType::ELF_DSO: return "ELF_DSO";
case FileType::AR: return "AR";
case FileType::THIN_AR: return "THIN_AR";
case FileType::TEXT: return "TEXT";
case FileType::GCC_LTO_OBJ: return "GCC_LTO_OBJ";
case FileType::LLVM_BITCODE: return "LLVM_BITCODE";
default: return "UNKNOWN";
}
};
out << to_string();
return out;
}

View File

@ -1,4 +1,5 @@
#include "mold.h"
#include "filetype.h"
#include "../lib/archive-file.h"
#include "../lib/output-file.h"
@ -578,6 +579,10 @@ int mold_main(int argc, char **argv) {
if (ctx.arg.pack_dyn_relocs_relr)
construct_relr(ctx);
// Convert an .ARM.exidx to a synthetic section.
if constexpr (is_arm32<E>)
create_arm_exidx_section(ctx);
// Reserve a space for dynamic symbol strings in .dynstr and sort
// .dynsym contents if necessary. Beyond this point, no symbol will
// be added to .dynsym.

View File

@ -1,7 +1,7 @@
#pragma once
#include "../lib/common.h"
#include "../lib/elf.h"
#include "elf.h"
#include <atomic>
#include <bitset>
@ -341,10 +341,10 @@ template <typename E> u64 get_dtp_addr(const ElfPhdr<E> &);
//
template <typename E>
OutputSection<E> *find_section(Context<E> &ctx, u32 sh_type);
Chunk<E> *find_chunk(Context<E> &ctx, u32 sh_type);
template <typename E>
OutputSection<E> *find_section(Context<E> &ctx, std::string_view name);
Chunk<E> *find_chunk(Context<E> &ctx, std::string_view name);
template <typename E>
u64 get_eflags(Context<E> &ctx) {
@ -1528,7 +1528,20 @@ void rewrite_endbr(Context<X86_64> &ctx);
// arch-arm32.cc
//
// Synthetic chunk for the ARM32 .ARM.exidx section. It wraps an existing
// OutputSection and writes the data produced by get_contents() to the
// output file. The member functions are defined in arch-arm32.cc.
class Arm32ExidxSection : public Chunk<ARM32> {
public:
// Creates the chunk on top of `osec`.
Arm32ExidxSection(Context<ARM32> &ctx, OutputSection<ARM32> &osec);
// Sets sh_link to the section index of .text, if such a chunk exists.
void update_shdr(Context<ARM32> &ctx) override;
// Copies the result of get_contents() into the output buffer.
void copy_buf(Context<ARM32> &ctx) override;
private:
// Returns the contents of .ARM.exidx as a byte vector.
std::vector<u8> get_contents(Context<ARM32> &ctx);
// The output section this chunk was created from.
OutputSection<ARM32> &output_section;
};
template <> u64 get_eflags(Context<ARM32> &ctx);
void create_arm_exidx_section(Context<ARM32> &ctx);
void fixup_arm_exidx_section(Context<ARM32> &ctx);
//
@ -1705,6 +1718,11 @@ struct SectionOrder {
template <typename E>
struct ContextExtras {};
// ARM32-specific additions to the linker context.
template <>
struct ContextExtras<ARM32> {
// The synthetic .ARM.exidx section; stays null unless
// create_arm_exidx_section() finds an SHT_ARM_EXIDX chunk to wrap.
Arm32ExidxSection *exidx = nullptr;
};
template <is_riscv E>
struct ContextExtras<E> {
RiscvAttributesSection<E> *riscv_attributes = nullptr;

View File

@ -25,20 +25,18 @@ static u32 elf_hash(std::string_view name) {
}
template <typename E>
OutputSection<E> *find_section(Context<E> &ctx, u32 sh_type) {
Chunk<E> *find_chunk(Context<E> &ctx, u32 sh_type) {
for (Chunk<E> *chunk : ctx.chunks)
if (OutputSection<E> *osec = chunk->to_osec())
if (osec->shdr.sh_type == sh_type)
return osec;
if (chunk->shdr.sh_type == sh_type)
return chunk;
return nullptr;
}
template <typename E>
OutputSection<E> *find_section(Context<E> &ctx, std::string_view name) {
Chunk<E> *find_chunk(Context<E> &ctx, std::string_view name) {
for (Chunk<E> *chunk : ctx.chunks)
if (OutputSection<E> *osec = chunk->to_osec())
if (osec->name == name)
return osec;
if (chunk->name == name)
return chunk;
return nullptr;
}
@ -267,6 +265,10 @@ static std::vector<ElfPhdr<E>> create_phdr(Context<E> &ctx) {
if (ctx.eh_frame_hdr)
define(PT_GNU_EH_FRAME, PF_R, ctx.eh_frame_hdr);
// Add PT_GNU_PROPERTY
if (Chunk<E> *chunk = find_chunk(ctx, ".note.gnu.property"))
define(PT_GNU_PROPERTY, PF_R, chunk);
// Add PT_GNU_STACK, which is a marker segment that doesn't really
// contain any segments. It controls executable bit of stack area.
{
@ -293,8 +295,8 @@ static std::vector<ElfPhdr<E>> create_phdr(Context<E> &ctx) {
// Create a PT_ARM_EXIDX
if constexpr (is_arm32<E>)
if (OutputSection<E> *osec = find_section(ctx, SHT_ARM_EXIDX))
define(PT_ARM_EXIDX, PF_R, osec);
if (ctx.extra.exidx)
define(PT_ARM_EXIDX, PF_R, ctx.extra.exidx);
// Create a PT_RISCV_ATTRIBUTES
if constexpr (is_riscv<E>)
@ -734,19 +736,19 @@ static std::vector<Word<E>> create_dynamic_section(Context<E> &ctx) {
define(DT_STRSZ, ctx.dynstr->shdr.sh_size);
}
if (find_section(ctx, SHT_INIT_ARRAY)) {
if (find_chunk(ctx, SHT_INIT_ARRAY)) {
define(DT_INIT_ARRAY, ctx.__init_array_start->value);
define(DT_INIT_ARRAYSZ,
ctx.__init_array_end->value - ctx.__init_array_start->value);
}
if (find_section(ctx, SHT_PREINIT_ARRAY)) {
if (find_chunk(ctx, SHT_PREINIT_ARRAY)) {
define(DT_PREINIT_ARRAY, ctx.__preinit_array_start->value);
define(DT_PREINIT_ARRAYSZ,
ctx.__preinit_array_end->value - ctx.__preinit_array_start->value);
}
if (find_section(ctx, SHT_FINI_ARRAY)) {
if (find_chunk(ctx, SHT_FINI_ARRAY)) {
define(DT_FINI_ARRAY, ctx.__fini_array_start->value);
define(DT_FINI_ARRAYSZ,
ctx.__fini_array_end->value - ctx.__fini_array_start->value);
@ -2949,8 +2951,8 @@ template class RelocSection<E>;
template class ComdatGroupSection<E>;
template class GnuDebuglinkSection<E>;
template OutputSection<E> *find_section(Context<E> &, u32);
template OutputSection<E> *find_section(Context<E> &, std::string_view);
template Chunk<E> *find_chunk(Context<E> &, u32);
template Chunk<E> *find_chunk(Context<E> &, std::string_view);
template i64 to_phdr_flags(Context<E> &ctx, Chunk<E> *chunk);
template ElfSym<E> to_output_esym(Context<E> &, Symbol<E> &, u32, U32<E> *);

View File

@ -1271,10 +1271,13 @@ void fixup_ctors_in_init_array(Context<E> &ctx) {
}
};
if (OutputSection<E> *osec = find_section(ctx, ".init_array"))
fixup(*osec);
if (OutputSection<E> *osec = find_section(ctx, ".fini_array"))
fixup(*osec);
if (Chunk<E> *chunk = find_chunk(ctx, ".init_array"))
if (OutputSection<E> *osec = chunk->to_osec())
fixup(*osec);
if (Chunk<E> *chunk = find_chunk(ctx, ".fini_array"))
if (OutputSection<E> *osec = chunk->to_osec())
fixup(*osec);
}
template <typename T>
@ -1659,9 +1662,6 @@ void copy_chunks(Context<E> &ctx) {
// undefined errors.
report_undef_errors(ctx);
if constexpr (is_arm32<E>)
fixup_arm_exidx_section(ctx);
// Zero-clear paddings between chunks
auto zero = [&](Chunk<E> *chunk, i64 next_start) {
i64 pos = chunk->shdr.sh_offset + chunk->shdr.sh_size;

View File

@ -186,7 +186,7 @@ u64 get_dtp_addr(const ElfPhdr<E> &phdr) {
using E = MOLD_TARGET;
template u64 get_tp_addr<E>(const ElfPhdr<E> &);
template u64 get_dtp_addr<E>(const ElfPhdr<E> &);
template u64 get_tp_addr(const ElfPhdr<E> &);
template u64 get_dtp_addr(const ElfPhdr<E> &);
} // namespace mold

10
test/gnu-property.sh Normal file
View File

@ -0,0 +1,10 @@
#!/bin/bash
# Checks that an executable containing a .note.gnu.property section also
# gets a GNU_PROPERTY program header.
. $(dirname $0)/common.inc
# Compile a trivial C program, fed on stdin, into an object file.
cat <<EOF | $CC -o $t/a.o -c -xc -
int main() {}
EOF
# Link a non-PIE executable. -B. makes the compiler driver search the
# current directory for its subprograms (presumably so the linker under
# test is picked up -- confirm in common.inc).
$CC -B. -o $t/exe $t/a.o -no-pie
# If the output has no .note.gnu.property section there is nothing to
# check, so call `skip` (not defined here; presumably from common.inc).
readelf -W --sections $t/exe | grep -Fqw .note.gnu.property || skip
# The section is present, so a GNU_PROPERTY segment must be listed too.
readelf -W --segments $t/exe | grep -qw GNU_PROPERTY