1
1
mirror of https://github.com/rui314/mold.git synced 2024-09-21 01:47:11 +03:00
mold/macho/object-file.cc
2021-11-16 12:51:56 +09:00

558 lines
15 KiB
C++

#include "mold.h"
#include "../archive-file.h"
namespace mold::macho {
// Writes a display name for `file` to `out`. A file without an archive
// name is printed as its cleaned path; otherwise the output has the form
// "<archive>(<member>)".
std::ostream &operator<<(std::ostream &out, const InputFile &file) {
  if (!file.archive_name.empty()) {
    out << path_clean(file.archive_name) << "("
        << path_clean(file.mf->name) << ")";
    return out;
  }

  out << path_clean(file.mf->name);
  return out;
}
// Allocates a new ObjectFile backed by `mf`, hands ownership to
// ctx.obj_pool and returns a non-owning pointer to it. The file starts
// out alive only if `archive_name` is empty.
ObjectFile *ObjectFile::create(Context &ctx, MappedFile<Context> *mf,
                               std::string archive_name) {
  std::unique_ptr<ObjectFile> owner(new ObjectFile);
  ObjectFile *file = owner.get();

  file->mf = mf;
  file->is_alive = archive_name.empty();
  file->archive_name = archive_name;

  ctx.obj_pool.push_back(std::move(owner));
  return file;
}
// Runs all parsing passes over this object file, in order: sections,
// symbol table, subsection splitting, data-in-code entries, per-section
// relocations and, if an unwind section was recorded, compact unwind
// info.
void ObjectFile::parse(Context &ctx) {
  parse_sections(ctx);
  parse_symtab(ctx);
  split_subsections(ctx);
  parse_data_in_code(ctx);

  // Entries in `sections` may be null (see parse_sections); skip those.
  for (std::unique_ptr<InputSection> &sec : sections) {
    if (!sec)
      continue;
    sec->parse_relocations(ctx);
  }

  if (unwind_sec != nullptr)
    parse_compact_unwind(ctx, *unwind_sec);
}
// Walks the load commands and, for every section header found in an
// LC_SEGMENT_64 command, appends one slot to `sections`.
//
// The slot stays null for __LD,__compact_unwind (which is remembered in
// `unwind_sec` instead) and for sections carrying S_ATTR_DEBUG. Every
// other section gets a freshly created InputSection. Because a slot is
// pushed for every header, positions in `sections` follow the order of
// the section headers.
void ObjectFile::parse_sections(Context &ctx) {
  MachHeader &hdr = *(MachHeader *)mf->data;
  u8 *cur = mf->data + sizeof(hdr);

  for (i64 i = 0; i < hdr.ncmds; i++) {
    LoadCommand &lc = *(LoadCommand *)cur;
    cur += lc.cmdsize;

    if (lc.cmd == LC_SEGMENT_64) {
      SegmentCommand &seg = *(SegmentCommand *)&lc;

      // Section headers immediately follow the segment command.
      MachSection *first = (MachSection *)((u8 *)&lc + sizeof(seg));
      MachSection *last = first + seg.nsects;

      for (MachSection *msec = first; msec != last; msec++) {
        sections.push_back(nullptr);

        if (msec->match("__LD", "__compact_unwind"))
          unwind_sec = msec;
        else if (!(msec->attr & S_ATTR_DEBUG))
          sections.back().reset(new InputSection(ctx, *this, *msec));
      }
    }
  }
}
// Reads the LC_SYMTAB command (if present) and fills `mach_syms`,
// `local_syms` and `syms`.
//
// External symbols are interned through `intern`; non-external symbols
// are created in `local_syms` and referenced from `syms` by address.
void ObjectFile::parse_symtab(Context &ctx) {
  SymtabCommand *symtab = (SymtabCommand *)find_load_command(ctx, LC_SYMTAB);
  if (symtab == nullptr)
    return;

  mach_syms = {(MachSym *)(mf->data + symtab->symoff), symtab->nsyms};
  syms.reserve(mach_syms.size());

  // Count the locals first so that local_syms can be reserved in one go.
  // `syms` keeps pointers to elements of local_syms, so presumably the
  // reservation is what keeps those pointers stable while appending.
  i64 num_locals = 0;
  for (MachSym &msym : mach_syms)
    num_locals += msym.ext ? 0 : 1;
  local_syms.reserve(num_locals);

  u8 *strtab = mf->data + symtab->stroff;

  for (MachSym &msym : mach_syms) {
    std::string_view name = (char *)(strtab + msym.stroff);

    if (!msym.ext) {
      local_syms.emplace_back(name);
      syms.push_back(&local_syms.back());
      continue;
    }
    syms.push_back(intern(ctx, name));
  }
}
// Work item produced by split(): one input section together with the
// regions it is to be divided into.
struct SplitInfo {
// A contiguous byte range of the section.
struct Region {
// Start of the region, relative to the section's address.
u32 offset;
// Length of the region in bytes; filled in by split() after sorting.
u32 size;
// Index into mach_syms of the symbol starting this region, or (u32)-1
// if the region is not introduced by a symbol.
u32 symidx;
// True if the symbol's desc has N_ALT_ENTRY set.
bool is_alt_entry;
};
// Section being split.
InputSection *isec = nullptr;
// Regions of `isec`; split() sorts them by offset.
std::vector<Region> regions;
};
// Computes, for every non-null section of `file`, the list of regions
// delimited by the N_SECT symbols that point into it.
//
// The result is sorted by section address. Within each section the
// regions are sorted by offset and cover the section from offset 0 to
// its end: a leading symbol-less region is added when the first symbol
// does not start at offset 0, and a section without any symbol becomes a
// single symbol-less region.
//
// NOTE(review): an N_SECT symbol is assumed to refer to a section that
// has a non-null entry in file.sections — confirm for skipped sections.
static std::vector<SplitInfo> split(Context &ctx, ObjectFile &file) {
  std::vector<SplitInfo> infos;
  for (std::unique_ptr<InputSection> &isec : file.sections)
    infos.push_back({isec.get()});

  // Record one region start per section-defined symbol. Section numbers
  // in symbols are 1-based.
  for (i64 i = 0; i < file.mach_syms.size(); i++) {
    MachSym &msym = file.mach_syms[i];
    if (msym.type != N_SECT)
      continue;

    SplitInfo::Region region;
    region.offset = msym.value - file.sections[msym.sect - 1]->hdr.addr;
    region.symidx = i;
    region.is_alt_entry = (msym.desc & N_ALT_ENTRY);
    infos[msym.sect - 1].regions.push_back(region);
  }

  // Drop the placeholders of sections that were not instantiated.
  erase(infos, [](const SplitInfo &info) { return !info.isec; });

  sort(infos, [](const SplitInfo &a, const SplitInfo &b) {
    return a.isec->hdr.addr < b.isec->hdr.addr;
  });

  for (SplitInfo &info : infos) {
    std::vector<SplitInfo::Region> &regions = info.regions;

    if (regions.empty()) {
      regions.push_back({0, (u32)info.isec->hdr.size, (u32)-1, false});
      continue;
    }

    sort(regions, [](const SplitInfo::Region &a, const SplitInfo::Region &b) {
      return a.offset < b.offset;
    });

    if (regions[0].offset > 0) {
      SplitInfo::Region head = {0, regions[0].offset, (u32)-1, false};
      regions.insert(regions.begin(), head);
    }

    // Each region extends up to the start of the next one; the last one
    // extends to the end of the section.
    for (i64 i = 1; i < regions.size(); i++)
      regions[i - 1].size = regions[i].offset - regions[i - 1].offset;
    regions.back().size = info.isec->hdr.size - regions.back().offset;
  }
  return infos;
}
// Creates Subsection objects from the regions computed by split() and
// records, for each symbol that starts a region, the index of the
// subsection it belongs to. Afterwards every non-external symbol is
// bound to this file via override_symbol().
void ObjectFile::split_subsections(Context &ctx) {
  sym_to_subsec.resize(mach_syms.size());

  std::vector<SplitInfo> infos = split(ctx, *this);

  for (SplitInfo &info : infos) {
    for (SplitInfo::Region &region : info.regions) {
      // An alt-entry region does not open a new subsection; its symbol
      // is mapped to the most recently created one below.
      if (!region.is_alt_entry) {
        subsections.push_back(std::unique_ptr<Subsection>(new Subsection{
          .isec = *info.isec,
          .input_offset = region.offset,
          .input_size = region.size,
          .input_addr = (u32)(info.isec->hdr.addr + region.offset),
          .p2align = (u8)info.isec->hdr.p2align,
        }));
      }

      // (u32)-1 marks a region that was not introduced by a symbol.
      if (region.symidx != (u32)-1)
        sym_to_subsec[region.symidx] = subsections.size() - 1;
    }
  }

  for (i64 i = 0; i < mach_syms.size(); i++) {
    if (mach_syms[i].ext)
      continue;
    override_symbol(ctx, i);
  }
}
// Points `data_in_code_entries` at the DataInCodeEntry array referenced
// by the LC_DATA_IN_CODE command. Does nothing if the file has no such
// command.
void ObjectFile::parse_data_in_code(Context &ctx) {
  auto *cmd = (LinkEditDataCommand *)find_load_command(ctx, LC_DATA_IN_CODE);
  if (!cmd)
    return;

  DataInCodeEntry *first = (DataInCodeEntry *)(mf->data + cmd->dataoff);
  data_in_code_entries = {first, cmd->datasize / sizeof(DataInCodeEntry)};
}
// Returns the first load command whose `cmd` field equals `type`, or
// nullptr if the file has none.
LoadCommand *ObjectFile::find_load_command(Context &ctx, u32 type) {
  MachHeader &hdr = *(MachHeader *)mf->data;

  // Load commands start right after the header; each one is `cmdsize`
  // bytes long.
  LoadCommand *lc = (LoadCommand *)(mf->data + sizeof(hdr));

  for (i64 remaining = hdr.ncmds; remaining > 0; remaining--) {
    if (lc->cmd == type)
      return lc;
    lc = (LoadCommand *)((u8 *)lc + lc->cmdsize);
  }
  return nullptr;
}
// Returns the index of the last subsection whose input_addr is not
// greater than `addr`, or -1 if every subsection starts after `addr`.
// The binary search relies on `subsections` being ordered by input_addr.
i64 ObjectFile::find_subsection_idx(Context &ctx, u32 addr) {
  auto starts_after = [](u32 a, const std::unique_ptr<Subsection> &subsec) {
    return a < subsec->input_addr;
  };

  auto first_after = std::upper_bound(subsections.begin(), subsections.end(),
                                      addr, starts_after);

  // If nothing precedes `first_after`, the distance is 0 and the result
  // is -1.
  i64 num_before = first_after - subsections.begin();
  return num_before - 1;
}
// Pointer-returning wrapper around find_subsection_idx(): yields the
// subsection found for `addr`, or nullptr if the index lookup returns -1.
Subsection *ObjectFile::find_subsection(Context &ctx, u32 addr) {
  i64 idx = find_subsection_idx(ctx, addr);
  if (idx == -1)
    return nullptr;
  return subsections[idx].get();
}
// Parses the compact unwind section described by `hdr`.
//
// One UnwindRecord is created per CompactUnwindEntry. The section's
// relocations are then used to fill in each record's target subsection
// (code_start), personality symbol and LSDA location. Finally the
// records are sorted by address and each subsection is told which range
// of records belongs to it (unwind_offset / nunwind).
//
// Malformed input (bad section size, out-of-range or unsupported
// relocation, entry without a code_start relocation) is reported through
// Fatal, always prefixed with this file's name.
void ObjectFile::parse_compact_unwind(Context &ctx, MachSection &hdr) {
  if (hdr.size % sizeof(CompactUnwindEntry))
    Fatal(ctx) << *this << ": invalid __compact_unwind section size";

  i64 num_entries = hdr.size / sizeof(CompactUnwindEntry);
  unwind_records.reserve(num_entries);

  CompactUnwindEntry *src = (CompactUnwindEntry *)(mf->data + hdr.offset);

  // Read compact unwind entries
  for (i64 i = 0; i < num_entries; i++)
    unwind_records.emplace_back(src[i].code_len, src[i].encoding);

  // Read relocations
  MachRel *mach_rels = (MachRel *)(mf->data + hdr.reloff);

  for (i64 i = 0; i < hdr.nreloc; i++) {
    MachRel &r = mach_rels[i];
    if (r.offset >= hdr.size)
      Fatal(ctx) << *this << ": relocation offset too large: " << i;

    // The relocation offset selects both the entry and the field in it.
    i64 idx = r.offset / sizeof(CompactUnwindEntry);
    UnwindRecord &dst = unwind_records[idx];

    auto error = [&]() {
      Fatal(ctx) << *this << ": __compact_unwind: unsupported relocation: " << i;
    };

    switch (r.offset % sizeof(CompactUnwindEntry)) {
    case offsetof(CompactUnwindEntry, code_start): {
      if (r.is_pcrel || r.p2size != 3 || r.is_extern || r.type)
        error();

      Subsection *target = find_subsection(ctx, src[idx].code_start);
      if (!target)
        error();

      dst.subsec = target;
      dst.offset = src[idx].code_start - target->input_addr;
      break;
    }
    case offsetof(CompactUnwindEntry, personality):
      // Unlike the other two fields, this one must be symbol-based.
      if (r.is_pcrel || r.p2size != 3 || !r.is_extern || r.type)
        error();
      dst.personality = syms[r.idx];
      break;
    case offsetof(CompactUnwindEntry, lsda): {
      if (r.is_pcrel || r.p2size != 3 || r.is_extern || r.type)
        error();

      i32 addr = *(i32 *)((u8 *)mf->data + hdr.offset + r.offset);
      Subsection *target = find_subsection(ctx, addr);
      if (!target)
        error();

      dst.lsda = target;
      dst.lsda_offset = addr - target->input_addr;
      break;
    }
    default:
      // A relocation against any other field is not supported.
      error();
    }
  }

  // Every entry needs a code_start relocation; the sort and grouping
  // below dereference `subsec`.
  for (i64 i = 0; i < num_entries; i++)
    if (!unwind_records[i].subsec)
      Fatal(ctx) << *this << ": __compact_unwind: missing relocation at " << i;

  // Sort unwind entries by offset
  sort(unwind_records, [](const UnwindRecord &a, const UnwindRecord &b) {
    return std::tuple(a.subsec->input_addr, a.offset) <
           std::tuple(b.subsec->input_addr, b.offset);
  });

  // Associate unwind entries to subsections. After sorting, the records
  // of one subsection are adjacent, so each subsection gets a
  // [unwind_offset, unwind_offset + nunwind) range.
  for (i64 i = 0; i < num_entries;) {
    Subsection &subsec = *unwind_records[i].subsec;
    subsec.unwind_offset = i;

    i64 j = i + 1;
    while (j < num_entries && unwind_records[j].subsec == &subsec)
      j++;

    subsec.nunwind = j - i;
    i = j;
  }
}
// Symbols with higher priorities overwrite symbols with lower priorities.
// Here is the list of priorities, from the highest to the lowest.
//
// 1. Strong defined symbol
// 2. Weak defined symbol
// 3. Strong defined symbol in a DSO
// 4. Weak defined symbol in a DSO
// 5. Strong or weak defined symbol in an archive
// 6. Common symbol
// 7. Unclaimed (nonexistent) symbol
//
// Ties are broken by file priority.
// Computes the rank a definition `msym` from `file` would have. A lower
// value wins. The tier occupies the bits from bit 24 upward and the
// file's priority is added as a tie-breaker.
//
// Tiers used here: 6 for a common symbol, 5 for a lazy candidate, 3 for
// a dylib definition and 1 for everything else.
static u64 get_rank(InputFile *file, MachSym &msym, bool is_lazy) {
  int tier;

  if (msym.is_common())
    tier = 6;
  else if (is_lazy)
    tier = 5;
  else if (file->is_dylib)
    tier = 3;
  else
    tier = 1;

  return (tier << 24) + file->priority;
}
// Computes the rank of the definition currently recorded in `sym`, using
// the same encoding as the (file, msym, is_lazy) overload: tier shifted
// left by 24 plus the owning file's priority.
//
// A symbol without a file gets tier 7 and no priority component. A
// symbol whose file has a non-empty archive_name gets tier 5.
static u64 get_rank(Symbol &sym) {
  InputFile *file = sym.file;
  if (!file)
    return 7 << 24;

  int tier;

  if (sym.is_common)
    tier = 6;
  else if (!file->archive_name.empty())
    tier = 5;
  else if (file->is_dylib)
    tier = 3;
  else
    tier = 1;

  return (tier << 24) + file->priority;
}
// Makes this file the owner of the symbol at index `idx` and copies the
// definition from the corresponding MachSym.
//
//  - N_UNDF: recorded as a common symbol (asserted via is_common());
//    value is taken verbatim and no subsection is attached.
//  - N_ABS:  value is taken verbatim, no subsection.
//  - N_SECT: bound to the subsection recorded in sym_to_subsec, with the
//    value made relative to that subsection's input address.
//
// Any other type is reported through Fatal. The caller is expected to
// hold whatever lock protects the symbol, since none is taken here.
void ObjectFile::override_symbol(Context &ctx, i64 idx) {
  MachSym &msym = mach_syms[idx];
  Symbol &sym = *syms[idx];

  sym.file = this;
  sym.is_extern = msym.ext;
  sym.is_lazy = false;

  const bool is_undf = (msym.type == N_UNDF);

  if (is_undf || msym.type == N_ABS) {
    assert(!is_undf || msym.is_common());
    sym.subsec = nullptr;
    sym.value = msym.value;
    sym.is_common = is_undf;
    return;
  }

  if (msym.type == N_SECT) {
    Subsection *subsec = subsections[sym_to_subsec[idx]].get();
    sym.subsec = subsec;
    sym.value = msym.value - subsec->input_addr;
    sym.is_common = false;
    return;
  }

  Fatal(ctx) << sym << ": unknown symbol type: " << (u64)msym.type;
}
// For every external, defined symbol of this file, claims the symbol if
// this file's definition ranks better (lower) than the one currently
// recorded. Each symbol is updated under its own mutex.
void ObjectFile::resolve_regular_symbols(Context &ctx) {
  for (i64 i = 0; i < syms.size(); i++) {
    MachSym &msym = mach_syms[i];

    if (msym.ext && !msym.is_undef()) {
      Symbol &sym = *syms[i];
      std::lock_guard lock(sym.mu);

      bool wins = get_rank(this, msym, false) < get_rank(sym);
      if (wins)
        override_symbol(ctx, i);
    }
  }
}
// Registers the external, defined, non-common symbols of this file as
// lazy candidates: the symbol records this file as its provider but
// carries no subsection or value yet (is_lazy = true).
//
// The candidate is ranked with is_lazy = true, i.e. in the archive tier
// of get_rank(). This keeps a lazy candidate from displacing a regular
// or dylib definition, and makes competing lazy candidates resolve by
// file priority rather than by processing order.
void ObjectFile::resolve_lazy_symbols(Context &ctx) {
  for (i64 i = 0; i < syms.size(); i++) {
    MachSym &msym = mach_syms[i];
    if (!msym.ext || msym.is_undef() || msym.is_common())
      continue;

    Symbol &sym = *syms[i];
    std::lock_guard lock(sym.mu);

    if (get_rank(this, msym, true) < get_rank(sym)) {
      sym.file = this;
      sym.subsec = nullptr;
      sym.value = 0;
      sym.is_extern = false;
      sym.is_lazy = true;
      sym.is_common = false;
    }
  }
}
// Scans the external symbols of this (live) file.
//
// For an undefined reference, the file currently recorded as the
// symbol's provider is marked alive; if this call is the one that
// flipped its is_alive flag, the file is added to the returned list.
// For a defined symbol, this file claims it when its definition ranks
// better than the recorded one.
//
// NOTE(review): the provider is cast to ObjectFile* unconditionally —
// confirm that a dylib can never reach the push_back below.
std::vector<ObjectFile *> ObjectFile::mark_live_objects(Context &ctx) {
  assert(is_alive);
  std::vector<ObjectFile *> newly_live;

  for (i64 i = 0; i < syms.size(); i++) {
    MachSym &msym = mach_syms[i];

    if (msym.ext) {
      Symbol &sym = *syms[i];
      std::lock_guard lock(sym.mu);

      if (!msym.is_undef()) {
        if (get_rank(this, msym, false) < get_rank(sym))
          override_symbol(ctx, i);
      } else if (sym.file && !sym.file->is_alive.exchange(true)) {
        newly_live.push_back((ObjectFile *)sym.file);
      }
    }
  }
  return newly_live;
}
// Turns every common symbol owned by this file into a regular one by
// giving it its own subsection in the section returned by
// get_common_sec(). The subsection's size is the MachSym's value and its
// alignment is the MachSym's p2align; the symbol ends up at offset 0 of
// that subsection.
void ObjectFile::convert_common_symbols(Context &ctx) {
  for (i64 i = 0; i < syms.size(); i++) {
    Symbol &sym = *syms[i];
    if (sym.file != this || !sym.is_common)
      continue;

    MachSym &msym = mach_syms[i];
    InputSection *common = get_common_sec(ctx);

    Subsection *subsec = new Subsection{
      .isec = *common,
      .input_size = (u32)msym.value,
      .p2align = (u8)msym.p2align,
    };
    subsections.push_back(std::unique_ptr<Subsection>(subsec));

    sym.subsec = subsec;
    sym.value = 0;
    sym.is_common = false;
  }
}
// Reports an error for every symbol that this file defines (neither
// undefined nor common) but that is recorded as belonging to a different
// file.
void ObjectFile::check_duplicate_symbols(Context &ctx) {
  for (i64 i = 0; i < syms.size(); i++) {
    Symbol *sym = syms[i];
    MachSym &msym = mach_syms[i];

    if (!sym || msym.is_undef() || msym.is_common())
      continue;
    if (sym->file == this)
      continue;

    Error(ctx) << "duplicate symbol: " << *this << ": " << *sym->file
               << ": " << *sym;
  }
}
// Returns this file's synthetic __DATA,__common section, creating it on
// first use. The section header is zero-initialised, typed S_ZEROFILL
// and owned by `common_hdr`; the section itself is appended to
// `sections`.
InputSection *ObjectFile::get_common_sec(Context &ctx) {
  if (common_sec)
    return common_sec;

  MachSection *hdr = new MachSection;
  common_hdr.reset(hdr);

  memset(hdr, 0, sizeof(*hdr));
  hdr->set_segname("__DATA");
  hdr->set_sectname("__common");
  hdr->type = S_ZEROFILL;

  common_sec = new InputSection(ctx, *this, *hdr);
  sections.push_back(std::unique_ptr<InputSection>(common_sec));
  return common_sec;
}
// Allocates a new DylibFile backed by `mf`, hands ownership to
// ctx.dylib_pool and returns a non-owning pointer to it.
DylibFile *DylibFile::create(Context &ctx, MappedFile<Context> *mf) {
  std::unique_ptr<DylibFile> owner(new DylibFile);
  DylibFile *file = owner.get();

  file->mf = mf;
  ctx.dylib_pool.push_back(std::move(owner));
  return file;
}
// Recursively walks the export trie that begins at `start`, visiting the
// node at `start + offset`. `prefix` is the string accumulated along the
// path to this node.
//
// A node whose first byte is non-zero contributes `prefix` as an
// exported symbol; its three ULEB fields (size, flags, addr) are read
// and discarded. Then comes a one-byte child count, and for each child a
// NUL-terminated edge label followed by the ULEB offset of the child
// node relative to `start`.
void DylibFile::read_trie(Context &ctx, u8 *start, i64 offset = 0,
                          const std::string &prefix = "") {
  u8 *p = start + offset;

  if (*p == 0) {
    p++;
  } else {
    read_uleb(p); // size
    read_uleb(p); // flags
    read_uleb(p); // addr
    syms.push_back(intern(ctx, prefix));
  }

  for (i64 remaining = *p++; remaining > 0; remaining--) {
    std::string label((char *)p);
    p += label.size() + 1;

    i64 child_offset = read_uleb(p);
    read_trie(ctx, start, child_offset, prefix + label);
  }
}
// Scans the load commands of a Mach-O dylib.
//
//  - LC_ID_DYLIB:          records the install name.
//  - LC_DYLD_INFO_ONLY:    reads the export trie if export_off is set.
//  - LC_DYLD_EXPORTS_TRIE: reads the export trie at dataoff.
//
// All other commands are skipped.
void DylibFile::parse_dylib(Context &ctx) {
  MachHeader &hdr = *(MachHeader *)mf->data;
  u8 *cur = mf->data + sizeof(hdr);

  for (i64 i = 0; i < hdr.ncmds; i++) {
    LoadCommand &lc = *(LoadCommand *)cur;

    if (lc.cmd == LC_ID_DYLIB) {
      DylibCommand &cmd = *(DylibCommand *)cur;
      // The name is stored inside the command, at offset nameoff.
      install_name = (char *)cur + cmd.nameoff;
    } else if (lc.cmd == LC_DYLD_INFO_ONLY) {
      DyldInfoCommand &cmd = *(DyldInfoCommand *)cur;
      if (cmd.export_off)
        read_trie(ctx, mf->data + cmd.export_off);
    } else if (lc.cmd == LC_DYLD_EXPORTS_TRIE) {
      LinkEditDataCommand &cmd = *(LinkEditDataCommand *)cur;
      read_trie(ctx, mf->data + cmd.dataoff);
    }

    cur += lc.cmdsize;
  }
}
// Loads the exported symbols and install name of this dylib. A TAPI
// (text-based stub) file is parsed with parse_tbd(); a Mach-O dylib is
// handled by parse_dylib(). Any other file type is reported via Fatal.
void DylibFile::parse(Context &ctx) {
  FileType type = get_file_type(mf);

  if (type == FileType::MACH_DYLIB) {
    parse_dylib(ctx);
    return;
  }

  if (type == FileType::TAPI) {
    TextDylib tbd = parse_tbd(ctx, mf);
    for (std::string_view name : tbd.exports)
      syms.push_back(intern(ctx, name));
    install_name = tbd.install_name;
    return;
  }

  Fatal(ctx) << mf->name << ": is not a dylib";
}
// Claims every exported symbol of this dylib unless it is already owned
// by a file with a numerically smaller priority. Each symbol is updated
// under its own mutex.
void DylibFile::resolve_symbols(Context &ctx) {
  for (Symbol *sym : syms) {
    std::lock_guard lock(sym->mu);

    bool kept_by_owner = sym->file && sym->file->priority < priority;
    if (!kept_by_owner) {
      sym->file = this;
      sym->is_extern = true;
    }
  }
}
} // namespace mold::macho