1
1
mirror of https://github.com/rui314/mold.git synced 2025-01-07 18:02:15 +03:00
mold/object_file.cc

845 lines
23 KiB
C++
Raw Normal View History

2020-10-20 08:54:35 +03:00
#include "mold.h"
2020-10-04 12:00:33 +03:00
2020-11-04 04:39:17 +03:00
#include <cstring>
2020-12-22 10:33:44 +03:00
#include <fcntl.h>
2020-11-11 08:43:07 +03:00
#include <regex>
2020-12-22 10:33:44 +03:00
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
2020-12-22 12:33:16 +03:00
// Creates a MemoryMappedFile describing `path` without reading its contents.
// Only stat(2) is consulted here: the object is constructed with a null data
// pointer, the file size and the modification time expressed in nanoseconds.
// Returns nullptr if stat(2) fails.
MemoryMappedFile *MemoryMappedFile::open(std::string path) {
  struct stat st;
  const bool stat_ok = (stat(path.c_str(), &st) != -1);
  if (!stat_ok)
    return nullptr;

  // Combine seconds and nanoseconds into one 64-bit timestamp.
  const u64 sec = (u64)st.st_mtim.tv_sec;
  const u64 mtime = sec * 1000000000 + st.st_mtim.tv_nsec;

  return new MemoryMappedFile(path, nullptr, st.st_size, mtime);
}
// Like open(), but reports "cannot open <path>" through Error() when the
// file cannot be opened instead of handing nullptr back to the caller.
// NOTE(review): Error() is presumably fatal (the original relied on it not
// returning) -- confirm against its definition.
MemoryMappedFile *MemoryMappedFile::must_open(std::string path) {
  MemoryMappedFile *mb = MemoryMappedFile::open(path);
  if (!mb)
    Error() << "cannot open " << path;
  return mb;
}
2020-12-22 10:33:44 +03:00
// Returns a pointer to the file contents, mapping the file read-only on the
// first call. Objects constructed with a non-null buffer never reach the
// mapping code. Failures to open or map the file are reported via Error().
u8 *MemoryMappedFile::data() {
  // Fast path: contents are already available.
  if (data_)
    return data_;

  // Slow path: take the lock and re-check so the file is mapped only once.
  std::lock_guard lock(mu);
  if (data_)
    return data_;

  int fd = ::open(name.c_str(), O_RDONLY);
  if (fd == -1)
    Error() << name << ": cannot open: " << strerror(errno);

  // Map into a local first. data_ must never hold MAP_FAILED, because the
  // unlocked check at the top of this function would treat it as a valid
  // mapping. The error is reported before close() so errno is still mmap's.
  void *addr = mmap(nullptr, size_, PROT_READ, MAP_PRIVATE, fd, 0);
  if (addr == MAP_FAILED)
    Error() << name << ": mmap failed: " << strerror(errno);
  close(fd);

  data_ = (u8 *)addr;
  return data_;
}
2020-12-22 11:37:49 +03:00
// Creates a child MemoryMappedFile named `name` that views `size` bytes of
// this file starting at byte offset `start`. The child records this object
// as its parent.
MemoryMappedFile *MemoryMappedFile::slice(std::string name, u64 start, u64 size) {
  // Go through data() rather than data_: a file created by open() is mapped
  // lazily, so data_ may still be null here and `data_ + start` would be a
  // bogus pointer.
  MemoryMappedFile *mb = new MemoryMappedFile(name, data() + start, size);
  mb->parent = this;
  return mb;
}
2020-11-04 04:39:17 +03:00
2020-12-22 11:37:49 +03:00
// Binds this InputFile to the mapped file `mb`, validates that it is large
// enough to hold an ELF header and starts with the ELF magic, and sets
// elf_sections to the section header table described by e_shoff/e_shnum.
//
// NOTE(review): the member initializers read ehdr.e_type before the
// "file too small" check below runs -- confirm whether that matters for
// inputs shorter than an ELF header.
InputFile::InputFile(MemoryMappedFile *mb)
  : mb(mb), name(mb->name), ehdr(*(ElfEhdr *)mb->data()), is_dso(ehdr.e_type == ET_DYN) {
  if (mb->size() < sizeof(ElfEhdr))
    Error() << *this << ": file too small";
  if (memcmp(mb->data(), "\177ELF", 4))
    Error() << *this << ": not an ELF file";

  // Validate the section header table with integer arithmetic. Comparing
  // computed pointers (as was done before) can wrap around for a huge
  // e_shoff and let a corrupted header pass the check.
  u64 file_size = mb->size();
  u64 sh_off = ehdr.e_shoff;
  u64 sh_size = (u64)ehdr.e_shnum * sizeof(ElfShdr);
  if (sh_off > file_size || sh_size > file_size - sh_off)
    Error() << *this << ": e_shoff or e_shnum corrupted: "
            << mb->size() << " " << ehdr.e_shnum;

  u8 *sh_begin = mb->data() + sh_off;
  u8 *sh_end = sh_begin + sh_size;
  elf_sections = {(ElfShdr *)sh_begin, (ElfShdr *)sh_end};
}
2020-12-22 10:33:44 +03:00
std::string_view InputFile::get_string(const ElfShdr &shdr) {
2020-12-22 11:37:49 +03:00
u8 *begin = mb->data() + shdr.sh_offset;
2020-12-12 06:46:24 +03:00
u8 *end = begin + shdr.sh_size;
2020-12-22 11:37:49 +03:00
if (mb->data() + mb->size() < end)
2021-01-09 06:41:32 +03:00
Error() << *this << ": shdr corrupted";
2020-12-12 06:46:24 +03:00
return {(char *)begin, (char *)end};
}
2020-12-22 10:33:44 +03:00
// Returns the contents of the section whose header index is `idx`.
// Reports "invalid section index" if `idx` is past the end of elf_sections.
std::string_view InputFile::get_string(u32 idx) {
  const bool in_range = idx < elf_sections.size();
  if (!in_range)
    Error() << *this << ": invalid section index";

  const ElfShdr &shdr = elf_sections[idx];
  return get_string(shdr);
}
template<typename T>
2020-12-22 10:33:44 +03:00
std::span<T> InputFile::get_data(const ElfShdr &shdr) {
2020-12-12 06:46:24 +03:00
std::string_view view = get_string(shdr);
if (view.size() % sizeof(T))
2021-01-09 06:41:32 +03:00
Error() << *this << ": corrupted section";
2020-12-12 06:46:24 +03:00
return {(T *)view.data(), view.size() / sizeof(T)};
}
template<typename T>
2020-12-22 10:33:44 +03:00
std::span<T> InputFile::get_data(u32 idx) {
2020-12-12 06:46:24 +03:00
if (elf_sections.size() <= idx)
2021-01-09 06:41:32 +03:00
Error() << *this << ": invalid section index";
2020-12-12 06:46:24 +03:00
return get_data<T>(elf_sections[idx]);
}
2020-12-13 14:40:01 +03:00
// Returns a pointer to the first section header whose sh_type equals `type`,
// or nullptr if the file has no such section.
ElfShdr *InputFile::find_section(u32 type) {
  for (size_t i = 0; i < elf_sections.size(); i++) {
    ElfShdr &candidate = elf_sections[i];
    if (candidate.sh_type == type)
      return &candidate;
  }
  return nullptr;
}
2020-12-22 11:37:49 +03:00
// Constructs an object file backed by `mb`. A non-empty `archive_name` marks
// the file as an archive member; such files start out not alive, while files
// given directly (empty archive name) start out alive.
ObjectFile::ObjectFile(MemoryMappedFile *mb, std::string archive_name)
  : InputFile(mb), archive_name(archive_name),
    is_in_archive(!archive_name.empty()) {
  is_alive = !is_in_archive;
}
2020-10-04 12:00:33 +03:00
2020-10-19 14:05:34 +03:00
void ObjectFile::initialize_sections() {
2020-11-07 05:53:32 +03:00
// Read sections
2020-10-19 14:17:32 +03:00
for (int i = 0; i < elf_sections.size(); i++) {
2020-12-10 09:59:24 +03:00
const ElfShdr &shdr = elf_sections[i];
2020-10-19 14:17:32 +03:00
2020-10-22 16:42:09 +03:00
if ((shdr.sh_flags & SHF_EXCLUDE) && !(shdr.sh_flags & SHF_ALLOC))
continue;
2020-10-19 14:17:32 +03:00
switch (shdr.sh_type) {
case SHT_GROUP: {
// Get the signature of this section group.
if (shdr.sh_info >= elf_syms.size())
2021-01-09 06:41:32 +03:00
Error() << *this << ": invalid symbol index";
2020-12-10 09:10:18 +03:00
const ElfSym &sym = elf_syms[shdr.sh_info];
2020-12-10 09:31:54 +03:00
std::string_view signature = symbol_strtab.data() + sym.st_name;
2020-10-27 14:15:57 +03:00
2020-10-19 15:32:57 +03:00
// Get comdat group members.
2020-12-12 06:38:56 +03:00
std::span<u32> entries = get_data<u32>(shdr);
2020-12-10 12:23:13 +03:00
2020-10-19 15:32:57 +03:00
if (entries.empty())
2021-01-09 06:41:32 +03:00
Error() << *this << ": empty SHT_GROUP";
2020-10-19 15:32:57 +03:00
if (entries[0] == 0)
continue;
if (entries[0] != GRP_COMDAT)
2021-01-09 06:41:32 +03:00
Error() << *this << ": unsupported SHT_GROUP format";
2020-10-19 14:17:32 +03:00
static ConcurrentMap<ComdatGroup> map;
ComdatGroup *group = map.insert(signature, ComdatGroup(nullptr, 0));
2020-11-08 12:20:48 +03:00
comdat_groups.push_back({group, entries});
2020-11-03 11:48:15 +03:00
2020-11-03 11:52:39 +03:00
static Counter counter("comdats");
2020-11-03 11:48:15 +03:00
counter.inc();
2020-10-19 15:32:57 +03:00
break;
2020-10-19 14:17:32 +03:00
}
2020-10-19 16:55:52 +03:00
case SHT_SYMTAB_SHNDX:
2021-01-09 06:41:32 +03:00
Error() << *this << ": SHT_SYMTAB_SHNDX section is not supported";
2020-10-19 16:55:52 +03:00
break;
case SHT_SYMTAB:
case SHT_STRTAB:
case SHT_REL:
case SHT_RELA:
case SHT_NULL:
break;
2020-10-19 14:17:32 +03:00
default: {
2020-11-03 11:52:39 +03:00
static Counter counter("regular_sections");
counter.inc();
2020-11-07 04:39:29 +03:00
2020-12-12 06:41:09 +03:00
std::string_view shstrtab = get_string(ehdr.e_shstrndx);
std::string_view name = shstrtab.data() + shdr.sh_name;
2020-11-07 05:41:14 +03:00
this->sections[i] = new InputSection(this, shdr, name);
2020-10-19 14:17:32 +03:00
break;
}
}
2020-10-13 14:35:35 +03:00
}
2020-10-20 04:32:32 +03:00
2020-11-07 05:53:32 +03:00
// Attach relocation sections to their target sections.
2020-12-10 09:59:24 +03:00
for (const ElfShdr &shdr : elf_sections) {
2020-10-20 04:32:32 +03:00
if (shdr.sh_type != SHT_RELA)
continue;
if (shdr.sh_info >= sections.size())
2021-01-09 06:41:32 +03:00
Error() << *this << ": invalid relocated section index: "
2021-01-09 05:19:57 +03:00
<< (u32)shdr.sh_info;
2020-10-20 04:32:32 +03:00
InputSection *target = sections[shdr.sh_info];
2020-10-23 07:23:12 +03:00
if (target) {
2020-12-12 06:38:56 +03:00
target->rels = get_data<ElfRela>(shdr);
2020-12-15 16:24:08 +03:00
target->rel_types.resize(target->rels.size());
2020-12-19 02:32:07 +03:00
target->has_rel_piece.resize(target->rels.size());
2020-11-03 11:52:39 +03:00
if (target->shdr.sh_flags & SHF_ALLOC) {
2020-11-03 15:09:21 +03:00
static Counter counter("relocs_alloc");
2020-11-03 11:52:39 +03:00
counter.inc(target->rels.size());
}
2020-10-23 07:23:12 +03:00
}
2020-10-20 04:32:32 +03:00
}
2020-12-01 15:07:00 +03:00
// Set is_comdat_member bits.
for (auto &pair : comdat_groups) {
2020-12-10 14:11:45 +03:00
std::span<u32> entries = pair.second;
2020-12-01 15:07:00 +03:00
for (u32 i : entries)
if (this->sections[i])
this->sections[i]->is_comdat_member = true;
}
2020-10-19 14:05:34 +03:00
}
2020-10-09 14:47:45 +03:00
2020-12-20 04:12:10 +03:00
static bool should_write_symtab(const ElfSym &esym, std::string_view name) {
2020-12-20 04:27:01 +03:00
if (config.discard_all || config.strip_all)
2020-12-20 04:20:24 +03:00
return false;
2020-12-20 04:12:10 +03:00
if (esym.st_type == STT_SECTION)
return false;
if (config.discard_locals && name.starts_with(".L"))
return false;
return true;
}
2020-10-19 14:05:34 +03:00
void ObjectFile::initialize_symbols() {
2020-11-22 10:39:40 +03:00
if (!symtab_sec)
return;
2020-11-03 11:52:39 +03:00
static Counter counter("all_syms");
counter.inc(elf_syms.size());
2020-11-03 08:53:32 +03:00
symbols.reserve(elf_syms.size());
2020-10-27 14:15:57 +03:00
local_symbols.reserve(first_global);
2020-12-19 01:57:13 +03:00
sym_pieces.resize(elf_syms.size() - first_global);
2020-10-27 14:15:57 +03:00
2020-11-03 09:43:23 +03:00
// First symbol entry is always null
2021-01-09 10:11:54 +03:00
local_symbols.push_back({});
2020-11-03 09:43:23 +03:00
symbols.push_back(&local_symbols.back());
2020-11-07 06:31:54 +03:00
// Initialize local symbols
2020-11-03 09:43:23 +03:00
for (int i = 1; i < first_global; i++) {
2020-12-10 09:10:18 +03:00
const ElfSym &esym = elf_syms[i];
2021-01-09 10:11:54 +03:00
local_symbols.push_back({});
2020-11-03 09:08:05 +03:00
Symbol &sym = local_symbols.back();
2021-01-09 10:11:54 +03:00
sym.name = symbol_strtab.data() + esym.st_name;
2020-11-03 09:08:05 +03:00
sym.file = this;
2020-12-10 09:31:54 +03:00
sym.type = esym.st_type;
2020-11-06 06:52:16 +03:00
sym.value = esym.st_value;
2020-11-16 05:46:25 +03:00
sym.esym = &esym;
2021-01-09 10:11:54 +03:00
sym.write_symtab = should_write_symtab(esym, sym.name);
2020-11-03 09:08:05 +03:00
2020-12-10 09:31:54 +03:00
if (!esym.is_abs()) {
if (esym.is_common())
2021-01-09 05:19:57 +03:00
Error() << "common local symbol?";
2020-11-03 09:08:05 +03:00
sym.input_section = sections[esym.st_shndx];
}
2020-11-03 08:53:32 +03:00
symbols.push_back(&local_symbols.back());
2020-10-27 16:22:07 +03:00
2020-12-20 04:12:10 +03:00
if (sym.write_symtab) {
2021-01-09 10:11:54 +03:00
strtab_size += sym.name.size() + 1;
2020-12-10 09:10:18 +03:00
local_symtab_size += sizeof(ElfSym);
2020-10-27 19:57:57 +03:00
}
2020-10-27 14:15:57 +03:00
}
2020-11-07 06:31:54 +03:00
// Initialize global symbols
2020-10-27 10:40:05 +03:00
for (int i = first_global; i < elf_syms.size(); i++) {
2020-12-10 09:10:18 +03:00
const ElfSym &esym = elf_syms[i];
2020-12-10 09:31:54 +03:00
std::string_view name = symbol_strtab.data() + esym.st_name;
2020-11-20 13:40:54 +03:00
int pos = name.find('@');
2020-12-10 07:44:58 +03:00
if (pos != std::string_view::npos)
2020-11-20 13:40:54 +03:00
name = name.substr(0, pos);
2020-10-27 10:40:05 +03:00
symbols.push_back(Symbol::intern(name));
2020-12-10 09:31:54 +03:00
if (esym.is_common())
2020-10-27 02:45:20 +03:00
has_common_symbol = true;
2020-10-09 17:26:26 +03:00
}
2020-10-10 06:18:11 +03:00
}
2021-01-09 09:43:28 +03:00
// Returns the index of the last element of `span` that is <= `offset`, or -1
// if there is no such element (including when `span` is empty). `span` must
// be sorted in ascending order, as for any binary search.
static int binary_search(std::span<u32> span, u32 offset) {
  // An empty span has no span[0]; treat it like "offset precedes everything".
  if (span.empty() || offset < span[0])
    return -1;

  // Invariant: span[lo] <= offset, and every element at index >= hi (if any)
  // is > offset.
  size_t lo = 0;
  size_t hi = span.size();
  while (hi - lo > 1) {
    size_t mid = lo + (hi - lo) / 2;
    if (offset < span[mid])
      hi = mid;
    else
      lo = mid;
  }
  return (int)lo;
}
2020-12-10 09:59:24 +03:00
static bool is_mergeable(const ElfShdr &shdr) {
2020-11-08 08:13:59 +03:00
return (shdr.sh_flags & SHF_MERGE) &&
(shdr.sh_flags & SHF_STRINGS) &&
shdr.sh_entsize == 1;
}
2020-11-07 05:53:32 +03:00
void ObjectFile::initialize_mergeable_sections() {
2020-12-13 19:12:55 +03:00
mergeable_sections.resize(sections.size());
2020-11-08 08:13:59 +03:00
for (int i = 0; i < sections.size(); i++) {
InputSection *isec = sections[i];
2020-12-13 19:21:39 +03:00
if (isec && is_mergeable(isec->shdr)) {
2020-12-13 19:12:55 +03:00
mergeable_sections[i] = new MergeableSection(isec, get_string(isec->shdr));
2020-12-13 19:21:39 +03:00
sections[i] = nullptr;
}
2020-11-08 08:13:59 +03:00
}
2020-11-07 06:14:37 +03:00
// Initialize rel_pieces
for (InputSection *isec : sections) {
if (!isec || isec->rels.empty())
continue;
for (int i = 0; i < isec->rels.size(); i++) {
2020-12-10 13:54:01 +03:00
const ElfRela &rel = isec->rels[i];
2020-11-07 06:14:37 +03:00
2020-12-10 13:54:01 +03:00
switch (rel.r_type) {
2020-11-07 13:23:14 +03:00
case R_X86_64_64:
case R_X86_64_PC32:
case R_X86_64_32:
case R_X86_64_32S:
case R_X86_64_16:
case R_X86_64_PC16:
case R_X86_64_8:
case R_X86_64_PC8:
2020-12-10 13:54:01 +03:00
if (rel.r_sym >= this->first_global)
2020-11-07 13:23:14 +03:00
continue;
2020-12-13 19:21:39 +03:00
const ElfSym &esym = elf_syms[rel.r_sym];
if (esym.st_type != STT_SECTION)
2020-11-07 13:43:41 +03:00
continue;
2020-11-08 08:13:59 +03:00
2020-12-13 19:21:39 +03:00
MergeableSection *m = mergeable_sections[esym.st_shndx];
2020-12-13 19:12:55 +03:00
if (!m)
2020-11-08 09:56:17 +03:00
continue;
2020-11-08 08:13:59 +03:00
2020-12-13 19:21:39 +03:00
u32 offset = esym.st_value + rel.r_addend;
2021-01-09 09:43:28 +03:00
int idx = binary_search(m->piece_offsets, offset);
if (idx == -1)
2021-01-09 06:41:32 +03:00
Error() << *this << ": bad relocation at " << rel.r_sym;
2020-11-07 13:23:14 +03:00
2020-12-19 02:32:07 +03:00
isec->rel_pieces.push_back(
2021-01-09 09:43:28 +03:00
{m->pieces[idx], (i32)(offset - m->piece_offsets[idx])});
2020-12-19 02:32:07 +03:00
isec->has_rel_piece[i] = true;
2020-11-07 13:23:14 +03:00
}
2020-11-07 06:14:37 +03:00
}
}
// Initialize sym_pieces
for (int i = 0; i < elf_syms.size(); i++) {
2020-12-10 09:10:18 +03:00
const ElfSym &esym = elf_syms[i];
2020-12-10 09:31:54 +03:00
if (esym.is_abs() || esym.is_common())
2020-11-07 06:14:37 +03:00
continue;
2020-12-13 19:12:55 +03:00
MergeableSection *m = mergeable_sections[esym.st_shndx];
if (!m)
2020-11-07 06:14:37 +03:00
continue;
2021-01-09 09:43:28 +03:00
int idx = binary_search(m->piece_offsets, esym.st_value);
if (idx == -1)
2021-01-09 06:41:32 +03:00
Error() << *this << ": bad symbol value";
2020-11-07 06:14:37 +03:00
2020-11-24 13:47:30 +03:00
if (i < first_global) {
2021-01-09 09:43:28 +03:00
local_symbols[i].piece_ref.piece = m->pieces[idx];
2020-11-24 13:47:30 +03:00
} else {
2021-01-09 09:43:28 +03:00
sym_pieces[i - first_global].piece = m->pieces[idx];
sym_pieces[i - first_global].addend = esym.st_value - m->piece_offsets[idx];
2020-11-24 13:47:30 +03:00
}
2020-11-07 06:14:37 +03:00
}
2020-11-24 13:47:30 +03:00
2020-12-13 19:12:55 +03:00
erase(mergeable_sections, [](MergeableSection *m) { return !m; });
2020-11-07 04:39:29 +03:00
}
2020-10-19 14:05:34 +03:00
void ObjectFile::parse() {
2020-11-22 10:39:40 +03:00
sections.resize(elf_sections.size());
2020-12-13 14:40:01 +03:00
symtab_sec = find_section(SHT_SYMTAB);
2020-10-26 15:44:08 +03:00
if (symtab_sec) {
first_global = symtab_sec->sh_info;
2020-12-12 06:38:56 +03:00
elf_syms = get_data<ElfSym>(*symtab_sec);
symbol_strtab = get_string(symtab_sec->sh_link);
2020-10-26 15:44:08 +03:00
}
2020-10-19 14:17:32 +03:00
2020-11-24 10:22:32 +03:00
initialize_sections();
initialize_symbols();
2020-12-19 10:28:38 +03:00
initialize_mergeable_sections();
2020-10-19 14:05:34 +03:00
}
2020-11-20 01:33:20 +03:00
// Symbols with higher priorities overwrites symbols with lower priorities.
// Here is the list of priorities, from the highest to the lowest.
//
// 1. Strong defined symbol
// 2. Weak defined symbol
// 3. Defined symbol in an archive member
// 4. Unclaimed (nonexistent) symbol
//
// Ties are broken by file priority.
2020-12-10 09:10:18 +03:00
static u64 get_rank(InputFile *file, const ElfSym &esym, InputSection *isec) {
2020-12-01 15:07:00 +03:00
if (isec && isec->is_comdat_member)
return file->priority;
2020-12-10 09:31:54 +03:00
if (esym.is_undef()) {
assert(esym.st_bind == STB_WEAK);
2020-11-16 05:46:25 +03:00
return ((u64)2 << 32) + file->priority;
}
2020-12-10 09:31:54 +03:00
if (esym.st_bind == STB_WEAK)
2020-11-16 05:46:25 +03:00
return ((u64)1 << 32) + file->priority;
return file->priority;
}
// Ranks the definition currently recorded in `sym`. A symbol with no file
// gets the weakest rank (tier 4); a placeholder gets tier 3 plus its file's
// priority; anything else is ranked from its ELF symbol and input section.
static u64 get_rank(const Symbol &sym) {
  if (sym.file == nullptr)
    return (u64)4 << 32;

  return sym.is_placeholder
    ? ((u64)3 << 32) + sym.file->priority
    : get_rank(sym.file, *sym.esym, sym.input_section);
}
2020-11-24 10:22:32 +03:00
// Makes this file the definer of `sym` if the definition at index `symidx`
// in this file's symbol table outranks (has a lower get_rank() than) the
// definition currently recorded in `sym`. The comparison and the update are
// performed while holding sym.mu.
void ObjectFile::maybe_override_symbol(Symbol &sym, int symidx) {
  InputSection *isec = nullptr;
  const ElfSym &esym = elf_syms[symidx];
  // Absolute and common symbols have no input section.
  if (!esym.is_abs() && !esym.is_common())
    isec = sections[esym.st_shndx];

  std::lock_guard lock(sym.mu);

  u64 new_rank = get_rank(this, esym, isec);
  u64 existing_rank = get_rank(sym);
  if (new_rank >= existing_rank)
    return;

  sym.file = this;
  sym.input_section = isec;
  sym.piece_ref = sym_pieces[symidx - first_global];
  sym.value = esym.st_value;
  sym.ver_idx = 0;
  sym.type = esym.st_type;
  sym.esym = &esym;
  sym.is_placeholder = false;
  sym.is_weak = (esym.st_bind == STB_WEAK);
  sym.is_imported = false;

  // Stream the file itself, not the pointer: `<< sym.file` printed an
  // address, unlike the other trace messages which print the file's name.
  if (UNLIKELY(sym.traced))
    SyncOut() << "trace: " << *sym.file
              << (sym.is_weak ? ": weak definition of " : ": definition of ")
              << sym.name;
}
2020-11-05 06:20:16 +03:00
void ObjectFile::resolve_symbols() {
2020-11-03 08:53:32 +03:00
for (int i = first_global; i < symbols.size(); i++) {
2020-12-10 09:10:18 +03:00
const ElfSym &esym = elf_syms[i];
2020-12-10 09:31:54 +03:00
if (!esym.is_defined())
2020-11-05 02:59:20 +03:00
continue;
2020-10-27 10:35:14 +03:00
Symbol &sym = *symbols[i];
2020-11-05 07:16:33 +03:00
if (is_in_archive) {
std::lock_guard lock(sym.mu);
bool is_new = !sym.file;
bool tie_but_higher_priority =
sym.is_placeholder && this->priority < sym.file->priority;
2020-10-27 10:35:14 +03:00
2020-11-05 07:16:33 +03:00
if (is_new || tie_but_higher_priority) {
sym.file = this;
sym.is_placeholder = true;
2020-11-20 01:33:20 +03:00
if (UNLIKELY(sym.traced))
2021-01-09 06:49:39 +03:00
SyncOut() << "trace: " << sym.file << ": lazy definition of " << sym.name;
2020-11-05 07:16:33 +03:00
}
} else {
2020-11-24 10:22:32 +03:00
maybe_override_symbol(sym, i);
2020-10-26 09:15:03 +03:00
}
2020-10-13 14:35:35 +03:00
}
}
2020-12-20 10:42:17 +03:00
std::vector<ObjectFile *> ObjectFile::mark_live_objects() {
std::vector<ObjectFile *> vec;
2020-11-20 01:33:20 +03:00
assert(is_alive);
2020-11-03 08:53:32 +03:00
for (int i = first_global; i < symbols.size(); i++) {
2020-12-10 09:10:18 +03:00
const ElfSym &esym = elf_syms[i];
2020-10-27 10:36:53 +03:00
Symbol &sym = *symbols[i];
2020-12-10 09:31:54 +03:00
if (esym.is_defined()) {
2020-11-05 07:16:33 +03:00
if (is_in_archive)
2020-11-24 10:22:32 +03:00
maybe_override_symbol(sym, i);
2020-11-05 07:16:33 +03:00
continue;
}
2020-11-05 02:59:20 +03:00
if (UNLIKELY(sym.traced))
2021-01-09 06:49:39 +03:00
SyncOut() << "trace: " << *this << ": reference to " << sym.name;
2020-11-05 02:59:20 +03:00
2020-12-10 09:31:54 +03:00
if (esym.st_bind != STB_WEAK && sym.file &&
2020-11-07 20:03:54 +03:00
!sym.file->is_alive.exchange(true)) {
2020-11-30 10:43:47 +03:00
if (!sym.file->is_dso)
2020-12-20 10:42:17 +03:00
vec.push_back((ObjectFile *)sym.file);
2020-11-05 02:59:20 +03:00
if (UNLIKELY(sym.traced))
2021-01-09 06:49:39 +03:00
SyncOut() << "trace: " << *this << " keeps " << sym.file
<< " for " << sym.name;
2020-10-28 13:27:23 +03:00
}
2020-10-09 16:29:25 +03:00
}
2020-12-20 10:42:17 +03:00
return vec;
2020-10-09 14:47:45 +03:00
}
2020-11-19 10:09:07 +03:00
void ObjectFile::handle_undefined_weak_symbols() {
2020-11-25 13:53:14 +03:00
if (!is_alive)
2020-10-30 10:00:25 +03:00
return;
2020-11-03 08:53:32 +03:00
for (int i = first_global; i < symbols.size(); i++) {
2020-12-10 09:10:18 +03:00
const ElfSym &esym = elf_syms[i];
2020-10-30 10:00:25 +03:00
Symbol &sym = *symbols[i];
2020-12-10 09:31:54 +03:00
if (esym.is_undef() && esym.st_bind == STB_WEAK) {
2020-10-30 10:00:25 +03:00
std::lock_guard lock(sym.mu);
2020-11-20 01:53:53 +03:00
bool is_new = !sym.file || sym.is_placeholder;
2020-10-30 10:00:25 +03:00
bool tie_but_higher_priority =
2020-11-01 06:45:30 +03:00
!is_new && sym.is_undef_weak && this->priority < sym.file->priority;
2020-10-30 10:00:25 +03:00
if (is_new || tie_but_higher_priority) {
sym.file = this;
sym.input_section = nullptr;
2020-11-06 06:52:16 +03:00
sym.value = 0;
2020-11-16 05:46:25 +03:00
sym.esym = &esym;
2020-11-26 12:09:32 +03:00
sym.is_placeholder = false;
2020-10-30 10:00:25 +03:00
sym.is_undef_weak = true;
2020-11-25 14:35:04 +03:00
sym.is_imported = false;
2020-11-05 02:59:20 +03:00
if (UNLIKELY(sym.traced))
2021-01-09 06:49:39 +03:00
SyncOut() << "trace: " << *this << ": unresolved weak symbol "
<< sym.name;
2020-10-30 10:00:25 +03:00
}
}
}
}
2020-11-08 12:17:24 +03:00
void ObjectFile::resolve_comdat_groups() {
2020-12-01 11:16:28 +03:00
if (!is_alive)
return;
2020-11-08 12:17:24 +03:00
for (auto &pair : comdat_groups) {
ComdatGroup *group = pair.first;
ObjectFile *cur = group->file;
while (!cur || cur->priority > this->priority)
2020-11-30 16:00:04 +03:00
if (group->file.compare_exchange_weak(cur, this))
2020-11-08 12:17:24 +03:00
break;
}
}
2020-10-19 15:50:33 +03:00
void ObjectFile::eliminate_duplicate_comdat_groups() {
2020-12-01 11:16:28 +03:00
if (!is_alive)
return;
for (auto &pair : comdat_groups) {
2020-11-08 12:17:24 +03:00
ComdatGroup *group = pair.first;
2020-11-08 12:18:22 +03:00
if (group->file == this)
continue;
2020-12-10 14:11:45 +03:00
std::span<u32> entries = pair.second;
2020-12-01 11:16:28 +03:00
for (u32 i : entries) {
if (sections[i])
sections[i]->is_alive = false;
2020-11-08 12:18:22 +03:00
sections[i] = nullptr;
2020-12-01 11:16:28 +03:00
}
2020-11-08 12:28:49 +03:00
static Counter counter("removed_comdat_mem");
counter.inc(entries.size());
2020-10-19 15:50:33 +03:00
}
}
2020-10-27 06:50:25 +03:00
void ObjectFile::convert_common_symbols() {
if (!has_common_symbol)
return;
2020-10-27 07:52:10 +03:00
2020-10-29 07:31:41 +03:00
static OutputSection *bss =
2020-12-17 15:19:29 +03:00
OutputSection::get_instance(".bss", SHT_NOBITS, SHF_WRITE | SHF_ALLOC);
2020-10-29 07:31:41 +03:00
2020-10-27 07:52:10 +03:00
for (int i = first_global; i < elf_syms.size(); i++) {
2020-12-10 09:31:54 +03:00
if (!elf_syms[i].is_common())
2020-10-27 07:52:10 +03:00
continue;
2020-11-03 08:53:32 +03:00
Symbol *sym = symbols[i];
if (sym->file != this)
2020-10-27 07:52:10 +03:00
continue;
2020-12-10 09:59:24 +03:00
auto *shdr = new ElfShdr;
2020-10-27 07:52:10 +03:00
memset(shdr, 0, sizeof(*shdr));
shdr->sh_flags = SHF_ALLOC;
shdr->sh_type = SHT_NOBITS;
2020-10-29 07:31:41 +03:00
shdr->sh_size = elf_syms[i].st_size;
2020-10-27 07:52:10 +03:00
shdr->sh_addralign = 1;
auto *isec = new InputSection(this, *shdr, ".bss");
2020-10-29 07:31:41 +03:00
isec->output_section = bss;
2020-10-27 07:52:10 +03:00
sections.push_back(isec);
sym->input_section = isec;
2020-11-06 06:52:16 +03:00
sym->value = 0;
2020-10-27 07:52:10 +03:00
}
2020-10-27 06:50:25 +03:00
}
2020-12-20 04:27:01 +03:00
// A global symbol goes into the output symbol table unless all symbols are
// being stripped or it is a section symbol.
static bool should_write_global_symtab(Symbol &sym) {
  if (config.strip_all)
    return false;
  return sym.esym->st_type != STT_SECTION;
}
2020-10-27 14:58:28 +03:00
void ObjectFile::compute_symtab() {
2020-10-27 16:22:07 +03:00
for (int i = first_global; i < elf_syms.size(); i++) {
2020-12-10 09:10:18 +03:00
const ElfSym &esym = elf_syms[i];
2020-11-03 08:53:32 +03:00
Symbol &sym = *symbols[i];
2020-10-27 16:22:07 +03:00
2020-12-20 04:27:01 +03:00
if (sym.file == this && should_write_global_symtab(sym)) {
2020-12-10 09:10:18 +03:00
global_symtab_size += sizeof(ElfSym);
2020-11-29 12:45:33 +03:00
strtab_size += sym.name.size() + 1;
2020-10-27 14:58:28 +03:00
}
}
2020-10-27 14:15:57 +03:00
}
2020-11-29 12:31:19 +03:00
void ObjectFile::write_symtab() {
u8 *symtab_base = out::buf + out::symtab->shdr.sh_offset;
u8 *strtab_base = out::buf + out::strtab->shdr.sh_offset;
2020-11-29 12:45:33 +03:00
u32 symtab_off;
u32 strtab_off = strtab_offset;
2020-10-27 15:38:52 +03:00
2020-11-29 12:45:33 +03:00
auto write_sym = [&](u32 i) {
2020-11-03 09:43:23 +03:00
Symbol &sym = *symbols[i];
2020-12-10 09:10:18 +03:00
ElfSym &esym = *(ElfSym *)(symtab_base + symtab_off);
symtab_off += sizeof(ElfSym);
2020-11-29 12:45:33 +03:00
2020-11-13 04:19:47 +03:00
esym = elf_syms[i];
2020-11-03 09:43:23 +03:00
esym.st_name = strtab_off;
2020-12-12 14:34:13 +03:00
if (sym.type == STT_TLS)
esym.st_value = sym.get_addr() - sym.input_section->output_section->shdr.sh_addr;
else
esym.st_value = sym.get_addr();
2020-11-04 08:03:01 +03:00
if (sym.input_section)
esym.st_shndx = sym.input_section->output_section->shndx;
2020-11-04 08:41:40 +03:00
else if (sym.shndx)
esym.st_shndx = sym.shndx;
2020-11-04 08:03:01 +03:00
else
2020-12-12 06:58:55 +03:00
esym.st_shndx = SHN_ABS;
2020-11-03 09:43:23 +03:00
2020-11-29 12:31:19 +03:00
write_string(strtab_base + strtab_off, sym.name);
2020-11-29 12:45:33 +03:00
strtab_off += sym.name.size() + 1;
2020-11-29 12:31:19 +03:00
};
2020-10-27 15:45:19 +03:00
2020-11-29 12:45:33 +03:00
symtab_off = local_symtab_offset;
2020-11-29 12:31:19 +03:00
for (int i = 1; i < first_global; i++)
2020-12-20 04:12:10 +03:00
if (symbols[i]->write_symtab)
write_sym(i);
2020-10-27 15:38:52 +03:00
2020-11-29 12:45:33 +03:00
symtab_off = global_symtab_offset;
2020-11-29 12:31:19 +03:00
for (int i = first_global; i < elf_syms.size(); i++)
2020-12-20 04:27:01 +03:00
if (symbols[i]->file == this && should_write_global_symtab(*symbols[i]))
2020-12-20 04:16:13 +03:00
write_sym(i);
2020-10-27 15:38:52 +03:00
}
2020-12-10 07:44:58 +03:00
// Returns true if `name` is a valid C identifier: a non-empty string whose
// first character is an ASCII letter or '_' and whose remaining characters
// are ASCII letters, digits or '_'. Matches exactly the strings accepted by
// the pattern [a-zA-Z_][a-zA-Z0-9_]*, without the cost of std::regex.
bool is_c_identifier(std::string_view name) {
  auto is_letter_or_underscore = [](char c) {
    return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_';
  };
  auto is_digit = [](char c) { return '0' <= c && c <= '9'; };

  if (name.empty() || !is_letter_or_underscore(name[0]))
    return false;

  for (char c : name.substr(1))
    if (!is_letter_or_underscore(c) && !is_digit(c))
      return false;
  return true;
}
2020-11-17 07:48:11 +03:00
// Builds the synthetic "<internal>" object file that defines the
// linker-provided symbols (__ehdr_start, _end, __start_<sec>/__stop_<sec>
// and so on). The file is backed by a zero-filled 256-byte buffer that
// carries only the ELF magic. Every synthesized symbol is a hidden, global,
// absolute STT_NOTYPE symbol.
ObjectFile *ObjectFile::create_internal_file() {
  // Create a dummy object file.
  constexpr int bufsz = 256;
  u8 *buf = (u8 *)calloc(1, bufsz);
  memcpy(buf, "\177ELF", 4);

  auto *obj = new ObjectFile(new MemoryMappedFile("<internal>", buf, bufsz), "");

  // Create linker-synthesized symbols. The vector starts with one
  // zero-initialized entry for the null symbol and is intentionally never
  // freed: obj->elf_syms ends up viewing its storage.
  auto *esyms = new std::vector<ElfSym>(1);
  obj->symbols.push_back(new Symbol);
  obj->first_global = 1;
  obj->is_alive = true;

  // Appends one synthetic ELF symbol and the interned Symbol for `name`.
  auto define = [&](std::string_view name, u8 visibility = STV_DEFAULT) {
    ElfSym esym = {};
    esym.st_type = STT_NOTYPE;
    esym.st_shndx = SHN_ABS;
    esym.st_bind = STB_GLOBAL;
    esym.st_visibility = visibility;
    esyms->push_back(esym);

    Symbol *sym = Symbol::intern(name);
    obj->symbols.push_back(sym);
    return sym;
  };

  out::__ehdr_start = define("__ehdr_start", STV_HIDDEN);
  out::__rela_iplt_start = define("__rela_iplt_start", STV_HIDDEN);
  out::__rela_iplt_end = define("__rela_iplt_end", STV_HIDDEN);
  out::__init_array_start = define("__init_array_start", STV_HIDDEN);
  out::__init_array_end = define("__init_array_end", STV_HIDDEN);
  out::__fini_array_start = define("__fini_array_start", STV_HIDDEN);
  out::__fini_array_end = define("__fini_array_end", STV_HIDDEN);
  out::__preinit_array_start = define("__preinit_array_start", STV_HIDDEN);
  out::__preinit_array_end = define("__preinit_array_end", STV_HIDDEN);
  out::_DYNAMIC = define("_DYNAMIC", STV_HIDDEN);
  out::_GLOBAL_OFFSET_TABLE_ = define("_GLOBAL_OFFSET_TABLE_", STV_HIDDEN);
  out::__bss_start = define("__bss_start", STV_HIDDEN);
  out::_end = define("_end", STV_HIDDEN);
  out::_etext = define("_etext", STV_HIDDEN);
  out::_edata = define("_edata", STV_HIDDEN);

  // Output chunks whose names are valid C identifiers also get
  // __start_<name> and __stop_<name> symbols.
  for (OutputChunk *chunk : out::chunks) {
    if (is_c_identifier(chunk->name)) {
      const std::string chunk_name(chunk->name);
      // The name strings are heap-allocated and never freed because
      // define() takes them as string_views.
      auto *start = new std::string("__start_" + chunk_name);
      auto *stop = new std::string("__stop_" + chunk_name);
      define(*start, STV_HIDDEN);
      define(*stop, STV_HIDDEN);
    }
  }

  // Publish the finished symbol array only now that it has stopped growing.
  obj->elf_syms = *esyms;
  obj->sym_pieces.resize(esyms->size() - obj->first_global);
  return obj;
}
2021-01-09 06:41:32 +03:00
std::ostream &operator<<(std::ostream &out, const InputFile &file) {
if (file.is_dso) {
out << file.name;
return out;
}
2020-11-24 10:22:32 +03:00
2021-01-09 06:41:32 +03:00
ObjectFile *obj = (ObjectFile *)&file;
2020-10-14 13:04:36 +03:00
if (obj->archive_name == "")
2021-01-09 06:41:32 +03:00
out << obj->name;
else
out << obj->archive_name << ":(" << obj->name + ")";
return out;
2020-10-09 14:47:45 +03:00
}
2020-11-25 14:35:04 +03:00
2020-12-13 14:40:01 +03:00
std::string_view SharedFile::get_soname() {
if (ElfShdr *sec = find_section(SHT_DYNAMIC))
for (ElfDyn &dyn : get_data<ElfDyn>(*sec))
if (dyn.d_tag == DT_SONAME)
return std::string_view(symbol_strtab.data() + dyn.d_val);
2020-11-28 13:59:56 +03:00
return name;
}
void SharedFile::parse() {
2020-12-13 14:40:01 +03:00
symtab_sec = find_section(SHT_DYNSYM);
2020-11-28 13:59:56 +03:00
if (!symtab_sec)
return;
2020-12-12 06:38:56 +03:00
symbol_strtab = get_string(symtab_sec->sh_link);
2020-12-13 14:40:01 +03:00
soname = get_soname();
2020-11-28 16:08:46 +03:00
version_strings = read_verdef();
2020-11-28 14:43:48 +03:00
2020-11-28 13:59:56 +03:00
// Read a symbol table.
int first_global = symtab_sec->sh_info;
2020-12-12 06:38:56 +03:00
std::span<ElfSym> esyms = get_data<ElfSym>(*symtab_sec);
2020-12-10 12:23:13 +03:00
2020-12-10 14:11:45 +03:00
std::span<u16> vers;
2020-12-13 14:40:01 +03:00
if (ElfShdr *sec = find_section(SHT_GNU_VERSYM))
2020-12-12 06:38:56 +03:00
vers = get_data<u16>(*sec);
2020-11-28 13:59:56 +03:00
2020-12-10 09:10:18 +03:00
std::vector<std::pair<const ElfSym *, u16>> pairs;
2020-11-28 13:59:56 +03:00
2020-11-28 14:43:48 +03:00
for (int i = first_global; i < esyms.size(); i++) {
2020-12-10 09:31:54 +03:00
if (!esyms[i].is_defined())
2020-11-28 14:43:48 +03:00
continue;
if (!vers.empty() && (vers[i] >> 15) == 1)
continue;
2020-11-28 13:59:56 +03:00
2020-11-28 14:43:48 +03:00
if (vers.empty())
pairs.push_back({&esyms[i], 1});
else
pairs.push_back({&esyms[i], vers[i]});
}
2020-11-28 13:59:56 +03:00
// Sort symbols by value for find_aliases(), as find_aliases() does
// binary search on symbols.
2020-12-21 12:42:14 +03:00
sort(pairs, [](const std::pair<const ElfSym *, u16> &a,
const std::pair<const ElfSym *, u16> &b) {
return a.first->st_value < b.first->st_value;
});
2020-11-28 13:59:56 +03:00
2020-11-28 14:43:48 +03:00
elf_syms.reserve(pairs.size());
versyms.reserve(pairs.size());
symbols.reserve(pairs.size());
2020-12-10 09:10:18 +03:00
for (std::pair<const ElfSym *, u16> &x : pairs) {
2020-11-28 14:43:48 +03:00
elf_syms.push_back(x.first);
versyms.push_back(x.second);
2020-12-10 09:31:54 +03:00
std::string_view name = symbol_strtab.data() + x.first->st_name;
2020-11-28 13:59:56 +03:00
symbols.push_back(Symbol::intern(name));
}
static Counter counter("dso_syms");
counter.inc(elf_syms.size());
}
2020-12-10 07:44:58 +03:00
std::vector<std::string_view> SharedFile::read_verdef() {
2020-12-13 14:40:01 +03:00
ElfShdr *verdef_sec = find_section(SHT_GNU_VERDEF);
2020-11-28 13:59:56 +03:00
if (!verdef_sec)
return {};
2020-12-12 06:38:56 +03:00
std::string_view verdef = get_string(*verdef_sec);
std::string_view strtab = get_string(verdef_sec->sh_link);
2020-11-28 13:59:56 +03:00
2020-12-10 07:44:58 +03:00
std::vector<std::string_view> ret(2);
2020-12-10 15:38:37 +03:00
auto *ver = (ElfVerdef *)verdef.data();
2020-11-28 13:59:56 +03:00
2020-11-28 16:02:58 +03:00
for (;;) {
if (ret.size() <= ver->vd_ndx)
ret.resize(ver->vd_ndx + 1);
2020-12-13 14:41:23 +03:00
ElfVerdaux *aux = (ElfVerdaux *)((u8 *)ver + ver->vd_aux);
2020-11-28 13:59:56 +03:00
ret[ver->vd_ndx] = strtab.data() + aux->vda_name;
2020-11-28 16:02:58 +03:00
if (!ver->vd_next)
break;
2020-12-13 14:41:23 +03:00
2020-12-10 15:38:37 +03:00
ver = (ElfVerdef *)((u8 *)ver + ver->vd_next);
2020-11-28 13:59:56 +03:00
}
return ret;
}
2020-11-28 14:25:39 +03:00
void SharedFile::resolve_symbols() {
for (int i = 0; i < symbols.size(); i++) {
Symbol &sym = *symbols[i];
2020-12-10 09:10:18 +03:00
const ElfSym &esym = *elf_syms[i];
2020-11-28 14:25:39 +03:00
std::lock_guard lock(sym.mu);
2020-12-01 15:07:00 +03:00
u64 new_rank = get_rank(this, esym, nullptr);
2020-11-28 14:25:39 +03:00
u64 existing_rank = get_rank(sym);
if (new_rank < existing_rank) {
sym.file = this;
sym.input_section = nullptr;
sym.piece_ref = {};
sym.value = esym.st_value;
2020-11-28 16:08:46 +03:00
sym.ver_idx = versyms[i];
2020-12-10 09:31:54 +03:00
sym.type = (esym.st_type == STT_GNU_IFUNC) ? STT_FUNC : esym.st_type;
2020-11-28 14:25:39 +03:00
sym.esym = &esym;
sym.is_placeholder = false;
2020-12-10 09:31:54 +03:00
sym.is_weak = (esym.st_bind == STB_WEAK);
2020-11-28 14:25:39 +03:00
sym.is_imported = true;
2020-11-28 13:59:56 +03:00
2020-11-28 14:25:39 +03:00
if (UNLIKELY(sym.traced))
2021-01-09 06:49:39 +03:00
SyncOut() << "trace: " << *sym.file
<< (sym.is_weak ? ": weak definition of " : ": definition of ")
<< sym.name;
2020-11-28 14:25:39 +03:00
}
2020-11-28 13:59:56 +03:00
}
}
2020-12-10 14:12:43 +03:00
// Returns the contiguous run of this file's symbols whose ELF symbol has the
// same st_value as `sym`'s (i.e. the symbols located at the same address in
// this shared object, including `sym` itself). `sym` must currently be
// defined by this file.
std::span<Symbol *> SharedFile::find_aliases(Symbol *sym) {
  assert(sym->file == this);

  // parse() sorts elf_syms by st_value and fills `symbols` in the same
  // order, so search elf_syms and map the resulting index range onto
  // `symbols`. Searching `symbols` by Symbol::value is not valid: a symbol
  // that resolved to a different file carries that file's value, so the
  // array is not ordered by that key.
  auto [begin, end] = std::equal_range(
    elf_syms.begin(), elf_syms.end(), sym->esym,
    [](const ElfSym *a, const ElfSym *b) { return a->st_value < b->st_value; });

  return {symbols.begin() + (begin - elf_syms.begin()),
          symbols.begin() + (end - elf_syms.begin())};
}