1
1
mirror of https://github.com/rui314/mold.git synced 2024-10-05 09:07:10 +03:00
mold/mold.h

1010 lines
22 KiB
C
Raw Normal View History

2020-10-04 12:00:33 +03:00
#pragma once
2020-11-09 06:30:13 +03:00
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
2020-12-16 05:46:15 +03:00
#include "elf.h"
#include "tbb/concurrent_hash_map.h"
#include "tbb/parallel_for_each.h"
#include "tbb/parallel_invoke.h"
#include "tbb/parallel_reduce.h"
#include "tbb/spin_mutex.h"

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <iostream>
#include <mutex>
#include <span>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
2020-10-04 12:00:33 +03:00
2020-11-03 14:45:45 +03:00
// Fixed size constants used throughout the linker.
#define SECTOR_SIZE 512
#define PAGE_SIZE 4096

// Per-entry sizes; the Symbol::get_*_addr() helpers multiply a slot index
// by one of these to obtain an offset into .got / .got.plt / .plt.
#define GOT_SIZE 8
#define PLT_SIZE 16

// Branch-prediction hints built on the GCC/Clang __builtin_expect builtin.
#define LIKELY(x) __builtin_expect((x), 1)
#define UNLIKELY(x) __builtin_expect((x), 0)
2020-11-03 14:45:45 +03:00
// Short aliases for the fixed-width integer types.
using u8 = uint8_t;
using u16 = uint16_t;
using u32 = uint32_t;
using u64 = uint64_t;

using i8 = int8_t;
using i16 = int16_t;
using i32 = int32_t;
using i64 = int64_t;
2020-11-08 06:26:40 +03:00
// Forward declarations for classes that are defined further down in this
// header, so that earlier declarations can refer to them by pointer or
// reference.
class InputChunk;
class InputFile;
class InputSection;
class MergeableSection;
class MergedSection;
class ObjectFile;
class OutputChunk;
class OutputSection;
class SharedFile;
class Symbol;
2020-10-08 11:01:54 +03:00
2020-10-04 12:00:33 +03:00
// Linker-wide settings. Each member's initializer is the value it has
// until something assigns a different one.
struct Config {
  std::string dynamic_linker = "/lib64/ld-linux-x86-64.so.2";
  std::string entry = "_start";
  std::string output;
  bool as_needed = false;
  bool discard_all = false;
  bool discard_locals = false;
  bool export_dynamic = false;
  bool fork = true;
  bool is_static = false;
  bool perf = false;
  bool pie = false;
  bool print_map = false;
  bool strip_all = false;
  bool trace = false;
  bool z_now = false;
  int filler = -1;
  int thread_count = -1;
  std::string sysroot;
  std::vector<std::string> library_paths;
  std::vector<std::string> rpaths;
  std::vector<std::string> globals;
  u64 image_base = 0x200000;
};

// The single, program-wide configuration object.
inline Config config;
2020-12-10 08:32:42 +03:00
// Writes `msg` and a newline to stderr, then terminates the process with
// exit status 1. A function-local mutex is held while printing so that
// messages from concurrent callers are not interleaved.
[[noreturn]] inline void error(std::string msg) {
  static std::mutex mu;
  std::lock_guard lock(mu);
  std::cerr << msg << "\n";
  std::exit(1);
}
2020-11-30 11:52:08 +03:00
// Reports an internal error tagged with the source file and line of the
// expansion site by calling error(), which does not return.
#define unreachable() \
  error("internal error at " + std::string(__FILE__) + ":" + std::to_string(__LINE__))
2020-11-30 11:52:08 +03:00
2020-12-12 07:10:38 +03:00
// String conversion for an input file (declaration only). Used by
// to_string(Symbol) later in this header.
std::string to_string(const InputFile *);
2020-10-19 15:32:57 +03:00
2020-10-12 07:30:34 +03:00
//
2020-10-19 15:32:57 +03:00
// Interned string
2020-10-12 07:30:34 +03:00
//
2020-10-19 12:13:55 +03:00
namespace tbb {
template<>
2020-12-10 07:44:58 +03:00
struct tbb_hash_compare<std::string_view> {
static size_t hash(const std::string_view& k) {
2020-12-10 16:35:36 +03:00
return std::hash<std::string_view>()(k);
2020-10-19 12:13:55 +03:00
}
2020-12-10 07:44:58 +03:00
static bool equal(const std::string_view& k1, const std::string_view& k2) {
2020-10-19 12:13:55 +03:00
return k1 == k2;
}
};
}
2020-10-19 15:32:57 +03:00
template<typename ValueT>
class ConcurrentMap {
public:
2020-12-10 07:44:58 +03:00
typedef tbb::concurrent_hash_map<std::string_view, ValueT> MapT;
2020-10-19 15:32:57 +03:00
2020-12-10 07:44:58 +03:00
ValueT *insert(std::string_view key, const ValueT &val) {
2020-10-28 03:39:31 +03:00
typename MapT::const_accessor acc;
2020-10-19 15:32:57 +03:00
map.insert(acc, std::make_pair(key, val));
2020-10-28 03:39:31 +03:00
return const_cast<ValueT *>(&acc->second);
2020-10-19 15:32:57 +03:00
}
2020-12-13 19:52:22 +03:00
void for_each_value(std::function<void(ValueT &)> fn) {
for (typename MapT::iterator it = map.begin(); it != map.end(); ++it)
fn(it->second);
}
2020-11-07 16:07:19 +03:00
size_t size() const { return map.size(); }
2020-10-19 15:32:57 +03:00
private:
MapT map;
};
//
// Symbol
//
2020-11-07 04:49:34 +03:00
// A string fragment described by a (pointer, length) pair, together with
// the MergeableSection recorded for it and its offset in the output.
struct StringPiece {
  // Records where `view` points and how long it is; no characters are copied.
  StringPiece(std::string_view view)
    : data(view.data()), size(view.size()) {}

  // std::atomic is not copyable, so the pointer it currently holds is
  // loaded and used to initialize the new object's atomic.
  StringPiece(const StringPiece &other)
    : isec(other.isec.load()), data(other.data), size(other.size),
      output_offset(other.output_offset) {}

  inline u64 get_addr() const;

  // Brace initialization replaces ATOMIC_VAR_INIT, which is deprecated
  // as of C++20.
  std::atomic<MergeableSection *> isec{nullptr};
  const char *data;
  u32 size;
  u32 output_offset = -1;
};
// A reference to a StringPiece plus two offsets. Symbol::get_addr() adds
// `addend` to the piece's address when `piece` is non-null.
struct StringPieceRef {
  StringPiece *piece = nullptr;
  u32 input_offset = 0;
  i32 addend = 0;
};
2020-12-16 12:38:13 +03:00
// Single-bit flag values, one bit per kind of linker-generated entry.
// Presumably these are OR-ed into Symbol::flags -- confirm at the use sites.
enum {
  NEEDS_GOT      = 1 << 0,
  NEEDS_PLT      = 1 << 1,
  NEEDS_GOTTPOFF = 1 << 2,
  NEEDS_TLSGD    = 1 << 3,
  NEEDS_TLSLD    = 1 << 4,
  NEEDS_COPYREL  = 1 << 5,
  NEEDS_DYNSYM   = 1 << 6,
};
2020-10-12 07:30:34 +03:00
class Symbol {
public:
2020-12-13 14:18:49 +03:00
Symbol(std::string_view name) : name(name) {}
2020-11-29 04:22:14 +03:00
Symbol(const Symbol &other) : Symbol(other.name) {}
2020-10-12 07:30:34 +03:00
2020-12-10 07:44:58 +03:00
static Symbol *intern(std::string_view name) {
2020-10-25 08:42:44 +03:00
static ConcurrentMap<Symbol> map;
2020-11-13 06:30:27 +03:00
return map.insert(name, Symbol(name));
2020-10-25 08:42:44 +03:00
}
2020-11-06 06:50:26 +03:00
inline u64 get_addr() const;
2020-11-15 07:01:38 +03:00
inline u64 get_got_addr() const;
inline u64 get_gotplt_addr() const;
2020-11-21 04:48:51 +03:00
inline u64 get_gottpoff_addr() const;
2020-11-21 04:48:23 +03:00
inline u64 get_tlsgd_addr() const;
inline u64 get_tlsld_addr() const;
2020-11-15 07:01:38 +03:00
inline u64 get_plt_addr() const;
2020-11-06 06:50:26 +03:00
2020-12-17 08:32:18 +03:00
bool needs_relative_rel() const {
return config.pie && !is_undef_weak;
}
2020-12-10 07:44:58 +03:00
std::string_view name;
2020-11-24 10:22:32 +03:00
InputFile *file = nullptr;
2020-12-10 09:10:18 +03:00
const ElfSym *esym = nullptr;
2020-11-08 09:56:17 +03:00
InputSection *input_section = nullptr;
2020-11-07 16:20:49 +03:00
StringPieceRef piece_ref;
2020-10-23 06:09:27 +03:00
2020-11-26 07:34:42 +03:00
u64 value = -1;
2020-11-15 07:01:38 +03:00
u32 got_idx = -1;
u32 gotplt_idx = -1;
2020-11-21 04:48:51 +03:00
u32 gottpoff_idx = -1;
2020-11-21 04:48:23 +03:00
u32 tlsgd_idx = -1;
u32 tlsld_idx = -1;
2020-11-15 07:01:38 +03:00
u32 plt_idx = -1;
u32 relplt_idx = -1;
2020-11-15 08:07:40 +03:00
u32 dynsym_idx = -1;
2020-11-16 16:25:58 +03:00
u32 dynstr_offset = -1;
2020-11-25 11:20:48 +03:00
u32 copyrel_offset = -1;
2020-12-14 11:27:47 +03:00
u16 shndx = 0;
2020-11-28 16:04:23 +03:00
u16 ver_idx = 0;
2020-10-30 11:59:45 +03:00
2020-11-03 16:24:50 +03:00
tbb::spin_mutex mu;
2020-12-13 14:18:49 +03:00
u8 is_placeholder : 1 = false;
u8 is_imported : 1 = false;
u8 is_weak : 1 = false;
u8 is_undef_weak : 1 = false;
2020-12-20 04:12:10 +03:00
u8 write_symtab : 1 = false;
2020-12-13 14:18:49 +03:00
u8 traced : 1 = false;
2020-12-16 12:38:13 +03:00
std::atomic_uint8_t flags = ATOMIC_VAR_INIT(0);
2020-12-10 15:38:37 +03:00
u8 type = STT_NOTYPE;
2020-10-12 07:30:34 +03:00
};
//
2020-11-06 02:49:41 +03:00
// input_sections.cc
2020-10-12 07:30:34 +03:00
//
2020-11-08 06:26:40 +03:00
class InputChunk {
2020-10-08 11:01:54 +03:00
public:
2020-11-17 07:56:40 +03:00
virtual void copy_buf() {}
2020-11-29 13:58:02 +03:00
inline u64 get_addr() const;
2020-10-23 03:21:40 +03:00
2020-10-19 17:37:29 +03:00
ObjectFile *file;
2020-12-10 09:59:24 +03:00
const ElfShdr &shdr;
2020-11-08 06:26:40 +03:00
OutputSection *output_section = nullptr;
2020-11-07 06:14:37 +03:00
2020-12-10 07:44:58 +03:00
std::string_view name;
2020-11-07 15:53:21 +03:00
u32 offset;
2020-11-08 08:13:59 +03:00
protected:
2020-12-10 09:59:24 +03:00
InputChunk(ObjectFile *file, const ElfShdr &shdr, std::string_view name);
2020-11-08 06:26:40 +03:00
};
2020-12-15 16:24:08 +03:00
// One-byte tag for a relocation; InputSection keeps a vector of these in
// `rel_types`. Enumerators are numbered consecutively starting at 0.
enum RelType : u8 {
  R_NONE,
  R_ABS,
  R_DYN,
  R_PC,
  R_GOT,
  R_GOTPC,
  R_GOTPCREL,
  R_PLT,
  R_TLSGD,
  R_TLSGD_RELAX_LE,
  R_TLSLD,
  R_TLSLD_RELAX_LE,
  R_TPOFF,
  R_GOTTPOFF,
};
2020-11-08 06:26:40 +03:00
class InputSection : public InputChunk {
public:
2020-12-10 09:59:24 +03:00
InputSection(ObjectFile *file, const ElfShdr &shdr, std::string_view name)
2020-11-29 11:47:51 +03:00
: InputChunk(file, shdr, name) {}
2020-11-08 06:26:40 +03:00
2020-11-17 07:56:40 +03:00
void copy_buf() override;
2020-11-26 12:09:32 +03:00
void scan_relocations();
void report_undefined_symbols();
2020-11-08 06:26:40 +03:00
2020-12-10 13:54:01 +03:00
std::span<ElfRela> rels;
2020-12-19 02:32:07 +03:00
std::vector<bool> has_rel_piece;
2020-11-08 06:26:40 +03:00
std::vector<StringPieceRef> rel_pieces;
2020-12-15 16:24:08 +03:00
std::vector<RelType> rel_types;
2020-12-16 14:56:04 +03:00
u64 reldyn_offset = 0;
2020-12-01 15:07:00 +03:00
bool is_comdat_member = false;
2020-12-01 11:16:28 +03:00
bool is_alive = true;
2020-11-08 08:13:59 +03:00
};
class MergeableSection : public InputChunk {
public:
2020-12-10 14:11:45 +03:00
MergeableSection(InputSection *isec, std::string_view contents);
2020-11-07 15:53:21 +03:00
2020-11-08 08:13:59 +03:00
MergedSection &parent;
2020-11-07 15:53:21 +03:00
std::vector<StringPieceRef> pieces;
2020-11-08 08:13:59 +03:00
u32 size = 0;
2020-10-08 11:01:54 +03:00
};
2020-12-11 03:15:08 +03:00
// String conversion for an input chunk (declaration only).
std::string to_string(InputChunk *isec);
2020-10-19 17:37:29 +03:00
2020-10-12 08:17:34 +03:00
//
2020-10-20 09:33:40 +03:00
// output_chunks.cc
2020-10-12 08:17:34 +03:00
//
2020-10-15 12:30:06 +03:00
class OutputChunk {
2020-10-15 11:06:01 +03:00
public:
2020-11-08 10:09:01 +03:00
enum Kind : u8 { HEADER, REGULAR, SYNTHETIC };
2020-11-08 10:05:36 +03:00
OutputChunk(Kind kind) : kind(kind) { shdr.sh_addralign = 1; }
2020-10-25 08:17:05 +03:00
2020-11-17 07:56:40 +03:00
virtual void copy_buf() {}
2020-11-25 11:20:48 +03:00
virtual void update_shdr() {}
2020-10-25 09:17:43 +03:00
2020-12-10 07:44:58 +03:00
std::string_view name;
2020-10-29 09:30:41 +03:00
int shndx = 0;
2020-11-29 13:58:02 +03:00
Kind kind;
2020-10-26 04:03:17 +03:00
bool starts_new_ptload = false;
2020-12-10 09:59:24 +03:00
ElfShdr shdr = {};
2020-10-12 08:17:34 +03:00
};
2020-11-16 18:17:01 +03:00
// ELF header
class OutputEhdr : public OutputChunk {
public:
OutputEhdr() : OutputChunk(HEADER) {
2020-12-10 15:38:37 +03:00
shdr.sh_flags = SHF_ALLOC;
2020-12-10 15:13:02 +03:00
shdr.sh_size = sizeof(ElfEhdr);
2020-11-16 18:17:01 +03:00
}
2020-11-17 07:56:40 +03:00
void copy_buf() override;
2020-11-16 18:17:01 +03:00
};
2020-11-16 18:23:51 +03:00
// Section header
class OutputShdr : public OutputChunk {
public:
OutputShdr() : OutputChunk(HEADER) {
2020-12-10 15:38:37 +03:00
shdr.sh_flags = SHF_ALLOC;
2020-11-16 18:23:51 +03:00
}
void update_shdr() override;
2020-11-17 07:56:40 +03:00
void copy_buf() override;
2020-11-16 18:23:51 +03:00
};
2020-11-16 18:33:41 +03:00
// Program header
class OutputPhdr : public OutputChunk {
2020-10-15 11:06:01 +03:00
public:
2020-11-16 18:33:41 +03:00
OutputPhdr() : OutputChunk(HEADER) {
2020-12-10 15:38:37 +03:00
shdr.sh_flags = SHF_ALLOC;
2020-11-08 10:05:36 +03:00
}
2020-11-16 18:33:41 +03:00
void update_shdr() override;
2020-11-17 07:56:40 +03:00
void copy_buf() override;
2020-10-15 11:06:01 +03:00
};
2020-11-16 19:05:01 +03:00
class InterpSection : public OutputChunk {
public:
InterpSection() : OutputChunk(SYNTHETIC) {
name = ".interp";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_PROGBITS;
shdr.sh_flags = SHF_ALLOC;
2020-11-16 19:05:01 +03:00
shdr.sh_size = config.dynamic_linker.size() + 1;
}
2020-11-17 07:56:40 +03:00
void copy_buf() override;
2020-11-16 19:05:01 +03:00
};
2020-10-16 10:38:03 +03:00
// Sections
2020-10-15 12:30:06 +03:00
class OutputSection : public OutputChunk {
public:
2020-12-17 15:19:29 +03:00
static OutputSection *get_instance(std::string_view name, u32 type, u64 flags);
2020-10-22 10:35:17 +03:00
2020-12-10 07:44:58 +03:00
OutputSection(std::string_view name, u32 type, u64 flags)
2020-11-08 10:09:01 +03:00
: OutputChunk(REGULAR) {
2020-10-22 13:35:16 +03:00
this->name = name;
2020-10-25 07:17:10 +03:00
shdr.sh_type = type;
2020-11-13 06:43:59 +03:00
shdr.sh_flags = flags;
2020-10-28 08:06:35 +03:00
idx = instances.size();
instances.push_back(this);
2020-10-21 05:28:43 +03:00
}
2020-11-17 07:56:40 +03:00
void copy_buf() override;
2020-10-26 07:34:15 +03:00
2020-11-06 15:17:35 +03:00
static inline std::vector<OutputSection *> instances;
2020-10-29 12:31:06 +03:00
2020-11-08 10:09:01 +03:00
std::vector<InputChunk *> members;
2020-10-29 12:31:06 +03:00
u32 idx;
2020-10-15 12:30:06 +03:00
};
2020-11-18 11:11:58 +03:00
class GotSection : public OutputChunk {
public:
GotSection() : OutputChunk(SYNTHETIC) {
name = ".got";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_PROGBITS;
shdr.sh_flags = SHF_ALLOC | SHF_WRITE;
2020-11-18 11:11:58 +03:00
shdr.sh_addralign = GOT_SIZE;
}
2020-11-18 14:29:24 +03:00
2020-11-21 06:49:28 +03:00
void add_got_symbol(Symbol *sym);
2020-11-21 04:48:51 +03:00
void add_gottpoff_symbol(Symbol *sym);
2020-11-21 04:48:23 +03:00
void add_tlsgd_symbol(Symbol *sym);
void add_tlsld_symbol(Symbol *sym);
2020-11-18 14:29:24 +03:00
void copy_buf() override;
std::vector<Symbol *> got_syms;
2020-11-21 04:48:51 +03:00
std::vector<Symbol *> gottpoff_syms;
2020-11-21 04:48:23 +03:00
std::vector<Symbol *> tlsgd_syms;
std::vector<Symbol *> tlsld_syms;
2020-11-18 11:11:58 +03:00
};
2020-11-13 06:43:59 +03:00
class GotPltSection : public OutputChunk {
public:
GotPltSection() : OutputChunk(SYNTHETIC) {
2020-11-16 19:05:01 +03:00
name = ".got.plt";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_PROGBITS;
shdr.sh_flags = SHF_ALLOC | SHF_WRITE;
2020-11-13 06:43:59 +03:00
shdr.sh_addralign = GOT_SIZE;
2020-12-07 10:12:19 +03:00
shdr.sh_size = GOT_SIZE * 3;
2020-11-13 06:43:59 +03:00
}
2020-11-18 15:45:49 +03:00
void copy_buf() override;
2020-11-13 06:43:59 +03:00
};
2020-11-01 11:46:08 +03:00
class PltSection : public OutputChunk {
public:
2020-11-08 10:05:36 +03:00
PltSection() : OutputChunk(SYNTHETIC) {
2020-11-16 19:05:01 +03:00
name = ".plt";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_PROGBITS;
shdr.sh_flags = SHF_ALLOC | SHF_EXECINSTR;
2020-11-01 11:46:08 +03:00
shdr.sh_addralign = 8;
2020-11-13 06:30:27 +03:00
shdr.sh_size = PLT_SIZE;
2020-11-01 11:46:08 +03:00
}
2020-11-18 15:45:49 +03:00
void add_symbol(Symbol *sym);
void copy_buf() override;
2020-11-17 15:02:14 +03:00
2020-11-18 15:45:49 +03:00
std::vector<Symbol *> symbols;
2020-11-01 11:46:08 +03:00
};
2020-11-01 10:22:47 +03:00
class RelPltSection : public OutputChunk {
2020-11-01 02:55:13 +03:00
public:
2020-11-08 10:05:36 +03:00
RelPltSection() : OutputChunk(SYNTHETIC) {
2020-11-16 19:05:01 +03:00
name = ".rela.plt";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_RELA;
shdr.sh_flags = SHF_ALLOC;
2020-12-10 15:11:10 +03:00
shdr.sh_entsize = sizeof(ElfRela);
2020-11-01 02:55:13 +03:00
shdr.sh_addralign = 8;
}
2020-11-17 07:34:02 +03:00
void update_shdr() override;
2020-11-18 15:45:49 +03:00
void copy_buf() override;
2020-11-01 02:55:13 +03:00
};
2020-11-17 07:30:33 +03:00
class RelDynSection : public OutputChunk {
public:
RelDynSection() : OutputChunk(SYNTHETIC) {
name = ".rela.dyn";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_RELA;
shdr.sh_flags = SHF_ALLOC;
2020-12-10 15:11:10 +03:00
shdr.sh_entsize = sizeof(ElfRela);
2020-11-17 07:30:33 +03:00
shdr.sh_addralign = 8;
}
void update_shdr() override;
2020-11-18 14:29:24 +03:00
void copy_buf() override;
2020-11-17 07:30:33 +03:00
};
2020-11-12 16:10:47 +03:00
class StrtabSection : public OutputChunk {
public:
2020-11-19 10:13:19 +03:00
StrtabSection() : OutputChunk(SYNTHETIC) {
name = ".strtab";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_STRTAB;
2020-11-12 16:10:47 +03:00
shdr.sh_addralign = 1;
shdr.sh_size = 1;
}
2020-11-29 12:58:36 +03:00
void update_shdr() override;
2020-11-12 16:10:47 +03:00
};
2020-11-17 06:20:56 +03:00
class ShstrtabSection : public OutputChunk {
public:
ShstrtabSection() : OutputChunk(SYNTHETIC) {
name = ".shstrtab";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_STRTAB;
2020-11-17 06:20:56 +03:00
shdr.sh_addralign = 1;
}
void update_shdr() override;
2020-11-17 07:56:40 +03:00
void copy_buf() override;
2020-11-17 06:20:56 +03:00
};
2020-11-17 07:19:54 +03:00
class DynstrSection : public OutputChunk {
public:
DynstrSection() : OutputChunk(SYNTHETIC) {
name = ".dynstr";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_STRTAB;
shdr.sh_flags = SHF_ALLOC;
2020-11-17 07:19:54 +03:00
shdr.sh_size = 1;
shdr.sh_addralign = 1;
}
2020-12-10 07:44:58 +03:00
u32 add_string(std::string_view str);
u32 find_string(std::string_view str);
2020-11-17 07:56:40 +03:00
void copy_buf() override;
2020-11-17 07:19:54 +03:00
private:
2020-12-10 07:44:58 +03:00
std::vector<std::string_view> contents;
2020-11-17 07:19:54 +03:00
};
2020-11-16 18:43:32 +03:00
class DynamicSection : public OutputChunk {
public:
DynamicSection() : OutputChunk(SYNTHETIC) {
2020-11-16 19:05:01 +03:00
name = ".dynamic";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_DYNAMIC;
shdr.sh_flags = SHF_ALLOC | SHF_WRITE;
2020-11-16 18:43:32 +03:00
shdr.sh_addralign = 8;
2020-12-10 15:15:47 +03:00
shdr.sh_entsize = sizeof(ElfDyn);
2020-11-16 18:43:32 +03:00
}
void update_shdr() override;
2020-11-17 07:56:40 +03:00
void copy_buf() override;
2020-11-16 18:43:32 +03:00
};
2020-10-27 12:59:56 +03:00
class SymtabSection : public OutputChunk {
public:
2020-11-17 07:32:22 +03:00
SymtabSection() : OutputChunk(SYNTHETIC) {
name = ".symtab";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_SYMTAB;
2020-12-10 09:10:18 +03:00
shdr.sh_entsize = sizeof(ElfSym);
2020-10-27 12:59:56 +03:00
shdr.sh_addralign = 8;
2020-12-10 09:10:18 +03:00
shdr.sh_size = sizeof(ElfSym);
2020-10-27 12:59:56 +03:00
}
2020-11-17 07:32:22 +03:00
void update_shdr() override;
2020-11-17 08:28:53 +03:00
void copy_buf() override;
2020-10-27 12:59:56 +03:00
};
2020-11-16 17:40:01 +03:00
class DynsymSection : public OutputChunk {
public:
DynsymSection() : OutputChunk(SYNTHETIC) {
2020-11-16 19:05:01 +03:00
name = ".dynsym";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_DYNSYM;
shdr.sh_flags = SHF_ALLOC;
2020-12-10 09:10:18 +03:00
shdr.sh_entsize = sizeof(ElfSym);
2020-11-16 17:40:01 +03:00
shdr.sh_addralign = 8;
2020-12-10 09:10:18 +03:00
shdr.sh_size = sizeof(ElfSym);
2020-11-16 17:40:01 +03:00
shdr.sh_info = 1;
}
2020-11-17 14:22:52 +03:00
void add_symbol(Symbol *sym);
2020-11-16 19:30:24 +03:00
void update_shdr() override;
2020-11-17 07:56:40 +03:00
void copy_buf() override;
2020-11-16 17:40:01 +03:00
2020-11-16 17:48:20 +03:00
std::vector<Symbol *> symbols;
2020-11-16 17:40:01 +03:00
};
2020-11-11 15:12:35 +03:00
class HashSection : public OutputChunk {
public:
HashSection() : OutputChunk(SYNTHETIC) {
2020-11-16 19:05:01 +03:00
name = ".hash";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_HASH;
shdr.sh_flags = SHF_ALLOC;
2020-11-11 15:12:35 +03:00
shdr.sh_entsize = 4;
shdr.sh_addralign = 4;
}
2020-11-16 18:10:38 +03:00
void update_shdr() override;
2020-11-17 07:56:40 +03:00
void copy_buf() override;
2020-11-12 11:15:59 +03:00
2020-11-12 14:38:46 +03:00
private:
2020-12-10 07:44:58 +03:00
static u32 hash(std::string_view name);
2020-11-11 15:12:35 +03:00
};
2020-11-07 15:53:21 +03:00
class MergedSection : public OutputChunk {
public:
2020-12-17 15:19:29 +03:00
static MergedSection *get_instance(std::string_view name, u32 type, u64 flags);
2020-11-07 14:29:06 +03:00
2020-11-07 15:53:21 +03:00
static inline std::vector<MergedSection *> instances;
2020-11-07 04:45:28 +03:00
ConcurrentMap<StringPiece> map;
2020-11-07 04:13:19 +03:00
2020-12-13 19:40:23 +03:00
void copy_buf() override;
2020-11-07 04:13:19 +03:00
private:
2020-12-10 07:44:58 +03:00
MergedSection(std::string_view name, u64 flags, u32 type)
2020-11-08 10:05:36 +03:00
: OutputChunk(SYNTHETIC) {
this->name = name;
shdr.sh_flags = flags;
shdr.sh_type = type;
shdr.sh_addralign = 1;
}
};
2020-11-25 11:20:48 +03:00
class CopyrelSection : public OutputChunk {
public:
CopyrelSection() : OutputChunk(SYNTHETIC) {
name = ".bss";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_NOBITS;
shdr.sh_flags = SHF_ALLOC | SHF_WRITE;
2020-11-25 11:20:48 +03:00
shdr.sh_addralign = 32;
}
void add_symbol(Symbol *sym);
std::vector<Symbol *> symbols;
};
2020-11-27 11:32:25 +03:00
class VersymSection : public OutputChunk {
public:
VersymSection() : OutputChunk(SYNTHETIC) {
name = ".gnu.version";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_GNU_VERSYM;
shdr.sh_flags = SHF_ALLOC;
2020-11-29 10:37:14 +03:00
shdr.sh_entsize = 2;
2020-11-27 11:32:25 +03:00
shdr.sh_addralign = 2;
}
void update_shdr() override;
void copy_buf() override;
2020-11-29 05:40:57 +03:00
std::vector<u16> contents;
2020-11-27 11:32:25 +03:00
};
class VerneedSection : public OutputChunk {
public:
VerneedSection() : OutputChunk(SYNTHETIC) {
name = ".gnu.version_r";
2020-12-10 15:38:37 +03:00
shdr.sh_type = SHT_GNU_VERNEED;
shdr.sh_flags = SHF_ALLOC;
2020-11-29 07:42:27 +03:00
shdr.sh_addralign = 8;
2020-11-27 11:32:25 +03:00
}
void update_shdr() override;
void copy_buf() override;
2020-11-29 05:40:57 +03:00
std::vector<u8> contents;
2020-11-27 11:32:25 +03:00
};
2020-12-10 07:44:58 +03:00
// Declaration only. Presumably reports whether `name` is a valid C
// identifier -- verify against the definition.
bool is_c_identifier(std::string_view name);
2020-12-10 15:13:02 +03:00
// Declaration only. Returns a vector of ElfPhdr records by value.
std::vector<ElfPhdr> create_phdr();
2020-11-04 04:43:05 +03:00
2020-10-12 07:30:34 +03:00
//
2020-11-03 04:54:10 +03:00
// object_file.cc
2020-10-12 07:30:34 +03:00
//
// A comdat group: an atomically updatable pointer to an ObjectFile plus a
// section index.
struct ComdatGroup {
  ComdatGroup(ObjectFile *file, u32 i)
    : file(file), section_idx(i) {}

  // std::atomic is not copyable, so copy the pointer it currently holds.
  ComdatGroup(const ComdatGroup &other)
    : file(other.file.load()), section_idx(other.section_idx) {}

  std::atomic<ObjectFile *> file;
  u32 section_idx;
};
2020-12-12 07:04:38 +03:00
// A named byte buffer: a file name plus a pointer/length pair. The struct
// only stores the pointer; it does not release the buffer.
struct MemoryMappedFile {
  // `name` is a sink parameter, so move it into the member instead of
  // copying it a second time.
  MemoryMappedFile(std::string name, u8 *data, u64 size)
    : name(std::move(name)), data(data), size(size) {}

  std::string name;
  u8 *data;
  u64 size;
};
2020-11-24 10:22:32 +03:00
class InputFile {
public:
2020-12-12 06:44:22 +03:00
InputFile(MemoryMappedFile mb);
2020-11-24 10:22:32 +03:00
2020-12-10 08:27:38 +03:00
MemoryMappedFile mb;
2020-12-12 06:41:09 +03:00
ElfEhdr &ehdr;
2020-12-12 06:36:16 +03:00
std::span<ElfShdr> elf_sections;
2020-11-24 10:22:32 +03:00
std::vector<Symbol *> symbols;
2020-12-12 06:41:09 +03:00
std::string name;
bool is_dso;
u32 priority;
2020-11-24 10:22:32 +03:00
std::atomic_bool is_alive = ATOMIC_VAR_INIT(false);
2020-12-12 06:38:30 +03:00
2020-12-12 06:46:24 +03:00
std::string_view get_string(const ElfShdr &shdr) const;
std::string_view get_string(u32 idx) const;
2020-12-12 06:38:30 +03:00
2020-12-12 06:46:24 +03:00
protected:
template<typename T> std::span<T> get_data(const ElfShdr &shdr) const;
template<typename T> std::span<T> get_data(u32 idx) const;
2020-12-13 14:40:01 +03:00
ElfShdr *find_section(u32 type);
2020-11-24 10:22:32 +03:00
};
class ObjectFile : public InputFile {
2020-10-04 12:00:33 +03:00
public:
2020-12-13 12:26:23 +03:00
ObjectFile(MemoryMappedFile mb, std::string archive_name);
2020-10-04 12:00:33 +03:00
void parse();
2020-11-07 12:06:09 +03:00
void initialize_mergeable_sections();
2020-11-05 06:20:16 +03:00
void resolve_symbols();
2020-11-30 10:43:47 +03:00
void mark_live_objects(tbb::parallel_do_feeder<ObjectFile *> &feeder);
2020-11-19 10:09:07 +03:00
void handle_undefined_weak_symbols();
2020-11-08 12:17:24 +03:00
void resolve_comdat_groups();
2020-10-19 15:50:33 +03:00
void eliminate_duplicate_comdat_groups();
2020-11-07 15:53:21 +03:00
void assign_mergeable_string_offsets();
2020-10-27 06:50:25 +03:00
void convert_common_symbols();
2020-10-27 14:58:28 +03:00
void compute_symtab();
2020-11-29 12:31:19 +03:00
void write_symtab();
2020-10-27 13:03:57 +03:00
2020-11-17 07:48:11 +03:00
static ObjectFile *create_internal_file();
2020-11-03 13:33:46 +03:00
2020-12-13 12:26:23 +03:00
std::string archive_name;
2020-11-08 06:10:08 +03:00
std::vector<InputSection *> sections;
2020-12-10 12:23:13 +03:00
std::span<ElfSym> elf_syms;
2020-11-07 05:41:14 +03:00
int first_global = 0;
2020-11-05 06:34:59 +03:00
const bool is_in_archive;
2020-11-26 12:09:32 +03:00
std::atomic_bool has_error = ATOMIC_VAR_INIT(false);
2020-10-09 17:26:26 +03:00
2020-12-18 04:56:56 +03:00
u64 num_dynrel = 0;
2020-12-16 14:56:04 +03:00
u64 reldyn_offset = 0;
2020-11-29 12:45:33 +03:00
u64 local_symtab_offset = 0;
u64 local_symtab_size = 0;
2020-11-29 12:45:33 +03:00
u64 global_symtab_offset = 0;
u64 global_symtab_size = 0;
2020-11-29 12:45:33 +03:00
u64 strtab_offset = 0;
u64 strtab_size = 0;
2020-10-27 14:58:28 +03:00
2020-12-13 19:12:55 +03:00
std::vector<MergeableSection *> mergeable_sections;
2020-11-07 15:53:21 +03:00
2020-10-09 14:47:45 +03:00
private:
2020-10-19 14:05:34 +03:00
void initialize_sections();
void initialize_symbols();
2020-11-08 08:13:59 +03:00
std::vector<StringPieceRef> read_string_pieces(InputSection *isec);
2020-11-24 10:22:32 +03:00
void maybe_override_symbol(Symbol &sym, int symidx);
2020-10-19 14:05:34 +03:00
2020-12-10 14:11:45 +03:00
std::vector<std::pair<ComdatGroup *, std::span<u32>>> comdat_groups;
2020-11-03 08:53:32 +03:00
std::vector<Symbol> local_symbols;
2020-11-07 06:14:37 +03:00
std::vector<StringPieceRef> sym_pieces;
2020-10-27 02:45:20 +03:00
bool has_common_symbol;
2020-10-26 08:52:55 +03:00
2020-12-10 07:44:58 +03:00
std::string_view symbol_strtab;
2020-12-10 09:59:24 +03:00
const ElfShdr *symtab_sec;
2020-10-04 12:00:33 +03:00
};
2020-11-24 10:22:32 +03:00
class SharedFile : public InputFile {
public:
2020-12-12 06:42:43 +03:00
SharedFile(MemoryMappedFile mb, bool as_needed) : InputFile(mb) {
2020-11-30 10:43:47 +03:00
is_alive = !as_needed;
}
2020-11-24 10:22:32 +03:00
void parse();
2020-11-24 10:22:32 +03:00
void resolve_symbols();
2020-12-10 14:12:43 +03:00
std::span<Symbol *> find_aliases(Symbol *sym);
2020-11-24 10:22:32 +03:00
2020-12-10 07:44:58 +03:00
std::string_view soname;
2020-11-27 12:10:09 +03:00
2020-12-10 07:44:58 +03:00
std::vector<std::string_view> version_strings;
2020-11-24 10:22:32 +03:00
private:
2020-12-13 14:40:01 +03:00
std::string_view get_soname();
2020-12-10 09:10:18 +03:00
void maybe_override_symbol(Symbol &sym, const ElfSym &esym);
2020-12-10 07:44:58 +03:00
std::vector<std::string_view> read_verdef();
2020-11-24 10:22:32 +03:00
2020-12-10 09:10:18 +03:00
std::vector<const ElfSym *> elf_syms;
2020-11-28 16:08:46 +03:00
std::vector<u16> versyms;
2020-12-10 07:44:58 +03:00
std::string_view symbol_strtab;
2020-12-10 09:59:24 +03:00
const ElfShdr *symtab_sec;
2020-11-24 10:22:32 +03:00
};
2020-12-13 15:06:25 +03:00
//
// archive_file.cc
//
// Declaration only. Takes one MemoryMappedFile and returns a vector of
// MemoryMappedFile, presumably one per archive member -- verify against
// the definition.
std::vector<MemoryMappedFile> read_archive_members(MemoryMappedFile mb);
2020-12-12 06:48:00 +03:00
2020-11-29 04:31:04 +03:00
//
// linker_script.cc
//
// Declaration only. Takes the script as an already-opened MemoryMappedFile.
void parse_linker_script(MemoryMappedFile mb);
2020-12-10 07:44:58 +03:00
// Declaration only. Takes the script's path rather than an opened file.
void parse_version_script(std::string path);
2020-11-29 04:31:04 +03:00
//
// perf.cc
//
// A named 32-bit atomic counter. Every counter registers itself in the
// static `instances` list when it is constructed.
class Counter {
public:
  // A function-local mutex guards the push onto `instances`.
  Counter(std::string_view name, u32 value = 0) : name(name), value(value) {
    static std::mutex mu;
    std::lock_guard lock(mu);
    instances.push_back(this);
  }

  // Adds `delta` to the counter, but only while `enabled` is true.
  void inc(u32 delta = 1) {
    if (enabled)
      value += delta;
  }

  // Overwrites the counter regardless of `enabled`.
  void set(u32 value) {
    this->value = value;
  }

  static void print();

  static bool enabled;

private:
  std::string_view name;
  std::atomic_uint32_t value;

  static std::vector<Counter *> instances;
};
2020-12-11 10:51:20 +03:00
// A named timer. The constructor, stop() and print() are defined outside
// this header.
class Timer {
public:
  Timer(std::string name);
  void stop();
  static void print();

private:
  static std::vector<Timer *> instances;

  std::string name;
  // Zero-initialized so that no field is ever read uninitialized; the
  // out-of-line constructor and stop() are expected to fill them in.
  u64 start = 0;
  u64 end = 0;
  u64 user = 0;
  u64 sys = 0;
  bool stopped = false;
};
class ScopedTimer {
public:
ScopedTimer(std::string name) {
timer = new Timer(name);
}
~ScopedTimer() {
timer->stop();
}
private:
Timer *timer;
};
2020-11-29 04:31:04 +03:00
//
// mapfile.cc
//
// Declaration only. Takes no arguments and returns nothing.
void print_map();
//
// main.cc
//
2020-12-10 08:27:38 +03:00
// File-opening helpers (declarations only). open_input_file() returns a
// pointer while must_open_input_file() returns by value; presumably the
// former signals failure with nullptr and the latter reports an error
// instead -- verify against the definitions.
MemoryMappedFile find_library(std::string path);
MemoryMappedFile *open_input_file(std::string path);
MemoryMappedFile must_open_input_file(std::string path);
void read_file(MemoryMappedFile mb);
2020-11-29 04:31:04 +03:00
//
// Inline objects and functions
//
namespace out {
inline std::vector<ObjectFile *> objs;
inline std::vector<SharedFile *> dsos;
inline std::vector<OutputChunk *> chunks;
inline u8 *buf;
inline OutputEhdr *ehdr;
inline OutputShdr *shdr;
inline OutputPhdr *phdr;
inline InterpSection *interp;
inline GotSection *got;
inline GotPltSection *gotplt;
inline RelPltSection *relplt;
inline RelDynSection *reldyn;
inline DynamicSection *dynamic;
inline StrtabSection *strtab;
inline DynstrSection *dynstr;
inline HashSection *hash;
inline ShstrtabSection *shstrtab;
inline PltSection *plt;
inline SymtabSection *symtab;
inline DynsymSection *dynsym;
inline CopyrelSection *copyrel;
2020-11-29 05:57:58 +03:00
inline VersymSection *versym;
inline VerneedSection *verneed;
2020-11-29 04:31:04 +03:00
inline u64 tls_end;
inline Symbol *__bss_start;
inline Symbol *__ehdr_start;
inline Symbol *__rela_iplt_start;
inline Symbol *__rela_iplt_end;
inline Symbol *__init_array_start;
inline Symbol *__init_array_end;
inline Symbol *__fini_array_start;
inline Symbol *__fini_array_end;
inline Symbol *__preinit_array_start;
inline Symbol *__preinit_array_end;
inline Symbol *_DYNAMIC;
inline Symbol *_GLOBAL_OFFSET_TABLE_;
inline Symbol *_end;
inline Symbol *_etext;
inline Symbol *_edata;
}
2020-12-10 08:32:42 +03:00
// Writes `msg` and a newline to stdout. A function-local mutex is held
// while printing so that messages from concurrent callers are not
// interleaved.
inline void message(std::string msg) {
  static std::mutex mu;
  std::lock_guard lock(mu);
  std::cout << msg << "\n";
}
2020-12-11 03:15:08 +03:00
inline std::string to_string(Symbol sym) {
return std::string(sym.name) + "(" + to_string(sym.file) + ")";
2020-11-29 04:31:04 +03:00
}
// Rounds `val` up to the next multiple of `align`, which must be a
// non-zero power of two.
inline u64 align_to(u64 val, u64 align) {
  // Test the power-of-two property on the full 64-bit value.
  // __builtin_popcount takes an unsigned int and would silently drop the
  // upper 32 bits of `align`.
  assert(align != 0 && (align & (align - 1)) == 0);
  return (val + align - 1) & ~(align - 1);
}
2020-11-15 08:07:40 +03:00
// Returns the address this symbol resolves to. The cases are tried in
// this order:
//   1. the symbol refers to a string piece: the piece's address plus the
//      reference's addend;
//   2. the symbol has a copyrel offset: that offset within out::copyrel;
//   3. the symbol has an input section: the section's address plus
//      `value`, or 0 if the section is not alive;
//   4. the symbol comes from a DSO: the address of its PLT entry;
//   5. otherwise: `value` itself.
inline u64 Symbol::get_addr() const {
  if (piece_ref.piece)
    return piece_ref.piece->get_addr() + piece_ref.addend;

  if (copyrel_offset != -1)
    return out::copyrel->shdr.sh_addr + copyrel_offset;

  if (input_section) {
    if (!input_section->is_alive) {
      // The control can reach here if there's a relocation that refers
      // a local symbol belonging to a comdat group section. This is a
      // violation of the spec, as all relocations should use only global
      // symbols of comdat members. However, .eh_frame tends to have such
      // relocations.
      return 0;
    }

    return input_section->get_addr() + value;
  }

  // copyrel_offset is known to be -1 at this point (handled above), so
  // only the DSO check is needed.
  if (file && file->is_dso)
    return get_plt_addr();

  return value;
}
// Address of this symbol's slot in out::got. got_idx must have been
// assigned.
inline u64 Symbol::get_got_addr() const {
  assert(got_idx != -1);
  // Widen the index first so the byte offset is computed in 64 bits.
  return out::got->shdr.sh_addr + (u64)got_idx * GOT_SIZE;
}
// Address of this symbol's slot in out::gotplt. gotplt_idx must have been
// assigned.
inline u64 Symbol::get_gotplt_addr() const {
  assert(gotplt_idx != -1);
  // Widen the index first so the byte offset is computed in 64 bits.
  return out::gotplt->shdr.sh_addr + (u64)gotplt_idx * GOT_SIZE;
}
2020-11-21 04:48:51 +03:00
// Address of this symbol's GOTTPOFF slot in out::got. gottpoff_idx must
// have been assigned.
inline u64 Symbol::get_gottpoff_addr() const {
  assert(gottpoff_idx != -1);
  // Widen the index first so the byte offset is computed in 64 bits.
  return out::got->shdr.sh_addr + (u64)gottpoff_idx * GOT_SIZE;
}
2020-11-21 04:48:23 +03:00
// Address of this symbol's TLSGD slot in out::got. tlsgd_idx must have
// been assigned.
inline u64 Symbol::get_tlsgd_addr() const {
  assert(tlsgd_idx != -1);
  // Widen the index first so the byte offset is computed in 64 bits.
  return out::got->shdr.sh_addr + (u64)tlsgd_idx * GOT_SIZE;
}
2020-11-21 04:48:23 +03:00
// Address of this symbol's TLSLD slot in out::got. tlsld_idx must have
// been assigned.
inline u64 Symbol::get_tlsld_addr() const {
  assert(tlsld_idx != -1);
  // Widen the index first so the byte offset is computed in 64 bits.
  return out::got->shdr.sh_addr + (u64)tlsld_idx * GOT_SIZE;
}
// Address of this symbol's entry in out::plt. plt_idx must have been
// assigned.
inline u64 Symbol::get_plt_addr() const {
  assert(plt_idx != -1);
  // Widen the index first so the byte offset is computed in 64 bits.
  return out::plt->shdr.sh_addr + (u64)plt_idx * PLT_SIZE;
}
// Address of this piece: the parent merged section's address, plus the
// owning mergeable section's offset, plus the piece's own output offset.
// `isec` is dereferenced without a null check.
inline u64 StringPiece::get_addr() const {
  MergeableSection *owner = isec.load();
  u64 section_addr = owner->parent.shdr.sh_addr;
  return section_addr + owner->offset + output_offset;
}
2020-11-25 09:13:06 +03:00
// Address of this chunk: its output section's address plus the chunk's
// offset. `output_section` is dereferenced without a null check.
inline u64 InputChunk::get_addr() const {
  u64 base = output_section->shdr.sh_addr;
  return base + offset;
}
2020-12-10 07:44:58 +03:00
// Computes a 32-bit hash of `name` with a shift-and-fold loop. After each
// byte is mixed in, the top four bits are folded back into the low bits
// and then cleared, so the result always fits in 28 bits.
inline u32 elf_hash(std::string_view name) {
  u32 h = 0;
  // Iterate as u8 so bytes >= 0x80 are added as unsigned values.
  for (u8 c : name) {
    h = (h << 4) + c;
    u32 g = h & 0xf0000000;
    if (g != 0)
      h ^= g >> 24;
    h &= ~g;
  }
  return h;
}
2020-12-10 07:44:58 +03:00
// Copies the characters of `str` to `buf` and appends a terminating NUL.
// `buf` must have room for str.size() + 1 bytes.
inline void write_string(u8 *buf, std::string_view str) {
  // An empty view may have a null data() pointer, and passing that to
  // memcpy is undefined even for a length of zero.
  if (!str.empty())
    std::memcpy(buf, str.data(), str.size());
  buf[str.size()] = '\0';
}
2020-11-16 18:43:32 +03:00
// Copies the raw bytes of `vec`'s elements to `buf`. `buf` must have room
// for vec.size() * sizeof(T) bytes; nothing is written for an empty
// vector.
template <typename T>
inline void write_vector(u8 *buf, const std::vector<T> &vec) {
  // data() of an empty vector may be null, and passing that to memcpy is
  // undefined even for a length of zero.
  if (vec.empty())
    return;
  std::memcpy(buf, vec.data(), vec.size() * sizeof(T));
}
2020-12-20 09:51:37 +03:00
// Appends every element of `vec2` to the end of `vec1`, converting each U
// to T. `vec2` is only read, so it is taken by const reference; this also
// lets callers pass const vectors and temporaries.
template <typename T, typename U>
inline void append(std::vector<T> &vec1, const std::vector<U> &vec2) {
  vec1.insert(vec1.end(), vec2.begin(), vec2.end());
}
2020-11-24 08:31:05 +03:00
// Concatenates the inner vectors of `vec`, in order, into one vector.
// The input is only read, so it is taken by const reference.
template <typename T>
inline std::vector<T> flatten(const std::vector<std::vector<T>> &vec) {
  // Count first so the result is allocated once.
  size_t total = 0;
  for (const std::vector<T> &v : vec)
    total += v.size();

  std::vector<T> ret;
  ret.reserve(total);
  for (const std::vector<T> &v : vec)
    ret.insert(ret.end(), v.begin(), v.end());
  return ret;
}
2020-12-13 16:31:50 +03:00
// Removes from `vec` every element for which `pred` returns true. The
// remaining elements keep their relative order.
template <typename T, typename U>
inline void erase(std::vector<T> &vec, U pred) {
  auto new_end = std::remove_if(vec.begin(), vec.end(), pred);
  vec.erase(new_end, vec.end());
}