#pragma once #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ELF.h" #include "llvm/Object/ELFTypes.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/Format.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Timer.h" #include "tbb/blocked_range.h" #include "tbb/concurrent_hash_map.h" #include "tbb/concurrent_unordered_set.h" #include "tbb/concurrent_vector.h" #include "tbb/global_control.h" #include "tbb/parallel_for_each.h" #include "tbb/parallel_reduce.h" #include "tbb/parallel_sort.h" #include "tbb/partitioner.h" #include "tbb/spin_mutex.h" #include "tbb/task_arena.h" #include "tbb/task_group.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SECTOR_SIZE 512 #define PAGE_SIZE 4096 typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; typedef uint64_t u64; typedef int8_t i8; typedef int16_t i16; typedef int32_t i32; typedef int64_t i64; using llvm::ArrayRef; using llvm::ErrorOr; using llvm::Error; using llvm::Expected; using llvm::MemoryBufferRef; using llvm::SmallVector; using llvm::StringRef; using llvm::Twine; using llvm::object::ELF64LE; using llvm::object::ELFFile; class Symbol; class ObjectFile; class InputSection; struct Config { StringRef output; bool print_map = false; }; extern Config config; [[noreturn]] inline void error(const Twine &msg) { static std::mutex mu; std::lock_guard lock(mu); llvm::errs() << msg << "\n"; exit(1); } template T check(ErrorOr e) { if (auto ec = e.getError()) error(ec.message()); return std::move(*e); } template T check(Expected e) { if (!e) error(llvm::toString(e.takeError())); return std::move(*e); } template T check2(ErrorOr e, llvm::function_ref prefix) { if (auto ec = e.getError()) error(prefix() + ": " + ec.message()); return std::move(*e); } template T check2(Expected e, llvm::function_ref prefix) { if (!e) error(prefix() + ": " + toString(e.takeError())); return std::move(*e); } inline std::string toString(const Twine &s) { return s.str(); } #define CHECK(E, S) check2((E), [&] { return toString(S); }) class Symbol; class InputSection; class OutputSection; class ObjectFile; std::string toString(ObjectFile *); template static void for_each(T &arr, Callable callback) { #if 1 tbb::parallel_for_each(arr.begin(), arr.end(), callback); #else std::for_each(arr.begin(), arr.end(), callback); #endif } // // Interned string // namespace tbb { template<> struct tbb_hash_compare { static size_t hash(const StringRef& k) { return llvm::hash_value(k); } static bool equal(const StringRef& k1, const StringRef& k2) { return k1 == k2; } }; } template class ConcurrentMap { public: typedef tbb::concurrent_hash_map MapT; ValueT *insert(StringRef key, const ValueT &val) { typename MapT::const_accessor acc; map.insert(acc, std::make_pair(key, val)); return const_cast(&acc->second); } private: MapT map; }; // // Symbol // class Symbol { public: Symbol(StringRef name) : name(name) {} Symbol(const Symbol &other) : name(other.name), file(other.file) {} static Symbol *intern(StringRef name) { static ConcurrentMap map; return map.insert(name, Symbol(name)); } StringRef name; ObjectFile *file = nullptr; InputSection *input_section = nullptr; u64 addr = 0; uint32_t got_offset = 0; uint32_t gotplt_offset = 0; uint32_t gottp_offset = 0; uint32_t plt_offset = 0; tbb::spin_mutex mu; u8 visibility = 0; u8 type = llvm::ELF::STT_NOTYPE; bool is_weak = false; bool is_undef_weak = false; }; inline std::string toString(Symbol sym) { return (StringRef(sym.name) + "(" + toString(sym.file) + ")").str(); } // // input_chunks.cc // class InputSection { public: InputSection(ObjectFile *file, const ELF64LE::Shdr &shdr, StringRef name); void copy_to(u8 *buf); void relocate(u8 *buf); void scan_relocations(i32 &num_got, i32 &num_gotplt, i32 &num_plt, i32 &num_relplt); ObjectFile *file; OutputSection *output_section; ArrayRef rels; const ELF64LE::Shdr &shdr; StringRef name; u64 offset; }; std::string toString(InputSection *isec); inline u64 align_to(u64 val, u64 align) { assert(__builtin_popcount(align) == 1); return (val + align - 1) & ~(align - 1); } // // output_chunks.cc // class OutputChunk { public: OutputChunk() { shdr.sh_addralign = 1; } virtual void copy_to(u8 *buf) = 0; virtual void relocate(u8 *buf) {} bool is_bss() const { return shdr.sh_type == llvm::ELF::SHT_NOBITS; } virtual u64 get_size() const = 0; StringRef name; int shndx = 0; bool starts_new_ptload = false; ELF64LE::Shdr shdr = {}; std::vector sections; }; // ELF header class OutputEhdr : public OutputChunk { public: OutputEhdr() { shdr.sh_flags = llvm::ELF::SHF_ALLOC; } void copy_to(u8 *buf) override {} void relocate(u8 *buf) override; u64 get_size() const override { return sizeof(ELF64LE::Ehdr); } }; // Section header class OutputShdr : public OutputChunk { public: OutputShdr() { shdr.sh_flags = llvm::ELF::SHF_ALLOC; } void copy_to(u8 *buf) override { auto *p = (ELF64LE::Shdr *)(buf + shdr.sh_offset); for (ELF64LE::Shdr *ent : entries) *p++ = *ent; } u64 get_size() const override { return entries.size() * sizeof(ELF64LE::Shdr); } std::vector entries; }; // Program header class OutputPhdr : public OutputChunk { public: OutputPhdr() { shdr.sh_flags = llvm::ELF::SHF_ALLOC; } void copy_to(u8 *buf) override; u64 get_size() const override { return entries.size() * sizeof(ELF64LE::Phdr); } void construct(std::vector §ions); private: struct Phdr { ELF64LE::Phdr phdr; std::vector members; }; std::vector entries; }; // Sections class OutputSection : public OutputChunk { public: static OutputSection *get_instance(StringRef name, u64 flags, u32 type); OutputSection(StringRef name, u64 flags, u32 type) { this->name = name; shdr.sh_flags = flags; shdr.sh_type = type; idx = instances.size(); instances.push_back(this); } void copy_to(u8 *buf) override { if (!is_bss()) for_each(sections, [&](InputSection *isec) { isec->copy_to(buf); }); } void relocate(u8 *buf) override { if (!is_bss()) for_each(sections, [&](InputSection *isec) { isec->relocate(buf); }); } u64 get_size() const override { return shdr.sh_size; } bool empty() const { if (!sections.empty()) for (InputSection *isec : sections) if (isec->shdr.sh_size) return false; return true; } static std::vector instances; u32 idx; }; class InterpSection : public OutputChunk { public: InterpSection() { name = ".interp"; shdr.sh_flags = llvm::ELF::SHF_ALLOC; shdr.sh_type = llvm::ELF::SHT_PROGBITS; } void copy_to(u8 *buf) override { memcpy(buf + shdr.sh_offset, path, sizeof(path)); } u64 get_size() const override { return sizeof(path); } private: static constexpr char path[] = "/lib64/ld-linux-x86-64.so.2"; }; class GotSection : public OutputChunk { public: typedef enum : u8 { REGULAR, TPOFF, IREL } Kind; GotSection(StringRef name) { this->name = name; shdr.sh_flags = llvm::ELF::SHF_ALLOC | llvm::ELF::SHF_WRITE; shdr.sh_type = llvm::ELF::SHT_PROGBITS; shdr.sh_addralign = 8; } void copy_to(u8 *buf) override {} void relocate(u8 *buf) override; u64 get_size() const override { return size; } u64 size = 0; std::vector> symbols; }; class PltSection : public OutputChunk { public: PltSection() { this->name = ".plt"; shdr.sh_flags = llvm::ELF::SHF_ALLOC | llvm::ELF::SHF_EXECINSTR; shdr.sh_type = llvm::ELF::SHT_PROGBITS; shdr.sh_addralign = 8; } void copy_to(u8 *buf) override {} void relocate(u8 *buf) override; u64 get_size() const override { return size; } u64 size = 0; std::vector symbols; }; class RelPltSection : public OutputChunk { public: RelPltSection() { this->name = ".rela.plt"; shdr.sh_flags = llvm::ELF::SHF_ALLOC; shdr.sh_type = llvm::ELF::SHT_RELA; shdr.sh_entsize = sizeof(ELF64LE::Rela); shdr.sh_addralign = 8; } void copy_to(u8 *buf) override {} void relocate(u8 *buf) override; u64 get_size() const override { return size; } u64 size = 0; }; class ShstrtabSection : public OutputChunk { public: ShstrtabSection() { this->name = ".shstrtab"; contents = '\0'; shdr.sh_flags = 0; shdr.sh_type = llvm::ELF::SHT_STRTAB; } u64 add_string(StringRef s) { u64 ret = contents.size(); contents += s.str(); contents += '\0'; return ret; } void copy_to(u8 *buf) override { memcpy(buf + shdr.sh_offset, &contents[0], contents.size()); } u64 get_size() const override { return contents.size(); } private: std::string contents; }; class SymtabSection : public OutputChunk { public: SymtabSection() { this->name = ".symtab"; shdr.sh_flags = 0; shdr.sh_type = llvm::ELF::SHT_SYMTAB; shdr.sh_entsize = sizeof(ELF64LE::Sym); shdr.sh_addralign = 8; } void copy_to(u8 *buf) override {} u64 get_size() const override { return size; } u64 size = 0; private: std::vector contents; }; class StrtabSection : public OutputChunk { public: StrtabSection() { this->name = ".strtab"; shdr.sh_flags = 0; shdr.sh_type = llvm::ELF::SHT_STRTAB; } void copy_to(u8 *buf) override {} u64 get_size() const override { return size; } u64 size = 1; }; namespace out { extern OutputEhdr *ehdr; extern OutputShdr *shdr; extern OutputPhdr *phdr; extern InterpSection *interp; extern GotSection *got; extern GotSection *gotplt; extern PltSection *plt; extern RelPltSection *relplt; extern ShstrtabSection *shstrtab; extern SymtabSection *symtab; extern StrtabSection *strtab; extern u64 tls_end; extern Symbol *__bss_start; extern Symbol *__ehdr_start; extern Symbol *__rela_iplt_start; extern Symbol *__rela_iplt_end; extern Symbol *end; extern Symbol *_end; extern Symbol *etext; extern Symbol *_etext; extern Symbol *edata; extern Symbol *_edata; } // // input_files.cc // struct ComdatGroup { ComdatGroup(ObjectFile *file, u32 i) : file(file), section_idx(i) {} ComdatGroup(const ComdatGroup &other) : file(other.file.load()), section_idx(other.section_idx) {} tbb::spin_mutex mu; std::atomic file; u32 section_idx; }; struct StringPiece { StringPiece(StringRef data) : data(data) {} StringPiece(const StringPiece &other) : data(other.data) {} StringRef data; std::atomic_flag flag = ATOMIC_FLAG_INIT; }; class ObjectFile { public: ObjectFile(MemoryBufferRef mb, StringRef archive_name); void parse(); void register_defined_symbols(); void register_undefined_symbols(tbb::parallel_do_feeder &feeder); void hanlde_undefined_weak_symbols(); void eliminate_duplicate_comdat_groups(); void convert_common_symbols(); void fix_sym_addrs(); void compute_symtab(); void write_local_symtab(u8 *buf, u64 symtab_off, u64 strtab_off); void write_global_symtab(u8 *buf, u64 symtab_off, u64 strtab_off); bool is_in_archive(); Symbol *get_symbol(u32 idx) const { if (idx < first_global) return nullptr; return symbols[idx - first_global]; } u64 get_symbol_addr(u32 idx) const { if (idx < first_global) { const ELF64LE::Sym &sym = elf_syms[idx]; if (sym.st_shndx == llvm::ELF::SHN_ABS) return sym.st_value; InputSection *isec = sections[sym.st_shndx]; if (isec) return isec->output_section->shdr.sh_addr + isec->offset + sym.st_value; return 0; } return symbols[idx - first_global]->addr; } std::vector sections; StringRef archive_name; ELFFile obj; std::vector symbols; ArrayRef elf_syms; u32 priority; std::atomic_bool is_alive = ATOMIC_VAR_INIT(false); std::string name; u64 local_symtab_size = 0; u64 local_strtab_size = 0; u64 global_symtab_size = 0; u64 global_strtab_size = 0; // For .strtab construction std::vector local_symbols; private: void initialize_sections(); void initialize_symbols(); void remove_comdat_members(u32 section_idx); void read_string_pieces(const ELF64LE::Shdr &shdr); MemoryBufferRef mb; std::vector> comdat_groups; std::vector merged_strings_alloc; std::vector merged_strings_noalloc; int first_global = 0; bool has_common_symbol; ArrayRef elf_sections; StringRef symbol_strtab; const ELF64LE::Shdr *symtab_sec; }; // // writer.cc // void write(); // // output_file.cc // class OutputFile { public: OutputFile(u64 size); void commit(); private: std::unique_ptr output_buffer; u8 *buf; }; // // mapfile.cc // void print_map(ArrayRef files, ArrayRef output_sections); // // main.cc // MemoryBufferRef readFile(StringRef path); extern std::atomic_int num_defined; extern std::atomic_int num_undefined; extern std::atomic_int num_all_syms; extern std::atomic_int num_comdats; extern std::atomic_int num_regular_sections; extern std::atomic_int num_files; extern std::atomic_int num_relocs; extern std::atomic_int num_relocs_alloc; extern std::atomic_int num_string_pieces; // // Other // inline void memcpy_nontemporal(void *dst_, const void *src_, size_t n) { #if 1 char *src = (char *)src_; char *dst = (char *)dst_; if ((uintptr_t)src % 16 || (uintptr_t)dst % 16) { memcpy(dst, src, n); return; } size_t i = 0; for (; i + 16 < n; i += 16) { __m128 val = __builtin_nontemporal_load((__m128 *)(src + i)); __builtin_nontemporal_store(val, (__m128 *)(dst + i)); } memcpy(dst + i, src + i, n - i); #else memcpy(dst_, src_, n); #endif }