1
1
mirror of https://github.com/rui314/mold.git synced 2024-09-22 10:27:48 +03:00
mold/main.cc

871 lines
25 KiB
C++
Raw Normal View History

2020-10-20 08:54:35 +03:00
#include "mold.h"
2020-10-02 07:28:26 +03:00
2020-11-03 14:49:30 +03:00
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/FileOutputBuffer.h"
2020-11-19 12:03:26 +03:00
#include "llvm/Support/FileSystem.h"
2020-11-03 14:49:30 +03:00
2020-11-09 05:58:48 +03:00
#include <fcntl.h>
2020-09-29 09:05:29 +03:00
#include <iostream>
2020-11-09 06:30:13 +03:00
#include <libgen.h>
2020-11-09 05:58:48 +03:00
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
2020-09-29 09:05:29 +03:00
2020-10-24 12:58:21 +03:00
using namespace llvm;
2020-10-21 05:28:43 +03:00
using namespace llvm::ELF;
2020-11-19 12:03:26 +03:00
using namespace llvm::sys;
2020-10-21 05:28:43 +03:00
2020-10-10 06:47:12 +03:00
using llvm::object::Archive;
2020-10-02 10:47:51 +03:00
using llvm::opt::InputArgList;
2020-10-02 07:28:26 +03:00
2020-11-08 02:39:13 +03:00
class MyTimer {
public:
MyTimer(StringRef name) {
timer = new Timer(name, name);
timer->startTimer();
}
MyTimer(StringRef name, llvm::TimerGroup &tg) {
timer = new Timer(name, name, tg);
timer->startTimer();
}
~MyTimer() { timer->stopTimer(); }
private:
llvm::Timer *timer;
};
2020-11-11 04:42:26 +03:00
// Timer groups that MyTimer instances in this file report into:
//   parse_timer       - opening, parsing and merge-initializing inputs
//   before_copy_timer - passes that run before the output is written
//   copy_timer        - passes that write into the output buffer
llvm::TimerGroup parse_timer("parse", "parse");
llvm::TimerGroup before_copy_timer("before_copy", "before_copy");
llvm::TimerGroup copy_timer("copy", "copy");
2020-10-04 12:00:33 +03:00
//
// Command-line option processing
//
2020-10-02 07:28:26 +03:00
// Option identifiers. OPT_INVALID is 0; every OPTION(...) entry that
// options.inc expands contributes one OPT_<ID> enumerator after it.
enum {
OPT_INVALID = 0,
#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID,
#include "options.inc"
#undef OPTION
};
2020-10-02 10:47:51 +03:00
// Create prefix string literals used in Options.td: each PREFIX(NAME, VALUE)
// entry expanded from options.inc defines `const char *const NAME[] = VALUE;`.
#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
#include "options.inc"
#undef PREFIX
// Create table mapping all options defined in Options.td. options.inc is
// included a second time, now with OPTION(...) expanding to one
// llvm::opt::OptTable::Info initializer per option. The table is passed to
// the OptTable base class by MyOptTable's constructor.
static const llvm::opt::OptTable::Info opt_info[] = {
#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \
{X1, X2, X10, X11, OPT_##ID, llvm::opt::Option::KIND##Class, \
X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12},
#include "options.inc"
#undef OPTION
};
// Command-line parser built on top of llvm::opt::OptTable and the
// opt_info table.
class MyOptTable : private llvm::opt::OptTable {
public:
  MyOptTable() : OptTable(opt_info) {}

  // Parses `argc` arguments starting at `argv[0]` (callers are expected
  // to have skipped the program name already).
  InputArgList parse(int argc, char **argv);
};

// Parses the given arguments. Reports an error if an option is missing
// its argument, and one error per unknown argument.
InputArgList MyOptTable::parse(int argc, char **argv) {
  SmallVector<const char *, 256> argv_vec(argv, argv + argc);

  unsigned missing_idx = 0;
  unsigned num_missing = 0;
  InputArgList parsed = ParseArgs(argv_vec, missing_idx, num_missing);

  if (num_missing != 0)
    error(Twine(parsed.getArgString(missing_idx)) + ": missing argument");

  for (auto *unknown : parsed.filtered(OPT_UNKNOWN))
    error("unknown argument '" + unknown->getAsString(parsed) + "'");

  return parsed;
}
2020-10-04 12:00:33 +03:00
//
// Main
//
2020-10-14 13:36:06 +03:00
// Returns a buffer reference for every member of the archive stored in
// `mb`. Errors are reported if the archive cannot be parsed, if a
// member's buffer cannot be obtained, or if iterating the children fails.
static std::vector<MemoryBufferRef> get_archive_members(MemoryBufferRef mb) {
  std::unique_ptr<Archive> archive =
      CHECK(Archive::create(mb),
            mb.getBufferIdentifier() + ": failed to parse archive");

  std::vector<MemoryBufferRef> members;
  Error iter_err = Error::success();

  for (const Archive::Child &child : archive->children(iter_err)) {
    MemoryBufferRef member =
        CHECK(child.getMemoryBufferRef(),
              mb.getBufferIdentifier() +
              ": could not get the buffer for a child of the archive");
    members.push_back(member);
  }

  if (iter_err)
    error(mb.getBufferIdentifier() + ": Archive::children failed: " +
          toString(std::move(iter_err)));

  // The Archive object is deliberately leaked rather than destroyed.
  archive.release();
  return members;
}
2020-11-19 03:24:10 +03:00
// Opens the file at `path`, maps it read-only into memory and dispatches
// on its magic number:
//   - archive: one ObjectFile per member is appended to out::files,
//     each created with the archive path as its second argument
//   - relocatable object / shared object: one ObjectFile is appended
//   - unknown magic: the buffer is handed to parse_linker_script()
//   - anything else: "unknown file type" error
//
// Failures of open, fstat and mmap are reported through error() together
// with the errno description.
//
// The copy of the path, the mapping and the MemoryBufferRef are allocated
// here and never released; presumably the objects created from them keep
// referring to that memory.
void read_file(StringRef path) {
  // Re-point `path` at a heap copy that is never freed, so the StringRef
  // stays valid after the caller's string is gone.
  path = *new std::string(path);

  int fd = open(path.str().c_str(), O_RDONLY);
  if (fd == -1)
    error("cannot open " + path + ": " + strerror(errno));

  struct stat st;
  if (fstat(fd, &st) == -1)
    error(path + ": stat failed: " + strerror(errno));

  void *addr = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
  if (addr == MAP_FAILED)
    error(path + ": mmap failed: " + strerror(errno));

  // The descriptor is no longer needed once the mapping exists.
  close(fd);

  auto &mb = *new MemoryBufferRef(StringRef((char *)addr, st.st_size), path);

  switch (identify_magic(mb.getBuffer())) {
  case file_magic::archive:
    for (MemoryBufferRef member : get_archive_members(mb))
      out::files.push_back(new ObjectFile(member, path));
    break;
  case file_magic::elf_relocatable:
  case file_magic::elf_shared_object:
    out::files.push_back(new ObjectFile(mb, ""));
    break;
  case file_magic::unknown:
    parse_linker_script(mb.getBufferIdentifier(), mb.getBuffer());
    break;
  default:
    error(path + ": unknown file type");
  }
}
2020-10-28 07:42:05 +03:00
// Splits `input` into consecutive, non-owning slices of `unit` elements
// each; the last slice holds the remainder and may be shorter. The
// returned ArrayRefs point into `input`, which must outlive them.
//
// A non-positive `unit` produces a single slice covering the whole input
// (or no slice at all if `input` is empty). Without this guard a `unit`
// of 0 would never shrink the remaining range and the loop would append
// empty slices forever.
template <typename T>
static std::vector<ArrayRef<T>> split(const std::vector<T> &input, int unit) {
  ArrayRef<T> arr(input);
  std::vector<ArrayRef<T>> vec;

  if (unit > 0) {
    // Compare sizes as unsigned values of the same type.
    const size_t step = unit;
    while (arr.size() >= step) {
      vec.push_back(arr.slice(0, step));
      arr = arr.slice(step);
    }
  }

  if (!arr.empty())
    vec.push_back(arr);
  return vec;
}
2020-11-17 08:05:40 +03:00
static void resolve_symbols() {
2020-11-11 04:42:26 +03:00
MyTimer t("resolve_symbols", before_copy_timer);
2020-11-11 04:51:30 +03:00
// Register defined symbols
2020-11-17 08:05:40 +03:00
tbb::parallel_for_each(out::files,
[](ObjectFile *file) { file->resolve_symbols(); });
2020-11-11 04:42:26 +03:00
2020-11-11 04:51:30 +03:00
// Mark archive members we include into the final output.
2020-11-11 04:42:26 +03:00
std::vector<ObjectFile *> root;
2020-11-17 08:05:40 +03:00
for (ObjectFile *file : out::files)
2020-11-15 15:42:36 +03:00
if (file->is_alive && !file->is_dso)
2020-11-11 04:42:26 +03:00
root.push_back(file);
tbb::parallel_do(
root,
[&](ObjectFile *file, tbb::parallel_do_feeder<ObjectFile *> &feeder) {
file->mark_live_archive_members(feeder);
});
// Eliminate unused archive members.
2020-11-17 08:05:40 +03:00
out::files.erase(std::remove_if(out::files.begin(), out::files.end(),
[](ObjectFile *file){ return !file->is_alive; }),
out::files.end());
2020-11-11 04:42:26 +03:00
// Convert weak symbols to absolute symbols with value 0.
2020-11-17 08:05:40 +03:00
tbb::parallel_for_each(out::files, [](ObjectFile *file) {
2020-11-19 10:09:07 +03:00
file->handle_undefined_weak_symbols();
2020-11-11 04:51:30 +03:00
});
2020-11-11 04:42:26 +03:00
}
2020-11-17 08:04:53 +03:00
static void eliminate_comdats() {
2020-11-11 04:42:26 +03:00
MyTimer t("comdat", before_copy_timer);
2020-11-17 08:04:53 +03:00
tbb::parallel_for_each(out::files, [](ObjectFile *file) {
2020-11-08 12:17:24 +03:00
file->resolve_comdat_groups();
});
2020-11-17 08:04:53 +03:00
tbb::parallel_for_each(out::files, [](ObjectFile *file) {
2020-11-08 12:17:24 +03:00
file->eliminate_duplicate_comdat_groups();
});
}
2020-11-17 08:04:53 +03:00
static void handle_mergeable_strings() {
2020-11-11 04:42:26 +03:00
MyTimer t("resolve_strings", before_copy_timer);
2020-11-07 15:53:21 +03:00
// Resolve mergeable string pieces
2020-11-17 08:04:53 +03:00
tbb::parallel_for_each(out::files, [](ObjectFile *file) {
2020-11-08 08:13:59 +03:00
for (MergeableSection &isec : file->mergeable_sections) {
for (StringPieceRef &ref : isec.pieces) {
MergeableSection *cur = ref.piece->isec;
while (!cur || cur->file->priority > isec.file->priority)
if (ref.piece->isec.compare_exchange_strong(cur, &isec))
2020-11-07 15:53:21 +03:00
break;
}
}
});
// Calculate the total bytes of mergeable strings for each input section.
2020-11-17 08:04:53 +03:00
tbb::parallel_for_each(out::files, [](ObjectFile *file) {
2020-11-08 08:13:59 +03:00
for (MergeableSection &isec : file->mergeable_sections) {
2020-11-07 15:53:21 +03:00
u32 offset = 0;
2020-11-08 08:13:59 +03:00
for (StringPieceRef &ref : isec.pieces) {
2020-11-10 11:18:10 +03:00
StringPiece &piece = *ref.piece;
if (piece.isec == &isec && piece.output_offset == -1) {
2020-11-07 15:53:21 +03:00
ref.piece->output_offset = offset;
2020-11-10 09:03:40 +03:00
offset += ref.piece->data.size();
2020-11-07 15:53:21 +03:00
}
}
2020-11-08 08:13:59 +03:00
isec.size = offset;
2020-11-07 15:53:21 +03:00
}
});
// Assign each mergeable input section a unique index.
2020-11-17 08:04:53 +03:00
for (ObjectFile *file : out::files) {
2020-11-08 08:13:59 +03:00
for (MergeableSection &isec : file->mergeable_sections) {
MergedSection &osec = isec.parent;
isec.offset = osec.shdr.sh_size;
osec.shdr.sh_size += isec.size;
2020-11-07 15:53:21 +03:00
}
}
2020-11-08 07:01:46 +03:00
static Counter counter("merged_strings");
for (MergedSection *osec : MergedSection::instances)
counter.inc(osec->map.size());
2020-11-07 15:53:21 +03:00
}
2020-11-11 04:51:30 +03:00
// So far, each input section has a pointer to its corresponding
// output section, but there's no reverse edge to get a list of
// input sections from an output section. This function creates it.
//
// An output section may contain millions of input sections.
// So, we append input sections to output sections in parallel.
2020-11-17 08:04:53 +03:00
static void bin_sections() {
2020-11-11 04:42:26 +03:00
MyTimer t("bin_sections", before_copy_timer);
2020-11-17 08:04:53 +03:00
int unit = (out::files.size() + 127) / 128;
std::vector<ArrayRef<ObjectFile *>> slices = split(out::files, unit);
2020-10-26 07:36:56 +03:00
2020-11-08 04:05:59 +03:00
int num_osec = OutputSection::instances.size();
2020-11-08 06:36:08 +03:00
std::vector<std::vector<std::vector<InputChunk *>>> groups(slices.size());
2020-10-28 08:22:25 +03:00
for (int i = 0; i < groups.size(); i++)
2020-11-08 04:05:59 +03:00
groups[i].resize(num_osec);
2020-10-28 08:06:35 +03:00
tbb::parallel_for(0, (int)slices.size(), [&](int i) {
for (ObjectFile *file : slices[i]) {
for (InputSection *isec : file->sections) {
if (!isec)
continue;
OutputSection *osec = isec->output_section;
2020-10-28 08:22:25 +03:00
groups[i][osec->idx].push_back(isec);
2020-10-28 08:06:35 +03:00
}
}
});
2020-11-08 04:05:59 +03:00
std::vector<int> sizes(num_osec);
2020-10-26 07:36:56 +03:00
2020-11-08 06:36:08 +03:00
for (ArrayRef<std::vector<InputChunk *>> group : groups)
2020-10-28 08:22:25 +03:00
for (int i = 0; i < group.size(); i++)
sizes[i] += group[i].size();
2020-11-08 03:44:27 +03:00
2020-11-08 04:05:59 +03:00
tbb::parallel_for(0, num_osec, [&](int j) {
2020-11-08 06:42:40 +03:00
OutputSection::instances[j]->members.reserve(sizes[j]);
2020-11-08 04:06:36 +03:00
2020-11-08 04:05:59 +03:00
for (int i = 0; i < groups.size(); i++) {
2020-11-08 06:42:40 +03:00
std::vector<InputChunk *> &sections = OutputSection::instances[j]->members;
2020-11-08 04:05:59 +03:00
sections.insert(sections.end(), groups[i][j].begin(), groups[i][j].end());
2020-10-28 08:06:35 +03:00
}
2020-11-08 04:05:59 +03:00
});
2020-10-26 07:36:56 +03:00
}
2020-10-22 17:19:48 +03:00
2020-10-26 08:18:00 +03:00
static void set_isec_offsets() {
2020-11-11 04:42:26 +03:00
MyTimer t("isec_offsets", before_copy_timer);
2020-11-07 18:47:34 +03:00
tbb::parallel_for_each(OutputSection::instances, [&](OutputSection *osec) {
2020-11-08 06:42:40 +03:00
if (osec->members.empty())
2020-10-27 07:52:10 +03:00
return;
2020-11-08 06:42:40 +03:00
std::vector<ArrayRef<InputChunk *>> slices = split(osec->members, 100000);
std::vector<u64> size(slices.size());
std::vector<u32> alignments(slices.size());
2020-10-26 08:18:00 +03:00
2020-10-28 07:42:05 +03:00
tbb::parallel_for(0, (int)slices.size(), [&](int i) {
u64 off = 0;
u32 align = 1;
2020-10-26 10:12:35 +03:00
2020-11-08 06:36:08 +03:00
for (InputChunk *isec : slices[i]) {
2020-10-26 10:12:35 +03:00
off = align_to(off, isec->shdr.sh_addralign);
isec->offset = off;
off += isec->shdr.sh_size;
align = std::max<u32>(align, isec->shdr.sh_addralign);
2020-10-26 10:12:35 +03:00
}
size[i] = off;
alignments[i] = align;
});
u32 align = *std::max_element(alignments.begin(), alignments.end());
2020-10-26 10:12:35 +03:00
std::vector<u64> start(slices.size());
2020-10-28 07:42:05 +03:00
for (int i = 1; i < slices.size(); i++)
2020-11-10 06:23:14 +03:00
start[i] = align_to(start[i - 1] + size[i - 1], align);
2020-10-26 10:58:49 +03:00
2020-10-28 07:42:05 +03:00
tbb::parallel_for(1, (int)slices.size(), [&](int i) {
2020-11-08 06:36:08 +03:00
for (InputChunk *isec : slices[i])
2020-10-26 10:12:35 +03:00
isec->offset += start[i];
});
osec->shdr.sh_size = start.back() + size.back();
2020-10-26 08:18:00 +03:00
osec->shdr.sh_addralign = align;
});
}
2020-11-17 08:04:53 +03:00
static void scan_rels() {
2020-11-15 10:19:21 +03:00
MyTimer t("scan_rels", before_copy_timer);
2020-11-17 14:54:13 +03:00
tbb::parallel_for_each(out::files, [&](ObjectFile *file) {
2020-11-15 10:19:21 +03:00
for (InputSection *isec : file->sections)
if (isec)
isec->scan_relocations();
2020-11-17 14:54:13 +03:00
});
2020-11-17 13:56:02 +03:00
2020-11-17 14:54:13 +03:00
std::vector<std::vector<Symbol *>> vec(out::files.size());
tbb::parallel_for(0, (int)out::files.size(), [&](int i) {
ObjectFile *file = out::files[i];
2020-11-17 13:56:02 +03:00
for (Symbol *sym : file->symbols)
if (sym->file == file && sym->flags)
vec[i].push_back(sym);
2020-11-15 07:01:38 +03:00
});
2020-11-06 06:01:52 +03:00
2020-11-17 14:22:52 +03:00
out::dynsyms = flatten(vec);
2020-11-17 13:56:02 +03:00
2020-11-17 14:22:52 +03:00
for (Symbol *sym : out::dynsyms) {
2020-11-18 14:29:24 +03:00
if (sym->flags & Symbol::NEEDS_GOT)
out::got->add_symbol(sym);
2020-11-06 07:54:37 +03:00
2020-11-18 15:45:49 +03:00
if (sym->flags & Symbol::NEEDS_PLT)
out::plt->add_symbol(sym);
2020-11-17 14:22:52 +03:00
if (sym->flags & Symbol::NEEDS_GOTTPOFF)
2020-11-18 14:29:24 +03:00
out::got->add_gottp_symbol(sym);
2020-11-17 14:22:52 +03:00
2020-11-18 15:45:49 +03:00
if ((sym->flags & Symbol::NEEDS_TLSGD) || (sym->flags & Symbol::NEEDS_TLSLD))
2020-11-17 14:22:52 +03:00
error("not implemented");
2020-11-18 15:45:49 +03:00
}
2020-11-16 16:25:58 +03:00
}
2020-11-11 11:25:00 +03:00
2020-11-17 07:59:24 +03:00
static void write_merged_strings() {
2020-11-11 04:42:26 +03:00
MyTimer t("write_merged_strings", copy_timer);
2020-11-17 07:59:24 +03:00
tbb::parallel_for_each(out::files, [&](ObjectFile *file) {
2020-11-08 08:13:59 +03:00
for (MergeableSection &isec : file->mergeable_sections) {
2020-11-17 07:59:24 +03:00
u8 *base = out::buf + isec.parent.shdr.sh_offset + isec.offset;
2020-11-08 04:31:49 +03:00
2020-11-08 08:13:59 +03:00
for (StringPieceRef &ref : isec.pieces) {
2020-11-08 04:31:49 +03:00
StringPiece &piece = *ref.piece;
2020-11-10 09:03:40 +03:00
if (piece.isec == &isec)
2020-11-08 04:31:49 +03:00
memcpy(base + piece.output_offset, piece.data.data(), piece.data.size());
}
}
});
}
2020-11-17 07:59:24 +03:00
// Zero-fills the gaps in the output buffer between the end of each chunk
// and the start of the next one, and between the last chunk and
// `filesize`. A SHT_NOBITS chunk is treated as occupying no file space,
// so its gap starts at its file offset.
static void clear_padding(u64 filesize) {
  MyTimer t("clear_padding", copy_timer);

  auto fill_gap_after = [&](OutputChunk *chunk, u64 gap_end) {
    u64 gap_begin = chunk->shdr.sh_offset;
    if (chunk->shdr.sh_type != SHT_NOBITS)
      gap_begin += chunk->shdr.sh_size;
    memset(out::buf + gap_begin, 0, gap_end - gap_begin);
  };

  for (int next = 1; next < out::chunks.size(); next++) {
    OutputChunk *prev = out::chunks[next - 1];
    fill_gap_after(prev, out::chunks[next]->shdr.sh_offset);
  }

  fill_gap_after(out::chunks.back(), filesize);
}
2020-10-22 12:54:51 +03:00
// We want to sort output sections in the following order.
//
2020-10-22 17:19:48 +03:00
// alloc readonly data
// alloc readonly code
// alloc writable tdata
// alloc writable tbss
// alloc writable data
// alloc writable bss
// nonalloc
2020-11-12 09:25:05 +03:00
static int get_section_rank(const ELF64LE::Shdr &shdr) {
2020-10-29 12:31:06 +03:00
bool alloc = shdr.sh_flags & SHF_ALLOC;
bool writable = shdr.sh_flags & SHF_WRITE;
bool exec = shdr.sh_flags & SHF_EXECINSTR;
bool tls = shdr.sh_flags & SHF_TLS;
2020-10-30 06:47:35 +03:00
bool nobits = shdr.sh_type == SHT_NOBITS;
2020-11-17 08:50:18 +03:00
return (!alloc << 5) | (writable << 4) | (exec << 3) | (!tls << 2) | nobits;
2020-10-22 12:54:51 +03:00
}
2020-11-11 08:13:39 +03:00
// Walks `chunks` in order and assigns each one a file offset and, for
// SHF_ALLOC chunks, a virtual address. Addresses start at
// config.image_base. Returns the file offset just past the last chunk,
// i.e. the size of the output file.
static u64 set_osec_offsets(ArrayRef<OutputChunk *> chunks) {
  MyTimer t("osec_offset", before_copy_timer);

  u64 fileoff = 0;
  u64 vaddr = config.image_base;

  for (OutputChunk *chunk : chunks) {
    const bool is_bss = chunk->shdr.sh_type == SHT_NOBITS;
    const bool is_tbss = is_bss && (chunk->shdr.sh_flags & SHF_TLS);

    if (chunk->starts_new_ptload)
      vaddr = align_to(vaddr, PAGE_SIZE);

    // For chunks that occupy file space, advance the file offset so that
    // it has the same position within a page as the virtual address.
    if (!is_bss) {
      const u64 vaddr_in_page = vaddr % PAGE_SIZE;
      const u64 fileoff_in_page = fileoff % PAGE_SIZE;

      if (vaddr_in_page > fileoff_in_page)
        fileoff += vaddr_in_page - fileoff_in_page;
      else if (vaddr_in_page < fileoff_in_page)
        fileoff = align_to(fileoff, PAGE_SIZE) + vaddr_in_page;
    }

    fileoff = align_to(fileoff, chunk->shdr.sh_addralign);
    vaddr = align_to(vaddr, chunk->shdr.sh_addralign);

    chunk->shdr.sh_offset = fileoff;
    if (chunk->shdr.sh_flags & SHF_ALLOC)
      chunk->shdr.sh_addr = vaddr;

    // NOBITS chunks take no file space; TLS NOBITS chunks additionally
    // do not advance the virtual address.
    if (!is_bss)
      fileoff += chunk->shdr.sh_size;
    if (!is_tbss)
      vaddr += chunk->shdr.sh_size;
  }

  return fileoff;
}
2020-11-11 08:13:39 +03:00
// Gives the linker-synthesized symbols (out::__bss_start, out::_end,
// __start_<name>/__stop_<name>, ...) their section index and value,
// based on the addresses already assigned to `chunks`.
static void fix_synthetic_symbols(ArrayRef<OutputChunk *> chunks) {
  // Points `sym` at the first byte of `chunk`. A null `sym` is ignored.
  auto bind_to_start = [](OutputChunk *chunk, Symbol *sym) {
    if (!sym)
      return;
    sym->shndx = chunk->shndx;
    sym->value = chunk->shdr.sh_addr;
  };

  // Points `sym` just past the last byte of `chunk`. A null `sym` is ignored.
  auto bind_to_end = [](OutputChunk *chunk, Symbol *sym) {
    if (!sym)
      return;
    sym->shndx = chunk->shndx;
    sym->value = chunk->shdr.sh_addr + chunk->shdr.sh_size;
  };

  // __bss_start: start of the first regular chunk named ".bss".
  for (OutputChunk *chunk : chunks) {
    if (chunk->kind != OutputChunk::REGULAR || chunk->name != ".bss")
      continue;
    bind_to_start(chunk, out::__bss_start);
    break;
  }

  // __ehdr_start: set only if some chunk has section index 1.
  for (OutputChunk *chunk : chunks) {
    if (chunk->shndx != 1)
      continue;
    out::__ehdr_start->shndx = 1;
    out::__ehdr_start->value = out::ehdr->shdr.sh_addr;
    break;
  }

  // __rela_iplt_start and __rela_iplt_end
  bind_to_start(out::relplt, out::__rela_iplt_start);
  bind_to_end(out::relplt, out::__rela_iplt_end);

  // __{init,fini}_array_{start,end}
  for (OutputChunk *chunk : chunks) {
    if (chunk->shdr.sh_type == SHT_INIT_ARRAY) {
      bind_to_start(chunk, out::__init_array_start);
      bind_to_end(chunk, out::__init_array_end);
    } else if (chunk->shdr.sh_type == SHT_FINI_ARRAY) {
      bind_to_start(chunk, out::__fini_array_start);
      bind_to_end(chunk, out::__fini_array_end);
    }
  }

  // _end, end, _etext, etext, _edata and edata. Each symbol ends up at
  // the end of the last non-header chunk that matches its condition.
  for (OutputChunk *chunk : chunks) {
    if (chunk->kind == OutputChunk::HEADER)
      continue;

    const bool is_alloc = chunk->shdr.sh_flags & SHF_ALLOC;
    const bool is_exec = chunk->shdr.sh_flags & SHF_EXECINSTR;
    const bool has_bits = chunk->shdr.sh_type != SHT_NOBITS;

    if (is_alloc)
      bind_to_end(chunk, out::_end);
    if (is_exec)
      bind_to_end(chunk, out::_etext);
    if (has_bits && is_alloc)
      bind_to_end(chunk, out::_edata);
  }

  // _DYNAMIC
  if (out::dynamic)
    bind_to_start(out::dynamic, out::_DYNAMIC);

  // _GLOBAL_OFFSET_TABLE_
  if (out::gotplt)
    bind_to_start(out::gotplt, out::_GLOBAL_OFFSET_TABLE_);

  // __start_ and __stop_ symbols for chunks whose name is a C identifier.
  for (OutputChunk *chunk : chunks) {
    if (!is_c_identifier(chunk->name))
      continue;
    bind_to_start(chunk, Symbol::intern(("__start_" + chunk->name).str()));
    bind_to_end(chunk, Symbol::intern(("__stop_" + chunk->name).str()));
  }
}
2020-11-09 11:38:12 +03:00
// Creates (or opens) config.output, resizes it to `filesize` bytes and
// maps it read/write with MAP_SHARED, so that stores into the returned
// buffer go to the file. If config.filler is not -1, the whole buffer is
// pre-filled with that byte value.
//
// Returns the start of the mapping. Failures of open, ftruncate and mmap
// are reported through error() with the output path and the errno
// description.
static u8 *open_output_file(u64 filesize) {
  MyTimer t("open_file", before_copy_timer);

  int fd = open(config.output.str().c_str(), O_RDWR | O_CREAT, 0777);
  if (fd == -1)
    error("cannot open " + config.output + ": " + strerror(errno));

  if (ftruncate(fd, filesize))
    error(config.output + ": ftruncate failed: " + strerror(errno));

  void *buf = mmap(nullptr, filesize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  if (buf == MAP_FAILED)
    error(config.output + ": mmap failed: " + strerror(errno));

  // The descriptor is no longer needed once the mapping exists.
  close(fd);

  if (config.filler != -1)
    memset(buf, config.filler, filesize);
  return (u8 *)buf;
}
2020-10-30 07:47:51 +03:00
// Returns the thread count requested with the thread-count option, or
// TBB's current max_allowed_parallelism value if the option is absent.
// A value that is not a positive integer is reported as an error.
static int get_thread_count(InputArgList &args) {
  auto *arg = args.getLastArg(OPT_thread_count);
  if (!arg)
    return tbb::global_control::active_value(tbb::global_control::max_allowed_parallelism);

  int n;
  bool parsed = llvm::to_integer(arg->getValue(), n);
  if (!parsed || n <= 0)
    error(arg->getSpelling() + ": expected a positive integer, but got '" +
          arg->getValue() + "'");
  return n;
}
2020-10-23 06:09:27 +03:00
2020-11-19 11:38:07 +03:00
// Collects the values of all occurrences of option `id` in `args`,
// in the order the filtered range yields them.
std::vector<StringRef> get_args(opt::InputArgList &args, int id) {
  std::vector<StringRef> values;
  for (auto *occurrence : args.filtered(id)) {
    StringRef value = occurrence->getValue();
    values.push_back(value);
  }
  return values;
}
2020-11-12 07:33:57 +03:00
// Parses the filler option. Returns -1 if the option was not given.
// Otherwise the value must be written as "0x" followed by hexadecimal
// digits; the parsed number is narrowed to one byte and returned.
// Malformed values are reported as errors.
static int parse_filler(opt::InputArgList &args) {
  auto *arg = args.getLastArg(OPT_filler);
  if (arg == nullptr)
    return -1;

  StringRef text = arg->getValue();
  if (!text.startswith("0x"))
    error("invalid argument: " + arg->getAsString(args));

  StringRef digits = text.substr(2);
  int parsed;
  if (!to_integer(digits, parsed, 16))
    error("invalid argument: " + arg->getAsString(args));

  // Narrowing to u8 keeps the result distinct from the -1 sentinel.
  return (u8)parsed;
}
2020-11-19 13:31:13 +03:00
// Searches config.library_paths, in order, for library `name`. In each
// directory "lib<name>.a" is tried first; "lib<name>.so" is tried next
// unless config.is_static is set. Returns the first path that exists,
// and reports an error if no directory has a match.
std::string find_library(StringRef name) {
  for (StringRef dir : config.library_paths) {
    const std::string stem = (dir + "/lib" + name).str();

    std::string archive = stem + ".a";
    if (fs::exists(archive))
      return archive;

    if (config.is_static)
      continue;

    std::string shared = stem + ".so";
    if (fs::exists(shared))
      return shared;
  }
  error("library not found: " + name);
}
2020-10-30 07:47:51 +03:00
int main(int argc, char **argv) {
2020-10-14 13:59:51 +03:00
// Parse command line options
2020-10-02 10:47:51 +03:00
MyOptTable opt_table;
2020-10-09 15:10:12 +03:00
InputArgList args = opt_table.parse(argc - 1, argv + 1);
2020-10-02 10:47:51 +03:00
2020-10-30 07:47:51 +03:00
tbb::global_control tbb_cont(tbb::global_control::max_allowed_parallelism,
get_thread_count(args));
2020-11-03 12:02:28 +03:00
Counter::enabled = args.hasArg(OPT_stat);
2020-10-04 12:00:33 +03:00
if (auto *arg = args.getLastArg(OPT_o))
config.output = arg->getValue();
else
error("-o option is missing");
2020-10-29 06:24:54 +03:00
config.print_map = args.hasArg(OPT_print_map);
2020-11-04 12:47:13 +03:00
config.is_static = args.hasArg(OPT_static);
2020-11-12 07:33:57 +03:00
config.filler = parse_filler(args);
2020-11-19 11:38:07 +03:00
config.library_paths = get_args(args, OPT_library_path);
2020-10-29 06:24:54 +03:00
2020-11-05 02:31:32 +03:00
for (auto *arg : args.filtered(OPT_trace_symbol))
Symbol::intern(arg->getValue())->traced = true;
2020-10-13 14:35:35 +03:00
// Open input files
2020-10-25 03:38:53 +03:00
{
2020-11-11 04:42:26 +03:00
MyTimer t("open", parse_timer);
2020-11-19 12:03:26 +03:00
for (auto *arg : args) {
switch (arg->getOption().getID()) {
case OPT_INPUT:
2020-11-17 08:03:32 +03:00
read_file(arg->getValue());
2020-11-19 12:03:26 +03:00
break;
case OPT_library:
read_file(find_library(arg->getValue()));
break;
}
}
2020-11-07 17:00:01 +03:00
}
2020-10-25 03:38:53 +03:00
2020-11-07 17:00:01 +03:00
// Parse input files
{
2020-11-11 04:42:26 +03:00
MyTimer t("parse", parse_timer);
2020-11-17 08:03:32 +03:00
tbb::parallel_for_each(out::files, [](ObjectFile *file) { file->parse(); });
2020-10-25 03:38:53 +03:00
}
2020-10-18 13:17:44 +03:00
2020-11-07 12:06:09 +03:00
{
2020-11-11 04:42:26 +03:00
MyTimer t("merge", parse_timer);
2020-11-17 08:03:32 +03:00
tbb::parallel_for_each(out::files, [](ObjectFile *file) {
2020-11-07 18:47:34 +03:00
file->initialize_mergeable_sections();
});
2020-11-07 12:06:09 +03:00
}
2020-11-06 10:58:13 +03:00
Timer total_timer("total", "total");
total_timer.startTimer();
2020-11-16 18:17:01 +03:00
out::ehdr = new OutputEhdr;
2020-11-16 18:23:51 +03:00
out::shdr = new OutputShdr;
2020-11-16 18:33:41 +03:00
out::phdr = new OutputPhdr;
2020-11-18 11:11:58 +03:00
out::got = new GotSection;
2020-11-13 06:43:59 +03:00
out::gotplt = new GotPltSection;
2020-11-17 07:34:02 +03:00
out::relplt = new RelPltSection;
2020-11-19 10:13:19 +03:00
out::strtab = new StrtabSection;
2020-11-17 06:20:56 +03:00
out::shstrtab = new ShstrtabSection;
2020-11-11 15:32:41 +03:00
out::plt = new PltSection;
2020-11-17 07:32:22 +03:00
out::symtab = new SymtabSection;
2020-11-16 17:40:01 +03:00
out::dynsym = new DynsymSection;
2020-11-17 07:19:54 +03:00
out::dynstr = new DynstrSection;
2020-11-12 08:40:39 +03:00
2020-11-10 13:33:27 +03:00
if (!config.is_static) {
2020-11-16 19:05:01 +03:00
out::interp = new InterpSection;
2020-11-16 18:43:32 +03:00
out::dynamic = new DynamicSection;
2020-11-17 07:30:33 +03:00
out::reldyn = new RelDynSection;
2020-11-11 15:32:41 +03:00
out::hash = new HashSection;
2020-11-10 13:33:27 +03:00
}
2020-10-18 13:05:28 +03:00
// Set priorities to files
2020-10-28 04:15:05 +03:00
int priority = 1;
2020-11-17 08:03:32 +03:00
for (ObjectFile *file : out::files)
2020-11-05 06:34:59 +03:00
if (!file->is_in_archive)
2020-10-28 04:15:05 +03:00
file->priority = priority++;
2020-11-17 08:03:32 +03:00
for (ObjectFile *file : out::files)
2020-11-05 06:34:59 +03:00
if (file->is_in_archive)
2020-10-28 04:15:05 +03:00
file->priority = priority++;
2020-10-18 13:05:28 +03:00
2020-11-11 04:42:26 +03:00
// Resolve symbols and fix the set of object files that are
// included to the final output.
2020-11-17 08:05:40 +03:00
resolve_symbols();
2020-10-19 15:50:33 +03:00
2020-11-05 03:24:47 +03:00
if (args.hasArg(OPT_trace))
2020-11-17 08:03:32 +03:00
for (ObjectFile *file : out::files)
2020-11-16 13:15:27 +03:00
message(toString(file));
2020-11-05 03:24:47 +03:00
2020-11-11 04:42:26 +03:00
// Remove redundant comdat sections (e.g. duplicate inline functions).
2020-11-17 08:04:53 +03:00
eliminate_comdats();
2020-10-10 06:47:12 +03:00
2020-11-11 04:42:26 +03:00
// Merge strings constants in SHF_MERGE sections.
2020-11-17 08:04:53 +03:00
handle_mergeable_strings();
2020-11-07 14:29:06 +03:00
2020-10-27 06:50:25 +03:00
// Create .bss sections for common symbols.
{
2020-11-11 04:42:26 +03:00
MyTimer t("common", before_copy_timer);
2020-11-17 08:03:32 +03:00
tbb::parallel_for_each(out::files,
2020-11-07 18:47:34 +03:00
[](ObjectFile *file) { file->convert_common_symbols(); });
2020-10-27 06:50:25 +03:00
}
2020-10-26 05:34:26 +03:00
// Bin input sections into output sections
2020-11-17 08:04:53 +03:00
bin_sections();
2020-10-23 04:27:11 +03:00
2020-10-29 12:19:10 +03:00
// Assign offsets within an output section to input sections.
2020-11-11 04:42:26 +03:00
set_isec_offsets();
2020-10-22 10:35:17 +03:00
2020-11-12 10:09:17 +03:00
// Sections are added to the section lists in an arbitrary order because
// they are created in parallel. Sor them to to make the output deterministic.
2020-11-12 09:19:30 +03:00
auto section_compare = [](OutputChunk *x, OutputChunk *y) {
return std::make_tuple(x->name, (u32)x->shdr.sh_type, (u64)x->shdr.sh_flags) <
std::make_tuple(y->name, (u32)y->shdr.sh_type, (u64)y->shdr.sh_flags);
};
std::stable_sort(OutputSection::instances.begin(), OutputSection::instances.end(),
section_compare);
std::stable_sort(MergedSection::instances.begin(), MergedSection::instances.end(),
section_compare);
2020-11-12 10:09:17 +03:00
// Add sections to the section lists
2020-11-04 04:39:17 +03:00
for (OutputSection *osec : OutputSection::instances)
2020-11-11 16:14:12 +03:00
if (osec->shdr.sh_size)
2020-11-17 07:48:11 +03:00
out::chunks.push_back(osec);
2020-11-07 15:53:21 +03:00
for (MergedSection *osec : MergedSection::instances)
2020-11-11 16:14:12 +03:00
if (osec->shdr.sh_size)
2020-11-17 07:48:11 +03:00
out::chunks.push_back(osec);
2020-11-07 14:31:09 +03:00
2020-11-04 04:39:17 +03:00
// Create a dummy file containing linker-synthesized symbols
// (e.g. `__bss_start`).
2020-11-17 07:48:11 +03:00
ObjectFile *internal_file = ObjectFile::create_internal_file();
2020-11-04 08:49:30 +03:00
internal_file->priority = priority++;
2020-11-17 08:03:32 +03:00
out::files.push_back(internal_file);
2020-11-04 04:39:17 +03:00
2020-11-09 05:31:00 +03:00
// Beyond this point, no new symbols will be added to the result.
2020-11-17 07:19:54 +03:00
// Copy shared object name strings to .dynsym
2020-11-17 08:03:32 +03:00
for (ObjectFile *file : out::files)
2020-11-12 18:14:15 +03:00
if (file->is_alive && file->is_dso)
2020-11-17 07:19:54 +03:00
out::dynstr->add_string(file->soname);
2020-11-12 18:14:15 +03:00
2020-10-23 03:21:40 +03:00
// Scan relocations to fix the sizes of .got, .plt, .got.plt, .dynstr,
// .rela.dyn, .rela.plt.
2020-11-17 08:04:53 +03:00
scan_rels();
2020-10-27 15:14:33 +03:00
2020-11-09 03:47:58 +03:00
// Add synthetic sections.
2020-11-17 07:48:11 +03:00
out::chunks.push_back(out::got);
out::chunks.push_back(out::plt);
out::chunks.push_back(out::gotplt);
out::chunks.push_back(out::relplt);
out::chunks.push_back(out::reldyn);
out::chunks.push_back(out::dynamic);
out::chunks.push_back(out::dynsym);
out::chunks.push_back(out::dynstr);
out::chunks.push_back(out::shstrtab);
out::chunks.push_back(out::symtab);
out::chunks.push_back(out::strtab);
out::chunks.push_back(out::hash);
out::chunks.erase(std::remove_if(out::chunks.begin(), out::chunks.end(),
[](OutputChunk *c){ return !c; }),
out::chunks.end());
2020-11-09 03:47:58 +03:00
// Sort the sections by section flags so that we'll have to create
// as few segments as possible.
2020-11-17 07:48:11 +03:00
std::stable_sort(out::chunks.begin(), out::chunks.end(),
[](OutputChunk *a, OutputChunk *b) {
2020-11-17 08:50:18 +03:00
return get_section_rank(a->shdr) < get_section_rank(b->shdr);
2020-11-17 07:48:11 +03:00
});
2020-10-29 12:31:06 +03:00
2020-11-09 03:47:58 +03:00
// Add headers and sections that have to be at the beginning
// or the ending of a file.
2020-11-17 07:48:11 +03:00
out::chunks.insert(out::chunks.begin(), out::ehdr);
out::chunks.insert(out::chunks.begin() + 1, out::phdr);
2020-11-11 15:32:41 +03:00
if (out::interp)
2020-11-17 07:48:11 +03:00
out::chunks.insert(out::chunks.begin() + 2, out::interp);
out::chunks.push_back(out::shdr);
2020-10-27 11:36:55 +03:00
2020-11-10 17:31:47 +03:00
// Set section indices.
2020-11-19 11:10:58 +03:00
for (int i = 0, shndx = 1; i < out::chunks.size(); i++)
2020-11-17 07:48:11 +03:00
if (out::chunks[i]->kind != OutputChunk::HEADER)
2020-11-19 11:10:58 +03:00
out::chunks[i]->shndx = shndx++;
2020-11-10 17:31:47 +03:00
2020-11-19 10:20:09 +03:00
// Now that we have computed sizes for all sections and assigned
// section indices to them, so we can fix section header contents
// for all output sections.
2020-11-17 07:48:11 +03:00
for (OutputChunk *chunk : out::chunks)
2020-11-16 18:45:02 +03:00
chunk->update_shdr();
2020-10-30 10:55:59 +03:00
// Assign offsets to output sections
2020-11-17 07:48:11 +03:00
u64 filesize = set_osec_offsets(out::chunks);
2020-10-19 17:37:29 +03:00
2020-11-03 10:51:28 +03:00
// Fix linker-synthesized symbol addresses.
2020-11-17 07:48:11 +03:00
fix_synthetic_symbols(out::chunks);
2020-11-01 07:05:51 +03:00
2020-11-09 03:58:35 +03:00
// At this point, file layout is fixed. Beyond this, you can assume
// that symbol addresses including their GOT/PLT/etc addresses have
// a correct final value.
2020-11-11 04:42:26 +03:00
// Some types of relocations for TLS symbols need the ending address
// of the TLS section. Find it out now.
2020-11-17 07:48:11 +03:00
for (OutputChunk *chunk : out::chunks) {
2020-11-06 06:50:26 +03:00
ELF64LE::Shdr &shdr = chunk->shdr;
if (shdr.sh_flags & SHF_TLS)
out::tls_end = align_to(shdr.sh_addr + shdr.sh_size, shdr.sh_addralign);
}
2020-10-26 08:38:43 +03:00
2020-11-09 10:41:26 +03:00
// Create an output file
2020-11-17 08:03:32 +03:00
out::buf = open_output_file(filesize);
2020-11-09 10:41:26 +03:00
2020-11-17 08:49:07 +03:00
// Copy input sections to the output file
2020-11-12 16:06:47 +03:00
{
MyTimer t("copy", copy_timer);
2020-11-17 08:49:07 +03:00
2020-11-17 07:48:11 +03:00
tbb::parallel_for_each(out::chunks, [&](OutputChunk *chunk) {
2020-11-17 07:51:44 +03:00
chunk->initialize_buf();
2020-11-12 16:06:47 +03:00
});
2020-11-17 07:48:11 +03:00
tbb::parallel_for_each(out::chunks, [&](OutputChunk *chunk) {
2020-11-17 07:56:40 +03:00
chunk->copy_buf();
});
2020-10-30 05:40:38 +03:00
}
2020-10-20 03:20:52 +03:00
2020-11-07 16:54:07 +03:00
// Fill mergeable string sections
2020-11-17 07:59:24 +03:00
write_merged_strings();
2020-11-07 16:54:07 +03:00
2020-11-09 15:50:47 +03:00
// Zero-clear paddings between sections
2020-11-17 07:59:24 +03:00
clear_padding(filesize);
2020-11-09 15:50:47 +03:00
2020-11-09 10:41:26 +03:00
// Commit
2020-10-25 03:38:53 +03:00
{
2020-11-11 04:42:26 +03:00
MyTimer t("munmap", copy_timer);
2020-11-17 07:49:18 +03:00
munmap(out::buf, filesize);
2020-10-25 03:38:53 +03:00
}
2020-10-14 12:41:09 +03:00
2020-11-06 10:58:13 +03:00
total_timer.stopTimer();
2020-10-29 06:24:54 +03:00
if (config.print_map) {
MyTimer t("print_map");
2020-11-17 08:03:32 +03:00
print_map(out::files, out::chunks);
2020-10-29 06:24:54 +03:00
}
2020-10-28 13:34:32 +03:00
#if 0
2020-11-17 08:03:32 +03:00
for (ObjectFile *file : out::files)
2020-10-28 12:29:31 +03:00
for (InputSection *isec : file->sections)
if (isec)
2020-11-16 13:15:27 +03:00
message(toString(isec));
2020-10-28 13:34:32 +03:00
#endif
2020-10-28 12:29:31 +03:00
2020-11-03 11:36:43 +03:00
// Show stat numbers
2020-11-03 11:54:40 +03:00
Counter num_input_sections("input_sections");
2020-11-17 08:03:32 +03:00
for (ObjectFile *file : out::files)
2020-11-03 11:54:40 +03:00
num_input_sections.inc(file->sections.size());
2020-11-03 11:36:43 +03:00
2020-11-17 07:48:11 +03:00
Counter num_output_chunks("output_out::chunks", out::chunks.size());
2020-11-17 08:03:32 +03:00
Counter num_files("files", out::files.size());
2020-11-03 11:43:06 +03:00
Counter filesize_counter("filesize", filesize);
2020-11-03 11:36:43 +03:00
2020-11-03 11:28:06 +03:00
Counter::print();
2020-10-18 10:21:17 +03:00
llvm::TimerGroup::printAll(llvm::outs());
2020-10-28 13:27:23 +03:00
llvm::outs().flush();
2020-10-18 10:21:17 +03:00
_exit(0);
2020-09-29 09:05:29 +03:00
}