// Entry point of the linker: input-file classification and loading,
// command-line driven file reading, and the top-level link pipeline.
//
// NOTE(review): this file was received with everything between '<' and
// '>' stripped (include names, template parameter lists, template
// arguments, and the archive magic strings). They have been reconstructed
// from how each name is used below; please confirm against mold.h.

#include "mold.h"

#include <functional>
#include <map>
#include <signal.h>
#include <tbb/global_control.h>
#include <tbb/parallel_for_each.h>
#include <unordered_set>

// Returns true if the file is at least four bytes long and its first
// four bytes are all printable characters according to isprint().
template <typename E>
static bool is_text_file(Context<E> &ctx, MemoryMappedFile<E> *mb) {
  u8 *data = mb->data(ctx);
  return mb->size() >= 4 &&
         isprint(data[0]) &&
         isprint(data[1]) &&
         isprint(data[2]) &&
         isprint(data[3]);
}

// Kinds of input files distinguished by get_file_type().
enum class FileType { UNKNOWN, OBJ, DSO, AR, THIN_AR, TEXT };

// Classifies an input file by inspecting its leading bytes.
//
// - ELF magic: OBJ for ET_REL, DSO for ET_DYN, UNKNOWN for any other
//   e_type.
// - "!<arch>\n": AR (regular archive).
// - "!<thin>\n": THIN_AR (thin archive).
// - Four printable leading bytes: TEXT.
// - Anything else: UNKNOWN.
template <typename E>
static FileType get_file_type(Context<E> &ctx, MemoryMappedFile<E> *mb) {
  u8 *data = mb->data(ctx);

  if (mb->size() >= 20 && memcmp(data, "\177ELF", 4) == 0) {
    ElfEhdr<E> &ehdr = *reinterpret_cast<ElfEhdr<E> *>(data);
    if (ehdr.e_type == ET_REL)
      return FileType::OBJ;
    if (ehdr.e_type == ET_DYN)
      return FileType::DSO;
    return FileType::UNKNOWN;
  }

  // The two archive flavors are told apart by their 8-byte magic. Both
  // literals must be exactly 8 bytes long (excluding the terminating NUL)
  // because memcmp compares 8 bytes.
  if (mb->size() >= 8 && memcmp(data, "!<arch>\n", 8) == 0)
    return FileType::AR;
  if (mb->size() >= 8 && memcmp(data, "!<thin>\n", 8) == 0)
    return FileType::THIN_AR;
  if (is_text_file(ctx, mb))
    return FileType::TEXT;
  return FileType::UNKNOWN;
}

// Creates an ObjectFile for `mb` and schedules its parse() on ctx.tg.
//
// `archive_name` is the name of the containing archive, or empty for a
// file given directly. The file is created with in_lib set when it comes
// from an archive and --whole-archive is not in effect. If ctx.arg.trace
// is set, a "trace: " line is printed for the file.
template <typename E>
static ObjectFile<E> *new_object_file(Context<E> &ctx, MemoryMappedFile<E> *mb,
                                      std::string archive_name) {
  static Counter count("parsed_objs");
  count++;

  bool in_lib = (!archive_name.empty() && !ctx.whole_archive);
  ObjectFile<E> *file = new ObjectFile<E>(ctx, mb, archive_name, in_lib);
  ctx.tg.run([file, &ctx]() { file->parse(ctx); });
  if (ctx.arg.trace)
    SyncOut(ctx) << "trace: " << *file;
  return file;
}

// Creates a SharedFile for `mb` and schedules its parse() on ctx.tg.
// If ctx.arg.trace is set, a "trace: " line is printed for the file.
template <typename E>
static SharedFile<E> *new_shared_file(Context<E> &ctx, MemoryMappedFile<E> *mb) {
  SharedFile<E> *file = new SharedFile<E>(ctx, mb);
  ctx.tg.run([file, &ctx]() { file->parse(ctx); });
  if (ctx.arg.trace)
    SyncOut(ctx) << "trace: " << *file;
  return file;
}

// A cache of already-created file objects, keyed by the (name, size,
// mtime) of the memory-mapped file they were stored under. read_file()
// fills it while preloading and drains it on the real pass.
template <typename E, typename T>
class FileCache {
public:
  // Appends `obj` to the list of objects stored under `mb`'s key.
  void store(MemoryMappedFile<E> *mb, T *obj) {
    Key k(mb->name, mb->size(), mb->mtime);
    cache[k].push_back(obj);
  }

  // Returns all objects stored under `mb`'s key and leaves that entry
  // empty, so each cached object is handed out at most once.
  std::vector<T *> get(MemoryMappedFile<E> *mb) {
    Key k(mb->name, mb->size(), mb->mtime);
    // Swap with an empty vector: one map lookup, no element copy.
    std::vector<T *> objs;
    objs.swap(cache[k]);
    return objs;
  }

  // Like get(), but returns only the first object, or nullptr if nothing
  // was stored under `mb`'s key. The entry is emptied either way.
  T *get_one(MemoryMappedFile<E> *mb) {
    std::vector<T *> objs = get(mb);
    return objs.empty() ? nullptr : objs[0];
  }

private:
  // NOTE(review): the tuple element types were lost in the received
  // source; (std::string, i64, i64) is assumed for (name, size, mtime).
  typedef std::tuple<std::string, i64, i64> Key;
  std::map<Key, std::vector<T *>> cache;
};

// Reads one input file and dispatches on its type.
//
// Files whose name is already in ctx.visited are skipped. When
// ctx.is_preloading is set, object and shared files (including archive
// members) are created and stored in function-local caches instead of
// being added to the link. Otherwise cached objects are reused if
// present, new ones are created if not, and the results are appended to
// ctx.objs / ctx.dsos. Text files are handed to parse_linker_script().
// Any other file type is reported via Fatal.
template <typename E>
void read_file(Context<E> &ctx, MemoryMappedFile<E> *mb) {
  if (ctx.visited.contains(mb->name))
    return;

  static FileCache<E, ObjectFile<E>> obj_cache;
  static FileCache<E, SharedFile<E>> dso_cache;

  if (ctx.is_preloading) {
    switch (get_file_type(ctx, mb)) {
    case FileType::OBJ:
      obj_cache.store(mb, new_object_file(ctx, mb, ""));
      return;
    case FileType::DSO:
      dso_cache.store(mb, new_shared_file(ctx, mb));
      return;
    case FileType::AR:
      // Members of a regular archive are cached under the archive itself.
      for (MemoryMappedFile<E> *child : read_fat_archive_members(ctx, mb))
        if (get_file_type(ctx, child) == FileType::OBJ)
          obj_cache.store(mb, new_object_file(ctx, child, mb->name));
      return;
    case FileType::THIN_AR:
      // Members of a thin archive are cached under each member file.
      for (MemoryMappedFile<E> *child : read_thin_archive_members(ctx, mb))
        if (get_file_type(ctx, child) == FileType::OBJ)
          obj_cache.store(child, new_object_file(ctx, child, mb->name));
      return;
    case FileType::TEXT:
      parse_linker_script(ctx, mb);
      return;
    }
    Fatal(ctx) << mb->name << ": unknown file type";
  }

  switch (get_file_type(ctx, mb)) {
  case FileType::OBJ:
    if (ObjectFile<E> *obj = obj_cache.get_one(mb))
      ctx.objs.push_back(obj);
    else
      ctx.objs.push_back(new_object_file(ctx, mb, ""));
    return;
  case FileType::DSO:
    if (SharedFile<E> *obj = dso_cache.get_one(mb))
      ctx.dsos.push_back(obj);
    else
      ctx.dsos.push_back(new_shared_file(ctx, mb));
    ctx.visited.insert(mb->name);
    return;
  case FileType::AR:
    if (std::vector<ObjectFile<E> *> objs = obj_cache.get(mb); !objs.empty()) {
      append(ctx.objs, objs);
    } else {
      for (MemoryMappedFile<E> *child : read_fat_archive_members(ctx, mb))
        if (get_file_type(ctx, child) == FileType::OBJ)
          ctx.objs.push_back(new_object_file(ctx, child, mb->name));
    }
    ctx.visited.insert(mb->name);
    return;
  case FileType::THIN_AR:
    for (MemoryMappedFile<E> *child : read_thin_archive_members(ctx, mb)) {
      if (ObjectFile<E> *obj = obj_cache.get_one(child))
        ctx.objs.push_back(obj);
      else if (get_file_type(ctx, child) == FileType::OBJ)
        ctx.objs.push_back(new_object_file(ctx, child, mb->name));
    }
    ctx.visited.insert(mb->name);
    return;
  case FileType::TEXT:
    parse_linker_script(ctx, mb);
    return;
  }
  Fatal(ctx) << mb->name << ": unknown file type";
}

// Removes the temporary output file and the socket temporary file, if
// either is set.
template <typename E>
void cleanup() {
  if (OutputFile<E>::tmpfile)
    unlink(OutputFile<E>::tmpfile);
  if (socket_tmpfile)
    unlink(socket_tmpfile);
}

// Signal handler installed for SIGINT and SIGTERM by do_main(): removes
// temporary files and exits immediately with status 1.
template <typename E>
static void signal_handler(int) {
  cleanup<E>();
  _exit(1);
}

// Resolves the argument of a -l option to a file.
//
// A name starting with ':' is looked up verbatim (without the colon) in
// each library path. Otherwise "lib<name>.so" is tried first in each
// directory (unless ctx.is_static is set), then "lib<name>.a". Library
// paths starting with '/' are prefixed with ctx.arg.sysroot. If nothing
// is found, the failure is reported via Fatal.
template <typename E>
MemoryMappedFile<E> *find_library(Context<E> &ctx, std::string name) {
  if (name.starts_with(':')) {
    for (std::string_view dir : ctx.arg.library_paths) {
      std::string root = dir.starts_with("/") ? ctx.arg.sysroot : "";
      std::string path = root + std::string(dir) + "/" + name.substr(1);
      if (MemoryMappedFile<E> *mb = MemoryMappedFile<E>::open(path))
        return mb;
    }
    Fatal(ctx) << "library not found: " << name;
  }

  for (std::string_view dir : ctx.arg.library_paths) {
    std::string root = dir.starts_with("/") ? ctx.arg.sysroot : "";
    std::string stem = root + std::string(dir) + "/lib" + name;
    if (!ctx.is_static)
      if (MemoryMappedFile<E> *mb = MemoryMappedFile<E>::open(stem + ".so"))
        return mb;
    if (MemoryMappedFile<E> *mb = MemoryMappedFile<E>::open(stem + ".a"))
      return mb;
  }
  Fatal(ctx) << "library not found: " << name;
}

// Walks the positional command-line arguments in order, updating the
// as-needed / whole-archive / static state for the flags that toggle it
// and reading every -l library and plain file name with read_file().
// --push-state and --pop-state save and restore those three flags.
template <typename E>
static void read_input_files(Context<E> &ctx, std::span<std::string_view> args) {
  // Saved (as_needed, whole_archive, is_static) triples.
  std::vector<std::tuple<bool, bool, bool>> state;

  while (!args.empty()) {
    std::string_view arg;

    if (read_flag(args, "as-needed")) {
      ctx.as_needed = true;
    } else if (read_flag(args, "no-as-needed")) {
      ctx.as_needed = false;
    } else if (read_flag(args, "whole-archive")) {
      ctx.whole_archive = true;
    } else if (read_flag(args, "no-whole-archive")) {
      ctx.whole_archive = false;
    } else if (read_flag(args, "Bstatic")) {
      ctx.is_static = true;
    } else if (read_flag(args, "Bdynamic")) {
      ctx.is_static = false;
    } else if (read_flag(args, "push-state")) {
      state.push_back({ctx.as_needed, ctx.whole_archive, ctx.is_static});
    } else if (read_flag(args, "pop-state")) {
      if (state.empty())
        Fatal(ctx) << "no state pushed before popping";
      std::tie(ctx.as_needed, ctx.whole_archive, ctx.is_static) = state.back();
      state.pop_back();
    } else if (read_arg(ctx, args, arg, "l")) {
      MemoryMappedFile<E> *mb = find_library(ctx, std::string(arg));
      read_file(ctx, mb);
    } else {
      // Anything that is not a recognized flag is treated as a file name.
      read_file(ctx, MemoryMappedFile<E>::must_open(ctx, std::string(args[0])));
      args = args.subspan(1);
    }
  }
}

// Accumulates symbol, relocation, section and file counts into Counter
// objects and prints all counters.
template <typename E>
static void show_stats(Context<E> &ctx) {
  for (ObjectFile<E> *obj : ctx.objs) {
    static Counter defined("defined_syms");
    defined += obj->first_global - 1;

    static Counter undefined("undefined_syms");
    undefined += obj->symbols.size() - obj->first_global;

    for (InputSection<E> *sec : obj->sections) {
      if (!sec)
        continue;

      static Counter alloc("reloc_alloc");
      static Counter nonalloc("reloc_nonalloc");

      if (sec->shdr.sh_flags & SHF_ALLOC)
        alloc += sec->get_rels(ctx).size();
      else
        nonalloc += sec->get_rels(ctx).size();
    }
  }

  Counter num_input_sections("input_sections");
  for (ObjectFile<E> *file : ctx.objs)
    num_input_sections += file->sections.size();

  Counter num_output_chunks("output_chunks", ctx.chunks.size());
  Counter num_objs("num_objs", ctx.objs.size());
  Counter num_dsos("num_dsos", ctx.dsos.size());

  Counter::print();
}

// Runs the whole link for target E: parses the command line, reads the
// input files (optionally through the preload daemon), runs the link
// passes in order, writes the output file and returns 0.
template <typename E>
int do_main(int argc, char **argv) {
  Context<E> ctx;

  // Process -run option first. process_run_subcommand() does not return.
  if (argc >= 2)
    if (std::string_view arg = argv[1]; arg == "-run" || arg == "--run")
      process_run_subcommand(ctx, argc, argv);

  Timer t_all("all");

  // Parse non-positional command line options
  ctx.cmdline_args = expand_response_files(ctx, argv + 1);
  std::vector<std::string_view> file_args;
  parse_nonpositional_args(ctx, file_args);

  if (!ctx.arg.preload)
    if (i64 code; resume_daemon(ctx, argv, &code))
      exit(code);

  tbb::global_control tbb_cont(tbb::global_control::max_allowed_parallelism,
                               ctx.arg.thread_count);

  signal(SIGINT, signal_handler<E>);
  signal(SIGTERM, signal_handler<E>);

  // Preload input files
  std::function<void()> on_complete;

  if (ctx.arg.preload) {
    Timer t("preload");
    std::function<void()> wait_for_client;
    daemonize(ctx, argv, &wait_for_client, &on_complete);
    ctx.reset_reader_context(true);
    read_input_files(ctx, file_args);
    ctx.tg.wait();
    t.stop();

    Timer t2("wait_for_client");
    wait_for_client();
  } else if (ctx.arg.fork) {
    on_complete = fork_child();
  }

  for (std::string_view arg : ctx.arg.trace_symbol)
    Symbol<E>::intern(ctx, arg)->traced = true;

  // Parse input files
  {
    Timer t("parse");
    ctx.reset_reader_context(false);
    read_input_files(ctx, file_args);
    ctx.tg.wait();
  }

  if (ctx.objs.empty())
    Fatal(ctx) << "no input files";

  // Uniquify shared object files with soname
  {
    std::vector<SharedFile<E> *> vec;
    std::unordered_set<std::string_view> seen;
    for (SharedFile<E> *file : ctx.dsos)
      if (seen.insert(file->soname).second)
        vec.push_back(file);
    ctx.dsos = vec;
  }

  Timer t_total("total");
  Timer t_before_copy("before_copy");

  // Apply -exclude-libs
  apply_exclude_libs(ctx);

  // Create instances of linker-synthesized sections such as
  // .got or .plt.
  create_synthetic_sections(ctx);

  // Set unique indices to files.
  set_file_priority(ctx);

  // Resolve symbols and fix the set of object files that are
  // included to the final output.
  resolve_obj_symbols(ctx);

  // Remove redundant comdat sections (e.g. duplicate inline functions).
  eliminate_comdats(ctx);

  // Create .bss sections for common symbols.
  convert_common_symbols(ctx);

  // Apply version scripts.
  apply_version_script(ctx);

  // Parse symbol version suffixes (e.g. "foo@ver1").
  parse_symbol_version(ctx);

  // Set is_import and is_export bits for each symbol.
  compute_import_export(ctx);

  // Garbage-collect unreachable sections.
  if (ctx.arg.gc_sections)
    gc_sections(ctx);

  // Merge identical read-only sections.
  if (ctx.arg.icf)
    icf_sections(ctx);

  // Compute sizes of sections containing mergeable strings.
  compute_merged_section_sizes(ctx);

  // Bin input sections into output sections
  bin_sections(ctx);

  // Get a list of output sections.
  append(ctx.chunks, collect_output_sections(ctx));

  // Create a dummy file containing linker-synthesized symbols
  // (e.g. `__bss_start`).
  ctx.internal_obj = new ObjectFile<E>(ctx);
  ctx.internal_obj->resolve_regular_symbols(ctx);
  ctx.objs.push_back(ctx.internal_obj);

  // Add symbols from shared object files.
  resolve_dso_symbols(ctx);

  // Beyond this point, no new files will be added to ctx.objs
  // or ctx.dsos.

  // Convert weak symbols to absolute symbols with value 0.
  convert_undefined_weak_symbols(ctx);

  // If we are linking a .so file, remaining undefined symbols do
  // not cause a linker error. Instead, they are treated as if they
  // were imported symbols.
  if (ctx.arg.shared && !ctx.arg.z_defs) {
    Timer t("claim_unresolved_symbols");
    tbb::parallel_for_each(ctx.objs, [](ObjectFile<E> *file) {
      file->claim_unresolved_symbols();
    });
  }

  // Beyond this point, no new symbols will be added to the result.

  // Make sure that all symbols have been resolved.
  if (!ctx.arg.allow_multiple_definition)
    check_duplicate_symbols(ctx);

  // Compute sizes of output sections while assigning offsets
  // within an output section to input sections.
  compute_section_sizes(ctx);

  // Sort sections by section attributes so that we'll have to
  // create as few segments as possible.
  sort(ctx.chunks, [&](OutputChunk<E> *a, OutputChunk<E> *b) {
    return get_section_rank(ctx, a) < get_section_rank(ctx, b);
  });

  // Copy strings referred to by .dynamic to .dynstr.
  for (SharedFile<E> *file : ctx.dsos)
    ctx.dynstr->add_string(file->soname);
  for (std::string_view str : ctx.arg.auxiliary)
    ctx.dynstr->add_string(str);
  for (std::string_view str : ctx.arg.filter)
    ctx.dynstr->add_string(str);
  if (!ctx.arg.rpaths.empty())
    ctx.dynstr->add_string(ctx.arg.rpaths);
  if (!ctx.arg.soname.empty())
    ctx.dynstr->add_string(ctx.arg.soname);

  // Scan relocations to find symbols that need entries in .got, .plt,
  // .got.plt, .dynsym, .dynstr, etc.
  scan_rels(ctx);

  // Sort .dynsym contents. Beyond this point, no symbol will be
  // added to .dynsym.
  ctx.dynsym->sort_symbols(ctx);

  // Fill .gnu.version_d section contents.
  fill_verdef(ctx);

  // Fill .gnu.version_r section contents.
  fill_verneed(ctx);

  // Compute .symtab and .strtab sizes for each file.
  {
    Timer t("compute_symtab");
    tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
      file->compute_symtab(ctx);
    });
  }

  // .eh_frame is a special section from the linker's point of view,
  // as its contents are parsed and reconstructed by the linker,
  // unlike other sections that are regarded as opaque bytes.
  // Here, we transplant .eh_frame sections from a regular output
  // section to the special EHFrameSection.
  {
    Timer t("eh_frame");
    erase(ctx.chunks, [](OutputChunk<E> *chunk) {
      return chunk->kind == OutputChunk<E>::REGULAR &&
             chunk->name == ".eh_frame";
    });
    ctx.eh_frame->construct(ctx);
  }

  // Now that we have computed sizes for all sections and assigned
  // section indices to them, we can fix section header contents
  // for all output sections.
  for (OutputChunk<E> *chunk : ctx.chunks)
    chunk->update_shdr(ctx);

  erase(ctx.chunks, [](OutputChunk<E> *chunk) {
    return chunk->kind == OutputChunk<E>::SYNTHETIC &&
           chunk->shdr.sh_size == 0;
  });

  // Set section indices.
  for (i64 i = 0, shndx = 1; i < ctx.chunks.size(); i++)
    if (ctx.chunks[i]->kind != OutputChunk<E>::HEADER)
      ctx.chunks[i]->shndx = shndx++;

  for (OutputChunk<E> *chunk : ctx.chunks)
    chunk->update_shdr(ctx);

  // Assign offsets to output sections
  i64 filesize = set_osec_offsets(ctx);

  // At this point, file layout is fixed.

  // Fix linker-synthesized symbol addresses.
  fix_synthetic_symbols(ctx);

  // Beyond this, you can assume that symbol addresses including their
  // GOT or PLT addresses have a correct final value.

  // Some types of relocations for TLS symbols need the TLS segment
  // address. Find it out now.
  for (ElfPhdr<E> phdr : create_phdr(ctx)) {
    if (phdr.p_type == PT_TLS) {
      ctx.tls_begin = phdr.p_vaddr;
      ctx.tls_end = align_to(phdr.p_vaddr + phdr.p_memsz, phdr.p_align);
      break;
    }
  }

  t_before_copy.stop();

  // Create an output file
  OutputFile<E> *file = OutputFile<E>::open(ctx, ctx.arg.output, filesize);
  ctx.buf = file->buf;

  Timer t_copy("copy");

  // Copy input sections to the output file
  {
    Timer t("copy_buf");

    tbb::parallel_for_each(ctx.chunks, [&](OutputChunk<E> *chunk) {
      std::string name(chunk->name);
      if (name.empty())
        name = "(header)";
      Timer t2(name, &t);

      chunk->copy_buf(ctx);
    });

    Error::checkpoint(ctx);
  }

  // Dynamic linker works better with sorted .rela.dyn section,
  // so we sort them.
  ctx.reldyn->sort(ctx);

  // Zero-clear paddings between sections
  clear_padding(ctx, filesize);

  if (ctx.buildid) {
    Timer t("build_id");
    ctx.buildid->write_buildid(ctx, filesize);
  }

  t_copy.stop();

  // Commit
  file->close(ctx);

  t_total.stop();
  t_all.stop();

  if (ctx.arg.print_map)
    print_map(ctx);

  // Show stats numbers
  if (ctx.arg.stats)
    show_stats(ctx);

  if (ctx.arg.perf)
    Timer::print();

  std::cout << std::flush;
  std::cerr << std::flush;
  if (on_complete)
    on_complete();

  if (ctx.arg.quick_exit)
    std::quick_exit(0);
  return 0;
}

// Target architectures selectable with the -m option.
enum class MachineType { X86_64, I386 };

// Scans argv for "-m <emulation>" and maps the emulation name to a
// MachineType. An unknown emulation name prints an error and exits with
// status 1. If -m is absent (or is the last argument with no value),
// X86_64 is returned as the default rather than treating it as an error.
static MachineType get_machine_type(int argc, char **argv) {
  for (i64 i = 1; i < argc; i++) {
    if (std::string_view(argv[i]) == "-m") {
      if (i + 1 == argc)
        break;
      i++;

      std::string_view val(argv[i]);
      if (val == "elf_x86_64")
        return MachineType::X86_64;
      if (val == "elf_i386")
        return MachineType::I386;
      std::cerr << "unknown -m argument: " << val << "\n";
      exit(1);
    }
  }
  return MachineType::X86_64;
}

// Program entry point: picks the target from -m and runs the link for it.
// NOTE(review): the target tag types were lost in the received source;
// X86_64 and I386 are assumed to match the MachineType enumerators.
int main(int argc, char **argv) {
  switch (get_machine_type(argc, argv)) {
  case MachineType::X86_64:
    return do_main<X86_64>(argc, argv);
  case MachineType::I386:
    return do_main<I386>(argc, argv);
  }
}

// Explicit instantiation so that other translation units can call
// read_file(). NOTE(review): the target argument was lost in the received
// source; X86_64 is assumed. Confirm whether other targets are
// instantiated elsewhere.
template void read_file(Context<X86_64> &ctx, MemoryMappedFile<X86_64> *mb);