// This file implements the -r or --relocatable option. If the option
// is given to a linker, the linker creates neither an executable nor a
// shared library file but instead creates another object file by
// combining input object files.
//
// The behavior of a linker with -r is pretty different from the one
// without -r, and adding code for -r to the regular code path could
// severely complicate it. Therefore, we implement -r as an
// independent feature from others. That's why this file shares only a
// small amount of code with other files.
//
// Since the -r option is a minor feature, we don't aim for speed in
// this file. This is a "better than nothing" implementation of the -r
// option.
//
// Here is the strategy as to how to combine multiple object files
// into one:
//
//  - Regular sections containing opaque data (e.g. ".text" or
//    ".data") are just copied as-is without merging.
//
//  - .symtab, .strtab and .shstrtab are merged.
//
//  - COMDAT groups are uniquified.
//
//  - Relocation sections are copied one by one, but we need to fix
//    symbol indices.
#include "mold.h"

#include <unordered_map>
#include <unordered_set>

template <typename E> class RObjectFile;

// Base class of everything that occupies a byte range in the output
// file: the ELF header, the section header table, the synthesized
// tables and the sections copied from input files.
template <typename E>
class ROutputChunk {
public:
  ROutputChunk() {
    out_shdr.sh_addralign = 1;
  }

  virtual ~ROutputChunk() = default;

  // Called once after all symbols have been added and before file
  // offsets are assigned, so that sh_size, sh_link and sh_info can be
  // finalized.
  virtual void update_shdr(Context<E> &ctx) {}

  // Writes the chunk contents to ctx.buf + out_shdr.sh_offset.
  virtual void write_to(Context<E> &ctx) = 0;

  std::string_view name;

  // Output section index. 0 means that the chunk has no section
  // header table entry.
  i64 shndx = 0;

  ElfShdr<E> in_shdr = {};
  ElfShdr<E> out_shdr = {};
};

// A section copied from an input object file.
template <typename E>
class RInputSection : public ROutputChunk<E> {
public:
  RInputSection(Context<E> &ctx, RObjectFile<E> &file, const ElfShdr<E> &shdr);
  void update_shdr(Context<E> &ctx) override;
  void write_to(Context<E> &ctx) override;

  RObjectFile<E> &file;
};

// The merged output .symtab.
template <typename E>
class RSymtabSection : public ROutputChunk<E> {
public:
  RSymtabSection() {
    this->name = ".symtab";
    this->out_shdr.sh_type = SHT_SYMTAB;
    this->out_shdr.sh_entsize = sizeof(ElfSym<E>);
    this->out_shdr.sh_addralign = E::wordsize;
  }

  void add_local_symbol(Context<E> &ctx, RObjectFile<E> &file, i64 idx);
  void add_global_symbol(Context<E> &ctx, RObjectFile<E> &file, i64 idx);
  void update_shdr(Context<E> &ctx) override;
  void write_to(Context<E> &ctx) override;

  // Maps a global symbol name to its index in `syms`.
  std::unordered_map<std::string_view, i64> sym_map;

  // Output symbols. It starts with one element, so the first symbol
  // added gets index 1.
  std::vector<ElfSym<E>> syms{1};
};

// A string table (used for both .strtab and .shstrtab). Identical
// strings share a single offset.
template <typename E>
class RStrtabSection : public ROutputChunk<E> {
public:
  RStrtabSection(std::string_view name) {
    this->name = name;
    this->out_shdr.sh_type = SHT_STRTAB;

    // Strings are appended from offset 1; byte 0 is never written by
    // write_to().
    this->out_shdr.sh_size = 1;
  }

  i64 add_string(std::string_view str);
  void write_to(Context<E> &ctx) override;

  // Maps a string to its offset in the table.
  std::unordered_map<std::string_view, i64> strings;
};

// The ELF file header.
template <typename E>
class ROutputEhdr : public ROutputChunk<E> {
public:
  ROutputEhdr() {
    this->out_shdr.sh_size = sizeof(ElfEhdr<E>);
  }

  void write_to(Context<E> &ctx) override;
};

// The section header table.
template <typename E>
class ROutputShdr : public ROutputChunk<E> {
public:
  ROutputShdr() {
    // Room for the entry at index 0, which no chunk uses.
    this->out_shdr.sh_size = sizeof(ElfShdr<E>);
  }

  void update_shdr(Context<E> &ctx) override;
  void write_to(Context<E> &ctx) override;
};

// An input object file.
template <typename E>
class RObjectFile {
public:
  RObjectFile(Context<E> &ctx, MemoryMappedFile<E> &mb, bool is_alive);

  void remove_comdats(Context<E> &ctx,
                      std::unordered_set<std::string_view> &groups);

  template <typename T>
  std::span<T> get_data(Context<E> &ctx, const ElfShdr<E> &shdr);

  MemoryMappedFile<E> &mb;
  std::span<ElfShdr<E>> elf_sections;

  // Indexed by input section index. An element is null if the section
  // is not copied to the output (symbol/string tables, SHT_NULL
  // entries and members of discarded comdat groups).
  std::vector<std::unique_ptr<RInputSection<E>>> sections;

  std::span<const ElfSym<E>> syms;

  // Maps an input symbol index to an output symbol index.
  std::vector<i64> symidx;

  std::unordered_set<std::string_view> defined_syms;
  std::unordered_set<std::string_view> undef_syms;
  i64 symtab_shndx = 0;
  i64 first_global = 0;
  bool is_alive;
  const char *strtab = nullptr;
  const char *shstrtab = nullptr;
};

// Appends the local symbol at input index `idx` of `file` to the
// output symbol table and records the new index in file.symidx.
// A symbol that belongs to a section which is not copied to the
// output is dropped.
template <typename E>
void RSymtabSection<E>::add_local_symbol(Context<E> &ctx, RObjectFile<E> &file,
                                         i64 idx) {
  ElfSym<E> sym = file.syms[idx];
  assert(sym.st_bind == STB_LOCAL);

  if (!sym.is_undef() && !sym.is_abs() && !sym.is_common()) {
    if (!file.sections[sym.st_shndx])
      return;
    sym.st_shndx = file.sections[sym.st_shndx]->shndx;
  }

  std::string_view name = file.strtab + sym.st_name;
  sym.st_name = ctx.r_strtab->add_string(name);

  file.symidx[idx] = syms.size();
  syms.push_back(sym);
}

// Adds the global symbol at input index `idx` of `file` to the output
// symbol table, merging it by name with symbols added earlier. A
// definition replaces an earlier undefined entry of the same name;
// otherwise the first entry wins.
template <typename E>
void RSymtabSection<E>::add_global_symbol(Context<E> &ctx, RObjectFile<E> &file,
                                          i64 idx) {
  ElfSym<E> sym = file.syms[idx];
  assert(sym.st_bind != STB_LOCAL);

  // Rewrites the section index of a section-relative symbol from the
  // input numbering to the output numbering. If the section is not
  // copied to the output (file.sections[] is null, e.g. a member of a
  // discarded comdat group), there is nothing the symbol could refer
  // to, so it is turned into an undefined symbol instead of
  // dereferencing the null pointer.
  auto remap_section = [&](ElfSym<E> &s) {
    if (s.is_undef() || s.is_abs() || s.is_common())
      return;

    RInputSection<E> *sec = file.sections[s.st_shndx].get();
    if (sec) {
      s.st_shndx = sec->shndx;
    } else {
      s.st_shndx = SHN_UNDEF;
      s.st_value = 0;
    }
  };

  std::string_view name = file.strtab + sym.st_name;
  auto [it, inserted] = sym_map.insert({name, syms.size()});

  if (inserted) {
    remap_section(sym);
    sym.st_name = ctx.r_strtab->add_string(name);
    file.symidx[idx] = syms.size();
    syms.push_back(sym);
    return;
  }

  file.symidx[idx] = it->second;

  ElfSym<E> &existing = syms[it->second];
  if (existing.is_undef() && !sym.is_undef()) {
    remap_section(sym);

    // Replace the entry only if `sym` is still a definition after
    // remapping.
    if (!sym.is_undef()) {
      sym.st_name = existing.st_name;
      existing = sym;
    }
  }
}

template <typename E>
void RSymtabSection<E>::update_shdr(Context<E> &ctx) {
  this->out_shdr.sh_size = syms.size() * sizeof(ElfSym<E>);
  this->out_shdr.sh_link = ctx.r_strtab->shndx;
}

template <typename E>
void RSymtabSection<E>::write_to(Context<E> &ctx) {
  ElfSym<E> *buf = (ElfSym<E> *)(ctx.buf + this->out_shdr.sh_offset);

  // Entry 0 is not written; combine_objects() zero-fills the whole
  // output buffer before any chunk is written.
  for (i64 i = 1; i < syms.size(); i++)
    buf[i] = syms[i];
}

template <typename E>
RInputSection<E>::RInputSection(Context<E> &ctx, RObjectFile<E> &file,
                                const ElfShdr<E> &shdr)
  : file(file) {
  this->name = file.shstrtab + shdr.sh_name;
  this->in_shdr = shdr;
  this->out_shdr = shdr;
}

// Translates sh_link and sh_info, which hold section or symbol
// indices of the input file, into output indices.
template <typename E>
void RInputSection<E>::update_shdr(Context<E> &ctx) {
  switch (this->in_shdr.sh_type) {
  case SHT_GROUP:
    // sh_link is the symbol table and sh_info is a symbol index.
    this->out_shdr.sh_link = ctx.r_symtab->shndx;
    this->out_shdr.sh_info = file.symidx[this->in_shdr.sh_info];
    break;
  case SHT_REL:
  case SHT_RELA:
    // sh_link is the symbol table and sh_info is a section index.
    this->out_shdr.sh_link = ctx.r_symtab->shndx;
    this->out_shdr.sh_info = file.sections[this->in_shdr.sh_info]->shndx;
    break;
  default:
    if (this->in_shdr.sh_link) {
      // The section being remapped here is the one named by sh_link,
      // so sh_link (not sh_info) must be used as the index.
      std::unique_ptr<RInputSection<E>> &sec =
        file.sections[this->in_shdr.sh_link];

      if (sec)
        this->out_shdr.sh_link = sec->shndx;
      else if (this->in_shdr.sh_link == file.symtab_shndx)
        this->out_shdr.sh_link = ctx.r_symtab->shndx;
    }
  }
}

// Copies the section contents to the output file and then fixes the
// section and symbol indices embedded in group and relocation
// sections.
template <typename E>
void RInputSection<E>::write_to(Context<E> &ctx) {
  if (this->in_shdr.sh_type == SHT_NOBITS)
    return;

  std::span<u8> contents = file.template get_data<u8>(ctx, this->in_shdr);
  memcpy(ctx.buf + this->out_shdr.sh_offset, contents.data(), contents.size());

  switch (this->in_shdr.sh_type) {
  case SHT_GROUP: {
    // Every word but the first is an input section index.
    u32 *mem = (u32 *)(ctx.buf + this->out_shdr.sh_offset);
    for (i64 i = 1; i < this->out_shdr.sh_size / sizeof(u32); i++)
      mem[i] = file.sections[mem[i]]->shndx;
    break;
  }
  case SHT_REL:
  case SHT_RELA: {
    ElfRel<E> *rel = (ElfRel<E> *)(ctx.buf + this->out_shdr.sh_offset);
    i64 size = this->out_shdr.sh_size / sizeof(ElfRel<E>);

    // Rewrite symbol indices. A relocation against a symbol whose
    // section is not copied to the output is zeroed out.
    for (i64 i = 0; i < size; i++) {
      const ElfSym<E> &sym = file.syms[rel[i].r_sym];
      if (sym.is_undef() || sym.is_abs() || sym.is_common() ||
          file.sections[sym.st_shndx])
        rel[i].r_sym = file.symidx[rel[i].r_sym];
      else
        memset(rel + i, 0, sizeof(ElfRel<E>));
    }

    // Move the records whose r_type is nonzero to the front and clear
    // the rest of the section.
    i64 i = 0;
    i64 j = 0;
    for (; j < size; j++)
      if (rel[j].r_type)
        rel[i++] = rel[j];
    for (; i < size; i++)
      memset(rel + i, 0, sizeof(ElfRel<E>));
    break;
  }
  }
}

// Returns the offset of `str` in the table, appending it (plus room
// for a terminating byte) if it has not been added yet.
template <typename E>
i64 RStrtabSection<E>::add_string(std::string_view str) {
  auto [it, inserted] = strings.insert({str, this->out_shdr.sh_size});
  if (inserted)
    this->out_shdr.sh_size += str.size() + 1;
  return it->second;
}

template <typename E>
void RStrtabSection<E>::write_to(Context<E> &ctx) {
  // Terminators are not written; combine_objects() zero-fills the
  // whole output buffer before any chunk is written.
  for (auto [str, offset] : strings)
    memcpy(ctx.buf + this->out_shdr.sh_offset + offset, str.data(), str.size());
}

template <typename E>
void ROutputEhdr<E>::write_to(Context<E> &ctx) {
  ElfEhdr<E> &hdr = *(ElfEhdr<E> *)(ctx.buf + this->out_shdr.sh_offset);
  memcpy(&hdr.e_ident, "\177ELF", 4);
  hdr.e_ident[EI_CLASS] = (E::wordsize == 8) ? ELFCLASS64 : ELFCLASS32;
  hdr.e_ident[EI_DATA] = E::is_le ? ELFDATA2LSB : ELFDATA2MSB;
  hdr.e_ident[EI_VERSION] = EV_CURRENT;
  hdr.e_type = ET_REL;
  hdr.e_machine = E::e_machine;
  hdr.e_version = EV_CURRENT;
  hdr.e_shoff = ctx.r_shdr->out_shdr.sh_offset;
  hdr.e_ehsize = sizeof(ElfEhdr<E>);
  hdr.e_shentsize = sizeof(ElfShdr<E>);
  hdr.e_shnum = ctx.r_shdr->out_shdr.sh_size / sizeof(ElfShdr<E>);
  hdr.e_shstrndx = ctx.r_shstrtab->shndx;
}

// Reserves one section header table entry for each chunk that has a
// section index.
template <typename E>
void ROutputShdr<E>::update_shdr(Context<E> &ctx) {
  for (ROutputChunk<E> *chunk : ctx.r_chunks)
    if (chunk->shndx)
      this->out_shdr.sh_size += sizeof(ElfShdr<E>);
}

template <typename E>
void ROutputShdr<E>::write_to(Context<E> &ctx) {
  ElfShdr<E> *hdr = (ElfShdr<E> *)(ctx.buf + this->out_shdr.sh_offset);
  for (ROutputChunk<E> *chunk : ctx.r_chunks)
    if (chunk->shndx)
      hdr[chunk->shndx] = chunk->out_shdr;
}

// Parses the section header table, the symbol table and the section
// list of an object file.
template <typename E>
RObjectFile<E>::RObjectFile(Context<E> &ctx, MemoryMappedFile<E> &mb,
                            bool is_alive)
  : mb(mb), is_alive(is_alive) {
  // Read ELF header and section header. If e_shnum is 0, the number
  // of sections is taken from sh_size of the first section header.
  ElfEhdr<E> &ehdr = *(ElfEhdr<E> *)mb.data(ctx);
  ElfShdr<E> *sh_begin = (ElfShdr<E> *)(mb.data(ctx) + ehdr.e_shoff);
  i64 num_sections = (ehdr.e_shnum == 0) ? sh_begin->sh_size : ehdr.e_shnum;
  elf_sections = {sh_begin, sh_begin + num_sections};
  sections.resize(num_sections);

  // Read .shstrtab. If e_shstrndx is SHN_XINDEX, the index is taken
  // from sh_link of the first section header.
  i64 shstrtab_idx = (ehdr.e_shstrndx == SHN_XINDEX)
    ? sh_begin->sh_link : ehdr.e_shstrndx;
  shstrtab = (char *)(mb.data(ctx) + elf_sections[shstrtab_idx].sh_offset);

  // Read .symtab. Only the first SHT_SYMTAB section is used.
  for (i64 i = 1; i < elf_sections.size(); i++) {
    ElfShdr<E> &shdr = elf_sections[i];
    if (shdr.sh_type == SHT_SYMTAB) {
      syms = get_data<const ElfSym<E>>(ctx, shdr);
      strtab = (char *)(mb.data(ctx) + elf_sections[shdr.sh_link].sh_offset);
      symtab_shndx = i;
      first_global = shdr.sh_info;
      break;
    }
  }

  symidx.resize(syms.size());

  // Read sections. Symbol and string tables are not copied because
  // the output gets freshly built ones.
  for (i64 i = 1; i < elf_sections.size(); i++) {
    ElfShdr<E> &shdr = elf_sections[i];
    switch (shdr.sh_type) {
    case SHT_NULL:
    case SHT_SYMTAB:
    case SHT_STRTAB:
      break;
    default:
      sections[i].reset(new RInputSection<E>(ctx, *this, shdr));
    }
  }

  // Read global symbols
  for (i64 i = first_global; i < syms.size(); i++) {
    std::string_view name = strtab + syms[i].st_name;
    if (syms[i].is_defined())
      defined_syms.insert(name);
    else
      undef_syms.insert(name);
  }
}

// Remove duplicate comdat groups. `groups` holds the signatures seen
// so far across all files; a group whose signature is already in the
// set is dropped together with its member sections.
template <typename E>
void RObjectFile<E>::remove_comdats(Context<E> &ctx,
                                    std::unordered_set<std::string_view> &groups) {
  for (i64 i = 1; i < sections.size(); i++) {
    if (!sections[i])
      continue;
    ElfShdr<E> &shdr = sections[i]->in_shdr;
    if (shdr.sh_type != SHT_GROUP)
      continue;

    // Get a comdat group signature and insert it into a set.
    const ElfSym<E> &sym = syms[shdr.sh_info];
    std::string_view signature = strtab + sym.st_name;
    if (groups.insert(signature).second)
      continue;

    // If it is a duplicate, remove it and its members. The first word
    // of the group section is skipped; the rest are section indices.
    for (i64 j : this->template get_data<u32>(ctx, shdr).subspan(1))
      sections[j] = nullptr;
    sections[i] = nullptr;
  }
}

// Returns the file contents of the section described by `shdr` as a
// span of T.
template <typename E>
template <typename T>
std::span<T> RObjectFile<E>::get_data(Context<E> &ctx, const ElfShdr<E> &shdr) {
  T *begin = (T *)(mb.data(ctx) + shdr.sh_offset);
  T *end = (T *)(mb.data(ctx) + shdr.sh_offset + shdr.sh_size);
  return {begin, end};
}

// Opens all object files named by `args`. An object file given
// directly is always alive. An archive member is alive from the start
// only if it appears between --whole-archive and --no-whole-archive.
template <typename E>
static std::vector<std::unique_ptr<RObjectFile<E>>>
open_files(Context<E> &ctx, std::span<std::string_view> args) {
  std::vector<std::unique_ptr<RObjectFile<E>>> files;
  bool whole_archive = false;

  while (!args.empty()) {
    if (read_flag(args, "whole-archive")) {
      whole_archive = true;
      continue;
    }

    if (read_flag(args, "no-whole-archive")) {
      whole_archive = false;
      continue;
    }

    // Options that take a value but are irrelevant here.
    std::string_view arg;
    if (read_arg(ctx, args, arg, "version-script") ||
        read_arg(ctx, args, arg, "dynamic-list"))
      continue;

    MemoryMappedFile<E> *mb = nullptr;

    if (read_arg(ctx, args, arg, "l")) {
      mb = find_library(ctx, std::string(arg));
    } else {
      // Consume the argument before looking at it, so that an
      // unrecognized option is skipped instead of being opened as a
      // file, and so that `continue` always makes progress.
      arg = args[0];
      args = args.subspan(1);
      if (arg.starts_with('-'))
        continue;
      mb = MemoryMappedFile<E>::must_open(ctx, std::string(arg));
    }

    switch (get_file_type(ctx, mb)) {
    case FileType::OBJ:
      files.emplace_back(new RObjectFile<E>(ctx, *mb, true));
      break;
    case FileType::AR:
    case FileType::THIN_AR:
      for (MemoryMappedFile<E> *child : read_archive_members(ctx, mb))
        if (get_file_type(ctx, child) == FileType::OBJ)
          files.emplace_back(new RObjectFile<E>(ctx, *child, whole_archive));
      break;
    default:
      break;
    }
  }
  return files;
}

// Assigns a file offset to every chunk in ctx.r_chunks order and
// returns the resulting file size.
template <typename E>
static i64 assign_offsets(Context<E> &ctx) {
  i64 offset = 0;
  for (ROutputChunk<E> *chunk : ctx.r_chunks) {
    offset = align_to(offset, chunk->out_shdr.sh_addralign);
    chunk->out_shdr.sh_offset = offset;
    offset += chunk->out_shdr.sh_size;
  }
  return offset;
}

// Returns true if at least one element of `b` is also in `a`.
static bool contains(std::unordered_set<std::string_view> &a,
                     std::unordered_set<std::string_view> &b) {
  for (std::string_view x : b)
    if (a.contains(x))
      return true;
  return false;
}

// Entry point of the -r feature: combines the object files named by
// `file_args` into a single relocatable object file written to
// ctx.arg.output.
template <typename E>
void combine_objects(Context<E> &ctx, std::span<std::string_view> file_args) {
  // Read object files
  std::vector<std::unique_ptr<RObjectFile<E>>> files = open_files(ctx, file_args);

  // Identify needed objects. Starting from the files that are alive,
  // repeatedly pull in files that define a symbol which is still
  // undefined, until no more files are added.
  std::unordered_set<std::string_view> undef_syms;
  auto add_syms = [&](RObjectFile<E> &file) {
    undef_syms.insert(file.undef_syms.begin(), file.undef_syms.end());
    for (std::string_view name : file.defined_syms)
      undef_syms.erase(name);
    file.is_alive = true;
  };

  for (std::unique_ptr<RObjectFile<E>> &file : files)
    if (file->is_alive)
      add_syms(*file);

  for (;;) {
    bool added = false;
    for (std::unique_ptr<RObjectFile<E>> &file : files) {
      if (!file->is_alive && contains(undef_syms, file->defined_syms)) {
        add_syms(*file);
        added = true;
      }
    }
    if (!added)
      break;
  }

  files.erase(std::remove_if(files.begin(), files.end(),
                             [](std::unique_ptr<RObjectFile<E>> &file) {
                               return !file->is_alive;
                             }),
              files.end());

  // Remove duplicate comdat groups
  std::unordered_set<std::string_view> comdat_groups;
  for (std::unique_ptr<RObjectFile<E>> &file : files)
    file->remove_comdats(ctx, comdat_groups);

  // Create headers and linker-synthesized sections
  ROutputEhdr<E> ehdr;
  ROutputShdr<E> shdr;
  RSymtabSection<E> symtab;
  RStrtabSection<E> shstrtab(".shstrtab");
  RStrtabSection<E> strtab(".strtab");

  ctx.r_chunks.push_back(&ehdr);
  ctx.r_chunks.push_back(&shstrtab);
  ctx.r_chunks.push_back(&strtab);

  ctx.r_ehdr = &ehdr;
  ctx.r_shdr = &shdr;
  ctx.r_shstrtab = &shstrtab;
  ctx.r_strtab = &strtab;
  ctx.r_symtab = &symtab;

  // Add input sections to output sections
  for (std::unique_ptr<RObjectFile<E>> &file : files)
    for (std::unique_ptr<RInputSection<E>> &sec : file->sections)
      if (sec)
        ctx.r_chunks.push_back(sec.get());

  ctx.r_chunks.push_back(&symtab);
  ctx.r_chunks.push_back(&shdr);

  // Assign output section indices. The ELF header and the section
  // header table are not sections, so they keep index 0.
  i64 shndx = 1;
  for (ROutputChunk<E> *chunk : ctx.r_chunks)
    if (chunk != &ehdr && chunk != &shdr)
      chunk->shndx = shndx++;

  // Add section names to .shstrtab
  for (ROutputChunk<E> *chunk : ctx.r_chunks)
    if (chunk->shndx)
      chunk->out_shdr.sh_name = shstrtab.add_string(chunk->name);

  // Copy symbols from input objects to an output object. All local
  // symbols are added before any global symbol, and sh_info is set to
  // the number of entries present at that point.
  for (std::unique_ptr<RObjectFile<E>> &file : files)
    for (i64 i = 1; i < file->first_global; i++)
      symtab.add_local_symbol(ctx, *file, i);

  symtab.out_shdr.sh_info = symtab.syms.size();

  for (std::unique_ptr<RObjectFile<E>> &file : files)
    for (i64 i = file->first_global; i < file->syms.size(); i++)
      symtab.add_global_symbol(ctx, *file, i);

  // Finalize section header
  for (ROutputChunk<E> *chunk : ctx.r_chunks)
    chunk->update_shdr(ctx);

  // Open an output file. The buffer is zero-filled so that chunks
  // need to write only their nonzero bytes.
  i64 filesize = assign_offsets(ctx);
  std::unique_ptr<OutputFile<E>> out =
    OutputFile<E>::open(ctx, ctx.arg.output, filesize, 0666);
  memset(out->buf, 0, filesize);
  ctx.buf = out->buf;

  // Write to the output file
  for (ROutputChunk<E> *chunk : ctx.r_chunks)
    chunk->write_to(ctx);
  out->close(ctx);
}

// NOTE(review): the target names in the two explicit instantiations
// were lost when this file was extracted; X86_64 and I386 are assumed
// here. Confirm against the targets declared in mold.h.
template void combine_objects(Context<X86_64> &, std::span<std::string_view>);
template void combine_objects(Context<I386> &, std::span<std::string_view>);