1
1
mirror of https://github.com/rui314/mold.git synced 2024-08-16 08:20:23 +03:00

Compare commits

...

13 Commits

Author SHA1 Message Date
Shiv Dhar
77a79f0e57
Merge 5bf1955fa9 into 7089304046 2024-07-06 02:14:41 +01:00
Rui Ueyama
7089304046 Simplify 2024-07-05 19:47:33 +09:00
Rui Ueyama
5ab3708e9f
Merge pull request #1298 from yujincheng08/patch-1
Fix cross-compile lto on MacOS
2024-07-05 19:45:10 +09:00
Rui Ueyama
aa939d7a6a Temporarily disable ASAN tests
It looks like starting with Clang 18.1.3, ASAN began reporting an
error on OneTBB.
2024-07-05 18:27:00 +09:00
Rui Ueyama
a63bbcd7b1 Attempt to fix CI 2024-07-05 17:58:13 +09:00
Rui Ueyama
339f9c485a Attempt to fix CI 2024-07-05 17:42:32 +09:00
LoveSy
4905f6d3f2
Use macos-12 because macos-11 has been removed
https://github.blog/changelog/2024-05-20-actions-upcoming-changes-to-github-hosted-macos-runners/#macos-11-deprecation-and-removal
2024-07-05 15:32:37 +08:00
Rui Ueyama
cb8cc1c8f0 Refactor 2024-07-05 16:00:09 +09:00
Rui Ueyama
c6b54532e9 Refactor 2024-07-04 16:17:25 +09:00
Rui Ueyama
3936134823 Refactor 2024-07-04 15:08:37 +09:00
Rui Ueyama
55ca05bab6 Refactor 2024-07-04 13:59:29 +09:00
LoveSy
7305a5c5dd
Fix cross-compile lto on MacOS 2024-07-03 21:55:22 +08:00
Shiv Dhar
5bf1955fa9
Fix small typo 2024-03-02 01:36:05 +05:30
13 changed files with 311 additions and 284 deletions

View File

@ -13,7 +13,6 @@ jobs:
target: target:
# Disable PCH for the default configuration. This prevents relying on implicit includes. # Disable PCH for the default configuration. This prevents relying on implicit includes.
- '-DCMAKE_DISABLE_PRECOMPILE_HEADERS=On' - '-DCMAKE_DISABLE_PRECOMPILE_HEADERS=On'
- '-DMOLD_USE_ASAN=On'
- '-DMOLD_USE_TSAN=On' - '-DMOLD_USE_TSAN=On'
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
steps: steps:
@ -110,7 +109,7 @@ jobs:
!build/CMakeFiles !build/CMakeFiles
build-macos: build-macos:
runs-on: macos-11 runs-on: macos-12
strategy: strategy:
matrix: matrix:
target: target:

View File

@ -328,7 +328,6 @@ list(APPEND MOLD_ELF_TEMPLATE_FILES
elf/output-chunks.cc elf/output-chunks.cc
elf/passes.cc elf/passes.cc
elf/relocatable.cc elf/relocatable.cc
elf/subprocess.cc
elf/thunks.cc elf/thunks.cc
elf/tls.cc elf/tls.cc
) )
@ -339,6 +338,12 @@ else()
list(APPEND MOLD_ELF_TEMPLATE_FILES elf/lto-unix.cc) list(APPEND MOLD_ELF_TEMPLATE_FILES elf/lto-unix.cc)
endif() endif()
if(WIN32)
list(APPEND MOLD_ELF_TEMPLATE_FILES elf/subprocess-win32.cc)
else()
list(APPEND MOLD_ELF_TEMPLATE_FILES elf/subprocess-unix.cc)
endif()
function(mold_instantiate_templates SOURCE TARGET) function(mold_instantiate_templates SOURCE TARGET)
set(PATH ${CMAKE_BINARY_DIR}/${SOURCE}.${TARGET}.cc) set(PATH ${CMAKE_BINARY_DIR}/${SOURCE}.${TARGET}.cc)
if(NOT EXISTS ${PATH}) if(NOT EXISTS ${PATH})

View File

@ -163,7 +163,7 @@ tool.
(i.e. `_start`) or a few other root sections. In mold, we are using (i.e. `_start`) or a few other root sections. In mold, we are using
multiple threads to mark sections concurrently. multiple threads to mark sections concurrently.
- Similarly, BFD, gold an lld support Identical Comdat Folding (ICF) - Similarly, BFD, gold and lld support Identical Comdat Folding (ICF)
as yet another size optimization. ICF merges two or more read-only as yet another size optimization. ICF merges two or more read-only
sections that happen to have the same contents and relocations. sections that happen to have the same contents and relocations.
To do that, we have to find isomorphic subgraphs from larger graphs. To do that, we have to find isomorphic subgraphs from larger graphs.

View File

@ -677,104 +677,6 @@ void ObjectFile<E>::sort_relocations(Context<E> &ctx) {
} }
} }
// Finds the terminator of the string that starts at byte offset `pos`
// in `data`.
//
// `entsize` is the character width in bytes. A terminator is a run of
// `entsize` zero bytes located at `pos + k * entsize` for some k >= 0.
// Returns the byte offset of the terminator, or `data.npos` if there is
// no complete terminator before the end of `data`.
static size_t find_null(std::string_view data, i64 pos, i64 entsize) {
  if (entsize == 1)
    return data.find('\0', pos);

  // Do the bounds check in the signed domain. `data.size() - entsize` is
  // an unsigned expression and wraps around to a huge value if `data` is
  // shorter than a single entry; the loop would then accept a truncated
  // run of zeros as a terminator or call substr() with an out-of-range
  // position, which throws.
  i64 size = data.size();
  for (; pos + entsize <= size; pos += entsize)
    if (data.substr(pos, entsize).find_first_not_of('\0') == data.npos)
      return pos;
  return data.npos;
}
// Mergeable sections (sections with the SHF_MERGE bit set) typically
// contain string literals. The linker is expected to split the section
// contents into null-terminated strings, merge them with mergeable
// strings from other object files, and emit the uniquified strings to
// the output file.
//
// This mechanism reduces the size of an output file. If two source
// files happen to contain the same string literal, the output will
// contain only a single copy of it.
//
// It is less common than string literals, but mergeable sections can
// contain fixed-sized read-only records too.
//
// This function splits the section contents into small pieces that we
// call "section fragments". Section fragment is a unit of merging.
//
// We do not support mergeable sections that have relocations.
// Builds a MergeableSection for `sec`, or returns nullptr if `sec` is
// not a candidate for merging (dead, empty, has relocations, lacks
// SHF_MERGE, or has no usable entry size).
//
// On success the returned object records the offset and hash of every
// piece of the section contents.
template <typename E>
static std::unique_ptr<MergeableSection<E>>
split_section(Context<E> &ctx, InputSection<E> &sec) {
  bool eligible = sec.is_alive && sec.relsec_idx == -1 && sec.sh_size != 0;
  if (!eligible)
    return nullptr;

  const ElfShdr<E> &shdr = sec.shdr();
  if ((shdr.sh_flags & SHF_MERGE) == 0)
    return nullptr;

  bool is_strings = (shdr.sh_flags & SHF_STRINGS) != 0;

  // Fall back to a sensible entry size if the section header has none.
  i64 entsize = shdr.sh_entsize;
  if (entsize == 0) {
    if (is_strings)
      entsize = 1;
    else
      entsize = static_cast<int>(shdr.sh_addralign);
  }

  if (entsize == 0)
    return nullptr;

  i64 addralign = shdr.sh_addralign;
  if (addralign == 0)
    addralign = 1;

  std::unique_ptr<MergeableSection<E>> m(new MergeableSection<E>);
  m->parent = MergedSection<E>::get_instance(ctx, sec.name(), shdr.sh_type,
                                             shdr.sh_flags, entsize, addralign);
  m->p2align = sec.p2align;

  // If the section contents are compressed, uncompress them.
  sec.uncompress(ctx);

  std::string_view data = sec.contents;
  m->contents = sec.contents;

  // Fragment offsets are stored as 32-bit values.
  if (data.size() > UINT32_MAX)
    Fatal(ctx) << sec << ": mergeable section too large";

  // Record where each piece begins.
  if (is_strings) {
    i64 pos = 0;
    while (pos < data.size()) {
      m->frag_offsets.push_back(pos);

      size_t end = find_null(data, pos, entsize);
      if (end == data.npos)
        Fatal(ctx) << sec << ": string is not null terminated";
      pos = end + entsize;
    }
  } else {
    if (data.size() % entsize != 0)
      Fatal(ctx) << sec << ": section size is not multiple of sh_entsize";

    m->frag_offsets.reserve(data.size() / entsize);
    for (i64 pos = 0; pos < data.size(); pos += entsize)
      m->frag_offsets.push_back(pos);
  }

  // Hash every piece and feed the hashes to a local cardinality
  // estimator, which is then folded into the parent's estimator.
  HyperLogLog estimator;
  i64 num_frags = m->frag_offsets.size();
  m->hashes.reserve(num_frags);

  for (i64 idx = 0; idx < num_frags; idx++) {
    u64 hash = hash_string(m->get_contents(idx));
    m->hashes.push_back(hash);
    estimator.insert(hash);
  }

  m->parent->estimator.merge(estimator);

  static Counter counter("string_fragments");
  counter += m->frag_offsets.size();
  return m;
}
// Usually a section is an atomic unit of inclusion or exclusion. // Usually a section is an atomic unit of inclusion or exclusion.
// Linker doesn't care about its contents. However, if a section is a // Linker doesn't care about its contents. However, if a section is a
// mergeable section (a section with SHF_MERGE bit set), the linker is // mergeable section (a section with SHF_MERGE bit set), the linker is
@ -818,34 +720,45 @@ split_section(Context<E> &ctx, InputSection<E> &sec) {
// section piece, the section piece is attached to the symbol. // section piece, the section piece is attached to the symbol.
template <typename E> template <typename E>
void ObjectFile<E>::initialize_mergeable_sections(Context<E> &ctx) { void ObjectFile<E>::initialize_mergeable_sections(Context<E> &ctx) {
mergeable_sections.resize(sections.size()); // Convert InputSections to MergeableSections
for (i64 i = 0; i < this->sections.size(); i++) {
InputSection<E> *isec = this->sections[i].get();
if (!isec || isec->sh_size == 0 || isec->relsec_idx != -1)
continue;
for (i64 i = 0; i < sections.size(); i++) { MergedSection<E> *parent =
if (std::unique_ptr<InputSection<E>> &isec = sections[i]) { MergedSection<E>::get_instance(ctx, isec->name(), isec->shdr());
if (std::unique_ptr<MergeableSection<E>> m = split_section(ctx, *isec)) {
mergeable_sections[i] = std::move(m); if (parent) {
isec->is_alive = false; this->mergeable_sections[i] =
} std::make_unique<MergeableSection<E>>(ctx, *parent, this->sections[i]);
this->sections[i] = nullptr;
} }
} }
// Split section contents
for (std::unique_ptr<MergeableSection<E>> &sec : mergeable_sections)
if (sec)
sec->split_contents(ctx);
} }
template <typename E> template <typename E>
void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) { void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
for (std::unique_ptr<MergeableSection<E>> &m : mergeable_sections) { for (std::unique_ptr<MergeableSection<E>> &m : mergeable_sections) {
if (m) { if (!m)
m->fragments.reserve(m->frag_offsets.size()); continue;
for (i64 i = 0; i < m->frag_offsets.size(); i++) { m->fragments.reserve(m->frag_offsets.size());
SectionFragment<E> *frag =
m->parent->insert(ctx, m->get_contents(i), m->hashes[i], m->p2align);
m->fragments.push_back(frag);
}
// Reclaim memory as we'll never use this vector again for (i64 i = 0; i < m->frag_offsets.size(); i++) {
m->hashes.clear(); SectionFragment<E> *frag =
m->hashes.shrink_to_fit(); m->parent.insert(ctx, m->get_contents(i), m->hashes[i], m->p2align);
m->fragments.push_back(frag);
} }
// Reclaim memory as we'll never use this vector again
m->hashes.clear();
m->hashes.shrink_to_fit();
} }
// Attach section pieces to symbols. // Attach section pieces to symbols.
@ -872,24 +785,25 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
} }
// Compute the size of frag_syms. // Compute the size of frag_syms.
i64 nfrag_syms = 0; std::vector<InputSection<E> *> vec;
for (std::unique_ptr<InputSection<E>> &isec : sections) for (std::unique_ptr<InputSection<E>> &isec : sections)
if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC)) if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC))
for (ElfRel<E> &r : isec->get_rels(ctx)) vec.push_back(isec.get());
if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)]) i64 nfrag_syms = 0;
nfrag_syms++; for (InputSection<E> *isec : vec)
for (ElfRel<E> &r : isec->get_rels(ctx))
if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)])
nfrag_syms++;
this->frag_syms.resize(nfrag_syms); this->frag_syms.resize(nfrag_syms);
// For each relocation referring a mergeable section symbol, we create // For each relocation referring to a mergeable section symbol, we
// a new dummy non-section symbol and redirect the relocation to the // create a new dummy non-section symbol and redirect the relocation
// newly-created symbol. // to the newly created symbol.
i64 idx = 0; i64 idx = 0;
for (std::unique_ptr<InputSection<E>> &isec : sections) { for (InputSection<E> *isec : vec) {
if (!isec || !isec->is_alive || !(isec->shdr().sh_flags & SHF_ALLOC))
continue;
for (ElfRel<E> &r : isec->get_rels(ctx)) { for (ElfRel<E> &r : isec->get_rels(ctx)) {
const ElfSym<E> &esym = this->elf_syms[r.r_sym]; const ElfSym<E> &esym = this->elf_syms[r.r_sym];
if (esym.st_type != STT_SECTION) if (esym.st_type != STT_SECTION)
@ -929,6 +843,8 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
template <typename E> template <typename E>
void ObjectFile<E>::parse(Context<E> &ctx) { void ObjectFile<E>::parse(Context<E> &ctx) {
sections.resize(this->elf_sections.size()); sections.resize(this->elf_sections.size());
mergeable_sections.resize(sections.size());
symtab_sec = this->find_section(SHT_SYMTAB); symtab_sec = this->find_section(SHT_SYMTAB);
if (symtab_sec) { if (symtab_sec) {

View File

@ -552,9 +552,88 @@ bool InputSection<E>::record_undef_error(Context<E> &ctx, const ElfRel<E> &rel)
return false; return false;
} }
// Constructs a mergeable section that belongs to the output section
// `parent`.
//
// Ownership of the input section is moved out of `isec` into the
// `section` member, so `isec` no longer owns it once this returns.
// `p2align` is copied from the section that was just moved in.
// NOTE(review): that read goes through the `section` member, so it
// relies on `section` being declared before `p2align` in the class --
// confirm against the class definition.
template <typename E>
MergeableSection<E>::MergeableSection(Context<E> &ctx, MergedSection<E> &parent,
std::unique_ptr<InputSection<E>> &isec)
: parent(parent), section(std::move(isec)), p2align(section->p2align) {
// Called before any code reads section->contents; presumably a no-op
// for sections that are not compressed -- confirm in uncompress().
section->uncompress(ctx);
}
// Finds the terminator of the string that starts at byte offset `pos`
// in `data`.
//
// `entsize` is the character width in bytes. A terminator is a run of
// `entsize` zero bytes located at `pos + k * entsize` for some k >= 0.
// Returns the byte offset of the terminator, or `data.npos` if there is
// no complete terminator before the end of `data`.
static size_t find_null(std::string_view data, i64 pos, i64 entsize) {
  if (entsize == 1)
    return data.find('\0', pos);

  // Do the bounds check in the signed domain. `data.size() - entsize` is
  // an unsigned expression and wraps around to a huge value if `data` is
  // shorter than a single entry; the loop would then accept a truncated
  // run of zeros as a terminator or call substr() with an out-of-range
  // position, which throws.
  i64 size = data.size();
  for (; pos + entsize <= size; pos += entsize)
    if (data.substr(pos, entsize).find_first_not_of('\0') == data.npos)
      return pos;
  return data.npos;
}
// Mergeable sections (sections with the SHF_MERGE bit set) typically
// contain string literals. The linker is expected to split the section
// contents into null-terminated strings, merge them with mergeable
// strings from other object files, and emit the uniquified strings to
// the output file.
//
// This mechanism reduces the size of an output file. If two source
// files happen to contain the same string literal, the output will
// contain only a single copy of it.
//
// It is less common than string literals, but mergeable sections can
// contain fixed-sized read-only records too.
//
// This function splits the section contents into small pieces that we
// call "section fragments". Section fragment is a unit of merging.
//
// We do not support mergeable sections that have relocations.
// Splits the contents of the owned input section into pieces and
// records, for each piece, its starting offset and its hash. The hashes
// are also fed to the parent's cardinality estimator.
template <typename E>
void MergeableSection<E>::split_contents(Context<E> &ctx) {
  std::string_view data = section->contents;

  // Fragment offsets are stored as 32-bit values.
  if (data.size() > UINT32_MAX)
    Fatal(ctx) << *section
               << ": mergeable section too large";

  i64 entsize = parent.shdr.sh_entsize;
  bool is_strings = (parent.shdr.sh_flags & SHF_STRINGS) != 0;

  // Record where each piece begins.
  if (is_strings) {
    i64 pos = 0;
    while (pos < data.size()) {
      frag_offsets.push_back(pos);

      size_t end = find_null(data, pos, entsize);
      if (end == data.npos)
        Fatal(ctx) << *section << ": string is not null terminated";
      pos = end + entsize;
    }
  } else {
    if (data.size() % entsize != 0)
      Fatal(ctx) << *section << ": section size is not multiple of sh_entsize";

    frag_offsets.reserve(data.size() / entsize);
    for (i64 pos = 0; pos < data.size(); pos += entsize)
      frag_offsets.push_back(pos);
  }

  // Hash every piece; a local estimator is filled first and merged into
  // the parent's estimator in one step.
  HyperLogLog estimator;
  i64 num_frags = frag_offsets.size();
  hashes.reserve(num_frags);

  for (i64 idx = 0; idx < num_frags; idx++) {
    u64 hash = hash_string(get_contents(idx));
    hashes.push_back(hash);
    estimator.insert(hash);
  }

  parent.estimator.merge(estimator);

  static Counter counter("string_fragments");
  counter += frag_offsets.size();
}
using E = MOLD_TARGET; using E = MOLD_TARGET;
template bool cie_equals(const CieRecord<E> &, const CieRecord<E> &); template bool cie_equals(const CieRecord<E> &, const CieRecord<E> &);
template class InputSection<E>; template class InputSection<E>;
template class MergeableSection<E>;
} // namespace mold::elf } // namespace mold::elf

View File

@ -567,11 +567,7 @@ static ElfSym<E> to_elf_sym(PluginSymbol &psym) {
// Returns false if it's GCC. // Returns false if it's GCC.
template <typename E> template <typename E>
static bool is_llvm(Context<E> &ctx) { static bool is_llvm(Context<E> &ctx) {
#ifdef __MINGW32__ return ctx.arg.plugin.find("LLVMgold.") != ctx.arg.plugin.npos;
return ctx.arg.plugin.ends_with("LLVMgold.dll");
#else
return ctx.arg.plugin.ends_with("LLVMgold.so");
#endif
} }
// Returns true if a given linker plugin supports the get_symbols_v3 API. // Returns true if a given linker plugin supports the get_symbols_v3 API.

View File

@ -345,12 +345,8 @@ int elf_main(int argc, char **argv) {
Context<E> ctx; Context<E> ctx;
// Process -run option first. process_run_subcommand() does not return. // Process -run option first. process_run_subcommand() does not return.
if (argc >= 2 && (argv[1] == "-run"sv || argv[1] == "--run"sv)) { if (argc >= 2 && (argv[1] == "-run"sv || argv[1] == "--run"sv))
#if defined(_WIN32) || defined(__APPLE__)
Fatal(ctx) << "-run is supported only on Unix";
#endif
process_run_subcommand(ctx, argc, argv); process_run_subcommand(ctx, argc, argv);
}
// Parse non-positional command line options // Parse non-positional command line options
ctx.cmdline_args = expand_response_files(ctx, argv); ctx.cmdline_args = expand_response_files(ctx, argv);
@ -375,12 +371,8 @@ int elf_main(int argc, char **argv) {
<< ": " << errno_string(); << ": " << errno_string();
// Fork a subprocess unless --no-fork is given. // Fork a subprocess unless --no-fork is given.
std::function<void()> on_complete;
#if !defined(_WIN32) && !defined(__APPLE__)
if (ctx.arg.fork) if (ctx.arg.fork)
on_complete = fork_child(); fork_child();
#endif
acquire_global_lock(); acquire_global_lock();
@ -664,8 +656,13 @@ int elf_main(int argc, char **argv) {
// so we sort them. // so we sort them.
ctx.reldyn->sort(ctx); ctx.reldyn->sort(ctx);
// Zero-clear paddings between sections // .note.gnu.build-id section contains a cryptographic hash of the
clear_padding(ctx); // entire output file. Now that we wrote everything except build-id,
// we can compute it.
if (ctx.buildid) {
compute_build_id(ctx);
ctx.buildid->copy_buf(ctx);
}
// .gdb_index's contents cannot be constructed before applying // .gdb_index's contents cannot be constructed before applying
// relocations to other debug sections. We have relocated debug // relocations to other debug sections. We have relocated debug
@ -673,12 +670,6 @@ int elf_main(int argc, char **argv) {
if (ctx.gdb_index) if (ctx.gdb_index)
write_gdb_index(ctx); write_gdb_index(ctx);
// .note.gnu.build-id section contains a cryptographic hash of the
// entire output file. Now that we wrote everything except build-id,
// we can compute it.
if (ctx.buildid)
ctx.buildid->write_buildid(ctx);
t_copy.stop(); t_copy.stop();
ctx.checkpoint(); ctx.checkpoint();
@ -707,8 +698,8 @@ int elf_main(int argc, char **argv) {
std::cout << std::flush; std::cout << std::flush;
std::cerr << std::flush; std::cerr << std::flush;
if (on_complete) if (ctx.arg.fork)
on_complete(); notify_parent();
release_global_lock(); release_global_lock();

View File

@ -47,6 +47,7 @@ template <typename E> class Symbol;
template <typename E> struct CieRecord; template <typename E> struct CieRecord;
template <typename E> struct Context; template <typename E> struct Context;
template <typename E> struct FdeRecord; template <typename E> struct FdeRecord;
template <typename E> class MergeableSection;
template <typename E> class RelocSection; template <typename E> class RelocSection;
template <typename E> template <typename E>
@ -799,8 +800,7 @@ template <typename E>
class MergedSection : public Chunk<E> { class MergedSection : public Chunk<E> {
public: public:
static MergedSection<E> * static MergedSection<E> *
get_instance(Context<E> &ctx, std::string_view name, i64 type, i64 flags, get_instance(Context<E> &ctx, std::string_view name, const ElfShdr<E> &shdr);
i64 entsize, i64 addralign);
SectionFragment<E> *insert(Context<E> &ctx, std::string_view data, SectionFragment<E> *insert(Context<E> &ctx, std::string_view data,
u64 hash, i64 p2align); u64 hash, i64 p2align);
@ -949,9 +949,8 @@ public:
void update_shdr(Context<E> &ctx) override; void update_shdr(Context<E> &ctx) override;
void copy_buf(Context<E> &ctx) override; void copy_buf(Context<E> &ctx) override;
void write_buildid(Context<E> &ctx);
static constexpr i64 HEADER_SIZE = 16; std::vector<u8> contents;
}; };
template <typename E> template <typename E>
@ -1090,12 +1089,17 @@ struct ComdatGroupRef {
}; };
template <typename E> template <typename E>
struct MergeableSection { class MergeableSection {
public:
MergeableSection(Context<E> &ctx, MergedSection<E> &parent,
std::unique_ptr<InputSection<E>> &isec);
void split_contents(Context<E> &ctx);
std::pair<SectionFragment<E> *, i64> get_fragment(i64 offset); std::pair<SectionFragment<E> *, i64> get_fragment(i64 offset);
std::string_view get_contents(i64 idx); std::string_view get_contents(i64 idx);
MergedSection<E> *parent; MergedSection<E> &parent;
std::string_view contents; std::unique_ptr<InputSection<E>> section;
std::vector<u32> frag_offsets; std::vector<u32> frag_offsets;
std::vector<u32> hashes; std::vector<u32> hashes;
std::vector<SectionFragment<E> *> fragments; std::vector<SectionFragment<E> *> fragments;
@ -1368,7 +1372,8 @@ void print_map(Context<E> &ctx);
// subprocess.cc // subprocess.cc
// //
std::function<void()> fork_child(); void fork_child();
void notify_parent();
template <typename E> template <typename E>
[[noreturn]] [[noreturn]]
@ -1426,11 +1431,11 @@ template <typename E> void apply_version_script(Context<E> &);
template <typename E> void parse_symbol_version(Context<E> &); template <typename E> void parse_symbol_version(Context<E> &);
template <typename E> void compute_import_export(Context<E> &); template <typename E> void compute_import_export(Context<E> &);
template <typename E> void compute_address_significance(Context<E> &); template <typename E> void compute_address_significance(Context<E> &);
template <typename E> void clear_padding(Context<E> &);
template <typename E> void compute_section_headers(Context<E> &); template <typename E> void compute_section_headers(Context<E> &);
template <typename E> i64 set_osec_offsets(Context<E> &); template <typename E> i64 set_osec_offsets(Context<E> &);
template <typename E> void fix_synthetic_symbols(Context<E> &); template <typename E> void fix_synthetic_symbols(Context<E> &);
template <typename E> i64 compress_debug_sections(Context<E> &); template <typename E> i64 compress_debug_sections(Context<E> &);
template <typename E> void compute_build_id(Context<E> &);
template <typename E> void write_dependency_file(Context<E> &); template <typename E> void write_dependency_file(Context<E> &);
template <typename E> void show_stats(Context<E> &); template <typename E> void show_stats(Context<E> &);
@ -1565,7 +1570,18 @@ private:
// //
struct BuildId { struct BuildId {
i64 size() const; i64 size() const {
switch (kind) {
case HEX:
return value.size();
case HASH:
return hash_size;
case UUID:
return 16;
default:
unreachable();
}
}
enum { NONE, HEX, HASH, UUID } kind = NONE; enum { NONE, HEX, HASH, UUID } kind = NONE;
std::vector<u8> value; std::vector<u8> value;
@ -2411,8 +2427,8 @@ template <typename E>
std::string_view MergeableSection<E>::get_contents(i64 i) { std::string_view MergeableSection<E>::get_contents(i64 i) {
i64 cur = frag_offsets[i]; i64 cur = frag_offsets[i];
if (i == frag_offsets.size() - 1) if (i == frag_offsets.size() - 1)
return contents.substr(cur); return section->contents.substr(cur);
return contents.substr(cur, frag_offsets[i + 1] - cur); return section->contents.substr(cur, frag_offsets[i + 1] - cur);
} }
template <typename E> template <typename E>

View File

@ -1,7 +1,5 @@
#include "mold.h" #include "mold.h"
#include "config.h" #include "config.h"
#include "blake3.h"
#include <cctype> #include <cctype>
#include <set> #include <set>
@ -1915,15 +1913,26 @@ MergedSection<E>::MergedSection(std::string_view name, i64 flags, i64 type,
template <typename E> template <typename E>
MergedSection<E> * MergedSection<E> *
MergedSection<E>::get_instance(Context<E> &ctx, std::string_view name, MergedSection<E>::get_instance(Context<E> &ctx, std::string_view name,
i64 type, i64 flags, const ElfShdr<E> &shdr) {
i64 entsize, i64 addralign) { if (!(shdr.sh_flags & SHF_MERGE))
return nullptr;
i64 addralign = std::max<i64>(1, shdr.sh_addralign);
i64 flags = shdr.sh_flags & ~(u64)SHF_GROUP & ~(u64)SHF_COMPRESSED;
i64 entsize = shdr.sh_entsize;
if (entsize == 0)
entsize = (shdr.sh_flags & SHF_STRINGS) ? 1 : (i64)shdr.sh_addralign;
if (entsize == 0)
return nullptr;
name = get_merged_output_name(ctx, name, flags, entsize, addralign); name = get_merged_output_name(ctx, name, flags, entsize, addralign);
flags = flags & ~(u64)SHF_GROUP & ~(u64)SHF_COMPRESSED;
auto find = [&]() -> MergedSection * { auto find = [&]() -> MergedSection * {
for (std::unique_ptr<MergedSection<E>> &osec : ctx.merged_sections) for (std::unique_ptr<MergedSection<E>> &osec : ctx.merged_sections)
if (name == osec->name && flags == osec->shdr.sh_flags && if (name == osec->name && flags == osec->shdr.sh_flags &&
type == osec->shdr.sh_type && entsize == osec->shdr.sh_entsize) shdr.sh_type == osec->shdr.sh_type &&
entsize == osec->shdr.sh_entsize)
return osec.get(); return osec.get();
return nullptr; return nullptr;
}; };
@ -1941,7 +1950,7 @@ MergedSection<E>::get_instance(Context<E> &ctx, std::string_view name,
if (MergedSection *osec = find()) if (MergedSection *osec = find())
return osec; return osec;
MergedSection *osec = new MergedSection(name, flags, type, entsize); MergedSection *osec = new MergedSection(name, flags, shdr.sh_type, entsize);
ctx.merged_sections.emplace_back(osec); ctx.merged_sections.emplace_back(osec);
return osec; return osec;
} }
@ -2526,89 +2535,21 @@ void VerdefSection<E>::copy_buf(Context<E> &ctx) {
write_vector(ctx.buf + this->shdr.sh_offset, contents); write_vector(ctx.buf + this->shdr.sh_offset, contents);
} }
// Returns the number of bytes in the build ID: the length of the
// user-supplied value for HEX, `hash_size` for HASH, and 16 for UUID.
// Any other kind is treated as unreachable.
inline i64 BuildId::size() const {
  if (kind == HEX)
    return value.size();
  if (kind == HASH)
    return hash_size;
  if (kind == UUID)
    return 16;
  unreachable();
}
template <typename E> template <typename E>
void BuildIdSection<E>::update_shdr(Context<E> &ctx) { void BuildIdSection<E>::update_shdr(Context<E> &ctx) {
this->shdr.sh_size = HEADER_SIZE + ctx.arg.build_id.size(); this->shdr.sh_size = ctx.arg.build_id.size() + 16; // +16 for the header
} }
template <typename E> template <typename E>
void BuildIdSection<E>::copy_buf(Context<E> &ctx) { void BuildIdSection<E>::copy_buf(Context<E> &ctx) {
U32<E> *base = (U32<E> *)(ctx.buf + this->shdr.sh_offset); U32<E> *base = (U32<E> *)(ctx.buf + this->shdr.sh_offset);
memset(base, 0, this->shdr.sh_size); memset(base, 0, this->shdr.sh_size);
base[0] = 4; // Name size
base[1] = ctx.arg.build_id.size(); // Hash size
base[2] = NT_GNU_BUILD_ID; // Type
memcpy(base + 3, "GNU", 4); // Name string
}
// BLAKE3 is a cryptographic hash function just like SHA256. base[0] = 4; // Name size
// We use it instead of SHA256 because it's faster. base[1] = ctx.arg.build_id.size(); // Hash size
static void blake3_hash(u8 *buf, i64 size, u8 *out) { base[2] = NT_GNU_BUILD_ID; // Type
blake3_hasher hasher; memcpy(base + 3, "GNU", 4); // Name string
blake3_hasher_init(&hasher); write_vector(base + 4, contents); // Build ID
blake3_hasher_update(&hasher, buf, size);
blake3_hasher_finalize(&hasher, out, BLAKE3_OUT_LEN);
}
template <typename E>
void BuildIdSection<E>::write_buildid(Context<E> &ctx) {
Timer t(ctx, "build_id");
u8 *buf = ctx.buf + this->shdr.sh_offset + HEADER_SIZE;
switch (ctx.arg.build_id.kind) {
case BuildId::HEX:
write_vector(buf, ctx.arg.build_id.value);
return;
case BuildId::HASH: {
i64 shard_size = 4 * 1024 * 1024;
i64 filesize = ctx.output_file->filesize;
i64 num_shards = align_to(filesize, shard_size) / shard_size;
std::vector<u8> shards(num_shards * BLAKE3_OUT_LEN);
tbb::parallel_for((i64)0, num_shards, [&](i64 i) {
u8 *begin = ctx.buf + shard_size * i;
u8 *end = (i == num_shards - 1) ? ctx.buf + filesize : begin + shard_size;
blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN);
#ifdef HAVE_MADVISE
// Make the kernel page out the file contents we've just written
// so that subsequent close(2) call will become quicker.
if (i > 0 && ctx.output_file->is_mmapped)
madvise(begin, end - begin, MADV_DONTNEED);
#endif
});
u8 digest[BLAKE3_OUT_LEN];
blake3_hash(shards.data(), shards.size(), digest);
assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN);
memcpy(buf, digest, ctx.arg.build_id.size());
return;
}
case BuildId::UUID: {
get_random_bytes(buf, 16);
// Indicate that this is UUIDv4 as defined by RFC4122
buf[6] = (buf[6] & 0b0000'1111) | 0b0100'0000;
buf[8] = (buf[8] & 0b0011'1111) | 0b1000'0000;
return;
}
default:
unreachable();
}
} }
template <typename E> template <typename E>

View File

@ -1,4 +1,5 @@
#include "mold.h" #include "mold.h"
#include "blake3.h"
#include <fstream> #include <fstream>
#include <functional> #include <functional>
@ -442,10 +443,13 @@ static std::string get_cmdline_args(Context<E> &ctx) {
template <typename E> template <typename E>
void add_comment_string(Context<E> &ctx, std::string str) { void add_comment_string(Context<E> &ctx, std::string str) {
MergedSection<E> *sec = ElfShdr<E> shdr = {};
MergedSection<E>::get_instance(ctx, ".comment", SHT_PROGBITS, shdr.sh_type = SHT_PROGBITS;
SHF_MERGE | SHF_STRINGS, 1, 1); shdr.sh_flags = SHF_MERGE | SHF_STRINGS;
shdr.sh_entsize = 1;
shdr.sh_addralign = 1;
MergedSection<E> *sec = MergedSection<E>::get_instance(ctx, ".comment", shdr);
if (sec->map.nbuckets == 0) if (sec->map.nbuckets == 0)
sec->map.resize(4096); sec->map.resize(4096);
@ -1720,6 +1724,22 @@ void copy_chunks(Context<E> &ctx) {
if constexpr (is_arm32<E>) if constexpr (is_arm32<E>)
fixup_arm_exidx_section(ctx); fixup_arm_exidx_section(ctx);
// Zero-clear paddings between chunks
auto zero = [&](Chunk<E> *chunk, i64 next_start) {
i64 pos = chunk->shdr.sh_offset + chunk->shdr.sh_size;
memset(ctx.buf + pos, 0, next_start - pos);
};
std::vector<Chunk<E> *> chunks = ctx.chunks;
std::erase_if(chunks, [](Chunk<E> *chunk) {
return chunk->shdr.sh_type == SHT_NOBITS;
});
for (i64 i = 1; i < chunks.size(); i++)
zero(chunks[i - 1], chunks[i]->shdr.sh_offset);
zero(chunks.back(), ctx.output_file->filesize);
} }
// Rewrite the leading endbr64 instruction with a nop if a function // Rewrite the leading endbr64 instruction with a nop if a function
@ -2168,26 +2188,6 @@ void compute_address_significance(Context<E> &ctx) {
}); });
} }
// Zero-fills the gaps in the output buffer between consecutive chunks,
// and between the last chunk and the end of the file. SHT_NOBITS chunks
// are ignored.
// NOTE(review): presumably ctx.chunks is ordered by file offset at this
// point -- confirm against the caller.
template <typename E>
void clear_padding(Context<E> &ctx) {
  Timer t(ctx, "clear_padding");

  // Work on a copy so that ctx.chunks itself is left untouched.
  std::vector<Chunk<E> *> in_file = ctx.chunks;
  std::erase_if(in_file, [](Chunk<E> *c) {
    return c->shdr.sh_type == SHT_NOBITS;
  });

  // Zero-fills the bytes from the end of `chunk` up to `limit`.
  auto fill_gap = [&](Chunk<E> *chunk, i64 limit) {
    i64 end = chunk->shdr.sh_offset + chunk->shdr.sh_size;
    memset(ctx.buf + end, 0, limit - end);
  };

  for (i64 i = 0; i + 1 < in_file.size(); i++)
    fill_gap(in_file[i], in_file[i + 1]->shdr.sh_offset);

  fill_gap(in_file.back(), ctx.output_file->filesize);
}
// We want to sort output chunks in the following order. // We want to sort output chunks in the following order.
// //
// <ELF header> // <ELF header>
@ -2999,6 +2999,65 @@ i64 compress_debug_sections(Context<E> &ctx) {
return set_osec_offsets(ctx); return set_osec_offsets(ctx);
} }
// BLAKE3 is a cryptographic hash function just like SHA256.
// We use it instead of SHA256 because it's faster.
//
// Hashes the `size` bytes starting at `buf` in one shot and writes a
// BLAKE3_OUT_LEN-byte digest to `out`, so `out` must have room for
// BLAKE3_OUT_LEN bytes.
static void blake3_hash(u8 *buf, i64 size, u8 *out) {
blake3_hasher hasher;
blake3_hasher_init(&hasher);
blake3_hasher_update(&hasher, buf, size);
blake3_hasher_finalize(&hasher, out, BLAKE3_OUT_LEN);
}
// Computes the build ID selected by ctx.arg.build_id.kind and stores
// the resulting bytes in ctx.buildid->contents. This function only
// fills that vector; it does not write to the build-id section itself.
//
//  - HEX:  the user-supplied value is used as is.
//  - HASH: a BLAKE3-based hash of the output buffer (ctx.buf), cut to
//          ctx.arg.build_id.size() bytes.
//  - UUID: 16 random bytes marked as a version-4 UUID.
template <typename E>
void compute_build_id(Context<E> &ctx) {
Timer t(ctx, "compute_build_id");
switch (ctx.arg.build_id.kind) {
case BuildId::HEX:
ctx.buildid->contents = ctx.arg.build_id.value;
break;
case BuildId::HASH: {
// The output is hashed in two levels: it is divided into 4 MiB
// shards whose digests are computed in parallel, and the
// concatenated shard digests are then hashed once more.
i64 shard_size = 4 * 1024 * 1024;
i64 filesize = ctx.output_file->filesize;
i64 num_shards = align_to(filesize, shard_size) / shard_size;
std::vector<u8> shards(num_shards * BLAKE3_OUT_LEN);
tbb::parallel_for((i64)0, num_shards, [&](i64 i) {
// The last shard ends at the end of the file and may be shorter
// than shard_size.
u8 *begin = ctx.buf + shard_size * i;
u8 *end = (i == num_shards - 1) ? ctx.buf + filesize : begin + shard_size;
blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN);
#ifdef HAVE_MADVISE
// Make the kernel page out the file contents we've just written
// so that subsequent close(2) call will become quicker.
// The first shard (i == 0) is deliberately left alone.
if (i > 0 && ctx.output_file->is_mmapped)
madvise(begin, end - begin, MADV_DONTNEED);
#endif
});
u8 buf[BLAKE3_OUT_LEN];
blake3_hash(shards.data(), shards.size(), buf);
// Only the first build_id.size() bytes of the digest are kept.
assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN);
ctx.buildid->contents = {buf, buf + ctx.arg.build_id.size()};
break;
}
case BuildId::UUID: {
u8 buf[16];
get_random_bytes(buf, 16);
// Indicate that this is UUIDv4 as defined by RFC4122
// (the high nibble of byte 6 becomes 0100 and the top two bits of
// byte 8 become 10).
buf[6] = (buf[6] & 0b0000'1111) | 0b0100'0000;
buf[8] = (buf[8] & 0b0011'1111) | 0b1000'0000;
ctx.buildid->contents = {buf, buf + 16};
break;
}
default:
unreachable();
}
}
// Write Makefile-style dependency rules to a file specified by // Write Makefile-style dependency rules to a file specified by
// --dependency-file. This is analogous to the compiler's -M flag. // --dependency-file. This is analogous to the compiler's -M flag.
template <typename E> template <typename E>
@ -3134,11 +3193,11 @@ template void apply_version_script(Context<E> &);
template void parse_symbol_version(Context<E> &); template void parse_symbol_version(Context<E> &);
template void compute_import_export(Context<E> &); template void compute_import_export(Context<E> &);
template void compute_address_significance(Context<E> &); template void compute_address_significance(Context<E> &);
template void clear_padding(Context<E> &);
template void compute_section_headers(Context<E> &); template void compute_section_headers(Context<E> &);
template i64 set_osec_offsets(Context<E> &); template i64 set_osec_offsets(Context<E> &);
template void fix_synthetic_symbols(Context<E> &); template void fix_synthetic_symbols(Context<E> &);
template i64 compress_debug_sections(Context<E> &); template i64 compress_debug_sections(Context<E> &);
template void compute_build_id(Context<E> &);
template void write_dependency_file(Context<E> &); template void write_dependency_file(Context<E> &);
template void show_stats(Context<E> &); template void show_stats(Context<E> &);

View File

@ -176,7 +176,6 @@ void combine_objects(Context<E> &ctx) {
ctx.buf = ctx.output_file->buf; ctx.buf = ctx.output_file->buf;
copy_chunks(ctx); copy_chunks(ctx);
clear_padding(ctx);
ctx.output_file->close(ctx); ctx.output_file->close(ctx);
ctx.checkpoint(); ctx.checkpoint();

View File

@ -1,5 +1,3 @@
#if !defined(_WIN32) && !defined(__APPLE__)
#include "mold.h" #include "mold.h"
#include "config.h" #include "config.h"
@ -14,10 +12,12 @@
namespace mold::elf { namespace mold::elf {
#ifdef MOLD_X86_64 #ifdef MOLD_X86_64
static int pipe_write_fd = -1;
// Exiting from a program with large memory usage is slow -- // Exiting from a program with large memory usage is slow --
// it may take a few hundred milliseconds. To hide the latency, // it may take a few hundred milliseconds. To hide the latency,
// we fork a child and let it do the actual linking work. // we fork a child and let it do the actual linking work.
std::function<void()> fork_child() { void fork_child() {
int pipefd[2]; int pipefd[2];
if (pipe(pipefd) == -1) { if (pipe(pipefd) == -1) {
perror("pipe"); perror("pipe");
@ -50,12 +50,16 @@ std::function<void()> fork_child() {
// Child // Child
close(pipefd[0]); close(pipefd[0]);
pipe_write_fd = pipefd[1];
}
return [=] { void notify_parent() {
char buf[] = {1}; if (pipe_write_fd == -1)
[[maybe_unused]] int n = write(pipefd[1], buf, 1); return;
assert(n == 1);
}; char buf[] = {1};
[[maybe_unused]] int n = write(pipe_write_fd, buf, 1);
assert(n == 1);
} }
#endif #endif
@ -84,6 +88,9 @@ static std::string find_dso(Context<E> &ctx, std::filesystem::path self) {
template <typename E> template <typename E>
[[noreturn]] [[noreturn]]
void process_run_subcommand(Context<E> &ctx, int argc, char **argv) { void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
#ifdef __APPLE__
Fatal(ctx) << "-run is not supported on macOS";
#else
assert(argv[1] == "-run"s || argv[1] == "--run"s); assert(argv[1] == "-run"s || argv[1] == "--run"s);
if (!argv[2]) if (!argv[2])
@ -111,6 +118,7 @@ void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
// Execute a given command // Execute a given command
execvp(argv[2], argv + 2); execvp(argv[2], argv + 2);
Fatal(ctx) << "mold -run failed: " << argv[2] << ": " << errno_string(); Fatal(ctx) << "mold -run failed: " << argv[2] << ": " << errno_string();
#endif
} }
using E = MOLD_TARGET; using E = MOLD_TARGET;
@ -118,5 +126,3 @@ using E = MOLD_TARGET;
template void process_run_subcommand(Context<E> &, int, char **); template void process_run_subcommand(Context<E> &, int, char **);
} // namespace mold::elf } // namespace mold::elf
#endif

20
elf/subprocess-win32.cc Normal file
View File

@ -0,0 +1,20 @@
#include "mold.h"
namespace mold::elf {
// fork_child() and notify_parent() are intentionally empty in this file;
// calling them has no effect. They are defined only when MOLD_X86_64 is
// defined.
// NOTE(review): presumably these stand in for the Unix implementations
// that run the linker in a forked child process -- confirm against the
// Unix counterpart of this file.
#ifdef MOLD_X86_64
void fork_child() {}
void notify_parent() {}
#endif
// Handles the `-run` subcommand. This implementation does not run
// anything: it only reports a fatal error saying that -run is supported
// only on Unix. ctx is used for error reporting; argc and argv are unused.
// The function is declared [[noreturn]], so Fatal is expected not to
// return control to the caller.
template <typename E>
[[noreturn]]
void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
Fatal(ctx) << "-run is supported only on Unix";
}
// Explicitly instantiate the template for the target selected by
// MOLD_TARGET.
using E = MOLD_TARGET;
template void process_run_subcommand(Context<E> &, int, char **);
} // namespace mold::elf