1
1
mirror of https://github.com/rui314/mold.git synced 2024-10-05 17:17:40 +03:00

Compare commits

...

13 Commits

Author SHA1 Message Date
Christian Sattler
9fdd93bdc8
Merge 24f818820b into 7089304046 2024-07-06 02:14:41 +01:00
Rui Ueyama
7089304046 Simplify 2024-07-05 19:47:33 +09:00
Rui Ueyama
5ab3708e9f
Merge pull request #1298 from yujincheng08/patch-1
Fix cross-compile lto on MacOS
2024-07-05 19:45:10 +09:00
Rui Ueyama
aa939d7a6a Temporarily disable ASAN tests
It looks like starting with Clang 18.1.3, ASAN began reporting an
error on OneTBB.
2024-07-05 18:27:00 +09:00
Rui Ueyama
a63bbcd7b1 Attempt to fix CI 2024-07-05 17:58:13 +09:00
Rui Ueyama
339f9c485a Attempt to fix CI 2024-07-05 17:42:32 +09:00
LoveSy
4905f6d3f2
Use macos-12 because macos-11 has been removed
https://github.blog/changelog/2024-05-20-actions-upcoming-changes-to-github-hosted-macos-runners/#macos-11-deprecation-and-removal
2024-07-05 15:32:37 +08:00
Rui Ueyama
cb8cc1c8f0 Refactor 2024-07-05 16:00:09 +09:00
Rui Ueyama
c6b54532e9 Refactor 2024-07-04 16:17:25 +09:00
Rui Ueyama
3936134823 Refactor 2024-07-04 15:08:37 +09:00
Rui Ueyama
55ca05bab6 Refactor 2024-07-04 13:59:29 +09:00
LoveSy
7305a5c5dd
Fix cross-compile lto on MacOS 2024-07-03 21:55:22 +08:00
Christian Sattler
24f818820b
Fix typos in design.md 2021-12-26 14:37:35 +01:00
13 changed files with 312 additions and 285 deletions

View File

@ -13,7 +13,6 @@ jobs:
target:
# Disable PCH for the default configuration. This prevents relying on implicit includes.
- '-DCMAKE_DISABLE_PRECOMPILE_HEADERS=On'
- '-DMOLD_USE_ASAN=On'
- '-DMOLD_USE_TSAN=On'
runs-on: ubuntu-20.04
steps:
@ -110,7 +109,7 @@ jobs:
!build/CMakeFiles
build-macos:
runs-on: macos-11
runs-on: macos-12
strategy:
matrix:
target:

View File

@ -328,7 +328,6 @@ list(APPEND MOLD_ELF_TEMPLATE_FILES
elf/output-chunks.cc
elf/passes.cc
elf/relocatable.cc
elf/subprocess.cc
elf/thunks.cc
elf/tls.cc
)
@ -339,6 +338,12 @@ else()
list(APPEND MOLD_ELF_TEMPLATE_FILES elf/lto-unix.cc)
endif()
if(WIN32)
list(APPEND MOLD_ELF_TEMPLATE_FILES elf/subprocess-win32.cc)
else()
list(APPEND MOLD_ELF_TEMPLATE_FILES elf/subprocess-unix.cc)
endif()
function(mold_instantiate_templates SOURCE TARGET)
set(PATH ${CMAKE_BINARY_DIR}/${SOURCE}.${TARGET}.cc)
if(NOT EXISTS ${PATH})

View File

@ -163,7 +163,7 @@ tool.
(i.e. `_start`) or a few other root sections. In mold, we are using
multiple threads to mark sections concurrently.
- Similarly, BFD, gold an lld support Identical Comdat Folding (ICF)
- Similarly, BFD, gold and lld support Identical Comdat Folding (ICF)
as yet another size optimization. ICF merges two or more read-only
sections that happen to have the same contents and relocations.
To do that, we have to find isomorphic subgraphs from larger graphs.
@ -381,7 +381,7 @@ not plan to implement and why I turned them down.
fixing the final file layout.
The other reason to reject this idea is because there's a good
chance for this idea to have a negative impact on linker's overall
chance for this idea to have a negative impact on the linker's overall
performance. If we copy file contents before fixing the layout, we
can't apply relocations to them while copying because symbol
addresses are not available yet. If we fix the file layout first, we

View File

@ -677,104 +677,6 @@ void ObjectFile<E>::sort_relocations(Context<E> &ctx) {
}
}
// Returns the offset of the first null terminator found in `data` at or
// after `pos`, or `data.npos` if there is none.
//
// A terminator is `entsize` consecutive zero bytes. For `entsize == 1`
// this is a plain byte search. For wider entries only offsets `pos`,
// `pos + entsize`, `pos + 2 * entsize`, ... are examined, so `pos` is
// expected to be aligned to the entry size by the caller.
static size_t find_null(std::string_view data, i64 pos, i64 entsize) {
  if (entsize == 1)
    return data.find('\0', pos);

  // Do the bounds check with signed arithmetic. `data.size() - entsize`
  // is an unsigned expression where size_t is as wide as i64, so it wraps
  // around to a huge value when the data is shorter than one entry; the
  // loop then runs past the end and substr() throws std::out_of_range.
  i64 size = data.size();
  for (; pos + entsize <= size; pos += entsize)
    if (data.substr(pos, entsize).find_first_not_of('\0') == data.npos)
      return pos;
  return data.npos;
}
// A mergeable section (one with the SHF_MERGE flag) usually holds string
// literals. The linker cuts such a section into null-terminated strings,
// deduplicates them against the strings coming from the other object
// files, and writes every distinct string to the output only once.
//
// As a result, a string literal that appears in two source files takes
// up space only once in the output file.
//
// Mergeable sections may also hold fixed-size read-only records instead
// of strings, although that is less common.
//
// This function cuts the contents of `sec` into "section fragments", the
// unit of merging, and records each fragment's start offset and hash.
//
// It returns null when `sec` is not split: it is dead, it has relocations
// (mergeable sections with relocations are not supported), it is empty,
// it lacks SHF_MERGE, or no nonzero entry size can be determined.
template <typename E>
static std::unique_ptr<MergeableSection<E>>
split_section(Context<E> &ctx, InputSection<E> &sec) {
  bool has_relocs = (sec.relsec_idx != -1);
  if (!sec.is_alive || has_relocs || sec.sh_size == 0)
    return nullptr;

  const ElfShdr<E> &hdr = sec.shdr();
  if (!(hdr.sh_flags & SHF_MERGE))
    return nullptr;

  // A zero sh_entsize falls back to 1 for string sections and to the
  // section alignment otherwise.
  i64 entsize = hdr.sh_entsize;
  if (entsize == 0) {
    if (hdr.sh_flags & SHF_STRINGS)
      entsize = 1;
    else
      entsize = (int)hdr.sh_addralign;
  }
  if (entsize == 0)
    return nullptr;

  i64 addralign = hdr.sh_addralign;
  addralign = (addralign == 0) ? 1 : addralign;

  std::unique_ptr<MergeableSection<E>> msec(new MergeableSection<E>);
  msec->parent = MergedSection<E>::get_instance(ctx, sec.name(), hdr.sh_type,
                                                hdr.sh_flags, entsize,
                                                addralign);
  msec->p2align = sec.p2align;

  // Uncompress the section contents if they are compressed.
  sec.uncompress(ctx);

  std::string_view data = sec.contents;
  msec->contents = sec.contents;

  if (data.size() > UINT32_MAX)
    Fatal(ctx) << sec << ": mergeable section too large";

  // Record where each fragment begins.
  if (!(hdr.sh_flags & SHF_STRINGS)) {
    // Fixed-size records: one fragment per entry.
    if (data.size() % entsize)
      Fatal(ctx) << sec << ": section size is not multiple of sh_entsize";

    msec->frag_offsets.reserve(data.size() / entsize);

    i64 off = 0;
    while (off < data.size()) {
      msec->frag_offsets.push_back(off);
      off += entsize;
    }
  } else {
    // Strings: a fragment runs up to and including its terminator.
    i64 off = 0;
    while (off < data.size()) {
      msec->frag_offsets.push_back(off);
      size_t terminator = find_null(data, off, entsize);
      if (terminator == data.npos)
        Fatal(ctx) << sec << ": string is not null terminated";
      off = terminator + entsize;
    }
  }

  // Hash every fragment and feed the hashes to a local cardinality
  // estimator, which is merged into the parent's estimator at the end.
  HyperLogLog estimator;
  i64 num_frags = msec->frag_offsets.size();
  msec->hashes.reserve(num_frags);

  for (i64 idx = 0; idx < num_frags; idx++) {
    u64 hash = hash_string(msec->get_contents(idx));
    msec->hashes.push_back(hash);
    estimator.insert(hash);
  }

  msec->parent->estimator.merge(estimator);

  static Counter counter("string_fragments");
  counter += msec->frag_offsets.size();
  return msec;
}
// Usually a section is an atomic unit of inclusion or exclusion.
// Linker doesn't care about its contents. However, if a section is a
// mergeable section (a section with SHF_MERGE bit set), the linker is
@ -818,34 +720,45 @@ split_section(Context<E> &ctx, InputSection<E> &sec) {
// section piece, the section piece is attached to the symbol.
template <typename E>
void ObjectFile<E>::initialize_mergeable_sections(Context<E> &ctx) {
mergeable_sections.resize(sections.size());
// Convert InputSections to MergeableSections
for (i64 i = 0; i < this->sections.size(); i++) {
InputSection<E> *isec = this->sections[i].get();
if (!isec || isec->sh_size == 0 || isec->relsec_idx != -1)
continue;
for (i64 i = 0; i < sections.size(); i++) {
if (std::unique_ptr<InputSection<E>> &isec = sections[i]) {
if (std::unique_ptr<MergeableSection<E>> m = split_section(ctx, *isec)) {
mergeable_sections[i] = std::move(m);
isec->is_alive = false;
}
MergedSection<E> *parent =
MergedSection<E>::get_instance(ctx, isec->name(), isec->shdr());
if (parent) {
this->mergeable_sections[i] =
std::make_unique<MergeableSection<E>>(ctx, *parent, this->sections[i]);
this->sections[i] = nullptr;
}
}
// Split section contents
for (std::unique_ptr<MergeableSection<E>> &sec : mergeable_sections)
if (sec)
sec->split_contents(ctx);
}
template <typename E>
void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
for (std::unique_ptr<MergeableSection<E>> &m : mergeable_sections) {
if (m) {
m->fragments.reserve(m->frag_offsets.size());
if (!m)
continue;
for (i64 i = 0; i < m->frag_offsets.size(); i++) {
SectionFragment<E> *frag =
m->parent->insert(ctx, m->get_contents(i), m->hashes[i], m->p2align);
m->fragments.push_back(frag);
}
m->fragments.reserve(m->frag_offsets.size());
// Reclaim memory as we'll never use this vector again
m->hashes.clear();
m->hashes.shrink_to_fit();
for (i64 i = 0; i < m->frag_offsets.size(); i++) {
SectionFragment<E> *frag =
m->parent.insert(ctx, m->get_contents(i), m->hashes[i], m->p2align);
m->fragments.push_back(frag);
}
// Reclaim memory as we'll never use this vector again
m->hashes.clear();
m->hashes.shrink_to_fit();
}
// Attach section pieces to symbols.
@ -872,24 +785,25 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
}
// Compute the size of frag_syms.
i64 nfrag_syms = 0;
std::vector<InputSection<E> *> vec;
for (std::unique_ptr<InputSection<E>> &isec : sections)
if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC))
for (ElfRel<E> &r : isec->get_rels(ctx))
if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)])
nfrag_syms++;
vec.push_back(isec.get());
i64 nfrag_syms = 0;
for (InputSection<E> *isec : vec)
for (ElfRel<E> &r : isec->get_rels(ctx))
if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)])
nfrag_syms++;
this->frag_syms.resize(nfrag_syms);
// For each relocation referring a mergeable section symbol, we create
// a new dummy non-section symbol and redirect the relocation to the
// newly-created symbol.
// For each relocation referring to a mergeable section symbol, we
// create a new dummy non-section symbol and redirect the relocation
// to the newly created symbol.
i64 idx = 0;
for (std::unique_ptr<InputSection<E>> &isec : sections) {
if (!isec || !isec->is_alive || !(isec->shdr().sh_flags & SHF_ALLOC))
continue;
for (InputSection<E> *isec : vec) {
for (ElfRel<E> &r : isec->get_rels(ctx)) {
const ElfSym<E> &esym = this->elf_syms[r.r_sym];
if (esym.st_type != STT_SECTION)
@ -929,6 +843,8 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
template <typename E>
void ObjectFile<E>::parse(Context<E> &ctx) {
sections.resize(this->elf_sections.size());
mergeable_sections.resize(sections.size());
symtab_sec = this->find_section(SHT_SYMTAB);
if (symtab_sec) {

View File

@ -552,9 +552,88 @@ bool InputSection<E>::record_undef_error(Context<E> &ctx, const ElfRel<E> &rel)
return false;
}
// Wraps an input section as a mergeable section that contributes to
// `parent`.
//
// Ownership of the InputSection is moved out of `isec`, so the caller's
// unique_ptr is null once this constructor returns. `p2align` is read
// through the freshly initialized `section` member.
// NOTE(review): that relies on `section` being declared before `p2align`
// in the class -- confirm against the class definition.
//
// The body calls uncompress() on the section before anything reads its
// contents.
template <typename E>
MergeableSection<E>::MergeableSection(Context<E> &ctx, MergedSection<E> &parent,
std::unique_ptr<InputSection<E>> &isec)
: parent(parent), section(std::move(isec)), p2align(section->p2align) {
section->uncompress(ctx);
}
// Returns the offset of the first null terminator found in `data` at or
// after `pos`, or `data.npos` if there is none.
//
// A terminator is `entsize` consecutive zero bytes. For `entsize == 1`
// this is a plain byte search. For wider entries only offsets `pos`,
// `pos + entsize`, `pos + 2 * entsize`, ... are examined, so `pos` is
// expected to be aligned to the entry size by the caller.
static size_t find_null(std::string_view data, i64 pos, i64 entsize) {
  if (entsize == 1)
    return data.find('\0', pos);

  // Do the bounds check with signed arithmetic. `data.size() - entsize`
  // is an unsigned expression where size_t is as wide as i64, so it wraps
  // around to a huge value when the data is shorter than one entry; the
  // loop then runs past the end and substr() throws std::out_of_range.
  i64 size = data.size();
  for (; pos + entsize <= size; pos += entsize)
    if (data.substr(pos, entsize).find_first_not_of('\0') == data.npos)
      return pos;
  return data.npos;
}
// A mergeable section (one with the SHF_MERGE flag) usually holds string
// literals. The linker cuts such a section into null-terminated strings,
// deduplicates them against the strings coming from the other object
// files, and writes every distinct string to the output only once.
//
// As a result, a string literal that appears in two source files takes
// up space only once in the output file.
//
// Mergeable sections may also hold fixed-size read-only records instead
// of strings, although that is less common.
//
// This function cuts the section contents into "section fragments", the
// unit of merging, and records each fragment's start offset and hash.
//
// Mergeable sections that have relocations are not supported.
template <typename E>
void MergeableSection<E>::split_contents(Context<E> &ctx) {
  std::string_view data = section->contents;

  if (data.size() > UINT32_MAX)
    Fatal(ctx) << *section
               << ": mergeable section too large";

  i64 entsize = parent.shdr.sh_entsize;

  // Record where each fragment begins.
  if (!(parent.shdr.sh_flags & SHF_STRINGS)) {
    // Fixed-size records: one fragment per entry.
    if (data.size() % entsize)
      Fatal(ctx) << *section << ": section size is not multiple of sh_entsize";

    frag_offsets.reserve(data.size() / entsize);

    i64 off = 0;
    while (off < data.size()) {
      frag_offsets.push_back(off);
      off += entsize;
    }
  } else {
    // Strings: a fragment runs up to and including its terminator.
    i64 off = 0;
    while (off < data.size()) {
      frag_offsets.push_back(off);
      size_t terminator = find_null(data, off, entsize);
      if (terminator == data.npos)
        Fatal(ctx) << *section << ": string is not null terminated";
      off = terminator + entsize;
    }
  }

  // Hash every fragment and feed the hashes to a local cardinality
  // estimator, which is merged into the parent's estimator at the end.
  HyperLogLog estimator;
  i64 num_frags = frag_offsets.size();
  hashes.reserve(num_frags);

  for (i64 idx = 0; idx < num_frags; idx++) {
    u64 hash = hash_string(get_contents(idx));
    hashes.push_back(hash);
    estimator.insert(hash);
  }

  parent.estimator.merge(estimator);

  static Counter counter("string_fragments");
  counter += frag_offsets.size();
}
using E = MOLD_TARGET;
template bool cie_equals(const CieRecord<E> &, const CieRecord<E> &);
template class InputSection<E>;
template class MergeableSection<E>;
} // namespace mold::elf

View File

@ -567,11 +567,7 @@ static ElfSym<E> to_elf_sym(PluginSymbol &psym) {
// Returns false if it's GCC.
template <typename E>
static bool is_llvm(Context<E> &ctx) {
#ifdef __MINGW32__
return ctx.arg.plugin.ends_with("LLVMgold.dll");
#else
return ctx.arg.plugin.ends_with("LLVMgold.so");
#endif
return ctx.arg.plugin.find("LLVMgold.") != ctx.arg.plugin.npos;
}
// Returns true if a given linker plugin supports the get_symbols_v3 API.

View File

@ -345,12 +345,8 @@ int elf_main(int argc, char **argv) {
Context<E> ctx;
// Process -run option first. process_run_subcommand() does not return.
if (argc >= 2 && (argv[1] == "-run"sv || argv[1] == "--run"sv)) {
#if defined(_WIN32) || defined(__APPLE__)
Fatal(ctx) << "-run is supported only on Unix";
#endif
if (argc >= 2 && (argv[1] == "-run"sv || argv[1] == "--run"sv))
process_run_subcommand(ctx, argc, argv);
}
// Parse non-positional command line options
ctx.cmdline_args = expand_response_files(ctx, argv);
@ -375,12 +371,8 @@ int elf_main(int argc, char **argv) {
<< ": " << errno_string();
// Fork a subprocess unless --no-fork is given.
std::function<void()> on_complete;
#if !defined(_WIN32) && !defined(__APPLE__)
if (ctx.arg.fork)
on_complete = fork_child();
#endif
fork_child();
acquire_global_lock();
@ -664,8 +656,13 @@ int elf_main(int argc, char **argv) {
// so we sort them.
ctx.reldyn->sort(ctx);
// Zero-clear paddings between sections
clear_padding(ctx);
// .note.gnu.build-id section contains a cryptographic hash of the
// entire output file. Now that we wrote everything except build-id,
// we can compute it.
if (ctx.buildid) {
compute_build_id(ctx);
ctx.buildid->copy_buf(ctx);
}
// .gdb_index's contents cannot be constructed before applying
// relocations to other debug sections. We have relocated debug
@ -673,12 +670,6 @@ int elf_main(int argc, char **argv) {
if (ctx.gdb_index)
write_gdb_index(ctx);
// .note.gnu.build-id section contains a cryptographic hash of the
// entire output file. Now that we wrote everything except build-id,
// we can compute it.
if (ctx.buildid)
ctx.buildid->write_buildid(ctx);
t_copy.stop();
ctx.checkpoint();
@ -707,8 +698,8 @@ int elf_main(int argc, char **argv) {
std::cout << std::flush;
std::cerr << std::flush;
if (on_complete)
on_complete();
if (ctx.arg.fork)
notify_parent();
release_global_lock();

View File

@ -47,6 +47,7 @@ template <typename E> class Symbol;
template <typename E> struct CieRecord;
template <typename E> struct Context;
template <typename E> struct FdeRecord;
template <typename E> class MergeableSection;
template <typename E> class RelocSection;
template <typename E>
@ -799,8 +800,7 @@ template <typename E>
class MergedSection : public Chunk<E> {
public:
static MergedSection<E> *
get_instance(Context<E> &ctx, std::string_view name, i64 type, i64 flags,
i64 entsize, i64 addralign);
get_instance(Context<E> &ctx, std::string_view name, const ElfShdr<E> &shdr);
SectionFragment<E> *insert(Context<E> &ctx, std::string_view data,
u64 hash, i64 p2align);
@ -949,9 +949,8 @@ public:
void update_shdr(Context<E> &ctx) override;
void copy_buf(Context<E> &ctx) override;
void write_buildid(Context<E> &ctx);
static constexpr i64 HEADER_SIZE = 16;
std::vector<u8> contents;
};
template <typename E>
@ -1090,12 +1089,17 @@ struct ComdatGroupRef {
};
template <typename E>
struct MergeableSection {
class MergeableSection {
public:
MergeableSection(Context<E> &ctx, MergedSection<E> &parent,
std::unique_ptr<InputSection<E>> &isec);
void split_contents(Context<E> &ctx);
std::pair<SectionFragment<E> *, i64> get_fragment(i64 offset);
std::string_view get_contents(i64 idx);
MergedSection<E> *parent;
std::string_view contents;
MergedSection<E> &parent;
std::unique_ptr<InputSection<E>> section;
std::vector<u32> frag_offsets;
std::vector<u32> hashes;
std::vector<SectionFragment<E> *> fragments;
@ -1368,7 +1372,8 @@ void print_map(Context<E> &ctx);
// subprocess.cc
//
std::function<void()> fork_child();
void fork_child();
void notify_parent();
template <typename E>
[[noreturn]]
@ -1426,11 +1431,11 @@ template <typename E> void apply_version_script(Context<E> &);
template <typename E> void parse_symbol_version(Context<E> &);
template <typename E> void compute_import_export(Context<E> &);
template <typename E> void compute_address_significance(Context<E> &);
template <typename E> void clear_padding(Context<E> &);
template <typename E> void compute_section_headers(Context<E> &);
template <typename E> i64 set_osec_offsets(Context<E> &);
template <typename E> void fix_synthetic_symbols(Context<E> &);
template <typename E> i64 compress_debug_sections(Context<E> &);
template <typename E> void compute_build_id(Context<E> &);
template <typename E> void write_dependency_file(Context<E> &);
template <typename E> void show_stats(Context<E> &);
@ -1565,7 +1570,18 @@ private:
//
struct BuildId {
i64 size() const;
i64 size() const {
switch (kind) {
case HEX:
return value.size();
case HASH:
return hash_size;
case UUID:
return 16;
default:
unreachable();
}
}
enum { NONE, HEX, HASH, UUID } kind = NONE;
std::vector<u8> value;
@ -2411,8 +2427,8 @@ template <typename E>
std::string_view MergeableSection<E>::get_contents(i64 i) {
i64 cur = frag_offsets[i];
if (i == frag_offsets.size() - 1)
return contents.substr(cur);
return contents.substr(cur, frag_offsets[i + 1] - cur);
return section->contents.substr(cur);
return section->contents.substr(cur, frag_offsets[i + 1] - cur);
}
template <typename E>

View File

@ -1,7 +1,5 @@
#include "mold.h"
#include "config.h"
#include "blake3.h"
#include <cctype>
#include <set>
@ -1915,15 +1913,26 @@ MergedSection<E>::MergedSection(std::string_view name, i64 flags, i64 type,
template <typename E>
MergedSection<E> *
MergedSection<E>::get_instance(Context<E> &ctx, std::string_view name,
i64 type, i64 flags,
i64 entsize, i64 addralign) {
const ElfShdr<E> &shdr) {
if (!(shdr.sh_flags & SHF_MERGE))
return nullptr;
i64 addralign = std::max<i64>(1, shdr.sh_addralign);
i64 flags = shdr.sh_flags & ~(u64)SHF_GROUP & ~(u64)SHF_COMPRESSED;
i64 entsize = shdr.sh_entsize;
if (entsize == 0)
entsize = (shdr.sh_flags & SHF_STRINGS) ? 1 : (i64)shdr.sh_addralign;
if (entsize == 0)
return nullptr;
name = get_merged_output_name(ctx, name, flags, entsize, addralign);
flags = flags & ~(u64)SHF_GROUP & ~(u64)SHF_COMPRESSED;
auto find = [&]() -> MergedSection * {
for (std::unique_ptr<MergedSection<E>> &osec : ctx.merged_sections)
if (name == osec->name && flags == osec->shdr.sh_flags &&
type == osec->shdr.sh_type && entsize == osec->shdr.sh_entsize)
shdr.sh_type == osec->shdr.sh_type &&
entsize == osec->shdr.sh_entsize)
return osec.get();
return nullptr;
};
@ -1941,7 +1950,7 @@ MergedSection<E>::get_instance(Context<E> &ctx, std::string_view name,
if (MergedSection *osec = find())
return osec;
MergedSection *osec = new MergedSection(name, flags, type, entsize);
MergedSection *osec = new MergedSection(name, flags, shdr.sh_type, entsize);
ctx.merged_sections.emplace_back(osec);
return osec;
}
@ -2526,89 +2535,21 @@ void VerdefSection<E>::copy_buf(Context<E> &ctx) {
write_vector(ctx.buf + this->shdr.sh_offset, contents);
}
// Returns the number of bytes in the build ID for the selected kind:
// the length of the user-supplied value for HEX, `hash_size` for HASH,
// and 16 for UUID. Any other kind is treated as unreachable.
inline i64 BuildId::size() const {
  if (kind == HEX)
    return value.size();
  if (kind == HASH)
    return hash_size;
  if (kind == UUID)
    return 16;
  unreachable();
}
template <typename E>
void BuildIdSection<E>::update_shdr(Context<E> &ctx) {
this->shdr.sh_size = HEADER_SIZE + ctx.arg.build_id.size();
this->shdr.sh_size = ctx.arg.build_id.size() + 16; // +16 for the header
}
template <typename E>
void BuildIdSection<E>::copy_buf(Context<E> &ctx) {
U32<E> *base = (U32<E> *)(ctx.buf + this->shdr.sh_offset);
memset(base, 0, this->shdr.sh_size);
base[0] = 4; // Name size
base[1] = ctx.arg.build_id.size(); // Hash size
base[2] = NT_GNU_BUILD_ID; // Type
memcpy(base + 3, "GNU", 4); // Name string
}
// BLAKE3 is a cryptographic hash function just like SHA256.
// We use it instead of SHA256 because it's faster.
//
// Hashes the `size` bytes starting at `buf` and writes a digest of
// BLAKE3_OUT_LEN bytes to `out`, so `out` must have room for at least
// that many bytes.
static void blake3_hash(u8 *buf, i64 size, u8 *out) {
blake3_hasher hasher;
blake3_hasher_init(&hasher);
blake3_hasher_update(&hasher, buf, size);
blake3_hasher_finalize(&hasher, out, BLAKE3_OUT_LEN);
}
// Writes the build ID value into the output buffer, HEADER_SIZE bytes
// past this section's file offset. What is written depends on
// ctx.arg.build_id.kind:
//
//  - HEX:  the bytes stored in ctx.arg.build_id.value.
//  - HASH: a BLAKE3-based digest of the output buffer (see below),
//          truncated to ctx.arg.build_id.size() bytes.
//  - UUID: 16 random bytes with the version and variant bits overwritten.
//
// Any other kind is treated as unreachable.
template <typename E>
void BuildIdSection<E>::write_buildid(Context<E> &ctx) {
Timer t(ctx, "build_id");
u8 *buf = ctx.buf + this->shdr.sh_offset + HEADER_SIZE;
switch (ctx.arg.build_id.kind) {
case BuildId::HEX:
write_vector(buf, ctx.arg.build_id.value);
return;
case BuildId::HASH: {
// The buffer is hashed in 4 MiB shards in parallel; each shard's digest
// lands in its own BLAKE3_OUT_LEN-byte slot of `shards`.
i64 shard_size = 4 * 1024 * 1024;
i64 filesize = ctx.output_file->filesize;
i64 num_shards = align_to(filesize, shard_size) / shard_size;
std::vector<u8> shards(num_shards * BLAKE3_OUT_LEN);
tbb::parallel_for((i64)0, num_shards, [&](i64 i) {
u8 *begin = ctx.buf + shard_size * i;
// The last shard ends at the end of the file and may be shorter.
u8 *end = (i == num_shards - 1) ? ctx.buf + filesize : begin + shard_size;
blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN);
#ifdef HAVE_MADVISE
// Make the kernel page out the file contents we've just written
// so that subsequent close(2) call will become quicker.
if (i > 0 && ctx.output_file->is_mmapped)
madvise(begin, end - begin, MADV_DONTNEED);
#endif
});
// The final digest is the hash of all shard digests concatenated.
u8 digest[BLAKE3_OUT_LEN];
blake3_hash(shards.data(), shards.size(), digest);
assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN);
memcpy(buf, digest, ctx.arg.build_id.size());
return;
}
case BuildId::UUID: {
get_random_bytes(buf, 16);
// Indicate that this is UUIDv4 as defined by RFC4122
buf[6] = (buf[6] & 0b0000'1111) | 0b0100'0000;
buf[8] = (buf[8] & 0b0011'1111) | 0b1000'0000;
return;
}
default:
unreachable();
}
}
base[0] = 4; // Name size
base[1] = ctx.arg.build_id.size(); // Hash size
base[2] = NT_GNU_BUILD_ID; // Type
memcpy(base + 3, "GNU", 4); // Name string
write_vector(base + 4, contents); // Build ID
}
template <typename E>

View File

@ -1,4 +1,5 @@
#include "mold.h"
#include "blake3.h"
#include <fstream>
#include <functional>
@ -442,10 +443,13 @@ static std::string get_cmdline_args(Context<E> &ctx) {
template <typename E>
void add_comment_string(Context<E> &ctx, std::string str) {
MergedSection<E> *sec =
MergedSection<E>::get_instance(ctx, ".comment", SHT_PROGBITS,
SHF_MERGE | SHF_STRINGS, 1, 1);
ElfShdr<E> shdr = {};
shdr.sh_type = SHT_PROGBITS;
shdr.sh_flags = SHF_MERGE | SHF_STRINGS;
shdr.sh_entsize = 1;
shdr.sh_addralign = 1;
MergedSection<E> *sec = MergedSection<E>::get_instance(ctx, ".comment", shdr);
if (sec->map.nbuckets == 0)
sec->map.resize(4096);
@ -1720,6 +1724,22 @@ void copy_chunks(Context<E> &ctx) {
if constexpr (is_arm32<E>)
fixup_arm_exidx_section(ctx);
// Zero-clear paddings between chunks
auto zero = [&](Chunk<E> *chunk, i64 next_start) {
i64 pos = chunk->shdr.sh_offset + chunk->shdr.sh_size;
memset(ctx.buf + pos, 0, next_start - pos);
};
std::vector<Chunk<E> *> chunks = ctx.chunks;
std::erase_if(chunks, [](Chunk<E> *chunk) {
return chunk->shdr.sh_type == SHT_NOBITS;
});
for (i64 i = 1; i < chunks.size(); i++)
zero(chunks[i - 1], chunks[i]->shdr.sh_offset);
zero(chunks.back(), ctx.output_file->filesize);
}
// Rewrite the leading endbr64 instruction with a nop if a function
@ -2168,26 +2188,6 @@ void compute_address_significance(Context<E> &ctx) {
});
}
// Fills the gaps between consecutive output chunks with zero bytes, as
// well as the gap between the last chunk and the end of the output file.
// SHT_NOBITS chunks are left out of consideration.
template <typename E>
void clear_padding(Context<E> &ctx) {
  Timer t(ctx, "clear_padding");

  // Work on a copy so that ctx.chunks itself is left untouched.
  std::vector<Chunk<E> *> live = ctx.chunks;
  std::erase_if(live, [](Chunk<E> *c) {
    return c->shdr.sh_type == SHT_NOBITS;
  });

  // Zeroes everything from the end of `prev` up to `gap_end`.
  auto fill_gap = [&](Chunk<E> *prev, i64 gap_end) {
    i64 gap_begin = prev->shdr.sh_offset + prev->shdr.sh_size;
    memset(ctx.buf + gap_begin, 0, gap_end - gap_begin);
  };

  for (i64 i = 0; i + 1 < live.size(); i++)
    fill_gap(live[i], live[i + 1]->shdr.sh_offset);

  fill_gap(live.back(), ctx.output_file->filesize);
}
// We want to sort output chunks in the following order.
//
// <ELF header>
@ -2999,6 +2999,65 @@ i64 compress_debug_sections(Context<E> &ctx) {
return set_osec_offsets(ctx);
}
// BLAKE3 is a cryptographic hash function just like SHA256.
// We use it instead of SHA256 because it's faster.
//
// Hashes the `size` bytes starting at `buf` and writes a digest of
// BLAKE3_OUT_LEN bytes to `out`, so `out` must have room for at least
// that many bytes.
static void blake3_hash(u8 *buf, i64 size, u8 *out) {
blake3_hasher hasher;
blake3_hasher_init(&hasher);
blake3_hasher_update(&hasher, buf, size);
blake3_hasher_finalize(&hasher, out, BLAKE3_OUT_LEN);
}
// Computes the build ID value and stores it in ctx.buildid->contents.
// This function itself does not write to the build-id section's bytes in
// the output buffer. What is stored depends on ctx.arg.build_id.kind:
//
//  - HEX:  a copy of ctx.arg.build_id.value.
//  - HASH: a BLAKE3-based digest of the output buffer (see below),
//          truncated to ctx.arg.build_id.size() bytes.
//  - UUID: 16 random bytes with the version and variant bits overwritten.
//
// Any other kind is treated as unreachable.
template <typename E>
void compute_build_id(Context<E> &ctx) {
Timer t(ctx, "compute_build_id");
switch (ctx.arg.build_id.kind) {
case BuildId::HEX:
ctx.buildid->contents = ctx.arg.build_id.value;
break;
case BuildId::HASH: {
// The buffer is hashed in 4 MiB shards in parallel; each shard's digest
// lands in its own BLAKE3_OUT_LEN-byte slot of `shards`.
i64 shard_size = 4 * 1024 * 1024;
i64 filesize = ctx.output_file->filesize;
i64 num_shards = align_to(filesize, shard_size) / shard_size;
std::vector<u8> shards(num_shards * BLAKE3_OUT_LEN);
tbb::parallel_for((i64)0, num_shards, [&](i64 i) {
u8 *begin = ctx.buf + shard_size * i;
// The last shard ends at the end of the file and may be shorter.
u8 *end = (i == num_shards - 1) ? ctx.buf + filesize : begin + shard_size;
blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN);
#ifdef HAVE_MADVISE
// Make the kernel page out the file contents we've just written
// so that subsequent close(2) call will become quicker.
if (i > 0 && ctx.output_file->is_mmapped)
madvise(begin, end - begin, MADV_DONTNEED);
#endif
});
// The final digest is the hash of all shard digests concatenated.
u8 buf[BLAKE3_OUT_LEN];
blake3_hash(shards.data(), shards.size(), buf);
assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN);
ctx.buildid->contents = {buf, buf + ctx.arg.build_id.size()};
break;
}
case BuildId::UUID: {
u8 buf[16];
get_random_bytes(buf, 16);
// Indicate that this is UUIDv4 as defined by RFC4122
buf[6] = (buf[6] & 0b0000'1111) | 0b0100'0000;
buf[8] = (buf[8] & 0b0011'1111) | 0b1000'0000;
ctx.buildid->contents = {buf, buf + 16};
break;
}
default:
unreachable();
}
}
// Write Makefile-style dependency rules to a file specified by
// --dependency-file. This is analogous to the compiler's -M flag.
template <typename E>
@ -3134,11 +3193,11 @@ template void apply_version_script(Context<E> &);
template void parse_symbol_version(Context<E> &);
template void compute_import_export(Context<E> &);
template void compute_address_significance(Context<E> &);
template void clear_padding(Context<E> &);
template void compute_section_headers(Context<E> &);
template i64 set_osec_offsets(Context<E> &);
template void fix_synthetic_symbols(Context<E> &);
template i64 compress_debug_sections(Context<E> &);
template void compute_build_id(Context<E> &);
template void write_dependency_file(Context<E> &);
template void show_stats(Context<E> &);

View File

@ -176,7 +176,6 @@ void combine_objects(Context<E> &ctx) {
ctx.buf = ctx.output_file->buf;
copy_chunks(ctx);
clear_padding(ctx);
ctx.output_file->close(ctx);
ctx.checkpoint();

View File

@ -1,5 +1,3 @@
#if !defined(_WIN32) && !defined(__APPLE__)
#include "mold.h"
#include "config.h"
@ -14,10 +12,12 @@
namespace mold::elf {
#ifdef MOLD_X86_64
static int pipe_write_fd = -1;
// Exiting from a program with large memory usage is slow --
// it may take a few hundred milliseconds. To hide the latency,
// we fork a child and let it do the actual linking work.
std::function<void()> fork_child() {
void fork_child() {
int pipefd[2];
if (pipe(pipefd) == -1) {
perror("pipe");
@ -50,12 +50,16 @@ std::function<void()> fork_child() {
// Child
close(pipefd[0]);
pipe_write_fd = pipefd[1];
}
return [=] {
char buf[] = {1};
[[maybe_unused]] int n = write(pipefd[1], buf, 1);
assert(n == 1);
};
// Writes a single byte to the pipe whose write end is stored in
// pipe_write_fd. Does nothing while pipe_write_fd is still -1.
// NOTE(review): presumably the parent process created by fork_child()
// is waiting on the read end of this pipe -- confirm against fork_child().
void notify_parent() {
  if (pipe_write_fd != -1) {
    const char byte = 1;
    [[maybe_unused]] int n = write(pipe_write_fd, &byte, 1);
    assert(n == 1);
  }
}
#endif
@ -84,6 +88,9 @@ static std::string find_dso(Context<E> &ctx, std::filesystem::path self) {
template <typename E>
[[noreturn]]
void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
#ifdef __APPLE__
Fatal(ctx) << "-run is not supported on macOS";
#else
assert(argv[1] == "-run"s || argv[1] == "--run"s);
if (!argv[2])
@ -111,6 +118,7 @@ void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
// Execute a given command
execvp(argv[2], argv + 2);
Fatal(ctx) << "mold -run failed: " << argv[2] << ": " << errno_string();
#endif
}
using E = MOLD_TARGET;
@ -118,5 +126,3 @@ using E = MOLD_TARGET;
template void process_run_subcommand(Context<E> &, int, char **);
} // namespace mold::elf
#endif

20
elf/subprocess-win32.cc Normal file
View File

@ -0,0 +1,20 @@
#include "mold.h"
// Windows counterparts of the subprocess helpers. Nothing is forked
// here, so the fork-related functions are empty stubs and -run is
// rejected.
namespace mold::elf {
// NOTE(review): the guard presumably ensures these non-template functions
// are defined only once when this file is compiled for multiple targets
// -- confirm against the build setup.
#ifdef MOLD_X86_64
// No-op: no child process is created.
void fork_child() {}
// No-op: there is no parent process to notify.
void notify_parent() {}
#endif
// Handles the -run subcommand by reporting that it is unsupported.
// The parameters `argc` and `argv` are unused.
// NOTE(review): the [[noreturn]] attribute relies on Fatal terminating
// the process -- confirm.
template <typename E>
[[noreturn]]
void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
Fatal(ctx) << "-run is supported only on Unix";
}
// Explicitly instantiate the template for the target this file is
// compiled for.
using E = MOLD_TARGET;
template void process_run_subcommand(Context<E> &, int, char **);
} // namespace mold::elf