1
1
mirror of https://github.com/rui314/mold.git synced 2024-10-05 17:17:40 +03:00

Compare commits

...

13 Commits

Author SHA1 Message Date
Felix Yan
cd35eb175b
Merge 46c4b826aa into 7089304046 2024-07-06 02:14:41 +01:00
Rui Ueyama
7089304046 Simplify 2024-07-05 19:47:33 +09:00
Rui Ueyama
5ab3708e9f
Merge pull request #1298 from yujincheng08/patch-1
Fix cross-compile lto on MacOS
2024-07-05 19:45:10 +09:00
Rui Ueyama
aa939d7a6a Temporarily disable ASAN tests
It looks like starting with Clang 18.1.3, ASAN began reporting an
error on OneTBB.
2024-07-05 18:27:00 +09:00
Rui Ueyama
a63bbcd7b1 Attempt to fix CI 2024-07-05 17:58:13 +09:00
Rui Ueyama
339f9c485a Attempt to fix CI 2024-07-05 17:42:32 +09:00
LoveSy
4905f6d3f2
Use macos-12 because macos-11 has been removed
https://github.blog/changelog/2024-05-20-actions-upcoming-changes-to-github-hosted-macos-runners/#macos-11-deprecation-and-removal
2024-07-05 15:32:37 +08:00
Rui Ueyama
cb8cc1c8f0 Refactor 2024-07-05 16:00:09 +09:00
Rui Ueyama
c6b54532e9 Refactor 2024-07-04 16:17:25 +09:00
Rui Ueyama
3936134823 Refactor 2024-07-04 15:08:37 +09:00
Rui Ueyama
55ca05bab6 Refactor 2024-07-04 13:59:29 +09:00
LoveSy
7305a5c5dd
Fix cross-compile lto on MacOS 2024-07-03 21:55:22 +08:00
Felix Yan
46c4b826aa
Correct typos in docs/bugs.md 2022-08-22 12:25:28 +03:00
13 changed files with 312 additions and 285 deletions

View File

@ -13,7 +13,6 @@ jobs:
target:
# Disable PCH for the default configuration. This prevents relying on implicit includes.
- '-DCMAKE_DISABLE_PRECOMPILE_HEADERS=On'
- '-DMOLD_USE_ASAN=On'
- '-DMOLD_USE_TSAN=On'
runs-on: ubuntu-20.04
steps:
@ -110,7 +109,7 @@ jobs:
!build/CMakeFiles
build-macos:
runs-on: macos-11
runs-on: macos-12
strategy:
matrix:
target:

View File

@ -328,7 +328,6 @@ list(APPEND MOLD_ELF_TEMPLATE_FILES
elf/output-chunks.cc
elf/passes.cc
elf/relocatable.cc
elf/subprocess.cc
elf/thunks.cc
elf/tls.cc
)
@ -339,6 +338,12 @@ else()
list(APPEND MOLD_ELF_TEMPLATE_FILES elf/lto-unix.cc)
endif()
if(WIN32)
list(APPEND MOLD_ELF_TEMPLATE_FILES elf/subprocess-win32.cc)
else()
list(APPEND MOLD_ELF_TEMPLATE_FILES elf/subprocess-unix.cc)
endif()
function(mold_instantiate_templates SOURCE TARGET)
set(PATH ${CMAKE_BINARY_DIR}/${SOURCE}.${TARGET}.cc)
if(NOT EXISTS ${PATH})

View File

@ -88,9 +88,9 @@ Problem: A statically-linked "hello world" program crashes after
reading a thread-local variable.
Investigation: Thread-local variables are very different from other
types of varaibles because there may be more than one instance of the
types of variables because there may be more than one instance of the
same variable in memory. Each thread has its copy of thread-local
varaibles. `%fs` segment register points the end of the variable area
variables. `%fs` segment register points the end of the variable area
for the current thread, and the variables are accessed as an offset
from `%fs`.

View File

@ -677,104 +677,6 @@ void ObjectFile<E>::sort_relocations(Context<E> &ctx) {
}
}
// Returns the offset of the first null terminator found at or after
// `pos` in `data`, or std::string_view::npos if there is none.
//
// `entsize` is the width of a character in bytes. For entsize == 1
// this is a plain search for a '\0' byte. For wider characters, the
// data is scanned in steps of `entsize` starting at `pos`, and an
// entry counts as a terminator only if all of its `entsize` bytes are
// zero and it lies entirely within `data`.
static size_t find_null(std::string_view data, i64 pos, i64 entsize) {
  if (entsize == 1)
    return data.find('\0', pos);

  // Compute the last possible start offset in the signed domain.
  // `data.size() - entsize` is an unsigned expression, so it would wrap
  // around to a huge value if the data is shorter than a single entry,
  // letting the loop run past the end of `data` (substr() then throws
  // std::out_of_range) or accept a truncated entry as a terminator.
  i64 last = (i64)data.size() - entsize;

  for (; pos <= last; pos += entsize)
    if (data.substr(pos, entsize).find_first_not_of('\0') == data.npos)
      return pos;
  return data.npos;
}
// Mergeable sections (sections with SHF_MERGE bit) typically contain
// string literals. Linker is expected to split the section contents
// into null-terminated strings, merge them with mergeable strings
// from other object files, and emit uniquified strings to an output
// file.
//
// This mechanism reduces the size of an output file. If two source
// files happen to contain the same string literal, the output will
// contain only a single copy of it.
//
// It is less common than string literals, but mergeable sections can
// contain fixed-sized read-only records too.
//
// This function splits the section contents into small pieces that we
// call "section fragments". Section fragment is a unit of merging.
//
// We do not support mergeable sections that have relocations.
// Builds a MergeableSection for `sec`, or returns nullptr if `sec` is
// not eligible: it is dead, has an associated relocation section, is
// empty, lacks SHF_MERGE, or has no usable entry size.
template <typename E>
static std::unique_ptr<MergeableSection<E>>
split_section(Context<E> &ctx, InputSection<E> &sec) {
if (!sec.is_alive || sec.relsec_idx != -1 || sec.sh_size == 0)
return nullptr;
const ElfShdr<E> &shdr = sec.shdr();
if (!(shdr.sh_flags & SHF_MERGE))
return nullptr;
// If sh_entsize is not set, fall back to 1 for string sections and to
// the section alignment otherwise. Give up if it is still zero.
i64 entsize = shdr.sh_entsize;
if (entsize == 0)
entsize = (shdr.sh_flags & SHF_STRINGS) ? 1 : (int)shdr.sh_addralign;
if (entsize == 0)
return nullptr;
// Treat a zero alignment as 1.
i64 addralign = shdr.sh_addralign;
if (addralign == 0)
addralign = 1;
std::unique_ptr<MergeableSection<E>> m(new MergeableSection<E>);
m->parent = MergedSection<E>::get_instance(ctx, sec.name(), shdr.sh_type,
shdr.sh_flags, entsize, addralign);
m->p2align = sec.p2align;
// If the section contents are compressed, uncompress them.
// This must happen before sec.contents is read below.
sec.uncompress(ctx);
std::string_view data = sec.contents;
m->contents = sec.contents;
// Fragment offsets are pushed into m->frag_offsets, so reject sections
// whose size exceeds UINT32_MAX.
if (data.size() > UINT32_MAX)
Fatal(ctx) << sec << ": mergeable section too large";
// Split sections
if (shdr.sh_flags & SHF_STRINGS) {
// String section: each fragment starts right after the terminator of
// the previous one.
for (i64 pos = 0; pos < data.size();) {
m->frag_offsets.push_back(pos);
size_t end = find_null(data, pos, entsize);
if (end == data.npos)
Fatal(ctx) << sec << ": string is not null terminated";
pos = end + entsize;
}
} else {
// Record section: every entsize-byte record is a fragment.
if (data.size() % entsize)
Fatal(ctx) << sec << ": section size is not multiple of sh_entsize";
m->frag_offsets.reserve(data.size() / entsize);
for (i64 pos = 0; pos < data.size(); pos += entsize)
m->frag_offsets.push_back(pos);
}
// Compute hashes for section pieces
// The hashes are also fed into a local HyperLogLog estimator, which is
// merged into the parent's estimator once at the end.
HyperLogLog estimator;
m->hashes.reserve(m->frag_offsets.size());
for (i64 i = 0; i < m->frag_offsets.size(); i++) {
u64 hash = hash_string(m->get_contents(i));
m->hashes.push_back(hash);
estimator.insert(hash);
}
m->parent->estimator.merge(estimator);
static Counter counter("string_fragments");
counter += m->frag_offsets.size();
return m;
}
// Usually a section is an atomic unit of inclusion or exclusion.
// Linker doesn't care about its contents. However, if a section is a
// mergeable section (a section with SHF_MERGE bit set), the linker is
@ -818,34 +720,45 @@ split_section(Context<E> &ctx, InputSection<E> &sec) {
// section piece, the section piece is attached to the symbol.
template <typename E>
void ObjectFile<E>::initialize_mergeable_sections(Context<E> &ctx) {
mergeable_sections.resize(sections.size());
// Convert InputSections to MergeableSections
for (i64 i = 0; i < this->sections.size(); i++) {
InputSection<E> *isec = this->sections[i].get();
if (!isec || isec->sh_size == 0 || isec->relsec_idx != -1)
continue;
for (i64 i = 0; i < sections.size(); i++) {
if (std::unique_ptr<InputSection<E>> &isec = sections[i]) {
if (std::unique_ptr<MergeableSection<E>> m = split_section(ctx, *isec)) {
mergeable_sections[i] = std::move(m);
isec->is_alive = false;
}
MergedSection<E> *parent =
MergedSection<E>::get_instance(ctx, isec->name(), isec->shdr());
if (parent) {
this->mergeable_sections[i] =
std::make_unique<MergeableSection<E>>(ctx, *parent, this->sections[i]);
this->sections[i] = nullptr;
}
}
// Split section contents
for (std::unique_ptr<MergeableSection<E>> &sec : mergeable_sections)
if (sec)
sec->split_contents(ctx);
}
template <typename E>
void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
for (std::unique_ptr<MergeableSection<E>> &m : mergeable_sections) {
if (m) {
m->fragments.reserve(m->frag_offsets.size());
if (!m)
continue;
for (i64 i = 0; i < m->frag_offsets.size(); i++) {
SectionFragment<E> *frag =
m->parent->insert(ctx, m->get_contents(i), m->hashes[i], m->p2align);
m->fragments.push_back(frag);
}
m->fragments.reserve(m->frag_offsets.size());
// Reclaim memory as we'll never use this vector again
m->hashes.clear();
m->hashes.shrink_to_fit();
for (i64 i = 0; i < m->frag_offsets.size(); i++) {
SectionFragment<E> *frag =
m->parent.insert(ctx, m->get_contents(i), m->hashes[i], m->p2align);
m->fragments.push_back(frag);
}
// Reclaim memory as we'll never use this vector again
m->hashes.clear();
m->hashes.shrink_to_fit();
}
// Attach section pieces to symbols.
@ -872,24 +785,25 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
}
// Compute the size of frag_syms.
i64 nfrag_syms = 0;
std::vector<InputSection<E> *> vec;
for (std::unique_ptr<InputSection<E>> &isec : sections)
if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC))
for (ElfRel<E> &r : isec->get_rels(ctx))
if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)])
nfrag_syms++;
vec.push_back(isec.get());
i64 nfrag_syms = 0;
for (InputSection<E> *isec : vec)
for (ElfRel<E> &r : isec->get_rels(ctx))
if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)])
nfrag_syms++;
this->frag_syms.resize(nfrag_syms);
// For each relocation referring a mergeable section symbol, we create
// a new dummy non-section symbol and redirect the relocation to the
// newly-created symbol.
// For each relocation referring to a mergeable section symbol, we
// create a new dummy non-section symbol and redirect the relocation
// to the newly created symbol.
i64 idx = 0;
for (std::unique_ptr<InputSection<E>> &isec : sections) {
if (!isec || !isec->is_alive || !(isec->shdr().sh_flags & SHF_ALLOC))
continue;
for (InputSection<E> *isec : vec) {
for (ElfRel<E> &r : isec->get_rels(ctx)) {
const ElfSym<E> &esym = this->elf_syms[r.r_sym];
if (esym.st_type != STT_SECTION)
@ -929,6 +843,8 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
template <typename E>
void ObjectFile<E>::parse(Context<E> &ctx) {
sections.resize(this->elf_sections.size());
mergeable_sections.resize(sections.size());
symtab_sec = this->find_section(SHT_SYMTAB);
if (symtab_sec) {

View File

@ -552,9 +552,88 @@ bool InputSection<E>::record_undef_error(Context<E> &ctx, const ElfRel<E> &rel)
return false;
}
// Constructs a mergeable section that belongs to the output section
// `parent`. Ownership of the input section is moved out of `isec`, so
// the caller's unique_ptr is left empty after this call.
template <typename E>
MergeableSection<E>::MergeableSection(Context<E> &ctx, MergedSection<E> &parent,
std::unique_ptr<InputSection<E>> &isec)
// NOTE(review): p2align is read through `section`, so this relies on
// `section` being declared before `p2align` in the class -- confirm.
: parent(parent), section(std::move(isec)), p2align(section->p2align) {
// Uncompress the section up front; presumably this is so that
// section->contents can be read directly later on.
section->uncompress(ctx);
}
// Returns the offset of the first null terminator found at or after
// `pos` in `data`, or std::string_view::npos if there is none.
//
// `entsize` is the width of a character in bytes. For entsize == 1
// this is a plain search for a '\0' byte. For wider characters, the
// data is scanned in steps of `entsize` starting at `pos`, and an
// entry counts as a terminator only if all of its `entsize` bytes are
// zero and it lies entirely within `data`.
static size_t find_null(std::string_view data, i64 pos, i64 entsize) {
  if (entsize == 1)
    return data.find('\0', pos);

  // Compute the last possible start offset in the signed domain.
  // `data.size() - entsize` is an unsigned expression, so it would wrap
  // around to a huge value if the data is shorter than a single entry,
  // letting the loop run past the end of `data` (substr() then throws
  // std::out_of_range) or accept a truncated entry as a terminator.
  i64 last = (i64)data.size() - entsize;

  for (; pos <= last; pos += entsize)
    if (data.substr(pos, entsize).find_first_not_of('\0') == data.npos)
      return pos;
  return data.npos;
}
// Splits the contents of this section into "section fragments", the
// unit in which mergeable sections (sections with the SHF_MERGE bit)
// are merged.
//
// A mergeable section typically holds string literals. The linker is
// expected to cut it into null-terminated strings, merge them with the
// mergeable strings of other object files and emit each distinct
// string only once, which makes the output file smaller. Less
// commonly, a mergeable section holds fixed-size read-only records; in
// that case every sh_entsize-byte record becomes a fragment.
//
// Besides recording the fragment offsets, this function computes a
// hash for each fragment and feeds the hashes into the parent's
// cardinality estimator.
//
// We do not support mergeable sections that have relocations.
template <typename E>
void MergeableSection<E>::split_contents(Context<E> &ctx) {
  std::string_view bytes = section->contents;
  if (bytes.size() > UINT32_MAX)
    Fatal(ctx) << *section
               << ": mergeable section too large";

  i64 entsize = parent.shdr.sh_entsize;

  if (!(parent.shdr.sh_flags & SHF_STRINGS)) {
    // Fixed-size records: one fragment per record
    if (bytes.size() % entsize)
      Fatal(ctx) << *section << ": section size is not multiple of sh_entsize";

    frag_offsets.reserve(bytes.size() / entsize);
    for (i64 off = 0; off < bytes.size(); off += entsize)
      frag_offsets.push_back(off);
  } else {
    // Strings: a fragment runs up to and including its terminator
    i64 off = 0;
    while (off < bytes.size()) {
      frag_offsets.push_back(off);

      size_t terminator = find_null(bytes, off, entsize);
      if (terminator == bytes.npos)
        Fatal(ctx) << *section << ": string is not null terminated";
      off = terminator + entsize;
    }
  }

  // Hash every fragment. The hashes are collected in a local estimator
  // first so that the parent's estimator is updated only once.
  HyperLogLog local_estimator;
  hashes.reserve(frag_offsets.size());

  for (i64 idx = 0; idx < frag_offsets.size(); idx++) {
    u64 h = hash_string(get_contents(idx));
    hashes.push_back(h);
    local_estimator.insert(h);
  }

  parent.estimator.merge(local_estimator);

  static Counter counter("string_fragments");
  counter += frag_offsets.size();
}
using E = MOLD_TARGET;
template bool cie_equals(const CieRecord<E> &, const CieRecord<E> &);
template class InputSection<E>;
template class MergeableSection<E>;
} // namespace mold::elf

View File

@ -567,11 +567,7 @@ static ElfSym<E> to_elf_sym(PluginSymbol &psym) {
// Returns false if it's GCC.
template <typename E>
static bool is_llvm(Context<E> &ctx) {
#ifdef __MINGW32__
return ctx.arg.plugin.ends_with("LLVMgold.dll");
#else
return ctx.arg.plugin.ends_with("LLVMgold.so");
#endif
return ctx.arg.plugin.find("LLVMgold.") != ctx.arg.plugin.npos;
}
// Returns true if a given linker plugin supports the get_symbols_v3 API.

View File

@ -345,12 +345,8 @@ int elf_main(int argc, char **argv) {
Context<E> ctx;
// Process -run option first. process_run_subcommand() does not return.
if (argc >= 2 && (argv[1] == "-run"sv || argv[1] == "--run"sv)) {
#if defined(_WIN32) || defined(__APPLE__)
Fatal(ctx) << "-run is supported only on Unix";
#endif
if (argc >= 2 && (argv[1] == "-run"sv || argv[1] == "--run"sv))
process_run_subcommand(ctx, argc, argv);
}
// Parse non-positional command line options
ctx.cmdline_args = expand_response_files(ctx, argv);
@ -375,12 +371,8 @@ int elf_main(int argc, char **argv) {
<< ": " << errno_string();
// Fork a subprocess unless --no-fork is given.
std::function<void()> on_complete;
#if !defined(_WIN32) && !defined(__APPLE__)
if (ctx.arg.fork)
on_complete = fork_child();
#endif
fork_child();
acquire_global_lock();
@ -664,8 +656,13 @@ int elf_main(int argc, char **argv) {
// so we sort them.
ctx.reldyn->sort(ctx);
// Zero-clear paddings between sections
clear_padding(ctx);
// .note.gnu.build-id section contains a cryptographic hash of the
// entire output file. Now that we wrote everything except build-id,
// we can compute it.
if (ctx.buildid) {
compute_build_id(ctx);
ctx.buildid->copy_buf(ctx);
}
// .gdb_index's contents cannot be constructed before applying
// relocations to other debug sections. We have relocated debug
@ -673,12 +670,6 @@ int elf_main(int argc, char **argv) {
if (ctx.gdb_index)
write_gdb_index(ctx);
// .note.gnu.build-id section contains a cryptographic hash of the
// entire output file. Now that we wrote everything except build-id,
// we can compute it.
if (ctx.buildid)
ctx.buildid->write_buildid(ctx);
t_copy.stop();
ctx.checkpoint();
@ -707,8 +698,8 @@ int elf_main(int argc, char **argv) {
std::cout << std::flush;
std::cerr << std::flush;
if (on_complete)
on_complete();
if (ctx.arg.fork)
notify_parent();
release_global_lock();

View File

@ -47,6 +47,7 @@ template <typename E> class Symbol;
template <typename E> struct CieRecord;
template <typename E> struct Context;
template <typename E> struct FdeRecord;
template <typename E> class MergeableSection;
template <typename E> class RelocSection;
template <typename E>
@ -799,8 +800,7 @@ template <typename E>
class MergedSection : public Chunk<E> {
public:
static MergedSection<E> *
get_instance(Context<E> &ctx, std::string_view name, i64 type, i64 flags,
i64 entsize, i64 addralign);
get_instance(Context<E> &ctx, std::string_view name, const ElfShdr<E> &shdr);
SectionFragment<E> *insert(Context<E> &ctx, std::string_view data,
u64 hash, i64 p2align);
@ -949,9 +949,8 @@ public:
void update_shdr(Context<E> &ctx) override;
void copy_buf(Context<E> &ctx) override;
void write_buildid(Context<E> &ctx);
static constexpr i64 HEADER_SIZE = 16;
std::vector<u8> contents;
};
template <typename E>
@ -1090,12 +1089,17 @@ struct ComdatGroupRef {
};
template <typename E>
struct MergeableSection {
class MergeableSection {
public:
MergeableSection(Context<E> &ctx, MergedSection<E> &parent,
std::unique_ptr<InputSection<E>> &isec);
void split_contents(Context<E> &ctx);
std::pair<SectionFragment<E> *, i64> get_fragment(i64 offset);
std::string_view get_contents(i64 idx);
MergedSection<E> *parent;
std::string_view contents;
MergedSection<E> &parent;
std::unique_ptr<InputSection<E>> section;
std::vector<u32> frag_offsets;
std::vector<u32> hashes;
std::vector<SectionFragment<E> *> fragments;
@ -1368,7 +1372,8 @@ void print_map(Context<E> &ctx);
// subprocess.cc
//
std::function<void()> fork_child();
void fork_child();
void notify_parent();
template <typename E>
[[noreturn]]
@ -1426,11 +1431,11 @@ template <typename E> void apply_version_script(Context<E> &);
template <typename E> void parse_symbol_version(Context<E> &);
template <typename E> void compute_import_export(Context<E> &);
template <typename E> void compute_address_significance(Context<E> &);
template <typename E> void clear_padding(Context<E> &);
template <typename E> void compute_section_headers(Context<E> &);
template <typename E> i64 set_osec_offsets(Context<E> &);
template <typename E> void fix_synthetic_symbols(Context<E> &);
template <typename E> i64 compress_debug_sections(Context<E> &);
template <typename E> void compute_build_id(Context<E> &);
template <typename E> void write_dependency_file(Context<E> &);
template <typename E> void show_stats(Context<E> &);
@ -1565,7 +1570,18 @@ private:
//
struct BuildId {
i64 size() const;
// Returns the number of bytes of the build ID payload for the
// selected kind. Must not be called when kind is NONE.
i64 size() const {
  if (kind == HEX)
    return value.size();   // as many bytes as `value` holds
  if (kind == HASH)
    return hash_size;
  if (kind == UUID)
    return 16;
  unreachable();
}
enum { NONE, HEX, HASH, UUID } kind = NONE;
std::vector<u8> value;
@ -2411,8 +2427,8 @@ template <typename E>
std::string_view MergeableSection<E>::get_contents(i64 i) {
i64 cur = frag_offsets[i];
if (i == frag_offsets.size() - 1)
return contents.substr(cur);
return contents.substr(cur, frag_offsets[i + 1] - cur);
return section->contents.substr(cur);
return section->contents.substr(cur, frag_offsets[i + 1] - cur);
}
template <typename E>

View File

@ -1,7 +1,5 @@
#include "mold.h"
#include "config.h"
#include "blake3.h"
#include <cctype>
#include <set>
@ -1915,15 +1913,26 @@ MergedSection<E>::MergedSection(std::string_view name, i64 flags, i64 type,
template <typename E>
MergedSection<E> *
MergedSection<E>::get_instance(Context<E> &ctx, std::string_view name,
i64 type, i64 flags,
i64 entsize, i64 addralign) {
const ElfShdr<E> &shdr) {
if (!(shdr.sh_flags & SHF_MERGE))
return nullptr;
i64 addralign = std::max<i64>(1, shdr.sh_addralign);
i64 flags = shdr.sh_flags & ~(u64)SHF_GROUP & ~(u64)SHF_COMPRESSED;
i64 entsize = shdr.sh_entsize;
if (entsize == 0)
entsize = (shdr.sh_flags & SHF_STRINGS) ? 1 : (i64)shdr.sh_addralign;
if (entsize == 0)
return nullptr;
name = get_merged_output_name(ctx, name, flags, entsize, addralign);
flags = flags & ~(u64)SHF_GROUP & ~(u64)SHF_COMPRESSED;
auto find = [&]() -> MergedSection * {
for (std::unique_ptr<MergedSection<E>> &osec : ctx.merged_sections)
if (name == osec->name && flags == osec->shdr.sh_flags &&
type == osec->shdr.sh_type && entsize == osec->shdr.sh_entsize)
shdr.sh_type == osec->shdr.sh_type &&
entsize == osec->shdr.sh_entsize)
return osec.get();
return nullptr;
};
@ -1941,7 +1950,7 @@ MergedSection<E>::get_instance(Context<E> &ctx, std::string_view name,
if (MergedSection *osec = find())
return osec;
MergedSection *osec = new MergedSection(name, flags, type, entsize);
MergedSection *osec = new MergedSection(name, flags, shdr.sh_type, entsize);
ctx.merged_sections.emplace_back(osec);
return osec;
}
@ -2526,89 +2535,21 @@ void VerdefSection<E>::copy_buf(Context<E> &ctx) {
write_vector(ctx.buf + this->shdr.sh_offset, contents);
}
// Returns the number of bytes of the build ID payload for the
// selected kind. Must not be called when kind is NONE.
inline i64 BuildId::size() const {
  if (kind == HEX)
    return value.size();   // as many bytes as `value` holds
  if (kind == HASH)
    return hash_size;
  if (kind == UUID)
    return 16;
  unreachable();
}
template <typename E>
void BuildIdSection<E>::update_shdr(Context<E> &ctx) {
this->shdr.sh_size = HEADER_SIZE + ctx.arg.build_id.size();
this->shdr.sh_size = ctx.arg.build_id.size() + 16; // +16 for the header
}
template <typename E>
void BuildIdSection<E>::copy_buf(Context<E> &ctx) {
U32<E> *base = (U32<E> *)(ctx.buf + this->shdr.sh_offset);
memset(base, 0, this->shdr.sh_size);
base[0] = 4; // Name size
base[1] = ctx.arg.build_id.size(); // Hash size
base[2] = NT_GNU_BUILD_ID; // Type
memcpy(base + 3, "GNU", 4); // Name string
}
// Hashes the `size` bytes starting at `buf` with BLAKE3 and writes
// BLAKE3_OUT_LEN bytes of digest to `out`.
//
// BLAKE3 is a cryptographic hash function just like SHA256; we prefer
// it to SHA256 because it is faster.
static void blake3_hash(u8 *buf, i64 size, u8 *out) {
  blake3_hasher state;
  blake3_hasher_init(&state);
  blake3_hasher_update(&state, buf, size);
  blake3_hasher_finalize(&state, out, BLAKE3_OUT_LEN);
}
template <typename E>
void BuildIdSection<E>::write_buildid(Context<E> &ctx) {
Timer t(ctx, "build_id");
u8 *buf = ctx.buf + this->shdr.sh_offset + HEADER_SIZE;
switch (ctx.arg.build_id.kind) {
case BuildId::HEX:
write_vector(buf, ctx.arg.build_id.value);
return;
case BuildId::HASH: {
i64 shard_size = 4 * 1024 * 1024;
i64 filesize = ctx.output_file->filesize;
i64 num_shards = align_to(filesize, shard_size) / shard_size;
std::vector<u8> shards(num_shards * BLAKE3_OUT_LEN);
tbb::parallel_for((i64)0, num_shards, [&](i64 i) {
u8 *begin = ctx.buf + shard_size * i;
u8 *end = (i == num_shards - 1) ? ctx.buf + filesize : begin + shard_size;
blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN);
#ifdef HAVE_MADVISE
// Make the kernel page out the file contents we've just written
// so that subsequent close(2) call will become quicker.
if (i > 0 && ctx.output_file->is_mmapped)
madvise(begin, end - begin, MADV_DONTNEED);
#endif
});
u8 digest[BLAKE3_OUT_LEN];
blake3_hash(shards.data(), shards.size(), digest);
assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN);
memcpy(buf, digest, ctx.arg.build_id.size());
return;
}
case BuildId::UUID: {
get_random_bytes(buf, 16);
// Indicate that this is UUIDv4 as defined by RFC4122
buf[6] = (buf[6] & 0b0000'1111) | 0b0100'0000;
buf[8] = (buf[8] & 0b0011'1111) | 0b1000'0000;
return;
}
default:
unreachable();
}
base[0] = 4; // Name size
base[1] = ctx.arg.build_id.size(); // Hash size
base[2] = NT_GNU_BUILD_ID; // Type
memcpy(base + 3, "GNU", 4); // Name string
write_vector(base + 4, contents); // Build ID
}
template <typename E>

View File

@ -1,4 +1,5 @@
#include "mold.h"
#include "blake3.h"
#include <fstream>
#include <functional>
@ -442,10 +443,13 @@ static std::string get_cmdline_args(Context<E> &ctx) {
template <typename E>
void add_comment_string(Context<E> &ctx, std::string str) {
MergedSection<E> *sec =
MergedSection<E>::get_instance(ctx, ".comment", SHT_PROGBITS,
SHF_MERGE | SHF_STRINGS, 1, 1);
ElfShdr<E> shdr = {};
shdr.sh_type = SHT_PROGBITS;
shdr.sh_flags = SHF_MERGE | SHF_STRINGS;
shdr.sh_entsize = 1;
shdr.sh_addralign = 1;
MergedSection<E> *sec = MergedSection<E>::get_instance(ctx, ".comment", shdr);
if (sec->map.nbuckets == 0)
sec->map.resize(4096);
@ -1720,6 +1724,22 @@ void copy_chunks(Context<E> &ctx) {
if constexpr (is_arm32<E>)
fixup_arm_exidx_section(ctx);
// Zero-clear paddings between chunks
auto zero = [&](Chunk<E> *chunk, i64 next_start) {
i64 pos = chunk->shdr.sh_offset + chunk->shdr.sh_size;
memset(ctx.buf + pos, 0, next_start - pos);
};
std::vector<Chunk<E> *> chunks = ctx.chunks;
std::erase_if(chunks, [](Chunk<E> *chunk) {
return chunk->shdr.sh_type == SHT_NOBITS;
});
for (i64 i = 1; i < chunks.size(); i++)
zero(chunks[i - 1], chunks[i]->shdr.sh_offset);
zero(chunks.back(), ctx.output_file->filesize);
}
// Rewrite the leading endbr64 instruction with a nop if a function
@ -2168,26 +2188,6 @@ void compute_address_significance(Context<E> &ctx) {
});
}
// Fills the gaps between consecutive chunks in the output buffer, and
// the gap between the last chunk and the end of the file, with zeros.
// SHT_NOBITS chunks are ignored when determining the gaps.
template <typename E>
void clear_padding(Context<E> &ctx) {
  Timer t(ctx, "clear_padding");

  std::vector<Chunk<E> *> chunks = ctx.chunks;
  std::erase_if(chunks, [](Chunk<E> *chunk) {
    return chunk->shdr.sh_type == SHT_NOBITS;
  });

  for (i64 i = 0; i < chunks.size(); i++) {
    // A gap ends where the next chunk begins, or at the end of the
    // file if this is the last chunk.
    i64 next_start;
    if (i + 1 < chunks.size())
      next_start = chunks[i + 1]->shdr.sh_offset;
    else
      next_start = ctx.output_file->filesize;

    i64 pos = chunks[i]->shdr.sh_offset + chunks[i]->shdr.sh_size;
    memset(ctx.buf + pos, 0, next_start - pos);
  }
}
// We want to sort output chunks in the following order.
//
// <ELF header>
@ -2999,6 +2999,65 @@ i64 compress_debug_sections(Context<E> &ctx) {
return set_osec_offsets(ctx);
}
// Hashes the `size` bytes starting at `buf` with BLAKE3 and writes
// BLAKE3_OUT_LEN bytes of digest to `out`.
//
// BLAKE3 is a cryptographic hash function just like SHA256; we prefer
// it to SHA256 because it is faster.
static void blake3_hash(u8 *buf, i64 size, u8 *out) {
  blake3_hasher state;
  blake3_hasher_init(&state);
  blake3_hasher_update(&state, buf, size);
  blake3_hasher_finalize(&state, out, BLAKE3_OUT_LEN);
}
// Computes the build ID according to ctx.arg.build_id.kind and stores
// the resulting bytes in ctx.buildid->contents. This function does not
// write to the build-id section in the output buffer itself.
template <typename E>
void compute_build_id(Context<E> &ctx) {
Timer t(ctx, "compute_build_id");
switch (ctx.arg.build_id.kind) {
case BuildId::HEX:
// Use the value stored in ctx.arg.build_id verbatim.
ctx.buildid->contents = ctx.arg.build_id.value;
break;
case BuildId::HASH: {
// Hash the output file in 4 MiB shards in parallel, then hash the
// concatenated shard digests to obtain the final digest.
i64 shard_size = 4 * 1024 * 1024;
i64 filesize = ctx.output_file->filesize;
i64 num_shards = align_to(filesize, shard_size) / shard_size;
std::vector<u8> shards(num_shards * BLAKE3_OUT_LEN);
tbb::parallel_for((i64)0, num_shards, [&](i64 i) {
u8 *begin = ctx.buf + shard_size * i;
// The last shard may be shorter than shard_size.
u8 *end = (i == num_shards - 1) ? ctx.buf + filesize : begin + shard_size;
blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN);
#ifdef HAVE_MADVISE
// Make the kernel page out the file contents we've just written
// so that subsequent close(2) call will become quicker.
// NOTE(review): the first shard is skipped, presumably because the
// beginning of the file is still accessed afterwards -- confirm.
if (i > 0 && ctx.output_file->is_mmapped)
madvise(begin, end - begin, MADV_DONTNEED);
#endif
});
u8 buf[BLAKE3_OUT_LEN];
blake3_hash(shards.data(), shards.size(), buf);
// Only the first build_id.size() bytes of the digest are kept.
assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN);
ctx.buildid->contents = {buf, buf + ctx.arg.build_id.size()};
break;
}
case BuildId::UUID: {
u8 buf[16];
get_random_bytes(buf, 16);
// Indicate that this is UUIDv4 as defined by RFC4122
buf[6] = (buf[6] & 0b0000'1111) | 0b0100'0000;
buf[8] = (buf[8] & 0b0011'1111) | 0b1000'0000;
ctx.buildid->contents = {buf, buf + 16};
break;
}
default:
unreachable();
}
}
// Write Makefile-style dependency rules to a file specified by
// --dependency-file. This is analogous to the compiler's -M flag.
template <typename E>
@ -3134,11 +3193,11 @@ template void apply_version_script(Context<E> &);
template void parse_symbol_version(Context<E> &);
template void compute_import_export(Context<E> &);
template void compute_address_significance(Context<E> &);
template void clear_padding(Context<E> &);
template void compute_section_headers(Context<E> &);
template i64 set_osec_offsets(Context<E> &);
template void fix_synthetic_symbols(Context<E> &);
template i64 compress_debug_sections(Context<E> &);
template void compute_build_id(Context<E> &);
template void write_dependency_file(Context<E> &);
template void show_stats(Context<E> &);

View File

@ -176,7 +176,6 @@ void combine_objects(Context<E> &ctx) {
ctx.buf = ctx.output_file->buf;
copy_chunks(ctx);
clear_padding(ctx);
ctx.output_file->close(ctx);
ctx.checkpoint();

View File

@ -1,5 +1,3 @@
#if !defined(_WIN32) && !defined(__APPLE__)
#include "mold.h"
#include "config.h"
@ -14,10 +12,12 @@
namespace mold::elf {
#ifdef MOLD_X86_64
static int pipe_write_fd = -1;
// Exiting from a program with large memory usage is slow --
// it may take a few hundred milliseconds. To hide the latency,
// we fork a child and let it do the actual linking work.
std::function<void()> fork_child() {
void fork_child() {
int pipefd[2];
if (pipe(pipefd) == -1) {
perror("pipe");
@ -50,12 +50,16 @@ std::function<void()> fork_child() {
// Child
close(pipefd[0]);
pipe_write_fd = pipefd[1];
}
return [=] {
char buf[] = {1};
[[maybe_unused]] int n = write(pipefd[1], buf, 1);
assert(n == 1);
};
// Writes a single byte to the pipe whose write end was saved in
// pipe_write_fd, presumably to tell the waiting parent process that
// the child has finished. Does nothing if no pipe has been recorded.
void notify_parent() {
  if (pipe_write_fd != -1) {
    char byte = 1;
    [[maybe_unused]] int n = write(pipe_write_fd, &byte, 1);
    assert(n == 1);
  }
}
#endif
@ -84,6 +88,9 @@ static std::string find_dso(Context<E> &ctx, std::filesystem::path self) {
template <typename E>
[[noreturn]]
void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
#ifdef __APPLE__
Fatal(ctx) << "-run is not supported on macOS";
#else
assert(argv[1] == "-run"s || argv[1] == "--run"s);
if (!argv[2])
@ -111,6 +118,7 @@ void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
// Execute a given command
execvp(argv[2], argv + 2);
Fatal(ctx) << "mold -run failed: " << argv[2] << ": " << errno_string();
#endif
}
using E = MOLD_TARGET;
@ -118,5 +126,3 @@ using E = MOLD_TARGET;
template void process_run_subcommand(Context<E> &, int, char **);
} // namespace mold::elf
#endif

20
elf/subprocess-win32.cc Normal file
View File

@ -0,0 +1,20 @@
#include "mold.h"
namespace mold::elf {
// Windows stand-ins for the subprocess helpers: both functions are
// intentionally empty in this file.
// NOTE(review): the MOLD_X86_64 guard presumably ensures that these
// non-template functions are defined only once across the per-target
// instantiations of this file -- confirm.
#ifdef MOLD_X86_64
// No-op: no child process is created.
void fork_child() {}
// No-op: there is no parent process to notify.
void notify_parent() {}
#endif
// Windows implementation of the -run subcommand: it only reports an
// error via Fatal. The function is declared [[noreturn]], so Fatal is
// expected not to return. `argc` and `argv` are unused.
template <typename E>
[[noreturn]]
void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
Fatal(ctx) << "-run is supported only on Unix";
}
using E = MOLD_TARGET;
template void process_run_subcommand(Context<E> &, int, char **);
} // namespace mold::elf