#pragma once
#include "inttypes.h"
#include <array>
#include <atomic>
#include <bit>
#include <bitset>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <fcntl.h>
#include <filesystem>
#include <iostream>
#include <mutex>
#include <optional>
#include <span>
#include <sstream>
#include <string>
#include <string_view>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <tbb/concurrent_vector.h>
#include <tbb/enumerable_thread_specific.h>
#include <unistd.h>
#include <utility>
#include <vector>
#define XXH_INLINE_ALL 1
#include <xxhash/xxhash.h>
// unreachable() marks a code path that must never execute. With NDEBUG
// it becomes an optimizer hint; otherwise it trips an assertion.
#ifdef NDEBUG
# define unreachable() __builtin_unreachable()
#else
# define unreachable() assert(0 && "unreachable")
#endif
// Returns the 64-bit XXH3 hash of the bytes viewed by `str`.
inline uint64_t hash_string(std::string_view str) {
  return XXH3_64bits(str.data(), str.size());
}
// Hash/equality policy for std::string_view keys: hash() delegates to
// hash_string() and equal() compares the viewed characters.
// NOTE(review): the hash()/equal() pair looks like the interface a
// concurrent hash container expects -- confirm against the users.
class HashCmp {
public:
  static size_t hash(const std::string_view &k) {
    return hash_string(k);
  }

  static bool equal(const std::string_view &k1, const std::string_view &k2) {
    return k1 == k2;
  }
};
namespace mold {
// Enable the ""s and ""sv literal suffixes.
using namespace std::literals::string_literals;
using namespace std::literals::string_view_literals;

// Short names for the fixed-width integer types.
using u8 = uint8_t;
using u16 = uint16_t;
using u32 = uint32_t;
using u64 = uint64_t;

using i8 = int8_t;
using i16 = int16_t;
using i32 = int32_t;
using i64 = int64_t;

template <typename C> class OutputFile;

// Process-wide mutable state. `inline` gives every translation unit the
// same single definition; all of these start out null/false.
inline char *output_tmpfile = nullptr;
inline char *socket_tmpfile = nullptr;
// Per-thread flag; SyncOut's constructor sets it from ctx.arg.demangle.
inline thread_local bool opt_demangle = false;
inline u8 *output_buffer_start = nullptr;
inline u8 *output_buffer_end = nullptr;

// Declared here, defined out of line.
extern const std::string mold_version;

std::string_view errno_string();
void cleanup();
void install_signal_handler();
i64 get_default_thread_count();
// Error output
// Buffers a message via operator<< and writes it, followed by a newline,
// to `out` when the object is destroyed. The write happens under a mutex
// shared by all SyncOut<C> objects, so whole messages do not interleave.
// Constructing a SyncOut also copies ctx.arg.demangle into the
// thread-local opt_demangle.
template <typename C>
class SyncOut {
public:
  SyncOut(C &ctx, std::ostream &out = std::cout) : out(out) {
    opt_demangle = ctx.arg.demangle;
  }

  ~SyncOut() {
    std::scoped_lock lock(mu);
    out << ss.str() << "\n";
  }

  template <class T> SyncOut &operator<<(T &&val) {
    ss << std::forward<T>(val);
    return *this;
  }

  static inline std::mutex mu;

private:
  std::ostream &out;
  std::stringstream ss;
};
// Builds the "mold: <msg>: " prefix of a diagnostic. When
// ctx.arg.color_diagnostics is set, `msg` and the colon after it are
// wrapped in the escape sequence for bold red.
template <typename C>
static std::string add_color(C &ctx, std::string msg) {
  if (ctx.arg.color_diagnostics)
    return "mold: \033[0;1;31m" + msg + ":\033[0m ";
  return "mold: " + msg + ": ";
}
template <typename C>
class Fatal {
Fatal(C &ctx) : out(ctx, std::cerr) {
out << add_color(ctx, "fatal");
[[noreturn]] ~Fatal() {
template <class T> Fatal &operator<<(T &&val) {
out << std::forward<T>(val);
return *this;
SyncOut<C> out;
template <typename C>
class Error {
Error(C &ctx) : out(ctx, std::cerr) {
if (ctx.arg.noinhibit_exec) {
out << add_color(ctx, "warning");
} else {
out << add_color(ctx, "error");
ctx.has_error = true;
template <class T> Error &operator<<(T &&val) {
out << std::forward<T>(val);
return *this;
SyncOut<C> out;
template <typename C>
class Warn {
Warn(C &ctx) : out(ctx, std::cerr) {
if (ctx.arg.fatal_warnings) {
out << add_color(ctx, "error");
ctx.has_error = true;
} else {
out << add_color(ctx, "warning");
template <class T> Warn &operator<<(T &&val) {
out << std::forward<T>(val);
return *this;
SyncOut<C> out;
// Utility functions
// Some C++ libraries haven't implemented std::has_single_bit yet.
// Returns true if exactly one bit of `val` is set, i.e. `val` is a
// power of two. Zero yields false.
inline bool has_single_bit(u64 val) {
  return std::popcount(val) == 1;
}
// Some C++ libraries haven't implemented std::bit_ceil yet.
// Returns the smallest power of two that is >= `val` (1 for val == 0).
// `val` must not exceed 2^63; a larger value would need a shift by 64.
inline u64 bit_ceil(u64 val) {
  if (has_single_bit(val))
    return val;
  return (u64)1 << (64 - std::countl_zero(val));
}
// Rounds `val` up to a multiple of `align`. The mask arithmetic assumes
// `align` is a power of two; an alignment of 0 means "no alignment" and
// returns `val` unchanged.
inline u64 align_to(u64 val, u64 align) {
  if (align == 0)
    return val;
  return (val + align - 1) & ~(align - 1);
}
// Rounds `val` down to a multiple of `align`, which must be a power of
// two. Unlike align_to() there is no special case for 0: an alignment
// of 0 produces an all-zero mask and therefore returns 0.
inline u64 align_down(u64 val, u64 align) {
  return val & ~(align - 1);
}
// Returns bit number `pos` (0 = least significant) of `val` as 0 or 1.
// `pos` must be in [0, 63].
inline u64 bit(u64 val, i64 pos) {
  return (val >> pos) & 1;
}
// Returns [hi:lo] bits of val, shifted down so that bit `lo` becomes
// bit 0. Requires lo <= hi <= 63.
inline u64 bits(u64 val, u64 hi, u64 lo) {
  // Build the mask by shifting all-ones right instead of computing
  // (1 << width) - 1: the latter shifts by 64, which is undefined, when
  // the field spans the whole word (hi == 63, lo == 0).
  u64 mask = ~(u64)0 >> (63 - (hi - lo));
  return (val >> lo) & mask;
}
// Sign-extends `val`, treating bit number `size` (zero-based) as the
// sign bit: that bit is moved to bit 63 and then replicated back down
// by an arithmetic right shift. `size` must be in [0, 63].
inline i64 sign_extend(u64 val, i64 size) {
  return (i64)(val << (63 - size)) >> (63 - size);
}
// Atomically lowers `atomic` to `new_val` if cmp(new_val, current)
// holds (by default: if new_val is smaller). Implemented as a
// compare-and-swap loop.
template <typename T, typename Compare = std::less<T>>
void update_minimum(std::atomic<T> &atomic, u64 new_val, Compare cmp = {}) {
  T old_val = atomic;
  // A failed compare_exchange_weak reloads old_val with the current
  // value, so the comparison is re-evaluated against fresh data.
  while (cmp(new_val, old_val) &&
         !atomic.compare_exchange_weak(old_val, new_val));
}
// Atomically raises `atomic` to `new_val` if cmp(current, new_val)
// holds (by default: if new_val is larger). Implemented as a
// compare-and-swap loop.
template <typename T, typename Compare = std::less<T>>
void update_maximum(std::atomic<T> &atomic, u64 new_val, Compare cmp = {}) {
  T old_val = atomic;
  // A failed compare_exchange_weak reloads old_val with the current
  // value, so the comparison is re-evaluated against fresh data.
  while (cmp(old_val, new_val) &&
         !atomic.compare_exchange_weak(old_val, new_val));
}
// Appends every element of `vec2` to the end of `vec1`, converting
// U to T. `vec2` is taken by value, so the inserted range never aliases
// `vec1` even if the caller passes the same vector twice.
template <typename T, typename U>
inline void append(std::vector<T> &vec1, std::vector<U> vec2) {
  vec1.insert(vec1.end(), vec2.begin(), vec2.end());
}
// Concatenates the inner vectors of `vec`, in order, into one vector.
// `vec` itself is left unmodified.
template <typename T>
inline std::vector<T> flatten(std::vector<std::vector<T>> &vec) {
  // Size the result once up front instead of growing it per inner vector.
  size_t total = 0;
  for (std::vector<T> &v : vec)
    total += v.size();

  std::vector<T> ret;
  ret.reserve(total);
  for (std::vector<T> &v : vec)
    ret.insert(ret.end(), v.begin(), v.end());
  return ret;
}
// Sorts the whole container in ascending order with operator<. The sort
// is stable: equal elements keep their relative order.
template <typename T>
inline void sort(T &vec) {
  std::stable_sort(vec.begin(), vec.end());
}
// Sorts the whole container using `less` as the ordering. The sort is
// stable: elements that compare equivalent keep their relative order.
template <typename T, typename U>
inline void sort(T &vec, U less) {
  std::stable_sort(vec.begin(), vec.end(), less);
}
// Collapses each run of consecutive equal elements into one element.
// Only adjacent duplicates are removed, so sort the vector first if all
// duplicates should go.
template <typename T>
inline void remove_duplicates(std::vector<T> &vec) {
  vec.erase(std::unique(vec.begin(), vec.end()), vec.end());
}
// Copies `str` followed by a NUL terminator to `buf` and returns the
// number of bytes written (str.size() + 1). `buf` must have room for
// that many bytes.
inline i64 write_string(void *buf, std::string_view str) {
  memcpy(buf, str.data(), str.size());
  *((u8 *)buf + str.size()) = '\0';
  return str.size() + 1;
}
// Copies the raw bytes of the elements of `vec` to `buf` and returns
// the number of bytes written (vec.size() * sizeof(T)).
template <typename T>
inline i64 write_vector(void *buf, const std::vector<T> &vec) {
  i64 sz = vec.size() * sizeof(T);
  // An empty vector may report a null data(); memcpy must not be handed
  // a null pointer even for a zero-length copy.
  if (sz)
    memcpy(buf, vec.data(), sz);
  return sz;
}
// Appends the ULEB128 encoding of `val` to `vec`: 7 bits per byte,
// least significant group first, with the high bit set on every byte
// except the last.
inline void encode_uleb(std::vector<u8> &vec, u64 val) {
  do {
    u8 byte = val & 0x7f;
    val >>= 7;
    vec.push_back(val ? (byte | 0x80) : byte);
  } while (val);
}
// Writes the ULEB128 encoding of `val` to `buf` and returns the number
// of bytes written (at least 1, at most 10 for a 64-bit value).
inline i64 write_uleb(u8 *buf, u64 val) {
  i64 i = 0;
  do {
    u8 byte = val & 0x7f;
    val >>= 7;
    buf[i++] = val ? (byte | 0x80) : byte;
  } while (val);
  return i;
}
// Decodes one ULEB128 value starting at `buf` and advances `buf` past
// it. The input must be a well-formed encoding of a value that fits in
// 64 bits (at most 10 bytes).
inline u64 read_uleb(u8 *&buf) {
  u64 val = 0;
  u8 shift = 0;
  u8 byte;
  do {
    byte = *buf++;
    // Widen before shifting: `byte & 0x7f` is an int, so shifting it by
    // 32 or more would be undefined and would drop the high bits.
    val |= (u64)(byte & 0x7f) << shift;
    shift += 7;
  } while (byte & 0x80);
  return val;
}
// Overload for pointers to const bytes. The decoder only reads through
// the pointer, so casting away const to reuse it is safe.
inline u64 read_uleb(u8 const*&buf) {
  return read_uleb(const_cast<u8 *&>(buf));
}
// Decodes one ULEB128 value from the front of `str` and removes the
// consumed bytes from the view. `str` must begin with a complete
// encoding; the decoder does not check the view's length.
inline u64 read_uleb(std::string_view &str) {
  // data() rather than &str[0]: indexing an empty view is not allowed.
  u8 *start = (u8 *)str.data();
  u8 *ptr = start;
  u64 val = read_uleb(ptr);
  str = str.substr(ptr - start);
  return val;
}
// Returns the number of bytes the ULEB128 encoding of `val` occupies
// (1 to 10), matching what write_uleb() and encode_uleb() emit.
inline i64 uleb_size(u64 val) {
  // i bytes carry 7*i payload bits. Nine bytes cover values below 2^63;
  // values with bit 63 set need a tenth byte.
#pragma GCC unroll 9
  for (int i = 1; i < 10; i++)
    if (val < ((u64)1 << (7 * i)))
      return i;
  return 10;
}
// Copies `str` into a freshly allocated, NUL-terminated buffer and
// returns a view of the copy (the terminator is not part of the view).
// NOTE(review): nothing visible here frees the buffer and `ctx` is
// unused -- presumably the buffer is meant to be handed to a pool owned
// by `ctx`; confirm and restore that registration.
template <typename C>
std::string_view save_string(C &ctx, const std::string &str) {
  u8 *buf = new u8[str.size() + 1];
  memcpy(buf, str.data(), str.size());
  buf[str.size()] = '\0';
  return {(char *)buf, str.size()};
}
// Concurrent Map
// This is an implementation of a fast concurrent hash map. Unlike
// ordinary hash tables, this impl just aborts if it becomes full.
// So you need to give a correct estimation of the final size before
// using it. We use this hash map to uniquify pieces of data in
// mergeable sections.
template <typename T>
class ConcurrentMap {
ConcurrentMap() {}
ConcurrentMap(i64 nbuckets) {
~ConcurrentMap() {
if (keys) {
free((void *)keys);
free((void *)key_sizes);
free((void *)values);
void resize(i64 nbuckets) {
nbuckets = std::max<i64>(MIN_NBUCKETS, bit_ceil(nbuckets));
this->nbuckets = nbuckets;
keys = (std::atomic<const char *> *)calloc(nbuckets, sizeof(keys[0]));
key_sizes = (u32 *)calloc(nbuckets, sizeof(key_sizes[0]));
values = (T *)calloc(nbuckets, sizeof(values[0]));
std::pair<T *, bool> insert(std::string_view key, u64 hash, const T &val) {
if (!keys)
return {nullptr, false};
i64 idx = hash & (nbuckets - 1);
i64 retry = 0;
while (retry < MAX_RETRY) {
const char *ptr = keys[idx];
if (ptr == marker) {
if (ptr == nullptr) {
if (!keys[idx].compare_exchange_weak(ptr, marker))
new (values + idx) T(val);
key_sizes[idx] = key.size();
keys[idx] = key.data();
return {values + idx, true};
if (key.size() == key_sizes[idx] &&
memcmp(ptr, key.data(), key_sizes[idx]) == 0)
return {values + idx, false};
u64 mask = nbuckets / NUM_SHARDS - 1;
idx = (idx & ~mask) | ((idx + 1) & mask);
assert(false && "ConcurrentMap is full");
return {nullptr, false};
bool has_key(i64 idx) {
return keys[idx].load(std::memory_order_relaxed);
static constexpr i64 MIN_NBUCKETS = 2048;
static constexpr i64 NUM_SHARDS = 16;
static constexpr i64 MAX_RETRY = 128;
i64 nbuckets = 0;
std::atomic<const char *> *keys = nullptr;
u32 *key_sizes = nullptr;
T *values = nullptr;
static void pause() {
#if defined(__x86_64__)
asm volatile("pause");
#elif defined(__aarch64__)
asm volatile("yield");
static constexpr const char *marker = "marker";
// output-file.h
// Abstract base for the linker's output file. Instances are created
// through open(); close() is supplied by the concrete subclass.
template <typename C>
class OutputFile {
public:
  static std::unique_ptr<OutputFile<C>>
  open(C &ctx, std::string path, i64 filesize, i64 perm);

  virtual void close(C &ctx) = 0;
  virtual ~OutputFile() = default;

  u8 *buf = nullptr;
  std::string path;
  i64 filesize;
  bool is_mmapped;
  bool is_unmapped = false;

protected:
  // Only subclasses construct the base. `path` is a by-value sink
  // parameter, so it is moved into the member rather than copied again.
  OutputFile(std::string path, i64 filesize, bool is_mmapped)
    : path(std::move(path)), filesize(filesize), is_mmapped(is_mmapped) {}
};
// hyperloglog.cc
class HyperLogLog {
HyperLogLog() : buckets(NBUCKETS) {}
void insert(u32 hash) {
update_maximum(buckets[hash & (NBUCKETS - 1)], std::countl_zero(hash) + 1);
i64 get_cardinality() const;
void merge(const HyperLogLog &other) {
for (i64 i = 0; i < NBUCKETS; i++)
update_maximum(buckets[i], other.buckets[i]);
static constexpr i64 NBUCKETS = 2048;
static constexpr double ALPHA = 0.79402;
std::vector<std::atomic_uint8_t> buckets;
// glob.cc
// A compiled glob pattern, stored as a sequence of elements of kind
// STRING, STAR, QUESTION or BRACKET. Obtain one with compile(), which
// returns std::nullopt on failure, then test strings with match().
class Glob {
  typedef enum { STRING, STAR, QUESTION, BRACKET } Kind;

  struct Element {
    Element(Kind k) : kind(k) {}
    Kind kind;
    std::string str;
    std::bitset<256> bitset;
  };

public:
  static std::optional<Glob> compile(std::string_view pat);
  bool match(std::string_view str);

private:
  // `vec` is a named rvalue reference and therefore an lvalue here; it
  // has to be moved explicitly or the vector would be copied.
  Glob(std::vector<Element> &&vec) : elements(std::move(vec)) {}
  static bool do_match(std::string_view str, std::span<Element> elements);

  std::vector<Element> elements;
};
// multi-glob.cc
// Matches a string against many patterns at once. Each pattern is
// registered with add() together with a u32 value, and find() returns
// the value for a string, if any. Implemented out of line.
// NOTE(review): the TrieNode suffix links suggest an Aho-Corasick style
// automaton -- confirm in the implementation file.
class MultiGlob {
public:
  bool add(std::string_view pat, u32 val);
  bool empty() const { return strings.empty(); }
  std::optional<u32> find(std::string_view str);

private:
  struct TrieNode {
    u32 value = -1;
    TrieNode *suffix_link = nullptr;
    std::unique_ptr<TrieNode> children[256];
  };

  void compile();
  void fix_suffix_links(TrieNode &node);
  void fix_values();

  std::vector<std::string> strings;
  std::unique_ptr<TrieNode> root;
  std::vector<std::pair<Glob, u32>> globs;
  std::vector<u32> values;
  std::once_flag once;
  bool is_compiled = false;
};
// uuid.cc
// Returns 16 bytes; defined out of line.
// NOTE(review): presumably a random (version 4) UUID, going by the
// name -- confirm against the definition.
std::array<u8, 16> get_uuid_v4();
// filepath.cc
// Builds a std::filesystem::path from `path`, telling the constructor
// that the input is in the generic ('/'-separated) format.
template <typename T>
std::filesystem::path filepath(const T &path) {
  return {path, std::filesystem::path::format::generic_format};
}
// Path helpers, declared here and defined out of line.
// NOTE(review): the exact normalization rules are not visible in this
// header. MappedFile::open() passes an absolute path through
// path_clean() before prefixing it with the chroot directory.
std::string get_realpath(std::string_view path);
std::string path_clean(std::string_view path);
std::filesystem::path to_abs_path(std::filesystem::path path);
// demangle.cc
// Symbol-name demangling, defined out of line. cpp_demangle() returns
// an empty optional when it produces no result.
// NOTE(review): demangle() presumably consults the thread-local
// opt_demangle flag set by SyncOut -- confirm in the definition.
std::string_view demangle(std::string_view name);
std::optional<std::string_view> cpp_demangle(std::string_view name);
// compress.cc
// Compresses a buffer on construction; size() reports the output size
// and write_to() copies the output to a caller-supplied buffer.
// Implemented out of line.
// NOTE(review): `shards` suggests the input is split into separately
// compressed pieces -- confirm in the implementation file.
class ZlibCompressor {
public:
  ZlibCompressor(u8 *buf, i64 size);
  void write_to(u8 *buf);
  i64 size() const;

private:
  std::vector<std::vector<u8>> shards;
  u64 checksum = 0;
};
// Gzip counterpart of ZlibCompressor: compresses `input` on
// construction; size() reports the output size and write_to() copies
// the output to a caller-supplied buffer. Implemented out of line.
class GzipCompressor {
public:
  GzipCompressor(std::string_view input);
  void write_to(u8 *buf);
  i64 size() const;

private:
  std::vector<std::vector<u8>> shards;
  u32 checksum = 0;
  u32 uncompressed_size = 0;
};
// perf.cc
// Counter is used to collect statistics numbers.
class Counter {
Counter(std::string_view name, i64 value = 0) : name(name), values(value) {
static std::mutex mu;
std::scoped_lock lock(mu);
Counter &operator++(int) {
if (enabled)
return *this;
Counter &operator+=(int delta) {
if (enabled)
values.local() += delta;
return *this;
static void print();
static inline bool enabled = false;
i64 get_value();
std::string_view name;
tbb::enumerable_thread_specific<i64> values;
static inline std::vector<Counter *> instances;
// Timer and TimerRecord record elapsed time (wall clock time)
// used by each pass of the linker.
//
// A TimerRecord is one node in a tree of measurements: it has a name,
// an optional parent and a list of children.
// NOTE(review): the units of start/end/user/sys are set by the
// out-of-line constructor and stop(); they are not visible here.
struct TimerRecord {
  TimerRecord(std::string name, TimerRecord *parent = nullptr);
  void stop();

  std::string name;
  TimerRecord *parent;
  tbb::concurrent_vector<TimerRecord *> children;
  i64 start;
  i64 end;
  i64 user;
  i64 sys;
  bool stopped = false;
};
print_timer_records(tbb::concurrent_vector<std::unique_ptr<TimerRecord>> &);
template <typename C>
class Timer {
Timer(C &ctx, std::string name, Timer *parent = nullptr) {
record = new TimerRecord(name, parent ? parent->record : nullptr);
Timer(const Timer &) = delete;
~Timer() {
void stop() {
TimerRecord *record;
// tar.cc
// TarWriter is a class to create a tar file.
// If you pass `--repro` to mold, mold collects all input files and
// puts them into `<output-file-path>.repro.tar`, so that it is easy to
// run the same command with the same command line arguments.
//
// Instances are created through open(); append() adds one file.
// NOTE(review): no destructor is declared here, so nothing visible
// closes `out` -- confirm where the FILE is closed.
class TarWriter {
public:
  static std::unique_ptr<TarWriter>
  open(std::string output_path, std::string basedir);

  void append(std::string path, std::string_view data);

private:
  static constexpr i64 BLOCK_SIZE = 512;

  // `basedir` is a by-value sink parameter; move it into the member.
  TarWriter(FILE *out, std::string basedir)
    : out(out), basedir(std::move(basedir)) {}

  FILE *out = nullptr;
  std::string basedir;
};
// Memory-mapped file
// MappedFile represents an mmap'ed input file.
// mold uses mmap-IO only.
template <typename C>
class MappedFile {
static MappedFile *open(C &ctx, std::string path);
static MappedFile *must_open(C &ctx, std::string path);
MappedFile *slice(C &ctx, std::string name, u64 start, u64 size);
std::string_view get_contents() {
return std::string_view((char *)data, size);
i64 get_offset() const {
return parent ? (data - parent->data + parent->get_offset()) : 0;
// Returns a string that uniquely identify a file that is possibly
// in an archive.
std::string get_identifier() const {
if (parent) {
// We use the file offset within an archive as an identifier
// because archive members may have the same name.
return parent->name + ":" + std::to_string(get_offset());
if (thin_parent) {
// If this is a thin archive member, the filename part is
// guaranteed to be unique.
return thin_parent->name + ":" + name;
return name;
std::string name;
u8 *data = nullptr;
i64 size = 0;
i64 mtime = 0;
bool given_fullpath = true;
MappedFile *parent = nullptr;
MappedFile *thin_parent = nullptr;
int fd = -1;
// Opens `path` read-only and maps its contents. Returns nullptr if the
// file cannot be opened; an fstat or mmap failure is fatal. If a chroot
// directory is configured, absolute paths are resolved beneath it. An
// empty file yields a MappedFile with null data and size 0.
template <typename C>
MappedFile<C> *MappedFile<C>::open(C &ctx, std::string path) {
  if (path.starts_with('/') && !ctx.arg.chroot.empty())
    path = ctx.arg.chroot + "/" + path_clean(path);

  int fd = ::open(path.c_str(), O_RDONLY);
  if (fd == -1)
    return nullptr;

  struct stat st;
  if (fstat(fd, &st) == -1)
    Fatal(ctx) << path << ": fstat failed: " << errno_string();

  MappedFile *mf = new MappedFile;
  mf->name = path;
  mf->size = st.st_size;

  // Modification time in nanoseconds. macOS names the timespec field
  // differently from other POSIX systems.
#ifdef __APPLE__
  mf->mtime = (u64)st.st_mtimespec.tv_sec * 1000000000 + st.st_mtimespec.tv_nsec;
#else
  mf->mtime = (u64)st.st_mtim.tv_sec * 1000000000 + st.st_mtim.tv_nsec;
#endif

  if (st.st_size > 0) {
    // MAP_PRIVATE makes the mapping copy-on-write: it is writable, but
    // writes never reach the underlying file.
    mf->data = (u8 *)mmap(nullptr, st.st_size, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE, fd, 0);
    if (mf->data == MAP_FAILED)
      Fatal(ctx) << path << ": mmap failed: " << errno_string();
  }

  // The mapping stays valid after the descriptor is closed, so release
  // it here instead of leaking one descriptor per input file.
  close(fd);
  return mf;
}
// Like open(), but a file that cannot be opened is a fatal error.
template <typename C>
MappedFile<C> *MappedFile<C>::must_open(C &ctx, std::string path) {
  if (MappedFile *mf = MappedFile::open(ctx, path))
    return mf;
  // Fatal's destructor is [[noreturn]], so control never falls off the
  // end of this function.
  Fatal(ctx) << "cannot open " << path << ": " << errno_string();
}
// Creates a MappedFile named `name` that views `size` bytes of this
// file's data starting at byte `start`. The slice shares the mapping
// and records this file as its parent; the range is not checked
// against this file's size.
// NOTE(review): nothing visible here frees the returned object and
// `ctx` is unused -- confirm who owns slices.
template <typename C>
MappedFile<C> *
MappedFile<C>::slice(C &ctx, std::string name, u64 start, u64 size) {
  MappedFile *mf = new MappedFile<C>;
  mf->name = name;
  mf->data = data + start;
  mf->size = size;
  mf->parent = this;
  return mf;
}
// Unmaps the file contents. Slices (objects with a parent) point into
// their parent's mapping and must not unmap it; empty files were never
// mapped.
template <typename C>
MappedFile<C>::~MappedFile() {
  if (size && !parent)
    munmap(data, size);
}
} // namespace mold