Merge 5bf1955fa9 into 7089304046

Simplify
Merge pull request #1298 from yujincheng08/patch-1
2024-07-14 16:20:34 +03:00 · 2024-07-06 02:14:41 +01:00 · 2024-07-05 19:47:33 +09:00 · 2024-07-05 19:45:10 +09:00 · 2024-07-05 18:27:00 +09:00 · 2024-07-05 17:58:13 +09:00
13 changed files with 311 additions and 284 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -13,7 +13,6 @@ jobs:
        target:
        # Disable PCH for the default configuration. This prevents relying on implicit includes.
        - '-DCMAKE_DISABLE_PRECOMPILE_HEADERS=On'
-        - '-DMOLD_USE_ASAN=On'
        - '-DMOLD_USE_TSAN=On'
    runs-on: ubuntu-20.04
    steps:
@ -110,7 +109,7 @@ jobs:
          !build/CMakeFiles

  build-macos:
-    runs-on: macos-11
+    runs-on: macos-12
    strategy:
      matrix:
        target:
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -328,7 +328,6 @@ list(APPEND MOLD_ELF_TEMPLATE_FILES
  elf/output-chunks.cc
  elf/passes.cc
  elf/relocatable.cc
-  elf/subprocess.cc
  elf/thunks.cc
  elf/tls.cc
  )
@ -339,6 +338,12 @@ else()
  list(APPEND MOLD_ELF_TEMPLATE_FILES elf/lto-unix.cc)
 endif()

+if(WIN32)
+  list(APPEND MOLD_ELF_TEMPLATE_FILES elf/subprocess-win32.cc)
+else()
+  list(APPEND MOLD_ELF_TEMPLATE_FILES elf/subprocess-unix.cc)
+endif()
+
 function(mold_instantiate_templates SOURCE TARGET)
  set(PATH ${CMAKE_BINARY_DIR}/${SOURCE}.${TARGET}.cc)
  if(NOT EXISTS ${PATH})
--- a/docs/design.md
+++ b/docs/design.md
@ -163,7 +163,7 @@ tool.
  (i.e. `_start`) or a few other root sections. In mold, we are using
  multiple threads to mark sections concurrently.

- Similarly, BFD, gold an lld support Identical Comdat Folding (ICF)
+- Similarly, BFD, gold and lld support Identical Comdat Folding (ICF)
  as yet another size optimization. ICF merges two or more read-only
  sections that happen to have the same contents and relocations.
  To do that, we have to find isomorphic subgraphs from larger graphs.
--- a/elf/input-files.cc
+++ b/elf/input-files.cc
@ -677,104 +677,6 @@ void ObjectFile<E>::sort_relocations(Context<E> &ctx) {
  }
 }

-static size_t find_null(std::string_view data, i64 pos, i64 entsize) {
-  if (entsize == 1)
-    return data.find('\0', pos);
-
-  for (; pos <= data.size() - entsize; pos += entsize)
-    if (data.substr(pos, entsize).find_first_not_of('\0') == data.npos)
-      return pos;
-
-  return data.npos;
-}
-
-// Mergeable sections (sections with SHF_MERGE bit) typically contain
-// string literals. Linker is expected to split the section contents
-// into null-terminated strings, merge them with mergeable strings
-// from other object files, and emit uniquified strings to an output
-// file.
-//
-// This mechanism reduces the size of an output file. If two source
-// files happen to contain the same string literal, the output will
-// contain only a single copy of it.
-//
-// It is less common than string literals, but mergeable sections can
-// contain fixed-sized read-only records too.
-//
-// This function splits the section contents into small pieces that we
-// call "section fragments". Section fragment is a unit of merging.
-//
-// We do not support mergeable sections that have relocations.
-template <typename E>
-static std::unique_ptr<MergeableSection<E>>
-split_section(Context<E> &ctx, InputSection<E> &sec) {
-  if (!sec.is_alive || sec.relsec_idx != -1 || sec.sh_size == 0)
-    return nullptr;
-
-  const ElfShdr<E> &shdr = sec.shdr();
-  if (!(shdr.sh_flags & SHF_MERGE))
-    return nullptr;
-
-  i64 entsize = shdr.sh_entsize;
-  if (entsize == 0)
-    entsize = (shdr.sh_flags & SHF_STRINGS) ? 1 : (int)shdr.sh_addralign;
-
-  if (entsize == 0)
-    return nullptr;
-
-  i64 addralign = shdr.sh_addralign;
-  if (addralign == 0)
-    addralign = 1;
-
-  std::unique_ptr<MergeableSection<E>> m(new MergeableSection<E>);
-  m->parent = MergedSection<E>::get_instance(ctx, sec.name(), shdr.sh_type,
-                                             shdr.sh_flags, entsize, addralign);
-  m->p2align = sec.p2align;
-
-  // If thes section contents are compressed, uncompress them.
-  sec.uncompress(ctx);
-
-  std::string_view data = sec.contents;
-  m->contents = sec.contents;
-
-  if (data.size() > UINT32_MAX)
-    Fatal(ctx) << sec << ": mergeable section too large";
-
-  // Split sections
-  if (shdr.sh_flags & SHF_STRINGS) {
-    for (i64 pos = 0; pos < data.size();) {
-      m->frag_offsets.push_back(pos);
-      size_t end = find_null(data, pos, entsize);
-      if (end == data.npos)
-        Fatal(ctx) << sec << ": string is not null terminated";
-      pos = end + entsize;
-    }
-  } else {
-    if (data.size() % entsize)
-      Fatal(ctx) << sec << ": section size is not multiple of sh_entsize";
-    m->frag_offsets.reserve(data.size() / entsize);
-
-    for (i64 pos = 0; pos < data.size(); pos += entsize)
-      m->frag_offsets.push_back(pos);
-  }
-
-  // Compute hashes for section pieces
-  HyperLogLog estimator;
-  m->hashes.reserve(m->frag_offsets.size());
-
-  for (i64 i = 0; i < m->frag_offsets.size(); i++) {
-    u64 hash = hash_string(m->get_contents(i));
-    m->hashes.push_back(hash);
-    estimator.insert(hash);
-  }
-
-  m->parent->estimator.merge(estimator);
-
-  static Counter counter("string_fragments");
-  counter += m->frag_offsets.size();
-  return m;
-}
-
 // Usually a section is an atomic unit of inclusion or exclusion.
 // Linker doesn't care about its contents. However, if a section is a
 // mergeable section (a section with SHF_MERGE bit set), the linker is
@ -818,34 +720,45 @@ split_section(Context<E> &ctx, InputSection<E> &sec) {
 // section piece, the section piece is attached to the symbol.
 template <typename E>
 void ObjectFile<E>::initialize_mergeable_sections(Context<E> &ctx) {
-  mergeable_sections.resize(sections.size());
+  // Convert InputSections to MergeableSections
+  for (i64 i = 0; i < this->sections.size(); i++) {
+    InputSection<E> *isec = this->sections[i].get();
+    if (!isec || isec->sh_size == 0 || isec->relsec_idx != -1)
+      continue;

-  for (i64 i = 0; i < sections.size(); i++) {
-    if (std::unique_ptr<InputSection<E>> &isec = sections[i]) {
-      if (std::unique_ptr<MergeableSection<E>> m = split_section(ctx, *isec)) {
-        mergeable_sections[i] = std::move(m);
-        isec->is_alive = false;
-      }
+    MergedSection<E> *parent =
+      MergedSection<E>::get_instance(ctx, isec->name(), isec->shdr());
+
+    if (parent) {
+      this->mergeable_sections[i] =
+        std::make_unique<MergeableSection<E>>(ctx, *parent, this->sections[i]);
+      this->sections[i] = nullptr;
    }
  }
+
+  // Split section contents
+  for (std::unique_ptr<MergeableSection<E>> &sec : mergeable_sections)
+    if (sec)
+      sec->split_contents(ctx);
 }

 template <typename E>
 void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
  for (std::unique_ptr<MergeableSection<E>> &m : mergeable_sections) {
-    if (m) {
-      m->fragments.reserve(m->frag_offsets.size());
+    if (!m)
+      continue;

-      for (i64 i = 0; i < m->frag_offsets.size(); i++) {
-        SectionFragment<E> *frag =
-          m->parent->insert(ctx, m->get_contents(i), m->hashes[i], m->p2align);
-        m->fragments.push_back(frag);
-      }
+    m->fragments.reserve(m->frag_offsets.size());

-      // Reclaim memory as we'll never use this vector again
-      m->hashes.clear();
-      m->hashes.shrink_to_fit();
+    for (i64 i = 0; i < m->frag_offsets.size(); i++) {
+      SectionFragment<E> *frag =
+        m->parent.insert(ctx, m->get_contents(i), m->hashes[i], m->p2align);
+      m->fragments.push_back(frag);
    }
+
+    // Reclaim memory as we'll never use this vector again
+    m->hashes.clear();
+    m->hashes.shrink_to_fit();
  }

  // Attach section pieces to symbols.
@ -872,24 +785,25 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
  }

  // Compute the size of frag_syms.
-  i64 nfrag_syms = 0;
+  std::vector<InputSection<E> *> vec;
  for (std::unique_ptr<InputSection<E>> &isec : sections)
    if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC))
-      for (ElfRel<E> &r : isec->get_rels(ctx))
-        if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
-            esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)])
-          nfrag_syms++;
+      vec.push_back(isec.get());
+
+  i64 nfrag_syms = 0;
+  for (InputSection<E> *isec : vec)
+    for (ElfRel<E> &r : isec->get_rels(ctx))
+      if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
+          esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)])
+        nfrag_syms++;

  this->frag_syms.resize(nfrag_syms);

-  // For each relocation referring a mergeable section symbol, we create
-  // a new dummy non-section symbol and redirect the relocation to the
-  // newly-created symbol.
+  // For each relocation referring to a mergeable section symbol, we
+  // create a new dummy non-section symbol and redirect the relocation
+  // to the newly created symbol.
  i64 idx = 0;
-  for (std::unique_ptr<InputSection<E>> &isec : sections) {
-    if (!isec || !isec->is_alive || !(isec->shdr().sh_flags & SHF_ALLOC))
-      continue;
-
+  for (InputSection<E> *isec : vec) {
    for (ElfRel<E> &r : isec->get_rels(ctx)) {
      const ElfSym<E> &esym = this->elf_syms[r.r_sym];
      if (esym.st_type != STT_SECTION)
@ -929,6 +843,8 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
 template <typename E>
 void ObjectFile<E>::parse(Context<E> &ctx) {
  sections.resize(this->elf_sections.size());
+  mergeable_sections.resize(sections.size());
+
  symtab_sec = this->find_section(SHT_SYMTAB);

  if (symtab_sec) {
--- a/elf/input-sections.cc
+++ b/elf/input-sections.cc
@ -552,9 +552,88 @@ bool InputSection<E>::record_undef_error(Context<E> &ctx, const ElfRel<E> &rel)
  return false;
 }

+template <typename E>
+MergeableSection<E>::MergeableSection(Context<E> &ctx, MergedSection<E> &parent,
+                                      std::unique_ptr<InputSection<E>> &isec)
+  : parent(parent), section(std::move(isec)), p2align(section->p2align) {
+  section->uncompress(ctx);
+}
+
+static size_t find_null(std::string_view data, i64 pos, i64 entsize) {
+  if (entsize == 1)
+    return data.find('\0', pos);
+
+  for (; pos <= data.size() - entsize; pos += entsize)
+    if (data.substr(pos, entsize).find_first_not_of('\0') == data.npos)
+      return pos;
+
+  return data.npos;
+}
+
+// Mergeable sections (sections with SHF_MERGE bit) typically contain
+// string literals. Linker is expected to split the section contents
+// into null-terminated strings, merge them with mergeable strings
+// from other object files, and emit uniquified strings to an output
+// file.
+//
+// This mechanism reduces the size of an output file. If two source
+// files happen to contain the same string literal, the output will
+// contain only a single copy of it.
+//
+// It is less common than string literals, but mergeable sections can
+// contain fixed-sized read-only records too.
+//
+// This function splits the section contents into small pieces that we
+// call "section fragments". Section fragment is a unit of merging.
+//
+// We do not support mergeable sections that have relocations.
+template <typename E>
+void MergeableSection<E>::split_contents(Context<E> &ctx) {
+  std::string_view data = section->contents;
+  if (data.size() > UINT32_MAX)
+    Fatal(ctx) << *section
+               << ": mergeable section too large";
+
+  i64 entsize = parent.shdr.sh_entsize;
+
+  // Split sections
+  if (parent.shdr.sh_flags & SHF_STRINGS) {
+    for (i64 pos = 0; pos < data.size();) {
+      frag_offsets.push_back(pos);
+      size_t end = find_null(data, pos, entsize);
+      if (end == data.npos)
+        Fatal(ctx) << *section << ": string is not null terminated";
+      pos = end + entsize;
+    }
+  } else {
+    if (data.size() % entsize)
+      Fatal(ctx) << *section << ": section size is not multiple of sh_entsize";
+    frag_offsets.reserve(data.size() / entsize);
+
+    for (i64 pos = 0; pos < data.size(); pos += entsize)
+      frag_offsets.push_back(pos);
+  }
+
+  // Compute hashes for section pieces
+  HyperLogLog estimator;
+  hashes.reserve(frag_offsets.size());
+
+  for (i64 i = 0; i < frag_offsets.size(); i++) {
+    u64 hash = hash_string(get_contents(i));
+    hashes.push_back(hash);
+    estimator.insert(hash);
+  }
+
+  parent.estimator.merge(estimator);
+
+  static Counter counter("string_fragments");
+  counter += frag_offsets.size();
+}
+
 using E = MOLD_TARGET;

 template bool cie_equals(const CieRecord<E> &, const CieRecord<E> &);
 template class InputSection<E>;
+template class MergeableSection<E>;

 } // namespace mold::elf
--- a/elf/lto-unix.cc
+++ b/elf/lto-unix.cc
@ -567,11 +567,7 @@ static ElfSym<E> to_elf_sym(PluginSymbol &psym) {
 // Returns false if it's GCC.
 template <typename E>
 static bool is_llvm(Context<E> &ctx) {
-#ifdef __MINGW32__
-  return ctx.arg.plugin.ends_with("LLVMgold.dll");
-#else
-  return ctx.arg.plugin.ends_with("LLVMgold.so");
-#endif
+  return ctx.arg.plugin.find("LLVMgold.") != ctx.arg.plugin.npos;
 }

 // Returns true if a given linker plugin supports the get_symbols_v3 API.
--- a/elf/main.cc
+++ b/elf/main.cc
@ -345,12 +345,8 @@ int elf_main(int argc, char **argv) {
  Context<E> ctx;

  // Process -run option first. process_run_subcommand() does not return.
-  if (argc >= 2 && (argv[1] == "-run"sv || argv[1] == "--run"sv)) {
-#if defined(_WIN32) || defined(__APPLE__)
-    Fatal(ctx) << "-run is supported only on Unix";
-#endif
+  if (argc >= 2 && (argv[1] == "-run"sv || argv[1] == "--run"sv))
    process_run_subcommand(ctx, argc, argv);
-  }

  // Parse non-positional command line options
  ctx.cmdline_args = expand_response_files(ctx, argv);
@ -375,12 +371,8 @@ int elf_main(int argc, char **argv) {
                 << ": " << errno_string();

  // Fork a subprocess unless --no-fork is given.
-  std::function<void()> on_complete;
-
-#if !defined(_WIN32) && !defined(__APPLE__)
  if (ctx.arg.fork)
-    on_complete = fork_child();
-#endif
+    fork_child();

  acquire_global_lock();

@ -664,8 +656,13 @@ int elf_main(int argc, char **argv) {
  // so we sort them.
  ctx.reldyn->sort(ctx);

-  // Zero-clear paddings between sections
-  clear_padding(ctx);
+  // .note.gnu.build-id section contains a cryptographic hash of the
+  // entire output file. Now that we wrote everything except build-id,
+  // we can compute it.
+  if (ctx.buildid) {
+    compute_build_id(ctx);
+    ctx.buildid->copy_buf(ctx);
+  }

  // .gdb_index's contents cannot be constructed before applying
  // relocations to other debug sections. We have relocated debug
@ -673,12 +670,6 @@ int elf_main(int argc, char **argv) {
  if (ctx.gdb_index)
    write_gdb_index(ctx);

-  // .note.gnu.build-id section contains a cryptographic hash of the
-  // entire output file. Now that we wrote everything except build-id,
-  // we can compute it.
-  if (ctx.buildid)
-    ctx.buildid->write_buildid(ctx);
-
  t_copy.stop();
  ctx.checkpoint();

@ -707,8 +698,8 @@ int elf_main(int argc, char **argv) {
  std::cout << std::flush;
  std::cerr << std::flush;

-  if (on_complete)
-    on_complete();
+  if (ctx.arg.fork)
+    notify_parent();

  release_global_lock();

--- a/elf/mold.h
+++ b/elf/mold.h
@ -47,6 +47,7 @@ template <typename E> class Symbol;
 template <typename E> struct CieRecord;
 template <typename E> struct Context;
 template <typename E> struct FdeRecord;
+template <typename E> class MergeableSection;
 template <typename E> class RelocSection;

 template <typename E>
@ -799,8 +800,7 @@ template <typename E>
 class MergedSection : public Chunk<E> {
 public:
  static MergedSection<E> *
-  get_instance(Context<E> &ctx, std::string_view name, i64 type, i64 flags,
-               i64 entsize, i64 addralign);
+  get_instance(Context<E> &ctx, std::string_view name, const ElfShdr<E> &shdr);

  SectionFragment<E> *insert(Context<E> &ctx, std::string_view data,
                             u64 hash, i64 p2align);
@ -949,9 +949,8 @@ public:

  void update_shdr(Context<E> &ctx) override;
  void copy_buf(Context<E> &ctx) override;
-  void write_buildid(Context<E> &ctx);

-  static constexpr i64 HEADER_SIZE = 16;
+  std::vector<u8> contents;
 };

 template <typename E>
@ -1090,12 +1089,17 @@ struct ComdatGroupRef {
 };

 template <typename E>
-struct MergeableSection {
+class MergeableSection {
+public:
+  MergeableSection(Context<E> &ctx, MergedSection<E> &parent,
+                   std::unique_ptr<InputSection<E>> &isec);
+
+  void split_contents(Context<E> &ctx);
  std::pair<SectionFragment<E> *, i64> get_fragment(i64 offset);
  std::string_view get_contents(i64 idx);

-  MergedSection<E> *parent;
-  std::string_view contents;
+  MergedSection<E> &parent;
+  std::unique_ptr<InputSection<E>> section;
  std::vector<u32> frag_offsets;
  std::vector<u32> hashes;
  std::vector<SectionFragment<E> *> fragments;
@ -1368,7 +1372,8 @@ void print_map(Context<E> &ctx);
 // subprocess.cc
 //

-std::function<void()> fork_child();
+void fork_child();
+void notify_parent();

 template <typename E>
 [[noreturn]]
@ -1426,11 +1431,11 @@ template <typename E> void apply_version_script(Context<E> &);
 template <typename E> void parse_symbol_version(Context<E> &);
 template <typename E> void compute_import_export(Context<E> &);
 template <typename E> void compute_address_significance(Context<E> &);
-template <typename E> void clear_padding(Context<E> &);
 template <typename E> void compute_section_headers(Context<E> &);
 template <typename E> i64 set_osec_offsets(Context<E> &);
 template <typename E> void fix_synthetic_symbols(Context<E> &);
 template <typename E> i64 compress_debug_sections(Context<E> &);
+template <typename E> void compute_build_id(Context<E> &);
 template <typename E> void write_dependency_file(Context<E> &);
 template <typename E> void show_stats(Context<E> &);

@ -1565,7 +1570,18 @@ private:
 //

 struct BuildId {
-  i64 size() const;
+  i64 size() const {
+    switch (kind) {
+    case HEX:
+      return value.size();
+    case HASH:
+      return hash_size;
+    case UUID:
+      return 16;
+    default:
+      unreachable();
+    }
+  }

  enum { NONE, HEX, HASH, UUID } kind = NONE;
  std::vector<u8> value;
@ -2411,8 +2427,8 @@ template <typename E>
 std::string_view MergeableSection<E>::get_contents(i64 i) {
  i64 cur = frag_offsets[i];
  if (i == frag_offsets.size() - 1)
-    return contents.substr(cur);
-  return contents.substr(cur, frag_offsets[i + 1] - cur);
+    return section->contents.substr(cur);
+  return section->contents.substr(cur, frag_offsets[i + 1] - cur);
 }

 template <typename E>
--- a/elf/output-chunks.cc
+++ b/elf/output-chunks.cc
@ -1,7 +1,5 @@
 #include "mold.h"
-
 #include "config.h"
-#include "blake3.h"

 #include <cctype>
 #include <set>
@ -1915,15 +1913,26 @@ MergedSection<E>::MergedSection(std::string_view name, i64 flags, i64 type,
 template <typename E>
 MergedSection<E> *
 MergedSection<E>::get_instance(Context<E> &ctx, std::string_view name,
-                               i64 type, i64 flags,
-                               i64 entsize, i64 addralign) {
+                               const ElfShdr<E> &shdr) {
+  if (!(shdr.sh_flags & SHF_MERGE))
+    return nullptr;
+
+  i64 addralign = std::max<i64>(1, shdr.sh_addralign);
+  i64 flags = shdr.sh_flags & ~(u64)SHF_GROUP & ~(u64)SHF_COMPRESSED;
+
+  i64 entsize = shdr.sh_entsize;
+  if (entsize == 0)
+    entsize = (shdr.sh_flags & SHF_STRINGS) ? 1 : (i64)shdr.sh_addralign;
+  if (entsize == 0)
+    return nullptr;
+
  name = get_merged_output_name(ctx, name, flags, entsize, addralign);
-  flags = flags & ~(u64)SHF_GROUP & ~(u64)SHF_COMPRESSED;

  auto find = [&]() -> MergedSection * {
    for (std::unique_ptr<MergedSection<E>> &osec : ctx.merged_sections)
      if (name == osec->name && flags == osec->shdr.sh_flags &&
-          type == osec->shdr.sh_type && entsize == osec->shdr.sh_entsize)
+          shdr.sh_type == osec->shdr.sh_type &&
+          entsize == osec->shdr.sh_entsize)
        return osec.get();
    return nullptr;
  };
@ -1941,7 +1950,7 @@ MergedSection<E>::get_instance(Context<E> &ctx, std::string_view name,
  if (MergedSection *osec = find())
    return osec;

-  MergedSection *osec = new MergedSection(name, flags, type, entsize);
+  MergedSection *osec = new MergedSection(name, flags, shdr.sh_type, entsize);
  ctx.merged_sections.emplace_back(osec);
  return osec;
 }
@ -2526,89 +2535,21 @@ void VerdefSection<E>::copy_buf(Context<E> &ctx) {
  write_vector(ctx.buf + this->shdr.sh_offset, contents);
 }

-inline i64 BuildId::size() const {
-  switch (kind) {
-  case HEX:
-    return value.size();
-  case HASH:
-    return hash_size;
-  case UUID:
-    return 16;
-  default:
-    unreachable();
-  }
-}
-
 template <typename E>
 void BuildIdSection<E>::update_shdr(Context<E> &ctx) {
-  this->shdr.sh_size = HEADER_SIZE + ctx.arg.build_id.size();
+  this->shdr.sh_size = ctx.arg.build_id.size() + 16; // +16 for the header
 }

 template <typename E>
 void BuildIdSection<E>::copy_buf(Context<E> &ctx) {
  U32<E> *base = (U32<E> *)(ctx.buf + this->shdr.sh_offset);
  memset(base, 0, this->shdr.sh_size);
-  base[0] = 4;                          // Name size
-  base[1] = ctx.arg.build_id.size();    // Hash size
-  base[2] = NT_GNU_BUILD_ID;            // Type
-  memcpy(base + 3, "GNU", 4);           // Name string
-}

-// BLAKE3 is a cryptographic hash function just like SHA256.
-// We use it instead of SHA256 because it's faster.
-static void blake3_hash(u8 *buf, i64 size, u8 *out) {
-  blake3_hasher hasher;
-  blake3_hasher_init(&hasher);
-  blake3_hasher_update(&hasher, buf, size);
-  blake3_hasher_finalize(&hasher, out, BLAKE3_OUT_LEN);
-}
-
-template <typename E>
-void BuildIdSection<E>::write_buildid(Context<E> &ctx) {
-  Timer t(ctx, "build_id");
-  u8 *buf = ctx.buf + this->shdr.sh_offset + HEADER_SIZE;
-
-  switch (ctx.arg.build_id.kind) {
-  case BuildId::HEX:
-    write_vector(buf, ctx.arg.build_id.value);
-    return;
-  case BuildId::HASH: {
-    i64 shard_size = 4 * 1024 * 1024;
-    i64 filesize = ctx.output_file->filesize;
-    i64 num_shards = align_to(filesize, shard_size) / shard_size;
-    std::vector<u8> shards(num_shards * BLAKE3_OUT_LEN);
-
-    tbb::parallel_for((i64)0, num_shards, [&](i64 i) {
-      u8 *begin = ctx.buf + shard_size * i;
-      u8 *end = (i == num_shards - 1) ? ctx.buf + filesize : begin + shard_size;
-      blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN);
-
-#ifdef HAVE_MADVISE
-      // Make the kernel page out the file contents we've just written
-      // so that subsequent close(2) call will become quicker.
-      if (i > 0 && ctx.output_file->is_mmapped)
-        madvise(begin, end - begin, MADV_DONTNEED);
-#endif
-    });
-
-    u8 digest[BLAKE3_OUT_LEN];
-    blake3_hash(shards.data(), shards.size(), digest);
-
-    assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN);
-    memcpy(buf, digest, ctx.arg.build_id.size());
-    return;
-  }
-  case BuildId::UUID: {
-    get_random_bytes(buf, 16);
-
-    // Indicate that this is UUIDv4 as defined by RFC4122
-    buf[6] = (buf[6] & 0b0000'1111) | 0b0100'0000;
-    buf[8] = (buf[8] & 0b0011'1111) | 0b1000'0000;
-    return;
-  }
-  default:
-    unreachable();
-  }
+  base[0] = 4;                       // Name size
+  base[1] = ctx.arg.build_id.size(); // Hash size
+  base[2] = NT_GNU_BUILD_ID;         // Type
+  memcpy(base + 3, "GNU", 4);        // Name string
+  write_vector(base + 4, contents);  // Build ID
 }

 template <typename E>
--- a/elf/passes.cc
+++ b/elf/passes.cc
@ -1,4 +1,5 @@
 #include "mold.h"
+#include "blake3.h"

 #include <fstream>
 #include <functional>
@ -442,10 +443,13 @@ static std::string get_cmdline_args(Context<E> &ctx) {

 template <typename E>
 void add_comment_string(Context<E> &ctx, std::string str) {
-  MergedSection<E> *sec =
-    MergedSection<E>::get_instance(ctx, ".comment", SHT_PROGBITS,
-                                   SHF_MERGE | SHF_STRINGS, 1, 1);
+  ElfShdr<E> shdr = {};
+  shdr.sh_type = SHT_PROGBITS;
+  shdr.sh_flags = SHF_MERGE | SHF_STRINGS;
+  shdr.sh_entsize = 1;
+  shdr.sh_addralign = 1;

+  MergedSection<E> *sec = MergedSection<E>::get_instance(ctx, ".comment", shdr);
  if (sec->map.nbuckets == 0)
    sec->map.resize(4096);

@ -1720,6 +1724,22 @@ void copy_chunks(Context<E> &ctx) {

  if constexpr (is_arm32<E>)
    fixup_arm_exidx_section(ctx);
+
+  // Zero-clear paddings between chunks
+  auto zero = [&](Chunk<E> *chunk, i64 next_start) {
+    i64 pos = chunk->shdr.sh_offset + chunk->shdr.sh_size;
+    memset(ctx.buf + pos, 0, next_start - pos);
+  };
+
+  std::vector<Chunk<E> *> chunks = ctx.chunks;
+
+  std::erase_if(chunks, [](Chunk<E> *chunk) {
+    return chunk->shdr.sh_type == SHT_NOBITS;
+  });
+
+  for (i64 i = 1; i < chunks.size(); i++)
+    zero(chunks[i - 1], chunks[i]->shdr.sh_offset);
+  zero(chunks.back(), ctx.output_file->filesize);
 }

 // Rewrite the leading endbr64 instruction with a nop if a function
@ -2168,26 +2188,6 @@ void compute_address_significance(Context<E> &ctx) {
  });
 }

-template <typename E>
-void clear_padding(Context<E> &ctx) {
-  Timer t(ctx, "clear_padding");
-
-  auto zero = [&](Chunk<E> *chunk, i64 next_start) {
-    i64 pos = chunk->shdr.sh_offset + chunk->shdr.sh_size;
-    memset(ctx.buf + pos, 0, next_start - pos);
-  };
-
-  std::vector<Chunk<E> *> chunks = ctx.chunks;
-
-  std::erase_if(chunks, [](Chunk<E> *chunk) {
-    return chunk->shdr.sh_type == SHT_NOBITS;
-  });
-
-  for (i64 i = 1; i < chunks.size(); i++)
-    zero(chunks[i - 1], chunks[i]->shdr.sh_offset);
-  zero(chunks.back(), ctx.output_file->filesize);
-}
-
 // We want to sort output chunks in the following order.
 //
 //   <ELF header>
@ -2999,6 +2999,65 @@ i64 compress_debug_sections(Context<E> &ctx) {
  return set_osec_offsets(ctx);
 }

+// BLAKE3 is a cryptographic hash function just like SHA256.
+// We use it instead of SHA256 because it's faster.
+static void blake3_hash(u8 *buf, i64 size, u8 *out) {
+  blake3_hasher hasher;
+  blake3_hasher_init(&hasher);
+  blake3_hasher_update(&hasher, buf, size);
+  blake3_hasher_finalize(&hasher, out, BLAKE3_OUT_LEN);
+}
+
+template <typename E>
+void compute_build_id(Context<E> &ctx) {
+  Timer t(ctx, "compute_build_id");
+
+  switch (ctx.arg.build_id.kind) {
+  case BuildId::HEX:
+    ctx.buildid->contents = ctx.arg.build_id.value;
+    break;
+  case BuildId::HASH: {
+    i64 shard_size = 4 * 1024 * 1024;
+    i64 filesize = ctx.output_file->filesize;
+    i64 num_shards = align_to(filesize, shard_size) / shard_size;
+    std::vector<u8> shards(num_shards * BLAKE3_OUT_LEN);
+
+    tbb::parallel_for((i64)0, num_shards, [&](i64 i) {
+      u8 *begin = ctx.buf + shard_size * i;
+      u8 *end = (i == num_shards - 1) ? ctx.buf + filesize : begin + shard_size;
+      blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN);
+
+#ifdef HAVE_MADVISE
+      // Make the kernel page out the file contents we've just written
+      // so that subsequent close(2) call will become quicker.
+      if (i > 0 && ctx.output_file->is_mmapped)
+        madvise(begin, end - begin, MADV_DONTNEED);
+#endif
+    });
+
+    u8 buf[BLAKE3_OUT_LEN];
+    blake3_hash(shards.data(), shards.size(), buf);
+
+    assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN);
+    ctx.buildid->contents = {buf, buf + ctx.arg.build_id.size()};
+    break;
+  }
+  case BuildId::UUID: {
+    u8 buf[16];
+    get_random_bytes(buf, 16);
+
+    // Indicate that this is UUIDv4 as defined by RFC4122
+    buf[6] = (buf[6] & 0b0000'1111) | 0b0100'0000;
+    buf[8] = (buf[8] & 0b0011'1111) | 0b1000'0000;
+    ctx.buildid->contents = {buf, buf + 16};
+    break;
+  }
+  default:
+    unreachable();
+  }
+}
+
+
 // Write Makefile-style dependency rules to a file specified by
 // --dependency-file. This is analogous to the compiler's -M flag.
 template <typename E>
@ -3134,11 +3193,11 @@ template void apply_version_script(Context<E> &);
 template void parse_symbol_version(Context<E> &);
 template void compute_import_export(Context<E> &);
 template void compute_address_significance(Context<E> &);
-template void clear_padding(Context<E> &);
 template void compute_section_headers(Context<E> &);
 template i64 set_osec_offsets(Context<E> &);
 template void fix_synthetic_symbols(Context<E> &);
 template i64 compress_debug_sections(Context<E> &);
+template void compute_build_id(Context<E> &);
 template void write_dependency_file(Context<E> &);
 template void show_stats(Context<E> &);

--- a/elf/relocatable.cc
+++ b/elf/relocatable.cc
@ -176,7 +176,6 @@ void combine_objects(Context<E> &ctx) {
  ctx.buf = ctx.output_file->buf;

  copy_chunks(ctx);
-  clear_padding(ctx);
  ctx.output_file->close(ctx);
  ctx.checkpoint();

--- a/elf/subprocess-unix.cc
+++ b/elf/subprocess-unix.cc
@ -1,5 +1,3 @@
-#if !defined(_WIN32) && !defined(__APPLE__)
-
 #include "mold.h"
 #include "config.h"

@ -14,10 +12,12 @@
 namespace mold::elf {

 #ifdef MOLD_X86_64
+static int pipe_write_fd = -1;
+
 // Exiting from a program with large memory usage is slow --
 // it may take a few hundred milliseconds. To hide the latency,
 // we fork a child and let it do the actual linking work.
-std::function<void()> fork_child() {
+void fork_child() {
  int pipefd[2];
  if (pipe(pipefd) == -1) {
    perror("pipe");
@ -50,12 +50,16 @@ std::function<void()> fork_child() {

  // Child
  close(pipefd[0]);
+  pipe_write_fd = pipefd[1];
+}

-  return [=] {
-    char buf[] = {1};
-    [[maybe_unused]] int n = write(pipefd[1], buf, 1);
-    assert(n == 1);
-  };
+void notify_parent() {
+  if (pipe_write_fd == -1)
+    return;
+
+  char buf[] = {1};
+  [[maybe_unused]] int n = write(pipe_write_fd, buf, 1);
+  assert(n == 1);
 }
 #endif

@ -84,6 +88,9 @@ static std::string find_dso(Context<E> &ctx, std::filesystem::path self) {
 template <typename E>
 [[noreturn]]
 void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
+#ifdef __APPLE__
+  Fatal(ctx) << "-run is not supported on macOS";
+#else
  assert(argv[1] == "-run"s || argv[1] == "--run"s);

  if (!argv[2])
@ -111,6 +118,7 @@ void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
  // Execute a given command
  execvp(argv[2], argv + 2);
  Fatal(ctx) << "mold -run failed: " << argv[2] << ": " << errno_string();
+#endif
 }

 using E = MOLD_TARGET;
@ -118,5 +126,3 @@ using E = MOLD_TARGET;
 template void process_run_subcommand(Context<E> &, int, char **);

 } // namespace mold::elf
-
-#endif
--- a/elf/subprocess-win32.cc
+++ b/elf/subprocess-win32.cc
@ -0,0 +1,20 @@
+#include "mold.h"
+
+namespace mold::elf {
+
+#ifdef MOLD_X86_64
+void fork_child() {}
+void notify_parent() {}
+#endif
+
+template <typename E>
+[[noreturn]]
+void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
+  Fatal(ctx) << "-run is supported only on Unix";
+}
+
+using E = MOLD_TARGET;
+
+template void process_run_subcommand(Context<E> &, int, char **);
+
+} // namespace mold::elf
Author	SHA1	Message	Date
Shiv Dhar	77a79f0e57	Merge `5bf1955fa9` into `7089304046`	2024-07-06 02:14:41 +01:00
Rui Ueyama	7089304046	Simplify	2024-07-05 19:47:33 +09:00
Rui Ueyama	5ab3708e9f	Merge pull request #1298 from yujincheng08/patch-1 Fix cross-compile lto on MacOS	2024-07-05 19:45:10 +09:00
Rui Ueyama	aa939d7a6a	Temporarily disable ASAN tests It looks like starting with Clang 18.1.3, ASAN began reporting an error on OneTBB.	2024-07-05 18:27:00 +09:00
Rui Ueyama	a63bbcd7b1	Attempt to fix CI	2024-07-05 17:58:13 +09:00
Rui Ueyama	339f9c485a	Attempt to fix CI	2024-07-05 17:42:32 +09:00
LoveSy	4905f6d3f2	Use macos-12 because macos-11 has been removed https://github.blog/changelog/2024-05-20-actions-upcoming-changes-to-github-hosted-macos-runners/#macos-11-deprecation-and-removal	2024-07-05 15:32:37 +08:00
Rui Ueyama	cb8cc1c8f0	Refactor	2024-07-05 16:00:09 +09:00
Rui Ueyama	c6b54532e9	Refactor	2024-07-04 16:17:25 +09:00
Rui Ueyama	3936134823	Refactor	2024-07-04 15:08:37 +09:00
Rui Ueyama	55ca05bab6	Refactor	2024-07-04 13:59:29 +09:00
LoveSy	7305a5c5dd	Fix cross-compile lto on MacOS	2024-07-03 21:55:22 +08:00
Shiv Dhar	5bf1955fa9	Fix small typo	2024-03-02 01:36:05 +05:30