Merge 24f818820b into 7089304046

Simplify
Merge pull request #1298 from yujincheng08/patch-1
2024-10-05 17:17:40 +03:00 · 2024-07-06 02:14:41 +01:00 · 2024-07-05 19:47:33 +09:00 · 2024-07-05 19:45:10 +09:00 · 2024-07-05 18:27:00 +09:00 · 2024-07-05 17:58:13 +09:00
13 changed files with 312 additions and 285 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -13,7 +13,6 @@ jobs:
        target:
        # Disable PCH for the default configuration. This prevents relying on implicit includes.
        - '-DCMAKE_DISABLE_PRECOMPILE_HEADERS=On'
-        - '-DMOLD_USE_ASAN=On'
        - '-DMOLD_USE_TSAN=On'
    runs-on: ubuntu-20.04
    steps:
@ -110,7 +109,7 @@ jobs:
          !build/CMakeFiles

  build-macos:
-    runs-on: macos-11
+    runs-on: macos-12
    strategy:
      matrix:
        target:
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -328,7 +328,6 @@ list(APPEND MOLD_ELF_TEMPLATE_FILES
  elf/output-chunks.cc
  elf/passes.cc
  elf/relocatable.cc
-  elf/subprocess.cc
  elf/thunks.cc
  elf/tls.cc
  )
@ -339,6 +338,12 @@ else()
  list(APPEND MOLD_ELF_TEMPLATE_FILES elf/lto-unix.cc)
 endif()

+if(WIN32)
+  list(APPEND MOLD_ELF_TEMPLATE_FILES elf/subprocess-win32.cc)
+else()
+  list(APPEND MOLD_ELF_TEMPLATE_FILES elf/subprocess-unix.cc)
+endif()
+
 function(mold_instantiate_templates SOURCE TARGET)
  set(PATH ${CMAKE_BINARY_DIR}/${SOURCE}.${TARGET}.cc)
  if(NOT EXISTS ${PATH})
--- a/docs/design.md
+++ b/docs/design.md
@ -163,7 +163,7 @@ tool.
  (i.e. `_start`) or a few other root sections. In mold, we are using
  multiple threads to mark sections concurrently.

- Similarly, BFD, gold an lld support Identical Comdat Folding (ICF)
+- Similarly, BFD, gold and lld support Identical Comdat Folding (ICF)
  as yet another size optimization. ICF merges two or more read-only
  sections that happen to have the same contents and relocations.
  To do that, we have to find isomorphic subgraphs from larger graphs.
@ -381,7 +381,7 @@ not plan to implement and why I turned them down.
  fixing the final file layout.

  The other reason to reject this idea is because there's a good
-  chance for this idea to have a negative impact on linker's overall
+  chance for this idea to have a negative impact on the linker's overall
  performance. If we copy file contents before fixing the layout, we
  can't apply relocations to them while copying because symbol
  addresses are not available yet. If we fix the file layout first, we
--- a/elf/input-files.cc
+++ b/elf/input-files.cc
@ -677,104 +677,6 @@ void ObjectFile<E>::sort_relocations(Context<E> &ctx) {
  }
 }

-static size_t find_null(std::string_view data, i64 pos, i64 entsize) {
-  if (entsize == 1)
-    return data.find('\0', pos);
-
-  for (; pos <= data.size() - entsize; pos += entsize)
-    if (data.substr(pos, entsize).find_first_not_of('\0') == data.npos)
-      return pos;
-
-  return data.npos;
-}
-
-// Mergeable sections (sections with SHF_MERGE bit) typically contain
-// string literals. Linker is expected to split the section contents
-// into null-terminated strings, merge them with mergeable strings
-// from other object files, and emit uniquified strings to an output
-// file.
-//
-// This mechanism reduces the size of an output file. If two source
-// files happen to contain the same string literal, the output will
-// contain only a single copy of it.
-//
-// It is less common than string literals, but mergeable sections can
-// contain fixed-sized read-only records too.
-//
-// This function splits the section contents into small pieces that we
-// call "section fragments". Section fragment is a unit of merging.
-//
-// We do not support mergeable sections that have relocations.
-template <typename E>
-static std::unique_ptr<MergeableSection<E>>
-split_section(Context<E> &ctx, InputSection<E> &sec) {
-  if (!sec.is_alive || sec.relsec_idx != -1 || sec.sh_size == 0)
-    return nullptr;
-
-  const ElfShdr<E> &shdr = sec.shdr();
-  if (!(shdr.sh_flags & SHF_MERGE))
-    return nullptr;
-
-  i64 entsize = shdr.sh_entsize;
-  if (entsize == 0)
-    entsize = (shdr.sh_flags & SHF_STRINGS) ? 1 : (int)shdr.sh_addralign;
-
-  if (entsize == 0)
-    return nullptr;
-
-  i64 addralign = shdr.sh_addralign;
-  if (addralign == 0)
-    addralign = 1;
-
-  std::unique_ptr<MergeableSection<E>> m(new MergeableSection<E>);
-  m->parent = MergedSection<E>::get_instance(ctx, sec.name(), shdr.sh_type,
-                                             shdr.sh_flags, entsize, addralign);
-  m->p2align = sec.p2align;
-
-  // If thes section contents are compressed, uncompress them.
-  sec.uncompress(ctx);
-
-  std::string_view data = sec.contents;
-  m->contents = sec.contents;
-
-  if (data.size() > UINT32_MAX)
-    Fatal(ctx) << sec << ": mergeable section too large";
-
-  // Split sections
-  if (shdr.sh_flags & SHF_STRINGS) {
-    for (i64 pos = 0; pos < data.size();) {
-      m->frag_offsets.push_back(pos);
-      size_t end = find_null(data, pos, entsize);
-      if (end == data.npos)
-        Fatal(ctx) << sec << ": string is not null terminated";
-      pos = end + entsize;
-    }
-  } else {
-    if (data.size() % entsize)
-      Fatal(ctx) << sec << ": section size is not multiple of sh_entsize";
-    m->frag_offsets.reserve(data.size() / entsize);
-
-    for (i64 pos = 0; pos < data.size(); pos += entsize)
-      m->frag_offsets.push_back(pos);
-  }
-
-  // Compute hashes for section pieces
-  HyperLogLog estimator;
-  m->hashes.reserve(m->frag_offsets.size());
-
-  for (i64 i = 0; i < m->frag_offsets.size(); i++) {
-    u64 hash = hash_string(m->get_contents(i));
-    m->hashes.push_back(hash);
-    estimator.insert(hash);
-  }
-
-  m->parent->estimator.merge(estimator);
-
-  static Counter counter("string_fragments");
-  counter += m->frag_offsets.size();
-  return m;
-}
-
 // Usually a section is an atomic unit of inclusion or exclusion.
 // Linker doesn't care about its contents. However, if a section is a
 // mergeable section (a section with SHF_MERGE bit set), the linker is
@ -818,34 +720,45 @@ split_section(Context<E> &ctx, InputSection<E> &sec) {
 // section piece, the section piece is attached to the symbol.
 template <typename E>
 void ObjectFile<E>::initialize_mergeable_sections(Context<E> &ctx) {
-  mergeable_sections.resize(sections.size());
+  // Convert InputSections to MergeableSections
+  for (i64 i = 0; i < this->sections.size(); i++) {
+    InputSection<E> *isec = this->sections[i].get();
+    if (!isec || isec->sh_size == 0 || isec->relsec_idx != -1)
+      continue;

-  for (i64 i = 0; i < sections.size(); i++) {
-    if (std::unique_ptr<InputSection<E>> &isec = sections[i]) {
-      if (std::unique_ptr<MergeableSection<E>> m = split_section(ctx, *isec)) {
-        mergeable_sections[i] = std::move(m);
-        isec->is_alive = false;
-      }
+    MergedSection<E> *parent =
+      MergedSection<E>::get_instance(ctx, isec->name(), isec->shdr());
+
+    if (parent) {
+      this->mergeable_sections[i] =
+        std::make_unique<MergeableSection<E>>(ctx, *parent, this->sections[i]);
+      this->sections[i] = nullptr;
    }
  }
+
+  // Split section contents
+  for (std::unique_ptr<MergeableSection<E>> &sec : mergeable_sections)
+    if (sec)
+      sec->split_contents(ctx);
 }

 template <typename E>
 void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
  for (std::unique_ptr<MergeableSection<E>> &m : mergeable_sections) {
-    if (m) {
-      m->fragments.reserve(m->frag_offsets.size());
+    if (!m)
+      continue;

-      for (i64 i = 0; i < m->frag_offsets.size(); i++) {
-        SectionFragment<E> *frag =
-          m->parent->insert(ctx, m->get_contents(i), m->hashes[i], m->p2align);
-        m->fragments.push_back(frag);
-      }
+    m->fragments.reserve(m->frag_offsets.size());

-      // Reclaim memory as we'll never use this vector again
-      m->hashes.clear();
-      m->hashes.shrink_to_fit();
+    for (i64 i = 0; i < m->frag_offsets.size(); i++) {
+      SectionFragment<E> *frag =
+        m->parent.insert(ctx, m->get_contents(i), m->hashes[i], m->p2align);
+      m->fragments.push_back(frag);
    }
+
+    // Reclaim memory as we'll never use this vector again
+    m->hashes.clear();
+    m->hashes.shrink_to_fit();
  }

  // Attach section pieces to symbols.
@ -872,24 +785,25 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
  }

  // Compute the size of frag_syms.
-  i64 nfrag_syms = 0;
+  std::vector<InputSection<E> *> vec;
  for (std::unique_ptr<InputSection<E>> &isec : sections)
    if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC))
-      for (ElfRel<E> &r : isec->get_rels(ctx))
-        if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
-            esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)])
-          nfrag_syms++;
+      vec.push_back(isec.get());
+
+  i64 nfrag_syms = 0;
+  for (InputSection<E> *isec : vec)
+    for (ElfRel<E> &r : isec->get_rels(ctx))
+      if (const ElfSym<E> &esym = this->elf_syms[r.r_sym];
+          esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)])
+        nfrag_syms++;

  this->frag_syms.resize(nfrag_syms);

-  // For each relocation referring a mergeable section symbol, we create
-  // a new dummy non-section symbol and redirect the relocation to the
-  // newly-created symbol.
+  // For each relocation referring to a mergeable section symbol, we
+  // create a new dummy non-section symbol and redirect the relocation
+  // to the newly created symbol.
  i64 idx = 0;
-  for (std::unique_ptr<InputSection<E>> &isec : sections) {
-    if (!isec || !isec->is_alive || !(isec->shdr().sh_flags & SHF_ALLOC))
-      continue;
-
+  for (InputSection<E> *isec : vec) {
    for (ElfRel<E> &r : isec->get_rels(ctx)) {
      const ElfSym<E> &esym = this->elf_syms[r.r_sym];
      if (esym.st_type != STT_SECTION)
@ -929,6 +843,8 @@ void ObjectFile<E>::resolve_section_pieces(Context<E> &ctx) {
 template <typename E>
 void ObjectFile<E>::parse(Context<E> &ctx) {
  sections.resize(this->elf_sections.size());
+  mergeable_sections.resize(sections.size());
+
  symtab_sec = this->find_section(SHT_SYMTAB);

  if (symtab_sec) {
--- a/elf/input-sections.cc
+++ b/elf/input-sections.cc
@ -552,9 +552,88 @@ bool InputSection<E>::record_undef_error(Context<E> &ctx, const ElfRel<E> &rel)
  return false;
 }

+// Creates a mergeable section that belongs to the merged output section
+// `parent` and takes over the input section held by `isec`. Ownership
+// is moved out of the caller's `isec`, which is null afterwards.
+//
+// NOTE(review): `p2align` is initialized by reading through `section`,
+// so this relies on `section` being declared before `p2align` in the
+// class -- confirm against the declaration in mold.h.
+template <typename E>
+MergeableSection<E>::MergeableSection(Context<E> &ctx, MergedSection<E> &parent,
+                                      std::unique_ptr<InputSection<E>> &isec)
+  : parent(parent), section(std::move(isec)), p2align(section->p2align) {
+  // If the section contents are compressed, uncompress them.
+  section->uncompress(ctx);
+}
+
+// Returns the offset of the first null terminator in `data` at or after
+// `pos`, or std::string_view::npos if there is none. A terminator is
+// `entsize` consecutive zero bytes; for entsize > 1 only the offsets
+// pos, pos + entsize, pos + 2 * entsize, ... are examined.
+static size_t find_null(std::string_view data, i64 pos, i64 entsize) {
+  if (entsize == 1)
+    return data.find('\0', pos);
+
+  // Do the bounds check in signed arithmetic. The unsigned expression
+  // `data.size() - entsize` wraps around to a huge value when fewer than
+  // `entsize` bytes exist, which would let a truncated run of zeros pass
+  // as a terminator or make substr() throw std::out_of_range.
+  i64 size = data.size();
+  for (; pos + entsize <= size; pos += entsize)
+    if (data.substr(pos, entsize).find_first_not_of('\0') == data.npos)
+      return pos;
+
+  return data.npos;
+}
+
+// Splits the contents of this mergeable section into small pieces that
+// we call "section fragments" and computes a hash for each of them. A
+// section fragment is the unit of merging.
+//
+// Mergeable sections (sections with the SHF_MERGE bit) typically
+// contain string literals. The linker is expected to split the section
+// contents into null-terminated strings, merge them with mergeable
+// strings from other object files, and emit uniquified strings to the
+// output file. This reduces the size of the output: if two source files
+// happen to contain the same string literal, the output contains only
+// a single copy of it.
+//
+// It is less common than string literals, but mergeable sections can
+// contain fixed-size read-only records too.
+//
+// We do not support mergeable sections that have relocations.
+template <typename E>
+void MergeableSection<E>::split_contents(Context<E> &ctx) {
+  std::string_view contents = section->contents;
+  if (contents.size() > UINT32_MAX)
+    Fatal(ctx) << *section << ": mergeable section too large";
+
+  i64 unit = parent.shdr.sh_entsize;
+
+  // Record the start offset of every fragment
+  if (!(parent.shdr.sh_flags & SHF_STRINGS)) {
+    // Fixed-size records: one fragment per `unit` bytes
+    if (contents.size() % unit)
+      Fatal(ctx) << *section << ": section size is not multiple of sh_entsize";
+    frag_offsets.reserve(contents.size() / unit);
+
+    for (i64 off = 0; off < contents.size(); off += unit)
+      frag_offsets.push_back(off);
+  } else {
+    // Strings: a fragment extends through its null terminator
+    i64 off = 0;
+    while (off < contents.size()) {
+      frag_offsets.push_back(off);
+      size_t term = find_null(contents, off, unit);
+      if (term == contents.npos)
+        Fatal(ctx) << *section << ": string is not null terminated";
+      off = term + unit;
+    }
+  }
+
+  // Hash each fragment. The hashes also go into a local estimator
+  // which is merged into the parent's estimator once at the end.
+  HyperLogLog local;
+  i64 nfrags = frag_offsets.size();
+  hashes.reserve(nfrags);
+
+  for (i64 idx = 0; idx < nfrags; idx++) {
+    u64 h = hash_string(get_contents(idx));
+    hashes.push_back(h);
+    local.insert(h);
+  }
+
+  parent.estimator.merge(local);
+
+  static Counter counter("string_fragments");
+  counter += frag_offsets.size();
+}
+
 using E = MOLD_TARGET;

 template bool cie_equals(const CieRecord<E> &, const CieRecord<E> &);
 template class InputSection<E>;
+template class MergeableSection<E>;

 } // namespace mold::elf
--- a/elf/lto-unix.cc
+++ b/elf/lto-unix.cc
@ -567,11 +567,7 @@ static ElfSym<E> to_elf_sym(PluginSymbol &psym) {
 // Returns false if it's GCC.
 template <typename E>
 static bool is_llvm(Context<E> &ctx) {
-#ifdef __MINGW32__
-  return ctx.arg.plugin.ends_with("LLVMgold.dll");
-#else
-  return ctx.arg.plugin.ends_with("LLVMgold.so");
-#endif
+  return ctx.arg.plugin.find("LLVMgold.") != ctx.arg.plugin.npos;
 }

 // Returns true if a given linker plugin supports the get_symbols_v3 API.
--- a/elf/main.cc
+++ b/elf/main.cc
@ -345,12 +345,8 @@ int elf_main(int argc, char **argv) {
  Context<E> ctx;

  // Process -run option first. process_run_subcommand() does not return.
-  if (argc >= 2 && (argv[1] == "-run"sv || argv[1] == "--run"sv)) {
-#if defined(_WIN32) || defined(__APPLE__)
-    Fatal(ctx) << "-run is supported only on Unix";
-#endif
+  if (argc >= 2 && (argv[1] == "-run"sv || argv[1] == "--run"sv))
    process_run_subcommand(ctx, argc, argv);
-  }

  // Parse non-positional command line options
  ctx.cmdline_args = expand_response_files(ctx, argv);
@ -375,12 +371,8 @@ int elf_main(int argc, char **argv) {
                 << ": " << errno_string();

  // Fork a subprocess unless --no-fork is given.
-  std::function<void()> on_complete;
-
-#if !defined(_WIN32) && !defined(__APPLE__)
  if (ctx.arg.fork)
-    on_complete = fork_child();
-#endif
+    fork_child();

  acquire_global_lock();

@ -664,8 +656,13 @@ int elf_main(int argc, char **argv) {
  // so we sort them.
  ctx.reldyn->sort(ctx);

-  // Zero-clear paddings between sections
-  clear_padding(ctx);
+  // .note.gnu.build-id section contains a cryptographic hash of the
+  // entire output file. Now that we wrote everything except build-id,
+  // we can compute it.
+  if (ctx.buildid) {
+    compute_build_id(ctx);
+    ctx.buildid->copy_buf(ctx);
+  }

  // .gdb_index's contents cannot be constructed before applying
  // relocations to other debug sections. We have relocated debug
@ -673,12 +670,6 @@ int elf_main(int argc, char **argv) {
  if (ctx.gdb_index)
    write_gdb_index(ctx);

-  // .note.gnu.build-id section contains a cryptographic hash of the
-  // entire output file. Now that we wrote everything except build-id,
-  // we can compute it.
-  if (ctx.buildid)
-    ctx.buildid->write_buildid(ctx);
-
  t_copy.stop();
  ctx.checkpoint();

@ -707,8 +698,8 @@ int elf_main(int argc, char **argv) {
  std::cout << std::flush;
  std::cerr << std::flush;

-  if (on_complete)
-    on_complete();
+  if (ctx.arg.fork)
+    notify_parent();

  release_global_lock();

--- a/elf/mold.h
+++ b/elf/mold.h
@ -47,6 +47,7 @@ template <typename E> class Symbol;
 template <typename E> struct CieRecord;
 template <typename E> struct Context;
 template <typename E> struct FdeRecord;
+template <typename E> class MergeableSection;
 template <typename E> class RelocSection;

 template <typename E>
@ -799,8 +800,7 @@ template <typename E>
 class MergedSection : public Chunk<E> {
 public:
  static MergedSection<E> *
-  get_instance(Context<E> &ctx, std::string_view name, i64 type, i64 flags,
-               i64 entsize, i64 addralign);
+  get_instance(Context<E> &ctx, std::string_view name, const ElfShdr<E> &shdr);

  SectionFragment<E> *insert(Context<E> &ctx, std::string_view data,
                             u64 hash, i64 p2align);
@ -949,9 +949,8 @@ public:

  void update_shdr(Context<E> &ctx) override;
  void copy_buf(Context<E> &ctx) override;
-  void write_buildid(Context<E> &ctx);

-  static constexpr i64 HEADER_SIZE = 16;
+  std::vector<u8> contents;
 };

 template <typename E>
@ -1090,12 +1089,17 @@ struct ComdatGroupRef {
 };

 template <typename E>
-struct MergeableSection {
+class MergeableSection {
+public:
+  MergeableSection(Context<E> &ctx, MergedSection<E> &parent,
+                   std::unique_ptr<InputSection<E>> &isec);
+
+  void split_contents(Context<E> &ctx);
  std::pair<SectionFragment<E> *, i64> get_fragment(i64 offset);
  std::string_view get_contents(i64 idx);

-  MergedSection<E> *parent;
-  std::string_view contents;
+  MergedSection<E> &parent;
+  std::unique_ptr<InputSection<E>> section;
  std::vector<u32> frag_offsets;
  std::vector<u32> hashes;
  std::vector<SectionFragment<E> *> fragments;
@ -1368,7 +1372,8 @@ void print_map(Context<E> &ctx);
 // subprocess.cc
 //

-std::function<void()> fork_child();
+void fork_child();
+void notify_parent();

 template <typename E>
 [[noreturn]]
@ -1426,11 +1431,11 @@ template <typename E> void apply_version_script(Context<E> &);
 template <typename E> void parse_symbol_version(Context<E> &);
 template <typename E> void compute_import_export(Context<E> &);
 template <typename E> void compute_address_significance(Context<E> &);
-template <typename E> void clear_padding(Context<E> &);
 template <typename E> void compute_section_headers(Context<E> &);
 template <typename E> i64 set_osec_offsets(Context<E> &);
 template <typename E> void fix_synthetic_symbols(Context<E> &);
 template <typename E> i64 compress_debug_sections(Context<E> &);
+template <typename E> void compute_build_id(Context<E> &);
 template <typename E> void write_dependency_file(Context<E> &);
 template <typename E> void show_stats(Context<E> &);

@ -1565,7 +1570,18 @@ private:
 //

 struct BuildId {
-  i64 size() const;
+  // Returns the size of the build ID in bytes for the selected kind.
+  // Calling this while `kind` is NONE hits unreachable().
+  i64 size() const {
+    if (kind == HEX)
+      return value.size();
+    if (kind == HASH)
+      return hash_size;
+    if (kind == UUID)
+      return 16;
+    unreachable();
+  }

  enum { NONE, HEX, HASH, UUID } kind = NONE;
  std::vector<u8> value;
@ -2411,8 +2427,8 @@ template <typename E>
 std::string_view MergeableSection<E>::get_contents(i64 i) {
  i64 cur = frag_offsets[i];
  if (i == frag_offsets.size() - 1)
-    return contents.substr(cur);
-  return contents.substr(cur, frag_offsets[i + 1] - cur);
+    return section->contents.substr(cur);
+  return section->contents.substr(cur, frag_offsets[i + 1] - cur);
 }

 template <typename E>
--- a/elf/output-chunks.cc
+++ b/elf/output-chunks.cc
@ -1,7 +1,5 @@
 #include "mold.h"
-
 #include "config.h"
-#include "blake3.h"

 #include <cctype>
 #include <set>
@ -1915,15 +1913,26 @@ MergedSection<E>::MergedSection(std::string_view name, i64 flags, i64 type,
 template <typename E>
 MergedSection<E> *
 MergedSection<E>::get_instance(Context<E> &ctx, std::string_view name,
-                               i64 type, i64 flags,
-                               i64 entsize, i64 addralign) {
+                               const ElfShdr<E> &shdr) {
+  if (!(shdr.sh_flags & SHF_MERGE))
+    return nullptr;
+
+  i64 addralign = std::max<i64>(1, shdr.sh_addralign);
+  i64 flags = shdr.sh_flags & ~(u64)SHF_GROUP & ~(u64)SHF_COMPRESSED;
+
+  i64 entsize = shdr.sh_entsize;
+  if (entsize == 0)
+    entsize = (shdr.sh_flags & SHF_STRINGS) ? 1 : (i64)shdr.sh_addralign;
+  if (entsize == 0)
+    return nullptr;
+
  name = get_merged_output_name(ctx, name, flags, entsize, addralign);
-  flags = flags & ~(u64)SHF_GROUP & ~(u64)SHF_COMPRESSED;

  auto find = [&]() -> MergedSection * {
    for (std::unique_ptr<MergedSection<E>> &osec : ctx.merged_sections)
      if (name == osec->name && flags == osec->shdr.sh_flags &&
-          type == osec->shdr.sh_type && entsize == osec->shdr.sh_entsize)
+          shdr.sh_type == osec->shdr.sh_type &&
+          entsize == osec->shdr.sh_entsize)
        return osec.get();
    return nullptr;
  };
@ -1941,7 +1950,7 @@ MergedSection<E>::get_instance(Context<E> &ctx, std::string_view name,
  if (MergedSection *osec = find())
    return osec;

-  MergedSection *osec = new MergedSection(name, flags, type, entsize);
+  MergedSection *osec = new MergedSection(name, flags, shdr.sh_type, entsize);
  ctx.merged_sections.emplace_back(osec);
  return osec;
 }
@ -2526,89 +2535,21 @@ void VerdefSection<E>::copy_buf(Context<E> &ctx) {
  write_vector(ctx.buf + this->shdr.sh_offset, contents);
 }

-inline i64 BuildId::size() const {
-  switch (kind) {
-  case HEX:
-    return value.size();
-  case HASH:
-    return hash_size;
-  case UUID:
-    return 16;
-  default:
-    unreachable();
-  }
-}
-
 template <typename E>
 void BuildIdSection<E>::update_shdr(Context<E> &ctx) {
-  this->shdr.sh_size = HEADER_SIZE + ctx.arg.build_id.size();
+  this->shdr.sh_size = ctx.arg.build_id.size() + 16; // +16 for the header
 }

 template <typename E>
 void BuildIdSection<E>::copy_buf(Context<E> &ctx) {
  U32<E> *base = (U32<E> *)(ctx.buf + this->shdr.sh_offset);
  memset(base, 0, this->shdr.sh_size);
-  base[0] = 4;                          // Name size
-  base[1] = ctx.arg.build_id.size();    // Hash size
-  base[2] = NT_GNU_BUILD_ID;            // Type
-  memcpy(base + 3, "GNU", 4);           // Name string
-}

-// BLAKE3 is a cryptographic hash function just like SHA256.
-// We use it instead of SHA256 because it's faster.
-static void blake3_hash(u8 *buf, i64 size, u8 *out) {
-  blake3_hasher hasher;
-  blake3_hasher_init(&hasher);
-  blake3_hasher_update(&hasher, buf, size);
-  blake3_hasher_finalize(&hasher, out, BLAKE3_OUT_LEN);
-}
-
-template <typename E>
-void BuildIdSection<E>::write_buildid(Context<E> &ctx) {
-  Timer t(ctx, "build_id");
-  u8 *buf = ctx.buf + this->shdr.sh_offset + HEADER_SIZE;
-
-  switch (ctx.arg.build_id.kind) {
-  case BuildId::HEX:
-    write_vector(buf, ctx.arg.build_id.value);
-    return;
-  case BuildId::HASH: {
-    i64 shard_size = 4 * 1024 * 1024;
-    i64 filesize = ctx.output_file->filesize;
-    i64 num_shards = align_to(filesize, shard_size) / shard_size;
-    std::vector<u8> shards(num_shards * BLAKE3_OUT_LEN);
-
-    tbb::parallel_for((i64)0, num_shards, [&](i64 i) {
-      u8 *begin = ctx.buf + shard_size * i;
-      u8 *end = (i == num_shards - 1) ? ctx.buf + filesize : begin + shard_size;
-      blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN);
-
-#ifdef HAVE_MADVISE
-      // Make the kernel page out the file contents we've just written
-      // so that subsequent close(2) call will become quicker.
-      if (i > 0 && ctx.output_file->is_mmapped)
-        madvise(begin, end - begin, MADV_DONTNEED);
-#endif
-    });
-
-    u8 digest[BLAKE3_OUT_LEN];
-    blake3_hash(shards.data(), shards.size(), digest);
-
-    assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN);
-    memcpy(buf, digest, ctx.arg.build_id.size());
-    return;
-  }
-  case BuildId::UUID: {
-    get_random_bytes(buf, 16);
-
-    // Indicate that this is UUIDv4 as defined by RFC4122
-    buf[6] = (buf[6] & 0b0000'1111) | 0b0100'0000;
-    buf[8] = (buf[8] & 0b0011'1111) | 0b1000'0000;
-    return;
-  }
-  default:
-    unreachable();
-  }
+  base[0] = 4;                       // Name size
+  base[1] = ctx.arg.build_id.size(); // Hash size
+  base[2] = NT_GNU_BUILD_ID;         // Type
+  memcpy(base + 3, "GNU", 4);        // Name string
+  write_vector(base + 4, contents);  // Build ID
 }

 template <typename E>
--- a/elf/passes.cc
+++ b/elf/passes.cc
@ -1,4 +1,5 @@
 #include "mold.h"
+#include "blake3.h"

 #include <fstream>
 #include <functional>
@ -442,10 +443,13 @@ static std::string get_cmdline_args(Context<E> &ctx) {

 template <typename E>
 void add_comment_string(Context<E> &ctx, std::string str) {
-  MergedSection<E> *sec =
-    MergedSection<E>::get_instance(ctx, ".comment", SHT_PROGBITS,
-                                   SHF_MERGE | SHF_STRINGS, 1, 1);
+  ElfShdr<E> shdr = {};
+  shdr.sh_type = SHT_PROGBITS;
+  shdr.sh_flags = SHF_MERGE | SHF_STRINGS;
+  shdr.sh_entsize = 1;
+  shdr.sh_addralign = 1;

+  MergedSection<E> *sec = MergedSection<E>::get_instance(ctx, ".comment", shdr);
  if (sec->map.nbuckets == 0)
    sec->map.resize(4096);

@ -1720,6 +1724,22 @@ void copy_chunks(Context<E> &ctx) {

  if constexpr (is_arm32<E>)
    fixup_arm_exidx_section(ctx);
+
+  // Zero-clear paddings between chunks
+  auto zero = [&](Chunk<E> *chunk, i64 next_start) {
+    i64 pos = chunk->shdr.sh_offset + chunk->shdr.sh_size;
+    memset(ctx.buf + pos, 0, next_start - pos);
+  };
+
+  std::vector<Chunk<E> *> chunks = ctx.chunks;
+
+  std::erase_if(chunks, [](Chunk<E> *chunk) {
+    return chunk->shdr.sh_type == SHT_NOBITS;
+  });
+
+  for (i64 i = 1; i < chunks.size(); i++)
+    zero(chunks[i - 1], chunks[i]->shdr.sh_offset);
+  zero(chunks.back(), ctx.output_file->filesize);
 }

 // Rewrite the leading endbr64 instruction with a nop if a function
@ -2168,26 +2188,6 @@ void compute_address_significance(Context<E> &ctx) {
  });
 }

-template <typename E>
-void clear_padding(Context<E> &ctx) {
-  Timer t(ctx, "clear_padding");
-
-  auto zero = [&](Chunk<E> *chunk, i64 next_start) {
-    i64 pos = chunk->shdr.sh_offset + chunk->shdr.sh_size;
-    memset(ctx.buf + pos, 0, next_start - pos);
-  };
-
-  std::vector<Chunk<E> *> chunks = ctx.chunks;
-
-  std::erase_if(chunks, [](Chunk<E> *chunk) {
-    return chunk->shdr.sh_type == SHT_NOBITS;
-  });
-
-  for (i64 i = 1; i < chunks.size(); i++)
-    zero(chunks[i - 1], chunks[i]->shdr.sh_offset);
-  zero(chunks.back(), ctx.output_file->filesize);
-}
-
 // We want to sort output chunks in the following order.
 //
 //   <ELF header>
@ -2999,6 +2999,65 @@ i64 compress_debug_sections(Context<E> &ctx) {
  return set_osec_offsets(ctx);
 }

+// BLAKE3 is a cryptographic hash function just like SHA256.
+// We use it instead of SHA256 because it's faster.
+//
+// Hashes the `size` bytes starting at `buf` and writes a digest of
+// BLAKE3_OUT_LEN bytes to `out`.
+static void blake3_hash(u8 *buf, i64 size, u8 *out) {
+  blake3_hasher hasher;
+  blake3_hasher_init(&hasher);
+  blake3_hasher_update(&hasher, buf, size);
+  blake3_hasher_finalize(&hasher, out, BLAKE3_OUT_LEN);
+}
+
+// Computes the build ID selected by ctx.arg.build_id.kind and stores it
+// in ctx.buildid->contents. Nothing is written to the output buffer
+// here.
+template <typename E>
+void compute_build_id(Context<E> &ctx) {
+  Timer t(ctx, "compute_build_id");
+  std::vector<u8> &id = ctx.buildid->contents;
+
+  switch (ctx.arg.build_id.kind) {
+  case BuildId::HEX:
+    // Use ctx.arg.build_id.value as is
+    id = ctx.arg.build_id.value;
+    return;
+  case BuildId::HASH: {
+    // Hash the output file in fixed-size shards in parallel, then hash
+    // the concatenated shard digests to obtain the final digest.
+    i64 shard_size = 4 * 1024 * 1024;
+    i64 total = ctx.output_file->filesize;
+    i64 nshards = align_to(total, shard_size) / shard_size;
+    std::vector<u8> digests(nshards * BLAKE3_OUT_LEN);
+
+    tbb::parallel_for((i64)0, nshards, [&](i64 i) {
+      u8 *first = ctx.buf + shard_size * i;
+      u8 *last;
+      if (i == nshards - 1)
+        last = ctx.buf + total;
+      else
+        last = first + shard_size;
+
+      blake3_hash(first, last - first, digests.data() + i * BLAKE3_OUT_LEN);
+
+#ifdef HAVE_MADVISE
+      // Make the kernel page out the file contents we've just written
+      // so that subsequent close(2) call will become quicker.
+      if (i > 0 && ctx.output_file->is_mmapped)
+        madvise(first, last - first, MADV_DONTNEED);
+#endif
+    });
+
+    u8 final_digest[BLAKE3_OUT_LEN];
+    blake3_hash(digests.data(), digests.size(), final_digest);
+
+    // The build ID is a prefix of the final digest
+    assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN);
+    id = std::vector<u8>(final_digest,
+                         final_digest + ctx.arg.build_id.size());
+    return;
+  }
+  case BuildId::UUID: {
+    u8 uuid[16];
+    get_random_bytes(uuid, 16);
+
+    // Indicate that this is UUIDv4 as defined by RFC4122
+    uuid[6] = (uuid[6] & 0b0000'1111) | 0b0100'0000;
+    uuid[8] = (uuid[8] & 0b0011'1111) | 0b1000'0000;
+    id = std::vector<u8>(uuid, uuid + 16);
+    return;
+  }
+  default:
+    unreachable();
+  }
+}
+
+
 // Write Makefile-style dependency rules to a file specified by
 // --dependency-file. This is analogous to the compiler's -M flag.
 template <typename E>
@ -3134,11 +3193,11 @@ template void apply_version_script(Context<E> &);
 template void parse_symbol_version(Context<E> &);
 template void compute_import_export(Context<E> &);
 template void compute_address_significance(Context<E> &);
-template void clear_padding(Context<E> &);
 template void compute_section_headers(Context<E> &);
 template i64 set_osec_offsets(Context<E> &);
 template void fix_synthetic_symbols(Context<E> &);
 template i64 compress_debug_sections(Context<E> &);
+template void compute_build_id(Context<E> &);
 template void write_dependency_file(Context<E> &);
 template void show_stats(Context<E> &);

--- a/elf/relocatable.cc
+++ b/elf/relocatable.cc
@ -176,7 +176,6 @@ void combine_objects(Context<E> &ctx) {
  ctx.buf = ctx.output_file->buf;

  copy_chunks(ctx);
-  clear_padding(ctx);
  ctx.output_file->close(ctx);
  ctx.checkpoint();

--- a/elf/subprocess-unix.cc
+++ b/elf/subprocess-unix.cc
@ -1,5 +1,3 @@
-#if !defined(_WIN32) && !defined(__APPLE__)
-
 #include "mold.h"
 #include "config.h"

@ -14,10 +12,12 @@
 namespace mold::elf {

 #ifdef MOLD_X86_64
+static int pipe_write_fd = -1;
+
 // Exiting from a program with large memory usage is slow --
 // it may take a few hundred milliseconds. To hide the latency,
 // we fork a child and let it do the actual linking work.
-std::function<void()> fork_child() {
+void fork_child() {
  int pipefd[2];
  if (pipe(pipefd) == -1) {
    perror("pipe");
@ -50,12 +50,16 @@ std::function<void()> fork_child() {

  // Child
  close(pipefd[0]);
+  pipe_write_fd = pipefd[1];
+}

-  return [=] {
-    char buf[] = {1};
-    [[maybe_unused]] int n = write(pipefd[1], buf, 1);
-    assert(n == 1);
-  };
+// Writes a single byte to the write end of the pipe that fork_child()
+// saved in pipe_write_fd. Does nothing if no descriptor was saved.
+void notify_parent() {
+  if (pipe_write_fd != -1) {
+    char byte = 1;
+    [[maybe_unused]] int n = write(pipe_write_fd, &byte, 1);
+    assert(n == 1);
+  }
 }
 #endif

@ -84,6 +88,9 @@ static std::string find_dso(Context<E> &ctx, std::filesystem::path self) {
 template <typename E>
 [[noreturn]]
 void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
+#ifdef __APPLE__
+  Fatal(ctx) << "-run is not supported on macOS";
+#else
  assert(argv[1] == "-run"s || argv[1] == "--run"s);

  if (!argv[2])
@ -111,6 +118,7 @@ void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
  // Execute a given command
  execvp(argv[2], argv + 2);
  Fatal(ctx) << "mold -run failed: " << argv[2] << ": " << errno_string();
+#endif
 }

 using E = MOLD_TARGET;
@ -118,5 +126,3 @@ using E = MOLD_TARGET;
 template void process_run_subcommand(Context<E> &, int, char **);

 } // namespace mold::elf
-
-#endif
--- a/elf/subprocess-win32.cc
+++ b/elf/subprocess-win32.cc
@ -0,0 +1,20 @@
+#include "mold.h"
+
+namespace mold::elf {
+
+// Windows versions of fork_child() and notify_parent(): both are no-ops.
+// NOTE(review): presumably guarded by MOLD_X86_64 so that these
+// non-template functions are defined in only one of the per-target
+// instantiations of this file -- confirm against CMakeLists.txt.
+#ifdef MOLD_X86_64
+void fork_child() {}
+void notify_parent() {}
+#endif
+
+// Windows version of the -run subcommand handler. It does not run
+// anything; it only reports a fatal error. `argc` and `argv` are unused.
+template <typename E>
+[[noreturn]]
+void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
+  Fatal(ctx) << "-run is supported only on Unix";
+}
+
+using E = MOLD_TARGET;
+
+template void process_run_subcommand(Context<E> &, int, char **);
+
+} // namespace mold::elf
Author	SHA1	Message	Date
Christian Sattler	9fdd93bdc8	Merge `24f818820b` into `7089304046`	2024-07-06 02:14:41 +01:00
Rui Ueyama	7089304046	Simplify	2024-07-05 19:47:33 +09:00
Rui Ueyama	5ab3708e9f	Merge pull request #1298 from yujincheng08/patch-1 Fix cross-compile lto on MacOS	2024-07-05 19:45:10 +09:00
Rui Ueyama	aa939d7a6a	Temporarily disable ASAN tests It looks like starting with Clang 18.1.3, ASAN began reporting an error on OneTBB.	2024-07-05 18:27:00 +09:00
Rui Ueyama	a63bbcd7b1	Attempt to fix CI	2024-07-05 17:58:13 +09:00
Rui Ueyama	339f9c485a	Attempt to fix CI	2024-07-05 17:42:32 +09:00
LoveSy	4905f6d3f2	Use macos-12 because macos-11 has been removed https://github.blog/changelog/2024-05-20-actions-upcoming-changes-to-github-hosted-macos-runners/#macos-11-deprecation-and-removal	2024-07-05 15:32:37 +08:00
Rui Ueyama	cb8cc1c8f0	Refactor	2024-07-05 16:00:09 +09:00
Rui Ueyama	c6b54532e9	Refactor	2024-07-04 16:17:25 +09:00
Rui Ueyama	3936134823	Refactor	2024-07-04 15:08:37 +09:00
Rui Ueyama	55ca05bab6	Refactor	2024-07-04 13:59:29 +09:00
LoveSy	7305a5c5dd	Fix cross-compile lto on MacOS	2024-07-03 21:55:22 +08:00
Christian Sattler	24f818820b	Fix typos in design.md	2021-12-26 14:37:35 +01:00