diff --git a/common/common.h b/common/common.h
index 34f32d4f..c19a3e4e 100644
--- a/common/common.h
+++ b/common/common.h
@@ -524,6 +524,14 @@ inline bool remove_prefix(std::string_view &s, std::string_view prefix) {
 // Concurrent Map
 //
 
+static inline void pause() { // Emits one CPU hint instruction; presumably meant for spin-wait loops -- confirm at call sites
+#if defined(__x86_64__)
+  asm volatile("pause"); // x86-64: the `pause` instruction
+#elif defined(__arm__) || defined(__aarch64__)
+  asm volatile("yield"); // 32-bit and 64-bit ARM: the `yield` instruction
+#endif // on any other target neither branch is compiled, so the body is empty
+}
+
 // This is an implementation of a fast concurrent hash map. Unlike
 // ordinary hash tables, this impl just aborts if it becomes full.
 // So you need to give a correct estimation of the final size before
@@ -576,7 +584,7 @@ public:
 #endif
   }
 
-  std::pair<T *, bool> insert(std::string_view key, u64 hash, const T &val) {
+  std::pair<T *, bool> insert(std::string_view key, u32 hash, const T &val) {
     assert(has_single_bit(nbuckets));
 
     i64 begin = hash & (nbuckets - 1);
@@ -586,8 +594,8 @@ public:
       i64 idx = (begin & ~mask) | ((begin + i) & mask);
       Entry &ent = entries[idx];
 
-      // It seems avoiding compare-and-exchange is faster overall at
-      // least on my Zen4 machine, so do it.
+      // It seems avoiding compare-and-swap is faster overall at least
+      // on my Zen4 machine, so do it.
       if (const char *ptr = ent.key.load(std::memory_order_acquire);
           ptr != nullptr && ptr != (char *)-1) {
         if (key == std::string_view(ptr, ent.keylen))
@@ -686,15 +694,6 @@ public:
 
   Entry *entries = nullptr;
   i64 nbuckets = 0;
-
-private:
-  static void pause() {
-#if defined(__x86_64__)
-    asm volatile("pause");
-#elif defined(__aarch64__)
-    asm volatile("yield");
-#endif
-  }
 };
 
 //
diff --git a/elf/input-files.cc b/elf/input-files.cc
index e022294e..8b249e3b 100644
--- a/elf/input-files.cc
+++ b/elf/input-files.cc
@@ -699,7 +699,7 @@ static size_t find_null(std::string_view data, i64 pos, i64 entsize) {
 template <typename E>
 static std::unique_ptr<MergeableSection<E>>
 split_section(Context<E> &ctx, InputSection<E> &sec) {
-  if (!sec.is_alive || sec.relsec_idx != -1)
+  if (!sec.is_alive || sec.relsec_idx != -1 || sec.sh_size == 0)
     return nullptr;
 
   const ElfShdr<E> &shdr = sec.shdr();
@@ -719,12 +719,9 @@ split_section(Context<E> &ctx, InputSection<E> &sec) {
 
   std::unique_ptr<MergeableSection<E>> m(new MergeableSection<E>);
   m->parent = MergedSection<E>::get_instance(ctx, sec.name(), shdr.sh_type,
-                                               shdr.sh_flags, entsize, addralign);
+                                             shdr.sh_flags, entsize, addralign);
   m->p2align = sec.p2align;
 
-  if (sec.sh_size == 0)
-    return m;
-
   // If thes section contents are compressed, uncompress them.
   sec.uncompress(ctx);
 
diff --git a/elf/output-chunks.cc b/elf/output-chunks.cc
index d0ed7aa6..f0b7498f 100644
--- a/elf/output-chunks.cc
+++ b/elf/output-chunks.cc
@@ -103,12 +103,12 @@ void OutputShdr<E>::copy_buf(Context<E> &ctx) {
   ElfShdr<E> *hdr = (ElfShdr<E> *)(ctx.buf + this->shdr.sh_offset);
   memset(hdr, 0, this->shdr.sh_size);
 
+  if (ctx.shstrtab && SHN_LORESERVE <= ctx.shstrtab->shndx)
+    hdr[0].sh_link = ctx.shstrtab->shndx;
+
   i64 shnum = ctx.shdr->shdr.sh_size / sizeof(ElfShdr<E>);
   if (UINT16_MAX < shnum)
-    hdr->sh_size = shnum;
-
-  if (ctx.shstrtab && SHN_LORESERVE <= ctx.shstrtab->shndx)
-    hdr->sh_link = ctx.shstrtab->shndx;
+    hdr[0].sh_size = shnum;
 
   for (Chunk<E> *chunk : ctx.chunks)
     if (chunk->shndx)
diff --git a/elf/passes.cc b/elf/passes.cc
index dfffbdf2..8b5f4991 100644
--- a/elf/passes.cc
+++ b/elf/passes.cc
@@ -2955,7 +2955,6 @@ void fix_synthetic_symbols(Context<E> &ctx) {
     }
   }
 
-
   // --section-order symbols
   for (SectionOrder &ord : ctx.arg.section_order)
     if (ord.type == SectionOrder::SYMBOL)