diff --git a/common/common.h b/common/common.h index 34f32d4f..c19a3e4e 100644 --- a/common/common.h +++ b/common/common.h @@ -524,6 +524,14 @@ inline bool remove_prefix(std::string_view &s, std::string_view prefix) { // Concurrent Map // +static inline void pause() { +#if defined(__x86_64__) + asm volatile("pause"); +#elif defined(__arm__) || defined(__aarch64__) + asm volatile("yield"); +#endif +} + // This is an implementation of a fast concurrent hash map. Unlike // ordinary hash tables, this impl just aborts if it becomes full. // So you need to give a correct estimation of the final size before @@ -576,7 +584,7 @@ public: #endif } - std::pair insert(std::string_view key, u64 hash, const T &val) { + std::pair insert(std::string_view key, u32 hash, const T &val) { assert(has_single_bit(nbuckets)); i64 begin = hash & (nbuckets - 1); @@ -586,8 +594,8 @@ public: i64 idx = (begin & ~mask) | ((begin + i) & mask); Entry &ent = entries[idx]; - // It seems avoiding compare-and-exchange is faster overall at - // least on my Zen4 machine, so do it. + // It seems avoiding compare-and-swap is faster overall at least + // on my Zen4 machine, so do it. if (const char *ptr = ent.key.load(std::memory_order_acquire); ptr != nullptr && ptr != (char *)-1) { if (key == std::string_view(ptr, ent.keylen)) @@ -686,15 +694,6 @@ public: Entry *entries = nullptr; i64 nbuckets = 0; - -private: - static void pause() { -#if defined(__x86_64__) - asm volatile("pause"); -#elif defined(__aarch64__) - asm volatile("yield"); -#endif - } }; // diff --git a/elf/input-files.cc b/elf/input-files.cc index e022294e..8b249e3b 100644 --- a/elf/input-files.cc +++ b/elf/input-files.cc @@ -699,7 +699,7 @@ static size_t find_null(std::string_view data, i64 pos, i64 entsize) { template static std::unique_ptr> split_section(Context &ctx, InputSection &sec) { - if (!sec.is_alive || sec.relsec_idx != -1) + if (!sec.is_alive || sec.relsec_idx != -1 || sec.sh_size == 0) return nullptr; const ElfShdr &shdr = sec.shdr(); @@ -719,12 +719,9 @@ split_section(Context &ctx, InputSection &sec) { std::unique_ptr> m(new MergeableSection); m->parent = MergedSection::get_instance(ctx, sec.name(), shdr.sh_type, - shdr.sh_flags, entsize, addralign); + shdr.sh_flags, entsize, addralign); m->p2align = sec.p2align; - if (sec.sh_size == 0) - return m; - // If thes section contents are compressed, uncompress them. sec.uncompress(ctx); diff --git a/elf/output-chunks.cc b/elf/output-chunks.cc index d0ed7aa6..f0b7498f 100644 --- a/elf/output-chunks.cc +++ b/elf/output-chunks.cc @@ -103,12 +103,12 @@ void OutputShdr::copy_buf(Context &ctx) { ElfShdr *hdr = (ElfShdr *)(ctx.buf + this->shdr.sh_offset); memset(hdr, 0, this->shdr.sh_size); + if (ctx.shstrtab && SHN_LORESERVE <= ctx.shstrtab->shndx) + hdr[0].sh_link = ctx.shstrtab->shndx; + i64 shnum = ctx.shdr->shdr.sh_size / sizeof(ElfShdr); if (UINT16_MAX < shnum) - hdr->sh_size = shnum; - - if (ctx.shstrtab && SHN_LORESERVE <= ctx.shstrtab->shndx) - hdr->sh_link = ctx.shstrtab->shndx; + hdr[0].sh_size = shnum; for (Chunk *chunk : ctx.chunks) if (chunk->shndx) diff --git a/elf/passes.cc b/elf/passes.cc index dfffbdf2..8b5f4991 100644 --- a/elf/passes.cc +++ b/elf/passes.cc @@ -2955,7 +2955,6 @@ void fix_synthetic_symbols(Context &ctx) { } } - // --section-order symbols for (SectionOrder &ord : ctx.arg.section_order) if (ord.type == SectionOrder::SYMBOL)