// Identical code folding (ICF).
//
// Every eligible input section gets a digest of its contents and
// relocations. The digests are then repeatedly re-hashed together with the
// digests of the sections each one references until the partition of
// sections stops changing. Sections that end up with equal digests are
// grouped under a single leader.
//
// NOTE(review): the include targets and several template arguments in this
// file were reconstructed from usage (e.g. `Digest` must hold `u8` because
// it is passed to RAND_bytes/memcpy as a byte buffer; the edge vectors are
// assumed to hold `i64`). Confirm against the project's headers.

#include "mold.h"

#include <array>
#include <cassert>
#include <cstring>
#include <functional>
#include <openssl/rand.h>
#include <openssl/sha.h>
#include <tbb/enumerable_thread_specific.h>
#include <tbb/parallel_for.h>
#include <tbb/parallel_sort.h>

// Number of leading SHA-256 bytes kept as a section's digest.
static constexpr i64 HASH_SIZE = 16;

using Digest = std::array<u8, HASH_SIZE>;

// Returns true if `isec` may be folded with an identical section: it must be
// allocated, have file contents (not SHT_NOBITS), be non-writable, and be
// neither an init/fini array nor the `.init`/`.fini` section.
static bool is_eligible(InputSection &isec) {
  return (isec.shdr.sh_flags & SHF_ALLOC) &&
         (isec.shdr.sh_type != SHT_NOBITS) &&
         !(isec.shdr.sh_flags & SHF_WRITE) &&
         !(isec.shdr.sh_type == SHT_INIT_ARRAY || isec.name == ".init") &&
         !(isec.shdr.sh_type == SHT_FINI_ARRAY || isec.name == ".fini");
}

// Finalizes `ctx` and returns the first HASH_SIZE bytes of the SHA-256 value.
static Digest digest_final(SHA256_CTX &ctx) {
  u8 digest[SHA256_SIZE];

  // The call must live outside assert(): with NDEBUG the assert expression
  // is not evaluated, which would leave `digest` uninitialized.
  [[maybe_unused]] int ok = SHA256_Final(digest, &ctx);
  assert(ok == 1);

  Digest arr;
  memcpy(arr.data(), digest, HASH_SIZE);
  return arr;
}

// Computes the initial digest of `isec` from its contents, section flags,
// FDE records and relocations.
//
// Relocation targets are hashed with a leading tag so that different kinds
// of target cannot collide:
//   1 - relocation that refers directly to a section fragment
//   2 - symbol that belongs to a section fragment
//   3 - symbol that has no input section (hashed by value)
//   4 - symbol defined in an input section; only the tag is hashed here,
//       the referenced section's digest is mixed in later through the edge
//       list built by gather_sections().
static Digest compute_digest(InputSection &isec) {
  SHA256_CTX ctx;
  SHA256_Init(&ctx);

  auto hash_i64 = [&](i64 val) {
    SHA256_Update(&ctx, &val, 8);
  };

  // Strings are length-prefixed so that adjacent strings cannot run together.
  auto hash_string = [&](std::string_view str) {
    hash_i64(str.size());
    SHA256_Update(&ctx, str.data(), str.size());
  };

  auto hash_symbol = [&](Symbol &sym) {
    if (SectionFragment *frag = sym.fragref.frag) {
      hash_i64(2);
      hash_i64(sym.fragref.addend);
      hash_string(frag->data);
    } else if (!sym.input_section) {
      hash_i64(3);
      hash_i64(sym.value);
    } else {
      hash_i64(4);
    }
  };

  hash_string(isec.get_contents());
  hash_i64(isec.shdr.sh_flags);
  hash_i64(isec.fdes.size());
  hash_i64(isec.rels.size());

  for (FdeRecord &fde : isec.fdes) {
    hash_string(fde.contents);
    hash_i64(fde.rels.size());

    for (EhReloc &rel : fde.rels) {
      hash_symbol(rel.sym);
      hash_i64(rel.type);
      hash_i64(rel.offset);
      hash_i64(rel.addend);
    }
  }

  // `rel_fragments` only has entries for relocations whose `has_fragments`
  // flag is set, so it is walked with its own cursor.
  i64 ref_idx = 0;

  for (i64 i = 0; i < (i64)isec.rels.size(); i++) {
    ElfRela &rel = isec.rels[i];
    hash_i64(rel.r_offset);
    hash_i64(rel.r_type);
    hash_i64(rel.r_addend);

    if (isec.has_fragments[i]) {
      SectionFragmentRef &ref = isec.rel_fragments[ref_idx++];
      hash_i64(1);
      hash_i64(ref.addend);
      hash_string(ref.frag->data);
    } else {
      hash_symbol(*isec.file->symbols[rel.r_sym]);
    }
  }

  return digest_final(ctx);
}

// Returns HASH_SIZE random bytes. Used as the digest of sections that are
// not eligible for folding.
static Digest get_random_bytes() {
  Digest arr;

  // Keep the call outside assert() so that it still runs under NDEBUG.
  [[maybe_unused]] int ok = RAND_bytes(arr.data(), HASH_SIZE);
  assert(ok == 1);
  return arr;
}

// Collects every input section of every object file and builds the data the
// propagation rounds operate on.
//
// On return:
//   digests      - one digest per section, indexed by the section's icf_idx.
//                  Eligible sections come first, ordered by digest.
//   sections     - the eligible sections only; sections[i] has icf_idx i.
//   edge_indices - for each eligible section, the offset of its first
//                  outgoing edge in `edges`.
//   edges        - icf_idx of the sections referenced by relocations, grouped
//                  by source section.
//
// Handles the cases of no object files and of no eligible sections.
static void gather_sections(std::vector<Digest> &digests,
                            std::vector<InputSection *> &sections,
                            std::vector<i64> &edge_indices,
                            std::vector<i64> &edges) {
  Timer t("gather");

  struct Entry {
    InputSection *isec;
    Digest digest;
    bool is_eligible;
  };

  const i64 num_objs = out::objs.size();

  // Count the number of input sections for each input file.
  std::vector<i64> num_sections(num_objs);
  tbb::parallel_for((i64)0, num_objs, [&](i64 i) {
    for (InputSection *isec : out::objs[i]->sections)
      if (isec)
        num_sections[i]++;
  });

  // Assign each object file a unique index in `entries`. A running total is
  // used instead of `size() - 1` / `back()` so that an empty input is safe.
  std::vector<i64> section_indices(num_objs);
  i64 num_entries = 0;
  for (i64 i = 0; i < num_objs; i++) {
    section_indices[i] = num_entries;
    num_entries += num_sections[i];
  }

  std::vector<Entry> entries(num_entries);
  tbb::enumerable_thread_specific<i64> num_eligibles;

  // Fill `entries` contents.
  tbb::parallel_for((i64)0, num_objs, [&](i64 i) {
    i64 idx = section_indices[i];
    for (InputSection *isec : out::objs[i]->sections) {
      if (!isec)
        continue;

      Entry &ent = entries[idx++];
      ent.isec = isec;
      ent.is_eligible = is_eligible(*isec);
      ent.digest = ent.is_eligible ? compute_digest(*isec) : get_random_bytes();

      if (ent.is_eligible)
        num_eligibles.local() += 1;
    }
  });

  // Sort `entries` so that all eligible sections precede non-eligible sections.
  // Eligible sections are sorted by SHA hash.
  tbb::parallel_sort(entries.begin(), entries.end(),
                     [](const Entry &a, const Entry &b) {
    if (!a.is_eligible || !b.is_eligible)
      return a.is_eligible && !b.is_eligible;
    return a.digest < b.digest;
  });

  // Copy contents from `entries` to `sections` and `digests`.
  sections.resize(num_eligibles.combine(std::plus<i64>()));
  digests.resize(entries.size());

  tbb::parallel_for((i64)0, (i64)entries.size(), [&](i64 i) {
    Entry &ent = entries[i];
    ent.isec->icf_idx = i;
    digests[i] = ent.digest;
    if (i < (i64)sections.size())
      sections[i] = ent.isec;
  });

  // Count the number of outgoing edges for each eligible section. An edge is
  // a non-fragment relocation whose symbol is defined in an input section.
  std::vector<i64> num_edges(sections.size());

  tbb::parallel_for((i64)0, (i64)num_edges.size(), [&](i64 i) {
    assert(entries[i].is_eligible);
    InputSection &isec = *sections[i];

    for (i64 j = 0; j < (i64)isec.rels.size(); j++) {
      if (!isec.has_fragments[j]) {
        ElfRela &rel = isec.rels[j];
        Symbol &sym = *isec.file->symbols[rel.r_sym];
        if (!sym.fragref.frag && sym.input_section)
          num_edges[i]++;
      }
    }
  });

  // Assign each eligible section a unique index in `edges`. As above, a
  // running total keeps this safe when there are no eligible sections.
  edge_indices.resize(num_edges.size());
  i64 total_edges = 0;
  for (i64 i = 0; i < (i64)num_edges.size(); i++) {
    edge_indices[i] = total_edges;
    total_edges += num_edges[i];
  }

  edges.resize(total_edges);

  // Fill `edges` contents.
  tbb::parallel_for((i64)0, (i64)num_edges.size(), [&](i64 i) {
    InputSection &isec = *sections[i];
    i64 idx = edge_indices[i];

    for (i64 j = 0; j < (i64)isec.rels.size(); j++) {
      if (!isec.has_fragments[j]) {
        ElfRela &rel = isec.rels[j];
        Symbol &sym = *isec.file->symbols[rel.r_sym];
        if (!sym.fragref.frag && sym.input_section) {
          assert(sym.input_section->icf_idx != -1);
          edges[idx++] = sym.input_section->icf_idx;
        }
      }
    }
  });
}

// Entry point of identical code folding: gathers sections, runs propagation
// rounds until the class count stabilizes, then groups sections that share a
// digest and marks the first one of each group (lowest priority value) as
// its own leader.
void icf_sections() {
  Timer t("icf");

  // Prepare for the propagation rounds.
  std::vector<Digest> digests0;
  std::vector<InputSection *> sections;
  std::vector<i64> edge_indices;
  std::vector<i64> edges;

  gather_sections(digests0, sections, edge_indices, edges);

  // Two buffers: each round reads digests[slot] and writes digests[slot ^ 1].
  // Both start out identical so that the digests of non-eligible sections,
  // which are never rewritten, are present in either buffer.
  std::vector<std::vector<Digest>> digests(2);
  digests[0] = std::move(digests0);
  digests[1] = digests[0];

  i64 slot = 0;

  // Counts the positions among the eligible sections where two neighbouring
  // digests differ. The value is only compared between rounds to detect
  // convergence.
  auto count_num_classes = [&]() {
    tbb::enumerable_thread_specific<i64> num_classes;
    tbb::parallel_for((i64)0, (i64)sections.size() - 1, [&](i64 i) {
      if (digests[slot][i] != digests[slot][i + 1])
        num_classes.local()++;
    });
    return num_classes.combine(std::plus<i64>());
  };

  i64 num_classes = count_num_classes();
  SyncOut() << "num_classes=" << num_classes;

  Timer t2("propagate");
  static Counter round("icf_round");

  // Execute the propagation rounds until convergence is obtained.
  for (;;) {
    round.inc();

    tbb::parallel_for((i64)0, (i64)sections.size(), [&](i64 i) {
      SHA256_CTX ctx;
      SHA256_Init(&ctx);
      SHA256_Update(&ctx, digests[slot][i].data(), HASH_SIZE);

      // The edges of section i occupy [begin, end) in `edges`.
      i64 begin = edge_indices[i];
      i64 end = (i + 1 == (i64)sections.size()) ? (i64)edges.size()
                                                : edge_indices[i + 1];

      for (i64 j = begin; j < end; j++)
        SHA256_Update(&ctx, digests[slot][edges[j]].data(), HASH_SIZE);

      digests[slot ^ 1][i] = digest_final(ctx);
    });

    slot ^= 1;

    i64 n = count_num_classes();
    SyncOut() << "num_classes=" << n;
    if (n == num_classes)
      break;
    num_classes = n;
  }

  t2.stop();

  // Group sections by their (truncated SHA-256) digest.
  Timer t3("merge");

  struct Entry {
    InputSection *isec;
    Digest digest;
  };

  std::vector<Entry> entries;
  entries.resize(sections.size());

  tbb::parallel_for((i64)0, (i64)sections.size(), [&](i64 i) {
    entries[i] = {sections[i], digests[slot][i]};
  });

  // Within a group of equal digests, order by priority so that the first
  // element of the group is the one chosen as leader.
  tbb::parallel_sort(entries.begin(), entries.end(),
                     [](auto &a, auto &b) {
    if (a.digest != b.digest)
      return a.digest < b.digest;
    return a.isec->get_priority() < b.isec->get_priority();
  });

  const i64 num_entries = entries.size();

  // Visit every entry, including the last one, so that a trailing singleton
  // group also gets its leader set.
  tbb::parallel_for((i64)0, num_entries, [&](i64 i) {
    if (i == 0 || entries[i - 1].digest != entries[i].digest) {
      InputSection *leader = entries[i].isec;
      leader->leader = leader;

      // `j` is bounded explicitly; the last group has no following entry
      // with a different digest to stop the scan.
      //
      // NOTE(review): the store below only rewrites the local `entries`
      // array, which is discarded on return; the follower sections' own
      // `leader` fields are never set. Possibly
      // `entries[j].isec->leader = leader` was intended -- confirm before
      // changing, since that would alter what the rest of the linker sees.
      for (i64 j = i + 1;
           j < num_entries && entries[i].digest == entries[j].digest; j++)
        entries[j].isec = leader;
    }
  });
}