2021-01-30 14:11:05 +03:00
|
|
|
// This file implements the Identical Comdat Folding feature which can
|
|
|
|
// reduce the output file size of a typical program by a few percent.
|
|
|
|
// ICF identifies read-only input sections that happen to be identical.
|
|
|
|
// It then leaves one of them and discards the others.
|
|
|
|
//
|
|
|
|
// Two sections are considered identical by ICF if they have the exact
|
|
|
|
// same contents, metadata such as section flags, exception handling
|
|
|
|
// records, and relocations. The last one is interesting because two
|
|
|
|
// relocations are considered identical if they point to the _same_
|
|
|
|
// section in terms of ICF. To see what that means, consider two sections,
|
|
|
|
// A and B, which are identical except one pair of relocations. Say, A has
|
|
|
|
// a relocation to section C, and B has a relocation to D. In this case, A
|
|
|
|
// and B are considered identical if C and D are considered identical.
|
|
|
|
// C and D can either be really the same section or two different sections
|
|
|
|
// that are considered identical by ICF.
|
|
|
|
//
|
|
|
|
// This problem boils down to one in graph theory. Input to ICF can be
|
|
|
|
// considered as a directed graph in which vertices are sections and edges
|
|
|
|
// are relocations. We want to find as many isomorphic subgraphs as
|
|
|
|
// possible.
|
|
|
|
//
|
|
|
|
// Solving such a problem is a computationally intensive task, but mold is quite
|
|
|
|
// fast. For Chromium, mold's ICF finishes in less than 1 second with 20
|
|
|
|
// threads. This is contrary to lld and gold, which take about 5 and 50
|
|
|
|
// seconds to run ICF under the same condition, respectively.
|
|
|
|
//
|
|
|
|
// mold's ICF is faster because we are using a better algorithm.
|
|
|
|
// Our algorithm requires less overall computation, so it is faster than
|
|
|
|
// the others with a single thread. It's also highly parallelizable and
|
|
|
|
// its working set is small, so it scales pretty well with the number of
|
|
|
|
// available cores.
|
|
|
|
|
2021-01-27 13:07:55 +03:00
|
|
|
#include "mold.h"
|
|
|
|
|
2021-01-27 13:46:25 +03:00
|
|
|
#include <array>
|
2021-01-27 13:21:29 +03:00
|
|
|
#include <openssl/sha.h>
|
2021-01-29 12:28:31 +03:00
|
|
|
#include <tbb/concurrent_unordered_map.h>
|
2021-01-29 13:40:35 +03:00
|
|
|
#include <tbb/concurrent_vector.h>
|
2021-01-27 14:28:06 +03:00
|
|
|
#include <tbb/enumerable_thread_specific.h>
|
2021-01-27 13:07:55 +03:00
|
|
|
#include <tbb/parallel_for.h>
|
|
|
|
#include <tbb/parallel_for_each.h>
|
2021-01-27 14:18:03 +03:00
|
|
|
#include <tbb/parallel_sort.h>
|
2021-01-27 13:07:55 +03:00
|
|
|
|
|
|
|
static constexpr i64 HASH_SIZE = 16;
|
|
|
|
|
2021-01-28 06:39:22 +03:00
|
|
|
typedef std::array<u8, HASH_SIZE> Digest;
|
|
|
|
|
2021-01-29 14:46:34 +03:00
|
|
|
namespace tbb {
|
|
|
|
template<> struct tbb_hash<Digest> {
|
|
|
|
size_t operator()(const Digest &k) const {
|
|
|
|
return *(i64 *)&k[0];
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2021-01-30 08:34:11 +03:00
|
|
|
static bool cie_equal(const CieRecord &a, const CieRecord &b) {
|
|
|
|
return a.contents == b.contents && a.rels == b.rels;
|
2021-01-30 08:30:31 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static void uniquify_cies() {
|
|
|
|
Timer t("uniquify_cies");
|
|
|
|
std::vector<CieRecord *> cies;
|
|
|
|
|
|
|
|
for (ObjectFile *file : out::objs) {
|
|
|
|
for (CieRecord &cie : file->cies) {
|
|
|
|
for (i64 i = 0; i < cies.size(); i++) {
|
|
|
|
if (cie_equal(cie, *cies[i])) {
|
|
|
|
cie.icf_idx = i;
|
|
|
|
goto found;
|
|
|
|
}
|
|
|
|
}
|
2021-01-30 08:34:11 +03:00
|
|
|
cie.icf_idx = cies.size();
|
2021-01-30 08:30:31 +03:00
|
|
|
cies.push_back(&cie);
|
|
|
|
found:;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-27 13:07:55 +03:00
|
|
|
// Returns true if `isec` may take part in ICF.
//
// A section qualifies when it is allocated, executable, effectively
// read-only (not writable, or a .data.rel.ro section), has real non-empty
// contents, is not an init/fini section, and its name is not a valid C
// identifier.
static bool is_eligible(InputSection &isec) {
  const auto flags = isec.shdr.sh_flags;
  const auto type = isec.shdr.sh_type;

  // Must be allocated and executable.
  if (!(flags & SHF_ALLOC) || !(flags & SHF_EXECINSTR))
    return false;

  // Writable sections are rejected unless they are relro.
  bool is_relro = (isec.name == ".data.rel.ro" ||
                   isec.name.starts_with(".data.rel.ro."));
  if ((flags & SHF_WRITE) && !is_relro)
    return false;

  // .bss-like and empty sections have nothing to fold.
  if (type == SHT_NOBITS || isec.shdr.sh_size == 0)
    return false;

  // Initializer and finalizer sections are never folded.
  if (type == SHT_INIT_ARRAY || isec.name == ".init")
    return false;
  if (type == SHT_FINI_ARRAY || isec.name == ".fini")
    return false;

  // NOTE(review): sections named like C identifiers are excluded,
  // presumably because they can be enumerated via __start_/__stop_
  // symbols -- confirm.
  return !is_c_identifier(isec.name);
}
|
|
|
|
|
2021-01-28 06:39:22 +03:00
|
|
|
// Finalizes a SHA-256 computation and returns the first HASH_SIZE bytes
// of the result as a Digest.
static Digest digest_final(SHA256_CTX &ctx) {
  u8 buf[SHA256_SIZE];

  // The call must live outside assert(): with NDEBUG defined the assert
  // expression is not evaluated at all, which would leave `buf`
  // uninitialized and the hash never finalized.
  [[maybe_unused]] int ok = SHA256_Final(buf, &ctx);
  assert(ok == 1);

  Digest digest;
  memcpy(digest.data(), buf, HASH_SIZE);
  return digest;
}
|
|
|
|
|
2021-01-29 12:06:58 +03:00
|
|
|
// Returns true if `isec` is a leaf: it has no relocations of its own and
// none of its FDEs has more than one relocation.
static bool is_leaf(InputSection &isec) {
  bool leaf = isec.rels.empty();

  if (leaf) {
    for (FdeRecord &fde : isec.fdes) {
      if (fde.rels.size() > 1) {
        leaf = false;
        break;
      }
    }
  }
  return leaf;
}
|
|
|
|
|
2021-01-30 09:06:12 +03:00
|
|
|
// Mixes hash value `b` into hash value `a` and returns the combined hash.
static size_t combine_hash(size_t a, size_t b) {
  const size_t mixed = b + 0x9e3779b9 + (a << 6) + (a >> 2);
  return a ^ mixed;
}
|
|
|
|
|
2021-01-29 12:06:58 +03:00
|
|
|
struct LeafHasher {
|
2021-01-29 12:28:31 +03:00
|
|
|
size_t operator()(const InputSection *isec) const {
|
|
|
|
size_t h = std::hash<std::string_view>()(isec->get_contents());
|
|
|
|
for (FdeRecord &fde : isec->fdes) {
|
2021-01-29 12:06:58 +03:00
|
|
|
size_t h2 = std::hash<std::string_view>()(fde.contents.substr(8));
|
2021-01-30 09:06:12 +03:00
|
|
|
h = combine_hash(h, h2);
|
2021-01-29 12:06:58 +03:00
|
|
|
}
|
|
|
|
return h;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2021-01-29 12:28:31 +03:00
|
|
|
struct LeafEq {
|
|
|
|
bool operator()(const InputSection *a, const InputSection *b) const {
|
|
|
|
if (a->get_contents() != b->get_contents())
|
2021-01-29 12:06:58 +03:00
|
|
|
return false;
|
2021-01-29 12:28:31 +03:00
|
|
|
if (a->fdes.size() != b->fdes.size())
|
2021-01-29 12:06:58 +03:00
|
|
|
return false;
|
2021-01-29 12:28:31 +03:00
|
|
|
for (i64 i = 0; i < a->fdes.size(); i++) {
|
|
|
|
if (a->fdes[i].contents.size() != b->fdes[i].contents.size())
|
2021-01-29 12:06:58 +03:00
|
|
|
return false;
|
2021-01-29 12:28:31 +03:00
|
|
|
if (a->fdes[i].contents.substr(8) != b->fdes[i].contents.substr(8))
|
2021-01-29 12:06:58 +03:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2021-01-29 14:18:04 +03:00
|
|
|
static void merge_leaf_nodes() {
|
2021-01-30 01:55:07 +03:00
|
|
|
Timer t("merge_leaf_nodes");
|
2021-01-29 14:18:04 +03:00
|
|
|
|
2021-01-29 14:55:54 +03:00
|
|
|
static Counter eligible("icf_eligibles");
|
|
|
|
static Counter non_eligible("icf_non_eligibles");
|
|
|
|
static Counter leaf("icf_leaf_nodes");
|
|
|
|
|
2021-01-29 14:18:04 +03:00
|
|
|
tbb::concurrent_unordered_map<InputSection *, InputSection *,
|
|
|
|
LeafHasher, LeafEq> map;
|
|
|
|
|
|
|
|
tbb::parallel_for((i64)0, (i64)out::objs.size(), [&](i64 i) {
|
|
|
|
for (InputSection *isec : out::objs[i]->sections) {
|
2021-01-29 14:55:54 +03:00
|
|
|
if (!isec)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (!is_eligible(*isec)) {
|
2021-01-29 15:44:46 +03:00
|
|
|
non_eligible++;
|
2021-01-29 14:18:04 +03:00
|
|
|
continue;
|
2021-01-29 14:55:54 +03:00
|
|
|
}
|
2021-01-29 14:18:04 +03:00
|
|
|
|
|
|
|
if (is_leaf(*isec)) {
|
2021-01-29 15:44:46 +03:00
|
|
|
leaf++;
|
2021-01-29 14:18:04 +03:00
|
|
|
isec->icf_leaf = true;
|
|
|
|
auto [it, inserted] = map.insert({isec, isec});
|
|
|
|
if (!inserted && isec->get_priority() < it->second->get_priority())
|
|
|
|
it->second = isec;
|
|
|
|
} else {
|
2021-01-29 15:44:46 +03:00
|
|
|
eligible++;
|
2021-01-29 14:18:04 +03:00
|
|
|
isec->icf_eligible = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
|
|
|
|
|
|
|
tbb::parallel_for((i64)0, (i64)out::objs.size(), [&](i64 i) {
|
|
|
|
for (InputSection *isec : out::objs[i]->sections) {
|
2021-01-29 14:56:36 +03:00
|
|
|
if (isec && isec->icf_leaf) {
|
|
|
|
auto it = map.find(isec);
|
|
|
|
assert(it != map.end());
|
|
|
|
isec->leader = it->second;
|
|
|
|
}
|
2021-01-29 14:18:04 +03:00
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2021-01-28 06:39:22 +03:00
|
|
|
// Computes the initial digest of `isec`.
//
// The digest covers the section contents, its flags, its FDEs (contents
// after the first eight bytes, the uniquified CIE index, and all FDE
// relocations but the first), and its relocations. A relocation target
// is hashed by value where possible (fragment data, leader priority);
// a target that is itself ICF-eligible contributes only a tag byte.
static Digest compute_digest(InputSection &isec) {
  SHA256_CTX ctx;
  SHA256_Init(&ctx);

  // Feeds the raw bytes of a value into the hash.
  auto put = [&](auto val) {
    SHA256_Update(&ctx, &val, sizeof(val));
  };

  // Feeds a string into the hash, prefixed by its length.
  auto put_string = [&](std::string_view str) {
    put(str.size());
    SHA256_Update(&ctx, str.data(), str.size());
  };

  // Feeds a relocation target into the hash. Each kind of target gets
  // its own tag character so that different kinds never collide.
  auto put_symbol = [&](Symbol &sym) {
    InputSection *target = sym.input_section;

    if (SectionFragment *frag = sym.frag) {
      put('2');
      put_string(frag->data);
    } else if (!target) {
      put('3');
    } else if (target->leader) {
      put('4');
      put(target->leader->get_priority());
    } else if (target->icf_eligible) {
      put('5');
    } else {
      put('6');
      put(target->get_priority());
    }
    put(sym.value);
  };

  put_string(isec.get_contents());
  put(isec.shdr.sh_flags);
  put(isec.fdes.size());
  put(isec.rels.size());

  for (FdeRecord &fde : isec.fdes) {
    put(isec.file->cies[fde.cie_idx].icf_idx);

    // Bytes 0 to 4 contain the length of this record, and
    // bytes 4 to 8 contain an offset to CIE.
    put_string(fde.contents.substr(8));

    put(fde.rels.size());

    // The first relocation of an FDE is skipped.
    auto tail = std::span(fde.rels).subspan(1);
    for (EhReloc &rel : tail) {
      put_symbol(rel.sym);
      put(rel.type);
      put(rel.offset);
      put(rel.addend);
    }
  }

  // Index of the next entry of `rel_fragments` to consume.
  i64 frag_idx = 0;

  for (i64 i = 0; i < isec.rels.size(); i++) {
    ElfRela &rel = isec.rels[i];
    put(rel.r_offset);
    put(rel.r_type);
    put(rel.r_addend);

    if (!isec.has_fragments[i]) {
      put_symbol(*isec.file->symbols[rel.r_sym]);
      continue;
    }

    SectionFragmentRef &ref = isec.rel_fragments[frag_idx++];
    put('1');
    put(ref.addend);
    put_string(ref.frag->data);
  }

  return digest_final(ctx);
}
|
2021-01-27 13:07:55 +03:00
|
|
|
|
2021-01-29 06:45:37 +03:00
|
|
|
static std::vector<InputSection *> gather_sections() {
|
2021-01-29 06:57:00 +03:00
|
|
|
Timer t("gather_sections");
|
2021-01-28 07:38:28 +03:00
|
|
|
|
|
|
|
// Count the number of input sections for each input file.
|
2021-01-27 14:18:03 +03:00
|
|
|
std::vector<i64> num_sections(out::objs.size());
|
2021-01-27 13:07:55 +03:00
|
|
|
|
|
|
|
tbb::parallel_for((i64)0, (i64)out::objs.size(), [&](i64 i) {
|
2021-01-27 14:18:03 +03:00
|
|
|
for (InputSection *isec : out::objs[i]->sections)
|
2021-01-29 12:37:49 +03:00
|
|
|
if (isec && isec->icf_eligible)
|
2021-01-27 14:18:03 +03:00
|
|
|
num_sections[i]++;
|
2021-01-27 13:07:55 +03:00
|
|
|
});
|
|
|
|
|
2021-01-28 07:30:12 +03:00
|
|
|
std::vector<i64> section_indices(out::objs.size());
|
|
|
|
for (i64 i = 0; i < out::objs.size() - 1; i++)
|
2021-01-27 14:18:03 +03:00
|
|
|
section_indices[i + 1] = section_indices[i] + num_sections[i];
|
2021-01-27 13:46:25 +03:00
|
|
|
|
2021-01-29 06:45:37 +03:00
|
|
|
std::vector<InputSection *> sections(section_indices.back() + num_sections.back());
|
2021-01-27 13:07:55 +03:00
|
|
|
|
2021-01-29 06:45:37 +03:00
|
|
|
// Fill `sections` contents.
|
2021-01-27 13:07:55 +03:00
|
|
|
tbb::parallel_for((i64)0, (i64)out::objs.size(), [&](i64 i) {
|
2021-01-27 14:18:03 +03:00
|
|
|
i64 idx = section_indices[i];
|
2021-01-29 12:37:49 +03:00
|
|
|
for (InputSection *isec : out::objs[i]->sections)
|
|
|
|
if (isec && isec->icf_eligible)
|
2021-01-29 06:45:37 +03:00
|
|
|
sections[idx++] = isec;
|
|
|
|
});
|
2021-01-28 13:19:39 +03:00
|
|
|
|
2021-01-29 06:45:37 +03:00
|
|
|
tbb::parallel_for((i64)0, (i64)sections.size(), [&](i64 i) {
|
|
|
|
sections[i]->icf_idx = i;
|
2021-01-27 14:49:29 +03:00
|
|
|
});
|
|
|
|
|
2021-01-29 06:45:37 +03:00
|
|
|
return sections;
|
|
|
|
}
|
2021-01-27 14:30:22 +03:00
|
|
|
|
2021-01-29 06:45:37 +03:00
|
|
|
// Computes the initial digest of every section in parallel. The i'th
// element of the result is the digest of sections[i].
static std::vector<Digest> compute_digests(std::span<InputSection *> sections) {
  Timer t("compute_digests");

  const i64 count = sections.size();
  std::vector<Digest> result(count);

  tbb::parallel_for((i64)0, count, [&](i64 idx) {
    result[idx] = compute_digest(*sections[idx]);
  });
  return result;
}
|
|
|
|
|
2021-01-29 06:53:03 +03:00
|
|
|
// Builds the relocation graph between ICF-eligible sections in a
// compressed adjacency-list form.
//
// On return, `edges` holds the `icf_idx` of every edge target, grouped by
// source section, and `edge_indices[i]` is the offset in `edges` where
// the outgoing edges of sections[i] begin. Both vectors are left empty
// if `sections` is empty.
static void gather_edges(std::span<InputSection *> sections,
                         std::vector<u32> &edges, std::vector<u32> &edge_indices) {
  Timer t("gather_edges");

  const i64 num_sections = sections.size();

  // Returns the section the j'th relocation of `isec` points to if that
  // relocation forms an edge of the graph, or nullptr otherwise. Only
  // non-fragment relocations to ICF-eligible sections are edges.
  auto edge_target = [](InputSection &isec, i64 j) -> InputSection * {
    if (isec.has_fragments[j])
      return nullptr;

    ElfRela &rel = isec.rels[j];
    Symbol &sym = *isec.file->symbols[rel.r_sym];
    if (!sym.frag && sym.input_section && sym.input_section->icf_eligible)
      return sym.input_section;
    return nullptr;
  };

  // Count the outgoing edges of each section.
  std::vector<i64> num_edges(num_sections);

  tbb::parallel_for((i64)0, num_sections, [&](i64 i) {
    InputSection &isec = *sections[i];
    assert(isec.icf_eligible);

    for (i64 j = 0; j < isec.rels.size(); j++)
      if (edge_target(isec, j))
        num_edges[i]++;
  });

  // Exclusive prefix sum. Written as a running total so that an empty
  // `sections` does not underflow `size() - 1` or call back() on an
  // empty vector.
  edge_indices.resize(num_sections);
  i64 total = 0;
  for (i64 i = 0; i < num_sections; i++) {
    edge_indices[i] = total;
    total += num_edges[i];
  }

  edges.resize(total);

  // Fill `edges` contents.
  tbb::parallel_for((i64)0, num_sections, [&](i64 i) {
    InputSection &isec = *sections[i];
    i64 idx = edge_indices[i];

    for (i64 j = 0; j < isec.rels.size(); j++)
      if (InputSection *target = edge_target(isec, j))
        edges[idx++] = target->icf_idx;
  });
}
|
|
|
|
|
2021-01-30 05:21:15 +03:00
|
|
|
// Runs one propagation round.
//
// `digests[slot]` is the current generation, `digests[!slot]` receives
// the next one, and `digests[2]` holds the initial digests. For each
// section whose two generations differ, the next digest is the hash of
// its initial digest followed by the current digests of all sections it
// has edges to. Sections whose two generations are already equal are
// skipped. `slot` is flipped before returning.
//
// Returns the number of sections whose new digest differs from their
// current one.
static i64 propagate(std::span<std::vector<Digest>> digests,
                     std::span<u32> edges, std::span<u32> edge_indices,
                     bool &slot, tbb::affinity_partitioner &ap) {
  static Counter round("icf_round");
  round++;

  std::vector<Digest> &cur = digests[slot];
  std::vector<Digest> &next = digests[!slot];
  std::vector<Digest> &initial = digests[2];

  const i64 num_digests = digests[0].size();
  tbb::enumerable_thread_specific<i64> changed;

  tbb::parallel_for((i64)0, num_digests, [&](i64 i) {
    if (cur[i] == next[i])
      return;

    SHA256_CTX ctx;
    SHA256_Init(&ctx);
    SHA256_Update(&ctx, initial[i].data(), HASH_SIZE);

    // Outgoing edges of section i occupy edges[begin, end).
    const i64 begin = edge_indices[i];
    i64 end = edges.size();
    if (i + 1 != num_digests)
      end = edge_indices[i + 1];

    for (i64 j = begin; j < end; j++)
      SHA256_Update(&ctx, cur[edges[j]].data(), HASH_SIZE);

    next[i] = digest_final(ctx);

    if (cur[i] != next[i])
      changed.local()++;
  }, ap);

  slot = !slot;
  return changed.combine(std::plus());
}
|
|
|
|
|
2021-01-30 09:06:12 +03:00
|
|
|
// Returns the number of positions at which two adjacent digests differ
// once the digests are sorted. For non-empty input this is one less than
// the number of distinct digests, so it changes exactly when the number
// of equivalence classes changes.
static i64 count_num_classes(std::span<Digest> digests,
                             tbb::affinity_partitioner &ap) {
  // Sort a copy; the caller's digests must keep their order.
  std::vector<Digest> sorted(digests.begin(), digests.end());
  tbb::parallel_sort(sorted.begin(), sorted.end());

  tbb::enumerable_thread_specific<i64> boundaries;
  const i64 last = (i64)sorted.size() - 1;

  tbb::parallel_for((i64)0, last, [&](i64 idx) {
    if (sorted[idx] != sorted[idx + 1])
      boundaries.local()++;
  }, ap);

  return boundaries.combine(std::plus());
}
|
|
|
|
|
2021-01-29 13:40:35 +03:00
|
|
|
static void print_icf_sections() {
|
|
|
|
tbb::concurrent_vector<InputSection *> leaders;
|
|
|
|
tbb::concurrent_unordered_multimap<InputSection *, InputSection *> map;
|
|
|
|
|
|
|
|
tbb::parallel_for_each(out::objs, [&](ObjectFile *file) {
|
2021-01-29 13:43:26 +03:00
|
|
|
for (InputSection *isec : file->sections) {
|
2021-01-29 13:40:35 +03:00
|
|
|
if (isec && isec->leader) {
|
2021-01-29 15:54:05 +03:00
|
|
|
if (isec == isec->leader)
|
2021-01-29 13:40:35 +03:00
|
|
|
leaders.push_back(isec);
|
2021-01-29 15:54:05 +03:00
|
|
|
else
|
2021-01-29 13:40:35 +03:00
|
|
|
map.insert({isec->leader, isec});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
|
|
|
|
|
|
|
tbb::parallel_sort(leaders.begin(), leaders.end(),
|
|
|
|
[&](InputSection *a, InputSection *b) {
|
|
|
|
return a->get_priority() < b->get_priority();
|
|
|
|
});
|
|
|
|
|
|
|
|
i64 saved_bytes = 0;
|
|
|
|
|
|
|
|
for (InputSection *leader : leaders) {
|
|
|
|
auto [begin, end] = map.equal_range(leader);
|
|
|
|
if (begin == end)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
SyncOut() << "selected section " << *leader;
|
|
|
|
|
|
|
|
i64 n = 0;
|
|
|
|
for (auto it = begin; it != end; it++) {
|
|
|
|
SyncOut() << " removing identical section " << *it->second;
|
|
|
|
n++;
|
|
|
|
}
|
|
|
|
saved_bytes += leader->get_contents().size() * n;
|
|
|
|
}
|
|
|
|
|
|
|
|
SyncOut() << "ICF saved " << saved_bytes << " bytes";
|
|
|
|
}
|
|
|
|
|
2021-01-27 13:07:55 +03:00
|
|
|
void icf_sections() {
|
|
|
|
Timer t("icf");
|
2021-01-30 08:30:31 +03:00
|
|
|
|
|
|
|
uniquify_cies();
|
2021-01-29 14:18:04 +03:00
|
|
|
merge_leaf_nodes();
|
2021-01-29 12:06:58 +03:00
|
|
|
|
2021-01-28 06:39:22 +03:00
|
|
|
// Prepare for the propagation rounds.
|
2021-01-29 06:45:37 +03:00
|
|
|
std::vector<InputSection *> sections = gather_sections();
|
|
|
|
|
2021-01-30 03:24:42 +03:00
|
|
|
std::vector<std::vector<Digest>> digests(3);
|
2021-01-29 07:42:42 +03:00
|
|
|
digests[0] = compute_digests(sections);
|
|
|
|
digests[1].resize(digests[0].size());
|
2021-01-30 03:24:42 +03:00
|
|
|
digests[2] = digests[0];
|
2021-01-29 07:42:42 +03:00
|
|
|
|
2021-01-29 06:53:03 +03:00
|
|
|
std::vector<u32> edges;
|
2021-01-29 07:42:42 +03:00
|
|
|
std::vector<u32> edge_indices;
|
2021-01-29 06:53:03 +03:00
|
|
|
gather_edges(sections, edges, edge_indices);
|
|
|
|
|
2021-01-29 17:27:35 +03:00
|
|
|
bool slot = 0;
|
2021-01-29 07:42:42 +03:00
|
|
|
|
|
|
|
// Execute the propagation rounds until convergence is obtained.
|
2021-01-29 08:39:42 +03:00
|
|
|
{
|
2021-01-29 14:25:00 +03:00
|
|
|
Timer t("propagate");
|
|
|
|
tbb::affinity_partitioner ap;
|
|
|
|
|
2021-01-30 03:24:42 +03:00
|
|
|
i64 num_changed = -1;
|
|
|
|
for (;;) {
|
|
|
|
i64 n = propagate(digests, edges, edge_indices, slot, ap);
|
|
|
|
if (n == num_changed)
|
|
|
|
break;
|
|
|
|
num_changed = n;
|
|
|
|
}
|
|
|
|
|
|
|
|
i64 num_classes = -1;
|
2021-01-29 15:09:06 +03:00
|
|
|
for (;;) {
|
2021-01-30 03:29:36 +03:00
|
|
|
for (i64 i = 0; i < 10; i++)
|
2021-01-29 15:09:06 +03:00
|
|
|
propagate(digests, edges, edge_indices, slot, ap);
|
2021-01-29 15:13:10 +03:00
|
|
|
|
2021-01-30 09:06:12 +03:00
|
|
|
i64 n = count_num_classes(digests[slot], ap);
|
2021-01-29 15:09:06 +03:00
|
|
|
if (n == num_classes)
|
|
|
|
break;
|
|
|
|
num_classes = n;
|
2021-01-29 08:17:09 +03:00
|
|
|
}
|
2021-01-29 07:42:42 +03:00
|
|
|
}
|
2021-01-28 06:39:22 +03:00
|
|
|
|
2021-01-29 15:13:10 +03:00
|
|
|
// Group sections by SHA digest.
|
2021-01-30 01:55:07 +03:00
|
|
|
{
|
|
|
|
Timer t("group");
|
|
|
|
|
2021-01-30 03:41:41 +03:00
|
|
|
auto *map = new tbb::concurrent_unordered_map<Digest, InputSection *>;
|
2021-01-30 01:55:07 +03:00
|
|
|
std::span<Digest> digest = digests[slot];
|
|
|
|
|
|
|
|
tbb::parallel_for((i64)0, (i64)sections.size(), [&](i64 i) {
|
|
|
|
InputSection *isec = sections[i];
|
2021-01-30 03:41:41 +03:00
|
|
|
auto [it, inserted] = map->insert({digest[i], isec});
|
2021-01-30 01:55:07 +03:00
|
|
|
if (!inserted && isec->get_priority() < it->second->get_priority())
|
|
|
|
it->second = isec;
|
|
|
|
});
|
|
|
|
|
|
|
|
tbb::parallel_for((i64)0, (i64)sections.size(), [&](i64 i) {
|
2021-01-30 03:41:41 +03:00
|
|
|
auto it = map->find(digest[i]);
|
|
|
|
assert(it != map->end());
|
2021-01-30 01:55:07 +03:00
|
|
|
sections[i]->leader = it->second;
|
|
|
|
});
|
|
|
|
}
|
2021-01-28 08:35:56 +03:00
|
|
|
|
2021-01-29 13:40:35 +03:00
|
|
|
if (config.print_icf_sections)
|
|
|
|
print_icf_sections();
|
|
|
|
|
2021-01-28 08:35:56 +03:00
|
|
|
// Re-assign input sections to symbols.
|
2021-01-30 01:55:07 +03:00
|
|
|
{
|
|
|
|
Timer t("reassign");
|
|
|
|
tbb::parallel_for_each(out::objs, [](ObjectFile *file) {
|
|
|
|
for (Symbol *sym : file->symbols) {
|
|
|
|
if (sym->file != file)
|
|
|
|
continue;
|
|
|
|
InputSection *isec = sym->input_section;
|
|
|
|
if (isec && isec->leader && isec->leader != isec) {
|
|
|
|
sym->input_section = isec->leader;
|
|
|
|
isec->kill();
|
|
|
|
}
|
2021-01-29 12:47:05 +03:00
|
|
|
}
|
2021-01-30 01:55:07 +03:00
|
|
|
});
|
|
|
|
}
|
2021-01-27 13:07:55 +03:00
|
|
|
}
|