1
1
mirror of https://github.com/rui314/mold.git synced 2024-10-04 16:48:04 +03:00

[Mach-O] Merge string constants in __TEXT,__cstring

This commit is contained in:
Rui Ueyama 2022-05-26 12:02:55 +08:00
parent 14c717d698
commit e770df42d9
4 changed files with 174 additions and 35 deletions

View File

@ -168,18 +168,21 @@ struct SplitInfo {
};
template <typename E>
static std::vector<SplitInfo<E>> split(Context<E> &ctx, ObjectFile<E> &file) {
std::vector<SplitInfo<E>> vec;
static std::vector<SplitInfo<E>>
split_regular_sections(Context<E> &ctx, ObjectFile<E> &file) {
std::vector<SplitInfo<E>> vec(file.sections.size());
for (std::unique_ptr<InputSection<E>> &isec : file.sections)
vec.push_back({isec.get()});
for (i64 i = 0; i < file.sections.size(); i++)
if (InputSection<E> *isec = file.sections[i].get())
if (!isec->hdr.match("__TEXT", "__cstring"))
vec[i].isec = isec;
// Find all symbols whose type is N_SECT.
for (i64 i = 0; i < file.mach_syms.size(); i++) {
MachSym &msym = file.mach_syms[i];
if (msym.type == N_SECT && file.sections[msym.sect - 1]) {
if (msym.type == N_SECT && vec[msym.sect - 1].isec) {
SplitRegion r;
r.offset = msym.value - file.sections[msym.sect - 1]->hdr.addr;
r.offset = msym.value - vec[msym.sect - 1].isec->hdr.addr;
r.symidx = i;
r.is_alt_entry = (msym.desc & N_ALT_ENTRY);
vec[msym.sect - 1].regions.push_back(r);
@ -231,36 +234,67 @@ template <typename E>
void ObjectFile<E>::split_subsections(Context<E> &ctx) {
sym_to_subsec.resize(mach_syms.size());
// Split a section into subsections.
for (SplitInfo<E> &info : split(ctx, *this)) {
InputSection<E> &isec = *info.isec;
for (SplitRegion &r : info.regions) {
if (!r.is_alt_entry) {
auto add = [&](InputSection<E> &isec, u32 offset, u32 size, u8 p2align) {
Subsection<E> *subsec = new Subsection<E>{
.isec = isec,
.input_offset = r.offset,
.input_size = r.size,
.input_addr = (u32)(isec.hdr.addr + r.offset),
.p2align = (u8)isec.hdr.p2align,
.input_offset = offset,
.input_size = size,
.input_addr = (u32)(isec.hdr.addr + offset),
.p2align = p2align,
};
subsec_pool.emplace_back(subsec);
subsections.push_back(subsec);
}
};
// Split regular sections into subsections.
for (SplitInfo<E> &info : split_regular_sections(ctx, *this)) {
InputSection<E> &isec = *info.isec;
for (SplitRegion &r : info.regions) {
if (!r.is_alt_entry)
add(isec, r.offset, r.size, isec.hdr.p2align);
if (r.symidx != -1)
sym_to_subsec[r.symidx] = subsections.back();
}
}
// Split __cstring section.
for (std::unique_ptr<InputSection<E>> &isec : sections) {
if (isec && isec->hdr.match("__TEXT", "__cstring")) {
std::string_view str = isec->contents;
size_t pos = 0;
while (pos < str.size()) {
size_t pos2 = str.find('\0', pos);
if (pos2 == str.npos)
Fatal(ctx) << *this << " corruupted __TEXT,__cstring";
// A constant string in __cstring has no alignment info, so we
// need to infer it. We assume that the maximum alignment
// requirement is 16, which should be enough for all machine
// instructions.
u8 p2align = std::max(4, std::countr_zero(pos));
add(*isec, pos, pos2 - pos + 1, p2align);
pos = str.find_first_not_of('\0', pos2);
}
}
}
sort(subsections, [](Subsection<E> *a, Subsection<E> *b) {
return a->input_addr < b->input_addr;
});
// Fix local symbols `subsec` members.
for (i64 i = 0; i < mach_syms.size(); i++) {
MachSym &msym = mach_syms[i];
if (!msym.ext && msym.type == N_SECT) {
Symbol<E> &sym = *this->syms[i];
if (Subsection<E> *subsec = sym_to_subsec[i]) {
if (!msym.ext && msym.type == N_SECT) {
Subsection<E> *subsec = sym_to_subsec[i];
if (!subsec)
subsec = find_subsection(ctx, msym.value);
if (subsec) {
sym.subsec = subsec;
sym.value = msym.value - subsec->input_addr;
} else {

View File

@ -226,6 +226,66 @@ static void claim_unresolved_symbols(Context<E> &ctx) {
}
}
// Merges identical string constants that belong to the __cstring output
// section (ctx.cstring) across all object files.
//
// For every distinct string exactly one subsection is kept as the
// representative; every other subsection with the same contents is
// marked `is_coalesced`, gets its `replacer` pointed at the
// representative, and is removed from its file's subsection list.
// Relocations and symbols that referred to a coalesced subsection are
// redirected to the representative.
template <typename E>
static void merge_cstring_sections(Context<E> &ctx) {
  Timer t(ctx, "merge_cstring_sections");

  // Insert all strings into a hash table to merge them. When the same
  // string shows up more than once, keep the subsection with the
  // largest alignment so the surviving copy satisfies every user.
  std::unordered_map<std::string_view, Subsection<E> *> map;

  for (ObjectFile<E> *file : ctx.objs) {
    for (Subsection<E> *subsec : file->subsections) {
      if (&subsec->isec.osec != ctx.cstring)
        continue;

      std::string_view str = subsec->get_contents();
      auto [it, inserted] = map.insert({str, subsec});

      // `inserted == false` means an equal string was already recorded;
      // only in that case is there an existing entry to compare with.
      if (!inserted && it->second->p2align < subsec->p2align)
        it->second = subsec;
    }
  }

  // Replace subsections: everything that is not the representative of
  // its string becomes a coalesced subsection.
  for (ObjectFile<E> *file : ctx.objs) {
    for (Subsection<E> *subsec : file->subsections) {
      if (&subsec->isec.osec != ctx.cstring)
        continue;

      std::string_view str = subsec->get_contents();
      auto it = map.find(str);

      if (it->second != subsec) {
        // NOTE(review): `replacer` appears to share a union with the
        // input-section fields, so `isec` must not be read from this
        // subsection after this point -- confirm against the
        // Subsection definition.
        subsec->is_coalesced = true;
        subsec->replacer = it->second;
      }
    }
  }

  // Redirect relocations that point at coalesced subsections.
  for (ObjectFile<E> *file : ctx.objs)
    for (std::unique_ptr<InputSection<E>> &isec : file->sections)
      if (isec)
        for (Relocation<E> &r : isec->rels)
          if (r.subsec && r.subsec->is_coalesced)
            r.subsec = r.subsec->replacer;

  // Redirect symbols that point at coalesced subsections.
  auto replace = [&](InputFile<E> *file) {
    for (Symbol<E> *sym : file->syms)
      if (sym->subsec && sym->subsec->is_coalesced)
        sym->subsec = sym->subsec->replacer;
  };

  for (InputFile<E> *file : ctx.objs)
    replace(file);
  for (InputFile<E> *file : ctx.dylibs)
    replace(file);

  // Finally drop the coalesced subsections from each file's list.
  for (ObjectFile<E> *file : ctx.objs) {
    std::erase_if(file->subsections, [](Subsection<E> *subsec) {
      return subsec->is_coalesced;
    });
  }
}
template <typename E>
static void create_synthetic_chunks(Context<E> &ctx) {
for (ObjectFile<E> *file : ctx.objs)
@ -603,6 +663,8 @@ static int do_main(int argc, char **argv) {
claim_unresolved_symbols(ctx);
merge_cstring_sections(ctx);
if (ctx.arg.dead_strip)
dead_strip(ctx);

View File

@ -229,6 +229,8 @@ public:
void scan_relocations(Context<E> &ctx);
void apply_reloc(Context<E> &ctx, u8 *buf);
union {
struct {
InputSection<E> &isec;
u32 input_offset = 0;
u32 input_size = 0;
@ -239,7 +241,13 @@ public:
u32 unwind_offset = 0;
u32 nunwind = 0;
u8 p2align = 0;
};
Subsection<E> *replacer; // Used if is_coalesced is true
};
std::atomic_bool is_alive = true;
bool is_coalesced = false;
};
template <typename E>

35
test/macho/cstring.sh Executable file
View File

@ -0,0 +1,35 @@
#!/bin/bash
# Checks that identical string literals from different object files are
# merged into one: `x` (defined in a.o) and `y` (defined in b.o) both
# point at "Hello world\n" and must compare equal, while `z` points at a
# different literal and must not. The linked program prints "1 0" on
# success.
export LC_ALL=C
set -e
CC="${TEST_CC:-cc}"
CXX="${TEST_CXX:-c++}"
GCC="${TEST_GCC:-gcc}"
GXX="${TEST_GXX:-g++}"
OBJDUMP="${OBJDUMP:-objdump}"
MACHINE="${MACHINE:-$(uname -m)}"
testname=$(basename "$0" .sh)
echo -n "Testing $testname ... "
cd "$(dirname "$0")"/../..
t=out/test/macho/$testname
mkdir -p "$t"

cat <<EOF | $CC -o "$t"/a.o -c -xc -
const char *x = "Hello world\n";
EOF

cat <<EOF | $CC -o "$t"/b.o -c -xc -
#include <stdio.h>
extern const char *x;
const char *y = "Hello world\n";
const char *z = "Howdy world\n";
int main() {
printf("%d %d\n", x == y, y == z);
}
EOF

# Link with the same compiler driver used for compilation so that a
# TEST_CC override applies to the whole test, not just the compile steps.
$CC --ld-path=./ld64 -o "$t"/exe "$t"/a.o "$t"/b.o
"$t"/exe | grep -q '^1 0$'

echo OK