mirror of
https://github.com/rui314/mold.git
synced 2024-10-04 16:48:04 +03:00
[Mach-O] Merge string constants in __TEXT,__cstring
This commit is contained in:
parent
14c717d698
commit
e770df42d9
@ -168,18 +168,21 @@ struct SplitInfo {
|
||||
};
|
||||
|
||||
template <typename E>
|
||||
static std::vector<SplitInfo<E>> split(Context<E> &ctx, ObjectFile<E> &file) {
|
||||
std::vector<SplitInfo<E>> vec;
|
||||
static std::vector<SplitInfo<E>>
|
||||
split_regular_sections(Context<E> &ctx, ObjectFile<E> &file) {
|
||||
std::vector<SplitInfo<E>> vec(file.sections.size());
|
||||
|
||||
for (std::unique_ptr<InputSection<E>> &isec : file.sections)
|
||||
vec.push_back({isec.get()});
|
||||
for (i64 i = 0; i < file.sections.size(); i++)
|
||||
if (InputSection<E> *isec = file.sections[i].get())
|
||||
if (!isec->hdr.match("__TEXT", "__cstring"))
|
||||
vec[i].isec = isec;
|
||||
|
||||
// Find all symbols whose type is N_SECT.
|
||||
for (i64 i = 0; i < file.mach_syms.size(); i++) {
|
||||
MachSym &msym = file.mach_syms[i];
|
||||
if (msym.type == N_SECT && file.sections[msym.sect - 1]) {
|
||||
if (msym.type == N_SECT && vec[msym.sect - 1].isec) {
|
||||
SplitRegion r;
|
||||
r.offset = msym.value - file.sections[msym.sect - 1]->hdr.addr;
|
||||
r.offset = msym.value - vec[msym.sect - 1].isec->hdr.addr;
|
||||
r.symidx = i;
|
||||
r.is_alt_entry = (msym.desc & N_ALT_ENTRY);
|
||||
vec[msym.sect - 1].regions.push_back(r);
|
||||
@ -231,36 +234,67 @@ template <typename E>
|
||||
void ObjectFile<E>::split_subsections(Context<E> &ctx) {
|
||||
sym_to_subsec.resize(mach_syms.size());
|
||||
|
||||
// Split a section into subsections.
|
||||
for (SplitInfo<E> &info : split(ctx, *this)) {
|
||||
InputSection<E> &isec = *info.isec;
|
||||
|
||||
for (SplitRegion &r : info.regions) {
|
||||
if (!r.is_alt_entry) {
|
||||
auto add = [&](InputSection<E> &isec, u32 offset, u32 size, u8 p2align) {
|
||||
Subsection<E> *subsec = new Subsection<E>{
|
||||
.isec = isec,
|
||||
.input_offset = r.offset,
|
||||
.input_size = r.size,
|
||||
.input_addr = (u32)(isec.hdr.addr + r.offset),
|
||||
.p2align = (u8)isec.hdr.p2align,
|
||||
.input_offset = offset,
|
||||
.input_size = size,
|
||||
.input_addr = (u32)(isec.hdr.addr + offset),
|
||||
.p2align = p2align,
|
||||
};
|
||||
|
||||
subsec_pool.emplace_back(subsec);
|
||||
subsections.push_back(subsec);
|
||||
}
|
||||
};
|
||||
|
||||
// Split regular sections into subsections.
|
||||
for (SplitInfo<E> &info : split_regular_sections(ctx, *this)) {
|
||||
InputSection<E> &isec = *info.isec;
|
||||
for (SplitRegion &r : info.regions) {
|
||||
if (!r.is_alt_entry)
|
||||
add(isec, r.offset, r.size, isec.hdr.p2align);
|
||||
if (r.symidx != -1)
|
||||
sym_to_subsec[r.symidx] = subsections.back();
|
||||
}
|
||||
}
|
||||
|
||||
// Split __cstring section.
|
||||
for (std::unique_ptr<InputSection<E>> &isec : sections) {
|
||||
if (isec && isec->hdr.match("__TEXT", "__cstring")) {
|
||||
std::string_view str = isec->contents;
|
||||
size_t pos = 0;
|
||||
|
||||
while (pos < str.size()) {
|
||||
size_t pos2 = str.find('\0', pos);
|
||||
if (pos2 == str.npos)
|
||||
Fatal(ctx) << *this << " corruupted __TEXT,__cstring";
|
||||
|
||||
// A constant string in __cstring has no alignment info, so we
|
||||
// need to infer it. We assume that the maximum alignment
|
||||
// requirement is 16, which should be enough for all machine
|
||||
// instructions.
|
||||
u8 p2align = std::max(4, std::countr_zero(pos));
|
||||
add(*isec, pos, pos2 - pos + 1, p2align);
|
||||
pos = str.find_first_not_of('\0', pos2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sort(subsections, [](Subsection<E> *a, Subsection<E> *b) {
|
||||
return a->input_addr < b->input_addr;
|
||||
});
|
||||
|
||||
// Fix local symbols `subsec` members.
|
||||
for (i64 i = 0; i < mach_syms.size(); i++) {
|
||||
MachSym &msym = mach_syms[i];
|
||||
if (!msym.ext && msym.type == N_SECT) {
|
||||
Symbol<E> &sym = *this->syms[i];
|
||||
|
||||
if (Subsection<E> *subsec = sym_to_subsec[i]) {
|
||||
if (!msym.ext && msym.type == N_SECT) {
|
||||
Subsection<E> *subsec = sym_to_subsec[i];
|
||||
if (!subsec)
|
||||
subsec = find_subsection(ctx, msym.value);
|
||||
|
||||
if (subsec) {
|
||||
sym.subsec = subsec;
|
||||
sym.value = msym.value - subsec->input_addr;
|
||||
} else {
|
||||
|
@ -226,6 +226,66 @@ static void claim_unresolved_symbols(Context<E> &ctx) {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
static void merge_cstring_sections(Context<E> &ctx) {
|
||||
Timer t(ctx, "merge_cstring_sections");
|
||||
|
||||
// Insert all strings into a hash table to merge them.
|
||||
std::unordered_map<std::string_view, Subsection<E> *> map;
|
||||
|
||||
for (ObjectFile<E> *file : ctx.objs) {
|
||||
for (Subsection<E> *subsec : file->subsections) {
|
||||
if (&subsec->isec.osec == ctx.cstring) {
|
||||
std::string_view str = subsec->get_contents();
|
||||
auto pair = map.insert({str, subsec});
|
||||
if (pair.second) {
|
||||
Subsection<E> *existing = pair.first->second;
|
||||
if (existing->p2align < subsec->p2align)
|
||||
pair.first->second = subsec;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Replace subsections
|
||||
for (ObjectFile<E> *file : ctx.objs) {
|
||||
for (Subsection<E> *subsec : file->subsections) {
|
||||
if (&subsec->isec.osec == ctx.cstring) {
|
||||
std::string_view str = subsec->get_contents();
|
||||
auto it = map.find(str);
|
||||
if (it->second != subsec) {
|
||||
subsec->is_coalesced = true;
|
||||
subsec->replacer = it->second;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (ObjectFile<E> *file : ctx.objs)
|
||||
for (std::unique_ptr<InputSection<E>> &isec : file->sections)
|
||||
if (isec)
|
||||
for (Relocation<E> &r : isec->rels)
|
||||
if (r.subsec && r.subsec->is_coalesced)
|
||||
r.subsec = r.subsec->replacer;
|
||||
|
||||
auto replace = [&](InputFile<E> *file) {
|
||||
for (Symbol<E> *sym : file->syms)
|
||||
if (sym->subsec && sym->subsec->is_coalesced)
|
||||
sym->subsec = sym->subsec->replacer;
|
||||
};
|
||||
|
||||
for (InputFile<E> *file : ctx.objs)
|
||||
replace(file);
|
||||
for (InputFile<E> *file : ctx.dylibs)
|
||||
replace(file);
|
||||
|
||||
for (ObjectFile<E> *file : ctx.objs) {
|
||||
std::erase_if(file->subsections, [](Subsection<E> *subsec) {
|
||||
return subsec->is_coalesced;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
static void create_synthetic_chunks(Context<E> &ctx) {
|
||||
for (ObjectFile<E> *file : ctx.objs)
|
||||
@ -603,6 +663,8 @@ static int do_main(int argc, char **argv) {
|
||||
|
||||
claim_unresolved_symbols(ctx);
|
||||
|
||||
merge_cstring_sections(ctx);
|
||||
|
||||
if (ctx.arg.dead_strip)
|
||||
dead_strip(ctx);
|
||||
|
||||
|
@ -229,6 +229,8 @@ public:
|
||||
void scan_relocations(Context<E> &ctx);
|
||||
void apply_reloc(Context<E> &ctx, u8 *buf);
|
||||
|
||||
union {
|
||||
struct {
|
||||
InputSection<E> &isec;
|
||||
u32 input_offset = 0;
|
||||
u32 input_size = 0;
|
||||
@ -239,7 +241,13 @@ public:
|
||||
u32 unwind_offset = 0;
|
||||
u32 nunwind = 0;
|
||||
u8 p2align = 0;
|
||||
};
|
||||
|
||||
Subsection<E> *replacer; // Used if is_coalesced is true
|
||||
};
|
||||
|
||||
std::atomic_bool is_alive = true;
|
||||
bool is_coalesced = false;
|
||||
};
|
||||
|
||||
template <typename E>
|
||||
|
35
test/macho/cstring.sh
Executable file
35
test/macho/cstring.sh
Executable file
@ -0,0 +1,35 @@
|
||||
#!/bin/bash
# Checks that identical string literals coming from different object
# files are merged into a single string by the linker, while distinct
# literals stay distinct.
export LC_ALL=C
set -e

# Tool selection; each can be overridden from the environment.
CC="${TEST_CC:-cc}"
CXX="${TEST_CXX:-c++}"
GCC="${TEST_GCC:-gcc}"
GXX="${TEST_GXX:-g++}"
OBJDUMP="${OBJDUMP:-objdump}"
MACHINE="${MACHINE:-$(uname -m)}"

testname=$(basename "$0" .sh)
echo -n "Testing $testname ... "

# Run from the repository root and use a per-test output directory.
cd "$(dirname "$0")"/../..
t=out/test/macho/$testname
mkdir -p $t

# a.o: defines `x`, pointing to "Hello world\n".
$CC -o $t/a.o -c -xc - <<EOF
const char *x = "Hello world\n";
EOF

# b.o: defines `y` with the same literal as `x` and `z` with a different one.
$CC -o $t/b.o -c -xc - <<EOF
#include <stdio.h>

extern const char *x;
const char *y = "Hello world\n";
const char *z = "Howdy world\n";

int main() {
  printf("%d %d\n", x == y, y == z);
}
EOF

clang --ld-path=./ld64 -o $t/exe $t/a.o $t/b.o

# x and y must share one address (1); y and z must not (0).
$t/exe | grep -q '^1 0$'

echo OK
|
Loading…
Reference in New Issue
Block a user