diff --git a/common/common.h b/common/common.h index 700bd75e..e89f33d2 100644 --- a/common/common.h +++ b/common/common.h @@ -874,8 +874,8 @@ std::filesystem::path to_abs_path(std::filesystem::path path); // demangle.cc // -std::string_view demangle(std::string_view name); -std::optional cpp_demangle(std::string_view name); +std::optional demangle_cpp(std::string_view name); +std::optional demangle_rust(std::string_view name); // // compress.cc diff --git a/common/demangle.cc b/common/demangle.cc index 57c5e3e6..bb2a3a39 100644 --- a/common/demangle.cc +++ b/common/demangle.cc @@ -10,24 +10,7 @@ namespace mold { -std::string_view demangle(std::string_view name) { - static thread_local char *buf; - free(buf); - - // Try to demangle as a Rust symbol. Since legacy-style Rust symbols - // are also valid as a C++ mangled name, we need to call this before - // cpp_demangle. - buf = rust_demangle(std::string(name).c_str(), 0); - if (buf) - return buf; - - // Try to demangle as a C++ symbol. - if (std::optional s = cpp_demangle(name)) - return *s; - return name; -} - -std::optional cpp_demangle(std::string_view name) { +std::optional demangle_cpp(std::string_view name) { static thread_local char *buf; static thread_local size_t buflen; @@ -47,4 +30,13 @@ std::optional cpp_demangle(std::string_view name) { return {}; } +std::optional demangle_rust(std::string_view name) { + static thread_local char *buf; + free(buf); + buf = rust_demangle(std::string(name).c_str(), 0); + if (buf) + return buf; + return {}; +} + } // namespace mold diff --git a/elf/input-files.cc b/elf/input-files.cc index 39a92892..e8999c8f 100644 --- a/elf/input-files.cc +++ b/elf/input-files.cc @@ -10,6 +10,31 @@ namespace mold::elf { +template +static bool is_rust_symbol(const Symbol &sym) { + // The legacy Rust mangling scheme is indistinguishable from C++. + // We don't want to accidentally demangle C++ symbols as Rust ones. 
+ // So, the legacy mangling scheme will be demangled only when we + // know the object file was created by rustc. + if (sym.file && !sym.file->is_dso && ((ObjectFile *)sym.file)->is_rust_obj) + return true; + + // "_R" is the prefix of the new Rust mangling scheme. + return sym.name().starts_with("_R"); +} + +template +std::string_view demangle(const Symbol &sym) { + if (is_rust_symbol(sym)) { + if (std::optional s = demangle_rust(sym.name())) + return *s; + } else { + if (std::optional s = demangle_cpp(sym.name())) + return *s; + } + return sym.name(); +} + template InputFile::InputFile(Context &ctx, MappedFile> *mf) : mf(mf), filename(mf->name) { @@ -299,6 +324,10 @@ void ObjectFile::initialize_sections(Context &ctx) { is_debug_section(shdr, name)) continue; + if (name == ".comment" && + this->get_string(ctx, shdr).starts_with("rustc ")) + is_rust_obj = true; + // If an output file doesn't have a section header (i.e. // --oformat=binary is given), we discard all non-memory-allocated // sections. This is because without a section header, we can't find @@ -1512,6 +1541,7 @@ using E = MOLD_TARGET; template class InputFile; template class ObjectFile; template class SharedFile; +template std::string_view demangle(const Symbol &); template std::ostream &operator<<(std::ostream &, const InputFile &); } // namespace mold::elf diff --git a/elf/input-sections.cc b/elf/input-sections.cc index 2ed9aa83..4000b827 100644 --- a/elf/input-sections.cc +++ b/elf/input-sections.cc @@ -462,14 +462,15 @@ void InputSection::write_to(Context &ctx, u8 *buf) { // Get the name of a function containin a given offset. 
template -std::string_view InputSection::get_func_name(Context &ctx, i64 offset) const { - for (const ElfSym &esym : file.elf_syms) { +std::string_view +InputSection::get_func_name(Context &ctx, i64 offset) const { + for (Symbol *sym : file.symbols) { + const ElfSym &esym = sym->esym(); if (esym.st_shndx == shndx && esym.st_type == STT_FUNC && esym.st_value <= offset && offset < esym.st_value + esym.st_size) { - std::string_view name = file.symbol_strtab.data() + esym.st_name; if (ctx.arg.demangle) - return demangle(name); - return name; + return demangle(*sym); + return sym->name(); } } return ""; @@ -508,7 +509,7 @@ bool InputSection::record_undef_error(Context &ctx, const ElfRel &rel) ss << ":(" << func << ")"; typename decltype(ctx.undef_errors)::accessor acc; - ctx.undef_errors.insert(acc, {sym.name(), {}}); + ctx.undef_errors.insert(acc, {&sym, {}}); acc->second.push_back(ss.str()); }; diff --git a/elf/mold.h b/elf/mold.h index e4816c4a..ae096009 100644 --- a/elf/mold.h +++ b/elf/mold.h @@ -1188,6 +1188,7 @@ public: std::map gnu_properties; bool is_lto_obj = false; bool needs_executable_stack = false; + bool is_rust_obj = false; u64 num_dynrel = 0; u64 reldyn_offset = 0; @@ -1806,7 +1807,7 @@ struct Context { Atomic has_textrel = false; Atomic num_ifunc_dynrels = 0; - tbb::concurrent_hash_map> undef_errors; + tbb::concurrent_hash_map *, std::vector> undef_errors; // Output chunks OutputEhdr *ehdr = nullptr; @@ -2176,6 +2177,9 @@ public: [[no_unique_address]] SymbolExtras extra; }; +template +std::string_view demangle(const Symbol &sym); + // If we haven't seen the same `key` before, create a new instance // of Symbol and returns it. Otherwise, returns the previously- // instantiated object. `key` is usually the same as `name`. 
@@ -2195,7 +2199,7 @@ Symbol *get_symbol(Context &ctx, std::string_view name) { template std::ostream &operator<<(std::ostream &out, const Symbol &sym) { if (opt_demangle) - out << demangle(sym.name()); + out << demangle(sym); else out << sym.name(); return out; diff --git a/elf/passes.cc b/elf/passes.cc index 13fad126..ddd9bde2 100644 --- a/elf/passes.cc +++ b/elf/passes.cc @@ -1492,14 +1492,13 @@ void report_undef_errors(Context &ctx) { constexpr i64 max_errors = 3; for (auto &pair : ctx.undef_errors) { - std::string_view sym_name = pair.first; + Symbol *sym = pair.first; std::span errors = pair.second; - if (ctx.arg.demangle) - sym_name = demangle(sym_name); - std::stringstream ss; - ss << "undefined symbol: " << sym_name << "\n"; + ss << "undefined symbol: " + << (ctx.arg.demangle ? demangle(*sym) : sym->name()) + << "\n"; for (i64 i = 0; i < errors.size() && i < max_errors; i++) ss << errors[i]; @@ -1677,7 +1676,7 @@ void apply_version_script(Context &ctx) { // Match non-mangled symbols against the C++ pattern as well. // Weird, but required to match other linkers' behavior. if (!cpp_matcher.empty()) { - if (std::optional s = cpp_demangle(name)) + if (std::optional s = demangle_cpp(name)) name = *s; if (std::optional idx = cpp_matcher.find(name)) match = std::min(match, *idx); @@ -1886,7 +1885,7 @@ void compute_import_export(Context &ctx) { if (matcher.find(name)) { handle_match(sym); } else if (!cpp_matcher.empty()) { - if (std::optional s = cpp_demangle(name)) + if (std::optional s = demangle_cpp(name)) name = *s; if (cpp_matcher.find(name)) handle_match(sym); diff --git a/test/elf/demangle-cpp.sh b/test/elf/demangle-cpp.sh new file mode 100755 index 00000000..d4db602d --- /dev/null +++ b/test/elf/demangle-cpp.sh @@ -0,0 +1,19 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat <<'EOF' | $CC -c -o $t/a.o -xc - +void _ZN2ns7versionEv(); +int main() { _ZN2ns7versionEv(); } +EOF + +! $CC -B. 
-o $t/exe1 $t/a.o 2> $t/log || false +grep -Fq 'ns::version()' $t/log + +cat <<'EOF' | $CC -c -o $t/b.o -xc - +void _ZN2ns7versionEv(); +int main() { _ZN2ns7versionEv(); } +__attribute__((section(".comment"))) char str[] = "rustc version x.y.z\n"; +EOF + +! $CC -B. -o $t/exe2 $t/b.o 2> $t/log || false +grep -Fq 'ns::versionv' $t/log