From a4398c7c322ec072a6e50fa70d0f8e85e681ca6c Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Tue, 19 Apr 2022 12:28:37 +0800 Subject: [PATCH] [ELF] Factor out DWARF-related code to dwarf.cc --- elf/dwarf.cc | 286 +++++++++++++++++++++++++++++++++++ elf/mold.h | 26 ++-- elf/output-chunks.cc | 274 --------------------------------- test/elf/gdb-index-dwarf5.sh | 57 ------- 4 files changed, 301 insertions(+), 342 deletions(-) create mode 100644 elf/dwarf.cc delete mode 100755 test/elf/gdb-index-dwarf5.sh diff --git a/elf/dwarf.cc b/elf/dwarf.cc new file mode 100644 index 00000000..15527ce7 --- /dev/null +++ b/elf/dwarf.cc @@ -0,0 +1,286 @@ +#include "mold.h" + +namespace mold::elf { + +// The hash function for .gdb_index. +static u32 gdb_hash(std::string_view name) { + u32 h = 0; + for (u8 c : name) { + if ('A' <= c && c <= 'Z') + c = 'a' + c - 'A'; + h = h * 67 + c - 113; + } + return h; +} + +// Split .debug_info into so-called "compilation units". A .debug_info +// section usually contains one compunit unless it was created by `ld -r`. +// This is for --gdb-index. +template +std::vector +read_compunits(Context &ctx, ObjectFile &file) { + file.debug_info->uncompress(ctx); + std::string_view data = file.debug_info->contents; + std::vector vec; + + while (!data.empty()) { + if (data.size() < 4) + Fatal(ctx) << *file.debug_info << ": corrupted .debug_info"; + i64 len = *(u32 *)data.data() + 4; + vec.push_back(data.substr(0, len)); + data = data.substr(len); + } + return vec; +} + +// Parses .debug_gnu_pubnames and .debug_gnu_pubtypes. These sections +// start with a 14 bytes header followed by (4-byte offset, 1-byte type, +// null-terminated string) tuples. +// +// The 4-byte offset is an offset into .debug_info that contains details +// about the name. The 1-byte type is a type of the corresponding name +// (e.g. function, variable or datatype). The string is a name of a +// function, a variable or a type. 
+template +std::vector read_pubnames(Context &ctx, ObjectFile &file) { + std::vector vec; + + auto get_cu_idx = [&](InputSection &isec, i64 offset) { + i64 off = 0; + for (i64 i = 0; i < file.compunits.size(); i++) { + if (offset == off) + return file.compunits_idx + i; + off += file.compunits[i].size(); + } + Fatal(ctx) << isec << ": corrupted debug_info_offset"; + }; + + auto read = [&](InputSection &isec) { + isec.uncompress(ctx); + std::string_view contents = isec.contents; + + while (!contents.empty()) { + if (contents.size() < 14) + Fatal(ctx) << isec << ": corrupted header"; + + u32 len = *(u32 *)contents.data() + 4; + u32 debug_info_offset = *(u32 *)(contents.data() + 6); + u32 cu_idx = get_cu_idx(isec, debug_info_offset); + + std::string_view data = contents.substr(14, len - 14); + contents = contents.substr(len); + + while (!data.empty()) { + u32 offset = *(u32 *)data.data(); + data = data.substr(4); + if (offset == 0) + break; + + u8 type = data[0]; + data = data.substr(1); + + std::string_view name = data.data(); + data = data.substr(name.size() + 1); + + vec.push_back({name, gdb_hash(name), offset + debug_info_offset, + (type << 24) | cu_idx}); + } + } + }; + + if (file.debug_pubnames) + read(*file.debug_pubnames); + if (file.debug_pubtypes) + read(*file.debug_pubtypes); + return vec; +} + +// Try to find a compilation unit from .debug_info and its +// corresponding record from .debug_abbrev and returns them. +template +static std::pair +find_compunit(Context &ctx, ObjectFile &file, i64 offset) { + // Read .debug_info to find the record at a given offset. 
+ u8 *cu = (u8 *)(ctx.buf + ctx.debug_info->shdr.sh_offset + offset); + u32 dwarf_version = *(u16 *)(cu + 4); + u32 abbrev_offset; + + switch (dwarf_version) { + case 4: + abbrev_offset = *(u32 *)(cu + 6); + cu += 11; + break; + case 5: + abbrev_offset = *(u32 *)(cu + 8); + cu += 12; + break; + default: + Fatal(ctx) << file << ": --gdb-index: unknown DWARF version " + << dwarf_version; + } + + u32 abbrev_code = read_uleb(cu); + + // Find a .debug_abbrev record corresponding to the .debug_info record. + // We assume the .debug_info record at a given offset is of + // DW_TAG_compile_unit which describes a compunit. + u8 *abbrev = (u8 *)(ctx.buf + ctx.debug_abbrev->shdr.sh_offset + abbrev_offset); + + for (;;) { + u32 code = read_uleb(abbrev); + if (code == 0) { + Fatal(ctx) << file << ": --gdb-index: .debug_abbrev does not contain" + << " a record for the first .debug_info record"; + return {}; + } + + if (code == abbrev_code) { + // Found a record + u64 abbrev_tag = read_uleb(abbrev); + if (abbrev_tag != DW_TAG_compile_unit) { + Fatal(ctx) << file << ": --gdb-index: the first entry's tag is not " + << " DW_TAG_compile_unit but 0x" << std::hex << abbrev_tag; + return {}; + } + break; + } + + // Skip an uninteresting record + for (;;) { + u64 name = read_uleb(abbrev); + u64 form = read_uleb(abbrev); + if (name == 0 && form == 0) + break; + } + } + + abbrev++; // skip has_children byte + return {cu, abbrev}; +} + +// Returns a list of address ranges explained by a compunit at the +// `offset` in an output .debug_info section. +// +// .debug_info contains DWARF debug info records, so this function +// parses DWARF. If a designated compunit contains multiple ranges, the +// ranges are read from .debug_ranges. Otherwise, a range is read +// directly from .debug_info. 
+template
+std::vector
+read_address_areas(Context &ctx, ObjectFile &file, i64 offset) {
+  u8 *cu;
+  u8 *abbrev;
+  std::tie(cu, abbrev) = find_compunit(ctx, file, offset);
+
+  std::optional low_pc; // stays empty until DW_AT_low_pc has been read
+
+  for (;;) { // walk the abbrev's (name, form) pairs; a (0, 0) pair ends the list
+    u64 name = read_uleb(abbrev);
+    u64 form = read_uleb(abbrev);
+    if (name == 0 && form == 0)
+      break;
+
+    auto read_value = [&]() -> u64 { // decodes one value of `form`, advancing `cu` past it
+      switch (form) {
+      case DW_FORM_flag_present:
+        return 0;
+      case DW_FORM_data1:
+      case DW_FORM_flag:
+      case DW_FORM_strx1:
+      case DW_FORM_addrx1:
+      case DW_FORM_ref1:
+        return *cu++;
+      case DW_FORM_data2:
+      case DW_FORM_strx2:
+      case DW_FORM_addrx2:
+      case DW_FORM_ref2: {
+        u64 val = *(u16 *)cu;
+        cu += 2;
+        return val;
+      }
+      case DW_FORM_data4:
+      case DW_FORM_strp:
+      case DW_FORM_sec_offset:
+      case DW_FORM_line_strp:
+      case DW_FORM_strx4:
+      case DW_FORM_addrx4:
+      case DW_FORM_ref4: {
+        u64 val = *(u32 *)cu;
+        cu += 4;
+        return val;
+      }
+      case DW_FORM_data8:
+      case DW_FORM_ref8: {
+        u64 val = *(u64 *)cu;
+        cu += 8;
+        return val;
+      }
+      case DW_FORM_addr:
+      case DW_FORM_ref_addr: {
+        u64 val = *(typename E::WordTy *)cu;
+        cu += E::word_size;
+        return val;
+      }
+      case DW_FORM_strx:
+      case DW_FORM_addrx:
+      case DW_FORM_ref_udata:
+        return read_uleb(cu);
+      case DW_FORM_string: { // inline NUL-terminated string: skip it, value is unused
+        while (*cu)
+          cu++;
+        cu++;
+        return 0;
+      }
+      default:
+        Fatal(ctx) << file << ": --gdb-index: unknown debug info form: 0x"
+                   << std::hex << form;
+        return 0;
+      }
+    };
+
+    switch (name) {
+    case DW_AT_low_pc:
+      low_pc = read_value(); // assign the optional itself; `*low_pc = x` on an empty optional is UB
+      break;
+    case DW_AT_high_pc:
+      if (!low_pc) // both returns below dereference low_pc, so it must have been seen first
+        Fatal(ctx) << file << ": --gdb-index: missing DW_AT_low_pc";
+
+      if (form == DW_FORM_addr)
+        return {*low_pc, read_value()}; // high_pc is used as the end address as-is
+      return {*low_pc, *low_pc + read_value()}; // otherwise the value is added to low_pc
+    case DW_AT_ranges: {
+      if (!ctx.debug_ranges)
+        Fatal(ctx) << file << ": --gdb-index: missing debug_ranges";
+
+      u64 offset = read_value();
+      typename E::WordTy *range =
+        (typename E::WordTy *)(ctx.buf + ctx.debug_ranges->shdr.sh_offset + offset);
+
+      std::vector vec;
+      for (i64 i = 0; range[i] || range[i + 1]; i += 2) { // pairs of words; a (0, 0) pair ends the list
+        vec.push_back(range[i]);
+        vec.push_back(range[i + 1]);
+      }
+      return vec;
+    }
+    default:
+      read_value(); // result unused; the call still advances `cu` past this attribute
+      break;
+    }
+  }
+
+  return {};
+}
+
+#define INSTANTIATE(E)                                                  \
+  template std::vector                                                  \
+  read_compunits(Context &, ObjectFile &);                              \
+  template std::vector                                                  \
+  read_pubnames(Context &, ObjectFile &);                               \
+  template std::vector                                                  \
+  read_address_areas(Context &, ObjectFile &, i64)
+
+INSTANTIATE_ALL;
+
+} // namespace mold::elf
diff --git a/elf/mold.h b/elf/mold.h
index d72d6b78..9d8f0044 100644
--- a/elf/mold.h
+++ b/elf/mold.h
@@ -940,17 +940,6 @@ private:
   i64 num_symtab_entries = 0;
   i64 attrs_size = 0;
 
-  std::vector
-  read_compunits(Context &ctx, ObjectFile &file);
-
-  std::vector read_pubnames(Context &ctx, ObjectFile &file);
-
-  std::pair find_compunit(Context &ctx, ObjectFile &file,
-                          i64 offset);
-
-  std::vector read_address_areas(Context &ctx, ObjectFile &file,
-                                 i64 offset);
-
   ConcurrentMap map;
 };
 
@@ -994,6 +983,21 @@ private:
 
 bool is_c_identifier(std::string_view name);
 
+//
+// dwarf.cc
+//
+
+template
+std::vector
+read_compunits(Context &ctx, ObjectFile &file);
+
+template
+std::vector read_pubnames(Context &ctx, ObjectFile &file);
+
+template
+std::vector
+read_address_areas(Context &ctx, ObjectFile &file, i64 offset);
+
 //
 // input-files.cc
 //
diff --git a/elf/output-chunks.cc b/elf/output-chunks.cc
index 89fe63b9..0ccdde68 100644
--- a/elf/output-chunks.cc
+++ b/elf/output-chunks.cc
@@ -31,17 +31,6 @@ static u32 djb_hash(std::string_view name) {
   return h;
 }
 
-// The hash function for .gdb_index.
-static u32 gdb_hash(std::string_view name) { - u32 h = 0; - for (u8 c : name) { - if ('A' <= c && c <= 'Z') - c = 'a' + c - 'A'; - h = h * 67 + c - 113; - } - return h; -} - template void Chunk::write_to(Context &ctx, u8 *buf) { Fatal(ctx) << name << ": write_to is called on an invalid section"; @@ -2413,269 +2402,6 @@ void GdbIndexSection::write_address_areas(Context &ctx) { }); } -// Returns the list of compilation units in .gdb_index. A .gdb_index -// usually contains only one compilatation unit unless the object was -// built by `ld -r`. -template -std::vector -GdbIndexSection::read_compunits(Context &ctx, ObjectFile &file) { - file.debug_info->uncompress(ctx); - std::string_view data = file.debug_info->contents; - std::vector vec; - - while (!data.empty()) { - if (data.size() < 4) - Fatal(ctx) << *file.debug_info << ": corrupted .debug_info"; - i64 len = *(u32 *)data.data() + 4; - vec.push_back(data.substr(0, len)); - data = data.substr(len); - } - return vec; -} - -// Parses .debug_gnu_pubnames and .debug_gnu_pubtypes. These sections -// start with a 14 bytes header followed by (4-byte offset, 1-byte type, -// null-terminated string) tuples. -// -// The 4-byte offset is an offset into .debug_info that contains details -// about the name. The 1-byte type is a type of the corresponding name -// (e.g. function, variable or datatype). The string is a name of a -// function, a variable or a type. 
-template -std::vector -GdbIndexSection::read_pubnames(Context &ctx, ObjectFile &file) { - std::vector vec; - - auto get_cu_idx = [&](InputSection &isec, i64 offset) { - i64 off = 0; - for (i64 i = 0; i < file.compunits.size(); i++) { - if (offset == off) - return file.compunits_idx + i; - off += file.compunits[i].size(); - } - Fatal(ctx) << isec << ": corrupted debug_info_offset"; - }; - - auto read = [&](InputSection &isec) { - isec.uncompress(ctx); - std::string_view contents = isec.contents; - - while (!contents.empty()) { - if (contents.size() < 14) - Fatal(ctx) << isec << ": corrupted header"; - - u32 len = *(u32 *)contents.data() + 4; - u32 debug_info_offset = *(u32 *)(contents.data() + 6); - u32 cu_idx = get_cu_idx(isec, debug_info_offset); - - std::string_view data = contents.substr(14, len - 14); - contents = contents.substr(len); - - while (!data.empty()) { - u32 offset = *(u32 *)data.data(); - data = data.substr(4); - if (offset == 0) - break; - - u8 type = data[0]; - data = data.substr(1); - - std::string_view name = data.data(); - data = data.substr(name.size() + 1); - - vec.push_back({name, gdb_hash(name), offset + debug_info_offset, - (type << 24) | cu_idx}); - } - } - }; - - if (file.debug_pubnames) - read(*file.debug_pubnames); - if (file.debug_pubtypes) - read(*file.debug_pubtypes); - return vec; -} - -// Try to find a compilation unit from .debug_info and its -// corresponding record from .debug_abbrev and returns them. -template -std::pair -GdbIndexSection::find_compunit(Context &ctx, ObjectFile &file, - i64 offset) { - // Read .debug_info to find the record at a given offset. 
- u8 *cu = (u8 *)(ctx.buf + ctx.debug_info->shdr.sh_offset + offset); - u32 dwarf_version = *(u16 *)(cu + 4); - u32 abbrev_offset; - - switch (dwarf_version) { - case 4: - abbrev_offset = *(u32 *)(cu + 6); - cu += 11; - break; - case 5: - abbrev_offset = *(u32 *)(cu + 8); - cu += 12; - break; - default: - Fatal(ctx) << file << ": --gdb-index: unknown DWARF version " - << dwarf_version; - } - - u32 abbrev_code = read_uleb(cu); - - // Find a .debug_abbrev record corresponding to the .debug_info record. - // We assume the .debug_info record at a given offset is of - // DW_TAG_compile_unit which describes a compunit. - u8 *abbrev = (u8 *)(ctx.buf + ctx.debug_abbrev->shdr.sh_offset + abbrev_offset); - - for (;;) { - u32 code = read_uleb(abbrev); - if (code == 0) { - Fatal(ctx) << file << ": --gdb-index: .debug_abbrev does not contain" - << " a record for the first .debug_info record"; - return {}; - } - - if (code == abbrev_code) { - // Found a record - u64 abbrev_tag = read_uleb(abbrev); - if (abbrev_tag != DW_TAG_compile_unit) { - Fatal(ctx) << file << ": --gdb-index: the first entry's tag is not " - << " DW_TAG_compile_unit but 0x" << std::hex << abbrev_tag; - return {}; - } - break; - } - - // Skip an uninteresting record - for (;;) { - u64 name = read_uleb(abbrev); - u64 form = read_uleb(abbrev); - if (name == 0 && form == 0) - break; - } - } - - abbrev++; // skip has_children byte - return {cu, abbrev}; -} - -// Returns a list of address ranges explained by a compunit at the -// `offset` in an output .debug_info section. -// -// .debug_info contains DWARF debug info records, so this function -// parses DWARF. If a designated compunit contains multiple ranges, the -// ranges are read from .debug_ranges. Otherwise, a range is read -// directly from .debug_info. 
-template -std::vector -GdbIndexSection::read_address_areas(Context &ctx, ObjectFile &file, - i64 offset) { - u8 *cu; - u8 *abbrev; - std::tie(cu, abbrev) = find_compunit(ctx, file, offset); - - std::optional low_pc; - - for (;;) { - u64 name = read_uleb(abbrev); - u64 form = read_uleb(abbrev); - if (name == 0 && form == 0) - break; - - auto read_value = [&]() -> u64 { - switch (form) { - case DW_FORM_flag_present: - return 0; - case DW_FORM_data1: - case DW_FORM_flag: - case DW_FORM_strx1: - case DW_FORM_addrx1: - case DW_FORM_ref1: - return *cu++; - case DW_FORM_data2: - case DW_FORM_strx2: - case DW_FORM_addrx2: - case DW_FORM_ref2: { - u64 val = *(u16 *)cu; - cu += 2; - return val; - } - case DW_FORM_data4: - case DW_FORM_strp: - case DW_FORM_sec_offset: - case DW_FORM_line_strp: - case DW_FORM_strx4: - case DW_FORM_addrx4: - case DW_FORM_ref4: { - u64 val = *(u32 *)cu; - cu += 4; - return val; - } - case DW_FORM_data8: - case DW_FORM_ref8: { - u64 val = *(u64 *)cu; - cu += 8; - return val; - } - case DW_FORM_addr: - case DW_FORM_ref_addr: { - u64 val = *(typename E::WordTy *)cu; - cu += E::word_size; - return val; - } - case DW_FORM_strx: - case DW_FORM_addrx: - case DW_FORM_ref_udata: - return read_uleb(cu); - case DW_FORM_string: { - while (*cu) - cu++; - cu++; - return 0; - } - default: - Fatal(ctx) << file << ": --gdb-index: unknown debug info form: 0x" - << std::hex << form; - return 0; - } - }; - - switch (name) { - case DW_AT_low_pc: - *low_pc = read_value(); - break; - case DW_AT_high_pc: - if (low_pc) - Fatal(ctx) << file << ": --gdb-index: missing DW_AT_low_pc"; - - if (form == DW_FORM_addr) - return {*low_pc, read_value()}; - return {*low_pc, *low_pc + read_value()}; - case DW_AT_ranges: { - if (!ctx.debug_ranges) - Fatal(ctx) << file << ": --gdb-index: missing debug_ranges"; - - u64 offset = read_value(); - typename E::WordTy *range = - (typename E::WordTy *)(ctx.buf + ctx.debug_ranges->shdr.sh_offset + offset); - - std::vector vec; - for (i64 i = 
0; range[i] || range[i + 1]; i += 2) { - vec.push_back(range[i]); - vec.push_back(range[i + 1]); - } - return vec; - } - default: - read_value(); - break; - } - } - - return {}; -} - template GabiCompressedSection::GabiCompressedSection(Context &ctx, Chunk &chunk) { diff --git a/test/elf/gdb-index-dwarf5.sh b/test/elf/gdb-index-dwarf5.sh deleted file mode 100755 index 498f35a2..00000000 --- a/test/elf/gdb-index-dwarf5.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -export LC_ALL=C -set -e -CC="${CC:-cc}" -CXX="${CXX:-c++}" -GCC="${GCC:-gcc}" -GXX="${GXX:-g++}" -OBJDUMP="${OBJDUMP:-objdump}" -MACHINE="${MACHINE:-$(uname -m)}" -testname=$(basename "$0" .sh) -echo -n "Testing $testname ... " -cd "$(dirname "$0")"/../.. -mold="$(pwd)/mold" -t=out/test/elf/$testname -mkdir -p $t - -[ $MACHINE = $(uname -m) ] || { echo skipped; exit; } - -which gdb >& /dev/null || { echo skipped; exit; } - -echo 'int main() {}' | $CC -gdwarf-5 -o /dev/null -xc - >& /dev/null || \ - { echo skipped; exit; } - -cat < $t/a.c -#include - -void hello() { - printf("Hello world\n"); -} - -void greet() { - hello(); -} -EOF - -$CC -o $t/b.o -c -ggnu-pubnames -gdwarf-5 -g $t/a.c -$CC -o $t/c.o -c -ggnu-pubnames -gdwarf-5 -g $t/a.c -gz - -cat <& /dev/null - -$CC -B. -o $t/exe2 $t/c.o $t/d.o -Wl,--gdb-index -$QEMU $t/exe2 | grep -q 'Hello world' -readelf -WS $t/exe2 | fgrep -q .gdb_index -DEBUGINFOD_URLS= gdb $t/exe2 -ex 'b main' -ex run -ex cont -ex quit >& /dev/null - -echo OK