mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-12-28 21:54:40 +03:00
disasm: Insert symbol names in disassembly stream
The symbol name insertion scheme is different from objdump -d's. Compare the output on Build/Userland/id: * disasm: ... _start (08048305-0804836b): 08048305 push ebp ... 08048366 call 0x0000df56 0804836b o16 nop 0804836d o16 nop 0804836f nop (deregister_tm_clones (08048370-08048370)) 08048370 mov eax, 0x080643e0 ... _ZN2AK8Utf8ViewC1ERKNS_6StringE (0805d9b2-0805d9b7): _ZN2AK8Utf8ViewC2ERKNS_6StringE (0805d9b2-0805d9b7): 0805d9b2 jmp 0x00014ff2 0805d9b7 nop * objdump -d: 08048305 <_start>: 8048305: 55 push %ebp ... 8048366: e8 9b dc 00 00 call 8056006 <exit> 804836b: 66 90 xchg %ax,%ax 804836d: 66 90 xchg %ax,%ax 804836f: 90 nop 08048370 <deregister_tm_clones>: 8048370: b8 e0 43 06 08 mov $0x80643e0,%eax ... 0805d9b2 <_ZN2AK8Utf8ViewC1ERKNS_6StringE>: 805d9b2: e9 eb f6 ff ff jmp 805d0a2 <_ZN2AK10StringViewC1ERKNS_6StringE> 805d9b7: 90 nop Differences: 1. disasm can show multiple symbols that cover the same instructions. I've only seen this happen for C1/C2 (and D1/D2) ctor/dtor pairs, but it could conceivably happen with ICF as well. 2. disasm separates instructions that do not belong to a symbol with a newline, so that nop padding isn't shown as part of a function when it technically isn't. 3. disasm shows symbols that are skipped (due to having size 0) in parentheses, separated from preceding and following instructions.
This commit is contained in:
parent
2eae70b097
commit
6613a4cb8c
Notes:
sideshowbarker
2024-07-19 04:07:13 +09:00
Author: https://github.com/nico Commit: https://github.com/SerenityOS/serenity/commit/6613a4cb8cf Pull-request: https://github.com/SerenityOS/serenity/pull/3066
@ -158,6 +158,13 @@ public:
|
||||
return !(*this == other);
|
||||
}
|
||||
|
||||
bool operator<(const StringView& other) const
|
||||
{
|
||||
if (int c = __builtin_memcmp(m_characters, other.m_characters, min(m_length, other.m_length)))
|
||||
return c < 0;
|
||||
return m_length < other.m_length;
|
||||
}
|
||||
|
||||
// Returns the m_impl pointer held by this view.
const StringImpl* impl() const { return m_impl; }
|
||||
|
||||
String to_string() const;
|
||||
|
@ -73,6 +73,7 @@ public:
|
||||
return *this;
|
||||
}
|
||||
// Dereference: yields a reference to the element of m_vector at the current index.
ALWAYS_INLINE ElementType& operator*() { return m_vector[m_index]; }
|
||||
// Member access: yields a pointer to the element of m_vector at the current index.
ALWAYS_INLINE ElementType* operator->() { return &m_vector[m_index]; }
|
||||
size_t operator-(const VectorIterator& other) { return m_index - other.m_index; }
|
||||
|
||||
// True once the index equals m_vector.size(), i.e. the iterator is one past the last element.
bool is_end() const { return m_index == m_vector.size(); }
|
||||
|
@ -26,12 +26,16 @@
|
||||
|
||||
#include <AK/LogStream.h>
|
||||
#include <AK/MappedFile.h>
|
||||
#include <AK/QuickSort.h>
|
||||
#include <AK/Vector.h>
|
||||
#include <LibCore/ArgsParser.h>
|
||||
#include <LibELF/Loader.h>
|
||||
#include <LibX86/Disassembler.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
//#define DISASM_DUMP
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
const char* path = nullptr;
|
||||
@ -46,9 +50,22 @@ int main(int argc, char** argv)
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct Symbol {
|
||||
size_t value;
|
||||
size_t size;
|
||||
StringView name;
|
||||
|
||||
size_t address() const { return value; }
|
||||
size_t address_end() const { return value + size; }
|
||||
|
||||
bool contains(size_t virtual_address) { return address() <= virtual_address && virtual_address < address_end(); }
|
||||
};
|
||||
Vector<Symbol> symbols;
|
||||
|
||||
const u8* asm_data = (const u8*)file.data();
|
||||
size_t asm_size = file.size();
|
||||
size_t file_offset = 0;
|
||||
Vector<Symbol>::Iterator current_symbol = symbols.begin();
|
||||
if (asm_size >= 4 && strncmp((const char*)asm_data, "\u007fELF", 4) == 0) {
|
||||
if (auto elf = ELF::Loader::create(asm_data, asm_size)) {
|
||||
elf->image().for_each_section_of_type(SHT_PROGBITS, [&](const ELF::Image::Section& section) {
|
||||
@ -60,18 +77,71 @@ int main(int argc, char** argv)
|
||||
file_offset = section.address();
|
||||
return IterationDecision::Break;
|
||||
});
|
||||
symbols.ensure_capacity(elf->image().symbol_count() + 1);
|
||||
symbols.append({ 0, 0, StringView() }); // Sentinel.
|
||||
elf->image().for_each_symbol([&](const ELF::Image::Symbol& symbol) {
|
||||
symbols.append({ symbol.value(), symbol.size(), symbol.name() });
|
||||
return IterationDecision::Continue;
|
||||
});
|
||||
quick_sort(symbols, [](auto& a, auto& b) {
|
||||
if (a.value != b.value)
|
||||
return a.value < b.value;
|
||||
if (a.size != b.size)
|
||||
return a.size < b.size;
|
||||
return a.name < b.name;
|
||||
});
|
||||
#ifdef DISASM_DUMP
|
||||
for (size_t i = 0; i < symbols.size(); ++i)
|
||||
dbg() << symbols[i].name << ": " << (void*)(uintptr_t)symbols[i].value << ", " << symbols[i].size;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
X86::SimpleInstructionStream stream(asm_data, asm_size);
|
||||
X86::Disassembler disassembler(stream);
|
||||
|
||||
bool is_first_symbol = true;
|
||||
bool current_instruction_is_in_symbol = false;
|
||||
|
||||
for (;;) {
|
||||
auto offset = stream.offset();
|
||||
auto insn = disassembler.next();
|
||||
if (!insn.has_value())
|
||||
break;
|
||||
out() << String::format("%08x", file_offset + offset) << " " << insn.value().to_string(offset);
|
||||
|
||||
// Prefix regions of instructions belonging to a symbol with the symbol's name.
|
||||
// Separate regions of instructions belonging to distinct symbols with newlines,
|
||||
// and separate regions of instructions not belonging to symbols from regions belonging to symbols with newlines.
|
||||
// Interesting cases:
|
||||
// - More than 1 symbol covering a region of instructions (ICF, D1/D2)
|
||||
// - Symbols of size 0 that don't cover any instructions but are at an address (want to print them, separated from instructions both before and after)
|
||||
// Invariant: current_symbol is the largest symbol containing insn, or it is the largest symbol that has an address less than the instruction's address.
|
||||
size_t virtual_offset = file_offset + offset;
|
||||
if (current_symbol < symbols.end() && !current_symbol->contains(virtual_offset)) {
|
||||
if (!is_first_symbol && current_instruction_is_in_symbol) {
|
||||
// The previous instruction was part of a symbol that doesn't cover the current instruction, so separate it from the current instruction with a newline.
|
||||
out();
|
||||
current_instruction_is_in_symbol = (current_symbol + 1 < symbols.end() && (current_symbol + 1)->contains(virtual_offset));
|
||||
}
|
||||
|
||||
// Try to find symbol covering current instruction, if one exists.
|
||||
while (current_symbol + 1 < symbols.end() && !(current_symbol + 1)->contains(virtual_offset) && (current_symbol + 1)->address() <= virtual_offset) {
|
||||
++current_symbol;
|
||||
if (!is_first_symbol)
|
||||
out() << "\n(" << current_symbol->name << " (" << String::format("%08x-%08x", current_symbol->address(), current_symbol->address_end()) << "))\n";
|
||||
}
|
||||
while (current_symbol + 1 < symbols.end() && (current_symbol + 1)->contains(virtual_offset)) {
|
||||
if (!is_first_symbol && !current_instruction_is_in_symbol)
|
||||
out();
|
||||
++current_symbol;
|
||||
current_instruction_is_in_symbol = true;
|
||||
out() << current_symbol->name << " (" << String::format("%08x-%08x", current_symbol->address(), current_symbol->address_end()) << "):";
|
||||
}
|
||||
|
||||
is_first_symbol = false;
|
||||
}
|
||||
|
||||
out() << String::format("%08x", virtual_offset) << " " << insn.value().to_string(offset);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
Loading…
Reference in New Issue
Block a user