2020-05-09 15:09:40 +03:00
/*
* Copyright ( c ) 2020 , Andreas Kling < kling @ serenityos . org >
*
2021-04-22 11:24:48 +03:00
* SPDX - License - Identifier : BSD - 2 - Clause
2020-05-09 15:09:40 +03:00
*/
2021-01-17 22:28:43 +03:00
# include <AK/Debug.h>
2021-05-19 15:35:34 +03:00
# include <AK/OwnPtr.h>
2020-08-10 04:55:32 +03:00
# include <AK/QuickSort.h>
# include <AK/Vector.h>
2020-08-05 23:35:35 +03:00
# include <LibCore/ArgsParser.h>
2021-11-23 13:32:25 +03:00
# include <LibCore/MappedFile.h>
2020-12-25 04:14:56 +03:00
# include <LibELF/Image.h>
2020-04-11 14:16:17 +03:00
# include <LibX86/Disassembler.h>
2020-08-16 19:39:06 +03:00
# include <LibX86/ELFSymbolProvider.h>
2020-08-09 05:08:13 +03:00
# include <string.h>
2020-04-11 14:16:17 +03:00
int main ( int argc , char * * argv )
{
2020-08-05 23:35:35 +03:00
const char * path = nullptr ;
Core : : ArgsParser args_parser ;
2020-12-05 18:22:58 +03:00
args_parser . set_general_help (
" Disassemble an executable, and show human-readable "
" assembly code for each function. " ) ;
2020-08-05 23:35:35 +03:00
args_parser . add_positional_argument ( path , " Path to i386 binary file " , " path " ) ;
args_parser . parse ( argc , argv ) ;
2020-04-11 14:16:17 +03:00
2021-11-23 13:32:25 +03:00
auto file_or_error = Core : : MappedFile : : map ( path ) ;
2021-01-10 17:55:54 +03:00
if ( file_or_error . is_error ( ) ) {
2021-11-07 02:37:07 +03:00
warnln ( " Could not map file: {} " , file_or_error . error ( ) ) ;
2020-07-31 12:21:33 +03:00
return 1 ;
}
2020-04-11 14:16:17 +03:00
2021-01-10 17:55:54 +03:00
auto & file = * file_or_error . value ( ) ;
2020-08-10 04:55:32 +03:00
struct Symbol {
size_t value ;
size_t size ;
StringView name ;
size_t address ( ) const { return value ; }
size_t address_end ( ) const { return value + size ; }
bool contains ( size_t virtual_address ) { return address ( ) < = virtual_address & & virtual_address < address_end ( ) ; }
} ;
Vector < Symbol > symbols ;
2020-08-09 05:08:13 +03:00
const u8 * asm_data = ( const u8 * ) file . data ( ) ;
size_t asm_size = file . size ( ) ;
size_t file_offset = 0 ;
2020-08-10 04:55:32 +03:00
Vector < Symbol > : : Iterator current_symbol = symbols . begin ( ) ;
2020-08-16 19:39:06 +03:00
OwnPtr < X86 : : ELFSymbolProvider > symbol_provider ; // nullptr for non-ELF disassembly.
2020-12-25 04:14:56 +03:00
OwnPtr < ELF : : Image > elf ;
2020-08-09 05:08:13 +03:00
if ( asm_size > = 4 & & strncmp ( ( const char * ) asm_data , " \u007f ELF " , 4 ) = = 0 ) {
2020-12-25 04:14:56 +03:00
elf = make < ELF : : Image > ( asm_data , asm_size ) ;
if ( elf - > is_valid ( ) ) {
2020-08-17 12:30:00 +03:00
symbol_provider = make < X86 : : ELFSymbolProvider > ( * elf ) ;
2020-12-25 04:14:56 +03:00
elf - > for_each_section_of_type ( SHT_PROGBITS , [ & ] ( const ELF : : Image : : Section & section ) {
2020-08-09 05:08:13 +03:00
// FIXME: Disassemble all SHT_PROGBITS sections, not just .text.
if ( section . name ( ) ! = " .text " )
return IterationDecision : : Continue ;
asm_data = ( const u8 * ) section . raw_data ( ) ;
asm_size = section . size ( ) ;
file_offset = section . address ( ) ;
return IterationDecision : : Break ;
} ) ;
2020-12-25 04:14:56 +03:00
symbols . ensure_capacity ( elf - > symbol_count ( ) + 1 ) ;
2020-08-10 04:55:32 +03:00
symbols . append ( { 0 , 0 , StringView ( ) } ) ; // Sentinel.
2020-12-25 04:14:56 +03:00
elf - > for_each_symbol ( [ & ] ( const ELF : : Image : : Symbol & symbol ) {
2020-08-10 04:55:32 +03:00
symbols . append ( { symbol . value ( ) , symbol . size ( ) , symbol . name ( ) } ) ;
return IterationDecision : : Continue ;
} ) ;
quick_sort ( symbols , [ ] ( auto & a , auto & b ) {
if ( a . value ! = b . value )
return a . value < b . value ;
if ( a . size ! = b . size )
return a . size < b . size ;
return a . name < b . name ;
} ) ;
2021-01-24 01:59:27 +03:00
if constexpr ( DISASM_DUMP_DEBUG ) {
2021-01-17 22:28:43 +03:00
for ( size_t i = 0 ; i < symbols . size ( ) ; + + i )
dbgln ( " {}: {:p}, {} " , symbols [ i ] . name , symbols [ i ] . value , symbols [ i ] . size ) ;
}
2020-08-09 05:08:13 +03:00
}
}
X86 : : SimpleInstructionStream stream ( asm_data , asm_size ) ;
2020-04-11 14:16:17 +03:00
X86 : : Disassembler disassembler ( stream ) ;
2020-08-10 04:55:32 +03:00
bool is_first_symbol = true ;
bool current_instruction_is_in_symbol = false ;
2020-04-11 14:16:17 +03:00
for ( ; ; ) {
auto offset = stream . offset ( ) ;
auto insn = disassembler . next ( ) ;
if ( ! insn . has_value ( ) )
break ;
2020-08-10 04:55:32 +03:00
// Prefix regions of instructions belonging to a symbol with the symbol's name.
// Separate regions of instructions belonging to distinct symbols with newlines,
// and separate regions of instructions not belonging to symbols from regions belonging to symbols with newlines.
// Interesting cases:
// - More than 1 symbol covering a region of instructions (ICF, D1/D2)
// - Symbols of size 0 that don't cover any instructions but are at an address (want to print them, separated from instructions both before and after)
// Invariant: current_symbol is the largest instruction containing insn, or it is the largest instruction that has an address less than the instruction's address.
size_t virtual_offset = file_offset + offset ;
if ( current_symbol < symbols . end ( ) & & ! current_symbol - > contains ( virtual_offset ) ) {
if ( ! is_first_symbol & & current_instruction_is_in_symbol ) {
// The previous instruction was part of a symbol that doesn't cover the current instruction, so separate it from the current instruction with a newline.
2020-10-23 19:37:35 +03:00
outln ( ) ;
2020-08-10 04:55:32 +03:00
current_instruction_is_in_symbol = ( current_symbol + 1 < symbols . end ( ) & & ( current_symbol + 1 ) - > contains ( virtual_offset ) ) ;
}
// Try to find symbol covering current instruction, if one exists.
while ( current_symbol + 1 < symbols . end ( ) & & ! ( current_symbol + 1 ) - > contains ( virtual_offset ) & & ( current_symbol + 1 ) - > address ( ) < = virtual_offset ) {
+ + current_symbol ;
if ( ! is_first_symbol )
2020-10-23 19:37:35 +03:00
outln ( " \n ({} ({:p}-{:p})) \n " , current_symbol - > name , current_symbol - > address ( ) , current_symbol - > address_end ( ) ) ;
2020-08-10 04:55:32 +03:00
}
while ( current_symbol + 1 < symbols . end ( ) & & ( current_symbol + 1 ) - > contains ( virtual_offset ) ) {
if ( ! is_first_symbol & & ! current_instruction_is_in_symbol )
2020-10-23 19:37:35 +03:00
outln ( ) ;
2020-08-10 04:55:32 +03:00
+ + current_symbol ;
current_instruction_is_in_symbol = true ;
2020-10-23 19:37:35 +03:00
outln ( " {} ({:p}-{:p}): " , current_symbol - > name , current_symbol - > address ( ) , current_symbol - > address_end ( ) ) ;
2020-08-10 04:55:32 +03:00
}
is_first_symbol = false ;
}
2020-10-23 19:37:35 +03:00
outln ( " {:p} {} " , virtual_offset , insn . value ( ) . to_string ( virtual_offset , symbol_provider ) ) ;
2020-04-11 14:16:17 +03:00
}
}