2020-01-18 11:38:21 +03:00
/*
 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
2020-03-08 14:05:14 +03:00
# include <AK/Memory.h>
2020-01-05 10:37:05 +03:00
# include <AK/StringBuilder.h>
2020-03-23 15:45:10 +03:00
# include <AK/StringView.h>
2020-01-05 10:37:05 +03:00
# include <LibELF/ELFImage.h>
2018-10-10 12:53:07 +03:00
2020-01-06 23:04:57 +03:00
// Remembers the caller's buffer pointer and its size, then parses the image
// right away; the parse result is kept in m_valid.
ELFImage::ELFImage(const u8* buffer, size_t size)
    : m_buffer(buffer)
    , m_size(size)
{
    const bool parsed_ok = parse();
    m_valid = parsed_ok;
}
2018-10-10 12:53:07 +03:00
// The destructor performs no explicit work of its own.
ELFImage::~ELFImage() = default;
2019-01-31 19:31:23 +03:00
// Translates an ELF object file type value (as found in e_type) into a
// printable label. Values other than the five listed here yield the
// placeholder label.
static const char* object_file_type_to_string(Elf32_Half type)
{
    if (type == ET_NONE)
        return " None ";
    if (type == ET_REL)
        return " Relocatable ";
    if (type == ET_EXEC)
        return " Executable ";
    if (type == ET_DYN)
        return " Shared object ";
    if (type == ET_CORE)
        return " Core ";
    return " (?) ";
}
2020-01-17 00:04:44 +03:00
// Describes the section a symbol refers to: the section's name for an
// ordinary index, or a fixed label for SHN_UNDEF and for indices at or above
// SHN_LORESERVE.
StringView ELFImage::section_index_to_string(unsigned index) const
{
    const bool is_ordinary_index = index != SHN_UNDEF && index < SHN_LORESERVE;
    if (is_ordinary_index)
        return section(index).name();

    if (index == SHN_UNDEF)
        return " Undefined ";
    return " Reserved ";
}
2018-11-04 16:09:30 +03:00
unsigned ELFImage : : symbol_count ( ) const
2018-10-10 12:53:07 +03:00
{
2018-11-04 16:09:30 +03:00
return section ( m_symbol_table_section_index ) . entry_count ( ) ;
2018-10-10 12:53:07 +03:00
}
2019-07-18 13:13:57 +03:00
void ELFImage : : dump ( ) const
2018-10-10 12:53:07 +03:00
{
2019-12-29 09:20:38 +03:00
dbgprintf ( " ELFImage{%p} { \n " , this ) ;
dbgprintf ( " is_valid: %u \n " , is_valid ( ) ) ;
2018-10-10 12:53:07 +03:00
2018-11-04 16:09:30 +03:00
if ( ! is_valid ( ) ) {
2019-12-29 09:20:38 +03:00
dbgprintf ( " } \n " ) ;
2018-10-10 12:53:07 +03:00
return ;
}
2019-12-29 09:20:38 +03:00
dbgprintf ( " type: %s \n " , object_file_type_to_string ( header ( ) . e_type ) ) ;
dbgprintf ( " machine: %u \n " , header ( ) . e_machine ) ;
dbgprintf ( " entry: %x \n " , header ( ) . e_entry ) ;
dbgprintf ( " shoff: %u \n " , header ( ) . e_shoff ) ;
dbgprintf ( " shnum: %u \n " , header ( ) . e_shnum ) ;
2020-01-06 23:04:57 +03:00
dbgprintf ( " phoff: %u \n " , header ( ) . e_phoff ) ;
dbgprintf ( " phnum: %u \n " , header ( ) . e_phnum ) ;
2019-12-29 09:20:38 +03:00
dbgprintf ( " shstrndx: %u \n " , header ( ) . e_shstrndx ) ;
2018-10-10 12:53:07 +03:00
2020-01-11 04:25:12 +03:00
for_each_program_header ( [ & ] ( const ProgramHeader & program_header ) {
dbgprintf ( " Program Header %d: { \n " , program_header . index ( ) ) ;
dbgprintf ( " type: %x \n " , program_header . type ( ) ) ;
dbgprintf ( " offset: %x \n " , program_header . offset ( ) ) ;
dbgprintf ( " flags: %x \n " , program_header . flags ( ) ) ;
dbgprintf ( " \n " ) ;
dbgprintf ( " } \n " ) ;
} ) ;
2018-10-10 12:53:07 +03:00
for ( unsigned i = 0 ; i < header ( ) . e_shnum ; + + i ) {
auto & section = this - > section ( i ) ;
2019-12-29 09:20:38 +03:00
dbgprintf ( " Section %u: { \n " , i ) ;
dbgprintf ( " name: %s \n " , section . name ( ) ) ;
dbgprintf ( " type: %x \n " , section . type ( ) ) ;
dbgprintf ( " offset: %x \n " , section . offset ( ) ) ;
dbgprintf ( " size: %u \n " , section . size ( ) ) ;
dbgprintf ( " \n " ) ;
dbgprintf ( " } \n " ) ;
2018-10-10 12:53:07 +03:00
}
2019-12-29 09:20:38 +03:00
dbgprintf ( " Symbol count: %u (table is %u) \n " , symbol_count ( ) , m_symbol_table_section_index ) ;
2018-11-04 16:09:30 +03:00
for ( unsigned i = 1 ; i < symbol_count ( ) ; + + i ) {
2018-10-10 12:53:07 +03:00
auto & sym = symbol ( i ) ;
2019-12-29 09:20:38 +03:00
dbgprintf ( " Symbol @%u: \n " , i ) ;
dbgprintf ( " Name: %s \n " , sym . name ( ) ) ;
dbgprintf ( " In section: %s \n " , section_index_to_string ( sym . section_index ( ) ) ) ;
dbgprintf ( " Value: %x \n " , sym . value ( ) ) ;
dbgprintf ( " Size: %u \n " , sym . size ( ) ) ;
2018-10-10 12:53:07 +03:00
}
2019-12-29 09:20:38 +03:00
dbgprintf ( " } \n " ) ;
2018-10-10 12:53:07 +03:00
}
2018-11-04 16:09:30 +03:00
unsigned ELFImage : : section_count ( ) const
2018-10-10 12:53:07 +03:00
{
return header ( ) . e_shnum ;
}
2018-11-03 12:11:56 +03:00
unsigned ELFImage : : program_header_count ( ) const
{
return header ( ) . e_phnum ;
}
2018-10-10 12:53:07 +03:00
bool ELFImage : : parse ( )
{
2020-01-11 04:25:12 +03:00
if ( ! validate_elf_header ( header ( ) , m_size ) ) {
dbgputstr ( " ELFImage::parse(): ELF Header not valid \n " ) ;
2018-10-10 12:53:07 +03:00
return false ;
2019-03-27 03:29:49 +03:00
}
2018-10-10 12:53:07 +03:00
// First locate the string tables.
2018-11-04 16:09:30 +03:00
for ( unsigned i = 0 ; i < section_count ( ) ; + + i ) {
auto & sh = section_header ( i ) ;
2018-10-10 12:53:07 +03:00
if ( sh . sh_type = = SHT_SYMTAB ) {
2019-11-28 22:53:02 +03:00
ASSERT ( ! m_symbol_table_section_index | | m_symbol_table_section_index = = i ) ;
2018-11-04 16:09:30 +03:00
m_symbol_table_section_index = i ;
2018-10-10 12:53:07 +03:00
}
if ( sh . sh_type = = SHT_STRTAB & & i ! = header ( ) . e_shstrndx ) {
2020-02-21 18:16:23 +03:00
if ( section_header_table_string ( sh . sh_name ) = = ELF_STRTAB )
2020-01-01 00:45:50 +03:00
m_string_table_section_index = i ;
}
2018-10-10 12:53:07 +03:00
}
2019-11-28 22:53:02 +03:00
// Then create a name-to-index map.
for ( unsigned i = 0 ; i < section_count ( ) ; + + i ) {
auto & section = this - > section ( i ) ;
m_sections . set ( section . name ( ) , move ( i ) ) ;
}
2018-10-10 12:53:07 +03:00
return true ;
}
2020-01-17 00:04:44 +03:00
// Reads the string that starts `offset` bytes into the string table section
// `table_index`. The length is measured with strnlen() and capped at the
// number of bytes left in the image. Yields nullptr when the section is not
// SHT_STRTAB, and an empty view when the start lies at or past the end of
// the image.
StringView ELFImage::table_string(unsigned table_index, unsigned offset) const
{
    auto& table_header = section_header(table_index);
    if (table_header.sh_type != SHT_STRTAB)
        return nullptr;

    size_t string_offset = table_header.sh_offset + offset;
    if (string_offset >= m_size) {
        dbgprintf(" SHENANIGANS! ELFImage::table_string() computed offset outside image. \n ");
        return {};
    }

    const char* string_start = raw_data(table_header.sh_offset + offset);
    size_t bytes_remaining = m_size - string_offset;
    size_t string_length = strnlen(string_start, bytes_remaining);
    return { string_start, string_length };
}
2020-01-17 00:04:44 +03:00
// Looks up a string in the table selected by e_shstrndx in the ELF header.
StringView ELFImage::section_header_table_string(unsigned offset) const
{
    const auto names_table_index = header().e_shstrndx;
    return table_string(names_table_index, offset);
}
// Looks up a string in the table recorded in m_string_table_section_index.
StringView ELFImage::table_string(unsigned offset) const
{
    const auto strings_table_index = m_string_table_section_index;
    return table_string(strings_table_index, offset);
}
2018-11-04 16:09:30 +03:00
const char * ELFImage : : raw_data ( unsigned offset ) const
2018-10-10 12:53:07 +03:00
{
2018-11-08 23:20:09 +03:00
return reinterpret_cast < const char * > ( m_buffer ) + offset ;
2018-10-10 12:53:07 +03:00
}
// Views the first bytes of the image buffer as the ELF header.
const Elf32_Ehdr& ELFImage::header() const
{
    const char* image_start = raw_data(0);
    return *reinterpret_cast<const Elf32_Ehdr*>(image_start);
}
2018-11-03 12:11:56 +03:00
// Views entry `index` of the program header table, which starts at e_phoff
// and is stepped through in units of sizeof(Elf32_Phdr). Asserts that the
// index is below e_phnum.
const Elf32_Phdr& ELFImage::program_header_internal(unsigned index) const
{
    ASSERT(index < header().e_phnum);
    const auto entry_offset = header().e_phoff + (index * sizeof(Elf32_Phdr));
    return *reinterpret_cast<const Elf32_Phdr*>(raw_data(entry_offset));
}
2018-11-04 16:09:30 +03:00
// Views entry `index` of the section header table, which starts at e_shoff
// and is stepped through in units of e_shentsize. Asserts that the index is
// below e_shnum.
const Elf32_Shdr& ELFImage::section_header(unsigned index) const
{
    ASSERT(index < header().e_shnum);
    const auto entry_offset = header().e_shoff + (index * header().e_shentsize);
    return *reinterpret_cast<const Elf32_Shdr*>(raw_data(entry_offset));
}
// Builds a Symbol wrapper around entry `index` of the symbol table section.
// Asserts that the index is below symbol_count().
const ELFImage::Symbol ELFImage::symbol(unsigned index) const
{
    ASSERT(index < symbol_count());
    const auto symbol_table_offset = section(m_symbol_table_section_index).offset();
    auto* symbol_entries = reinterpret_cast<const Elf32_Sym*>(raw_data(symbol_table_offset));
    return Symbol(*this, index, symbol_entries[index]);
}
// Builds a Section wrapper for section `index`. Asserts that the index is
// below the section count stated in the ELF header.
const ELFImage::Section ELFImage::section(unsigned index) const
{
    ASSERT(index < header().e_shnum);
    return Section(*this, index);
}
2018-11-03 12:11:56 +03:00
// Builds a ProgramHeader wrapper for program header `index`. Asserts that
// the index is below the program header count stated in the ELF header.
const ELFImage::ProgramHeader ELFImage::program_header(unsigned index) const
{
    ASSERT(index < header().e_phnum);
    return ProgramHeader(*this, index);
}
2019-11-28 22:53:02 +03:00
// Builds a Relocation wrapper around entry `index` of this section, whose
// contents are read as an array of Elf32_Rel. Asserts that the index is
// below relocation_count().
const ELFImage::Relocation ELFImage::RelocationSection::relocation(unsigned index) const
{
    ASSERT(index < relocation_count());
    const char* table_start = m_image.raw_data(offset());
    auto* relocation_entries = reinterpret_cast<const Elf32_Rel*>(table_start);
    return Relocation(m_image, relocation_entries[index]);
}
// Finds the relocation section belonging to this section by looking up the
// section whose name is the relocation prefix followed by this section's
// name. When the section found is not of type SHT_REL, section 0 is returned
// in its place.
const ELFImage::RelocationSection ELFImage::Section::relocations() const
{
    // NOTE(review): the prefix literal carries surrounding spaces as it
    // appears in this file; confirm it matches the intended section names.
    StringBuilder relocation_section_name;
    relocation_section_name.append(" .rel ");
    relocation_section_name.append(name());

    auto candidate = m_image.lookup_section(relocation_section_name.to_string());
    if (candidate.type() == SHT_REL) {
#ifdef ELFIMAGE_DEBUG
        dbgprintf(" Found relocations for %s in %s \n ", name(), candidate.name());
#endif
        return static_cast<const RelocationSection>(candidate);
    }

    return static_cast<const RelocationSection>(m_image.section(0));
}
2020-01-05 10:37:05 +03:00
// Returns the section registered under `name` in the name-to-index map, or
// section 0 when the name is not in the map.
const ELFImage::Section ELFImage::lookup_section(const String& name) const
{
    auto entry = m_sections.find(name);
    if (entry == m_sections.end())
        return section(0);
    return section((*entry).value);
}
2020-01-11 04:25:12 +03:00
bool ELFImage : : validate_elf_header ( const Elf32_Ehdr & elf_header , size_t file_size )
{
if ( ! IS_ELF ( elf_header ) ) {
dbgputstr ( " File is not an ELF file. \n " ) ;
return false ;
}
if ( ELFCLASS32 ! = elf_header . e_ident [ EI_CLASS ] ) {
dbgputstr ( " File is not a 32 bit ELF file. \n " ) ;
return false ;
}
if ( ELFDATA2LSB ! = elf_header . e_ident [ EI_DATA ] ) {
dbgputstr ( " File is not a little endian ELF file. \n " ) ;
return false ;
}
if ( EV_CURRENT ! = elf_header . e_ident [ EI_VERSION ] ) {
dbgprintf ( " File has unrecognized ELF version (%d), expected (%d)! \n " , elf_header . e_ident [ EI_VERSION ] , EV_CURRENT ) ;
return false ;
}
if ( ELFOSABI_SYSV ! = elf_header . e_ident [ EI_OSABI ] ) {
dbgprintf ( " File has unknown OS ABI (%d), expected SYSV(0)! \n " , elf_header . e_ident [ EI_OSABI ] ) ;
return false ;
}
if ( 0 ! = elf_header . e_ident [ EI_ABIVERSION ] ) {
dbgprintf ( " File has unknown SYSV ABI version (%d)! \n " , elf_header . e_ident [ EI_ABIVERSION ] ) ;
return false ;
}
if ( EM_386 ! = elf_header . e_machine ) {
dbgprintf ( " File has unknown machine (%d), expected i386 (3)! \n " , elf_header . e_machine ) ;
return false ;
}
if ( ET_EXEC ! = elf_header . e_type & & ET_DYN ! = elf_header . e_type & & ET_REL ! = elf_header . e_type ) {
dbgprintf ( " File has unloadable ELF type (%d), expected REL (1), EXEC (2) or DYN (3)! \n " , elf_header . e_type ) ;
return false ;
}
if ( EV_CURRENT ! = elf_header . e_version ) {
dbgprintf ( " File has unrecognized ELF version (%d), expected (%d)! \n " , elf_header . e_version , EV_CURRENT ) ;
return false ;
}
if ( sizeof ( Elf32_Ehdr ) ! = elf_header . e_ehsize ) {
dbgprintf ( " File has incorrect ELF header size..? (%d), expected (%d)! \n " , elf_header . e_ehsize , sizeof ( Elf32_Ehdr ) ) ;
return false ;
}
if ( elf_header . e_phoff > file_size | | elf_header . e_shoff > file_size ) {
dbgprintf ( " SHENANIGANS! program header offset (%d) or section header offset (%d) are past the end of the file! \n " ,
elf_header . e_phoff , elf_header . e_shoff ) ;
return false ;
}
if ( elf_header . e_phnum ! = 0 & & elf_header . e_phoff ! = elf_header . e_ehsize ) {
dbgprintf ( " File does not have program headers directly after the ELF header? program header offset (%d), expected (%d). \n " ,
elf_header . e_phoff , elf_header . e_ehsize ) ;
return false ;
}
if ( 0 ! = elf_header . e_flags ) {
dbgprintf ( " File has incorrect ELF header flags...? (%d), expected (%d). \n " , elf_header . e_flags , 0 ) ;
return false ;
}
if ( 0 ! = elf_header . e_phnum & & sizeof ( Elf32_Phdr ) ! = elf_header . e_phentsize ) {
dbgprintf ( " File has incorrect program header size..? (%d), expected (%d). \n " , elf_header . e_phentsize , sizeof ( Elf32_Phdr ) ) ;
return false ;
}
if ( sizeof ( Elf32_Shdr ) ! = elf_header . e_shentsize ) {
dbgprintf ( " File has incorrect section header size..? (%d), expected (%d). \n " , elf_header . e_shentsize , sizeof ( Elf32_Shdr ) ) ;
return false ;
}
size_t end_of_last_program_header = elf_header . e_phoff + ( elf_header . e_phnum * elf_header . e_phentsize ) ;
if ( end_of_last_program_header > file_size ) {
dbgprintf ( " SHENANIGANS! End of last program header (%d) is past the end of the file! \n " , end_of_last_program_header ) ;
return false ;
}
size_t end_of_last_section_header = elf_header . e_shoff + ( elf_header . e_shnum * elf_header . e_shentsize ) ;
if ( end_of_last_section_header > file_size ) {
dbgprintf ( " SHENANIGANS! End of last section header (%d) is past the end of the file! \n " , end_of_last_section_header ) ;
return false ;
}
if ( elf_header . e_shstrndx > = elf_header . e_shnum ) {
dbgprintf ( " SHENANIGANS! Section header string table index (%d) is not a valid index given we have %d section headers! \n " , elf_header . e_shstrndx , elf_header . e_shnum ) ;
return false ;
}
return true ;
}
bool ELFImage : : validate_program_headers ( const Elf32_Ehdr & elf_header , size_t file_size , u8 * buffer , size_t buffer_size , String & interpreter_path )
{
// Can we actually parse all the program headers in the given buffer?
size_t end_of_last_program_header = elf_header . e_phoff + ( elf_header . e_phnum * elf_header . e_phentsize ) ;
if ( end_of_last_program_header > buffer_size ) {
dbgprintf ( " Unable to parse program headers from buffer, buffer too small! Buffer size: %zu, End of program headers %zu \n " ,
buffer_size , end_of_last_program_header ) ;
return false ;
}
if ( file_size < buffer_size ) {
dbgputstr ( " We somehow read more from a file than was in the file in the first place! \n " ) ;
ASSERT_NOT_REACHED ( ) ;
}
size_t num_program_headers = elf_header . e_phnum ;
auto program_header_begin = ( const Elf32_Phdr * ) & ( buffer [ elf_header . e_phoff ] ) ;
for ( size_t header_index = 0 ; header_index < num_program_headers ; + + header_index ) {
auto & program_header = program_header_begin [ header_index ] ;
switch ( program_header . p_type ) {
case PT_INTERP :
if ( ET_DYN ! = elf_header . e_type ) {
dbgprintf ( " Found PT_INTERP header (%d) in non-DYN ELF object! What? We can't handle this! \n " , header_index ) ;
return false ;
}
// We checked above that file_size was >= buffer size. We only care about buffer size anyway, we're trying to read this!
if ( program_header . p_offset + program_header . p_filesz > buffer_size ) {
dbgprintf ( " Found PT_INTERP header (%d), but the .interp section was not within our buffer :( Your program will not be loaded today. \n " , header_index ) ;
return false ;
}
interpreter_path = String ( ( const char * ) & buffer [ program_header . p_offset ] , program_header . p_filesz - 1 ) ;
break ;
case PT_LOAD :
case PT_DYNAMIC :
case PT_NOTE :
case PT_PHDR :
case PT_TLS :
if ( program_header . p_offset + program_header . p_filesz > file_size ) {
dbgprintf ( " SHENANIGANS! Program header %d segment leaks beyond end of file! \n " , header_index ) ;
return false ;
}
if ( ( program_header . p_flags & PF_X ) & & ( program_header . p_flags & PF_W ) ) {
dbgprintf ( " SHENANIGANS! Program header %d segment is marked write and execute \n " , header_index ) ;
return false ;
}
break ;
default :
// Not handling other program header types in other code so... let's not surprise them
2020-04-11 20:20:16 +03:00
dbgprintf ( " Found program header (%d) of unrecognized type %x! \n " , header_index , program_header . p_type ) ;
return false ;
2020-01-11 04:25:12 +03:00
}
}
return true ;
}
2020-04-11 19:45:17 +03:00
// View over this symbol's bytes: starts at the containing section's data
// plus (symbol value - section address) and spans size() bytes.
StringView ELFImage::Symbol::raw_data() const
{
    auto& containing_section = this->section();
    const auto offset_in_section = value() - containing_section.address();
    auto* symbol_start = containing_section.raw_data() + offset_in_section;
    return { symbol_start, size() };
}