LibELF: Implement GNU Hashing algorithm for dyn relocations

This commit is contained in:
Sahan Fernando 2021-01-06 01:53:01 +11:00 committed by Andreas Kling
parent 2444bf6d3b
commit b370ee3423
Notes: sideshowbarker 2024-07-19 00:05:03 +09:00
2 changed files with 88 additions and 19 deletions

View File

@ -109,6 +109,13 @@ void DynamicObject::parse()
m_fini_array_size = entry.val();
break;
case DT_HASH:
// Use SYSV hash only if GNU hash is not available
if (m_hash_type == HashType::SYSV) {
m_hash_table_offset = entry.ptr() - (FlatPtr)m_elf_base_address.as_ptr();
}
break;
case DT_GNU_HASH:
m_hash_type = HashType::GNU;
m_hash_table_offset = entry.ptr() - (FlatPtr)m_elf_base_address.as_ptr();
break;
case DT_SYMTAB:
@ -235,7 +242,8 @@ const DynamicObject::Section DynamicObject::fini_array_section() const
const DynamicObject::HashSection DynamicObject::hash_section() const
{
return HashSection(Section(*this, m_hash_table_offset, 0, 0, "DT_HASH"), HashType::SYSV);
const char* section_name = m_hash_type == HashType::SYSV ? "DT_HASH" : "DT_GNU_HASH";
return HashSection(Section(*this, m_hash_table_offset, 0, 0, section_name), m_hash_type);
}
const DynamicObject::RelocationSection DynamicObject::relocation_section() const
@ -254,33 +262,40 @@ u32 DynamicObject::HashSection::calculate_elf_hash(const char* name) const
// Note that the GNU HASH algorithm has less collisions
uint32_t hash = 0;
uint32_t top_nibble_of_hash = 0;
while (*name != '\0') {
hash = hash << 4;
hash += *name;
name++;
top_nibble_of_hash = hash & 0xF0000000U;
if (top_nibble_of_hash != 0)
hash ^= top_nibble_of_hash >> 24;
const uint32_t top_nibble_of_hash = hash & 0xF0000000U;
hash ^= top_nibble_of_hash >> 24;
hash &= ~top_nibble_of_hash;
}
return hash;
}
u32 DynamicObject::HashSection::calculate_gnu_hash(const char*) const
u32 DynamicObject::HashSection::calculate_gnu_hash(const char* name) const
{
// FIXME: Implement the GNU hash algorithm
ASSERT_NOT_REACHED();
// GNU ELF hash algorithm
u32 hash = 5381;
for (; *name != '\0'; ++name) {
hash = hash * 33 + *name;
}
return hash;
}
const DynamicObject::Symbol DynamicObject::HashSection::lookup_symbol(const char* name) const
{
// FIXME: If we enable gnu hash in the compiler, we should use that here instead
// The algo is way better with less collisions
u32 hash_value = (this->*(m_hash_function))(name);
return (this->*(m_lookup_function))(name);
}
const DynamicObject::Symbol DynamicObject::HashSection::lookup_elf_symbol(const char* name) const
{
u32 hash_value = calculate_elf_hash(name);
u32* hash_table_begin = (u32*)address().as_ptr();
@ -289,7 +304,8 @@ const DynamicObject::Symbol DynamicObject::HashSection::lookup_symbol(const char
// This is here for completeness, but, since we're using the fact that every chain
// will end at chain 0 (which means 'not found'), we don't need to check num_chains.
// Interestingly, num_chains is required to be num_symbols
//size_t num_chains = hash_table_begin[1];
// size_t num_chains = hash_table_begin[1];
u32* buckets = &hash_table_begin[2];
u32* chains = &buckets[num_buckets];
@ -297,13 +313,62 @@ const DynamicObject::Symbol DynamicObject::HashSection::lookup_symbol(const char
for (u32 i = buckets[hash_value % num_buckets]; i; i = chains[i]) {
auto symbol = m_dynamic.symbol(i);
if (strcmp(name, symbol.name()) == 0) {
VERBOSE("Returning dynamic symbol with index %u for %s: %p\n", i, symbol.name(), symbol.address().as_ptr());
VERBOSE("Returning SYSV dynamic symbol with index %u for %s: %p\n", i, symbol.name(), symbol.address().as_ptr());
return symbol;
}
}
return Symbol::create_undefined(m_dynamic);
}
const DynamicObject::Symbol DynamicObject::HashSection::lookup_gnu_symbol(const char* name) const
{
// Algorithm reference: https://ent-voy.blogspot.com/2011/02/
// TODO: Handle 64bit bloomwords for ELF_CLASS64
using BloomWord = u32;
constexpr size_t bloom_word_size = sizeof(BloomWord) * 8;
const u32* hash_table_begin = (u32*)address().as_ptr();
const size_t num_buckets = hash_table_begin[0];
const size_t num_omitted_symbols = hash_table_begin[1];
const u32 num_maskwords = hash_table_begin[2];
// This works because num_maskwords is required to be a power of 2
const u32 num_maskwords_bitmask = num_maskwords - 1;
const u32 shift2 = hash_table_begin[3];
const BloomWord* bloom_words = &hash_table_begin[4];
const u32* const buckets = &bloom_words[num_maskwords];
const u32* const chains = &buckets[num_buckets];
BloomWord hash1 = calculate_gnu_hash(name);
BloomWord hash2 = hash1 >> shift2;
const BloomWord bitmask = (1 << (hash1 % bloom_word_size)) | (1 << (hash2 % bloom_word_size));
if ((bloom_words[(hash1 / bloom_word_size) & num_maskwords_bitmask] & bitmask) != bitmask) {
return Symbol::create_undefined(m_dynamic);
}
size_t current_sym = buckets[hash1 % num_buckets];
if (current_sym == 0) {
return Symbol::create_undefined(m_dynamic);
}
const u32* current_chain = &chains[current_sym - num_omitted_symbols];
for (hash1 &= ~1;; ++current_sym) {
hash2 = *(current_chain++);
const auto symbol = m_dynamic.symbol(current_sym);
if ((hash1 == (hash2 & ~1)) && strcmp(name, symbol.name()) == 0) {
VERBOSE("Returning GNU dynamic symbol with index %zu for %s: %p\n", current_sym, symbol.name(), symbol.address().as_ptr());
return symbol;
}
if (hash2 & 1) {
break;
}
}
return Symbol::create_undefined(m_dynamic);
}
const char* DynamicObject::symbol_string_table_string(Elf32_Word index) const
{
return (const char*)base_address().offset(m_string_table_offset + index).as_ptr();
@ -439,7 +504,7 @@ Elf32_Addr DynamicObject::patch_plt_entry(u32 relocation_offset)
u32 symbol_location = res.address;
VERBOSE("DynamicLoader: Jump slot relocation: putting %s (%p) into PLT at %p\n", sym.name(), symbol_location, relocation_address);
VERBOSE("DynamicLoader: Jump slot relocation: putting %s (%p) into PLT at %p\n", sym.name(), (void*)symbol_location, (void*)relocation_address);
*(u32*)relocation_address = symbol_location;

View File

@ -204,15 +204,15 @@ public:
class HashSection : public Section {
public:
HashSection(const Section& section, HashType hash_type = HashType::SYSV)
HashSection(const Section& section, HashType hash_type)
: Section(section.m_dynamic, section.m_section_offset, section.m_section_size_bytes, section.m_entry_size, section.m_name)
{
switch (hash_type) {
case HashType::SYSV:
m_hash_function = &HashSection::calculate_elf_hash;
m_lookup_function = &HashSection::lookup_elf_symbol;
break;
case HashType::GNU:
m_hash_function = &HashSection::calculate_gnu_hash;
m_lookup_function = &HashSection::lookup_gnu_symbol;
break;
default:
ASSERT_NOT_REACHED();
@ -226,8 +226,11 @@ public:
u32 calculate_elf_hash(const char* name) const;
u32 calculate_gnu_hash(const char* name) const;
typedef u32 (HashSection::*HashFunction)(const char*) const;
HashFunction m_hash_function;
const DynamicObject::Symbol lookup_elf_symbol(const char* name) const;
const DynamicObject::Symbol lookup_gnu_symbol(const char* name) const;
typedef const DynamicObject::Symbol (HashSection::*LookupFunction)(const char*) const;
LookupFunction m_lookup_function;
};
unsigned symbol_count() const { return m_symbol_count; }
@ -318,6 +321,7 @@ private:
size_t m_fini_array_size { 0 };
FlatPtr m_hash_table_offset { 0 };
HashType m_hash_type { HashType::SYSV };
FlatPtr m_string_table_offset { 0 };
size_t m_size_of_string_table { 0 };