From 9a026fc8d50bce544eb4f03c526c8b3479a231ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?kleines=20Filmr=C3=B6llchen?= Date: Thu, 21 Sep 2023 00:14:35 +0200 Subject: [PATCH] AK: Implement SipHash as the default hash algorithm for most use cases SipHash is highly HashDoS-resistant, initialized with a random seed at startup (i.e. non-deterministic) and usable for security-critical use cases with large enough parameters. We just use it because it's reasonably secure with parameters 1-3 while having excellent properties and not being significantly slower than before. --- AK/CMakeLists.txt | 1 + AK/HashMap.h | 3 + AK/HashTable.h | 3 + AK/SipHash.cpp | 184 +++++++++++++++++++++++++++++++++ AK/SipHash.h | 29 ++++++ AK/StringHash.h | 4 + AK/Traits.h | 17 ++- Kernel/CMakeLists.txt | 1 + Tests/AK/TestHashFunctions.cpp | 51 +++++++++ 9 files changed, 282 insertions(+), 11 deletions(-) create mode 100644 AK/SipHash.cpp create mode 100644 AK/SipHash.h diff --git a/AK/CMakeLists.txt b/AK/CMakeLists.txt index b9eb1cb4874..c5eb4c37e51 100644 --- a/AK/CMakeLists.txt +++ b/AK/CMakeLists.txt @@ -23,6 +23,7 @@ set(AK_SOURCES NumberFormat.cpp OptionParser.cpp Random.cpp + SipHash.cpp StackInfo.cpp Stream.cpp String.cpp diff --git a/AK/HashMap.h b/AK/HashMap.h index b8c4f122eef..ca7f73c76ed 100644 --- a/AK/HashMap.h +++ b/AK/HashMap.h @@ -14,6 +14,9 @@ namespace AK { +// A map datastructure, mapping keys K to values V, based on a hash table with closed hashing. +// HashMap can optionally provide ordered iteration based on the order of keys when IsOrdered = true. +// HashMap is based on HashTable, which should be used instead if just a set datastructure is required. template class HashMap { private: diff --git a/AK/HashTable.h b/AK/HashTable.h index b56c0ce3bcc..45bd3701246 100644 --- a/AK/HashTable.h +++ b/AK/HashTable.h @@ -114,6 +114,9 @@ private: BucketType* m_bucket { nullptr }; }; +// A set datastructure based on a hash table with closed hashing. 
+// HashTable can optionally provide ordered iteration when IsOrdered = true. +// For a (more commonly required) map datastructure with key-value entries, see HashMap. template class HashTable { static constexpr size_t grow_capacity_at_least = 8; diff --git a/AK/SipHash.cpp b/AK/SipHash.cpp new file mode 100644 index 00000000000..d9009ce3f94 --- /dev/null +++ b/AK/SipHash.cpp @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2023, kleines Filmröllchen + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include + +#ifdef KERNEL +# include +#else +# include +#endif + +namespace AK { + +ALWAYS_INLINE constexpr u64 rotate_left(u64 x, u64 bits) +{ + return static_cast(((x) << (bits)) | ((x) >> (64 - (bits)))); +} + +ALWAYS_INLINE constexpr void sipround(u64& v0, u64& v1, u64& v2, u64& v3) +{ + v0 += v1; + v1 = rotate_left(v1, 13); + v1 ^= v0; + v0 = rotate_left(v0, 32); + v2 += v3; + v3 = rotate_left(v3, 16); + v3 ^= v2; + v0 += v3; + v3 = rotate_left(v3, 21); + v3 ^= v0; + v2 += v1; + v1 = rotate_left(v1, 17); + v1 ^= v2; + v2 = rotate_left(v2, 32); +} + +// Can handle u64 or u128 output as per reference implementation. +// We currently only use u64 and further fold it to u32 (unsigned) for use in Traits. +template +static void do_siphash(ReadonlyBytes input, u128 key, Bytes output) +{ + VERIFY((output.size() == 8) || (output.size() == 16)); + + u64 v0 = 0x736f6d6570736575ull; + u64 v1 = 0x646f72616e646f6dull; + u64 v2 = 0x6c7967656e657261ull; + u64 v3 = 0x7465646279746573ull; + auto const left = input.size() & 7; + // The end of 64-bit blocks. 
+ auto const block_end = input.size() - (input.size() % sizeof(u64)); + u64 b = input.size() << 56; + v3 ^= key.high(); + v2 ^= key.low(); + v1 ^= key.high(); + v0 ^= key.low(); + + if (output.size() == 16) + v1 ^= 0xee; + + for (size_t input_index = 0; input_index < block_end; input_index += 8) { + u64 const m = bit_cast>(ByteReader::load64(input.slice(input_index, sizeof(u64)).data())); + v3 ^= m; + + for (size_t i = 0; i < message_block_rounds; ++i) + sipround(v0, v1, v2, v3); + + v0 ^= m; + } + + switch (left) { + case 7: + b |= (static_cast(input[block_end + 6])) << 48; + [[fallthrough]]; + case 6: + b |= (static_cast(input[block_end + 5])) << 40; + [[fallthrough]]; + case 5: + b |= (static_cast(input[block_end + 4])) << 32; + [[fallthrough]]; + case 4: + b |= (static_cast(input[block_end + 3])) << 24; + [[fallthrough]]; + case 3: + b |= (static_cast(input[block_end + 2])) << 16; + [[fallthrough]]; + case 2: + b |= (static_cast(input[block_end + 1])) << 8; + [[fallthrough]]; + case 1: + b |= (static_cast(input[block_end + 0])); + break; + case 0: + break; + } + + v3 ^= b; + + for (size_t i = 0; i < message_block_rounds; ++i) + sipround(v0, v1, v2, v3); + + v0 ^= b; + + if (output.size() == 16) + v2 ^= 0xee; + else + v2 ^= 0xff; + + for (size_t i = 0; i < finalization_rounds; ++i) + sipround(v0, v1, v2, v3); + + b = v0 ^ v1 ^ v2 ^ v3; + + LittleEndian b_le { b }; + output.overwrite(0, &b_le, sizeof(b_le)); + + if (output.size() == 8) + return; + + v1 ^= 0xdd; + + for (size_t i = 0; i < finalization_rounds; ++i) + sipround(v0, v1, v2, v3); + + b = v0 ^ v1 ^ v2 ^ v3; + b_le = b; + output.overwrite(sizeof(b_le), &b_le, sizeof(b_le)); +} + +struct SipHashKey { + SipHashKey() + { +#ifdef KERNEL + key = Kernel::get_good_random(); +#else + // get_random is assumed to be secure, otherwise SipHash doesn't deliver on its promises! 
+ key = get_random(); +#endif + } + constexpr u128 operator*() const { return key; } + u128 key; +}; +// Using a singleton is a little heavier than a plain static, but avoids an initialization order fiasco. +static Singleton static_sip_hash_key; + +template +unsigned sip_hash_u64(u64 input) +{ + ReadonlyBytes input_bytes { &input, sizeof(input) }; + u64 const output_u64 = sip_hash_bytes(input_bytes); + return static_cast(output_u64 ^ (output_u64 >> 32)); +} + +unsigned standard_sip_hash(u64 input) +{ + return sip_hash_u64<1, 3>(input); +} + +unsigned secure_sip_hash(u64 input) +{ + return sip_hash_u64<4, 8>(input); +} + +template +u64 sip_hash_bytes(ReadonlyBytes input) +{ + auto sip_hash_key = **static_sip_hash_key; + u64 output = 0; + Bytes output_bytes { &output, sizeof(output) }; + do_siphash(input, sip_hash_key, output_bytes); + return output; +} + +// Instantiate all used SipHash variants here: +template u64 sip_hash_bytes<1, 3>(ReadonlyBytes); +template u64 sip_hash_bytes<4, 8>(ReadonlyBytes); + +} diff --git a/AK/SipHash.h b/AK/SipHash.h new file mode 100644 index 00000000000..3c159362aeb --- /dev/null +++ b/AK/SipHash.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2023, kleines Filmröllchen + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include + +namespace AK { + +// Ported from the SipHash reference implementation, released to the public domain: +// https://github.com/veorq/SipHash/blob/eee7d0d84dc7731df2359b243aa5e75d85f6eaef/siphash.c +// The standard is SipHash-2-4, but we use 1-3 for a little more speed. +// Cryptography should use 4-8 for (relative) conservative security, +// though SipHash itself is NOT a cryptographically secure hash algorithm. 
+template +u64 sip_hash_bytes(ReadonlyBytes input); +unsigned standard_sip_hash(u64 input); +unsigned secure_sip_hash(u64 input); + +} + +#ifdef USING_AK_GLOBALLY +using AK::secure_sip_hash; +using AK::sip_hash_bytes; +using AK::standard_sip_hash; +#endif diff --git a/AK/StringHash.h b/AK/StringHash.h index b5966adb3c2..dd45698252f 100644 --- a/AK/StringHash.h +++ b/AK/StringHash.h @@ -10,6 +10,10 @@ namespace AK { +// FIXME: This hashing algorithm isn't well-known and may not be good at all. +// We can't use SipHash since that depends on runtime parameters, +// but some string hashes like IPC endpoint magic numbers need to be deterministic. +// Maybe use a SipHash with a statically-known key? constexpr u32 string_hash(char const* characters, size_t length, u32 seed = 0) { u32 hash = seed; diff --git a/AK/Traits.h b/AK/Traits.h index d6e975f46a2..515d779adc9 100644 --- a/AK/Traits.h +++ b/AK/Traits.h @@ -10,6 +10,7 @@ #include #include #include +#include #include namespace AK { @@ -33,12 +34,9 @@ template struct Traits : public GenericTraits { static constexpr bool is_trivial() { return true; } static constexpr bool is_trivially_serializable() { return true; } - static constexpr unsigned hash(T value) + static unsigned hash(T value) { - if constexpr (sizeof(T) < 8) - return int_hash(value); - else - return u64_hash(value); + return standard_sip_hash(static_cast(value)); } }; @@ -47,19 +45,16 @@ template struct Traits : public GenericTraits { static constexpr bool is_trivial() { return true; } static constexpr bool is_trivially_serializable() { return true; } - static constexpr unsigned hash(T value) + static unsigned hash(T value) { - if constexpr (sizeof(T) < 8) - return int_hash(bit_cast(value)); - else - return u64_hash(bit_cast(value)); + return standard_sip_hash(bit_cast(static_cast(value))); } }; #endif template requires(IsPointer && !Detail::IsPointerOfType) struct Traits : public GenericTraits { - static unsigned hash(T p) { return ptr_hash(p); } + static 
unsigned hash(T p) { return standard_sip_hash(bit_cast(p)); } static constexpr bool is_trivial() { return true; } }; diff --git a/Kernel/CMakeLists.txt b/Kernel/CMakeLists.txt index 74ffebc48dc..4fa82f6e51a 100644 --- a/Kernel/CMakeLists.txt +++ b/Kernel/CMakeLists.txt @@ -522,6 +522,7 @@ set(AK_SOURCES ../AK/GenericLexer.cpp ../AK/Hex.cpp ../AK/MemoryStream.cpp + ../AK/SipHash.cpp ../AK/Stream.cpp ../AK/StringBuilder.cpp ../AK/StringUtils.cpp diff --git a/Tests/AK/TestHashFunctions.cpp b/Tests/AK/TestHashFunctions.cpp index c29367b8b7b..1baf577fb23 100644 --- a/Tests/AK/TestHashFunctions.cpp +++ b/Tests/AK/TestHashFunctions.cpp @@ -7,6 +7,7 @@ #include #include +#include #include TEST_CASE(int_hash) @@ -53,3 +54,53 @@ TEST_CASE(constexpr_ptr_hash) // "ptr_hash" test binds the result. static_assert(ptr_hash(FlatPtr(42))); } + +// Testing concrete hash results is not possible due to SipHash's non-determinism. +// We instead perform some sanity checks and try to hit any asserts caused by programming errors. 
+TEST_CASE(sip_hash) +{ + EXPECT_EQ(standard_sip_hash(42), standard_sip_hash(42)); + EXPECT_EQ(secure_sip_hash(42), secure_sip_hash(42)); + EXPECT_NE(standard_sip_hash(42), secure_sip_hash(42)); +} + +TEST_CASE(sip_hash_bytes) +{ + constexpr Array short_test_array { 1, 2, 3, 4, 5, 6, 7, 8 }; + constexpr Array common_prefix_array { 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0 }; + EXPECT_EQ((sip_hash_bytes<1, 3>(short_test_array.span())), (sip_hash_bytes<1, 3>(short_test_array.span()))); + EXPECT_NE((sip_hash_bytes<1, 3>(short_test_array.span())), (sip_hash_bytes<1, 3>(common_prefix_array.span()))); + + for (size_t prefix_length = 1; prefix_length < 8; ++prefix_length) { + EXPECT_NE((sip_hash_bytes<1, 3>(short_test_array.span().trim(prefix_length))), (sip_hash_bytes<1, 3>(short_test_array.span()))); + EXPECT_EQ((sip_hash_bytes<1, 3>(short_test_array.span().trim(prefix_length))), (sip_hash_bytes<1, 3>(common_prefix_array.span().trim(prefix_length)))); + } +} + +template +requires(IsCallableWithArguments) +static void run_benchmark(HashFunction hash_function) +{ + for (size_t i = 0; i < 1'000'000; ++i) { + auto a = hash_function(i); + AK::taint_for_optimizer(a); + auto b = hash_function(i); + AK::taint_for_optimizer(b); + EXPECT_EQ(a, b); + } +} + +BENCHMARK_CASE(deterministic_hash) +{ + run_benchmark(u64_hash); +} + +BENCHMARK_CASE(fast_sip_hash) +{ + run_benchmark(standard_sip_hash); +} + +BENCHMARK_CASE(secure_sip_hash) +{ + run_benchmark(secure_sip_hash); +}