AK: Implement Knuth's algorithm D for dividing UFixedBigInt's

This commit is contained in:
Dan Klishch 2023-02-04 18:57:10 +03:00 committed by Andrew Kaster
parent 2470fab05e
commit 2d27c98659
Notes: sideshowbarker 2024-07-16 23:51:07 +09:00
4 changed files with 233 additions and 67 deletions

View File

@ -10,6 +10,7 @@
#include <AK/ScopeGuard.h>
#include <AK/StringView.h>
#include <AK/UFixedBigInt.h>
#include <AK/UFixedBigIntDivision.h>
namespace AK {

View File

@ -60,6 +60,13 @@ constexpr void mul_internal(Operand1 const& operand1, Operand2 const& operand2,
StorageOperations::baseline_mul(operand1, operand2, result, g_null_allocator);
}
template<size_t dividend_size, size_t divisor_size, bool restore_remainder>
constexpr void div_mod_internal( // Include AK/UFixedBigIntDivision.h to use UFixedBigInt division
StaticStorage<false, dividend_size> const& dividend,
StaticStorage<false, divisor_size> const& divisor,
StaticStorage<false, dividend_size>& quotient,
StaticStorage<false, divisor_size>& remainder);
template<size_t bit_size, typename Storage>
class UFixedBigInt {
constexpr static size_t static_size = Storage::static_size;
@ -395,84 +402,49 @@ public:
return result;
}
// FIXME: Refactor out this
using R = UFixedBigInt<bit_size>;
static constexpr size_t my_size()
template<NotBuiltInUFixedInt T>
constexpr UFixedBigInt<bit_size> div_mod(T const& divisor, T& remainder) const
{
return sizeof(Storage);
}
// FIXME: Do something smarter (process at least one word per iteration).
// FIXME: no restraints on this
template<Unsigned U>
requires(sizeof(Storage) >= sizeof(U)) constexpr R div_mod(U const& divisor, U& remainder) const
{
// FIXME: Is there a better way to raise a division by 0?
// Maybe as a compiletime warning?
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdiv-by-zero"
if (!divisor) {
int volatile x = 1;
int volatile y = 0;
[[maybe_unused]] int volatile z = x / y;
}
#pragma GCC diagnostic pop
// fastpaths
if (*this < divisor) {
remainder = static_cast<U>(*this);
return 0u;
}
if (*this == divisor) {
remainder = 0u;
return 1u;
}
if (divisor == 1u) {
remainder = 0u;
return *this;
}
remainder = 0u;
R quotient = 0u;
for (ssize_t i = sizeof(R) * 8 - clz() - 1; i >= 0; --i) {
remainder <<= 1u;
remainder |= static_cast<unsigned>(*this >> (size_t)i) & 1u;
if (remainder >= divisor) {
remainder -= divisor;
quotient |= R { 1u } << (size_t)i;
}
}
UFixedBigInt<bit_size> quotient;
UFixedBigInt<assumed_bit_size<T>> resulting_remainder;
div_mod_internal<bit_size, assumed_bit_size<T>, true>(m_data, get_storage_of(divisor), get_storage_of(quotient), get_storage_of(resulting_remainder));
remainder = resulting_remainder;
return quotient;
}
template<Unsigned U>
constexpr R operator/(U const& other) const
template<UFixedInt T>
constexpr auto operator/(T const& other) const
{
U mod { 0u }; // unused
return div_mod(other, mod);
}
template<Unsigned U>
constexpr U operator%(U const& other) const
{
R res { 0u };
div_mod(other, res);
return res;
UFixedBigInt<bit_size> quotient;
StaticStorage<false, assumed_bit_size<T>> remainder; // unused
div_mod_internal<bit_size, assumed_bit_size<T>, false>(m_data, get_storage_of(other), get_storage_of(quotient), remainder);
return quotient;
}
template<Unsigned U>
constexpr R& operator/=(U const& other)
template<UFixedInt T>
constexpr auto operator%(T const& other) const
{
*this = *this / other;
return *this;
StaticStorage<false, bit_size> quotient; // unused
UFixedBigInt<assumed_bit_size<T>> remainder;
div_mod_internal<bit_size, assumed_bit_size<T>, true>(m_data, get_storage_of(other), quotient, get_storage_of(remainder));
return remainder;
}
constexpr auto operator/(IntegerWrapper const& other) const { return *this / static_cast<UFixedBigInt<32>>(other); }
constexpr auto operator%(IntegerWrapper const& other) const { return *this % static_cast<UFixedBigInt<32>>(other); }
template<UFixedInt T>
constexpr auto& operator/=(T const& other) { return *this = *this / other; }
constexpr auto& operator/=(IntegerWrapper const& other) { return *this = *this / other; }
template<Unsigned U>
constexpr R& operator%=(U const& other)
constexpr auto& operator%=(U const& other) { return *this = *this % other; }
constexpr auto& operator%=(IntegerWrapper const& other) { return *this = *this % other; }
// FIXME: Replace uses with more general `assumed_bit_size<T>`.
static constexpr size_t my_size()
{
*this = *this % other;
return *this;
return sizeof(Storage);
}
// Note: If there ever be need for non side-channel proof sqrt/pow/pow_mod of UFixedBigInt, you

136
AK/UFixedBigIntDivision.h Normal file
View File

@ -0,0 +1,136 @@
/*
* Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Diagnostics.h>
#include <AK/UFixedBigInt.h>
namespace AK {
namespace Detail {
template<size_t dividend_bit_size, size_t divisor_bit_size, bool restore_remainder>
constexpr void div_mod_internal(
StaticStorage<false, dividend_bit_size> const& operand1,
StaticStorage<false, divisor_bit_size> const& operand2,
StaticStorage<false, dividend_bit_size>& quotient,
StaticStorage<false, divisor_bit_size>& remainder)
{
size_t dividend_len = operand1.size(), divisor_len = operand2.size();
while (divisor_len > 0 && !operand2[divisor_len - 1])
--divisor_len;
while (dividend_len > 0 && !operand1[dividend_len - 1])
--dividend_len;
// FIXME: Should raise SIGFPE instead
VERIFY(divisor_len); // VERIFY(divisor != 0)
// Fast paths
if (divisor_len == 1 && operand2[0] == 1) { // divisor == 1
quotient = operand1;
if constexpr (restore_remainder)
StorageOperations::set(0, remainder);
return;
}
if (dividend_len < divisor_len) { // dividend < divisor
StorageOperations::set(0, quotient);
if constexpr (restore_remainder)
remainder = operand1;
return;
}
if (divisor_len == 1 && dividend_len == 1) { // NativeWord / NativeWord
StorageOperations::set(operand1[0] / operand2[0], quotient);
if constexpr (restore_remainder)
StorageOperations::set(operand1[0] % operand2[0], remainder);
return;
}
if (divisor_len == 1) { // BigInt by NativeWord
auto u = (static_cast<DoubleWord>(operand1[dividend_len - 1]) << word_size) + operand1[dividend_len - 2];
auto divisor = operand2[0];
auto top = u / divisor;
quotient[dividend_len - 1] = static_cast<NativeWord>(top >> word_size);
quotient[dividend_len - 2] = static_cast<NativeWord>(top);
auto carry = static_cast<NativeWord>(u % divisor);
for (size_t i = dividend_len - 2; i--;)
quotient[i] = div_mod_words(operand1[i], carry, divisor, carry);
for (size_t i = dividend_len; i < quotient.size(); ++i)
quotient[i] = 0;
if constexpr (restore_remainder)
StorageOperations::set(carry, remainder);
return;
}
// Knuth's algorithm D
StaticStorage<false, dividend_bit_size + word_size> dividend;
StorageOperations::copy(operand1, dividend);
auto divisor = operand2;
// D1. Normalize
// FIXME: Investigate GCC producing bogus -Warray-bounds when dividing u128 by u32. This code
// should not be reachable at all in this case because fast paths above cover all cases
// when `operand2.size() == 1`.
AK_IGNORE_DIAGNOSTIC("-Warray-bounds", size_t shift = count_leading_zeroes(divisor[divisor_len - 1]);)
StorageOperations::shift_left(dividend, shift, dividend);
StorageOperations::shift_left(divisor, shift, divisor);
auto divisor_approx = divisor[divisor_len - 1];
for (size_t i = dividend_len + 1; i-- > divisor_len;) {
// D3. Calculate qhat
NativeWord qhat;
VERIFY(dividend[i] <= divisor_approx);
if (dividend[i] == divisor_approx) {
qhat = max_word;
} else {
NativeWord rhat;
qhat = div_mod_words(dividend[i - 1], dividend[i], divisor_approx, rhat);
auto is_qhat_too_large = [&] {
return UFixedBigInt<word_size> { qhat }.wide_multiply(divisor[divisor_len - 2]) > u128 { dividend[i - 2], rhat };
};
if (is_qhat_too_large()) {
--qhat;
bool carry = false;
rhat = add_words(rhat, divisor_approx, carry);
if (!carry && is_qhat_too_large())
--qhat;
}
}
// D4. Multiply & subtract
NativeWord mul_carry = 0;
bool sub_carry = false;
for (size_t j = 0; j < divisor_len; ++j) {
auto mul_result = UFixedBigInt<word_size> { qhat }.wide_multiply(divisor[j]) + mul_carry;
auto& output = dividend[i + j - divisor_len];
output = sub_words(output, mul_result.low(), sub_carry);
mul_carry = mul_result.high();
}
dividend[i] = sub_words(dividend[i], mul_carry, sub_carry);
if (sub_carry) {
// D6. Add back
auto dividend_part = UnsignedStorageSpan { dividend.data() + i - divisor_len, divisor_len + 1 };
VERIFY(StorageOperations::add<false>(dividend_part, divisor, dividend_part));
}
quotient[i - divisor_len] = qhat - sub_carry;
}
for (size_t i = dividend_len - divisor_len + 1; i < quotient.size(); ++i)
quotient[i] = 0;
// D8. Unnormalize
if constexpr (restore_remainder)
StorageOperations::shift_right(UnsignedStorageSpan { dividend.data(), remainder.size() }, shift, remainder);
}
}
}

View File

@ -9,6 +9,7 @@
#include <AK/NumericLimits.h>
#include <AK/Random.h>
#include <AK/UFixedBigInt.h>
#include <AK/UFixedBigIntDivision.h>
constexpr int test_iterations = 32;
@ -78,6 +79,62 @@ TEST_CASE(div_mod)
}
}
TEST_CASE(div_anti_knuth)
{
EXPECT_EQ((u256 { { 0ull, 0xffffffffffffffffull, 1ull, 0ull } } / u128 { 0x8000000000000001ull, 0xffffffffffffffffull }), 1u);
EXPECT_EQ((u128 { { 0xffffffff00000000ull, 1ull } } / u128 { 0xffffffff80000001ull }), 1u);
srand(0);
auto generate_u512 = [] {
using namespace AK::Detail;
u512 number;
auto& storage = get_storage_of(number);
static constexpr u32 interesting_words_count = 14;
static constexpr NativeWord interesting_words[interesting_words_count] = {
0,
0,
1,
2,
3,
max_word / 4 - 1,
max_word / 4,
max_word / 2 - 1,
max_word / 2,
max_word / 2 + 1,
max_word / 2 + 2,
max_word - 3,
max_word - 2,
max_word - 1,
};
for (size_t i = 0; i < storage.size(); ++i) {
u32 type = get_random_uniform(interesting_words_count + 1);
NativeWord& next_word = storage[i];
if (type == interesting_words_count)
next_word = get_random<NativeWord>();
else
next_word = interesting_words[type];
}
return number;
};
for (int i = 0; i < 16384; ++i) {
u512 a = generate_u512(), b = generate_u512();
if (b == 0)
continue;
u512 mod;
u512 div = a.div_mod(b, mod);
EXPECT_EQ(div * b + mod, a);
EXPECT_EQ(div.wide_multiply(b) + mod, a);
EXPECT(0 <= mod && mod < b);
}
}
TEST_CASE(shifts)
{
u128 val { 0x1234ULL };