mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-09-17 16:28:06 +03:00
AK: Add SIMDMath.h with vectorized version of math functions
This commit is contained in:
parent
178a57bbf7
commit
444a15bad3
Notes:
sideshowbarker
2024-07-18 02:47:59 +09:00
Author: https://github.com/sunverwerth Commit: https://github.com/SerenityOS/serenity/commit/444a15bad34 Pull-request: https://github.com/SerenityOS/serenity/pull/11568 Reviewed-by: https://github.com/Hendiadyoin1 ✅ Reviewed-by: https://github.com/Quaker762 ✅ Reviewed-by: https://github.com/gmta
62
AK/SIMDMath.h
Normal file
62
AK/SIMDMath.h
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/SIMD.h>
|
||||
#include <math.h>
|
||||
|
||||
// Returning a vector on the i686 target makes GCC emit the "-Wpsabi" warning.
// CI treats warnings as errors, so without the pragma below the build would not run to completion.
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic warning "-Wpsabi"
|
||||
|
||||
namespace AK::SIMD {
|
||||
|
||||
// Functions ending in "_int_range" only accept arguments within range [INT_MIN, INT_MAX].
|
||||
// Other inputs will generate unexpected results.
|
||||
|
||||
// Round-trips v through to_i32x4() and back through to_f32x4().
// Presumably this discards the fractional part of every element (truncation
// toward zero) - confirm against the conversion helpers in AK/SIMD.h.
ALWAYS_INLINE static f32x4 truncate_int_range(f32x4 v)
{
    auto const as_integers = to_i32x4(v);
    return to_f32x4(as_integers);
}
|
||||
|
||||
// Floor built on top of truncate_int_range(): wherever the truncated value
// ended up above the input, step it down by one; otherwise keep it unchanged.
ALWAYS_INLINE static f32x4 floor_int_range(f32x4 v)
{
    auto truncated = truncate_int_range(v);
    // The input being below its truncated value means truncation moved it upwards.
    return v < truncated ? truncated - 1.0f : truncated;
}
|
||||
|
||||
// Ceiling built on top of truncate_int_range(): wherever the truncated value
// ended up below the input, step it up by one; otherwise keep it unchanged.
ALWAYS_INLINE static f32x4 ceil_int_range(f32x4 v)
{
    auto truncated = truncate_int_range(v);
    // The input being above its truncated value means truncation moved it downwards.
    return v > truncated ? truncated + 1.0f : truncated;
}
|
||||
|
||||
// Returns what is left of v after subtracting floor_int_range(v).
ALWAYS_INLINE static f32x4 frac_int_range(f32x4 v)
{
    auto const floored = floor_int_range(v);
    return v - floored;
}
|
||||
|
||||
// Limits v to the bounds given by min and max.
// The lower bound is checked last and therefore wins: wherever v < min the
// result is min, regardless of how v compares to max.
ALWAYS_INLINE static f32x4 clamp(f32x4 v, f32x4 min, f32x4 max)
{
    // First apply the upper bound only.
    auto capped_above = v > max ? max : v;
    // Then let the lower bound override wherever the original input is below it.
    return v < min ? min : capped_above;
}
|
||||
|
||||
// Applies expf() to each of the four elements of v and packs the results into a new f32x4.
ALWAYS_INLINE static f32x4 exp(f32x4 v)
{
    // FIXME: This should be replaced with a vectorized algorithm instead of calling the scalar expf 4 times
    float const e0 = expf(v[0]);
    float const e1 = expf(v[1]);
    float const e2 = expf(v[2]);
    float const e3 = expf(v[3]);
    return f32x4 { e0, e1, e2, e3 };
}
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user