2021-12-19 02:02:32 +03:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
|
|
|
|
*
|
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
*/
|
|
|
|
|
2022-01-06 19:06:46 +03:00
|
|
|
#include <AK/SIMDExtras.h>
|
|
|
|
#include <AK/SIMDMath.h>
|
2021-12-29 23:59:13 +03:00
|
|
|
#include <LibSoftGPU/Config.h>
|
2021-12-19 02:02:32 +03:00
|
|
|
#include <LibSoftGPU/Image.h>
|
2022-01-06 19:06:46 +03:00
|
|
|
#include <LibSoftGPU/SIMD.h>
|
2021-12-19 02:02:32 +03:00
|
|
|
#include <LibSoftGPU/Sampler.h>
|
|
|
|
#include <math.h>
|
|
|
|
|
|
|
|
namespace SoftGPU {
|
|
|
|
|
2022-01-06 19:06:46 +03:00
|
|
|
using AK::SIMD::f32x4;
|
|
|
|
using AK::SIMD::i32x4;
|
|
|
|
using AK::SIMD::u32x4;
|
|
|
|
|
|
|
|
using AK::SIMD::clamp;
|
|
|
|
using AK::SIMD::expand4;
|
|
|
|
using AK::SIMD::floor_int_range;
|
|
|
|
using AK::SIMD::frac_int_range;
|
|
|
|
using AK::SIMD::maskbits;
|
|
|
|
using AK::SIMD::to_f32x4;
|
|
|
|
using AK::SIMD::to_i32x4;
|
|
|
|
using AK::SIMD::to_u32x4;
|
|
|
|
using AK::SIMD::truncate_int_range;
|
|
|
|
|
|
|
|
// Coordinate wrapping for TextureWrapMode::Repeat: each lane is passed through
// frac_int_range() and the result is returned unchanged.
static f32x4 wrap_repeat(f32x4 value)
{
    f32x4 const wrapped = frac_int_range(value);
    return wrapped;
}
|
|
|
|
|
2022-01-06 19:06:46 +03:00
|
|
|
// Clamps every lane of the coordinate to the closed range [0, 1].
// Only called from wrap() when CLAMP_DEPRECATED_BEHAVIOR is enabled, hence [[maybe_unused]].
[[maybe_unused]] static f32x4 wrap_clamp(f32x4 value)
{
    f32x4 const lower_bound = expand4(0.0f);
    f32x4 const upper_bound = expand4(1.0f);
    return clamp(value, lower_bound, upper_bound);
}
|
|
|
|
|
2022-01-06 19:06:46 +03:00
|
|
|
// Clamps every lane of the coordinate to [1 / (2 * num_texels), 1 - 1 / (2 * num_texels)],
// i.e. half a texel away from either end of the normalized range.
//   value:      normalized texture coordinate per lane.
//   num_texels: texture size along this axis per lane.
static f32x4 wrap_clamp_to_edge(f32x4 value, u32x4 num_texels)
{
    u32x4 const doubled_texel_count = 2 * num_texels;
    f32x4 const half_texel = 1.f / to_f32x4(doubled_texel_count);
    f32x4 const upper_limit = 1.0f - half_texel;
    return clamp(value, half_texel, upper_limit);
}
|
|
|
|
|
2022-01-06 19:06:46 +03:00
|
|
|
// Coordinate wrapping for TextureWrapMode::MirroredRepeat.
// The coordinate is split into floor_int_range(value) and the remainder; lanes whose
// floored part is odd use (1 - remainder), all other lanes use the remainder as-is.
// The result is then clamped with wrap_clamp_to_edge().
static f32x4 wrap_mirrored_repeat(f32x4 value, u32x4 num_texels)
{
    f32x4 const whole_part = floor_int_range(value);
    f32x4 const fractional_part = value - whole_part;

    // Non-zero in every lane whose floored coordinate has its lowest bit set.
    auto odd_period_mask = to_i32x4(whole_part) & 1;

    f32x4 const mirrored = odd_period_mask ? 1 - fractional_part : fractional_part;
    return wrap_clamp_to_edge(mirrored, num_texels);
}
|
|
|
|
|
2022-01-06 19:06:46 +03:00
|
|
|
// Dispatches to the wrapping helper that matches the given texture wrap mode.
//   value:      normalized texture coordinate per lane.
//   mode:       wrap mode to apply.
//   num_texels: texture size along this axis per lane (used by the edge-clamping modes).
// Any mode not listed below hits VERIFY_NOT_REACHED().
static f32x4 wrap(f32x4 value, TextureWrapMode mode, u32x4 num_texels)
{
    switch (mode) {
    case TextureWrapMode::Repeat:
        return wrap_repeat(value);

    case TextureWrapMode::MirroredRepeat:
        return wrap_mirrored_repeat(value, num_texels);

    case TextureWrapMode::Clamp:
        // With CLAMP_DEPRECATED_BEHAVIOR the coordinate is clamped to [0, 1];
        // otherwise Clamp is handled exactly like ClampToEdge.
        if constexpr (CLAMP_DEPRECATED_BEHAVIOR)
            return wrap_clamp(value);
        else
            return wrap_clamp_to_edge(value, num_texels);

    // ClampToBorder currently shares the coordinate handling of ClampToEdge.
    case TextureWrapMode::ClampToBorder:
    case TextureWrapMode::ClampToEdge:
        return wrap_clamp_to_edge(value, num_texels);

    default:
        VERIFY_NOT_REACHED();
    }
}
|
|
|
|
|
2022-01-06 19:06:46 +03:00
|
|
|
// Fetches one texel per SIMD lane and returns the four results transposed:
// the returned vector holds all x components, all y components, all z components
// and all w components as separate f32x4 values.
//   layer, level, x, y, z: per-lane arguments forwarded to Image::texel().
ALWAYS_INLINE static Vector4<f32x4> texel4(Image const& image, u32x4 layer, u32x4 level, u32x4 x, u32x4 y, u32x4 z)
{
    auto const lane0 = image.texel(layer[0], level[0], x[0], y[0], z[0]);
    auto const lane1 = image.texel(layer[1], level[1], x[1], y[1], z[1]);
    auto const lane2 = image.texel(layer[2], level[2], x[2], y[2], z[2]);
    auto const lane3 = image.texel(layer[3], level[3], x[3], y[3], z[3]);

    // Transpose from "one texel per lane" to "one component per vector".
    f32x4 const xs = { lane0.x(), lane1.x(), lane2.x(), lane3.x() };
    f32x4 const ys = { lane0.y(), lane1.y(), lane2.y(), lane3.y() };
    f32x4 const zs = { lane0.z(), lane1.z(), lane2.z(), lane3.z() };
    f32x4 const ws = { lane0.w(), lane1.w(), lane2.w(), lane3.w() };

    return Vector4<f32x4> { xs, ys, zs, ws };
}
|
|
|
|
|
|
|
|
// Like texel4(), but lanes whose coordinates lie outside the image use `border` instead
// of reading from the image.
//   layer, level, x, y, z: per-lane arguments forwarded to Image::texel().
//   border:                value substituted for out-of-range lanes.
//   w, h:                  per-lane exclusive upper bounds for x and y.
// Returns the four texels transposed into one f32x4 per component (x, y, z, w).
ALWAYS_INLINE static Vector4<f32x4> texel4border(Image const& image, u32x4 layer, u32x4 level, u32x4 x, u32x4 y, u32x4 z, FloatVector4 const& border, u32x4 w, u32x4 h)
{
    // x and y are unsigned, so a `< 0` test can never be true for any lane; only the
    // upper-bound tests can flag a lane as out of range.
    // NOTE(review): callers derive x/y from floats that may be negative; presumably
    // to_u32x4() turns those into large values that fail the `>=` tests - confirm.
    auto border_mask = maskbits(x >= w || y >= h);

    // Bit N of border_mask corresponds to lane N.
    auto t0 = border_mask & 1 ? border : image.texel(layer[0], level[0], x[0], y[0], z[0]);
    auto t1 = border_mask & 2 ? border : image.texel(layer[1], level[1], x[1], y[1], z[1]);
    auto t2 = border_mask & 4 ? border : image.texel(layer[2], level[2], x[2], y[2], z[2]);
    auto t3 = border_mask & 8 ? border : image.texel(layer[3], level[3], x[3], y[3], z[3]);

    return Vector4<f32x4> {
        f32x4 { t0.x(), t1.x(), t2.x(), t3.x() },
        f32x4 { t0.y(), t1.y(), t2.y(), t3.y() },
        f32x4 { t0.z(), t1.z(), t2.z(), t3.z() },
        f32x4 { t0.w(), t1.w(), t2.w(), t3.w() },
    };
}
|
|
|
|
|
|
|
|
// Samples the bound 2D image at four texture coordinates at once.
// Chooses between the magnification filter, the minification filter on the base level,
// or mipmapped minification (nearest level or a blend of two adjacent levels), based on
// the texture scale factor and the configured mipmap filter.
// If no image is bound, every lane yields the value (1, 0, 0, 1).
Vector4<AK::SIMD::f32x4> Sampler::sample_2d(Vector2<AK::SIMD::f32x4> const& uv) const
{
    if (m_config.bound_image.is_null()) {
        return expand4(FloatVector4 { 1, 0, 0, 1 });
    }

    auto const& image = *m_config.bound_image;

    // FIXME: Make base level configurable with glTexParameteri(GL_TEXTURE_BASE_LEVEL, base_level)
    constexpr unsigned base_level = 0;

    // Determine the texture scale factor. See OpenGL 1.5 spec chapter 3.8.8.
    // FIXME: Static casting from u32 to float could silently truncate here.
    //        u16 should be plenty enough for texture dimensions and would allow textures of up to 65536x65536x65536 pixels.
    float const base_width = static_cast<float>(image.level_width(base_level));
    float const base_height = static_cast<float>(image.level_height(base_level));

    auto scaled_uv = uv;
    scaled_uv.set_x(scaled_uv.x() * base_width);
    scaled_uv.set_y(scaled_uv.y() * base_height);

    auto const derivative_x = ddx(scaled_uv);
    auto const derivative_y = ddy(scaled_uv);
    // Squared length of the larger of the two derivatives.
    auto const scale_factor = max(derivative_x.dot(derivative_x), derivative_y.dot(derivative_y));

    // FIXME: Here we simply determine the filter based on the single scale factor of the upper left pixel.
    // Actually, we could end up with different scale factors for each pixel. This however would break our
    // parallelisation as we could also end up with different filter modes per pixel.

    // Note: scale_factor approximates texels per pixel. This means a scale factor less than 1 indicates texture magnification.
    bool const is_magnified = scale_factor[0] < 1;
    if (is_magnified) {
        return sample_2d_lod(uv, expand4(base_level), m_config.texture_mag_filter);
    }

    // Minification without mipmapping always reads from the base level.
    if (m_config.mipmap_filter == MipMapFilter::None) {
        return sample_2d_lod(uv, expand4(base_level), m_config.texture_min_filter);
    }

    // FIXME: Instead of clamping to num_levels - 1, actually make the max mipmap level configurable with glTexParameteri(GL_TEXTURE_MAX_LEVEL, max_level)
    auto const min_level = expand4(static_cast<float>(base_level));
    auto const max_level = expand4(image.num_levels() - 1.0f);

    // scale_factor is a squared quantity, so its log2 is halved before clamping to the valid level range.
    auto const unclamped_level = log2_approximate(scale_factor) * 0.5f;
    auto const level = min(max(unclamped_level, min_level), max_level);

    auto const lower_level_texel = sample_2d_lod(uv, to_u32x4(level), m_config.texture_min_filter);
    if (m_config.mipmap_filter == MipMapFilter::Nearest) {
        return lower_level_texel;
    }

    // Remaining mipmap filter: blend with the next level (capped at max_level),
    // weighted by frac_int_range(level).
    auto const next_level = min(level + 1.f, max_level);
    auto const higher_level_texel = sample_2d_lod(uv, to_u32x4(next_level), m_config.texture_min_filter);

    return mix(lower_level_texel, higher_level_texel, frac_int_range(level));
}
|
|
|
|
|
|
|
|
// Samples the bound 2D image at four texture coordinates on explicitly chosen mipmap levels.
//   uv:     normalized texture coordinates, one per lane.
//   level:  mipmap level to read from, one per lane.
//   filter: TextureFilter::Nearest reads a single texel per lane; any other value blends the
//           four neighbouring texels per lane.
// The bound image is dereferenced without a null check.
Vector4<AK::SIMD::f32x4> Sampler::sample_2d_lod(Vector2<AK::SIMD::f32x4> const& uv, AK::SIMD::u32x4 level, TextureFilter filter) const
{
    auto const& image = *m_config.bound_image;
    u32x4 const layer = expand4(0u);

    // Per-lane dimensions of the requested mipmap levels.
    u32x4 const width = {
        image.level_width(level[0]),
        image.level_width(level[1]),
        image.level_width(level[2]),
        image.level_width(level[3]),
    };
    u32x4 const height = {
        image.level_height(level[0]),
        image.level_height(level[1]),
        image.level_height(level[2]),
        image.level_height(level[3]),
    };

    // Used instead of a modulo when the image reports power-of-two dimensions.
    u32x4 const width_mask = width - 1;
    u32x4 const height_mask = height - 1;

    // Apply the configured wrap modes, then scale to texel space.
    f32x4 const s = wrap(uv.x(), m_config.texture_wrap_u, width);
    f32x4 const t = wrap(uv.y(), m_config.texture_wrap_v, height);

    f32x4 u = s * to_f32x4(width);
    f32x4 v = t * to_f32x4(height);

    if (filter == TextureFilter::Nearest) {
        u32x4 i = to_u32x4(u);
        u32x4 j = to_u32x4(v);

        if (image.width_is_power_of_two())
            i = i & width_mask;
        else
            i = i % width;

        if (image.height_is_power_of_two())
            j = j & height_mask;
        else
            j = j % height;

        return texel4(image, layer, level, i, j, expand4(0u));
    }

    // Shift by half a texel before flooring to find the lower texel index on each axis.
    u -= 0.5f;
    v -= 0.5f;

    u32x4 i0 = to_u32x4(floor_int_range(u));
    u32x4 i1 = i0 + 1;
    u32x4 j0 = to_u32x4(floor_int_range(v));
    u32x4 j1 = j0 + 1;

    bool const repeats_in_u = m_config.texture_wrap_u == TextureWrapMode::Repeat;
    bool const repeats_in_v = m_config.texture_wrap_v == TextureWrapMode::Repeat;

    // In Repeat mode the texel indices wrap around the image on that axis.
    if (repeats_in_u) {
        if (image.width_is_power_of_two()) {
            i0 = i0 & width_mask;
            i1 = i1 & width_mask;
        } else {
            i0 = i0 % width;
            i1 = i1 % width;
        }
    }

    if (repeats_in_v) {
        if (image.height_is_power_of_two()) {
            j0 = j0 & height_mask;
            j1 = j1 & height_mask;
        } else {
            j0 = j0 % height;
            j1 = j1 % height;
        }
    }

    u32x4 const k = expand4(0u);

    // texel_XY: X selects i0/i1, Y selects j0/j1.
    Vector4<f32x4> texel_00, texel_10, texel_01, texel_11;

    if (repeats_in_u && repeats_in_v) {
        // All indices were wrapped above, so plain fetches are used.
        texel_00 = texel4(image, layer, level, i0, j0, k);
        texel_10 = texel4(image, layer, level, i1, j0, k);
        texel_01 = texel4(image, layer, level, i0, j1, k);
        texel_11 = texel4(image, layer, level, i1, j1, k);
    } else {
        // Out-of-range indices are replaced by the configured border color.
        texel_00 = texel4border(image, layer, level, i0, j0, k, m_config.border_color, width, height);
        texel_10 = texel4border(image, layer, level, i1, j0, k, m_config.border_color, width, height);
        texel_01 = texel4border(image, layer, level, i0, j1, k, m_config.border_color, width, height);
        texel_11 = texel4border(image, layer, level, i1, j1, k, m_config.border_color, width, height);
    }

    // Blend along u first, then along v.
    f32x4 const alpha = frac_int_range(u);
    f32x4 const beta = frac_int_range(v);

    auto const blended_row_0 = mix(texel_00, texel_10, alpha);
    auto const blended_row_1 = mix(texel_01, texel_11, alpha);
    return mix(blended_row_0, blended_row_1, beta);
}
|
|
|
|
|
|
|
|
}
|