ladybird/Userland/Libraries/LibSoftGPU/Device.cpp
Jelle Raaijmakers 526390ec06 LibSoftGPU: Move back to i32-based subpixels
Our move to floating point precision has eradicated the pixel artifacts
in Quake 1, but introduced new and not so subtle rendering glitches in
games like Tux Racer. This commit changes three things to get the best
of both worlds:

1. Subpixel logic based on `i32` types was reintroduced, the number of
   bits is set to 6. This reintroduces the artifacts in Quake 1 but
   fixes rendering of Tux Racer.

2. Before triangle culling, subpixel coordinates are calculated and
   stored in `Triangle`. These coordinates are rounded, which fixes the
   Quake 1 artifacts. Tux Racer is unaffected.

3. The triangle area (actually parallelogram area) is also stored in
   `Triangle` so we don't need to recalculate it later on. In our
   previous subpixel code, there was a subtle disconnect between the
   two calculations (one with and one without subpixel precision) which
   resulted in triangles incorrectly being culled. This fixes some
   remaining Quake 1 artifacts.
2022-05-05 20:50:46 +02:00

1324 lines
57 KiB
C++

/*
* Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
* Copyright (c) 2021, Jesse Buhagiar <jooster669@gmail.com>
* Copyright (c) 2022, Jelle Raaijmakers <jelle@gmta.nl>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Error.h>
#include <AK/Math.h>
#include <AK/NumericLimits.h>
#include <AK/SIMDExtras.h>
#include <AK/SIMDMath.h>
#include <LibCore/ElapsedTimer.h>
#include <LibGfx/Painter.h>
#include <LibGfx/Vector2.h>
#include <LibGfx/Vector3.h>
#include <LibSoftGPU/Config.h>
#include <LibSoftGPU/Device.h>
#include <LibSoftGPU/PixelQuad.h>
#include <LibSoftGPU/SIMD.h>
#include <math.h>
namespace SoftGPU {
// File-local statistics counters. The rasterizer bumps them through INCREASE_STATISTICS_COUNTER
// (triangles rasterized, covered pixels, shaded pixels, blended pixels, stencil writes, 2x2 quads).
// NOTE(review): g_num_sampler_calls is presumably incremented by the texture sampling path, which
// is not in this part of the file - confirm.
static u64 g_num_rasterized_triangles;
static u64 g_num_pixels;
static u64 g_num_pixels_shaded;
static u64 g_num_pixels_blended;
static u64 g_num_sampler_calls;
static u64 g_num_stencil_writes;
static u64 g_num_quads;
using AK::abs;
using AK::SIMD::any;
using AK::SIMD::exp;
using AK::SIMD::expand4;
using AK::SIMD::f32x4;
using AK::SIMD::i32x4;
using AK::SIMD::load4_masked;
using AK::SIMD::maskbits;
using AK::SIMD::maskcount;
using AK::SIMD::none;
using AK::SIMD::store4_masked;
using AK::SIMD::to_f32x4;
using AK::SIMD::to_u32x4;
using AK::SIMD::u32x4;
// Number of subpixel steps per whole pixel (2^SUBPIXEL_BITS). Pixel coordinates are multiplied by
// this to enter subpixel space, and subpixel coordinates are divided by it to get back to pixels.
static constexpr int subpixel_factor = 1 << SUBPIXEL_BITS;
// Signed area of the parallelogram spanned by the edges a->b and a->c, i.e. _twice_ the signed
// area of the triangle {a, b, c}. The result is positive when a, b and c wind counter-clockwise
// in a coordinate system whose y axis points up, and negative for the opposite winding.
constexpr static i32 edge_function(IntVector2 const& a, IntVector2 const& b, IntVector2 const& c)
{
    auto const ab_x = b.x() - a.x();
    auto const ab_y = b.y() - a.y();
    auto const ac_x = c.x() - a.x();
    auto const ac_y = c.y() - a.y();
    return ac_y * ab_x - ac_x * ab_y;
}
// Four-wide variant of the edge function: evaluates the edge a->b against four points at once,
// where `c` holds the x and y coordinates of those points in its SIMD lanes.
constexpr static i32x4 edge_function4(IntVector2 const& a, IntVector2 const& b, Vector2<i32x4> const& c)
{
    auto const ab_x = b.x() - a.x();
    auto const ab_y = b.y() - a.y();
    auto const ac_x = c.x() - a.x();
    auto const ac_y = c.y() - a.y();
    return ac_y * ab_x - ac_x * ab_y;
}
// Blends three per-vertex values using the given barycentric weights:
// v0 is weighted by x, v1 by y and v2 by z.
template<typename T, typename U>
constexpr static auto interpolate(T const& v0, T const& v1, T const& v2, Vector3<U> const& barycentric_coords)
{
    auto const weight0 = barycentric_coords.x();
    auto const weight1 = barycentric_coords.y();
    auto const weight2 = barycentric_coords.z();
    return v0 * weight0 + v1 * weight1 + v2 * weight2;
}
// Packs a floating point RGBA color (x = red, y = green, z = blue, w = alpha) into a 32-bit value
// with alpha in bits 24-31, red in 16-23, green in 8-15 and blue in 0-7.
static GPU::ColorType to_bgra32(FloatVector4 const& color)
{
    // Saturate to [0, 1] first; the conversion to 8 bits truncates rather than rounds.
    auto const saturated = color.clamped(0.0f, 1.0f);
    auto const to_channel = [](float value) { return static_cast<u8>(value * 255); };

    auto const red = to_channel(saturated.x());
    auto const green = to_channel(saturated.y());
    auto const blue = to_channel(saturated.z());
    auto const alpha = to_channel(saturated.w());
    return alpha << 24 | red << 16 | green << 8 | blue;
}
// Four-wide variant: packs four floating point RGBA colors (one per SIMD lane) into four 32-bit
// values with alpha in bits 24-31, red in 16-23, green in 8-15 and blue in 0-7.
ALWAYS_INLINE static u32x4 to_bgra32(Vector4<f32x4> const& v)
{
    // Saturate every channel to [0, 1] before scaling to the 0..255 range.
    auto const saturated = v.clamped(expand4(0.0f), expand4(1.0f));

    auto const red = to_u32x4(saturated.x() * 255);
    auto const green = to_u32x4(saturated.y() * 255);
    auto const blue = to_u32x4(saturated.z() * 255);
    auto const alpha = to_u32x4(saturated.w() * 255);
    return (alpha << 24) | (red << 16) | (green << 8) | blue;
}
// Unpacks four packed 32-bit colors (alpha in bits 24-31, red in 16-23, green in 8-15, blue in
// 0-7) into floating point channels scaled by 1/255, returned as { red, green, blue, alpha }.
static Vector4<f32x4> to_vec4(u32x4 bgra)
{
    auto constexpr channel_scale = expand4(1.0f / 255);

    auto const red = to_f32x4((bgra >> 16) & 0xff) * channel_scale;
    auto const green = to_f32x4((bgra >> 8) & 0xff) * channel_scale;
    auto const blue = to_f32x4(bgra & 0xff) * channel_scale;
    auto const alpha = to_f32x4((bgra >> 24) & 0xff) * channel_scale;
    return { red, green, blue, alpha };
}
void Device::setup_blend_factors()
{
m_alpha_blend_factors = {};
switch (m_options.blend_source_factor) {
case GPU::BlendFactor::Zero:
break;
case GPU::BlendFactor::One:
m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
break;
case GPU::BlendFactor::SrcColor:
m_alpha_blend_factors.src_factor_src_color = 1;
break;
case GPU::BlendFactor::OneMinusSrcColor:
m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.src_factor_src_color = -1;
break;
case GPU::BlendFactor::SrcAlpha:
m_alpha_blend_factors.src_factor_src_alpha = 1;
break;
case GPU::BlendFactor::OneMinusSrcAlpha:
m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.src_factor_src_alpha = -1;
break;
case GPU::BlendFactor::DstAlpha:
m_alpha_blend_factors.src_factor_dst_alpha = 1;
break;
case GPU::BlendFactor::OneMinusDstAlpha:
m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.src_factor_dst_alpha = -1;
break;
case GPU::BlendFactor::DstColor:
m_alpha_blend_factors.src_factor_dst_color = 1;
break;
case GPU::BlendFactor::OneMinusDstColor:
m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.src_factor_dst_color = -1;
break;
case GPU::BlendFactor::SrcAlphaSaturate:
default:
VERIFY_NOT_REACHED();
}
switch (m_options.blend_destination_factor) {
case GPU::BlendFactor::Zero:
break;
case GPU::BlendFactor::One:
m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
break;
case GPU::BlendFactor::SrcColor:
m_alpha_blend_factors.dst_factor_src_color = 1;
break;
case GPU::BlendFactor::OneMinusSrcColor:
m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.dst_factor_src_color = -1;
break;
case GPU::BlendFactor::SrcAlpha:
m_alpha_blend_factors.dst_factor_src_alpha = 1;
break;
case GPU::BlendFactor::OneMinusSrcAlpha:
m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.dst_factor_src_alpha = -1;
break;
case GPU::BlendFactor::DstAlpha:
m_alpha_blend_factors.dst_factor_dst_alpha = 1;
break;
case GPU::BlendFactor::OneMinusDstAlpha:
m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.dst_factor_dst_alpha = -1;
break;
case GPU::BlendFactor::DstColor:
m_alpha_blend_factors.dst_factor_dst_color = 1;
break;
case GPU::BlendFactor::OneMinusDstColor:
m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.dst_factor_dst_color = -1;
break;
case GPU::BlendFactor::SrcAlphaSaturate:
default:
VERIFY_NOT_REACHED();
}
}
// Rasterizes a single triangle into the frame buffer, processing 2x2 pixel quads at a time.
//
// For every quad inside the triangle's (scissored) bounding box, the following steps run in order:
//   1. Coverage: integer edge functions on subpixel coordinates, sampled at pixel centers.
//   2. Clipping of the quad against the render bounds (frame buffer rect, optionally scissored).
//   3. Stencil test, including the stencil write for pixels that fail it.
//   4. Depth test, including the stencil write for pixels that fail it.
//   5. Stencil write for pixels that passed both tests.
//   6. Interpolation of color, texture coordinates and fog depth, followed by shade_fragments().
//   7. Alpha test, depth write, optional blending and the color-masked write to the color buffer.
//
// quad.mask tracks which of the four pixels are still alive; a quad is abandoned as soon as the
// mask becomes empty.
//
// The triangle is expected to carry precomputed subpixel_coordinates and area.
void Device::rasterize_triangle(Triangle const& triangle)
{
INCREASE_STATISTICS_COUNTER(g_num_rasterized_triangles, 1);
// With an alpha test function of Never no fragment can ever pass, so there is nothing to draw
if (m_options.enable_alpha_test && m_options.alpha_test_func == GPU::AlphaTestFunction::Never)
return;
auto const& vertex0 = triangle.vertices[0];
auto const& vertex1 = triangle.vertices[1];
auto const& vertex2 = triangle.vertices[2];
auto const& v0 = triangle.subpixel_coordinates[0];
auto const& v1 = triangle.subpixel_coordinates[1];
auto const& v2 = triangle.subpixel_coordinates[2];
// Scaling the three edge values by this turns them into barycentric weights.
// NOTE(review): assumes triangle.area is non-zero, i.e. that degenerate triangles never get
// here - confirm against the caller.
auto const one_over_area = 1.0f / triangle.area;
auto render_bounds = m_frame_buffer->rect();
if (m_options.scissor_enabled)
render_bounds.intersect(m_options.scissor_box);
// This function calculates the 3 edge values for the pixel relative to the triangle.
auto calculate_edge_values4 = [v0, v1, v2](Vector2<i32x4> const& p) -> Vector3<i32x4> {
return {
edge_function4(v1, v2, p),
edge_function4(v2, v0, p),
edge_function4(v0, v1, p),
};
};
// Zero is used in testing against edge values below, applying the "top-left rule". If a pixel
// lies exactly on an edge shared by two triangles, we only render that pixel if the edge in
// question is a "top" or "left" edge. By setting either a 1 or 0, we effectively change the
// comparisons against the edge values below from "> 0" into ">= 0".
IntVector3 const zero {
(v2.y() < v1.y() || (v2.y() == v1.y() && v2.x() < v1.x())) ? 0 : 1,
(v0.y() < v2.y() || (v0.y() == v2.y() && v0.x() < v2.x())) ? 0 : 1,
(v1.y() < v0.y() || (v1.y() == v0.y() && v1.x() < v0.x())) ? 0 : 1,
};
// This function tests whether a point as identified by its 3 edge values lies within the triangle
auto test_point4 = [zero](Vector3<i32x4> const& edges) -> i32x4 {
return edges.x() >= zero.x()
&& edges.y() >= zero.y()
&& edges.z() >= zero.z();
};
// Calculate block-based bounds
// The bounds are in whole pixels, aligned down to even coordinates (`& ~1`) so that the loops
// below can step in 2x2 quads; bx1/by1 are exclusive end coordinates.
// clang-format off
int const bx0 = max(render_bounds.left(), min(min(v0.x(), v1.x()), v2.x()) / subpixel_factor) & ~1;
int const bx1 = (min(render_bounds.right(), max(max(v0.x(), v1.x()), v2.x()) / subpixel_factor) & ~1) + 2;
int const by0 = max(render_bounds.top(), min(min(v0.y(), v1.y()), v2.y()) / subpixel_factor) & ~1;
int const by1 = (min(render_bounds.bottom(), max(max(v0.y(), v1.y()), v2.y()) / subpixel_factor) & ~1) + 2;
// clang-format on
// Calculate depth of fragment for fog;
// OpenGL 1.5 spec chapter 3.10: "An implementation may choose to approximate the
// eye-coordinate distance from the eye to each fragment center by |Ze|."
Vector3<f32x4> fog_depth;
if (m_options.fog_enabled) {
fog_depth = {
expand4(abs(vertex0.eye_coordinates.z())),
expand4(abs(vertex1.eye_coordinates.z())),
expand4(abs(vertex2.eye_coordinates.z())),
};
}
auto const render_bounds_left = render_bounds.left();
auto const render_bounds_right = render_bounds.right();
auto const render_bounds_top = render_bounds.top();
auto const render_bounds_bottom = render_bounds.bottom();
// Added to the quad's subpixel coordinates so that coverage is sampled at pixel centers
auto const half_pixel_offset = Vector2<i32x4> { expand4(subpixel_factor / 2), expand4(subpixel_factor / 2) };
auto color_buffer = m_frame_buffer->color_buffer();
auto depth_buffer = m_frame_buffer->depth_buffer();
auto stencil_buffer = m_frame_buffer->stencil_buffer();
auto const window_z_coordinates = Vector3<f32x4> {
expand4(vertex0.window_coordinates.z()),
expand4(vertex1.window_coordinates.z()),
expand4(vertex2.window_coordinates.z()),
};
auto const window_w_coordinates = Vector3<f32x4> {
expand4(vertex0.window_coordinates.w()),
expand4(vertex1.window_coordinates.w()),
expand4(vertex2.window_coordinates.w()),
};
// Stencil configuration and writing
// NOTE(review): the front face configuration is used unconditionally here, regardless of the
// facing of the triangle - confirm whether back face stencil state is handled elsewhere.
auto const& stencil_configuration = m_stencil_configuration[GPU::Face::Front];
auto const stencil_reference_value = stencil_configuration.reference_value & stencil_configuration.test_mask;
// Applies stencil operation `op` to `stencil_value` (only touching the bits in `write_mask`) and
// stores the result for the pixels selected by `pixel_mask`. Does nothing for Keep or an empty
// write mask.
auto write_to_stencil = [](GPU::StencilType* stencil_ptrs[4], i32x4 stencil_value, GPU::StencilOperation op, GPU::StencilType reference_value, GPU::StencilType write_mask, i32x4 pixel_mask) {
if (write_mask == 0 || op == GPU::StencilOperation::Keep)
return;
switch (op) {
case GPU::StencilOperation::Decrement:
stencil_value = (stencil_value & ~write_mask) | (max(stencil_value - 1, expand4(0)) & write_mask);
break;
case GPU::StencilOperation::DecrementWrap:
stencil_value = (stencil_value & ~write_mask) | (((stencil_value - 1) & 0xFF) & write_mask);
break;
case GPU::StencilOperation::Increment:
stencil_value = (stencil_value & ~write_mask) | (min(stencil_value + 1, expand4(0xFF)) & write_mask);
break;
case GPU::StencilOperation::IncrementWrap:
stencil_value = (stencil_value & ~write_mask) | (((stencil_value + 1) & 0xFF) & write_mask);
break;
case GPU::StencilOperation::Invert:
stencil_value ^= write_mask;
break;
case GPU::StencilOperation::Replace:
stencil_value = (stencil_value & ~write_mask) | (reference_value & write_mask);
break;
case GPU::StencilOperation::Zero:
stencil_value &= ~write_mask;
break;
default:
VERIFY_NOT_REACHED();
}
INCREASE_STATISTICS_COUNTER(g_num_stencil_writes, maskcount(pixel_mask));
store4_masked(stencil_value, stencil_ptrs[0], stencil_ptrs[1], stencil_ptrs[2], stencil_ptrs[3], pixel_mask);
};
// Iterate over all blocks within the bounds of the triangle
for (int by = by0; by < by1; by += 2) {
for (int bx = bx0; bx < bx1; bx += 2) {
PixelQuad quad;
// Lane order within a quad: top-left, top-right, bottom-left, bottom-right
quad.screen_coordinates = {
i32x4 { bx, bx + 1, bx, bx + 1 },
i32x4 { by, by, by + 1, by + 1 },
};
auto edge_values = calculate_edge_values4(quad.screen_coordinates * subpixel_factor + half_pixel_offset);
// Generate triangle coverage mask
quad.mask = test_point4(edge_values);
// Test quad against intersection of render target size and scissor rect
quad.mask &= quad.screen_coordinates.x() >= render_bounds_left
&& quad.screen_coordinates.x() <= render_bounds_right
&& quad.screen_coordinates.y() >= render_bounds_top
&& quad.screen_coordinates.y() <= render_bounds_bottom;
if (none(quad.mask))
continue;
INCREASE_STATISTICS_COUNTER(g_num_quads, 1);
INCREASE_STATISTICS_COUNTER(g_num_pixels, maskcount(quad.mask));
// Snapshot of the initial coverage; it decides which buffer pointers below are non-null. Later
// rejections (stencil, depth, alpha) only narrow quad.mask, which is what the masked loads and
// stores are given.
int coverage_bits = maskbits(quad.mask);
// Stencil testing
// stencil_ptrs and stencil_value are only initialized (and only used) when stencil testing is enabled
GPU::StencilType* stencil_ptrs[4];
i32x4 stencil_value;
if (m_options.enable_stencil_test) {
stencil_ptrs[0] = coverage_bits & 1 ? &stencil_buffer->scanline(by)[bx] : nullptr;
stencil_ptrs[1] = coverage_bits & 2 ? &stencil_buffer->scanline(by)[bx + 1] : nullptr;
stencil_ptrs[2] = coverage_bits & 4 ? &stencil_buffer->scanline(by + 1)[bx] : nullptr;
stencil_ptrs[3] = coverage_bits & 8 ? &stencil_buffer->scanline(by + 1)[bx + 1] : nullptr;
stencil_value = load4_masked(stencil_ptrs[0], stencil_ptrs[1], stencil_ptrs[2], stencil_ptrs[3], quad.mask);
// NOTE(review): the value masked with test_mask here is the same one later handed to
// write_to_stencil(), so stored results appear to lose any bits outside of test_mask -
// confirm whether the unmasked buffer value should be used for writing instead.
stencil_value &= stencil_configuration.test_mask;
i32x4 stencil_test_passed;
switch (stencil_configuration.test_function) {
case GPU::StencilTestFunction::Always:
stencil_test_passed = expand4(~0);
break;
case GPU::StencilTestFunction::Equal:
stencil_test_passed = stencil_value == stencil_reference_value;
break;
case GPU::StencilTestFunction::Greater:
stencil_test_passed = stencil_value > stencil_reference_value;
break;
case GPU::StencilTestFunction::GreaterOrEqual:
stencil_test_passed = stencil_value >= stencil_reference_value;
break;
case GPU::StencilTestFunction::Less:
stencil_test_passed = stencil_value < stencil_reference_value;
break;
case GPU::StencilTestFunction::LessOrEqual:
stencil_test_passed = stencil_value <= stencil_reference_value;
break;
case GPU::StencilTestFunction::Never:
stencil_test_passed = expand4(0);
break;
case GPU::StencilTestFunction::NotEqual:
stencil_test_passed = stencil_value != stencil_reference_value;
break;
default:
VERIFY_NOT_REACHED();
}
// Update stencil buffer for pixels that failed the stencil test
write_to_stencil(
stencil_ptrs,
stencil_value,
stencil_configuration.on_stencil_test_fail,
stencil_reference_value,
stencil_configuration.write_mask,
quad.mask & ~stencil_test_passed);
// Update coverage mask + early quad rejection
quad.mask &= stencil_test_passed;
if (none(quad.mask))
continue;
}
// Calculate barycentric coordinates from previously calculated edge values
quad.barycentrics = Vector3<f32x4> {
to_f32x4(edge_values.x()),
to_f32x4(edge_values.y()),
to_f32x4(edge_values.z()),
} * one_over_area;
// Depth testing
GPU::DepthType* depth_ptrs[4] = {
coverage_bits & 1 ? &depth_buffer->scanline(by)[bx] : nullptr,
coverage_bits & 2 ? &depth_buffer->scanline(by)[bx + 1] : nullptr,
coverage_bits & 4 ? &depth_buffer->scanline(by + 1)[bx] : nullptr,
coverage_bits & 8 ? &depth_buffer->scanline(by + 1)[bx + 1] : nullptr,
};
if (m_options.enable_depth_test) {
auto depth = load4_masked(depth_ptrs[0], depth_ptrs[1], depth_ptrs[2], depth_ptrs[3], quad.mask);
// Fragment depth is the window-space z of the three vertices weighted by the barycentrics
quad.depth = window_z_coordinates.dot(quad.barycentrics);
// FIXME: Also apply depth_offset_factor which depends on the depth gradient
if (m_options.depth_offset_enabled)
quad.depth += m_options.depth_offset_constant * NumericLimits<float>::epsilon();
i32x4 depth_test_passed;
switch (m_options.depth_func) {
case GPU::DepthTestFunction::Always:
depth_test_passed = expand4(~0);
break;
case GPU::DepthTestFunction::Never:
depth_test_passed = expand4(0);
break;
case GPU::DepthTestFunction::Greater:
depth_test_passed = quad.depth > depth;
break;
case GPU::DepthTestFunction::GreaterOrEqual:
depth_test_passed = quad.depth >= depth;
break;
case GPU::DepthTestFunction::NotEqual:
#ifdef __SSE__
depth_test_passed = quad.depth != depth;
#else
// Without SSE, compare the 32-bit representations instead; see the explanation at Equal below
depth_test_passed = i32x4 {
bit_cast<u32>(quad.depth[0]) != bit_cast<u32>(depth[0]) ? -1 : 0,
bit_cast<u32>(quad.depth[1]) != bit_cast<u32>(depth[1]) ? -1 : 0,
bit_cast<u32>(quad.depth[2]) != bit_cast<u32>(depth[2]) ? -1 : 0,
bit_cast<u32>(quad.depth[3]) != bit_cast<u32>(depth[3]) ? -1 : 0,
};
#endif
break;
case GPU::DepthTestFunction::Equal:
#ifdef __SSE__
depth_test_passed = quad.depth == depth;
#else
//
// This is an interesting quirk that occurs due to us using the x87 FPU when Serenity is
// compiled for the i386 target. When we calculate our depth value to be stored in the buffer,
// it is an 80-bit x87 floating point number, however, when stored into the depth buffer, this is
// truncated to 32 bits. This 38 bit loss of precision means that when x87 `FCOMP` is eventually
// used here the comparison fails.
// This could be solved by using a `long double` for the depth buffer, however this would take
// up significantly more space and is completely overkill for a depth buffer. As such, comparing
// the first 32-bits of this depth value is "good enough" that if we get a hit on it being
// equal, we can pretty much guarantee that it's actually equal.
//
depth_test_passed = i32x4 {
bit_cast<u32>(quad.depth[0]) == bit_cast<u32>(depth[0]) ? -1 : 0,
bit_cast<u32>(quad.depth[1]) == bit_cast<u32>(depth[1]) ? -1 : 0,
bit_cast<u32>(quad.depth[2]) == bit_cast<u32>(depth[2]) ? -1 : 0,
bit_cast<u32>(quad.depth[3]) == bit_cast<u32>(depth[3]) ? -1 : 0,
};
#endif
break;
case GPU::DepthTestFunction::LessOrEqual:
depth_test_passed = quad.depth <= depth;
break;
case GPU::DepthTestFunction::Less:
depth_test_passed = quad.depth < depth;
break;
default:
VERIFY_NOT_REACHED();
}
// Update stencil buffer for pixels that failed the depth test
if (m_options.enable_stencil_test) {
write_to_stencil(
stencil_ptrs,
stencil_value,
stencil_configuration.on_depth_test_fail,
stencil_reference_value,
stencil_configuration.write_mask,
quad.mask & ~depth_test_passed);
}
// Update coverage mask + early quad rejection
quad.mask &= depth_test_passed;
if (none(quad.mask))
continue;
}
// Update stencil buffer for passed pixels
if (m_options.enable_stencil_test) {
write_to_stencil(
stencil_ptrs,
stencil_value,
stencil_configuration.on_pass,
stencil_reference_value,
stencil_configuration.write_mask,
quad.mask);
}
INCREASE_STATISTICS_COUNTER(g_num_pixels_shaded, maskcount(quad.mask));
// Draw the pixels according to the previously generated mask
// Re-weight the barycentrics by the per-vertex window w and renormalize before interpolating
// vertex attributes. NOTE(review): going by the variable name, window_coordinates.w() presumably
// holds 1/w, which would make this the perspective correction - confirm where it is written.
auto const interpolated_reciprocal_w = window_w_coordinates.dot(quad.barycentrics);
quad.barycentrics = quad.barycentrics * window_w_coordinates / interpolated_reciprocal_w;
// FIXME: make this more generic. We want to interpolate more than just color and uv
if (m_options.shade_smooth)
quad.vertex_color = interpolate(expand4(vertex0.color), expand4(vertex1.color), expand4(vertex2.color), quad.barycentrics);
else
// Flat shading: the color of the first vertex is used for the whole triangle
quad.vertex_color = expand4(vertex0.color);
for (size_t i = 0; i < GPU::NUM_SAMPLERS; ++i)
quad.texture_coordinates[i] = interpolate(expand4(vertex0.tex_coords[i]), expand4(vertex1.tex_coords[i]), expand4(vertex2.tex_coords[i]), quad.barycentrics);
if (m_options.fog_enabled)
quad.fog_depth = fog_depth.dot(quad.barycentrics);
shade_fragments(quad);
// NOTE(review): test_alpha() presumably narrows quad.mask and returns false when no pixel of the
// quad survives - its definition is not in this part of the file, confirm.
if (m_options.enable_alpha_test && m_options.alpha_test_func != GPU::AlphaTestFunction::Always && !test_alpha(quad))
continue;
// Write to depth buffer
if (m_options.enable_depth_test && m_options.enable_depth_write)
store4_masked(quad.depth, depth_ptrs[0], depth_ptrs[1], depth_ptrs[2], depth_ptrs[3], quad.mask);
// We will not update the color buffer at all
if ((m_options.color_mask == 0) || !m_options.enable_color_write)
continue;
GPU::ColorType* color_ptrs[4] = {
coverage_bits & 1 ? &color_buffer->scanline(by)[bx] : nullptr,
coverage_bits & 2 ? &color_buffer->scanline(by)[bx + 1] : nullptr,
coverage_bits & 4 ? &color_buffer->scanline(by + 1)[bx] : nullptr,
coverage_bits & 8 ? &color_buffer->scanline(by + 1)[bx + 1] : nullptr,
};
// The existing color is only needed for blending or for a partial color mask; dst_u32 is left
// uninitialized (and unused) otherwise
u32x4 dst_u32;
if (m_options.enable_blending || m_options.color_mask != 0xffffffff)
dst_u32 = load4_masked(color_ptrs[0], color_ptrs[1], color_ptrs[2], color_ptrs[3], quad.mask);
if (m_options.enable_blending) {
INCREASE_STATISTICS_COUNTER(g_num_pixels_blended, maskcount(quad.mask));
// Blend color values from pixel_staging into color_buffer
// Both factors are evaluated as constant + weighted src color/alpha + weighted dst color/alpha,
// using the terms prepared in m_alpha_blend_factors
Vector4<f32x4> const& src = quad.out_color;
auto dst = to_vec4(dst_u32);
auto src_factor = expand4(m_alpha_blend_factors.src_constant)
+ src * m_alpha_blend_factors.src_factor_src_color
+ Vector4<f32x4> { src.w(), src.w(), src.w(), src.w() } * m_alpha_blend_factors.src_factor_src_alpha
+ dst * m_alpha_blend_factors.src_factor_dst_color
+ Vector4<f32x4> { dst.w(), dst.w(), dst.w(), dst.w() } * m_alpha_blend_factors.src_factor_dst_alpha;
auto dst_factor = expand4(m_alpha_blend_factors.dst_constant)
+ src * m_alpha_blend_factors.dst_factor_src_color
+ Vector4<f32x4> { src.w(), src.w(), src.w(), src.w() } * m_alpha_blend_factors.dst_factor_src_alpha
+ dst * m_alpha_blend_factors.dst_factor_dst_color
+ Vector4<f32x4> { dst.w(), dst.w(), dst.w(), dst.w() } * m_alpha_blend_factors.dst_factor_dst_alpha;
quad.out_color = src * src_factor + dst * dst_factor;
}
// With a partial color mask, bits outside the mask keep the value already in the color buffer
if (m_options.color_mask == 0xffffffff)
store4_masked(to_bgra32(quad.out_color), color_ptrs[0], color_ptrs[1], color_ptrs[2], color_ptrs[3], quad.mask);
else
store4_masked((to_bgra32(quad.out_color) & m_options.color_mask) | (dst_u32 & ~m_options.color_mask), color_ptrs[0], color_ptrs[1], color_ptrs[2], color_ptrs[3], quad.mask);
}
}
}
// Creates a device with a frame buffer of the given size. Both the scissor box and the viewport
// initially cover the whole frame buffer.
Device::Device(Gfx::IntSize const& size)
    : m_frame_buffer(FrameBuffer<GPU::ColorType, GPU::DepthType, GPU::StencilType>::try_create(size).release_value_but_fixme_should_propagate_errors())
{
    auto const frame_buffer_rect = m_frame_buffer->rect();
    m_options.scissor_box = frame_buffer_rect;
    m_options.viewport = frame_buffer_rect;
}
// Describes this software device: its vendor and device names, the number of texture units and
// lights, the stencil depth in bits (derived from the stencil storage type) and whether textures
// with non-power-of-two dimensions are supported.
GPU::DeviceInfo Device::info() const
{
    GPU::DeviceInfo device_info {
        .vendor_name = "SerenityOS",
        .device_name = "SoftGPU",
        .num_texture_units = GPU::NUM_SAMPLERS,
        .num_lights = NUM_LIGHTS,
        .stencil_bits = sizeof(GPU::StencilType) * 8,
        .supports_npot_textures = true,
    };
    return device_info;
}
// Applies automatic texture coordinate generation to every texture coordinate set of `vertex`.
// For each set, the S, T, R and Q components (component indices 0 to 3) that are enabled in
// `options` are replaced by a generated value; all other components keep their current value.
static void generate_texture_coordinates(GPU::Vertex& vertex, GPU::RasterizerOptions const& options)
{
    // Reflects the normalized eye vector about the vertex normal; shared by the sphere map and
    // reflection map modes.
    auto const reflect_eye_vector = [&vertex]() -> FloatVector3 {
        FloatVector3 const eye_direction = vertex.eye_coordinates.normalized().xyz();
        auto const surface_normal = vertex.normal;
        return eye_direction - surface_normal * 2 * surface_normal.dot(eye_direction);
    };

    // Generates a single component of one texture coordinate set according to its configured mode.
    auto const generate_component = [&](size_t unit_index, size_t component_index) -> float {
        auto const& component_config = options.texcoord_generation_config[unit_index][component_index];
        switch (component_config.mode) {
        case GPU::TexCoordGenerationMode::ObjectLinear:
            return component_config.coefficients.dot(vertex.position);
        case GPU::TexCoordGenerationMode::EyeLinear:
            return component_config.coefficients.dot(vertex.eye_coordinates);
        case GPU::TexCoordGenerationMode::SphereMap: {
            auto reflection = reflect_eye_vector();
            reflection.set_z(reflection.z() + 1);
            return reflection[component_index] / (2 * reflection.length()) + 0.5f;
        }
        case GPU::TexCoordGenerationMode::ReflectionMap:
            return reflect_eye_vector()[component_index];
        case GPU::TexCoordGenerationMode::NormalMap:
            return vertex.normal[component_index];
        default:
            VERIFY_NOT_REACHED();
        }
    };

    for (size_t unit_index = 0; unit_index < vertex.tex_coords.size(); ++unit_index) {
        auto& coordinates = vertex.tex_coords[unit_index];
        auto const enabled_mask = options.texcoord_generation_enabled_coordinates[unit_index];
        auto const is_enabled = [&](auto coordinate_bit) { return (enabled_mask & coordinate_bit) > 0; };

        auto const s = is_enabled(GPU::TexCoordGenerationCoordinate::S) ? generate_component(unit_index, 0) : coordinates.x();
        auto const t = is_enabled(GPU::TexCoordGenerationCoordinate::T) ? generate_component(unit_index, 1) : coordinates.y();
        auto const r = is_enabled(GPU::TexCoordGenerationCoordinate::R) ? generate_component(unit_index, 2) : coordinates.z();
        auto const q = is_enabled(GPU::TexCoordGenerationCoordinate::Q) ? generate_component(unit_index, 3) : coordinates.w();
        coordinates = { s, t, r, q };
    }
}
void Device::draw_primitives(GPU::PrimitiveType primitive_type, FloatMatrix4x4 const& model_view_transform, FloatMatrix4x4 const& projection_transform,
FloatMatrix4x4 const& texture_transform, Vector<GPU::Vertex> const& vertices, Vector<size_t> const& enabled_texture_units)
{
// At this point, the user has effectively specified that they are done with defining the geometry
// of what they want to draw. We now need to do a few things (https://www.khronos.org/opengl/wiki/Rendering_Pipeline_Overview):
//
// 1. Transform all of the vertices in the current vertex list into eye space by multiplying the model-view matrix
// 2. Transform all of the vertices from eye space into clip space by multiplying by the projection matrix
// 3. If culling is enabled, we cull the desired faces (https://learnopengl.com/Advanced-OpenGL/Face-culling)
// 4. Each element of the vertex is then divided by w to bring the positions into NDC (Normalized Device Coordinates)
// 5. The vertices are sorted (for the rasterizer, how are we doing this? 3Dfx did this top to bottom in terms of vertex y coordinates)
// 6. The vertices are then sent off to the rasterizer and drawn to the screen
m_enabled_texture_units = enabled_texture_units;
m_triangle_list.clear_with_capacity();
m_processed_triangles.clear_with_capacity();
// Let's construct some triangles
if (primitive_type == GPU::PrimitiveType::Triangles) {
Triangle triangle;
if (vertices.size() < 3)
return;
for (size_t i = 0; i < vertices.size() - 2; i += 3) {
triangle.vertices[0] = vertices.at(i);
triangle.vertices[1] = vertices.at(i + 1);
triangle.vertices[2] = vertices.at(i + 2);
m_triangle_list.append(triangle);
}
} else if (primitive_type == GPU::PrimitiveType::Quads) {
// We need to construct two triangles to form the quad
Triangle triangle;
if (vertices.size() < 4)
return;
for (size_t i = 0; i < vertices.size() - 3; i += 4) {
// Triangle 1
triangle.vertices[0] = vertices.at(i);
triangle.vertices[1] = vertices.at(i + 1);
triangle.vertices[2] = vertices.at(i + 2);
m_triangle_list.append(triangle);
// Triangle 2
triangle.vertices[0] = vertices.at(i + 2);
triangle.vertices[1] = vertices.at(i + 3);
triangle.vertices[2] = vertices.at(i);
m_triangle_list.append(triangle);
}
} else if (primitive_type == GPU::PrimitiveType::TriangleFan) {
Triangle triangle;
triangle.vertices[0] = vertices.at(0); // Root vertex is always the vertex defined first
// This is technically `n-2` triangles. We start at index 1
for (size_t i = 1; i < vertices.size() - 1; i++) {
triangle.vertices[1] = vertices.at(i);
triangle.vertices[2] = vertices.at(i + 1);
m_triangle_list.append(triangle);
}
} else if (primitive_type == GPU::PrimitiveType::TriangleStrip) {
Triangle triangle;
if (vertices.size() < 3)
return;
for (size_t i = 0; i < vertices.size() - 2; i++) {
if (i % 2 == 0) {
triangle.vertices[0] = vertices.at(i);
triangle.vertices[1] = vertices.at(i + 1);
triangle.vertices[2] = vertices.at(i + 2);
} else {
triangle.vertices[0] = vertices.at(i + 1);
triangle.vertices[1] = vertices.at(i);
triangle.vertices[2] = vertices.at(i + 2);
}
m_triangle_list.append(triangle);
}
}
// Set up normals transform by taking the upper left 3x3 elements from the model view matrix
// See section 2.11.3 of the OpenGL 1.5 spec
auto normal_transform = model_view_transform.submatrix_from_topleft<3>().transpose().inverse();
// Now let's transform each triangle and send that to the GPU
auto const viewport = m_options.viewport;
auto const viewport_half_width = viewport.width() / 2.0f;
auto const viewport_half_height = viewport.height() / 2.0f;
auto const viewport_center_x = viewport.x() + viewport_half_width;
auto const viewport_center_y = viewport.y() + viewport_half_height;
auto const depth_half_range = (m_options.depth_max - m_options.depth_min) / 2;
auto const depth_halfway = (m_options.depth_min + m_options.depth_max) / 2;
for (auto& triangle : m_triangle_list) {
// Transform vertices into eye coordinates using the model-view transform
triangle.vertices[0].eye_coordinates = model_view_transform * triangle.vertices[0].position;
triangle.vertices[1].eye_coordinates = model_view_transform * triangle.vertices[1].position;
triangle.vertices[2].eye_coordinates = model_view_transform * triangle.vertices[2].position;
// Transform normals before use in lighting
triangle.vertices[0].normal = normal_transform * triangle.vertices[0].normal;
triangle.vertices[1].normal = normal_transform * triangle.vertices[1].normal;
triangle.vertices[2].normal = normal_transform * triangle.vertices[2].normal;
if (m_options.normalization_enabled) {
triangle.vertices[0].normal.normalize();
triangle.vertices[1].normal.normalize();
triangle.vertices[2].normal.normalize();
}
// Calculate per-vertex lighting
if (m_options.lighting_enabled) {
auto const& material = m_materials.at(0);
for (auto& vertex : triangle.vertices) {
auto ambient = material.ambient;
auto diffuse = material.diffuse;
auto emissive = material.emissive;
auto specular = material.specular;
if (m_options.color_material_enabled
&& (m_options.color_material_face == GPU::ColorMaterialFace::Front || m_options.color_material_face == GPU::ColorMaterialFace::FrontAndBack)) {
switch (m_options.color_material_mode) {
case GPU::ColorMaterialMode::Ambient:
ambient = vertex.color;
break;
case GPU::ColorMaterialMode::AmbientAndDiffuse:
ambient = vertex.color;
diffuse = vertex.color;
break;
case GPU::ColorMaterialMode::Diffuse:
diffuse = vertex.color;
break;
case GPU::ColorMaterialMode::Emissive:
emissive = vertex.color;
break;
case GPU::ColorMaterialMode::Specular:
specular = vertex.color;
break;
}
}
FloatVector4 result_color = emissive + (ambient * m_lighting_model.scene_ambient_color);
for (auto const& light : m_lights) {
if (!light.is_enabled)
continue;
// We need to save the length here because the attenuation factor requires a non-normalized vector!
auto sgi_arrow_operator = [](FloatVector4 const& p1, FloatVector4 const& p2, float& output_length) {
FloatVector3 light_vector;
if ((p1.w() != 0.f) && (p2.w() == 0.f))
light_vector = p2.xyz();
else if ((p1.w() == 0.f) && (p2.w() != 0.f))
light_vector = -p1.xyz();
else
light_vector = p2.xyz() - p1.xyz();
output_length = light_vector.length();
if (output_length == 0.f)
return light_vector;
return light_vector / output_length;
};
auto sgi_dot_operator = [](FloatVector3 const& d1, FloatVector3 const& d2) {
return AK::max(d1.dot(d2), 0.0f);
};
float vertex_to_light_length = 0.f;
FloatVector3 vertex_to_light = sgi_arrow_operator(vertex.eye_coordinates, light.position, vertex_to_light_length);
// Light attenuation value.
float light_attenuation_factor = 1.0f;
if (light.position.w() != 0.0f)
light_attenuation_factor = 1.0f / (light.constant_attenuation + (light.linear_attenuation * vertex_to_light_length) + (light.quadratic_attenuation * vertex_to_light_length * vertex_to_light_length));
// Spotlight factor
float spotlight_factor = 1.0f;
if (light.spotlight_cutoff_angle != 180.0f) {
auto const vertex_to_light_dot_spotlight_direction = sgi_dot_operator(vertex_to_light, light.spotlight_direction.normalized());
auto const cos_spotlight_cutoff = AK::cos<float>(light.spotlight_cutoff_angle * AK::Pi<float> / 180.f);
if (vertex_to_light_dot_spotlight_direction >= cos_spotlight_cutoff)
spotlight_factor = AK::pow<float>(vertex_to_light_dot_spotlight_direction, light.spotlight_exponent);
else
spotlight_factor = 0.0f;
}
// FIXME: The spec allows for splitting the colors calculated here into multiple different colors (primary/secondary color). Investigate what this means.
(void)m_lighting_model.color_control;
// FIXME: Two sided lighting should be implemented eventually (I believe this is where the normals are -ve and then lighting is calculated with the BACK material)
(void)m_lighting_model.two_sided_lighting;
// Ambient
auto const ambient_component = ambient * light.ambient_intensity;
// Diffuse
auto const normal_dot_vertex_to_light = sgi_dot_operator(vertex.normal, vertex_to_light);
auto const diffuse_component = diffuse * light.diffuse_intensity * normal_dot_vertex_to_light;
// Specular
FloatVector4 specular_component = { 0.0f, 0.0f, 0.0f, 0.0f };
if (normal_dot_vertex_to_light > 0.0f) {
FloatVector3 half_vector_normalized;
if (!m_lighting_model.viewer_at_infinity) {
half_vector_normalized = vertex_to_light + FloatVector3(0.0f, 0.0f, 1.0f);
} else {
auto const vertex_to_eye_point = sgi_arrow_operator(vertex.eye_coordinates, { 0.f, 0.f, 0.f, 1.f }, vertex_to_light_length);
half_vector_normalized = vertex_to_light + vertex_to_eye_point;
}
half_vector_normalized.normalize();
auto const normal_dot_half_vector = sgi_dot_operator(vertex.normal, half_vector_normalized);
auto const specular_coefficient = AK::pow(normal_dot_half_vector, material.shininess);
specular_component = specular * light.specular_intensity * specular_coefficient;
}
auto color = ambient_component + diffuse_component + specular_component;
color = color * light_attenuation_factor * spotlight_factor;
result_color += color;
}
vertex.color = result_color;
vertex.color.set_w(diffuse.w()); // OpenGL 1.5 spec, page 59: "The A produced by lighting is the alpha value associated with diffuse color material"
vertex.color.clamp(0.0f, 1.0f);
}
}
// Transform eye coordinates into clip coordinates using the projection transform
triangle.vertices[0].clip_coordinates = projection_transform * triangle.vertices[0].eye_coordinates;
triangle.vertices[1].clip_coordinates = projection_transform * triangle.vertices[1].eye_coordinates;
triangle.vertices[2].clip_coordinates = projection_transform * triangle.vertices[2].eye_coordinates;
// At this point, we're in clip space
// Here's where we do the clipping. This is a really crude implementation of the
// https://learnopengl.com/Getting-started/Coordinate-Systems
// "Note that if only a part of a primitive e.g. a triangle is outside the clipping volume OpenGL
// will reconstruct the triangle as one or more triangles to fit inside the clipping range. "
m_clipped_vertices.clear_with_capacity();
m_clipped_vertices.append(triangle.vertices[0]);
m_clipped_vertices.append(triangle.vertices[1]);
m_clipped_vertices.append(triangle.vertices[2]);
m_clipper.clip_triangle_against_frustum(m_clipped_vertices);
if (m_clipped_vertices.size() < 3)
continue;
for (auto& vec : m_clipped_vertices) {
// To normalized device coordinates (NDC)
auto const one_over_w = 1 / vec.clip_coordinates.w();
auto const ndc_coordinates = FloatVector4 {
vec.clip_coordinates.x() * one_over_w,
vec.clip_coordinates.y() * one_over_w,
vec.clip_coordinates.z() * one_over_w,
one_over_w,
};
// To window coordinates
vec.window_coordinates = {
viewport_center_x + ndc_coordinates.x() * viewport_half_width,
viewport_center_y + ndc_coordinates.y() * viewport_half_height,
depth_halfway + ndc_coordinates.z() * depth_half_range,
ndc_coordinates.w(),
};
}
Triangle tri;
tri.vertices[0] = m_clipped_vertices[0];
for (size_t i = 1; i < m_clipped_vertices.size() - 1; i++) {
tri.vertices[1] = m_clipped_vertices[i];
tri.vertices[2] = m_clipped_vertices[i + 1];
m_processed_triangles.append(tri);
}
}
// Generate texture coordinates if at least one coordinate is enabled
bool texture_coordinate_generation_enabled = false;
for (auto const coordinates_enabled : m_options.texcoord_generation_enabled_coordinates) {
if (coordinates_enabled != GPU::TexCoordGenerationCoordinate::None) {
texture_coordinate_generation_enabled = true;
break;
}
}
for (auto& triangle : m_processed_triangles) {
triangle.subpixel_coordinates[0] = (triangle.vertices[0].window_coordinates.xy() * subpixel_factor).to_rounded<int>();
triangle.subpixel_coordinates[1] = (triangle.vertices[1].window_coordinates.xy() * subpixel_factor).to_rounded<int>();
triangle.subpixel_coordinates[2] = (triangle.vertices[2].window_coordinates.xy() * subpixel_factor).to_rounded<int>();
auto triangle_area = edge_function(triangle.subpixel_coordinates[0], triangle.subpixel_coordinates[1], triangle.subpixel_coordinates[2]);
if (triangle_area == 0)
continue;
if (m_options.enable_culling) {
bool is_front = (m_options.front_face == GPU::WindingOrder::CounterClockwise ? triangle_area > 0 : triangle_area < 0);
if (!is_front && m_options.cull_back)
continue;
if (is_front && m_options.cull_front)
continue;
}
// Force counter-clockwise ordering of vertices
if (triangle_area < 0) {
swap(triangle.vertices[0], triangle.vertices[1]);
swap(triangle.subpixel_coordinates[0], triangle.subpixel_coordinates[1]);
triangle_area *= -1;
}
triangle.area = triangle_area;
if (texture_coordinate_generation_enabled) {
generate_texture_coordinates(triangle.vertices[0], m_options);
generate_texture_coordinates(triangle.vertices[1], m_options);
generate_texture_coordinates(triangle.vertices[2], m_options);
}
// Apply texture transformation
for (size_t i = 0; i < GPU::NUM_SAMPLERS; ++i) {
triangle.vertices[0].tex_coords[i] = texture_transform * triangle.vertices[0].tex_coords[i];
triangle.vertices[1].tex_coords[i] = texture_transform * triangle.vertices[1].tex_coords[i];
triangle.vertices[2].tex_coords[i] = texture_transform * triangle.vertices[2].tex_coords[i];
}
rasterize_triangle(triangle);
}
}
// Computes the final color of a pixel quad: starts with the interpolated vertex color, combines
// the texel of every enabled texture unit according to that unit's texture environment mode and
// finally blends in fog if it is enabled. The result is stored in `quad.out_color`.
ALWAYS_INLINE void Device::shade_fragments(PixelQuad& quad)
{
    auto& shaded = quad.out_color;
    shaded = quad.vertex_color;

    for (size_t unit_index : m_enabled_texture_units) {
        // FIXME: implement GL_TEXTURE_1D, GL_TEXTURE_3D and GL_TEXTURE_CUBE_MAP
        auto const& unit_sampler = m_samplers[unit_index];
        auto texel = unit_sampler.sample_2d(quad.texture_coordinates[unit_index].xy());
        INCREASE_STATISTICS_COUNTER(g_num_sampler_calls, 1);

        // FIXME: Implement more blend modes
        switch (unit_sampler.config().fixed_function_texture_env_mode) {
        case GPU::TextureEnvMode::Modulate:
            shaded = shaded * texel;
            break;
        case GPU::TextureEnvMode::Replace:
            shaded = texel;
            break;
        case GPU::TextureEnvMode::Decal: {
            // The texel's alpha decides how much of the texel's RGB replaces the current RGB;
            // the current alpha is left untouched.
            auto texel_alpha = texel.w();
            shaded.set_x(mix(shaded.x(), texel.x(), texel_alpha));
            shaded.set_y(mix(shaded.y(), texel.y(), texel_alpha));
            shaded.set_z(mix(shaded.z(), texel.z(), texel_alpha));
            break;
        }
        case GPU::TextureEnvMode::Add:
            // RGB is summed, alpha is multiplied.
            shaded.set_x(shaded.x() + texel.x());
            shaded.set_y(shaded.y() + texel.y());
            shaded.set_z(shaded.z() + texel.z());
            shaded.set_w(shaded.w() * texel.w()); // FIXME: If texture format is `GL_INTENSITY` alpha components must be added (https://www.khronos.org/registry/OpenGL-Refpages/gl2.1/xhtml/glTexEnv.xml)
            break;
        default:
            VERIFY_NOT_REACHED();
        }
    }

    // Calculate fog
    // Math from here: https://opengl-notes.readthedocs.io/en/latest/topics/texturing/aliasing.html
    // FIXME: exponential fog is not vectorized, we should add a SIMD exp function that calculates an approximation.
    if (!m_options.fog_enabled)
        return;

    auto fog_factor = expand4(0.0f);
    switch (m_options.fog_mode) {
    case GPU::FogMode::Linear:
        fog_factor = (m_options.fog_end - quad.fog_depth) / (m_options.fog_end - m_options.fog_start);
        break;
    case GPU::FogMode::Exp:
        fog_factor = exp(-m_options.fog_density * quad.fog_depth);
        break;
    case GPU::FogMode::Exp2: {
        // exp(-(density * depth)^2)
        auto exponent = m_options.fog_density * quad.fog_depth;
        exponent *= -exponent;
        fog_factor = exp(exponent);
        break;
    }
    default:
        VERIFY_NOT_REACHED();
    }

    // Mix texel's RGB with fog's RGB - leave alpha alone
    auto fog_color = expand4(m_options.fog_color);
    shaded.set_x(mix(fog_color.x(), shaded.x(), fog_factor));
    shaded.set_y(mix(fog_color.y(), shaded.y(), fog_factor));
    shaded.set_z(mix(fog_color.z(), shaded.z(), fog_factor));
}
// Applies the configured alpha test to a pixel quad by masking out every pixel whose output alpha
// fails the comparison against the reference value. Returns true if at least one pixel of the
// quad is still enabled in `quad.mask` afterwards.
ALWAYS_INLINE bool Device::test_alpha(PixelQuad& quad)
{
    auto const fragment_alpha = quad.out_color.w();
    auto const reference_alpha = expand4(m_options.alpha_test_ref_value);

    switch (m_options.alpha_test_func) {
    case GPU::AlphaTestFunction::Less:
        quad.mask &= (fragment_alpha < reference_alpha);
        break;
    case GPU::AlphaTestFunction::LessOrEqual:
        quad.mask &= (fragment_alpha <= reference_alpha);
        break;
    case GPU::AlphaTestFunction::Equal:
        quad.mask &= (fragment_alpha == reference_alpha);
        break;
    case GPU::AlphaTestFunction::NotEqual:
        quad.mask &= (fragment_alpha != reference_alpha);
        break;
    case GPU::AlphaTestFunction::GreaterOrEqual:
        quad.mask &= (fragment_alpha >= reference_alpha);
        break;
    case GPU::AlphaTestFunction::Greater:
        quad.mask &= (fragment_alpha > reference_alpha);
        break;
    // Never and Always are not handled by this function and trip the assertion below;
    // presumably callers deal with these trivial modes before getting here.
    case GPU::AlphaTestFunction::Never:
    case GPU::AlphaTestFunction::Always:
    default:
        VERIFY_NOT_REACHED();
    }

    return any(quad.mask);
}
// Replaces the device's frame buffer with a newly created one of the given size.
// Failure to create the frame buffer is treated as fatal (MUST).
void Device::resize(Gfx::IntSize const& size)
{
    using DeviceFrameBuffer = FrameBuffer<GPU::ColorType, GPU::DepthType, GPU::StencilType>;

    auto new_frame_buffer = DeviceFrameBuffer::try_create(size);
    m_frame_buffer = MUST(new_frame_buffer);
}
// Fills the color buffer with `color`, restricted to the scissor box if scissoring is enabled.
void Device::clear_color(FloatVector4 const& color)
{
    auto target_rect = m_frame_buffer->rect();
    if (m_options.scissor_enabled)
        target_rect.intersect(m_options.scissor_box);

    // The color buffer stores packed BGRA values, so convert the floating point color first.
    auto const packed_color = to_bgra32(color);
    m_frame_buffer->color_buffer()->fill(packed_color, target_rect);
}
// Fills the depth buffer with `depth`, restricted to the scissor box if scissoring is enabled.
void Device::clear_depth(GPU::DepthType depth)
{
    auto target_rect = m_frame_buffer->rect();
    if (m_options.scissor_enabled)
        target_rect.intersect(m_options.scissor_box);

    auto depth_buffer = m_frame_buffer->depth_buffer();
    depth_buffer->fill(depth, target_rect);
}
// Fills the stencil buffer with `value`, restricted to the scissor box if scissoring is enabled.
void Device::clear_stencil(GPU::StencilType value)
{
    auto target_rect = m_frame_buffer->rect();
    if (m_options.scissor_enabled)
        target_rect.intersect(m_options.scissor_box);

    auto stencil_buffer = m_frame_buffer->stencil_buffer();
    stencil_buffer->fill(value, target_rect);
}
// Copies `source` into the color buffer at the current raster position.
// Does nothing while the raster position is invalid.
void Device::blit_to_color_buffer_at_raster_position(Gfx::Bitmap const& source)
{
    if (!m_raster_position.valid)
        return;

    // Every pixel of the source bitmap counts as both a processed and a shaded pixel.
    INCREASE_STATISTICS_COUNTER(g_num_pixels, source.width() * source.height());
    INCREASE_STATISTICS_COUNTER(g_num_pixels_shaded, source.width() * source.height());

    Gfx::IntSize const source_size { source.width(), source.height() };
    auto const destination_rect = get_rasterization_rect_of_size(source_size);
    m_frame_buffer->color_buffer()->blit_from_bitmap(source, destination_rect);
}
// Copies a `width` x `height` block of depth values into the depth buffer at the current raster
// position. `depth_values` is read row by row, i.e. the value for column `c` of row `r` is
// expected at index `r * width + c`.
//
// Does nothing while the raster position is invalid or if the block is empty. The raster position
// is not clipped when it is set, so the destination rectangle may lie partially or completely
// outside of the frame buffer; only the part that overlaps the frame buffer is written.
void Device::blit_to_depth_buffer_at_raster_position(Vector<GPU::DepthType> const& depth_values, int width, int height)
{
    if (!m_raster_position.valid)
        return;
    if (width <= 0 || height <= 0)
        return;

    auto const raster_rect = get_rasterization_rect_of_size({ width, height });
    auto const frame_rect = m_frame_buffer->rect();

    // Clamp the half-open destination ranges [first, last) to the frame buffer so we never touch
    // a scanline or a column that does not exist.
    auto const first_y = AK::max(raster_rect.y(), frame_rect.y());
    auto const last_y = AK::min(raster_rect.y() + height, frame_rect.y() + frame_rect.height());
    auto const first_x = AK::max(raster_rect.x(), frame_rect.x());
    auto const last_x = AK::min(raster_rect.x() + width, frame_rect.x() + frame_rect.width());

    for (auto y = first_y; y < last_y; ++y) {
        auto depth_line = m_frame_buffer->depth_buffer()->scanline(y);

        // Index the source by row and column instead of a running counter, so that skipped
        // (clipped) pixels do not shift the remaining values.
        auto const source_row_offset = static_cast<size_t>(y - raster_rect.y()) * static_cast<size_t>(width);
        for (auto x = first_x; x < last_x; ++x)
            depth_line[x] = depth_values[source_row_offset + static_cast<size_t>(x - raster_rect.x())];
    }
}
// Copies the complete color buffer, vertically flipped, into `target`. If the statistics overlay
// is enabled at compile time, it is drawn on top of the result.
void Device::blit_color_buffer_to(Gfx::Bitmap& target)
{
    auto const full_rect = m_frame_buffer->rect();
    m_frame_buffer->color_buffer()->blit_flipped_to_bitmap(target, full_rect);

    if constexpr (ENABLE_STATISTICS_OVERLAY)
        draw_statistics_overlay(target);
}
// Draws a textual statistics overlay (timings and rasterizer counters) into the top left corner
// of `target`.
//
// The text is only rebuilt once more than MILLISECONDS_PER_STATISTICS_PERIOD milliseconds have
// elapsed since the last rebuild; the timings are averaged over the frames in that period. The
// global counters are reset on every call, so the counter values shown are those gathered since
// the previous call.
void Device::draw_statistics_overlay(Gfx::Bitmap& target)
{
    static Core::ElapsedTimer timer;
    static String debug_string;
    static int frame_counter;

    frame_counter++;
    int milliseconds = 0;
    if (timer.is_valid())
        milliseconds = timer.elapsed();
    else
        timer.start();

    Gfx::Painter painter { target };

    if (milliseconds > MILLISECONDS_PER_STATISTICS_PERIOD) {
        int num_rendertarget_pixels = m_frame_buffer->rect().size().area();

        // Overdraw is the percentage of shaded pixels beyond one full render target. This needs
        // to be a signed value: if fewer pixels were shaded than the render target contains, the
        // result is negative, and an unsigned subtraction would wrap around to a huge number.
        i64 overdraw_percentage = 0;
        if (num_rendertarget_pixels > 0)
            overdraw_percentage = static_cast<i64>(g_num_pixels_shaded * 100 / static_cast<u64>(num_rendertarget_pixels)) - 100;

        StringBuilder builder;
        builder.append(String::formatted("Timings : {:.1}ms {:.1}FPS\n",
            static_cast<double>(milliseconds) / frame_counter,
            (milliseconds > 0) ? 1000.0 * frame_counter / milliseconds : 9999.0));
        builder.append(String::formatted("Triangles : {}\n", g_num_rasterized_triangles));
        // A quad holds 4 pixels, so 25 * shaded pixels / quads yields 100% for fully used quads.
        builder.append(String::formatted("SIMD usage : {}%\n", g_num_quads > 0 ? g_num_pixels_shaded * 25 / g_num_quads : 0));
        builder.append(String::formatted("Pixels : {}, Stencil: {}%, Shaded: {}%, Blended: {}%, Overdraw: {}%\n",
            g_num_pixels,
            g_num_pixels > 0 ? g_num_stencil_writes * 100 / g_num_pixels : 0,
            g_num_pixels > 0 ? g_num_pixels_shaded * 100 / g_num_pixels : 0,
            g_num_pixels_shaded > 0 ? g_num_pixels_blended * 100 / g_num_pixels_shaded : 0,
            overdraw_percentage));
        builder.append(String::formatted("Sampler calls: {}\n", g_num_sampler_calls));

        debug_string = builder.to_string();

        frame_counter = 0;
        timer.start();
    }

    g_num_rasterized_triangles = 0;
    g_num_pixels = 0;
    g_num_pixels_shaded = 0;
    g_num_pixels_blended = 0;
    g_num_sampler_calls = 0;
    g_num_stencil_writes = 0;
    g_num_quads = 0;

    auto& font = Gfx::FontDatabase::default_fixed_width_font();

    // Draw a black copy of the text at each of the four diagonal one-pixel offsets, then the
    // white text on top of them.
    for (int y = -1; y < 2; y++)
        for (int x = -1; x < 2; x++)
            if (x != 0 && y != 0)
                painter.draw_text(target.rect().translated(x + 2, y + 2), debug_string, font, Gfx::TextAlignment::TopLeft, Gfx::Color::Black);

    painter.draw_text(target.rect().translated(2, 2), debug_string, font, Gfx::TextAlignment::TopLeft, Gfx::Color::White);
}
// Stores the new rasterizer options and, if blending is enabled in them, sets up the blend
// factors.
void Device::set_options(GPU::RasterizerOptions const& options)
{
    m_options = options;

    if (!m_options.enable_blending)
        return;
    setup_blend_factors();
}
// Stores a copy of the given light model parameters in the device.
void Device::set_light_model_params(GPU::LightModelParameters const& lighting_model)
{
m_lighting_model = lighting_model;
}
// Returns the color buffer value at (x, y), or 0 if the position lies outside of the frame buffer.
GPU::ColorType Device::get_color_buffer_pixel(int x, int y)
{
    // FIXME: Reading individual pixels is very slow, rewrite this to transfer whole blocks
    auto const is_inside_frame_buffer = m_frame_buffer->rect().contains(x, y);
    if (is_inside_frame_buffer)
        return m_frame_buffer->color_buffer()->scanline(y)[x];
    return 0;
}
// Returns the depth buffer value at (x, y), or 1.0 if the position lies outside of the frame
// buffer.
GPU::DepthType Device::get_depthbuffer_value(int x, int y)
{
    // FIXME: Reading individual pixels is very slow, rewrite this to transfer whole blocks
    auto const is_inside_frame_buffer = m_frame_buffer->rect().contains(x, y);
    if (is_inside_frame_buffer)
        return m_frame_buffer->depth_buffer()->scanline(y)[x];
    return 1.0f;
}
// Creates a new image that uses this device as its ownership token. Only the BGRA8888 format is
// accepted, and all dimensions as well as the level and layer counts must be non-zero; violating
// any of these preconditions trips an assertion.
NonnullRefPtr<GPU::Image> Device::create_image(GPU::ImageFormat format, unsigned width, unsigned height, unsigned depth, unsigned levels, unsigned layers)
{
    VERIFY(format == GPU::ImageFormat::BGRA8888);
    VERIFY(width > 0);
    VERIFY(height > 0);
    VERIFY(depth > 0);
    VERIFY(levels > 0);
    VERIFY(layers > 0);

    auto* new_image = new Image(this, width, height, depth, levels, layers);
    return adopt_ref(*new_image);
}
// Applies `config` to the sampler with index `sampler`. If the configuration has an image bound,
// that image must carry this device as its ownership token.
void Device::set_sampler_config(unsigned sampler, GPU::SamplerConfig const& config)
{
    VERIFY(config.bound_image.is_null() || config.bound_image->ownership_token() == this);

    auto& target_sampler = m_samplers[sampler];
    target_sampler.set_config(config);
}
// Overwrites the state of the light with index `light_id`.
void Device::set_light_state(unsigned int light_id, GPU::Light const& light)
{
    auto& light_slot = m_lights.at(light_id);
    light_slot = light;
}
// Overwrites the material stored for the given face.
void Device::set_material_state(GPU::Face face, GPU::Material const& material)
{
    auto& material_slot = m_materials[face];
    material_slot = material;
}
// Overwrites the stencil configuration stored for the given face.
void Device::set_stencil_configuration(GPU::Face face, GPU::StencilConfiguration const& stencil_configuration)
{
    auto& configuration_slot = m_stencil_configuration[face];
    configuration_slot = stencil_configuration;
}
// Replaces the current raster position with a copy of the given, already computed one.
void Device::set_raster_position(GPU::RasterPosition const& raster_position)
{
m_raster_position = raster_position;
}
// Computes the raster position from an object space `position`: the position is transformed to
// eye and clip coordinates, perspective divided and finally mapped to window coordinates using
// the current viewport and depth range. The raster position is always marked valid since
// clipping is not implemented yet.
void Device::set_raster_position(FloatVector4 const& position, FloatMatrix4x4 const& model_view_transform, FloatMatrix4x4 const& projection_transform)
{
    auto const eye_position = model_view_transform * position;
    auto const clip_position = projection_transform * eye_position;

    // FIXME: implement clipping
    m_raster_position.valid = true;

    // Perspective divide; the window W coordinate below keeps the undivided clip space W.
    auto const ndc_position = clip_position / clip_position.w();

    auto const& viewport_rect = m_options.viewport;
    auto const half_width = viewport_rect.width() / 2.0f;
    auto const half_height = viewport_rect.height() / 2.0f;
    auto const center_x = viewport_rect.x() + half_width;
    auto const center_y = viewport_rect.y() + half_height;

    auto const depth_half_range = (m_options.depth_max - m_options.depth_min) / 2;
    auto const depth_halfway = (m_options.depth_min + m_options.depth_max) / 2;

    // FIXME: implement other raster position properties such as color and texcoords

    m_raster_position.window_coordinates = {
        center_x + ndc_position.x() * half_width,
        center_y + ndc_position.y() * half_height,
        depth_halfway + ndc_position.z() * depth_half_range,
        clip_position.w(),
    };

    m_raster_position.eye_coordinate_distance = eye_position.length();
}
// Returns a rectangle of the given size whose origin is the current raster position's window
// X and Y coordinates, rounded to the nearest integer.
Gfx::IntRect Device::get_rasterization_rect_of_size(Gfx::IntSize size)
{
    // Round the X and Y floating point coordinates to the nearest integer; OpenGL 1.5 spec:
    // "Any fragments whose centers lie inside of this rectangle (or on its bottom or left
    // boundaries) are produced in correspondence with this particular group of elements."
    auto const& window_position = m_raster_position.window_coordinates;
    auto const origin_x = static_cast<int>(lroundf(window_position.x()));
    auto const origin_y = static_cast<int>(lroundf(window_position.y()));

    return { origin_x, origin_y, size.width(), size.height() };
}
}
extern "C" {

// C-linkage entry point that creates a SoftGPU device with a frame buffer of the given size.
// The returned pointer is leaked from its owning pointer, so the caller takes over ownership.
GPU::Device* serenity_gpu_create_device(Gfx::IntSize const& size)
{
    auto device = make<SoftGPU::Device>(size);
    return device.leak_ptr();
}

}