Jelle Raaijmakers 84c4b66721 LibGL+LibGPU+LibSoftGPU: Implement texture pixel format support
In OpenGL this is called the (base) internal format which is an
expectation expressed by the client for the minimum supported texel
storage format in the GPU for textures.

Since we store everything as RGBA in a `FloatVector4`, the only thing
we do in this patch is remember the expected internal format, and when
we write new texels we fixate the value for the alpha channel to 1 for
two formats that require it.

`PixelConverter` has learned how to transform pixels during transfer to
support this.
2022-08-27 12:28:05 +02:00

443 lines
19 KiB

/*
 * Copyright (c) 2022, Jelle Raaijmakers <>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
#include <AK/Array.h>
#include <AK/Error.h>
#include <AK/FloatingPoint.h>
#include <LibSoftGPU/PixelConverter.h>
namespace SoftGPU {
template<typename T>
static constexpr T reverse_component_bytes_if_needed(T value, GPU::ImageDataLayout const& image_data_layout) requires(sizeof(T) == 2 || sizeof(T) == 4)
if (image_data_layout.packing.component_bytes_order == GPU::ComponentBytesOrder::Normal)
return value;
VERIFY(image_data_layout.pixel_type.bits == GPU::PixelComponentBits::AllBits);
auto* u8_ptr = reinterpret_cast<u8*>(&value);
if constexpr (sizeof(T) == 2) {
swap(u8_ptr[0], u8_ptr[1]);
} else if constexpr (sizeof(T) == 4) {
swap(u8_ptr[0], u8_ptr[3]);
swap(u8_ptr[1], u8_ptr[2]);
return value;
// Rearranges components that were read in the storage order of `format` into a four-component
// vector: the stored values are placed in the channel(s) the format names and the remaining
// channels are filled with the constants shown per case.
static constexpr FloatVector4 decode_component_order_for_format(FloatVector4 const& components, GPU::PixelFormat format)
{
    switch (format) {
    case GPU::PixelFormat::Alpha:
        return { 0.f, 0.f, 0.f, components[0] };
    case GPU::PixelFormat::BGR:
        return { components[2], components[1], components[0], 1.f };
    case GPU::PixelFormat::BGRA:
        return { components[2], components[1], components[0], components[3] };
    case GPU::PixelFormat::Blue:
        return { 0.f, 0.f, components[0], 1.f };
    case GPU::PixelFormat::ColorIndex:
    case GPU::PixelFormat::DepthComponent:
    case GPU::PixelFormat::StencilIndex:
        // Single-value formats: the value is carried in the first slot only.
        return { components[0], 0.f, 0.f, 0.f };
    case GPU::PixelFormat::Green:
        return { 0.f, components[0], 0.f, 1.f };
    case GPU::PixelFormat::Intensity:
        return { components[0], components[0], components[0], components[0] };
    case GPU::PixelFormat::Luminance:
        return { components[0], components[0], components[0], 1.f };
    case GPU::PixelFormat::LuminanceAlpha:
        return { components[0], components[0], components[0], components[1] };
    case GPU::PixelFormat::Red:
        return { components[0], 0.f, 0.f, 1.f };
    case GPU::PixelFormat::RGB:
        return { components[0], components[1], components[2], 1.f };
    case GPU::PixelFormat::RGBA:
        return components;
    }
    // Every case above returns; reaching this point means `format` held an unexpected value.
    VERIFY_NOT_REACHED();
}
// Rearranges a four-component vector into the storage order of `format`, so that the first
// N slots of the result are the N values to be written for that format. Slots beyond what the
// format stores are either zeroed or left as passed in, as shown per case.
static constexpr FloatVector4 encode_component_order_for_format(FloatVector4 const& components, GPU::PixelFormat format)
{
    switch (format) {
    case GPU::PixelFormat::Alpha:
        return { components[3], 0.f, 0.f, 0.f };
    case GPU::PixelFormat::BGR:
        return { components[2], components[1], components[0], 0.f };
    case GPU::PixelFormat::BGRA:
        return { components[2], components[1], components[0], components[3] };
    case GPU::PixelFormat::Blue:
        return { components[2], 0.f, 0.f, 0.f };
    case GPU::PixelFormat::ColorIndex:
    case GPU::PixelFormat::DepthComponent:
    case GPU::PixelFormat::Intensity:
    case GPU::PixelFormat::Luminance:
    case GPU::PixelFormat::Red:
    case GPU::PixelFormat::RGB:
    case GPU::PixelFormat::RGBA:
    case GPU::PixelFormat::StencilIndex:
        // These formats are passed through without reordering.
        return components;
    case GPU::PixelFormat::Green:
        return { components[1], 0.f, 0.f, 0.f };
    case GPU::PixelFormat::LuminanceAlpha:
        return { components[0], components[3], 0.f, 0.f };
    }
    // Every case above returns; reaching this point means `format` held an unexpected value.
    VERIFY_NOT_REACHED();
}
template<typename S, typename O>
static int read_pixel_values(u8 const* input_data, Array<O, 4>& output_values, GPU::ImageDataLayout const& layout)
auto const& pixel_type = layout.pixel_type;
auto const number_of_data_reads = GPU::number_of_components(pixel_type.format) / GPU::number_of_components(pixel_type.bits);
for (int i = 0; i < number_of_data_reads; ++i) {
auto storage_value = reinterpret_cast<S const*>(input_data)[i];
if (layout.pixel_type.bits == GPU::PixelComponentBits::AllBits) {
if constexpr (sizeof(S) == 2 || sizeof(S) == 4)
storage_value = reverse_component_bytes_if_needed(storage_value, layout);
O value = storage_value;
// Special case: convert HalfFloat to regular float
if constexpr (IsSame<O, float>) {
if (pixel_type.data_type == GPU::PixelDataType::HalfFloat)
value = convert_to_native_float(FloatingPointBits<1, 5, 10>(storage_value));
output_values[i] = value;
return number_of_data_reads;
// Converts the raw integer data value(s) of one pixel into float components.
// For AllBits layouts each data value is one component; otherwise the single data value is
// split into bitfields whose widths come from pixel_component_bitfield_lengths().
template<typename T>
constexpr FloatVector4 extract_component_values(Span<T> data_values, GPU::PixelType const& pixel_type)
{
    // FIXME: implement fixed point conversion for ::StencilIndex
    // FIXME: stencil components should account for GL_MAP_STENCIL
    // FIXME: stencil components should get GL_INDEX_SHIFT and GL_INDEX_OFFSET applied
    // FIXME: depth components should get GL_DEPTH_SCALE and GL_DEPTH_BIAS applied
    // FIXME: color components should get GL_C_SCALE and GL_C_BIAS applied

    auto const number_of_values = data_values.size();
    auto const bits_number_of_components = number_of_components(pixel_type.bits);
    VERIFY(bits_number_of_components == 1 || bits_number_of_components == number_of_components(pixel_type.format));

    // Maps a signed value to -1.0f..1.0f
    auto signed_to_float = [](T value) -> float {
        auto constexpr number_of_bits = sizeof(T) * 8 - 1;
        // Shift a 64-bit one: `1 << 31` does not fit in a signed int and would produce a
        // negative divisor for 32-bit values, flipping the sign of the result.
        return max(static_cast<float>(value / static_cast<float>(1ull << number_of_bits)), -1.f);
    };

    // Maps an unsigned value to 0.0f..1.0f
    auto unsigned_to_float = [](T value, u8 const number_of_bits) -> float {
        return static_cast<float>(value / static_cast<double>((1ull << number_of_bits) - 1));
    };

    // Handle full data values (1 or more)
    if (pixel_type.bits == GPU::PixelComponentBits::AllBits) {
        FloatVector4 components;
        for (size_t i = 0; i < number_of_values; ++i) {
            if constexpr (IsSigned<T>)
                components[i] = signed_to_float(data_values[i]);
            else
                components[i] = unsigned_to_float(data_values[i], sizeof(T) * 8);
        }
        return components;
    }

    VERIFY(number_of_values == 1);
    T const value = data_values[0];
    auto bitfields = pixel_component_bitfield_lengths(pixel_type.bits);

    // Map arbitrary bitfields to floats
    u8 remaining_width = 0;
    for (auto bitwidth : bitfields)
        remaining_width += bitwidth;

    // "By default the components are laid out from msb (most-significant bit) to lsb (least-significant bit)"
    FloatVector4 components;
    for (auto i = 0; i < 4; ++i) {
        auto bitwidth = bitfields[i];
        // A zero-width field carries no component; leave that slot as constructed.
        if (bitwidth == 0)
            continue;
        remaining_width -= bitwidth;
        components[i] = unsigned_to_float((value >> remaining_width) & ((1 << bitwidth) - 1), bitwidth);
    }
    return components;
}
// Float variant: the data values already are the component values, so they are copied over
// one-to-one. The pixel type is not consulted.
constexpr FloatVector4 extract_component_values(Span<float> data_values, GPU::PixelType const&)
{
    FloatVector4 components;
    for (size_t i = 0; i < data_values.size(); ++i)
        components[i] = data_values[i];
    return components;
}
// Turns the raw data value(s) of one pixel into a four-component float vector, clamped to
// 0.f..1.f, by extracting the components, optionally reversing their order and then mapping
// them according to the pixel format.
template<typename T>
static FloatVector4 pixel_values_to_components(Span<T> values, GPU::PixelType const& pixel_type)
{
    // Deconstruct read value(s) into separate components
    auto components = extract_component_values(values, pixel_type);
    if (pixel_type.components_order == GPU::ComponentsOrder::Reversed)
        components = { components[3], components[2], components[1], components[0] };

    // Reconstruct component values in order
    auto component_values = decode_component_order_for_format(components, pixel_type.format);
    component_values.clamp(0.f, 1.f);
    return component_values;
}
// Reads one pixel from `*input_data` according to the input specification, advances
// `*input_data` past the bytes that were consumed and returns the pixel's components.
FloatVector4 PixelConverter::read_pixel(u8 const** input_data)
{
    // S is the type the data is stored as, O the type it is processed as.
    auto read_components = [&]<typename S, typename O>() {
        Array<O, 4> values;
        auto number_of_values = read_pixel_values<S, O>(*input_data, values, m_input_specification);
        // Advance by the size of the *stored* type: read_pixel_values() indexes the input as
        // S, and S differs from O for HalfFloat (u16 storage, float output).
        *input_data += number_of_values * sizeof(S);
        return pixel_values_to_components(values.span().trim(number_of_values), m_input_specification.pixel_type);
    };

    switch (m_input_specification.pixel_type.data_type) {
    case GPU::PixelDataType::Bitmap:
        // convert() rejects Bitmap layouts during validation.
        VERIFY_NOT_REACHED();
    case GPU::PixelDataType::Byte:
        return read_components.template operator()<i8, i8>();
    case GPU::PixelDataType::Float:
        return read_components.template operator()<float, float>();
    case GPU::PixelDataType::HalfFloat:
        return read_components.template operator()<u16, float>();
    case GPU::PixelDataType::Int:
        return read_components.template operator()<i32, i32>();
    case GPU::PixelDataType::Short:
        return read_components.template operator()<i16, i16>();
    case GPU::PixelDataType::UnsignedByte:
        return read_components.template operator()<u8, u8>();
    case GPU::PixelDataType::UnsignedInt:
        return read_components.template operator()<u32, u32>();
    case GPU::PixelDataType::UnsignedShort:
        return read_components.template operator()<u16, u16>();
    }
    VERIFY_NOT_REACHED();
}
// Writes a single component `value` to `*output_data` encoded as the layout's data type, and
// advances `*output_data` past the bytes written.
static constexpr void write_pixel_as_type(u8** output_data, float value, GPU::ImageDataLayout layout)
{
    // Stores one value of type T (byte-reversed if the layout asks for it) and moves the cursor.
    auto write_value = [&output_data, &layout]<typename T>(T value) -> void {
        if constexpr (sizeof(T) == 2 || sizeof(T) == 4)
            value = reverse_component_bytes_if_needed(value, layout);
        **reinterpret_cast<T**>(output_data) = value;
        (*output_data) += sizeof(T);
    };

    // Scales `value` onto the full range of the signed integer type T.
    auto constexpr float_to_signed = []<typename T>(float value) -> T {
        auto const signed_max = 1ull << (sizeof(T) * 8 - 1);
        auto const unsigned_max = 2 * signed_max - 1;
        return round_to<T>((static_cast<double>(value) + 1.) / 2. * unsigned_max - signed_max);
    };

    // Scales `value` onto the full range of the unsigned integer type T.
    auto constexpr float_to_unsigned = []<typename T>(float value) -> T {
        auto const unsigned_max = (1ull << (sizeof(T) * 8)) - 1;
        return round_to<T>(static_cast<double>(value) * unsigned_max);
    };

    switch (layout.pixel_type.data_type) {
    case GPU::PixelDataType::Bitmap:
        // PixelConverter::convert() rejects Bitmap layouts during validation.
        VERIFY_NOT_REACHED();
    case GPU::PixelDataType::Byte:
        write_value(float_to_signed.template operator()<i8>(value));
        break;
    case GPU::PixelDataType::Float:
        write_value(value);
        break;
    case GPU::PixelDataType::HalfFloat:
        write_value(static_cast<u16>(convert_from_native_float<FloatingPointBits<1, 5, 10>>(value).bits()));
        break;
    case GPU::PixelDataType::Int:
        write_value(float_to_signed.template operator()<i32>(value));
        break;
    case GPU::PixelDataType::Short:
        write_value(float_to_signed.template operator()<i16>(value));
        break;
    case GPU::PixelDataType::UnsignedByte:
        write_value(float_to_unsigned.template operator()<u8>(value));
        break;
    case GPU::PixelDataType::UnsignedInt:
        write_value(float_to_unsigned.template operator()<u32>(value));
        break;
    case GPU::PixelDataType::UnsignedShort:
        write_value(float_to_unsigned.template operator()<u16>(value));
        break;
    }
}
// Packs `components` into a single integer made of concatenated bitfields (widths taken from
// pixel_component_bitfield_lengths()) and writes it to `*output_data` in the pixel type's data
// type, advancing `*output_data` past the bytes written.
void constexpr write_pixel_as_bitfield(u8** output_data, FloatVector4 const& components, GPU::PixelType const& pixel_type)
{
    // Scales `value` onto the range of an unsigned integer that is `bits` wide.
    auto constexpr float_to_unsigned = [](float value, u8 bits) {
        auto unsigned_max = (1ull << bits) - 1;
        return round_to<u64>(value * unsigned_max);
    };

    // Construct value with concatenated bitfields - first component has most significant bits
    auto bitfields = pixel_component_bitfield_lengths(pixel_type.bits);
    u64 value = 0;
    for (auto i = 0; i < 4; ++i) {
        u8 const bitsize = bitfields[i];
        if (bitsize == 0)
            continue;
        // Make room for exactly this field's width before inserting it. Shifting by the
        // previous field's width would overlap or misplace fields of unequal widths.
        value <<= bitsize;
        value |= float_to_unsigned(components[i], bitsize);
    }

    // Write out the value in the requested data type
    auto write_value = [&output_data]<typename T>(T value) -> void {
        **reinterpret_cast<T**>(output_data) = value;
        (*output_data) += sizeof(T);
    };
    switch (pixel_type.data_type) {
    case GPU::PixelDataType::UnsignedByte:
        write_value(static_cast<u8>(value));
        break;
    case GPU::PixelDataType::UnsignedInt:
        write_value(static_cast<u32>(value));
        break;
    case GPU::PixelDataType::UnsignedShort:
        write_value(static_cast<u16>(value));
        break;
    default:
        // Only the unsigned integer data types are handled for bitfield layouts.
        VERIFY_NOT_REACHED();
    }
}
// Writes one pixel to `*output_data` according to the output specification; the helpers it
// calls advance `*output_data` past the bytes written.
void PixelConverter::write_pixel(u8** output_data, FloatVector4 const& components)
{
    // NOTE: `components` is already clamped to 0.f..1.f

    // Reorder float components to data order
    auto const& pixel_type = m_output_specification.pixel_type;
    auto output_components = encode_component_order_for_format(components, pixel_type.format);
    if (pixel_type.components_order == GPU::ComponentsOrder::Reversed)
        output_components = { output_components[3], output_components[2], output_components[1], output_components[0] };

    // Write components as full data types
    auto const number_of_components_in_pixel = number_of_components(pixel_type.format);
    if (pixel_type.bits == GPU::PixelComponentBits::AllBits) {
        for (u8 i = 0; i < number_of_components_in_pixel; ++i)
            write_pixel_as_type(output_data, output_components[i], m_output_specification);
        // Done; the bitfield path below only applies to packed layouts.
        return;
    }

    // Write components as a concatenated bitfield value
    VERIFY(number_of_components_in_pixel == number_of_components(pixel_type.bits));
    write_pixel_as_bitfield(output_data, output_components, pixel_type);
}
// Returns `selection` adjusted to `dimensions`: negative offsets are moved to 0 (shrinking the
// extent by the same amount) and extents that reach past the image are cut off at its edge.
static constexpr GPU::ImageSelection restrain_selection_within_dimensions(GPU::ImageSelection selection, GPU::DimensionSpecification const& dimensions)
{
    // Pull negative offsets back to the origin.
    if (selection.offset_x < 0) {
        selection.width += selection.offset_x;
        selection.offset_x = 0;
    }
    if (selection.offset_y < 0) {
        selection.height += selection.offset_y;
        selection.offset_y = 0;
    }
    if (selection.offset_z < 0) {
        selection.depth += selection.offset_z;
        selection.offset_z = 0;
    }

    // Cut off whatever extends past the far edges.
    if (selection.offset_x + selection.width > dimensions.width)
        selection.width = dimensions.width - selection.offset_x;
    if (selection.offset_y + selection.height > dimensions.height)
        selection.height = dimensions.height - selection.offset_y;
    if (selection.offset_z + selection.depth > dimensions.depth)
        selection.depth = dimensions.depth - selection.offset_z;

    return selection;
}
// Copies the selected pixels from `input_data` to `output_data`, converting each pixel from the
// input specification to the output specification. When `transform` is set it is invoked on
// every pixel's components between reading and writing.
// Returns an error if either layout fails validation (stride smaller than the image, or the
// unsupported Bitmap data type).
ErrorOr<void> PixelConverter::convert(void const* input_data, void* output_data, Function<void(FloatVector4&)> transform)
{
    // Verify pixel data specifications
    auto validate_image_data_layout = [](GPU::ImageDataLayout const& specification) -> ErrorOr<void> {
        if (specification.packing.row_stride > 0
            && specification.dimensions.width > specification.packing.row_stride)
            return Error::from_string_view("Width exceeds the row stride"sv);

        if (specification.packing.depth_stride > 0
            && specification.dimensions.height > specification.packing.depth_stride)
            return Error::from_string_view("Height exceeds the depth stride"sv);

        // NOTE: GL_BITMAP is removed from current OpenGL specs. Since it is largely unsupported and it
        //       requires extra logic (i.e. 8 vs. 1 pixel packing/unpacking), we also do not support it.
        if (specification.pixel_type.data_type == GPU::PixelDataType::Bitmap)
            return Error::from_string_view("Bitmap is unsupported"sv);

        return {};
    };
    TRY(validate_image_data_layout(m_input_specification));
    TRY(validate_image_data_layout(m_output_specification));

    // Restrain input and output selection:
    // - selection dimensions should be equal
    // - selection offsets cannot be negative
    // - selection bounds cannot exceed the image dimensions
    auto const& input_dimensions = m_input_specification.dimensions;
    auto const& output_dimensions = m_output_specification.dimensions;
    auto input_selection = restrain_selection_within_dimensions(m_input_specification.selection, input_dimensions);
    auto const& output_selection = restrain_selection_within_dimensions(m_output_specification.selection, output_dimensions);

    input_selection.width = min(input_selection.width, output_selection.width);
    input_selection.height = min(input_selection.height, output_selection.height);
    input_selection.depth = min(input_selection.depth, output_selection.depth);

    // Set up copy parameters
    auto const& input_packing = m_input_specification.packing;
    auto const input_pixels_per_row = input_packing.row_stride > 0 ? input_packing.row_stride : input_dimensions.width;
    auto const input_pixel_size_in_bytes = pixel_size_in_bytes(m_input_specification.pixel_type);
    auto const input_row_width_bytes = input_pixels_per_row * input_pixel_size_in_bytes;
    auto const input_byte_alignment = input_packing.byte_alignment;
    // Round the row width up to the next multiple of the byte alignment.
    auto const input_row_stride = input_row_width_bytes + (input_byte_alignment - input_row_width_bytes % input_byte_alignment) % input_byte_alignment;
    auto const input_rows_per_image = input_packing.depth_stride > 0 ? input_packing.depth_stride : input_dimensions.height;
    auto const input_depth_stride = input_rows_per_image * input_row_stride;

    auto const& output_packing = m_output_specification.packing;
    auto const output_pixels_per_row = output_packing.row_stride > 0 ? output_packing.row_stride : output_dimensions.width;
    auto const output_pixel_size_in_bytes = pixel_size_in_bytes(m_output_specification.pixel_type);
    auto const output_row_width_bytes = output_pixels_per_row * output_pixel_size_in_bytes;
    auto const output_byte_alignment = output_packing.byte_alignment;
    // Round the row width up to the next multiple of the byte alignment.
    auto const output_row_stride = output_row_width_bytes + (output_byte_alignment - output_row_width_bytes % output_byte_alignment) % output_byte_alignment;
    auto const output_rows_per_image = output_packing.depth_stride > 0 ? output_packing.depth_stride : output_dimensions.height;
    auto const output_depth_stride = output_rows_per_image * output_row_stride;

    // Copy all pixels from input to output
    auto input_bytes = reinterpret_cast<u8 const*>(input_data);
    auto output_bytes = reinterpret_cast<u8*>(output_data);
    auto output_z = output_selection.offset_z;
    for (u32 input_z = input_selection.offset_z; input_z < input_selection.offset_z + input_selection.depth; ++input_z) {
        auto output_y = output_selection.offset_y;
        for (u32 input_y = input_selection.offset_y; input_y < input_selection.offset_y + input_selection.height; ++input_y) {
            auto const* input_scanline = &input_bytes[input_z * input_depth_stride
                + input_y * input_row_stride
                + input_selection.offset_x * input_pixel_size_in_bytes];
            auto* output_scanline = &output_bytes[output_z * output_depth_stride
                + output_y * output_row_stride
                + output_selection.offset_x * output_pixel_size_in_bytes];
            // read_pixel() and write_pixel() advance the scanline pointers as they go.
            for (u32 input_x = input_selection.offset_x; input_x < input_selection.offset_x + input_selection.width; ++input_x) {
                auto pixel_components = read_pixel(&input_scanline);
                if (transform)
                    transform(pixel_components);
                write_pixel(&output_scanline, pixel_components);
            }
            // Keep the output row in step with the input row.
            ++output_y;
        }
        // Keep the output layer in step with the input layer.
        ++output_z;
    }

    return {};
}