From 64598473cc6e46556c4822ac60be6aaf29eda3fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?kleines=20Filmr=C3=B6llchen?= Date: Thu, 12 Oct 2023 22:36:23 +0200 Subject: [PATCH] LibRIFF: Rework to match LibGfx needs There are now two namespaces, RIFF (little-endian) and IFF (big-endian), which (for the most part) contain the same kinds of structures for handling similar data in both formats. (They also share almost all of their implementation.) The main types are ChunkHeader and (Owned)Chunk. While Chunk has no ownership over the data it accesses (and can only be constructed from a byte view), OwnedChunk has ownership over this data and is aimed at reading from streams. OwnedList, implementing the standard RIFF LIST type, is currently only implemented for RIFF due to its only user being WAV, but it may be generalized in the future for use by IFF. Co-authored-by: Timothy Flynn --- Userland/Libraries/LibAudio/WavLoader.cpp | 30 +++--- Userland/Libraries/LibAudio/WavLoader.h | 4 +- Userland/Libraries/LibRIFF/CMakeLists.txt | 3 +- Userland/Libraries/LibRIFF/ChunkID.h | 63 +++++++++++ Userland/Libraries/LibRIFF/Decoding.cpp | 28 +++++ Userland/Libraries/LibRIFF/Details.cpp | 125 ++++++++++++++++++++++ Userland/Libraries/LibRIFF/Details.h | 74 +++++++++++++ Userland/Libraries/LibRIFF/IFF.h | 24 +++++ Userland/Libraries/LibRIFF/RIFF.h | 35 ++++++ Userland/Libraries/LibRIFF/Types.cpp | 76 ------------- Userland/Libraries/LibRIFF/Types.h | 49 --------- 11 files changed, 366 insertions(+), 145 deletions(-) create mode 100644 Userland/Libraries/LibRIFF/ChunkID.h create mode 100644 Userland/Libraries/LibRIFF/Decoding.cpp create mode 100644 Userland/Libraries/LibRIFF/Details.cpp create mode 100644 Userland/Libraries/LibRIFF/Details.h create mode 100644 Userland/Libraries/LibRIFF/IFF.h create mode 100644 Userland/Libraries/LibRIFF/RIFF.h delete mode 100644 Userland/Libraries/LibRIFF/Types.cpp delete mode 100644 Userland/Libraries/LibRIFF/Types.h diff --git 
a/Userland/Libraries/LibAudio/WavLoader.cpp b/Userland/Libraries/LibAudio/WavLoader.cpp index b4edfa795b6..3f25ab2ed19 100644 --- a/Userland/Libraries/LibAudio/WavLoader.cpp +++ b/Userland/Libraries/LibAudio/WavLoader.cpp @@ -185,16 +185,12 @@ MaybeLoaderError WavLoaderPlugin::parse_header() } \ } while (0) - auto riff = TRY(m_stream->read_value()); - CHECK(riff == RIFF::riff_magic, LoaderError::Category::Format, "RIFF header magic invalid"); + auto file_header = TRY(m_stream->read_value()); + CHECK(file_header.magic() == RIFF::riff_magic, LoaderError::Category::Format, "RIFF header magic invalid"); + CHECK(file_header.subformat == Wav::wave_subformat_id, LoaderError::Category::Format, "WAVE subformat id invalid"); - TRY(m_stream->read_value>()); // File size header - - auto wave = TRY(m_stream->read_value()); - CHECK(wave == Wav::wave_subformat_id, LoaderError::Category::Format, "WAVE subformat id invalid"); - - auto format_chunk = TRY(m_stream->read_value()); - CHECK(format_chunk.id.as_ascii_string() == Wav::format_chunk_id, LoaderError::Category::Format, "FMT chunk id invalid"); + auto format_chunk = TRY(m_stream->read_value()); + CHECK(format_chunk.id().as_ascii_string() == Wav::format_chunk_id, LoaderError::Category::Format, "FMT chunk id invalid"); auto format_stream = format_chunk.data_stream(); u16 audio_format = TRY(format_stream.read_value>()); @@ -212,7 +208,7 @@ MaybeLoaderError WavLoaderPlugin::parse_header() u16 bits_per_sample = TRY(format_stream.read_value>()); if (audio_format == to_underlying(Wav::WaveFormat::Extensible)) { - CHECK(format_chunk.size == 40, LoaderError::Category::Format, "Extensible fmt size is not 40 bytes"); + CHECK(format_chunk.size() == 40, LoaderError::Category::Format, "Extensible fmt size is not 40 bytes"); // Discard everything until the GUID. // We've already read 16 bytes from the stream. The GUID starts in another 8 bytes. 
@@ -260,9 +256,9 @@ MaybeLoaderError WavLoaderPlugin::parse_header() found_data = true; } else { TRY(m_stream->seek(-RIFF::chunk_id_size, SeekMode::FromCurrentPosition)); - auto chunk = TRY(m_stream->read_value()); - if (chunk.id == RIFF::list_chunk_id) { - auto maybe_list = chunk.data_stream().read_value(); + auto chunk = TRY(m_stream->read_value()); + if (chunk.id() == RIFF::list_chunk_id) { + auto maybe_list = chunk.data_stream().read_value(); if (maybe_list.is_error()) { dbgln("WAV Warning: LIST chunk invalid, error: {}", maybe_list.release_error()); continue; @@ -278,7 +274,7 @@ MaybeLoaderError WavLoaderPlugin::parse_header() dbgln("Unhandled WAV list of type {} with {} subchunks", list.type.as_ascii_string(), list.chunks.size()); } } else { - dbgln_if(AWAVLOADER_DEBUG, "Unhandled WAV chunk of type {}, size {} bytes", chunk.id.as_ascii_string(), chunk.size); + dbgln_if(AWAVLOADER_DEBUG, "Unhandled WAV chunk of type {}, size {} bytes", chunk.id().as_ascii_string(), chunk.size()); } } } @@ -299,12 +295,12 @@ MaybeLoaderError WavLoaderPlugin::parse_header() // http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Docs/riffmci.pdf page 23 (LIST type) // We only recognize the relevant official metadata types; types added in later errata of RIFF are not relevant for audio. -MaybeLoaderError WavLoaderPlugin::load_wav_info_block(Vector info_chunks) +MaybeLoaderError WavLoaderPlugin::load_wav_info_block(Vector info_chunks) { for (auto const& chunk : info_chunks) { - auto metadata_name = chunk.id.as_ascii_string(); + auto metadata_name = chunk.id().as_ascii_string(); // Chunk contents are zero-terminated strings "ZSTR", so we just drop the null terminator. - StringView metadata_text { chunk.data.span().trim(chunk.data.size() - 1) }; + StringView metadata_text { chunk.data().trim(chunk.size() - 1) }; // Note that we assume chunks to be unique, since that seems to almost always be the case. // Worst case we just drop some metadata. 
if (metadata_name == "IART"sv) { diff --git a/Userland/Libraries/LibAudio/WavLoader.h b/Userland/Libraries/LibAudio/WavLoader.h index d6dfa1a41c8..9e6f6919de8 100644 --- a/Userland/Libraries/LibAudio/WavLoader.h +++ b/Userland/Libraries/LibAudio/WavLoader.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace Audio { @@ -46,7 +46,7 @@ public: private: MaybeLoaderError parse_header(); - MaybeLoaderError load_wav_info_block(Vector info_chunks); + MaybeLoaderError load_wav_info_block(Vector info_chunks); LoaderSamples samples_from_pcm_data(ReadonlyBytes data, size_t samples_to_read) const; template diff --git a/Userland/Libraries/LibRIFF/CMakeLists.txt b/Userland/Libraries/LibRIFF/CMakeLists.txt index 4a13da1b6c0..e0c91f5c288 100644 --- a/Userland/Libraries/LibRIFF/CMakeLists.txt +++ b/Userland/Libraries/LibRIFF/CMakeLists.txt @@ -1,5 +1,6 @@ set(SOURCES - Types.cpp + Decoding.cpp + Details.cpp ) serenity_lib(LibRIFF riff) diff --git a/Userland/Libraries/LibRIFF/ChunkID.h b/Userland/Libraries/LibRIFF/ChunkID.h new file mode 100644 index 00000000000..a47e2860bf7 --- /dev/null +++ b/Userland/Libraries/LibRIFF/ChunkID.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2023, kleines Filmröllchen + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace RIFF { + +static constexpr size_t const chunk_id_size = 4; + +// Also referred to as "FourCC" (four character code) in the context of some formats. 
+struct ChunkID { + constexpr ChunkID(char const name[4]) + { + id_data[0] = static_cast(name[0]); + id_data[1] = static_cast(name[1]); + id_data[2] = static_cast(name[2]); + id_data[3] = static_cast(name[3]); + } + constexpr ChunkID(Array data) + : id_data(data) + { + } + constexpr ChunkID(ChunkID const&) = default; + constexpr ChunkID(ChunkID&&) = default; + constexpr ChunkID& operator=(ChunkID const&) = default; + static constexpr ChunkID from_big_endian_number(u32 number) { return bit_cast>(AK::convert_between_host_and_big_endian(number)); } + + static ErrorOr read_from_stream(Stream& stream); + + StringView as_ascii_string() const; + constexpr u32 as_big_endian_number() const + { + return AK::convert_between_host_and_big_endian((id_data[0] << 24) | (id_data[1] << 16) | (id_data[2] << 8) | id_data[3]); + } + + bool operator==(ChunkID const&) const = default; + bool operator==(StringView) const; + + Array id_data; +}; +static_assert(AssertSize()); + +} + +template<> +struct AK::Formatter : StandardFormatter { + ErrorOr format(FormatBuilder& builder, RIFF::ChunkID const& chunk_id) + { + TRY(builder.put_padding('\'', 1)); + TRY(builder.put_literal(chunk_id.as_ascii_string())); + TRY(builder.put_padding('\'', 1)); + return {}; + } +}; diff --git a/Userland/Libraries/LibRIFF/Decoding.cpp b/Userland/Libraries/LibRIFF/Decoding.cpp new file mode 100644 index 00000000000..e02dc064723 --- /dev/null +++ b/Userland/Libraries/LibRIFF/Decoding.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2023, kleines Filmröllchen + * Copyright (c) 2023, Nicolas Ramz + * Copyright (c) 2023, Nico Weber + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include + +ErrorOr RIFF::ChunkID::read_from_stream(Stream& stream) +{ + Array id; + TRY(stream.read_until_filled(id.span())); + return ChunkID { id }; +} + +ErrorOr RIFF::OwnedList::read_from_stream(Stream& stream) +{ + auto type = TRY(stream.read_value()); + Vector chunks; + while (!stream.is_eof()) + 
TRY(chunks.try_append(TRY(stream.read_value()))); + + return RIFF::OwnedList { .type = type, .chunks = move(chunks) }; +} diff --git a/Userland/Libraries/LibRIFF/Details.cpp b/Userland/Libraries/LibRIFF/Details.cpp new file mode 100644 index 00000000000..78487d76c37 --- /dev/null +++ b/Userland/Libraries/LibRIFF/Details.cpp @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2023, kleines Filmröllchen + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include "Details.h" +#include +#include +#include + +namespace RIFF { + +StringView ChunkID::as_ascii_string() const +{ + return StringView { id_data.span() }; +} + +bool ChunkID::operator==(StringView other_string) const +{ + return as_ascii_string() == other_string; +} + +namespace Detail { + +template +auto ChunkHeader::read_from_stream(Stream& stream) -> ErrorOr +{ + auto id = TRY(stream.read_value()); + u32 size = TRY(stream.read_value()); + return ChunkHeader { id, size }; +} + +template +auto FileHeader::read_from_stream(Stream& stream) -> ErrorOr +{ + auto header = TRY(stream.read_value()); + auto subformat = TRY(stream.read_value()); + return FileHeader { header, subformat }; +} + +template +Chunk::Chunk(HeaderType header, ReadonlyBytes data) + : m_header(header) + , m_data(data) +{ + VERIFY(data.size() == header.size); +} + +template +FixedMemoryStream Chunk::data_stream() const +{ + return FixedMemoryStream { m_data }; +} + +template +auto Chunk::decode(ReadonlyBytes data) -> ErrorOr +{ + auto data_stream = FixedMemoryStream { data }; + auto header = TRY(HeaderType::read_from_stream(data_stream)); + + if (data.size() < sizeof(HeaderType) + header.size) + return Error::from_string_literal("Not enough data for IFF/RIFF chunk"); + + return Chunk { header, data.slice(sizeof(HeaderType), header.size) }; +} + +template +auto Chunk::decode_and_advance(ReadonlyBytes& data) -> ErrorOr +{ + auto chunk = TRY(decode(data)); + data = data.slice(sizeof(HeaderType) + chunk.size()); + // add padding if needed + if (chunk.size() % 
2 != 0) { + if (data.is_empty()) + return Error::from_string_literal("Missing data for padding byte"); + if (*data.data() != 0) + return Error::from_string_literal("Padding byte is not 0"); + data = data.slice(1); + } + + return chunk; +} + +template +OwnedChunk::OwnedChunk(HeaderType header, Buffer backing_data) + : Chunk(header, backing_data.span()) + , m_backing_data(move(backing_data)) +{ +} + +template +auto OwnedChunk::read_from_stream(Stream& stream) -> ErrorOr +{ + auto header = TRY(stream.read_value()); + + auto data = TRY(Buffer::create_uninitialized(header.size)); + TRY(stream.read_until_filled(data.span())); + + // RIFF chunks may have trailing padding to align to x86 "words" (i.e. 2 bytes). + if (is(stream)) { + if (!stream.is_eof()) { + auto stream_position = TRY(static_cast(stream).tell()); + if (stream_position % 2 != 0) + TRY(static_cast(stream).seek(1, SeekMode::FromCurrentPosition)); + } + } else { + dbgln("RIFF Warning: Cannot align stream to 2-byte boundary, next chunk may be bogus!"); + } + + return OwnedChunk { header, data }; +} + +template class Chunk; +template class Chunk; +template class OwnedChunk; +template class OwnedChunk; +template struct ChunkHeader; +template struct ChunkHeader; +template struct FileHeader; +template struct FileHeader; + +} + +} diff --git a/Userland/Libraries/LibRIFF/Details.h b/Userland/Libraries/LibRIFF/Details.h new file mode 100644 index 00000000000..276eac08c25 --- /dev/null +++ b/Userland/Libraries/LibRIFF/Details.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2023, kleines Filmröllchen + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include + +// Despite the name, this header contains details for both RIFF and IFF +namespace RIFF::Detail { + +// http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Docs/riffmci.pdf page 11 (Chunks) +template +struct ChunkHeader { + static ErrorOr read_from_stream(Stream& stream); + + RIFF::ChunkID id; + u32 size; +}; + +// Standard 
RIFF/IFF file formats use a global chunk with a chunk ID (magic bytes) such as "RIFF" or "FORM". +// A chunk ID right at the start of the global chunk specifies the subformat specific to the file type. +// Example for RIFF from WebP: https://developers.google.com/speed/webp/docs/riff_container#webp_file_header +template +struct FileHeader { + HeaderType global_header; + RIFF::ChunkID subformat; + + static ErrorOr read_from_stream(Stream& stream); + + constexpr ChunkID magic() const { return global_header.id; } + constexpr u32 file_size() const { return global_header.size; } +}; + +// An RIFF or IFF chunk. +template +class Chunk { +public: + Chunk(HeaderType header, ReadonlyBytes data); + + // Note that the resulting chunk will refer to the provided data. + static ErrorOr decode(ReadonlyBytes data); + static ErrorOr decode_and_advance(ReadonlyBytes& data); + + RIFF::ChunkID id() const { return m_header.id; } + u32 size() const { return m_header.size; } + ReadonlyBytes data() const { return m_data; } + FixedMemoryStream data_stream() const; + + u8 operator[](size_t index) const { return data()[index]; } + +private: + HeaderType m_header; + ReadonlyBytes m_data; +}; + +// Owns the chunk data and can therefore be parsed from a stream. +template +class OwnedChunk : public Chunk { +public: + using Buffer = AK::Detail::ByteBuffer<0>; + OwnedChunk(HeaderType, Buffer); + + static ErrorOr read_from_stream(Stream& stream); + +private: + Buffer m_backing_data; +}; + +} diff --git a/Userland/Libraries/LibRIFF/IFF.h b/Userland/Libraries/LibRIFF/IFF.h new file mode 100644 index 00000000000..d70013efbd4 --- /dev/null +++ b/Userland/Libraries/LibRIFF/IFF.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2018-2023, the SerenityOS developers. 
+ * Copyright (c) 2023, Nico Weber + * Copyright (c) 2023, kleines Filmröllchen + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include + +// IFF chunks (as often used by Amiga, EA and more modern formats) use big-endian fields. +namespace IFF { + +using WordType = BigEndian; +using ChunkHeader = RIFF::Detail::ChunkHeader; +using FileHeader = RIFF::Detail::FileHeader; +using Chunk = RIFF::Detail::Chunk; +using OwnedChunk = RIFF::Detail::OwnedChunk; + +} diff --git a/Userland/Libraries/LibRIFF/RIFF.h b/Userland/Libraries/LibRIFF/RIFF.h new file mode 100644 index 00000000000..df7ca0f4e3a --- /dev/null +++ b/Userland/Libraries/LibRIFF/RIFF.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2018-2023, the SerenityOS developers. + * Copyright (c) 2023, kleines Filmröllchen + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include + +// RIFF chunks (as often used by Microsoft's older formats) use little-endian fields. 
+namespace RIFF { + +static constexpr StringView const riff_magic = "RIFF"sv; +static constexpr StringView const list_chunk_id = "LIST"sv; + +using WordType = LittleEndian; +using ChunkHeader = RIFF::Detail::ChunkHeader; +using FileHeader = RIFF::Detail::FileHeader; +using Chunk = RIFF::Detail::Chunk; +using OwnedChunk = RIFF::Detail::OwnedChunk; + +// http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Docs/riffmci.pdf page 23 (LIST type) +struct OwnedList { + static ErrorOr read_from_stream(Stream& stream); + + ChunkID type; + Vector chunks; +}; + +} diff --git a/Userland/Libraries/LibRIFF/Types.cpp b/Userland/Libraries/LibRIFF/Types.cpp deleted file mode 100644 index 03002e82259..00000000000 --- a/Userland/Libraries/LibRIFF/Types.cpp +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2023, kleines Filmröllchen - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include "Types.h" -#include -#include -#include -#include - -namespace RIFF { - -ErrorOr ChunkID::read_from_stream(Stream& stream) -{ - Array id; - TRY(stream.read_until_filled(id.span())); - return ChunkID { id }; -} - -ErrorOr Chunk::read_from_stream(Stream& stream) -{ - auto id = TRY(stream.read_value()); - - u32 size = TRY(stream.read_value>()); - auto data = TRY(FixedArray::create(size)); - TRY(stream.read_until_filled(data.span())); - - // RIFF chunks may have trailing padding to align to x86 "words" (i.e. 2 bytes). 
- if (is(stream)) { - if (!stream.is_eof()) { - auto stream_position = TRY(static_cast(stream).tell()); - if (stream_position % 2 != 0) - TRY(static_cast(stream).seek(1, SeekMode::FromCurrentPosition)); - } - } else { - dbgln("RIFF Warning: Cannot align stream to 2-byte boundary, next chunk may be bogus!"); - } - - return Chunk { - id, - size, - move(data), - }; -} - -ErrorOr List::read_from_stream(Stream& stream) -{ - auto type = TRY(stream.read_value()); - Vector chunks; - while (!stream.is_eof()) - TRY(chunks.try_append(TRY(stream.read_value()))); - - return List { - .type = type, - .chunks = move(chunks), - }; -} - -StringView ChunkID::as_ascii_string() const -{ - return StringView { id_data.span() }; -} - -bool ChunkID::operator==(StringView const& other_string) const -{ - return as_ascii_string() == other_string; -} - -FixedMemoryStream Chunk::data_stream() -{ - return FixedMemoryStream { data.span() }; -} - -} diff --git a/Userland/Libraries/LibRIFF/Types.h b/Userland/Libraries/LibRIFF/Types.h deleted file mode 100644 index b17829dd891..00000000000 --- a/Userland/Libraries/LibRIFF/Types.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2018-2023, the SerenityOS developers. 
- * Copyright (c) 2023, kleines Filmröllchen - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#pragma once - -#include -#include -#include -#include - -namespace RIFF { - -static constexpr StringView const riff_magic = "RIFF"sv; -static constexpr StringView const list_chunk_id = "LIST"sv; - -static constexpr size_t const chunk_id_size = 4; - -struct ChunkID { - static ErrorOr read_from_stream(Stream& stream); - StringView as_ascii_string() const; - bool operator==(ChunkID const&) const = default; - bool operator==(StringView const&) const; - - Array id_data; -}; - -// http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Docs/riffmci.pdf page 11 (Chunks) -struct Chunk { - static ErrorOr read_from_stream(Stream& stream); - FixedMemoryStream data_stream(); - - ChunkID id; - u32 size; - FixedArray data; -}; - -// http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Docs/riffmci.pdf page 23 (LIST type) -struct List { - static ErrorOr read_from_stream(Stream& stream); - - ChunkID type; - Vector chunks; -}; - -}