diff --git a/AK/Debug.h.in b/AK/Debug.h.in
index 76cac4735fe..278c1fd5752 100644
--- a/AK/Debug.h.in
+++ b/AK/Debug.h.in
@@ -258,6 +258,14 @@
 #cmakedefine01 MARKDOWN_DEBUG
 #endif
 
+#ifndef MATROSKA_DEBUG
+#cmakedefine01 MATROSKA_DEBUG
+#endif
+
+#ifndef MATROSKA_TRACE_DEBUG
+#cmakedefine01 MATROSKA_TRACE_DEBUG
+#endif
+
 #ifndef MEMORY_DEBUG
 #cmakedefine01 MEMORY_DEBUG
 #endif
diff --git a/Meta/CMake/all_the_debug_macros.cmake b/Meta/CMake/all_the_debug_macros.cmake
index c8646d35419..8919fc2377b 100644
--- a/Meta/CMake/all_the_debug_macros.cmake
+++ b/Meta/CMake/all_the_debug_macros.cmake
@@ -101,6 +101,8 @@ set(LOCK_TRACE_DEBUG ON)
 set(LOOKUPSERVER_DEBUG ON)
 set(MALLOC_DEBUG ON)
 set(MARKDOWN_DEBUG ON)
 set(MASTERPTY_DEBUG ON)
+set(MATROSKA_DEBUG ON)
+set(MATROSKA_TRACE_DEBUG ON)
 set(MBR_DEBUG ON)
 set(MEMORY_DEBUG ON)
diff --git a/Userland/Libraries/CMakeLists.txt b/Userland/Libraries/CMakeLists.txt
index d4c55908b73..5a7543656a2 100644
--- a/Userland/Libraries/CMakeLists.txt
+++ b/Userland/Libraries/CMakeLists.txt
@@ -41,6 +41,7 @@ add_subdirectory(LibTextCodec)
 add_subdirectory(LibThreading)
 add_subdirectory(LibTLS)
 add_subdirectory(LibTTF)
+add_subdirectory(LibVideo)
 add_subdirectory(LibVT)
 add_subdirectory(LibWasm)
 add_subdirectory(LibWeb)
diff --git a/Userland/Libraries/LibVideo/CMakeLists.txt b/Userland/Libraries/LibVideo/CMakeLists.txt
new file mode 100644
index 00000000000..b0ec1f6753e
--- /dev/null
+++ b/Userland/Libraries/LibVideo/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(SOURCES
+    MatroskaDocument.h
+    MatroskaReader.cpp
+)
+
+serenity_lib(LibVideo video)
+target_link_libraries(LibVideo LibAudio LibCore LibIPC)
diff --git a/Userland/Libraries/LibVideo/MatroskaDocument.h b/Userland/Libraries/LibVideo/MatroskaDocument.h
new file mode 100644
index 00000000000..8da2ca71b3c
--- /dev/null
+++ b/Userland/Libraries/LibVideo/MatroskaDocument.h
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2021, Hunter Salyer
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/ByteBuffer.h>
+#include <AK/FlyString.h>
+#include <AK/HashMap.h>
+#include <AK/NonnullOwnPtrVector.h>
+#include <AK/OwnPtr.h>
+#include <AK/String.h>
+#include <AK/Utf8View.h>
+
+namespace Video {
+
+// The fields of the EBML header that the reader keeps.
+struct EBMLHeader {
+    String doc_type;
+    u32 doc_type_version { 0 };
+};
+
+// Values read from the segment's information element.
+class SegmentInformation {
+public:
+    u64 timestamp_scale() const { return m_timestamp_scale; }
+    void set_timestamp_scale(u64 timestamp_scale) { m_timestamp_scale = timestamp_scale; }
+    Utf8View muxing_app() const { return Utf8View(m_muxing_app); }
+    void set_muxing_app(String muxing_app) { m_muxing_app = move(muxing_app); }
+    Utf8View writing_app() const { return Utf8View(m_writing_app); }
+    void set_writing_app(String writing_app) { m_writing_app = move(writing_app); }
+
+private:
+    u64 m_timestamp_scale { 1'000'000 };
+    String m_muxing_app;
+    String m_writing_app;
+};
+
+// Description of one track. The video/audio details are only handed out when they match track_type().
+class TrackEntry {
+public:
+    enum TrackType : u8 {
+        Invalid = 0,
+        Video = 1,
+        Audio = 2,
+        Complex = 3,
+        Logo = 16,
+        Subtitle = 17,
+        Buttons = 18,
+        Control = 32,
+        Metadata = 33,
+    };
+
+    struct VideoTrack {
+        u64 pixel_width;
+        u64 pixel_height;
+    };
+
+    struct AudioTrack {
+        u64 channels;
+        u64 bit_depth;
+    };
+
+    u64 track_number() const { return m_track_number; }
+    void set_track_number(u64 track_number) { m_track_number = track_number; }
+    u64 track_uid() const { return m_track_uid; }
+    void set_track_uid(u64 track_uid) { m_track_uid = track_uid; }
+    TrackType track_type() const { return m_track_type; }
+    void set_track_type(TrackType track_type) { m_track_type = track_type; }
+    FlyString language() const { return m_language; }
+    void set_language(const FlyString& language) { m_language = language; }
+    FlyString codec_id() const { return m_codec_id; }
+    void set_codec_id(const FlyString& codec_id) { m_codec_id = codec_id; }
+    Optional<VideoTrack> video_track() const
+    {
+        if (track_type() != Video)
+            return {};
+        return m_video_track;
+    }
+    void set_video_track(VideoTrack video_track) { m_video_track = video_track; }
+    Optional<AudioTrack> audio_track() const
+    {
+        if (track_type() != Audio)
+            return {};
+        return m_audio_track;
+    }
+    void set_audio_track(AudioTrack audio_track) { m_audio_track = audio_track; }
+
+private:
+    u64 m_track_number { 0 };
+    u64 m_track_uid { 0 };
+    TrackType m_track_type { Invalid };
+    FlyString m_language = "eng";
+    FlyString m_codec_id;
+
+    // Zero-initialized so that a track without a Video/Audio element never exposes indeterminate values.
+    union {
+        VideoTrack m_video_track {};
+        AudioTrack m_audio_track;
+    };
+};
+
+// One block of a cluster: its header fields plus the frames it carries.
+class Block {
+public:
+    enum Lacing : u8 {
+        None = 0b00,
+        XIPH = 0b01,
+        FixedSize = 0b10,
+        EBML = 0b11,
+    };
+
+    Block() = default;
+
+    u64 track_number() const { return m_track_number; }
+    void set_track_number(u64 track_number) { m_track_number = track_number; }
+    i16 timestamp() const { return m_timestamp; }
+    void set_timestamp(i16 timestamp) { m_timestamp = timestamp; }
+    bool only_keyframes() const { return m_only_keyframes; }
+    void set_only_keyframes(bool only_keyframes) { m_only_keyframes = only_keyframes; }
+    bool invisible() const { return m_invisible; }
+    void set_invisible(bool invisible) { m_invisible = invisible; }
+    Lacing lacing() const { return m_lacing; }
+    void set_lacing(Lacing lacing) { m_lacing = lacing; }
+    bool discardable() const { return m_discardable; }
+    void set_discardable(bool discardable) { m_discardable = discardable; }
+    u64 frame_count() const { return m_frames.size(); }
+    const ByteBuffer& frame(size_t index) const { return m_frames.at(index); }
+    // Takes the buffer by value so that callers handing over a temporary do not pay for a copy.
+    void add_frame(ByteBuffer frame) { m_frames.append(move(frame)); }
+
+private:
+    u64 m_track_number { 0 };
+    i16 m_timestamp { 0 };
+    bool m_only_keyframes { false };
+    bool m_invisible { false };
+    Lacing m_lacing { None };
+    bool m_discardable { true };
+    Vector<ByteBuffer> m_frames;
+};
+
+// A cluster: a timestamp and the blocks read from it.
+class Cluster {
+public:
+    u64 timestamp() const { return m_timestamp; }
+    void set_timestamp(u64 timestamp) { m_timestamp = timestamp; }
+    NonnullOwnPtrVector<Block>& blocks() { return m_blocks; }
+    const NonnullOwnPtrVector<Block>& blocks() const { return m_blocks; }
+
+private:
+    u64 m_timestamp { 0 };
+    NonnullOwnPtrVector<Block> m_blocks;
+};
+
+// The parsed file: EBML header, segment information, tracks by track number, and clusters.
+class MatroskaDocument {
+public:
+    explicit MatroskaDocument(EBMLHeader header)
+        : m_header(move(header))
+    {
+    }
+
+    const EBMLHeader& header() const { return m_header; }
+
+    Optional<SegmentInformation> segment_information() const
+    {
+        if (!m_segment_information)
+            return {};
+        return *m_segment_information;
+    }
+    void set_segment_information(OwnPtr<SegmentInformation> segment_information) { m_segment_information = move(segment_information); }
+    const HashMap<u64, NonnullOwnPtr<TrackEntry>>& tracks() const { return m_tracks; }
+    Optional<TrackEntry> track_for_track_number(u64 track_number) const
+    {
+        auto track = m_tracks.get(track_number);
+        if (!track.has_value())
+            return {};
+        return *track.value();
+    }
+    Optional<TrackEntry> track_for_track_type(TrackEntry::TrackType type) const
+    {
+        for (auto& track_entry : m_tracks) {
+            if (track_entry.value->track_type() == type)
+                return *track_entry.value;
+        }
+        return {};
+    }
+    void add_track(u64 track_number, NonnullOwnPtr<TrackEntry> track)
+    {
+        m_tracks.set(track_number, move(track));
+    }
+    NonnullOwnPtrVector<Cluster>& clusters() { return m_clusters; }
+
+private:
+    EBMLHeader m_header;
+    OwnPtr<SegmentInformation> m_segment_information;
+    HashMap<u64, NonnullOwnPtr<TrackEntry>> m_tracks;
+    NonnullOwnPtrVector<Cluster> m_clusters;
+};
+
+}
diff --git a/Userland/Libraries/LibVideo/MatroskaReader.cpp b/Userland/Libraries/LibVideo/MatroskaReader.cpp
new file mode 100644
index 00000000000..723401f2b24
--- /dev/null
+++ b/Userland/Libraries/LibVideo/MatroskaReader.cpp
@@ -0,0 +1,507 @@
+/*
+ * Copyright (c) 2021, Hunter Salyer
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include "MatroskaReader.h"
+#include <AK/Function.h>
+#include <AK/MappedFile.h>
+#include <AK/Optional.h>
+#include <AK/Utf8View.h>
+
+namespace Video {
+
+#define CHECK_HAS_VALUE(x) \
+    if (!(x).has_value())  \
+        return false
+
+// Element IDs as they appear in the stream, i.e. with the VINT length marker bits still set.
+constexpr u32 EBML_MASTER_ELEMENT_ID = 0x1A45DFA3;
+constexpr u32 SEGMENT_ELEMENT_ID = 0x18538067;
+constexpr u32 DOCTYPE_ELEMENT_ID = 0x4282;
+constexpr u32 DOCTYPE_VERSION_ELEMENT_ID = 0x4287;
+constexpr u32 SEGMENT_INFORMATION_ELEMENT_ID = 0x1549A966;
+constexpr u32 TRACK_ELEMENT_ID = 0x1654AE6B;
+constexpr u32 CLUSTER_ELEMENT_ID = 0x1F43B675;
+constexpr u32 TIMESTAMP_SCALE_ID = 0x2AD7B1;
+constexpr u32 MUXING_APP_ID = 0x4D80;
+constexpr u32 WRITING_APP_ID = 0x5741;
+constexpr u32 TRACK_ENTRY_ID = 0xAE;
+constexpr u32 TRACK_NUMBER_ID = 0xD7;
+constexpr u32 TRACK_UID_ID = 0x73C5;
+constexpr u32 TRACK_TYPE_ID = 0x83;
+constexpr u32 TRACK_LANGUAGE_ID = 0x22B59C;
+constexpr u32 TRACK_CODEC_ID = 0x86;
+constexpr u32 TRACK_VIDEO_ID = 0xE0;
+constexpr u32 TRACK_AUDIO_ID = 0xE1;
+constexpr u32 PIXEL_WIDTH_ID = 0xB0;
+constexpr u32 PIXEL_HEIGHT_ID = 0xBA;
+constexpr u32 CHANNELS_ID = 0x9F;
+constexpr u32 BIT_DEPTH_ID = 0x6264;
+constexpr u32 SIMPLE_BLOCK_ID = 0xA3;
+constexpr u32 TIMESTAMP_ID = 0xE7;
+
+// Maps the file at `path` and parses it; yields null when the file cannot be mapped or parsed.
+OwnPtr<MatroskaDocument> MatroskaReader::parse_matroska_from_file(const StringView& path)
+{
+    auto mapped_file_result = MappedFile::map(path);
+    if (mapped_file_result.is_error())
+        return {};
+
+    auto mapped_file = mapped_file_result.release_value();
+    return parse_matroska_from_data(static_cast<const u8*>(mapped_file->data()), mapped_file->size());
+}
+
+OwnPtr<MatroskaDocument> MatroskaReader::parse_matroska_from_data(const u8* data, size_t size)
+{
+    MatroskaReader reader(data, size);
+    return reader.parse();
+}
+
+// Parses the EBML header followed by the segment; yields null as soon as either is malformed.
+OwnPtr<MatroskaDocument> MatroskaReader::parse()
+{
+    auto first_element_id = m_streamer.read_variable_size_integer(false);
+    // Check before logging: the trace line dereferences the value.
+    if (!first_element_id.has_value())
+        return {};
+    dbgln_if(MATROSKA_TRACE_DEBUG, "First element ID is {:#010x}\n", first_element_id.value());
+    if (first_element_id.value() != EBML_MASTER_ELEMENT_ID)
+        return {};
+
+    auto header = parse_ebml_header();
+    if (!header.has_value())
+        return {};
+
+    dbgln_if(MATROSKA_DEBUG, "Parsed EBML header");
+
+    auto root_element_id = m_streamer.read_variable_size_integer(false);
+    if (!root_element_id.has_value() || root_element_id.value() != SEGMENT_ELEMENT_ID)
+        return {};
+
+    auto matroska_document = make<MatroskaDocument>(header.value());
+
+    auto segment_parse_success = parse_segment_elements(*matroska_document);
+    if (!segment_parse_success)
+        return {};
+
+    return matroska_document;
+}
+
+// Reads a master element's size, then hands every child element ID to element_consumer, which must
+// consume that child's size and data. Stops once the declared size is used up or on the first failure.
+bool MatroskaReader::parse_master_element([[maybe_unused]] const StringView& element_name, Function<bool(u64)> element_consumer)
+{
+    auto element_data_size = m_streamer.read_variable_size_integer();
+    CHECK_HAS_VALUE(element_data_size);
+    dbgln_if(MATROSKA_DEBUG, "{} has {} octets of data.", element_name, element_data_size.value());
+
+    bool success = true;
+    m_streamer.push_octets_read();
+    while (m_streamer.octets_read() < element_data_size.value()) {
+        dbgln_if(MATROSKA_TRACE_DEBUG, "====== Reading element ======");
+        auto optional_element_id = m_streamer.read_variable_size_integer(false);
+        if (!optional_element_id.has_value()) {
+            success = false;
+            break;
+        }
+
+        auto element_id = optional_element_id.value();
+        dbgln_if(MATROSKA_TRACE_DEBUG, "{:s} element ID is {:#010x}\n", element_name, element_id);
+
+        if (!element_consumer(element_id)) {
+            dbgln_if(MATROSKA_DEBUG, "{:s} consumer failed on ID {:#010x}\n", element_name.to_string().characters(), element_id);
+            success = false;
+            break;
+        }
+
+        dbgln_if(MATROSKA_TRACE_DEBUG, "Read {} octets of the {} so far.", m_streamer.octets_read(), element_name);
+    }
+    // Pop on the failure paths too, so every push_octets_read() stays paired with a pop.
+    m_streamer.pop_octets_read();
+
+    return success;
+}
+
+Optional<EBMLHeader> MatroskaReader::parse_ebml_header()
+{
+    EBMLHeader header;
+    auto success = parse_master_element("Header", [&](u64 element_id) {
+        if (element_id == DOCTYPE_ELEMENT_ID) {
+            auto doc_type = read_string_element();
+            CHECK_HAS_VALUE(doc_type);
+            header.doc_type = doc_type.value();
+            dbgln_if(MATROSKA_DEBUG, "Read DocType attribute: {}", doc_type.value());
+        } else if (element_id == DOCTYPE_VERSION_ELEMENT_ID) {
+            auto doc_type_version = read_u64_element();
+            CHECK_HAS_VALUE(doc_type_version);
+            header.doc_type_version = doc_type_version.value();
+            dbgln_if(MATROSKA_DEBUG, "Read DocTypeVersion attribute: {}", doc_type_version.value());
+        } else {
+            return read_unknown_element();
+        }
+
+        return true;
+    });
+
+    if (!success)
+        return {};
+    return header;
+}
+
+bool MatroskaReader::parse_segment_elements(MatroskaDocument& matroska_document)
+{
+    dbgln_if(MATROSKA_DEBUG, "Parsing segment elements");
+    auto success = parse_master_element("Segment", [&](u64 element_id) {
+        if (element_id == SEGMENT_INFORMATION_ELEMENT_ID) {
+            auto segment_information = parse_information();
+            if (!segment_information)
+                return false;
+            matroska_document.set_segment_information(move(segment_information));
+        } else if (element_id == TRACK_ELEMENT_ID) {
+            return parse_tracks(matroska_document);
+        } else if (element_id == CLUSTER_ELEMENT_ID) {
+            auto cluster = parse_cluster();
+            if (!cluster)
+                return false;
+            matroska_document.clusters().append(cluster.release_nonnull());
+        } else {
+            return read_unknown_element();
+        }
+
+        return true;
+    });
+
+    dbgln_if(MATROSKA_DEBUG, "Success {}", success);
+    return success;
+}
+
+OwnPtr<SegmentInformation> MatroskaReader::parse_information()
+{
+    auto segment_information = make<SegmentInformation>();
+    auto success = parse_master_element("Segment Information", [&](u64 element_id) {
+        if (element_id == TIMESTAMP_SCALE_ID) {
+            auto timestamp_scale = read_u64_element();
+            CHECK_HAS_VALUE(timestamp_scale);
+            segment_information->set_timestamp_scale(timestamp_scale.value());
+            dbgln_if(MATROSKA_DEBUG, "Read TimestampScale attribute: {}", timestamp_scale.value());
+        } else if (element_id == MUXING_APP_ID) {
+            auto muxing_app = read_string_element();
+            CHECK_HAS_VALUE(muxing_app);
+            segment_information->set_muxing_app(muxing_app.value());
+            dbgln_if(MATROSKA_DEBUG, "Read MuxingApp attribute: {}", muxing_app.value());
+        } else if (element_id == WRITING_APP_ID) {
+            auto writing_app = read_string_element();
+            CHECK_HAS_VALUE(writing_app);
+            segment_information->set_writing_app(writing_app.value());
+            dbgln_if(MATROSKA_DEBUG, "Read WritingApp attribute: {}", writing_app.value());
+        } else {
+            return read_unknown_element();
+        }
+
+        return true;
+    });
+
+    if (!success)
+        return {};
+    return segment_information;
+}
+
+bool MatroskaReader::parse_tracks(MatroskaDocument& matroska_document)
+{
+    auto success = parse_master_element("Tracks", [&](u64 element_id) {
+        if (element_id == TRACK_ENTRY_ID) {
+            dbgln_if(MATROSKA_DEBUG, "Parsing track");
+            auto track_entry = parse_track_entry();
+            if (!track_entry)
+                return false;
+            auto track_number = track_entry->track_number();
+            matroska_document.add_track(track_number, track_entry.release_nonnull());
+            dbgln_if(MATROSKA_DEBUG, "Track {} added to document", track_number);
+        } else {
+            return read_unknown_element();
+        }
+
+        return true;
+    });
+
+    return success;
+}
+
+OwnPtr<TrackEntry> MatroskaReader::parse_track_entry()
+{
+    auto track_entry = make<TrackEntry>();
+    auto success = parse_master_element("Track", [&](u64 element_id) {
+        if (element_id == TRACK_NUMBER_ID) {
+            auto track_number = read_u64_element();
+            CHECK_HAS_VALUE(track_number);
+            track_entry->set_track_number(track_number.value());
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackNumber attribute: {}", track_number.value());
+        } else if (element_id == TRACK_UID_ID) {
+            auto track_uid = read_u64_element();
+            CHECK_HAS_VALUE(track_uid);
+            track_entry->set_track_uid(track_uid.value());
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackUID attribute: {}", track_uid.value());
+        } else if (element_id == TRACK_TYPE_ID) {
+            auto track_type = read_u64_element();
+            CHECK_HAS_VALUE(track_type);
+            track_entry->set_track_type(static_cast<TrackEntry::TrackType>(track_type.value()));
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackType attribute: {}", track_type.value());
+        } else if (element_id == TRACK_LANGUAGE_ID) {
+            auto language = read_string_element();
+            CHECK_HAS_VALUE(language);
+            track_entry->set_language(language.value());
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's Language attribute: {}", language.value());
+        } else if (element_id == TRACK_CODEC_ID) {
+            auto codec_id = read_string_element();
+            CHECK_HAS_VALUE(codec_id);
+            track_entry->set_codec_id(codec_id.value());
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's CodecID attribute: {}", codec_id.value());
+        } else if (element_id == TRACK_VIDEO_ID) {
+            auto video_track = parse_video_track_information();
+            CHECK_HAS_VALUE(video_track);
+            track_entry->set_video_track(video_track.value());
+        } else if (element_id == TRACK_AUDIO_ID) {
+            auto audio_track = parse_audio_track_information();
+            CHECK_HAS_VALUE(audio_track);
+            track_entry->set_audio_track(audio_track.value());
+        } else {
+            return read_unknown_element();
+        }
+
+        return true;
+    });
+
+    if (!success)
+        return {};
+    return track_entry;
+}
+
+Optional<TrackEntry::VideoTrack> MatroskaReader::parse_video_track_information()
+{
+    TrackEntry::VideoTrack video_track {};
+
+    auto success = parse_master_element("VideoTrack", [&](u64 element_id) {
+        if (element_id == PIXEL_WIDTH_ID) {
+            auto pixel_width = read_u64_element();
+            CHECK_HAS_VALUE(pixel_width);
+            video_track.pixel_width = pixel_width.value();
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelWidth attribute: {}", pixel_width.value());
+        } else if (element_id == PIXEL_HEIGHT_ID) {
+            auto pixel_height = read_u64_element();
+            CHECK_HAS_VALUE(pixel_height);
+            video_track.pixel_height = pixel_height.value();
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelHeight attribute: {}", pixel_height.value());
+        } else {
+            return read_unknown_element();
+        }
+
+        return true;
+    });
+
+    if (!success)
+        return {};
+    return video_track;
+}
+
+Optional<TrackEntry::AudioTrack> MatroskaReader::parse_audio_track_information()
+{
+    TrackEntry::AudioTrack audio_track {};
+
+    auto success = parse_master_element("AudioTrack", [&](u64 element_id) {
+        if (element_id == CHANNELS_ID) {
+            auto channels = read_u64_element();
+            CHECK_HAS_VALUE(channels);
+            audio_track.channels = channels.value();
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's Channels attribute: {}", channels.value());
+        } else if (element_id == BIT_DEPTH_ID) {
+            auto bit_depth = read_u64_element();
+            CHECK_HAS_VALUE(bit_depth);
+            audio_track.bit_depth = bit_depth.value();
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's BitDepth attribute: {}", bit_depth.value());
+        } else {
+            return read_unknown_element();
+        }
+
+        return true;
+    });
+
+    if (!success)
+        return {};
+    return audio_track;
+}
+
+OwnPtr<Cluster> MatroskaReader::parse_cluster()
+{
+    auto cluster = make<Cluster>();
+
+    auto success = parse_master_element("Cluster", [&](u64 element_id) {
+        if (element_id == SIMPLE_BLOCK_ID) {
+            auto simple_block = parse_simple_block();
+            if (!simple_block)
+                return false;
+            cluster->blocks().append(simple_block.release_nonnull());
+        } else if (element_id == TIMESTAMP_ID) {
+            auto timestamp = read_u64_element();
+            if (!timestamp.has_value())
+                return false;
+            cluster->set_timestamp(timestamp.value());
+        } else {
+            auto success = read_unknown_element();
+            if (!success)
+                return false;
+        }
+
+        return true;
+    });
+
+    if (!success)
+        return {};
+    return cluster;
+}
+
+// Parses a SimpleBlock: track number, timestamp and flags, then the frames, split up according to the
+// lacing mode. Yields null when the block does not fit its declared size or the remaining data.
+OwnPtr<Block> MatroskaReader::parse_simple_block()
+{
+    auto block = make<Block>();
+
+    auto content_size = m_streamer.read_variable_size_integer();
+    if (!content_size.has_value())
+        return {};
+
+    auto octets_read_before_track_number = m_streamer.octets_read();
+    auto track_number = m_streamer.read_variable_size_integer();
+    if (!track_number.has_value())
+        return {};
+    block->set_track_number(track_number.value());
+
+    if (m_streamer.remaining() < 3)
+        return {};
+    block->set_timestamp(m_streamer.read_i16());
+
+    auto flags = m_streamer.read_octet();
+    block->set_only_keyframes(flags & (1u << 7u));
+    block->set_invisible(flags & (1u << 3u));
+    block->set_lacing(static_cast<Block::Lacing>((flags & 0b110u) >> 1u));
+    block->set_discardable(flags & 1u);
+
+    // The payload has to fit both the declared element size and the data that is actually left;
+    // checking once up front keeps every copy below in bounds.
+    u64 block_header_size = m_streamer.octets_read() - octets_read_before_track_number;
+    if (content_size.value() < block_header_size)
+        return {};
+    u64 total_frame_content_size = content_size.value() - block_header_size;
+    if (m_streamer.remaining() < total_frame_content_size)
+        return {};
+
+    Vector<u64> frame_sizes;
+    u64 trailing_octets = 0;
+    if (block->lacing() == Block::Lacing::None) {
+        frame_sizes.append(total_frame_content_size);
+    } else {
+        // Every laced payload starts with one octet holding the frame count minus one.
+        if (total_frame_content_size < 1)
+            return {};
+        auto octets_read_before_lacing_header = m_streamer.octets_read();
+        size_t frame_count = static_cast<size_t>(m_streamer.read_octet()) + 1;
+        frame_sizes.ensure_capacity(frame_count);
+
+        if (block->lacing() == Block::Lacing::FixedSize) {
+            // No sizes are stored: the octets after the count are split evenly between the frames.
+            u64 laced_data_size = total_frame_content_size - 1;
+            for (size_t i = 0; i < frame_count; i++)
+                frame_sizes.append(laced_data_size / frame_count);
+            trailing_octets = laced_data_size % frame_count;
+        } else {
+            // Xiph and EBML lacing store the sizes of all frames but the last, which takes what is left.
+            u64 frame_size_sum = 0;
+            u64 previous_frame_size = 0;
+            for (size_t i = 0; i + 1 < frame_count; i++) {
+                u64 frame_size = 0;
+                if (block->lacing() == Block::Lacing::XIPH) {
+                    // A Xiph size is the sum of a run of octets that ends with the first octet below 255.
+                    u8 size_octet = 0;
+                    do {
+                        if (!m_streamer.has_octet())
+                            return {};
+                        size_octet = m_streamer.read_octet();
+                        frame_size += size_octet;
+                    } while (size_octet == 0xFF);
+                } else if (i == 0) {
+                    auto first_frame_size = m_streamer.read_variable_size_integer();
+                    if (!first_frame_size.has_value())
+                        return {};
+                    frame_size = first_frame_size.value();
+                } else {
+                    // Later EBML sizes are stored as signed differences to the previous frame's size.
+                    auto frame_size_difference = m_streamer.read_variable_sized_signed_integer();
+                    if (!frame_size_difference.has_value())
+                        return {};
+                    if (frame_size_difference.value() < 0) {
+                        u64 decrease = static_cast<u64>(-frame_size_difference.value());
+                        if (decrease > previous_frame_size)
+                            return {};
+                        frame_size = previous_frame_size - decrease;
+                    } else {
+                        frame_size = previous_frame_size + static_cast<u64>(frame_size_difference.value());
+                    }
+                }
+                if (frame_size > total_frame_content_size - frame_size_sum)
+                    return {};
+                frame_sizes.append(frame_size);
+                frame_size_sum += frame_size;
+                previous_frame_size = frame_size;
+            }
+
+            u64 lacing_header_size = m_streamer.octets_read() - octets_read_before_lacing_header;
+            if (lacing_header_size > total_frame_content_size - frame_size_sum)
+                return {};
+            frame_sizes.append(total_frame_content_size - frame_size_sum - lacing_header_size);
+        }
+    }
+
+    for (auto frame_size : frame_sizes) {
+        block->add_frame(ByteBuffer::copy(m_streamer.data(), frame_size));
+        m_streamer.drop_octets(frame_size);
+    }
+    // Skip what fixed-size lacing could not divide evenly, so the next element starts where expected.
+    m_streamer.drop_octets(trailing_octets);
+    return block;
+}
+
+Optional<String> MatroskaReader::read_string_element()
+{
+    auto string_length = m_streamer.read_variable_size_integer();
+    if (!string_length.has_value() || m_streamer.remaining() < string_length.value())
+        return {};
+    auto string_value = String(m_streamer.data_as_chars(), string_length.value());
+    m_streamer.drop_octets(string_length.value());
+    return string_value;
+}
+
+// Reads an unsigned integer element: a size of at most eight octets, then that many big-endian octets.
+Optional<u64> MatroskaReader::read_u64_element()
+{
+    auto integer_length = m_streamer.read_variable_size_integer();
+    if (!integer_length.has_value() || integer_length.value() > sizeof(u64) || m_streamer.remaining() < integer_length.value())
+        return {};
+    u64 result = 0;
+    for (size_t i = 0; i < integer_length.value(); i++)
+        result = (result << 8u) + m_streamer.read_octet();
+    return result;
+}
+
+// Skips an element the reader has no handler for.
+bool MatroskaReader::read_unknown_element()
+{
+    auto element_length = m_streamer.read_variable_size_integer();
+    if (!element_length.has_value() || m_streamer.remaining() < element_length.value())
+        return false;
+
+    m_streamer.drop_octets(element_length.value());
+    return true;
+}
+
+}
diff --git a/Userland/Libraries/LibVideo/MatroskaReader.h b/Userland/Libraries/LibVideo/MatroskaReader.h
new file mode 100644
index 00000000000..61aae73582d
--- /dev/null
+++ b/Userland/Libraries/LibVideo/MatroskaReader.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2021, Hunter Salyer
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include "MatroskaDocument.h"
+#include <AK/Debug.h>
+#include <AK/NonnullOwnPtrVector.h>
+#include <AK/Optional.h>
+#include <AK/OwnPtr.h>
+#include <math.h>
+
+namespace Video {
+
+// Parses Matroska data held in memory into a MatroskaDocument.
+class MatroskaReader {
+public:
+    MatroskaReader(const u8* data, size_t size)
+        : m_streamer(data, size)
+    {
+    }
+
+    static OwnPtr<MatroskaDocument> parse_matroska_from_file(const StringView& path);
+    static OwnPtr<MatroskaDocument> parse_matroska_from_data(const u8*, size_t);
+
+    OwnPtr<MatroskaDocument> parse();
+
+private:
+    // Cursor over the input. It keeps a stack of "octets read" counters so that nested master
+    // elements can each tell how much of their own data has been consumed.
+    class Streamer {
+    public:
+        Streamer(const u8* data, size_t size)
+            : m_data_ptr(data)
+            , m_size_remaining(size)
+        {
+        }
+
+        const u8* data() { return m_data_ptr; }
+
+        const char* data_as_chars() { return reinterpret_cast<const char*>(m_data_ptr); }
+
+        u8 read_octet()
+        {
+            VERIFY(m_size_remaining >= 1);
+            m_size_remaining--;
+            m_octets_read.last()++;
+            return *(m_data_ptr++);
+        }
+
+        // Reads a big-endian 16-bit value; the caller has to make sure two octets are available.
+        i16 read_i16()
+        {
+            // Two statements, because the operands of | may be evaluated in either order.
+            u8 high_octet = read_octet();
+            u8 low_octet = read_octet();
+            return (high_octet << 8) | low_octet;
+        }
+
+        size_t octets_read() { return m_octets_read.last(); }
+
+        void push_octets_read() { m_octets_read.append(0); }
+
+        void pop_octets_read()
+        {
+            auto popped = m_octets_read.take_last();
+            if (!m_octets_read.is_empty())
+                m_octets_read.last() += popped;
+        }
+
+        // Reads an EBML variable-size integer. With mask_length set to false the length marker bit is
+        // kept in the result, which is the form element IDs are compared in.
+        Optional<u64> read_variable_size_integer(bool mask_length = true)
+        {
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Reading from offset {:p}", m_data_ptr);
+            if (!has_octet()) {
+                dbgln_if(MATROSKA_TRACE_DEBUG, "Ran out of stream data");
+                return {};
+            }
+            auto length_descriptor = read_octet();
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT, first byte is {:#02x}", length_descriptor);
+            if (length_descriptor == 0)
+                return {};
+            auto length = variable_size_integer_length(length_descriptor);
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT of total length {}", length);
+
+            u64 result;
+            if (mask_length)
+                result = length_descriptor & ~(1u << (8 - length));
+            else
+                result = length_descriptor;
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Beginning of VINT is {:#02x}", result);
+            for (size_t i = 1; i < length; i++) {
+                if (!has_octet()) {
+                    dbgln_if(MATROSKA_TRACE_DEBUG, "Ran out of stream data");
+                    return {};
+                }
+                u8 next_octet = read_octet();
+                dbgln_if(MATROSKA_TRACE_DEBUG, "Read octet of {:#02x}", next_octet);
+                result = (result << 8u) | next_octet;
+                dbgln_if(MATROSKA_TRACE_DEBUG, "New result is {:#010x}", result);
+            }
+            return result;
+        }
+
+        // Reads a variable-size integer and re-centers it around zero, as used for EBML lacing size differences.
+        Optional<i64> read_variable_sized_signed_integer()
+        {
+            if (!has_octet())
+                return {};
+            auto length_descriptor = read_octet();
+            if (length_descriptor == 0)
+                return {};
+            auto length = variable_size_integer_length(length_descriptor);
+
+            i64 result = length_descriptor & ~(1u << (8 - length));
+            for (size_t i = 1; i < length; i++) {
+                if (!has_octet()) {
+                    return {};
+                }
+                u8 next_octet = read_octet();
+                result = (result << 8u) | next_octet;
+            }
+            // Integer shift instead of pow(): exact for every length and free of floating point.
+            result -= (static_cast<i64>(1) << (length * 7 - 1)) - 1;
+            return result;
+        }
+
+        void drop_octets(size_t num_octets)
+        {
+            VERIFY(m_size_remaining >= num_octets);
+            m_size_remaining -= num_octets;
+            m_octets_read.last() += num_octets;
+            m_data_ptr += num_octets;
+        }
+
+        bool at_end() const { return !m_size_remaining; }
+        bool has_octet() const { return m_size_remaining >= 1; }
+
+        size_t remaining() const { return m_size_remaining; }
+        void set_remaining(size_t remaining) { m_size_remaining = remaining; }
+
+    private:
+        // Length in octets of the VINT that starts with the given non-zero octet: the position of
+        // its highest set bit, counted from the top.
+        static size_t variable_size_integer_length(u8 length_descriptor)
+        {
+            size_t length = 1;
+            while (length < 8 && !(length_descriptor & (1u << (8 - length))))
+                length++;
+            return length;
+        }
+
+        const u8* m_data_ptr { nullptr };
+        size_t m_size_remaining { 0 };
+        Vector<size_t> m_octets_read { 0 };
+    };
+
+    bool parse_master_element(const StringView& element_name, Function<bool(u64)> element_consumer);
+    Optional<EBMLHeader> parse_ebml_header();
+
+    bool parse_segment_elements(MatroskaDocument&);
+    OwnPtr<SegmentInformation> parse_information();
+
+    bool parse_tracks(MatroskaDocument&);
+    OwnPtr<TrackEntry> parse_track_entry();
+    Optional<TrackEntry::VideoTrack> parse_video_track_information();
+    Optional<TrackEntry::AudioTrack> parse_audio_track_information();
+    OwnPtr<Cluster> parse_cluster();
+    OwnPtr<Block> parse_simple_block();
+
+    Optional<String> read_string_element();
+    Optional<u64> read_u64_element();
+    bool read_unknown_element();
+
+    Streamer m_streamer;
+};
+
+}