From f6a4973578c4692f33283aee1c089afd0cdae508 Mon Sep 17 00:00:00 2001 From: Zaggy1024 Date: Wed, 19 Jun 2024 18:29:12 -0500 Subject: [PATCH] LibMedia: Give frame timestamps to FFmpeg decoders H.264 in Matroska can have blocks with unordered timestamps. Without passing these as the presentation timestamp into the FFmpeg decoder, the frames will not be returned in chronological order. VideoFrame will now include a timestamp that is used by the PlaybackManager, rather than assuming that it is the same timestamp returned by the demuxer. --- Meta/Lagom/Fuzzers/FuzzVP9Decoder.cpp | 2 +- Tests/LibMedia/TestMediaCommon.h | 2 +- Tests/LibMedia/TestVP9Decode.cpp | 2 +- .../LibMedia/FFmpeg/FFmpegVideoDecoder.cpp | 7 +++++-- .../LibMedia/FFmpeg/FFmpegVideoDecoder.h | 2 +- Userland/Libraries/LibMedia/PlaybackManager.cpp | 6 +++--- Userland/Libraries/LibMedia/Video/VP9/Decoder.cpp | 15 ++++++++------- Userland/Libraries/LibMedia/Video/VP9/Decoder.h | 6 +++--- Userland/Libraries/LibMedia/VideoDecoder.h | 5 +++-- Userland/Libraries/LibMedia/VideoFrame.cpp | 6 ++++-- Userland/Libraries/LibMedia/VideoFrame.h | 15 ++++++++++++--- 11 files changed, 42 insertions(+), 26 deletions(-) diff --git a/Meta/Lagom/Fuzzers/FuzzVP9Decoder.cpp b/Meta/Lagom/Fuzzers/FuzzVP9Decoder.cpp index 69fe35acdb1..b2e0512e900 100644 --- a/Meta/Lagom/Fuzzers/FuzzVP9Decoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzVP9Decoder.cpp @@ -11,6 +11,6 @@ extern "C" int LLVMFuzzerTestOneInput(u8 const* data, size_t size) { AK::set_debug_enabled(false); Media::Video::VP9::Decoder vp9_decoder; - (void)vp9_decoder.receive_sample({ data, size }); + (void)vp9_decoder.receive_sample(Duration::zero(), { data, size }); return 0; } diff --git a/Tests/LibMedia/TestMediaCommon.h b/Tests/LibMedia/TestMediaCommon.h index 0b57e7388fd..92be4649fcb 100644 --- a/Tests/LibMedia/TestMediaCommon.h +++ b/Tests/LibMedia/TestMediaCommon.h @@ -37,7 +37,7 @@ static inline void decode_video(StringView path, size_t expected_frame_count, T auto block = block_result.release_value(); for (auto const& frame : block.frames()) { - MUST(decoder->receive_sample(frame)); + MUST(decoder->receive_sample(block.timestamp(), frame)); while (true) { auto frame_result = decoder->get_decoded_frame(); if (frame_result.is_error()) { diff --git a/Tests/LibMedia/TestVP9Decode.cpp b/Tests/LibMedia/TestVP9Decode.cpp index aaeafa8693c..c0bbd316ce4 100644 --- a/Tests/LibMedia/TestVP9Decode.cpp +++ b/Tests/LibMedia/TestVP9Decode.cpp @@ -35,7 +35,7 @@ TEST_CASE(vp9_malformed_frame) for (auto test_input : test_inputs) { auto file = MUST(Core::MappedFile::map(test_input)); Media::Video::VP9::Decoder vp9_decoder; - auto maybe_decoder_error = vp9_decoder.receive_sample(file->bytes()); + auto maybe_decoder_error = vp9_decoder.receive_sample(Duration::zero(), file->bytes()); EXPECT(maybe_decoder_error.is_error()); } } diff --git a/Userland/Libraries/LibMedia/FFmpeg/FFmpegVideoDecoder.cpp b/Userland/Libraries/LibMedia/FFmpeg/FFmpegVideoDecoder.cpp index f3f5af64c5a..cc9020f74d1 100644 --- a/Userland/Libraries/LibMedia/FFmpeg/FFmpegVideoDecoder.cpp +++ b/Userland/Libraries/LibMedia/FFmpeg/FFmpegVideoDecoder.cpp @@ -101,12 +101,14 @@ FFmpegVideoDecoder::~FFmpegVideoDecoder() avcodec_free_context(&m_codec_context); } -DecoderErrorOr FFmpegVideoDecoder::receive_sample(ReadonlyBytes sample) +DecoderErrorOr FFmpegVideoDecoder::receive_sample(Duration timestamp, ReadonlyBytes sample) { VERIFY(sample.size() < NumericLimits::max()); m_packet->data = const_cast(sample.data()); m_packet->size = static_cast(sample.size()); + m_packet->pts = timestamp.to_microseconds(); + m_packet->dts = m_packet->pts; auto result = avcodec_send_packet(m_codec_context, m_packet); switch (result) { @@ -187,7 +189,8 @@ DecoderErrorOr> FFmpegVideoDecoder::get_decoded_frame( auto size = Gfx::Size { m_frame->width, m_frame->height }; - auto frame = DECODER_TRY_ALLOC(SubsampledYUVFrame::try_create(size, bit_depth, cicp, subsampling)); + auto timestamp = Duration::from_microseconds(m_frame->pts); + auto frame = DECODER_TRY_ALLOC(SubsampledYUVFrame::try_create(timestamp, size, bit_depth, cicp, subsampling)); for (u32 plane = 0; plane < 3; plane++) { VERIFY(m_frame->linesize[plane] != 0); diff --git a/Userland/Libraries/LibMedia/FFmpeg/FFmpegVideoDecoder.h b/Userland/Libraries/LibMedia/FFmpeg/FFmpegVideoDecoder.h index 3ec165ba36a..7146fbcc683 100644 --- a/Userland/Libraries/LibMedia/FFmpeg/FFmpegVideoDecoder.h +++ b/Userland/Libraries/LibMedia/FFmpeg/FFmpegVideoDecoder.h @@ -19,7 +19,7 @@ public: FFmpegVideoDecoder(AVCodecContext* codec_context, AVPacket* packet, AVFrame* frame); ~FFmpegVideoDecoder(); - DecoderErrorOr receive_sample(ReadonlyBytes sample) override; + DecoderErrorOr receive_sample(Duration timestamp, ReadonlyBytes sample) override; DecoderErrorOr> get_decoded_frame() override; private: diff --git a/Userland/Libraries/LibMedia/PlaybackManager.cpp b/Userland/Libraries/LibMedia/PlaybackManager.cpp index 579ddb416e8..cfb2996cdb8 100644 --- a/Userland/Libraries/LibMedia/PlaybackManager.cpp +++ b/Userland/Libraries/LibMedia/PlaybackManager.cpp @@ -215,7 +215,7 @@ void PlaybackManager::decode_and_queue_one_sample() auto sample = sample_result.release_value(); // Submit the sample to the decoder. - auto decode_result = m_decoder->receive_sample(sample.data()); + auto decode_result = m_decoder->receive_sample(sample.timestamp(), sample.data()); if (decode_result.is_error()) { item_to_enqueue = FrameQueueItem::error_marker(decode_result.release_error(), sample.timestamp()); break; @@ -262,9 +262,9 @@ void PlaybackManager::decode_and_queue_one_sample() auto bitmap_result = decoded_frame->to_bitmap(); if (bitmap_result.is_error()) - item_to_enqueue = FrameQueueItem::error_marker(bitmap_result.release_error(), sample.timestamp()); + item_to_enqueue = FrameQueueItem::error_marker(bitmap_result.release_error(), decoded_frame->timestamp()); else - item_to_enqueue = FrameQueueItem::frame(bitmap_result.release_value(), sample.timestamp()); + item_to_enqueue = FrameQueueItem::frame(bitmap_result.release_value(), decoded_frame->timestamp()); break; } } diff --git a/Userland/Libraries/LibMedia/Video/VP9/Decoder.cpp b/Userland/Libraries/LibMedia/Video/VP9/Decoder.cpp index 5ecb233058a..a631d033aef 100644 --- a/Userland/Libraries/LibMedia/Video/VP9/Decoder.cpp +++ b/Userland/Libraries/LibMedia/Video/VP9/Decoder.cpp @@ -25,12 +25,12 @@ Decoder::Decoder() { } -DecoderErrorOr Decoder::receive_sample(ReadonlyBytes chunk_data) +DecoderErrorOr Decoder::receive_sample(Duration timestamp, ReadonlyBytes chunk_data) { auto superframe_sizes = m_parser->parse_superframe_sizes(chunk_data); if (superframe_sizes.is_empty()) { - return decode_frame(chunk_data); + return decode_frame(timestamp, chunk_data); } size_t offset = 0; @@ -41,14 +41,14 @@ DecoderErrorOr Decoder::receive_sample(ReadonlyBytes chunk_data) if (checked_size.has_overflow() || checked_size.value() > chunk_data.size()) return DecoderError::with_description(DecoderErrorCategory::Corrupted, "Superframe size invalid"sv); auto frame_data = chunk_data.slice(offset, superframe_size); - TRY(decode_frame(frame_data)); + TRY(decode_frame(timestamp, frame_data)); offset = checked_size.value(); } return {}; } -DecoderErrorOr Decoder::decode_frame(ReadonlyBytes frame_data) +DecoderErrorOr Decoder::decode_frame(Duration timestamp, ReadonlyBytes frame_data) { // 1. The syntax elements for the coded frame are extracted as specified in sections 6 and 7. The syntax // tables include function calls indicating when the block decode processes should be triggered. @@ -69,11 +69,11 @@ DecoderErrorOr Decoder::decode_frame(ReadonlyBytes frame_data) if (frame_context.shows_a_frame()) { switch (frame_context.color_config.bit_depth) { case 8: - TRY(create_video_frame(frame_context)); + TRY(create_video_frame(timestamp, frame_context)); break; case 10: case 12: - TRY(create_video_frame(frame_context)); + TRY(create_video_frame(timestamp, frame_context)); break; } } @@ -143,7 +143,7 @@ inline CodingIndependentCodePoints get_cicp_color_space(FrameContext const& fram } template -DecoderErrorOr Decoder::create_video_frame(FrameContext const& frame_context) +DecoderErrorOr Decoder::create_video_frame(Duration timestamp, FrameContext const& frame_context) { // (8.9) Output process @@ -162,6 +162,7 @@ DecoderErrorOr Decoder::create_video_frame(FrameContext const& frame_conte auto output_uv_size = subsampling.subsampled_size(output_y_size); auto frame = DECODER_TRY_ALLOC(SubsampledYUVFrame::try_create( + timestamp, { output_y_size.width(), output_y_size.height() }, frame_context.color_config.bit_depth, get_cicp_color_space(frame_context), subsampling)); diff --git a/Userland/Libraries/LibMedia/Video/VP9/Decoder.h b/Userland/Libraries/LibMedia/Video/VP9/Decoder.h index 40927affb84..33dea7a131f 100644 --- a/Userland/Libraries/LibMedia/Video/VP9/Decoder.h +++ b/Userland/Libraries/LibMedia/Video/VP9/Decoder.h @@ -28,7 +28,7 @@ public: Decoder(); ~Decoder() override { } /* (8.1) General */ - DecoderErrorOr receive_sample(ReadonlyBytes) override; + DecoderErrorOr receive_sample(Duration timestamp, ReadonlyBytes) override; DecoderErrorOr> get_decoded_frame() override; @@ -41,9 +41,9 @@ private: // Based on the maximum for TXSize. static constexpr size_t maximum_transform_size = 32ULL * 32ULL; - DecoderErrorOr decode_frame(ReadonlyBytes); + DecoderErrorOr decode_frame(Duration timestamp, ReadonlyBytes); template - DecoderErrorOr create_video_frame(FrameContext const&); + DecoderErrorOr create_video_frame(Duration timestamp, FrameContext const&); DecoderErrorOr allocate_buffers(FrameContext const&); Vector& get_output_buffer(u8 plane); diff --git a/Userland/Libraries/LibMedia/VideoDecoder.h b/Userland/Libraries/LibMedia/VideoDecoder.h index 76659d7c400..0a219ecba19 100644 --- a/Userland/Libraries/LibMedia/VideoDecoder.h +++ b/Userland/Libraries/LibMedia/VideoDecoder.h @@ -8,6 +8,7 @@ #include #include +#include #include "DecoderError.h" @@ -17,8 +18,8 @@ class VideoDecoder { public: virtual ~VideoDecoder() {}; - virtual DecoderErrorOr receive_sample(ReadonlyBytes sample) = 0; - DecoderErrorOr receive_sample(ByteBuffer const& sample) { return receive_sample(sample.span()); } + virtual DecoderErrorOr receive_sample(Duration timestamp, ReadonlyBytes sample) = 0; + DecoderErrorOr receive_sample(Duration timestamp, ByteBuffer const& sample) { return receive_sample(timestamp, sample.span()); } virtual DecoderErrorOr> get_decoded_frame() = 0; }; diff --git a/Userland/Libraries/LibMedia/VideoFrame.cpp b/Userland/Libraries/LibMedia/VideoFrame.cpp index fd101d70005..a7d39b80c42 100644 --- a/Userland/Libraries/LibMedia/VideoFrame.cpp +++ b/Userland/Libraries/LibMedia/VideoFrame.cpp @@ -13,6 +13,7 @@ namespace Media { ErrorOr> SubsampledYUVFrame::try_create( + Duration timestamp, Gfx::Size size, u8 bit_depth, CodingIndependentCodePoints cicp, Subsampling subsampling) @@ -35,16 +36,17 @@ ErrorOr> SubsampledYUVFrame::try_create( auto* u_buffer = TRY(alloc_buffer(uv_data_size)); auto* v_buffer = TRY(alloc_buffer(uv_data_size)); - return adopt_nonnull_own_or_enomem(new (nothrow) SubsampledYUVFrame(size, bit_depth, cicp, subsampling, y_buffer, u_buffer, v_buffer)); + return adopt_nonnull_own_or_enomem(new (nothrow) SubsampledYUVFrame(timestamp, size, bit_depth, cicp, subsampling, y_buffer, u_buffer, v_buffer)); } ErrorOr> SubsampledYUVFrame::try_create_from_data( + Duration timestamp, Gfx::Size size, u8 bit_depth, CodingIndependentCodePoints cicp, Subsampling subsampling, ReadonlyBytes y_data, ReadonlyBytes u_data, ReadonlyBytes v_data) { - auto frame = TRY(try_create(size, bit_depth, cicp, subsampling)); + auto frame = TRY(try_create(timestamp, size, bit_depth, cicp, subsampling)); size_t component_size = bit_depth > 8 ? sizeof(u16) : sizeof(u8); auto y_data_size = size.to_type().area() * component_size; diff --git a/Userland/Libraries/LibMedia/VideoFrame.h b/Userland/Libraries/LibMedia/VideoFrame.h index 4719b420792..0dd015b4431 100644 --- a/Userland/Libraries/LibMedia/VideoFrame.h +++ b/Userland/Libraries/LibMedia/VideoFrame.h @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -30,6 +31,8 @@ public: return bitmap; } + inline Duration timestamp() const { return m_timestamp; } + inline Gfx::Size size() const { return m_size; } inline u32 width() const { return size().width(); } inline u32 height() const { return size().height(); } @@ -38,14 +41,17 @@ public: inline CodingIndependentCodePoints& cicp() { return m_cicp; } protected: - VideoFrame(Gfx::Size size, + VideoFrame(Duration timestamp, + Gfx::Size size, u8 bit_depth, CodingIndependentCodePoints cicp) - : m_size(size) + : m_timestamp(timestamp) + , m_size(size) , m_bit_depth(bit_depth) , m_cicp(cicp) { } + Duration m_timestamp; Gfx::Size m_size; u8 m_bit_depth; CodingIndependentCodePoints m_cicp; @@ -55,22 +61,25 @@ class SubsampledYUVFrame : public VideoFrame { public: static ErrorOr> try_create( + Duration timestamp, Gfx::Size size, u8 bit_depth, CodingIndependentCodePoints cicp, Subsampling subsampling); static ErrorOr> try_create_from_data( + Duration timestamp, Gfx::Size size, u8 bit_depth, CodingIndependentCodePoints cicp, Subsampling subsampling, ReadonlyBytes y_data, ReadonlyBytes u_data, ReadonlyBytes v_data); SubsampledYUVFrame( + Duration timestamp, Gfx::Size size, u8 bit_depth, CodingIndependentCodePoints cicp, Subsampling subsampling, u8* plane_y_data, u8* plane_u_data, u8* plane_v_data) - : VideoFrame(size, bit_depth, cicp) + : VideoFrame(timestamp, size, bit_depth, cicp) , m_subsampling(subsampling) , m_y_buffer(plane_y_data) , m_u_buffer(plane_u_data)