LibMedia: Give frame timestamps to FFmpeg decoders

H.264 in Matroska can have blocks with out-of-order timestamps. Unless
these are passed into the FFmpeg decoder as the packets' presentation
timestamps, the decoder will not return frames in chronological order.

VideoFrame now carries a timestamp, which the PlaybackManager uses
instead of assuming that each decoded frame has the same timestamp as
the sample most recently returned by the demuxer.

commit f6a4973578 (parent 084cf68dd5)
Author: Zaggy1024
Date:   2024-06-19 18:29:12 -05:00
Committed by: Andrew Kaster

11 changed files with 42 additions and 26 deletions
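
As background for the first paragraph of the commit message: FFmpeg decoders accept packets in decode order and return frames in presentation order, and the pts carried on each AVPacket is what ends up on the corresponding output AVFrame. Below is a minimal sketch of that contract against the plain FFmpeg API — send_and_drain(), sample_data/sample_size/sample_pts, and the consume() callback are illustrative stand-ins, not LibMedia code:

extern "C" {
#include <libavcodec/avcodec.h>
}

void consume(int64_t pts, AVFrame*); // hypothetical downstream consumer

// Feed one coded sample to an opened AVCodecContext with its presentation
// timestamp attached, then drain whatever frames are ready.
static void send_and_drain(AVCodecContext* codec_context, uint8_t* sample_data, int sample_size, int64_t sample_pts)
{
    AVPacket* packet = av_packet_alloc();
    packet->data = sample_data;
    packet->size = sample_size;
    packet->pts = sample_pts; // presentation timestamp from the container
    packet->dts = sample_pts; // Matroska blocks carry no separate DTS

    if (avcodec_send_packet(codec_context, packet) == 0) {
        AVFrame* frame = av_frame_alloc();
        // With B-frames, the decoder buffers input and emits frames in
        // presentation order; each frame's pts comes from the packet it was
        // decoded from, so omitting it leaves the output order unknowable
        // to downstream consumers.
        while (avcodec_receive_frame(codec_context, frame) == 0)
            consume(frame->pts, frame);
        av_frame_free(&frame);
    }
    av_packet_free(&packet);
}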


@@ -11,6 +11,6 @@ extern "C" int LLVMFuzzerTestOneInput(u8 const* data, size_t size)
 {
     AK::set_debug_enabled(false);
     Media::Video::VP9::Decoder vp9_decoder;
-    (void)vp9_decoder.receive_sample({ data, size });
+    (void)vp9_decoder.receive_sample(Duration::zero(), { data, size });
     return 0;
 }


@@ -37,7 +37,7 @@ static inline void decode_video(StringView path, size_t expected_frame_count, T
     auto block = block_result.release_value();
     for (auto const& frame : block.frames()) {
-        MUST(decoder->receive_sample(frame));
+        MUST(decoder->receive_sample(block.timestamp(), frame));
         while (true) {
             auto frame_result = decoder->get_decoded_frame();
             if (frame_result.is_error()) {


@@ -35,7 +35,7 @@ TEST_CASE(vp9_malformed_frame)
     for (auto test_input : test_inputs) {
         auto file = MUST(Core::MappedFile::map(test_input));
         Media::Video::VP9::Decoder vp9_decoder;
-        auto maybe_decoder_error = vp9_decoder.receive_sample(file->bytes());
+        auto maybe_decoder_error = vp9_decoder.receive_sample(Duration::zero(), file->bytes());
         EXPECT(maybe_decoder_error.is_error());
     }
 }


@@ -101,12 +101,14 @@ FFmpegVideoDecoder::~FFmpegVideoDecoder()
     avcodec_free_context(&m_codec_context);
 }

-DecoderErrorOr<void> FFmpegVideoDecoder::receive_sample(ReadonlyBytes sample)
+DecoderErrorOr<void> FFmpegVideoDecoder::receive_sample(Duration timestamp, ReadonlyBytes sample)
 {
     VERIFY(sample.size() < NumericLimits<int>::max());

     m_packet->data = const_cast<u8*>(sample.data());
     m_packet->size = static_cast<int>(sample.size());
+    m_packet->pts = timestamp.to_microseconds();
+    m_packet->dts = m_packet->pts;

     auto result = avcodec_send_packet(m_codec_context, m_packet);
     switch (result) {
@@ -187,7 +189,8 @@ DecoderErrorOr<NonnullOwnPtr<VideoFrame>> FFmpegVideoDecoder::get_decoded_frame(
     auto size = Gfx::Size<u32> { m_frame->width, m_frame->height };

-    auto frame = DECODER_TRY_ALLOC(SubsampledYUVFrame::try_create(size, bit_depth, cicp, subsampling));
+    auto timestamp = Duration::from_microseconds(m_frame->pts);
+    auto frame = DECODER_TRY_ALLOC(SubsampledYUVFrame::try_create(timestamp, size, bit_depth, cicp, subsampling));

     for (u32 plane = 0; plane < 3; plane++) {
         VERIFY(m_frame->linesize[plane] != 0);


@@ -19,7 +19,7 @@ public:
     FFmpegVideoDecoder(AVCodecContext* codec_context, AVPacket* packet, AVFrame* frame);
     ~FFmpegVideoDecoder();

-    DecoderErrorOr<void> receive_sample(ReadonlyBytes sample) override;
+    DecoderErrorOr<void> receive_sample(Duration timestamp, ReadonlyBytes sample) override;
     DecoderErrorOr<NonnullOwnPtr<VideoFrame>> get_decoded_frame() override;

 private:


@@ -215,7 +215,7 @@ void PlaybackManager::decode_and_queue_one_sample()
         auto sample = sample_result.release_value();

         // Submit the sample to the decoder.
-        auto decode_result = m_decoder->receive_sample(sample.data());
+        auto decode_result = m_decoder->receive_sample(sample.timestamp(), sample.data());
         if (decode_result.is_error()) {
             item_to_enqueue = FrameQueueItem::error_marker(decode_result.release_error(), sample.timestamp());
             break;
@@ -262,9 +262,9 @@ void PlaybackManager::decode_and_queue_one_sample()
         auto bitmap_result = decoded_frame->to_bitmap();

         if (bitmap_result.is_error())
-            item_to_enqueue = FrameQueueItem::error_marker(bitmap_result.release_error(), sample.timestamp());
+            item_to_enqueue = FrameQueueItem::error_marker(bitmap_result.release_error(), decoded_frame->timestamp());
         else
-            item_to_enqueue = FrameQueueItem::frame(bitmap_result.release_value(), sample.timestamp());
+            item_to_enqueue = FrameQueueItem::frame(bitmap_result.release_value(), decoded_frame->timestamp());
         break;
     }
 }
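
The switch from sample.timestamp() to decoded_frame->timestamp() at the enqueue sites is the heart of the change: once the decoder reorders its output, the frame returned by get_decoded_frame() need not correspond to the sample that was just submitted. A made-up illustration, reusing the names from the hunk above (timestamps in milliseconds, one B-frame; error handling elided):

// Blocks arrive from the demuxer in decode order:  pts = 0, 66, 33
// Frames leave the decoder in presentation order:  pts = 0, 33, 66
//
// After submitting the block stamped 33, the decoder can emit the frames
// stamped 33 and 66 back to back, so only the frame itself knows which
// timestamp it belongs to:
auto decoded_frame = MUST(m_decoder->get_decoded_frame());
auto item = FrameQueueItem::frame(MUST(decoded_frame->to_bitmap()), decoded_frame->timestamp());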


@@ -25,12 +25,12 @@ Decoder::Decoder()
 {
 }

-DecoderErrorOr<void> Decoder::receive_sample(ReadonlyBytes chunk_data)
+DecoderErrorOr<void> Decoder::receive_sample(Duration timestamp, ReadonlyBytes chunk_data)
 {
     auto superframe_sizes = m_parser->parse_superframe_sizes(chunk_data);

     if (superframe_sizes.is_empty()) {
-        return decode_frame(chunk_data);
+        return decode_frame(timestamp, chunk_data);
     }

     size_t offset = 0;
@@ -41,14 +41,14 @@ DecoderErrorOr<void> Decoder::receive_sample(ReadonlyBytes chunk_data)
         if (checked_size.has_overflow() || checked_size.value() > chunk_data.size())
             return DecoderError::with_description(DecoderErrorCategory::Corrupted, "Superframe size invalid"sv);
         auto frame_data = chunk_data.slice(offset, superframe_size);
-        TRY(decode_frame(frame_data));
+        TRY(decode_frame(timestamp, frame_data));
         offset = checked_size.value();
     }

     return {};
 }

-DecoderErrorOr<void> Decoder::decode_frame(ReadonlyBytes frame_data)
+DecoderErrorOr<void> Decoder::decode_frame(Duration timestamp, ReadonlyBytes frame_data)
 {
     // 1. The syntax elements for the coded frame are extracted as specified in sections 6 and 7. The syntax
     //    tables include function calls indicating when the block decode processes should be triggered.
@@ -69,11 +69,11 @@ DecoderErrorOr<void> Decoder::decode_frame(ReadonlyBytes frame_data)
     if (frame_context.shows_a_frame()) {
         switch (frame_context.color_config.bit_depth) {
         case 8:
-            TRY(create_video_frame<u8>(frame_context));
+            TRY(create_video_frame<u8>(timestamp, frame_context));
             break;
         case 10:
         case 12:
-            TRY(create_video_frame<u16>(frame_context));
+            TRY(create_video_frame<u16>(timestamp, frame_context));
             break;
         }
     }
@@ -143,7 +143,7 @@ inline CodingIndependentCodePoints get_cicp_color_space(FrameContext const& fram
 }

 template<typename T>
-DecoderErrorOr<void> Decoder::create_video_frame(FrameContext const& frame_context)
+DecoderErrorOr<void> Decoder::create_video_frame(Duration timestamp, FrameContext const& frame_context)
 {
     // (8.9) Output process
@@ -162,6 +162,7 @@ DecoderErrorOr<void> Decoder::create_video_frame(FrameContext const& frame_conte
     auto output_uv_size = subsampling.subsampled_size(output_y_size);

     auto frame = DECODER_TRY_ALLOC(SubsampledYUVFrame::try_create(
+        timestamp,
         { output_y_size.width(), output_y_size.height() },
         frame_context.color_config.bit_depth, get_cicp_color_space(frame_context),
         subsampling));


@@ -28,7 +28,7 @@ public:
     Decoder();
     ~Decoder() override { }

     /* (8.1) General */
-    DecoderErrorOr<void> receive_sample(ReadonlyBytes) override;
+    DecoderErrorOr<void> receive_sample(Duration timestamp, ReadonlyBytes) override;

     DecoderErrorOr<NonnullOwnPtr<VideoFrame>> get_decoded_frame() override;
@@ -41,9 +41,9 @@ private:
     // Based on the maximum for TXSize.
     static constexpr size_t maximum_transform_size = 32ULL * 32ULL;

-    DecoderErrorOr<void> decode_frame(ReadonlyBytes);
+    DecoderErrorOr<void> decode_frame(Duration timestamp, ReadonlyBytes);
     template<typename T>
-    DecoderErrorOr<void> create_video_frame(FrameContext const&);
+    DecoderErrorOr<void> create_video_frame(Duration timestamp, FrameContext const&);

     DecoderErrorOr<void> allocate_buffers(FrameContext const&);
     Vector<u16>& get_output_buffer(u8 plane);


@@ -8,6 +8,7 @@

 #include <AK/ByteBuffer.h>
 #include <AK/NonnullOwnPtr.h>
+#include <AK/Time.h>

 #include "DecoderError.h"
@@ -17,8 +18,8 @@ class VideoDecoder {
 public:
     virtual ~VideoDecoder() {};

-    virtual DecoderErrorOr<void> receive_sample(ReadonlyBytes sample) = 0;
-    DecoderErrorOr<void> receive_sample(ByteBuffer const& sample) { return receive_sample(sample.span()); }
+    virtual DecoderErrorOr<void> receive_sample(Duration timestamp, ReadonlyBytes sample) = 0;
+    DecoderErrorOr<void> receive_sample(Duration timestamp, ByteBuffer const& sample) { return receive_sample(timestamp, sample.span()); }

     virtual DecoderErrorOr<NonnullOwnPtr<VideoFrame>> get_decoded_frame() = 0;
 };
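
With the interface settled, every LibMedia decoder is driven the same way: pass each coded sample's container timestamp into receive_sample(), then drain get_decoded_frame() and trust the timestamp attached to each returned VideoFrame. A rough usage sketch follows — the demuxer loop and the on_frame() sink are hypothetical stand-ins, and error handling is reduced to "stop when the decoder wants more input":

while (auto sample = demuxer.next_video_sample()) {
    // The timestamp enters the decoder alongside the coded bytes...
    TRY(decoder->receive_sample(sample->timestamp(), sample->data()));

    // ...and comes back attached to each decoded frame, already correct
    // even when the codec reordered frames internally.
    while (true) {
        auto frame_result = decoder->get_decoded_frame();
        if (frame_result.is_error())
            break;
        auto frame = frame_result.release_value();
        on_frame(frame->timestamp(), move(frame));
    }
}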


@@ -13,6 +13,7 @@
 namespace Media {

 ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> SubsampledYUVFrame::try_create(
+    Duration timestamp,
     Gfx::Size<u32> size,
     u8 bit_depth, CodingIndependentCodePoints cicp,
     Subsampling subsampling)
@@ -35,16 +36,17 @@ ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> SubsampledYUVFrame::try_create(
     auto* u_buffer = TRY(alloc_buffer(uv_data_size));
     auto* v_buffer = TRY(alloc_buffer(uv_data_size));

-    return adopt_nonnull_own_or_enomem(new (nothrow) SubsampledYUVFrame(size, bit_depth, cicp, subsampling, y_buffer, u_buffer, v_buffer));
+    return adopt_nonnull_own_or_enomem(new (nothrow) SubsampledYUVFrame(timestamp, size, bit_depth, cicp, subsampling, y_buffer, u_buffer, v_buffer));
 }

 ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> SubsampledYUVFrame::try_create_from_data(
+    Duration timestamp,
     Gfx::Size<u32> size,
     u8 bit_depth, CodingIndependentCodePoints cicp,
     Subsampling subsampling,
     ReadonlyBytes y_data, ReadonlyBytes u_data, ReadonlyBytes v_data)
 {
-    auto frame = TRY(try_create(size, bit_depth, cicp, subsampling));
+    auto frame = TRY(try_create(timestamp, size, bit_depth, cicp, subsampling));

     size_t component_size = bit_depth > 8 ? sizeof(u16) : sizeof(u8);
     auto y_data_size = size.to_type<size_t>().area() * component_size;


@@ -8,6 +8,7 @@

 #include <AK/ByteBuffer.h>
 #include <AK/FixedArray.h>
+#include <AK/Time.h>
 #include <LibGfx/Bitmap.h>
 #include <LibGfx/Size.h>
 #include <LibMedia/Color/CodingIndependentCodePoints.h>
@@ -30,6 +31,8 @@ public:
         return bitmap;
     }

+    inline Duration timestamp() const { return m_timestamp; }
+
     inline Gfx::Size<u32> size() const { return m_size; }
     inline u32 width() const { return size().width(); }
     inline u32 height() const { return size().height(); }
@@ -38,14 +41,17 @@ public:
     inline CodingIndependentCodePoints& cicp() { return m_cicp; }

 protected:
-    VideoFrame(Gfx::Size<u32> size,
+    VideoFrame(Duration timestamp,
+        Gfx::Size<u32> size,
         u8 bit_depth, CodingIndependentCodePoints cicp)
-        : m_size(size)
+        : m_timestamp(timestamp)
+        , m_size(size)
         , m_bit_depth(bit_depth)
         , m_cicp(cicp)
     {
     }

+    Duration m_timestamp;
     Gfx::Size<u32> m_size;
     u8 m_bit_depth;
     CodingIndependentCodePoints m_cicp;
@@ -55,22 +61,25 @@ class SubsampledYUVFrame : public VideoFrame {
 public:
     static ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> try_create(
+        Duration timestamp,
         Gfx::Size<u32> size,
         u8 bit_depth, CodingIndependentCodePoints cicp,
         Subsampling subsampling);
     static ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> try_create_from_data(
+        Duration timestamp,
         Gfx::Size<u32> size,
         u8 bit_depth, CodingIndependentCodePoints cicp,
         Subsampling subsampling,
         ReadonlyBytes y_data, ReadonlyBytes u_data, ReadonlyBytes v_data);

     SubsampledYUVFrame(
+        Duration timestamp,
         Gfx::Size<u32> size,
         u8 bit_depth, CodingIndependentCodePoints cicp,
         Subsampling subsampling,
         u8* plane_y_data, u8* plane_u_data, u8* plane_v_data)
-        : VideoFrame(size, bit_depth, cicp)
+        : VideoFrame(timestamp, size, bit_depth, cicp)
         , m_subsampling(subsampling)
         , m_y_buffer(plane_y_data)
         , m_u_buffer(plane_u_data)