diff --git a/Userland/Libraries/LibVideo/CMakeLists.txt b/Userland/Libraries/LibVideo/CMakeLists.txt index 4c324ecc95e..56992223711 100644 --- a/Userland/Libraries/LibVideo/CMakeLists.txt +++ b/Userland/Libraries/LibVideo/CMakeLists.txt @@ -8,6 +8,7 @@ set(SOURCES VP9/ProbabilityTables.cpp VP9/Symbols.h VP9/SyntaxElementCounter.cpp + VP9/TreeParser.cpp ) serenity_lib(LibVideo video) diff --git a/Userland/Libraries/LibVideo/VP9/Decoder.cpp b/Userland/Libraries/LibVideo/VP9/Decoder.cpp index b102c895fb3..89f6195cfd9 100644 --- a/Userland/Libraries/LibVideo/VP9/Decoder.cpp +++ b/Userland/Libraries/LibVideo/VP9/Decoder.cpp @@ -13,14 +13,16 @@ namespace Video::VP9 { return false Decoder::Decoder() + : m_probability_tables(make()) + , m_tree_parser(make(*m_probability_tables)) { - m_probability_tables = make(); } bool Decoder::parse_frame(const ByteBuffer& frame_data) { m_bit_stream = make(frame_data.data(), frame_data.size()); m_syntax_element_counter = make(); + m_tree_parser->set_bit_stream(m_bit_stream); if (!uncompressed_header()) return false; @@ -126,6 +128,8 @@ bool Decoder::uncompressed_header() } } + m_tree_parser->set_frame_is_intra(m_frame_is_intra); + if (!m_error_resilient_mode) { m_refresh_frame_context = m_bit_stream->read_bit(); m_frame_parallel_decoding_mode = m_bit_stream->read_bit(); @@ -440,10 +444,10 @@ bool Decoder::compressed_header() bool Decoder::read_tx_mode() { if (m_lossless) { - m_tx_mode = Only4x4; + m_tx_mode = Only_4x4; } else { auto tx_mode = m_bit_stream->read_literal(2); - if (tx_mode == Allow32x32) { + if (tx_mode == Allow_32x32) { tx_mode += m_bit_stream->read_literal(1); } m_tx_mode = static_cast(tx_mode); @@ -456,17 +460,17 @@ bool Decoder::tx_mode_probs() auto& tx_probs = m_probability_tables->tx_probs(); for (auto i = 0; i < TX_SIZE_CONTEXTS; i++) { for (auto j = 0; j < TX_SIZES - 3; j++) { - tx_probs[TX8x8][i][j] = diff_update_prob(tx_probs[TX8x8][i][j]); + tx_probs[TX_8x8][i][j] = diff_update_prob(tx_probs[TX_8x8][i][j]); } } for (auto i = 0; i < TX_SIZE_CONTEXTS; i++) { for (auto j = 0; j < TX_SIZES - 2; j++) { - tx_probs[TX16x16][i][j] = diff_update_prob(tx_probs[TX16x16][i][j]); + tx_probs[TX_16x16][i][j] = diff_update_prob(tx_probs[TX_16x16][i][j]); } } for (auto i = 0; i < TX_SIZE_CONTEXTS; i++) { for (auto j = 0; j < TX_SIZES - 1; j++) { - tx_probs[TX32x32][i][j] = diff_update_prob(tx_probs[TX32x32][i][j]); + tx_probs[TX_32x32][i][j] = diff_update_prob(tx_probs[TX_32x32][i][j]); } } return true; @@ -517,7 +521,8 @@ u8 Decoder::inv_recenter_nonneg(u8 v, u8 m) bool Decoder::read_coef_probs() { auto max_tx_size = tx_mode_to_biggest_tx_size[m_tx_mode]; - for (auto tx_size = TX4x4; tx_size <= max_tx_size; tx_size = static_cast(static_cast(tx_size) + 1)) { + m_tree_parser->set_max_tx_size(max_tx_size); + for (auto tx_size = TX_4x4; tx_size <= max_tx_size; tx_size = static_cast(static_cast(tx_size) + 1)) { auto update_probs = m_bit_stream->read_literal(1); if (update_probs == 1) { for (auto i = 0; i < 2; i++) { @@ -760,7 +765,9 @@ bool Decoder::decode_tile() for (auto row = m_mi_row_start; row < m_mi_row_end; row += 8) { if (!clear_left_context()) return false; + m_tree_parser->set_row(row); for (auto col = m_mi_col_start; col < m_mi_col_end; col += 8) { + m_tree_parser->set_col(col); if (!decode_partition(row, col, Block_64x64)) return false; } @@ -787,9 +794,15 @@ bool Decoder::decode_partition(u32 row, u32 col, u8 block_subsize) auto has_rows = (row + half_block_8x8) < m_mi_rows; auto has_cols = (col + half_block_8x8) < m_mi_cols; - // FIXME: Parse partition (type: T) as specified by spec in section 9.3 - (void)has_rows; - (void)has_cols; + m_tree_parser->set_has_rows(has_rows); + m_tree_parser->set_has_cols(has_cols); + m_tree_parser->set_block_subsize(block_subsize); + m_tree_parser->set_num_8x8(num_8x8); + + auto partition = m_tree_parser->parse_tree(SyntaxElementType::Partition); + dbgln("Parsed partition value {}", partition); + + // FIXME: Finish implementing partition decoding return true; } diff --git a/Userland/Libraries/LibVideo/VP9/Decoder.h b/Userland/Libraries/LibVideo/VP9/Decoder.h index 3ccc62888fa..6eaea166f5a 100644 --- a/Userland/Libraries/LibVideo/VP9/Decoder.h +++ b/Userland/Libraries/LibVideo/VP9/Decoder.h @@ -10,6 +10,7 @@ #include "LookupTables.h" #include "ProbabilityTables.h" #include "SyntaxElementCounter.h" +#include "TreeParser.h" #include #include @@ -132,6 +133,8 @@ private: u32 m_mi_col_start { 0 }; u32 m_mi_col_end { 0 }; + bool m_use_hp { false }; + TXMode m_tx_mode; ReferenceMode m_reference_mode; ReferenceFrame m_comp_fixed_ref; @@ -140,6 +143,7 @@ private: OwnPtr m_bit_stream; OwnPtr m_probability_tables; OwnPtr m_syntax_element_counter; + NonnullOwnPtr m_tree_parser; }; } diff --git a/Userland/Libraries/LibVideo/VP9/Enums.h b/Userland/Libraries/LibVideo/VP9/Enums.h index 577c2f0524b..d3af2eb8612 100644 --- a/Userland/Libraries/LibVideo/VP9/Enums.h +++ b/Userland/Libraries/LibVideo/VP9/Enums.h @@ -7,6 +7,7 @@ #pragma once #include "Symbols.h" +#include namespace Video::VP9 { @@ -47,18 +48,18 @@ enum ReferenceFrame { }; enum TXMode { - Only4x4 = 0, - Allow8x8 = 1, - Allow16x16 = 2, - Allow32x32 = 3, + Only_4x4 = 0, + Allow_8x8 = 1, + Allow_16x16 = 2, + Allow_32x32 = 3, TXModeSelect = 4, }; enum TXSize { - TX4x4 = 0, - TX8x8 = 1, - TX16x16 = 2, - TX32x32 = 3, + TX_4x4 = 0, + TX_8x8 = 1, + TX_16x16 = 2, + TX_32x32 = 3, }; enum ReferenceMode { @@ -84,4 +85,66 @@ enum BlockSubsize : u8 { Block_Invalid = BLOCK_INVALID }; +enum Partition : u8 { + PartitionNone = 0, + PartitionHorizontal = 1, + PartitionVertical = 2, + PartitionSplit = 3, +}; + +enum IntraMode : u8 { + DcPred = 0, + VPred = 1, + HPred = 2, + D45Pred = 3, + D135Pred = 4, + D117Pred = 5, + D153Pred = 6, + D207Pred = 7, + D63Pred = 8, + TmPred = 9, +}; + +enum InterMode : u8 { + NearestMv = 0, + NearMv = 1, + ZeroMv = 2, + NewMv = 3, +}; + +enum MvJoint : u8 { + MvJointZero = 0, + MvJointHnzvz = 1, + MvJointHzvnz = 2, + MvJointHnzvnz = 3, +}; + +enum MvClass : u8 { + MvClass0 = 0, + MvClass1 = 1, + MvClass2 = 2, + MvClass3 = 3, + MvClass4 = 4, + MvClass5 = 5, + MvClass6 = 6, + MvClass7 = 7, + MvClass8 = 8, + MvClass9 = 9, + MvClass10 = 10, +}; + +enum Token : u8 { + ZeroToken = 0, + OneToken = 1, + TwoToken = 2, + ThreeToken = 3, + FourToken = 4, + DctValCat1 = 5, + DctValCat2 = 6, + DctValCat3 = 7, + DctValCat4 = 8, + DctValCat5 = 9, + DctValCat6 = 10, +}; + } diff --git a/Userland/Libraries/LibVideo/VP9/LookupTables.h b/Userland/Libraries/LibVideo/VP9/LookupTables.h index 66ea5473ff7..ec7266e3886 100644 --- a/Userland/Libraries/LibVideo/VP9/LookupTables.h +++ b/Userland/Libraries/LibVideo/VP9/LookupTables.h @@ -12,7 +12,7 @@ namespace Video::VP9 { static constexpr InterpolationFilter literal_to_type[4] = { EightTapSmooth, EightTap, EightTapSharp, Bilinear }; -static constexpr TXSize tx_mode_to_biggest_tx_size[TX_MODES] = { TX4x4, TX8x8, TX16x16, TX32x32, TX32x32 }; +static constexpr TXSize tx_mode_to_biggest_tx_size[TX_MODES] = { TX_4x4, TX_8x8, TX_16x16, TX_32x32, TX_32x32 }; static constexpr u8 segmentation_feature_bits[SEG_LVL_MAX] = { 8, 6, 2, 0 }; static constexpr bool segmentation_feature_signed[SEG_LVL_MAX] = { true, true, false, false }; static constexpr u8 inv_map_table[MAX_PROB] = { @@ -30,7 +30,6 @@ static constexpr u8 inv_map_table[MAX_PROB] = { 226, 227, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 253 }; -static constexpr u8 num_8x8_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8 }; static constexpr BlockSubsize subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = { { // PARTITION_NONE @@ -98,4 +97,91 @@ static constexpr BlockSubsize subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = { } }; +static constexpr int partition_tree[6] = { + -PartitionNone, 2, + -PartitionHorizontal, 4, + -PartitionVertical, -PartitionSplit +}; +static constexpr int cols_partition_tree[2] = { -PartitionHorizontal, -PartitionSplit }; +static constexpr int rows_partition_tree[2] = { -PartitionVertical, -PartitionSplit }; +static constexpr int intra_mode_tree[18] = { + -DcPred, 2, + -TmPred, 4, + -VPred, 6, + 8, 12, + -HPred, 10, + -D135Pred, -D117Pred, + -D45Pred, 14, + -D63Pred, 16, + -D153Pred, -D207Pred +}; +static constexpr int segment_tree[14] = { + 2, 4, 6, 8, 10, 12, + 0, -1, -2, -3, -4, -5, -6, -7 +}; +static constexpr int binary_tree[2] = { 0, -1 }; +static constexpr int tx_size_32_tree[6] = { + -TX_4x4, 2, + -TX_8x8, 4, + -TX_16x16, -TX_32x32 +}; +static constexpr int tx_size_16_tree[4] = { + -TX_4x4, 2, + -TX_8x8, -TX_16x16 +}; +static constexpr int tx_size_8_tree[2] = { -TX_4x4, -TX_8x8 }; +static constexpr int inter_mode_tree[6] = { + -(ZeroMv - NearestMv), 2, + -(NearestMv - NearestMv), 4, + -(NearMv - NearestMv), -(NewMv - NearestMv) +}; +static constexpr int interp_filter_tree[4] = { + -EightTap, 2, + -EightTapSmooth, -EightTapSharp +}; +static constexpr int mv_joint_tree[6] = { + -MvJointZero, 2, + -MvJointHnzvz, 4, + -MvJointHzvnz, -MvJointHnzvnz +}; +static constexpr int mv_class_tree[20] = { + -MvClass0, 2, + -MvClass1, 4, + 6, 8, + -MvClass2, -MvClass3, + 10, 12, + -MvClass4, -MvClass5, + -MvClass6, 14, + 16, 18, + -MvClass7, -MvClass8, + -MvClass9, -MvClass10 +}; +static constexpr int mv_fr_tree[6] = { + -0, 2, + -1, 4, + -2, -3 +}; +static constexpr int token_tree[20] = { + -ZeroToken, 2, + -OneToken, 4, + 6, 10, + -TwoToken, 8, + -ThreeToken, -FourToken, + 12, 14, + -DctValCat1, -DctValCat2, + 16, 18, + -DctValCat3, -DctValCat4, + -DctValCat5, -DctValCat6 +}; + +static constexpr u8 b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 }; +static constexpr u8 b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4 }; +static constexpr u8 num_4x4_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16 }; +static constexpr u8 num_4x4_blocks_high_lookup[BLOCK_SIZES] = { 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16 }; +static constexpr u8 mi_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3 }; +static constexpr u8 num_8x8_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8 }; +static constexpr u8 mi_height_log2_lookup[BLOCK_SIZES] = { 0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3 }; +static constexpr u8 num_8x8_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8 }; +static constexpr u8 size_group_lookup[BLOCK_SIZES] = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; + } diff --git a/Userland/Libraries/LibVideo/VP9/TreeParser.cpp b/Userland/Libraries/LibVideo/VP9/TreeParser.cpp new file mode 100644 index 00000000000..ac06e66b2f7 --- /dev/null +++ b/Userland/Libraries/LibVideo/VP9/TreeParser.cpp @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2021, Hunter Salyer + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include "TreeParser.h" +#include "LookupTables.h" + +namespace Video::VP9 { + +int TreeParser::parse_tree(SyntaxElementType type) +{ + auto tree_selection = select_tree(type); + if (tree_selection.is_single_value()) + return tree_selection.get_single_value(); + auto tree = tree_selection.get_tree_value(); + int n = 0; + do { + n = tree[n + m_bit_stream->read_bool(select_tree_probability(type, n >> 1))]; + } while (n > 0); + return -n; +} + +/* + * Select a tree value based on the type of syntax element being parsed, as well as some parser state, as specified in section 9.3.1 + */ +TreeParser::TreeSelection TreeParser::select_tree(SyntaxElementType type) +{ + switch (type) { + case SyntaxElementType::Partition: + if (m_has_rows && m_has_cols) + return { partition_tree }; + if (m_has_cols) + return { cols_partition_tree }; + if (m_has_rows) + return { rows_partition_tree }; + return { PartitionSplit }; + case SyntaxElementType::DefaultIntraMode: + case SyntaxElementType::DefaultUVMode: + case SyntaxElementType::IntraMode: + case SyntaxElementType::SubIntraMode: + case SyntaxElementType::UVMode: + return { intra_mode_tree }; + case SyntaxElementType::SegmentID: + return { segment_tree }; + case SyntaxElementType::Skip: + case SyntaxElementType::SegIDPredicted: + case SyntaxElementType::IsInter: + case SyntaxElementType::CompMode: + case SyntaxElementType::CompRef: + case SyntaxElementType::SingleRefP1: + case SyntaxElementType::SingleRefP2: + case SyntaxElementType::MVSign: + case SyntaxElementType::MVClass0Bit: + case SyntaxElementType::MVBit: + case SyntaxElementType::MoreCoefs: + return { binary_tree }; + case SyntaxElementType::TXSize: + if (m_max_tx_size == TX_32x32) + return { tx_size_32_tree }; + if (m_max_tx_size == TX_16x16) + return { tx_size_16_tree }; + return { tx_size_8_tree }; + case SyntaxElementType::InterMode: + return { inter_mode_tree }; + case SyntaxElementType::InterpFilter: + return { interp_filter_tree }; + case SyntaxElementType::MVJoint: + return { mv_joint_tree }; + case SyntaxElementType::MVClass: + return { mv_class_tree }; + case SyntaxElementType::MVClass0FR: + case SyntaxElementType::MVFR: + return { mv_fr_tree }; + case SyntaxElementType::MVClass0HP: + case SyntaxElementType::MVHP: + if (m_use_hp) + return { binary_tree }; + return { 1 }; + case SyntaxElementType::Token: + return { token_tree }; + } + VERIFY_NOT_REACHED(); +} + +/* + * Select a probability with which to read a boolean when decoding a tree, as specified in section 9.3.2 + */ +u8 TreeParser::select_tree_probability(SyntaxElementType type, u8 node) +{ + switch (type) { + case SyntaxElementType::Partition: + return calculate_partition_probability(node); + case SyntaxElementType::DefaultIntraMode: + break; + case SyntaxElementType::DefaultUVMode: + break; + case SyntaxElementType::IntraMode: + break; + case SyntaxElementType::SubIntraMode: + break; + case SyntaxElementType::UVMode: + break; + case SyntaxElementType::SegmentID: + break; + case SyntaxElementType::Skip: + break; + case SyntaxElementType::SegIDPredicted: + break; + case SyntaxElementType::IsInter: + break; + case SyntaxElementType::CompMode: + break; + case SyntaxElementType::CompRef: + break; + case SyntaxElementType::SingleRefP1: + break; + case SyntaxElementType::SingleRefP2: + break; + case SyntaxElementType::MVSign: + break; + case SyntaxElementType::MVClass0Bit: + break; + case SyntaxElementType::MVBit: + break; + case SyntaxElementType::TXSize: + break; + case SyntaxElementType::InterMode: + break; + case SyntaxElementType::InterpFilter: + break; + case SyntaxElementType::MVJoint: + break; + case SyntaxElementType::MVClass: + break; + case SyntaxElementType::MVClass0FR: + break; + case SyntaxElementType::MVClass0HP: + break; + case SyntaxElementType::MVFR: + break; + case SyntaxElementType::MVHP: + break; + case SyntaxElementType::Token: + break; + case SyntaxElementType::MoreCoefs: + break; + } + TODO(); +} + +u8 TreeParser::calculate_partition_probability(u8 node) +{ + int node2; + if (m_has_rows && m_has_cols) { + node2 = node; + } else if (m_has_cols) { + node2 = 1; + } else { + node2 = 2; + } + + u32 above = 0; + u32 left = 0; + auto bsl = mi_width_log2_lookup[m_block_subsize]; + auto block_offset = mi_width_log2_lookup[Block_64x64] - bsl; + for (auto i = 0; i < m_num_8x8; i++) { + above |= m_above_partition_context[m_col + i]; + left |= m_left_partition_context[m_row + i]; + } + above = (above & (1 << block_offset)) > 0; + left = (left & (1 << block_offset)) > 0; + auto ctx = bsl * 4 + left * 2 + above; + if (m_frame_is_intra) + return m_probability_tables.kf_partition_probs()[ctx][node2]; + return m_probability_tables.partition_probs()[ctx][node2]; +} + +TreeParser::TreeSelection::TreeSelection(const int* values) + : m_is_single_value(false) + , m_value { .m_tree = values } +{ +} + +TreeParser::TreeSelection::TreeSelection(int value) + : m_is_single_value(true) + , m_value { .m_value = value } +{ +} + +} diff --git a/Userland/Libraries/LibVideo/VP9/TreeParser.h b/Userland/Libraries/LibVideo/VP9/TreeParser.h new file mode 100644 index 00000000000..2fb09841eef --- /dev/null +++ b/Userland/Libraries/LibVideo/VP9/TreeParser.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021, Hunter Salyer + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include "BitStream.h" +#include "Enums.h" +#include "ProbabilityTables.h" +#include "SyntaxElementCounter.h" + +namespace Video::VP9 { + +class TreeParser { +public: + explicit TreeParser(ProbabilityTables& probability_tables) + : m_probability_tables(probability_tables) + { + } + + class TreeSelection { + public: + union TreeSelectionValue { + const int* m_tree; + int m_value; + }; + + TreeSelection(const int* values); + TreeSelection(int value); + + bool is_single_value() const { return m_is_single_value; } + int get_single_value() const { return m_value.m_value; } + const int* get_tree_value() const { return m_value.m_tree; } + + private: + bool m_is_single_value; + TreeSelectionValue m_value; + }; + + int parse_tree(SyntaxElementType type); + TreeSelection select_tree(SyntaxElementType type); + u8 select_tree_probability(SyntaxElementType type, u8 node); + + void set_bit_stream(BitStream* bit_stream) { m_bit_stream = bit_stream; } + void set_has_rows(bool has_rows) { m_has_rows = has_rows; } + void set_has_cols(bool has_cols) { m_has_cols = has_cols; } + void set_max_tx_size(TXSize max_tx_size) { m_max_tx_size = max_tx_size; } + void set_use_hp(bool use_hp) { m_use_hp = use_hp; } + void set_block_subsize(u8 block_subsize) { m_block_subsize = block_subsize; } + void set_num_8x8(u8 num_8x8) { m_num_8x8 = num_8x8; } + void set_above_partition_context(u8* above_partition_context) { m_above_partition_context = above_partition_context; } + void set_left_partition_context(u8* left_partition_context) { m_left_partition_context = left_partition_context; } + void set_col(u32 col) { m_col = col; } + void set_row(u32 row) { m_row = row; } + void set_frame_is_intra(bool frame_is_intra) { m_frame_is_intra = frame_is_intra; } + +private: + u8 calculate_partition_probability(u8 node); + + ProbabilityTables& m_probability_tables; + BitStream* m_bit_stream { nullptr }; + + bool m_has_rows { false }; + bool m_has_cols { false }; + TXSize m_max_tx_size { TX_4x4 }; + bool m_use_hp { false }; + u8 m_block_subsize { 0 }; + u8 m_num_8x8 { 0 }; + u8* m_above_partition_context { nullptr }; + u8* m_left_partition_context { nullptr }; + u32 m_col { 0 }; + u32 m_row { 0 }; + bool m_frame_is_intra { false }; +}; + +}