diff options
| author | Kelebek1 <eeeedddccc@hotmail.co.uk> | 2021-06-29 05:54:54 +0100 | 
|---|---|---|
| committer | Kelebek1 <eeeedddccc@hotmail.co.uk> | 2021-07-01 06:22:05 +0100 | 
| commit | 208a04dcffe8142070bd8136b42def6a3233bb0f (patch) | |
| tree | 60814ffb191978100a259ee230989a6f255a4460 | |
| parent | 4df04ad48a2b9f04712ad6627e9712f3625253a9 (diff) | |
Slightly refactor NVDEC and codecs for readability and safety
| -rw-r--r-- | src/video_core/cdma_pusher.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/codec.cpp | 85 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/codec.h | 12 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/h264.cpp | 207 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/h264.h | 132 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/vp9.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/vp9_types.h | 307 | ||||
| -rw-r--r-- | src/video_core/command_classes/nvdec.cpp | 17 | ||||
| -rw-r--r-- | src/video_core/command_classes/nvdec.h | 8 | ||||
| -rw-r--r-- | src/video_core/command_classes/nvdec_common.h | 103 | 
10 files changed, 522 insertions, 356 deletions
| diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index a3fda1094..8b86ad050 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {          case ThiMethod::SetMethod1:              LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",                        static_cast<u32>(nvdec_thi_state.method_0)); -            nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0), -                                           data); +            nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data);              break;          default:              break; diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index d02dc6260..1b4bbc8ac 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) {      av_free(ptr);  } -Codec::Codec(GPU& gpu_) -    : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)), +Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) +    : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),        vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}  Codec::~Codec() { @@ -43,46 +43,48 @@ Codec::~Codec() {      avcodec_close(av_codec_ctx);  } +void Codec::Initialize() { +    AVCodecID codec{AV_CODEC_ID_NONE}; +    switch (current_codec) { +    case NvdecCommon::VideoCodec::H264: +        codec = AV_CODEC_ID_H264; +        break; +    case NvdecCommon::VideoCodec::Vp9: +        codec = AV_CODEC_ID_VP9; +        break; +    default: +        return; +    } +    av_codec = avcodec_find_decoder(codec); +    av_codec_ctx = avcodec_alloc_context3(av_codec); +    av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); + +    // TODO(ameerj): libavcodec gpu hw acceleration + +    const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); +    if (av_error < 0) { +        LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); +        avcodec_close(av_codec_ctx); +        return; +    } +    initialized = true; +    return; +} +  void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {      if (current_codec != codec) { -        LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));          current_codec = codec; +        LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());      }  } -void Codec::StateWrite(u32 offset, u64 arguments) { -    u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64); -    std::memcpy(state_offset, &arguments, sizeof(u64)); -} -  void Codec::Decode() { -    bool is_first_frame = false; +    const bool is_first_frame = !initialized;      if (!initialized) { -        if (current_codec == NvdecCommon::VideoCodec::H264) { -            av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); -        } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { -            av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); -        } else { -            LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec); -            return; -        } - -        av_codec_ctx = avcodec_alloc_context3(av_codec); -        av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); - -        // TODO(ameerj): libavcodec gpu hw acceleration - -        const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); -        if (av_error < 0) { -            LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); -            avcodec_close(av_codec_ctx); -            return; -        } -        initialized = true; -        is_first_frame = true; +        Initialize();      } -    bool vp9_hidden_frame = false; +    bool vp9_hidden_frame = false;      AVPacket packet{};      av_init_packet(&packet);      std::vector<u8> frame_data; @@ -95,7 +97,7 @@ void Codec::Decode() {      }      packet.data = frame_data.data(); -    packet.size = static_cast<int>(frame_data.size()); +    packet.size = static_cast<s32>(frame_data.size());      avcodec_send_packet(av_codec_ctx, &packet); @@ -127,4 +129,21 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {      return current_codec;  } +std::string_view Codec::GetCurrentCodecName() const { +    switch (current_codec) { +    case NvdecCommon::VideoCodec::None: +        return "None"; +    case NvdecCommon::VideoCodec::H264: +        return "H264"; +    case NvdecCommon::VideoCodec::Vp8: +        return "VP8"; +    case NvdecCommon::VideoCodec::H265: +        return "H265"; +    case NvdecCommon::VideoCodec::Vp9: +        return "VP9"; +    default: +        return "Unknown"; +    } +}; +  } // namespace Tegra diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 3e135a2a6..f2aef1699 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -42,15 +42,15 @@ class VP9;  class Codec {  public: -    explicit Codec(GPU& gpu); +    explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs);      ~Codec(); +    /// Initialize the codec, returning success or failure +    void Initialize(); +      /// Sets NVDEC video stream codec      void SetTargetCodec(NvdecCommon::VideoCodec codec); -    /// Populate NvdecRegisters state with argument value at the provided offset -    void StateWrite(u32 offset, u64 arguments); -      /// Call decoders to construct headers, decode AVFrame with ffmpeg      void Decode(); @@ -59,6 +59,8 @@ public:      /// Returns the value of current_codec      [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; +    /// Return name of the current codec +    [[nodiscard]] std::string_view GetCurrentCodecName() const;  private:      bool initialized{}; @@ -68,10 +70,10 @@ private:      AVCodecContext* av_codec_ctx{nullptr};      GPU& gpu; +    const NvdecCommon::NvdecRegisters& state;      std::unique_ptr<Decoder::H264> h264_decoder;      std::unique_ptr<Decoder::VP9> vp9_decoder; -    NvdecCommon::NvdecRegisters state{};      std::queue<AVFramePtr> av_frames{};  }; diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index fea6aed98..5fb6d45ee 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -45,134 +45,129 @@ H264::~H264() = default;  const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state,                                                  bool is_first_frame) { -    H264DecoderContext context{}; +    H264DecoderContext context;      gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); -    const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff); +    const s64 frame_number = context.h264_parameter_set.frame_number.Value();      if (!is_first_frame && frame_number != 0) { -        frame.resize(context.frame_data_size); - +        frame.resize(context.stream_len);          gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); -    } else { -        /// Encode header -        H264BitWriter writer{}; -        writer.WriteU(1, 24); -        writer.WriteU(0, 1); -        writer.WriteU(3, 2); -        writer.WriteU(7, 5); -        writer.WriteU(100, 8); -        writer.WriteU(0, 8); -        writer.WriteU(31, 8); -        writer.WriteUe(0); -        const auto chroma_format_idc = -            static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3); -        writer.WriteUe(chroma_format_idc); -        if (chroma_format_idc == 3) { -            writer.WriteBit(false); -        } - -        writer.WriteUe(0); -        writer.WriteUe(0); -        writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag -        writer.WriteBit(false); // Scaling matrix present flag - -        const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3); -        writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf)); -        writer.WriteUe(order_cnt_type); -        if (order_cnt_type == 0) { -            writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt); -        } else if (order_cnt_type == 1) { -            writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); - -            writer.WriteSe(0); -            writer.WriteSe(0); -            writer.WriteUe(0); -        } - -        const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units / -                               (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); +        return frame; +    } -        writer.WriteUe(16); +    // Encode header +    H264BitWriter writer{}; +    writer.WriteU(1, 24); +    writer.WriteU(0, 1); +    writer.WriteU(3, 2); +    writer.WriteU(7, 5); +    writer.WriteU(100, 8); +    writer.WriteU(0, 8); +    writer.WriteU(31, 8); +    writer.WriteUe(0); +    const u32 chroma_format_idc = +        static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value()); +    writer.WriteUe(chroma_format_idc); +    if (chroma_format_idc == 3) {          writer.WriteBit(false); -        writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); -        writer.WriteUe(pic_height - 1); -        writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); - -        if (!context.h264_parameter_set.frame_mbs_only_flag) { -            writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0); -        } +    } -        writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0); -        writer.WriteBit(false); // Frame cropping flag -        writer.WriteBit(false); // VUI parameter present flag +    writer.WriteUe(0); +    writer.WriteUe(0); +    writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag +    writer.WriteBit(false); // Scaling matrix present flag -        writer.End(); +    writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value())); -        // H264 PPS -        writer.WriteU(1, 24); -        writer.WriteU(0, 1); -        writer.WriteU(3, 2); -        writer.WriteU(8, 5); +    const auto order_cnt_type = +        static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value()); +    writer.WriteUe(order_cnt_type); +    if (order_cnt_type == 0) { +        writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4); +    } else if (order_cnt_type == 1) { +        writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); +        writer.WriteSe(0); +        writer.WriteSe(0);          writer.WriteUe(0); -        writer.WriteUe(0); +    } -        writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); -        writer.WriteBit(false); -        writer.WriteUe(0); -        writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); -        writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); -        writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0); -        writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2); -        s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f); -        pic_init_qp = (pic_init_qp << 26) >> 26; -        writer.WriteSe(pic_init_qp); -        writer.WriteSe(0); -        s32 chroma_qp_index_offset = -            static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f); -        chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27; +    const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units / +                           (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); + +    writer.WriteUe(16); +    writer.WriteBit(false); +    writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); +    writer.WriteUe(pic_height - 1); +    writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); -        writer.WriteSe(chroma_qp_index_offset); -        writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0); -        writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0); -        writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0); -        writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); +    if (!context.h264_parameter_set.frame_mbs_only_flag) { +        writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0); +    } +    writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0); +    writer.WriteBit(false); // Frame cropping flag +    writer.WriteBit(false); // VUI parameter present flag + +    writer.End(); + +    // H264 PPS +    writer.WriteU(1, 24); +    writer.WriteU(0, 1); +    writer.WriteU(3, 2); +    writer.WriteU(8, 5); + +    writer.WriteUe(0); +    writer.WriteUe(0); + +    writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); +    writer.WriteBit(false); +    writer.WriteUe(0); +    writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); +    writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); +    writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0); +    writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2); +    s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value()); +    writer.WriteSe(pic_init_qp); +    writer.WriteSe(0); +    s32 chroma_qp_index_offset = +        static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value()); + +    writer.WriteSe(chroma_qp_index_offset); +    writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0); +    writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0); +    writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0); +    writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); + +    writer.WriteBit(true); + +    for (s32 index = 0; index < 6; index++) {          writer.WriteBit(true); +        std::span<const u8> matrix{context.weight_scale}; +        writer.WriteScalingList(matrix, index * 16, 16); +    } -        for (s32 index = 0; index < 6; index++) { +    if (context.h264_parameter_set.transform_8x8_mode_flag) { +        for (s32 index = 0; index < 2; index++) {              writer.WriteBit(true); -            const auto matrix_x4 = -                std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end()); -            writer.WriteScalingList(matrix_x4, index * 16, 16); -        } - -        if (context.h264_parameter_set.transform_8x8_mode_flag) { -            for (s32 index = 0; index < 2; index++) { -                writer.WriteBit(true); -                const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(), -                                                       context.scaling_matrix_8.end()); - -                writer.WriteScalingList(matrix_x8, index * 64, 64); -            } +            std::span<const u8> matrix{context.weight_scale_8x8}; +            writer.WriteScalingList(matrix, index * 64, 64);          } +    } -        s32 chroma_qp_index_offset2 = -            static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f); -        chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27; +    s32 chroma_qp_index_offset2 = +        static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value()); -        writer.WriteSe(chroma_qp_index_offset2); +    writer.WriteSe(chroma_qp_index_offset2); -        writer.End(); +    writer.End(); -        const auto& encoded_header = writer.GetByteArray(); -        frame.resize(encoded_header.size() + context.frame_data_size); -        std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); +    const auto& encoded_header = writer.GetByteArray(); +    frame.resize(encoded_header.size() + context.stream_len); +    std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); -        gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, -                                      frame.data() + encoded_header.size(), -                                      context.frame_data_size); -    } +    gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, +                                  frame.data() + encoded_header.size(), context.stream_len);      return frame;  } @@ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) {      WriteBits(state ? 1 : 0, 1);  } -void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) { +void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {      std::vector<u8> scan(count);      if (count == 16) {          std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h index 0f3a1d9f3..bfe84a472 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/command_classes/codecs/h264.h @@ -20,7 +20,9 @@  #pragma once +#include <span>  #include <vector> +#include "common/bit_field.h"  #include "common/common_funcs.h"  #include "common/common_types.h"  #include "video_core/command_classes/nvdec_common.h" @@ -48,7 +50,7 @@ public:      /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification      /// Writes the scaling matrices of the sream -    void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); +    void WriteScalingList(std::span<const u8> list, s32 start, s32 count);      /// Return the bitstream as a vector.      [[nodiscard]] std::vector<u8>& GetByteArray(); @@ -78,40 +80,110 @@ public:          const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);  private: +    std::vector<u8> frame; +    GPU& gpu; +      struct H264ParameterSet { -        u32 log2_max_pic_order_cnt{}; -        u32 delta_pic_order_always_zero_flag{}; -        u32 frame_mbs_only_flag{}; -        u32 pic_width_in_mbs{}; -        u32 pic_height_in_map_units{}; -        INSERT_PADDING_WORDS(1); -        u32 entropy_coding_mode_flag{}; -        u32 bottom_field_pic_order_flag{}; -        u32 num_refidx_l0_default_active{}; -        u32 num_refidx_l1_default_active{}; -        u32 deblocking_filter_control_flag{}; -        u32 redundant_pic_count_flag{}; -        u32 transform_8x8_mode_flag{}; -        INSERT_PADDING_WORDS(9); -        u64 flags{}; -        u32 frame_number{}; -        u32 frame_number2{}; +        s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00 +        s32 delta_pic_order_always_zero_flag;  ///< 0x04 +        s32 frame_mbs_only_flag;               ///< 0x08 +        u32 pic_width_in_mbs;                  ///< 0x0C +        u32 frame_height_in_map_units;         ///< 0x10 +        union {                                ///< 0x14 +            BitField<0, 2, u32> tile_format; +            BitField<2, 3, u32> gob_height; +        }; +        u32 entropy_coding_mode_flag;               ///< 0x18 +        s32 pic_order_present_flag;                 ///< 0x1C +        s32 num_refidx_l0_default_active;           ///< 0x20 +        s32 num_refidx_l1_default_active;           ///< 0x24 +        s32 deblocking_filter_control_present_flag; ///< 0x28 +        s32 redundant_pic_cnt_present_flag;         ///< 0x2C +        u32 transform_8x8_mode_flag;                ///< 0x30 +        u32 pitch_luma;                             ///< 0x34 +        u32 pitch_chroma;                           ///< 0x38 +        u32 luma_top_offset;                        ///< 0x3C +        u32 luma_bot_offset;                        ///< 0x40 +        u32 luma_frame_offset;                      ///< 0x44 +        u32 chroma_top_offset;                      ///< 0x48 +        u32 chroma_bot_offset;                      ///< 0x4C +        u32 chroma_frame_offset;                    ///< 0x50 +        u32 hist_buffer_size;                       ///< 0x54 +        union {                                     ///< 0x58 +            union { +                BitField<0, 1, u64> mbaff_frame; +                BitField<1, 1, u64> direct_8x8_inference; +                BitField<2, 1, u64> weighted_pred; +                BitField<3, 1, u64> constrained_intra_pred; +                BitField<4, 1, u64> ref_pic; +                BitField<5, 1, u64> field_pic; +                BitField<6, 1, u64> bottom_field; +                BitField<7, 1, u64> second_field; +            } flags; +            BitField<8, 4, u64> log2_max_frame_num_minus4; +            BitField<12, 2, u64> chroma_format_idc; +            BitField<14, 2, u64> pic_order_cnt_type; +            BitField<16, 6, s64> pic_init_qp_minus26; +            BitField<22, 5, s64> chroma_qp_index_offset; +            BitField<27, 5, s64> second_chroma_qp_index_offset; +            BitField<32, 2, u64> weighted_bipred_idc; +            BitField<34, 7, u64> curr_pic_idx; +            BitField<41, 5, u64> curr_col_idx; +            BitField<46, 16, u64> frame_number; +            BitField<62, 1, u64> frame_surfaces; +            BitField<63, 1, u64> output_memory_layout; +        };      }; -    static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size"); +    static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");      struct H264DecoderContext { -        INSERT_PADDING_BYTES(0x48); -        u32 frame_data_size{}; -        INSERT_PADDING_BYTES(0xc); -        H264ParameterSet h264_parameter_set{}; -        INSERT_PADDING_BYTES(0x100); -        std::array<u8, 0x60> scaling_matrix_4; -        std::array<u8, 0x80> scaling_matrix_8; +        INSERT_PADDING_WORDS_NOINIT(18);       ///< 0x0000 +        u32 stream_len;                        ///< 0x0048 +        INSERT_PADDING_WORDS_NOINIT(3);        ///< 0x004C +        H264ParameterSet h264_parameter_set;   ///< 0x0058 +        INSERT_PADDING_WORDS_NOINIT(66);       ///< 0x00B8 +        std::array<u8, 0x60> weight_scale;     ///< 0x01C0 +        std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220      }; -    static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size"); - -    std::vector<u8> frame; -    GPU& gpu; +    static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size"); + +#define ASSERT_POSITION(field_name, position)                                                      \ +    static_assert(offsetof(H264ParameterSet, field_name) == position,                              \ +                  "Field " #field_name " has invalid position") + +    ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00); +    ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04); +    ASSERT_POSITION(frame_mbs_only_flag, 0x08); +    ASSERT_POSITION(pic_width_in_mbs, 0x0C); +    ASSERT_POSITION(frame_height_in_map_units, 0x10); +    ASSERT_POSITION(tile_format, 0x14); +    ASSERT_POSITION(entropy_coding_mode_flag, 0x18); +    ASSERT_POSITION(pic_order_present_flag, 0x1C); +    ASSERT_POSITION(num_refidx_l0_default_active, 0x20); +    ASSERT_POSITION(num_refidx_l1_default_active, 0x24); +    ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28); +    ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C); +    ASSERT_POSITION(transform_8x8_mode_flag, 0x30); +    ASSERT_POSITION(pitch_luma, 0x34); +    ASSERT_POSITION(pitch_chroma, 0x38); +    ASSERT_POSITION(luma_top_offset, 0x3C); +    ASSERT_POSITION(luma_bot_offset, 0x40); +    ASSERT_POSITION(luma_frame_offset, 0x44); +    ASSERT_POSITION(chroma_top_offset, 0x48); +    ASSERT_POSITION(chroma_bot_offset, 0x4C); +    ASSERT_POSITION(chroma_frame_offset, 0x50); +    ASSERT_POSITION(hist_buffer_size, 0x54); +    ASSERT_POSITION(flags, 0x58); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position)                                                      \ +    static_assert(offsetof(H264DecoderContext, field_name) == position,                            \ +                  "Field " #field_name " has invalid position") + +    ASSERT_POSITION(stream_len, 0x48); +    ASSERT_POSITION(h264_parameter_set, 0x58); +    ASSERT_POSITION(weight_scale, 0x1C0); +#undef ASSERT_POSITION  };  } // namespace Decoder diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 29bb31418..902bc2a98 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_  }  Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { -    PictureInfo picture_info{}; +    PictureInfo picture_info;      gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));      Vp9PictureInfo vp9_info = picture_info.Convert(); @@ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state)  }  void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { -    EntropyProbs entropy{}; +    EntropyProbs entropy;      gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));      entropy.Convert(dst);  } diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 139501a1c..2da14f3ca 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -15,10 +15,10 @@ class GPU;  namespace Decoder {  struct Vp9FrameDimensions { -    s16 width{}; -    s16 height{}; -    s16 luma_pitch{}; -    s16 chroma_pitch{}; +    s16 width; +    s16 height; +    s16 luma_pitch; +    s16 chroma_pitch;  };  static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); @@ -49,87 +49,87 @@ enum class TxMode {  };  struct Segmentation { -    u8 enabled{}; -    u8 update_map{}; -    u8 temporal_update{}; -    u8 abs_delta{}; -    std::array<u32, 8> feature_mask{}; -    std::array<std::array<s16, 4>, 8> feature_data{}; +    u8 enabled; +    u8 update_map; +    u8 temporal_update; +    u8 abs_delta; +    std::array<u32, 8> feature_mask; +    std::array<std::array<s16, 4>, 8> feature_data;  };  static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");  struct LoopFilter { -    u8 mode_ref_delta_enabled{}; -    std::array<s8, 4> ref_deltas{}; -    std::array<s8, 2> mode_deltas{}; +    u8 mode_ref_delta_enabled; +    std::array<s8, 4> ref_deltas; +    std::array<s8, 2> mode_deltas;  };  static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");  struct Vp9EntropyProbs { -    std::array<u8, 36> y_mode_prob{}; -    std::array<u8, 64> partition_prob{}; -    std::array<u8, 1728> coef_probs{}; -    std::array<u8, 8> switchable_interp_prob{}; -    std::array<u8, 28> inter_mode_prob{}; -    std::array<u8, 4> intra_inter_prob{}; -    std::array<u8, 5> comp_inter_prob{}; -    std::array<u8, 10> single_ref_prob{}; -    std::array<u8, 5> comp_ref_prob{}; -    std::array<u8, 6> tx_32x32_prob{}; -    std::array<u8, 4> tx_16x16_prob{}; -    std::array<u8, 2> tx_8x8_prob{}; -    std::array<u8, 3> skip_probs{}; -    std::array<u8, 3> joints{}; -    std::array<u8, 2> sign{}; -    std::array<u8, 20> classes{}; -    std::array<u8, 2> class_0{}; -    std::array<u8, 20> prob_bits{}; -    std::array<u8, 12> class_0_fr{}; -    std::array<u8, 6> fr{}; -    std::array<u8, 2> class_0_hp{}; -    std::array<u8, 2> high_precision{}; +    std::array<u8, 36> y_mode_prob;           ///< 0x0000 +    std::array<u8, 64> partition_prob;        ///< 0x0024 +    std::array<u8, 1728> coef_probs;          ///< 0x0064 +    std::array<u8, 8> switchable_interp_prob; ///< 0x0724 +    std::array<u8, 28> inter_mode_prob;       ///< 0x072C +    std::array<u8, 4> intra_inter_prob;       ///< 0x0748 +    std::array<u8, 5> comp_inter_prob;        ///< 0x074C +    std::array<u8, 10> single_ref_prob;       ///< 0x0751 +    std::array<u8, 5> comp_ref_prob;          ///< 0x075B +    std::array<u8, 6> tx_32x32_prob;          ///< 0x0760 +    std::array<u8, 4> tx_16x16_prob;          ///< 0x0766 +    std::array<u8, 2> tx_8x8_prob;            ///< 0x076A +    std::array<u8, 3> skip_probs;             ///< 0x076C +    std::array<u8, 3> joints;                 ///< 0x076F +    std::array<u8, 2> sign;                   ///< 0x0772 +    std::array<u8, 20> classes;               ///< 0x0774 +    std::array<u8, 2> class_0;                ///< 0x0788 +    std::array<u8, 20> prob_bits;             ///< 0x078A +    std::array<u8, 12> class_0_fr;            ///< 0x079E +    std::array<u8, 6> fr;                     ///< 0x07AA +    std::array<u8, 2> class_0_hp;             ///< 0x07B0 +    std::array<u8, 2> high_precision;         ///< 0x07B2  };  static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");  struct Vp9PictureInfo { -    bool is_key_frame{}; -    bool intra_only{}; -    bool last_frame_was_key{}; -    bool frame_size_changed{}; -    bool error_resilient_mode{}; -    bool last_frame_shown{}; -    bool show_frame{}; -    std::array<s8, 4> ref_frame_sign_bias{}; -    s32 base_q_index{}; -    s32 y_dc_delta_q{}; -    s32 uv_dc_delta_q{}; -    s32 uv_ac_delta_q{}; -    bool lossless{}; -    s32 transform_mode{}; -    bool allow_high_precision_mv{}; -    s32 interp_filter{}; -    s32 reference_mode{}; -    s8 comp_fixed_ref{}; -    std::array<s8, 2> comp_var_ref{}; -    s32 log2_tile_cols{}; -    s32 log2_tile_rows{}; -    bool segment_enabled{}; -    bool segment_map_update{}; -    bool segment_map_temporal_update{}; -    s32 segment_abs_delta{}; -    std::array<u32, 8> segment_feature_enable{}; -    std::array<std::array<s16, 4>, 8> segment_feature_data{}; -    bool mode_ref_delta_enabled{}; -    bool use_prev_in_find_mv_refs{}; -    std::array<s8, 4> ref_deltas{}; -    std::array<s8, 2> mode_deltas{}; -    Vp9EntropyProbs entropy{}; -    Vp9FrameDimensions frame_size{}; -    u8 first_level{}; -    u8 sharpness_level{}; -    u32 bitstream_size{}; -    std::array<u64, 4> frame_offsets{}; -    std::array<bool, 4> refresh_frame{}; +    bool is_key_frame; +    bool intra_only; +    bool last_frame_was_key; +    bool frame_size_changed; +    bool error_resilient_mode; +    bool last_frame_shown; +    bool show_frame; +    std::array<s8, 4> ref_frame_sign_bias; +    s32 base_q_index; +    s32 y_dc_delta_q; +    s32 uv_dc_delta_q; +    s32 uv_ac_delta_q; +    bool lossless; +    s32 transform_mode; +    bool allow_high_precision_mv; +    s32 interp_filter; +    s32 reference_mode; +    s8 comp_fixed_ref; +    std::array<s8, 2> comp_var_ref; +    s32 log2_tile_cols; +    s32 log2_tile_rows; +    bool segment_enabled; +    bool segment_map_update; +    bool segment_map_temporal_update; +    s32 segment_abs_delta; +    std::array<u32, 8> segment_feature_enable; +    std::array<std::array<s16, 4>, 8> segment_feature_data; +    bool mode_ref_delta_enabled; +    bool use_prev_in_find_mv_refs; +    std::array<s8, 4> ref_deltas; +    std::array<s8, 2> mode_deltas; +    Vp9EntropyProbs entropy; +    Vp9FrameDimensions frame_size; +    u8 first_level; +    u8 sharpness_level; +    u32 bitstream_size; +    std::array<u64, 4> frame_offsets; +    std::array<bool, 4> refresh_frame;  };  struct Vp9FrameContainer { @@ -138,35 +138,35 @@ struct Vp9FrameContainer {  };  struct PictureInfo { -    INSERT_PADDING_WORDS(12); -    u32 bitstream_size{}; -    INSERT_PADDING_WORDS(5); -    Vp9FrameDimensions last_frame_size{}; -    Vp9FrameDimensions golden_frame_size{}; -    Vp9FrameDimensions alt_frame_size{}; -    Vp9FrameDimensions current_frame_size{}; -    u32 vp9_flags{}; -    std::array<s8, 4> ref_frame_sign_bias{}; -    u8 first_level{}; -    u8 sharpness_level{}; -    u8 base_q_index{}; -    u8 y_dc_delta_q{}; -    u8 uv_ac_delta_q{}; -    u8 uv_dc_delta_q{}; -    u8 lossless{}; -    u8 tx_mode{}; -    u8 allow_high_precision_mv{}; -    u8 interp_filter{}; -    u8 reference_mode{}; -    s8 comp_fixed_ref{}; -    std::array<s8, 2> comp_var_ref{}; -    u8 log2_tile_cols{}; -    u8 log2_tile_rows{}; -    Segmentation segmentation{}; -    LoopFilter loop_filter{}; -    INSERT_PADDING_BYTES(5); -    u32 surface_params{}; -    INSERT_PADDING_WORDS(3); +    INSERT_PADDING_WORDS_NOINIT(12);       ///< 0x00 +    u32 bitstream_size;                    ///< 0x30 +    INSERT_PADDING_WORDS_NOINIT(5);        ///< 0x34 +    Vp9FrameDimensions last_frame_size;    ///< 0x48 +    Vp9FrameDimensions golden_frame_size;  ///< 0x50 +    Vp9FrameDimensions alt_frame_size;     ///< 0x58 +    Vp9FrameDimensions current_frame_size; ///< 0x60 +    u32 vp9_flags;                         ///< 0x68 +    std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C +    u8 first_level;                        ///< 0x70 +    u8 sharpness_level;                    ///< 0x71 +    u8 base_q_index;                       ///< 0x72 +    u8 y_dc_delta_q;                       ///< 0x73 +    u8 uv_ac_delta_q;                      ///< 0x74 +    u8 uv_dc_delta_q;                      ///< 0x75 +    u8 lossless;                           ///< 0x76 +    u8 tx_mode;                            ///< 0x77 +    u8 allow_high_precision_mv;            ///< 0x78 +    u8 interp_filter;                      ///< 0x79 +    u8 reference_mode;                     ///< 0x7A +    s8 comp_fixed_ref;                     ///< 0x7B +    std::array<s8, 2> comp_var_ref;        ///< 0x7C +    u8 log2_tile_cols;                     ///< 0x7E +    u8 log2_tile_rows;                     ///< 0x7F +    Segmentation segmentation;             ///< 0x80 +    LoopFilter loop_filter;                ///< 0xE4 +    INSERT_PADDING_BYTES_NOINIT(5);        ///< 0xEB +    u32 surface_params;                    ///< 0xF0 +    INSERT_PADDING_WORDS_NOINIT(3);        ///< 0xF4      [[nodiscard]] Vp9PictureInfo Convert() const {          return { @@ -176,6 +176,7 @@ struct PictureInfo {              .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,              .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,              .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, +            .show_frame = false,              .ref_frame_sign_bias = ref_frame_sign_bias,              .base_q_index = base_q_index,              .y_dc_delta_q = y_dc_delta_q, @@ -204,45 +205,48 @@ struct PictureInfo {                                          !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),              .ref_deltas = loop_filter.ref_deltas,              .mode_deltas = loop_filter.mode_deltas, +            .entropy{},              .frame_size = current_frame_size,              .first_level = first_level,              .sharpness_level = sharpness_level,              .bitstream_size = bitstream_size, +            .frame_offsets{}, +            .refresh_frame{},          };      }  };  static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");  struct EntropyProbs { -    INSERT_PADDING_BYTES(1024); -    std::array<u8, 28> inter_mode_prob{}; -    std::array<u8, 4> intra_inter_prob{}; -    INSERT_PADDING_BYTES(80); -    std::array<u8, 2> tx_8x8_prob{}; -    std::array<u8, 4> tx_16x16_prob{}; -    std::array<u8, 6> tx_32x32_prob{}; -    std::array<u8, 4> y_mode_prob_e8{}; -    std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{}; -    INSERT_PADDING_BYTES(64); -    std::array<u8, 64> partition_prob{}; -    INSERT_PADDING_BYTES(10); -    std::array<u8, 8> switchable_interp_prob{}; -    std::array<u8, 5> comp_inter_prob{}; -    std::array<u8, 3> skip_probs{}; -    INSERT_PADDING_BYTES(1); -    std::array<u8, 3> joints{}; -    std::array<u8, 2> sign{}; -    std::array<u8, 2> class_0{}; -    std::array<u8, 6> fr{}; -    std::array<u8, 2> class_0_hp{}; -    std::array<u8, 2> high_precision{}; -    std::array<u8, 20> classes{}; -    std::array<u8, 12> class_0_fr{}; -    std::array<u8, 20> pred_bits{}; -    std::array<u8, 10> single_ref_prob{}; -    std::array<u8, 5> comp_ref_prob{}; -    INSERT_PADDING_BYTES(17); -    std::array<u8, 2304> coef_probs{}; +    INSERT_PADDING_BYTES_NOINIT(1024);                 ///< 0x0000 +    std::array<u8, 28> inter_mode_prob;                ///< 0x0400 +    std::array<u8, 4> intra_inter_prob;                ///< 0x041C +    INSERT_PADDING_BYTES_NOINIT(80);                   ///< 0x0420 +    std::array<u8, 2> tx_8x8_prob;                     ///< 0x0470 +    std::array<u8, 4> tx_16x16_prob;                   ///< 0x0472 +    std::array<u8, 6> tx_32x32_prob;                   ///< 0x0476 +    std::array<u8, 4> y_mode_prob_e8;                  ///< 0x047C +    std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480 +    INSERT_PADDING_BYTES_NOINIT(64);                   ///< 0x04A0 +    std::array<u8, 64> partition_prob;                 ///< 0x04E0 +    INSERT_PADDING_BYTES_NOINIT(10);                   ///< 0x0520 +    std::array<u8, 8> switchable_interp_prob;          ///< 0x052A +    std::array<u8, 5> comp_inter_prob;                 ///< 0x0532 +    std::array<u8, 3> skip_probs;                      ///< 0x0537 +    INSERT_PADDING_BYTES_NOINIT(1);                    ///< 0x053A +    std::array<u8, 3> joints;                          ///< 0x053B +    std::array<u8, 2> sign;                            ///< 0x053E +    std::array<u8, 2> class_0;                         ///< 0x0540 +    std::array<u8, 6> fr;                              ///< 0x0542 +    std::array<u8, 2> class_0_hp;                      ///< 0x0548 +    std::array<u8, 2> high_precision;                  ///< 0x054A +    std::array<u8, 20> classes;                        ///< 0x054C +    std::array<u8, 12> class_0_fr;                     ///< 0x0560 +    std::array<u8, 20> pred_bits;                      ///< 0x056C +    std::array<u8, 10> single_ref_prob;                ///< 0x0580 +    std::array<u8, 5> comp_ref_prob;                   ///< 0x058A +    INSERT_PADDING_BYTES_NOINIT(17);                   ///< 0x058F +    std::array<u8, 2304> coef_probs;                   ///< 0x05A0      void Convert(Vp9EntropyProbs& fc) {          fc.inter_mode_prob = inter_mode_prob; @@ -293,10 +297,45 @@ struct RefPoolElement {  };  struct FrameContexts { -    s64 from{}; -    bool adapted{}; -    Vp9EntropyProbs probs{}; +    s64 from; +    bool adapted; +    Vp9EntropyProbs probs;  }; +#define ASSERT_POSITION(field_name, position)                                                      \ +    static_assert(offsetof(Vp9EntropyProbs, field_name) == position,                               \ +                  "Field " #field_name " has invalid position") + +ASSERT_POSITION(partition_prob, 0x0024); +ASSERT_POSITION(switchable_interp_prob, 0x0724); +ASSERT_POSITION(sign, 0x0772); +ASSERT_POSITION(class_0_fr, 0x079E); +ASSERT_POSITION(high_precision, 0x07B2); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position)                                                      \ +    static_assert(offsetof(PictureInfo, field_name) == position,                                   \ +                  "Field " #field_name " has invalid position") + +ASSERT_POSITION(bitstream_size, 0x30); +ASSERT_POSITION(last_frame_size, 0x48); +ASSERT_POSITION(first_level, 0x70); +ASSERT_POSITION(segmentation, 0x80); +ASSERT_POSITION(loop_filter, 0xE4); +ASSERT_POSITION(surface_params, 0xF0); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position)                                                      \ +    static_assert(offsetof(EntropyProbs, field_name) == position,                                  \ +                  "Field " #field_name " has invalid position") + +ASSERT_POSITION(inter_mode_prob, 0x400); +ASSERT_POSITION(tx_8x8_prob, 0x470); +ASSERT_POSITION(partition_prob, 0x4E0); +ASSERT_POSITION(class_0, 0x540); +ASSERT_POSITION(class_0_fr, 0x560); +ASSERT_POSITION(coef_probs, 0x5A0); +#undef ASSERT_POSITION +  }; // namespace Decoder  }; // namespace Tegra diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp index e4f919afd..b5e3b70fc 100644 --- a/src/video_core/command_classes/nvdec.cpp +++ b/src/video_core/command_classes/nvdec.cpp @@ -8,22 +8,21 @@  namespace Tegra { -Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {} +#define NVDEC_REG_INDEX(field_name)                                                                \ +    (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) + +Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {}  Nvdec::~Nvdec() = default; -void Nvdec::ProcessMethod(Method method, u32 argument) { -    if (method == Method::SetVideoCodec) { -        codec->StateWrite(static_cast<u32>(method), argument); -    } else { -        codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8); -    } +void Nvdec::ProcessMethod(u32 method, u32 argument) { +    state.reg_array[method] = static_cast<u64>(argument) << 8;      switch (method) { -    case Method::SetVideoCodec: +    case NVDEC_REG_INDEX(set_codec_id):          codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument));          break; -    case Method::Execute: +    case NVDEC_REG_INDEX(execute):          Execute();          break;      } diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h index e66be80b8..6e1da0b04 100644 --- a/src/video_core/command_classes/nvdec.h +++ b/src/video_core/command_classes/nvdec.h @@ -14,16 +14,11 @@ class GPU;  class Nvdec {  public: -    enum class Method : u32 { -        SetVideoCodec = 0x80, -        Execute = 0xc0, -    }; -      explicit Nvdec(GPU& gpu);      ~Nvdec();      /// Writes the method into the state, Invoke Execute() if encountered -    void ProcessMethod(Method method, u32 argument); +    void ProcessMethod(u32 method, u32 argument);      /// Return most recently decoded frame      [[nodiscard]] AVFramePtr GetFrame(); @@ -33,6 +28,7 @@ private:      void Execute();      GPU& gpu; +    NvdecCommon::NvdecRegisters state;      std::unique_ptr<Codec> codec;  };  } // namespace Tegra diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h index 01b5e086d..6a24e00a0 100644 --- a/src/video_core/command_classes/nvdec_common.h +++ b/src/video_core/command_classes/nvdec_common.h @@ -4,40 +4,13 @@  #pragma once +#include "common/bit_field.h"  #include "common/common_funcs.h"  #include "common/common_types.h"  namespace Tegra::NvdecCommon { -struct NvdecRegisters { -    INSERT_PADDING_WORDS(256); -    u64 set_codec_id{}; -    INSERT_PADDING_WORDS(254); -    u64 set_platform_id{}; -    u64 picture_info_offset{}; -    u64 frame_bitstream_offset{}; -    u64 frame_number{}; -    u64 h264_slice_data_offsets{}; -    u64 h264_mv_dump_offset{}; -    INSERT_PADDING_WORDS(6); -    u64 frame_stats_offset{}; -    u64 h264_last_surface_luma_offset{}; -    u64 h264_last_surface_chroma_offset{}; -    std::array<u64, 17> surface_luma_offset{}; -    std::array<u64, 17> surface_chroma_offset{}; -    INSERT_PADDING_WORDS(132); -    u64 vp9_entropy_probs_offset{}; -    u64 vp9_backward_updates_offset{}; -    u64 vp9_last_frame_segmap_offset{}; -    u64 vp9_curr_frame_segmap_offset{}; -    INSERT_PADDING_WORDS(2); -    u64 vp9_last_frame_mvs_offset{}; -    u64 vp9_curr_frame_mvs_offset{}; -    INSERT_PADDING_WORDS(2); -}; -static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); - -enum class VideoCodec : u32 { +enum class VideoCodec : u64 {      None = 0x0,      H264 = 0x3,      Vp8 = 0x5, @@ -45,4 +18,76 @@ enum class VideoCodec : u32 {      Vp9 = 0x9,  }; +// NVDEC should use a 32-bit address space, but is mapped to 64-bit, +// doubling the sizes here is compensating for that. +struct NvdecRegisters { +    static constexpr std::size_t NUM_REGS = 0x178; + +    union { +        struct { +            INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000 +            VideoCodec set_codec_id;          ///< 0x0400 +            INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408 +            u64 execute;                      ///< 0x0600 +            INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608 +            struct {                          ///< 0x0800 +                union { +                    BitField<0, 3, VideoCodec> codec; +                    BitField<4, 1, u64> gp_timer_on; +                    BitField<13, 1, u64> mb_timer_on; +                    BitField<14, 1, u64> intra_frame_pslc; +                    BitField<17, 1, u64> all_intra_frame; +                }; +            } control_params; +            u64 picture_info_offset;                   ///< 0x0808 +            u64 frame_bitstream_offset;                ///< 0x0810 +            u64 frame_number;                          ///< 0x0818 +            u64 h264_slice_data_offsets;               ///< 0x0820 +            u64 h264_mv_dump_offset;                   ///< 0x0828 +            INSERT_PADDING_WORDS_NOINIT(6);            ///< 0x0830 +            u64 frame_stats_offset;                    ///< 0x0848 +            u64 h264_last_surface_luma_offset;         ///< 0x0850 +            u64 h264_last_surface_chroma_offset;       ///< 0x0858 +            std::array<u64, 17> surface_luma_offset;   ///< 0x0860 +            std::array<u64, 17> surface_chroma_offset; ///< 0x08E8 +            INSERT_PADDING_WORDS_NOINIT(132);          ///< 0x0970 +            u64 vp9_entropy_probs_offset;              ///< 0x0B80 +            u64 vp9_backward_updates_offset;           ///< 0x0B88 +            u64 vp9_last_frame_segmap_offset;          ///< 0x0B90 +            u64 vp9_curr_frame_segmap_offset;          ///< 0x0B98 +            INSERT_PADDING_WORDS_NOINIT(2);            ///< 0x0BA0 +            u64 vp9_last_frame_mvs_offset;             ///< 0x0BA8 +            u64 vp9_curr_frame_mvs_offset;             ///< 0x0BB0 +            INSERT_PADDING_WORDS_NOINIT(2);            ///< 0x0BB8 +        }; +        std::array<u64, NUM_REGS> reg_array; +    }; +}; +static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); + +#define ASSERT_REG_POSITION(field_name, position)                                                  \ +    static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64),                  \ +                  "Field " #field_name " has invalid position") + +ASSERT_REG_POSITION(set_codec_id, 0x80); +ASSERT_REG_POSITION(execute, 0xC0); +ASSERT_REG_POSITION(control_params, 0x100); +ASSERT_REG_POSITION(picture_info_offset, 0x101); +ASSERT_REG_POSITION(frame_bitstream_offset, 0x102); +ASSERT_REG_POSITION(frame_number, 0x103); +ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104); +ASSERT_REG_POSITION(frame_stats_offset, 0x109); +ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A); +ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B); +ASSERT_REG_POSITION(surface_luma_offset, 0x10C); +ASSERT_REG_POSITION(surface_chroma_offset, 0x11D); +ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170); +ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171); +ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172); +ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173); +ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175); +ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176); + +#undef ASSERT_REG_POSITION +  } // namespace Tegra::NvdecCommon | 
