diff options
Diffstat (limited to 'src')
25 files changed, 560 insertions, 428 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f30dd49a3..f8ec8fea8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -45,13 +45,23 @@ if (MSVC) /Zc:inline /Zc:throwingNew + # External headers diagnostics + /experimental:external # Enables the external headers options. This option isn't required in Visual Studio 2019 version 16.10 and later + /external:anglebrackets # Treats all headers included by #include <header>, where the header file is enclosed in angle brackets (< >), as external headers + /external:W0 # Sets the default warning level to 0 for external headers, effectively turning off warnings for external headers + # Warnings /W3 - /we4062 # enumerator 'identifier' in a switch of enum 'enumeration' is not handled + /we4018 # 'expression': signed/unsigned mismatch + /we4062 # Enumerator 'identifier' in a switch of enum 'enumeration' is not handled /we4101 # 'identifier': unreferenced local variable + /we4189 # 'identifier': local variable is initialized but not referenced /we4265 # 'class': class has virtual functions, but destructor is not virtual - /we4388 # signed/unsigned mismatch - /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect + /we4267 # 'var': conversion from 'size_t' to 'type', possible loss of data + /we4305 # 'context': truncation from 'type1' to 'type2' + /we4388 # 'expression': signed/unsigned mismatch + /we4389 # 'operator': signed/unsigned mismatch + /we4547 # 'operator': operator before comma has no effect; expected operator with side-effect /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'? /we4555 # Expression has no effect; expected expression with side-effect /we4715 # 'function': not all control paths return a value @@ -72,6 +82,7 @@ else() -Werror=missing-declarations -Werror=missing-field-initializers -Werror=reorder + -Werror=sign-compare -Werror=switch -Werror=uninitialized -Werror=unused-function diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index d25a1a645..090dd19b1 100644 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt @@ -51,9 +51,6 @@ if (NOT MSVC) target_compile_options(audio_core PRIVATE -Werror=conversion -Werror=ignored-qualifiers - -Werror=implicit-fallthrough - -Werror=reorder - -Werror=sign-compare -Werror=shadow -Werror=unused-parameter -Werror=unused-variable diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 83b5b7676..b2b0dbe05 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -654,24 +654,19 @@ endif() if (MSVC) target_compile_options(core PRIVATE - /we4018 # 'expression' : signed/unsigned mismatch - /we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point) - /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch + /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data + /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data + /we4245 # 'conversion': conversion from 'type1' to 'type2', signed/unsigned mismatch /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data - /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data - /we4305 # 'context' : truncation from 'type1' to 'type2' /we4456 # Declaration of 'identifier' hides previous local declaration /we4457 # Declaration of 'identifier' hides function parameter /we4458 # Declaration of 'identifier' hides class member /we4459 # Declaration of 'identifier' hides global declaration - /we4715 # 'function' : not all control paths return a value ) else() target_compile_options(core PRIVATE -Werror=conversion -Werror=ignored-qualifiers - -Werror=implicit-fallthrough - -Werror=sign-compare -Werror=shadow $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess> diff --git a/src/core/hle/service/mii/manager.cpp b/src/core/hle/service/mii/manager.cpp index 114aff31c..869d2763f 100644 --- a/src/core/hle/service/mii/manager.cpp +++ b/src/core/hle/service/mii/manager.cpp @@ -20,6 +20,7 @@ namespace { constexpr ResultCode ERROR_CANNOT_FIND_ENTRY{ErrorModule::Mii, 4}; +constexpr std::size_t BaseMiiCount{2}; constexpr std::size_t DefaultMiiCount{RawData::DefaultMii.size()}; constexpr MiiStoreData::Name DefaultMiiName{u'y', u'u', u'z', u'u'}; @@ -415,7 +416,7 @@ u32 MiiManager::GetCount(SourceFlag source_flag) const { count += 0; } if ((source_flag & SourceFlag::Default) != SourceFlag::None) { - count += DefaultMiiCount; + count += (DefaultMiiCount - BaseMiiCount); } return static_cast<u32>(count); } @@ -445,7 +446,7 @@ ResultVal<std::vector<MiiInfoElement>> MiiManager::GetDefault(SourceFlag source_ return MakeResult(std::move(result)); } - for (std::size_t index = 0; index < DefaultMiiCount; index++) { + for (std::size_t index = BaseMiiCount; index < DefaultMiiCount; index++) { result.emplace_back(BuildDefault(index), Source::Default); } diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt index 7c5763f9c..c4283a952 100644 --- a/src/input_common/CMakeLists.txt +++ b/src/input_common/CMakeLists.txt @@ -34,28 +34,17 @@ if (MSVC) /W4 /WX - # 'expression' : signed/unsigned mismatch - /we4018 - # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point) - /we4244 - # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch - /we4245 - # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data - /we4254 - # 'var' : conversion from 'size_t' to 'type', possible loss of data - /we4267 - # 'context' : truncation from 'type1' to 'type2' - /we4305 + /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data + /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data + /we4245 # 'conversion': conversion from 'type1' to 'type2', signed/unsigned mismatch + /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data ) else() target_compile_options(input_common PRIVATE -Werror -Werror=conversion -Werror=ignored-qualifiers - -Werror=implicit-fallthrough - -Werror=reorder -Werror=shadow - -Werror=sign-compare $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> -Werror=unused-variable diff --git a/src/input_common/analog_from_button.cpp b/src/input_common/analog_from_button.cpp index 100138d11..2fafd077f 100755 --- a/src/input_common/analog_from_button.cpp +++ b/src/input_common/analog_from_button.cpp @@ -27,6 +27,7 @@ public: down->SetCallback(callbacks); left->SetCallback(callbacks); right->SetCallback(callbacks); + modifier->SetCallback(callbacks); } bool IsAngleGreater(float old_angle, float new_angle) const { diff --git a/src/input_common/gcadapter/gc_adapter.cpp b/src/input_common/gcadapter/gc_adapter.cpp index 320f51ee6..a2f1bb67c 100644 --- a/src/input_common/gcadapter/gc_adapter.cpp +++ b/src/input_common/gcadapter/gc_adapter.cpp @@ -5,14 +5,7 @@ #include <chrono> #include <thread> -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4200) // nonstandard extension used : zero-sized array in struct/union -#endif #include <libusb.h> -#ifdef _MSC_VER -#pragma warning(pop) -#endif #include "common/logging/log.h" #include "common/param_package.h" diff --git a/src/input_common/udp/protocol.h b/src/input_common/udp/protocol.h index a3d276697..1bdc9209e 100644 --- a/src/input_common/udp/protocol.h +++ b/src/input_common/udp/protocol.h @@ -8,14 +8,7 @@ #include <optional> #include <type_traits> -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4701) -#endif #include <boost/crc.hpp> -#ifdef _MSC_VER -#pragma warning(pop) -#endif #include "common/bit_field.h" #include "common/swap.h" diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e31eb30c0..e4de55f4d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -292,13 +292,12 @@ endif() if (MSVC) target_compile_options(video_core PRIVATE - /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data - /we4244 # 'var' : conversion from integer to 'type', possible loss of data + /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data + /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data /we4456 # Declaration of 'identifier' hides previous local declaration /we4457 # Declaration of 'identifier' hides function parameter /we4458 # Declaration of 'identifier' hides class member /we4459 # Declaration of 'identifier' hides global declaration - /we4715 # 'function' : not all control paths return a value ) else() target_compile_options(video_core PRIVATE diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index a3fda1094..8b86ad050 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { case ThiMethod::SetMethod1: LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", static_cast<u32>(nvdec_thi_state.method_0)); - nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0), - data); + nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data); break; default: break; diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index d02dc6260..1b4bbc8ac 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) { av_free(ptr); } -Codec::Codec(GPU& gpu_) - : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)), +Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) + : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} Codec::~Codec() { @@ -43,46 +43,48 @@ Codec::~Codec() { avcodec_close(av_codec_ctx); } +void Codec::Initialize() { + AVCodecID codec{AV_CODEC_ID_NONE}; + switch (current_codec) { + case NvdecCommon::VideoCodec::H264: + codec = AV_CODEC_ID_H264; + break; + case NvdecCommon::VideoCodec::Vp9: + codec = AV_CODEC_ID_VP9; + break; + default: + return; + } + av_codec = avcodec_find_decoder(codec); + av_codec_ctx = avcodec_alloc_context3(av_codec); + av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); + + // TODO(ameerj): libavcodec gpu hw acceleration + + const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); + if (av_error < 0) { + LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); + avcodec_close(av_codec_ctx); + return; + } + initialized = true; + return; +} + void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { if (current_codec != codec) { - LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec)); current_codec = codec; + LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName()); } } -void Codec::StateWrite(u32 offset, u64 arguments) { - u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64); - std::memcpy(state_offset, &arguments, sizeof(u64)); -} - void Codec::Decode() { - bool is_first_frame = false; + const bool is_first_frame = !initialized; if (!initialized) { - if (current_codec == NvdecCommon::VideoCodec::H264) { - av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); - } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { - av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); - } else { - LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec); - return; - } - - av_codec_ctx = avcodec_alloc_context3(av_codec); - av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); - - // TODO(ameerj): libavcodec gpu hw acceleration - - const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); - if (av_error < 0) { - LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); - avcodec_close(av_codec_ctx); - return; - } - initialized = true; - is_first_frame = true; + Initialize(); } - bool vp9_hidden_frame = false; + bool vp9_hidden_frame = false; AVPacket packet{}; av_init_packet(&packet); std::vector<u8> frame_data; @@ -95,7 +97,7 @@ void Codec::Decode() { } packet.data = frame_data.data(); - packet.size = static_cast<int>(frame_data.size()); + packet.size = static_cast<s32>(frame_data.size()); avcodec_send_packet(av_codec_ctx, &packet); @@ -127,4 +129,21 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { return current_codec; } +std::string_view Codec::GetCurrentCodecName() const { + switch (current_codec) { + case NvdecCommon::VideoCodec::None: + return "None"; + case NvdecCommon::VideoCodec::H264: + return "H264"; + case NvdecCommon::VideoCodec::Vp8: + return "VP8"; + case NvdecCommon::VideoCodec::H265: + return "H265"; + case NvdecCommon::VideoCodec::Vp9: + return "VP9"; + default: + return "Unknown"; + } +}; + } // namespace Tegra diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 3e135a2a6..96c823c76 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -14,18 +14,10 @@ extern "C" { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" #endif -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4242) // conversion from 'type' to 'type', possible loss of data -#pragma warning(disable : 4244) // conversion from 'type' to 'type', possible loss of data -#endif #include <libavcodec/avcodec.h> #if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop #endif -#ifdef _MSC_VER -#pragma warning(pop) -#endif } namespace Tegra { @@ -42,15 +34,15 @@ class VP9; class Codec { public: - explicit Codec(GPU& gpu); + explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs); ~Codec(); + /// Initialize the codec, returning success or failure + void Initialize(); + /// Sets NVDEC video stream codec void SetTargetCodec(NvdecCommon::VideoCodec codec); - /// Populate NvdecRegisters state with argument value at the provided offset - void StateWrite(u32 offset, u64 arguments); - /// Call decoders to construct headers, decode AVFrame with ffmpeg void Decode(); @@ -59,6 +51,8 @@ public: /// Returns the value of current_codec [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; + /// Return name of the current codec + [[nodiscard]] std::string_view GetCurrentCodecName() const; private: bool initialized{}; @@ -68,10 +62,10 @@ private: AVCodecContext* av_codec_ctx{nullptr}; GPU& gpu; + const NvdecCommon::NvdecRegisters& state; std::unique_ptr<Decoder::H264> h264_decoder; std::unique_ptr<Decoder::VP9> vp9_decoder; - NvdecCommon::NvdecRegisters state{}; std::queue<AVFramePtr> av_frames{}; }; diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index fea6aed98..5fb6d45ee 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -45,134 +45,129 @@ H264::~H264() = default; const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state, bool is_first_frame) { - H264DecoderContext context{}; + H264DecoderContext context; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); - const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff); + const s64 frame_number = context.h264_parameter_set.frame_number.Value(); if (!is_first_frame && frame_number != 0) { - frame.resize(context.frame_data_size); - + frame.resize(context.stream_len); gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); - } else { - /// Encode header - H264BitWriter writer{}; - writer.WriteU(1, 24); - writer.WriteU(0, 1); - writer.WriteU(3, 2); - writer.WriteU(7, 5); - writer.WriteU(100, 8); - writer.WriteU(0, 8); - writer.WriteU(31, 8); - writer.WriteUe(0); - const auto chroma_format_idc = - static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3); - writer.WriteUe(chroma_format_idc); - if (chroma_format_idc == 3) { - writer.WriteBit(false); - } - - writer.WriteUe(0); - writer.WriteUe(0); - writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag - writer.WriteBit(false); // Scaling matrix present flag - - const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3); - writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf)); - writer.WriteUe(order_cnt_type); - if (order_cnt_type == 0) { - writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt); - } else if (order_cnt_type == 1) { - writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); - - writer.WriteSe(0); - writer.WriteSe(0); - writer.WriteUe(0); - } - - const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units / - (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); + return frame; + } - writer.WriteUe(16); + // Encode header + H264BitWriter writer{}; + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(7, 5); + writer.WriteU(100, 8); + writer.WriteU(0, 8); + writer.WriteU(31, 8); + writer.WriteUe(0); + const u32 chroma_format_idc = + static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value()); + writer.WriteUe(chroma_format_idc); + if (chroma_format_idc == 3) { writer.WriteBit(false); - writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); - writer.WriteUe(pic_height - 1); - writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); - - if (!context.h264_parameter_set.frame_mbs_only_flag) { - writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0); - } + } - writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0); - writer.WriteBit(false); // Frame cropping flag - writer.WriteBit(false); // VUI parameter present flag + writer.WriteUe(0); + writer.WriteUe(0); + writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag + writer.WriteBit(false); // Scaling matrix present flag - writer.End(); + writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value())); - // H264 PPS - writer.WriteU(1, 24); - writer.WriteU(0, 1); - writer.WriteU(3, 2); - writer.WriteU(8, 5); + const auto order_cnt_type = + static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value()); + writer.WriteUe(order_cnt_type); + if (order_cnt_type == 0) { + writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4); + } else if (order_cnt_type == 1) { + writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); + writer.WriteSe(0); + writer.WriteSe(0); writer.WriteUe(0); - writer.WriteUe(0); + } - writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); - writer.WriteBit(false); - writer.WriteUe(0); - writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); - writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); - writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0); - writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2); - s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f); - pic_init_qp = (pic_init_qp << 26) >> 26; - writer.WriteSe(pic_init_qp); - writer.WriteSe(0); - s32 chroma_qp_index_offset = - static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f); - chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27; + const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units / + (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); + + writer.WriteUe(16); + writer.WriteBit(false); + writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); + writer.WriteUe(pic_height - 1); + writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); - writer.WriteSe(chroma_qp_index_offset); - writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0); - writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0); - writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0); - writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); + if (!context.h264_parameter_set.frame_mbs_only_flag) { + writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0); + } + writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0); + writer.WriteBit(false); // Frame cropping flag + writer.WriteBit(false); // VUI parameter present flag + + writer.End(); + + // H264 PPS + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(8, 5); + + writer.WriteUe(0); + writer.WriteUe(0); + + writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); + writer.WriteBit(false); + writer.WriteUe(0); + writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); + writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); + writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0); + writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2); + s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value()); + writer.WriteSe(pic_init_qp); + writer.WriteSe(0); + s32 chroma_qp_index_offset = + static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value()); + + writer.WriteSe(chroma_qp_index_offset); + writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0); + writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0); + writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0); + writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); + + writer.WriteBit(true); + + for (s32 index = 0; index < 6; index++) { writer.WriteBit(true); + std::span<const u8> matrix{context.weight_scale}; + writer.WriteScalingList(matrix, index * 16, 16); + } - for (s32 index = 0; index < 6; index++) { + if (context.h264_parameter_set.transform_8x8_mode_flag) { + for (s32 index = 0; index < 2; index++) { writer.WriteBit(true); - const auto matrix_x4 = - std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end()); - writer.WriteScalingList(matrix_x4, index * 16, 16); - } - - if (context.h264_parameter_set.transform_8x8_mode_flag) { - for (s32 index = 0; index < 2; index++) { - writer.WriteBit(true); - const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(), - context.scaling_matrix_8.end()); - - writer.WriteScalingList(matrix_x8, index * 64, 64); - } + std::span<const u8> matrix{context.weight_scale_8x8}; + writer.WriteScalingList(matrix, index * 64, 64); } + } - s32 chroma_qp_index_offset2 = - static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f); - chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27; + s32 chroma_qp_index_offset2 = + static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value()); - writer.WriteSe(chroma_qp_index_offset2); + writer.WriteSe(chroma_qp_index_offset2); - writer.End(); + writer.End(); - const auto& encoded_header = writer.GetByteArray(); - frame.resize(encoded_header.size() + context.frame_data_size); - std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); + const auto& encoded_header = writer.GetByteArray(); + frame.resize(encoded_header.size() + context.stream_len); + std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); - gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, - frame.data() + encoded_header.size(), - context.frame_data_size); - } + gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, + frame.data() + encoded_header.size(), context.stream_len); return frame; } @@ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) { WriteBits(state ? 1 : 0, 1); } -void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) { +void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { std::vector<u8> scan(count); if (count == 16) { std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h index 0f3a1d9f3..bfe84a472 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/command_classes/codecs/h264.h @@ -20,7 +20,9 @@ #pragma once +#include <span> #include <vector> +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/command_classes/nvdec_common.h" @@ -48,7 +50,7 @@ public: /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification /// Writes the scaling matrices of the sream - void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); + void WriteScalingList(std::span<const u8> list, s32 start, s32 count); /// Return the bitstream as a vector. [[nodiscard]] std::vector<u8>& GetByteArray(); @@ -78,40 +80,110 @@ public: const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); private: + std::vector<u8> frame; + GPU& gpu; + struct H264ParameterSet { - u32 log2_max_pic_order_cnt{}; - u32 delta_pic_order_always_zero_flag{}; - u32 frame_mbs_only_flag{}; - u32 pic_width_in_mbs{}; - u32 pic_height_in_map_units{}; - INSERT_PADDING_WORDS(1); - u32 entropy_coding_mode_flag{}; - u32 bottom_field_pic_order_flag{}; - u32 num_refidx_l0_default_active{}; - u32 num_refidx_l1_default_active{}; - u32 deblocking_filter_control_flag{}; - u32 redundant_pic_count_flag{}; - u32 transform_8x8_mode_flag{}; - INSERT_PADDING_WORDS(9); - u64 flags{}; - u32 frame_number{}; - u32 frame_number2{}; + s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00 + s32 delta_pic_order_always_zero_flag; ///< 0x04 + s32 frame_mbs_only_flag; ///< 0x08 + u32 pic_width_in_mbs; ///< 0x0C + u32 frame_height_in_map_units; ///< 0x10 + union { ///< 0x14 + BitField<0, 2, u32> tile_format; + BitField<2, 3, u32> gob_height; + }; + u32 entropy_coding_mode_flag; ///< 0x18 + s32 pic_order_present_flag; ///< 0x1C + s32 num_refidx_l0_default_active; ///< 0x20 + s32 num_refidx_l1_default_active; ///< 0x24 + s32 deblocking_filter_control_present_flag; ///< 0x28 + s32 redundant_pic_cnt_present_flag; ///< 0x2C + u32 transform_8x8_mode_flag; ///< 0x30 + u32 pitch_luma; ///< 0x34 + u32 pitch_chroma; ///< 0x38 + u32 luma_top_offset; ///< 0x3C + u32 luma_bot_offset; ///< 0x40 + u32 luma_frame_offset; ///< 0x44 + u32 chroma_top_offset; ///< 0x48 + u32 chroma_bot_offset; ///< 0x4C + u32 chroma_frame_offset; ///< 0x50 + u32 hist_buffer_size; ///< 0x54 + union { ///< 0x58 + union { + BitField<0, 1, u64> mbaff_frame; + BitField<1, 1, u64> direct_8x8_inference; + BitField<2, 1, u64> weighted_pred; + BitField<3, 1, u64> constrained_intra_pred; + BitField<4, 1, u64> ref_pic; + BitField<5, 1, u64> field_pic; + BitField<6, 1, u64> bottom_field; + BitField<7, 1, u64> second_field; + } flags; + BitField<8, 4, u64> log2_max_frame_num_minus4; + BitField<12, 2, u64> chroma_format_idc; + BitField<14, 2, u64> pic_order_cnt_type; + BitField<16, 6, s64> pic_init_qp_minus26; + BitField<22, 5, s64> chroma_qp_index_offset; + BitField<27, 5, s64> second_chroma_qp_index_offset; + BitField<32, 2, u64> weighted_bipred_idc; + BitField<34, 7, u64> curr_pic_idx; + BitField<41, 5, u64> curr_col_idx; + BitField<46, 16, u64> frame_number; + BitField<62, 1, u64> frame_surfaces; + BitField<63, 1, u64> output_memory_layout; + }; }; - static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size"); + static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size"); struct H264DecoderContext { - INSERT_PADDING_BYTES(0x48); - u32 frame_data_size{}; - INSERT_PADDING_BYTES(0xc); - H264ParameterSet h264_parameter_set{}; - INSERT_PADDING_BYTES(0x100); - std::array<u8, 0x60> scaling_matrix_4; - std::array<u8, 0x80> scaling_matrix_8; + INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000 + u32 stream_len; ///< 0x0048 + INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C + H264ParameterSet h264_parameter_set; ///< 0x0058 + INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8 + std::array<u8, 0x60> weight_scale; ///< 0x01C0 + std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220 }; - static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size"); - - std::vector<u8> frame; - GPU& gpu; + static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size"); + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(H264ParameterSet, field_name) == position, \ + "Field " #field_name " has invalid position") + + ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00); + ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04); + ASSERT_POSITION(frame_mbs_only_flag, 0x08); + ASSERT_POSITION(pic_width_in_mbs, 0x0C); + ASSERT_POSITION(frame_height_in_map_units, 0x10); + ASSERT_POSITION(tile_format, 0x14); + ASSERT_POSITION(entropy_coding_mode_flag, 0x18); + ASSERT_POSITION(pic_order_present_flag, 0x1C); + ASSERT_POSITION(num_refidx_l0_default_active, 0x20); + ASSERT_POSITION(num_refidx_l1_default_active, 0x24); + ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28); + ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C); + ASSERT_POSITION(transform_8x8_mode_flag, 0x30); + ASSERT_POSITION(pitch_luma, 0x34); + ASSERT_POSITION(pitch_chroma, 0x38); + ASSERT_POSITION(luma_top_offset, 0x3C); + ASSERT_POSITION(luma_bot_offset, 0x40); + ASSERT_POSITION(luma_frame_offset, 0x44); + ASSERT_POSITION(chroma_top_offset, 0x48); + ASSERT_POSITION(chroma_bot_offset, 0x4C); + ASSERT_POSITION(chroma_frame_offset, 0x50); + ASSERT_POSITION(hist_buffer_size, 0x54); + ASSERT_POSITION(flags, 0x58); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(H264DecoderContext, field_name) == position, \ + "Field " #field_name " has invalid position") + + ASSERT_POSITION(stream_len, 0x48); + ASSERT_POSITION(h264_parameter_set, 0x58); + ASSERT_POSITION(weight_scale, 0x1C0); +#undef ASSERT_POSITION }; } // namespace Decoder diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 29bb31418..902bc2a98 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ } Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { - PictureInfo picture_info{}; + PictureInfo picture_info; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); Vp9PictureInfo vp9_info = picture_info.Convert(); @@ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) } void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { - EntropyProbs entropy{}; + EntropyProbs entropy; gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); entropy.Convert(dst); } diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 139501a1c..2da14f3ca 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -15,10 +15,10 @@ class GPU; namespace Decoder { struct Vp9FrameDimensions { - s16 width{}; - s16 height{}; - s16 luma_pitch{}; - s16 chroma_pitch{}; + s16 width; + s16 height; + s16 luma_pitch; + s16 chroma_pitch; }; static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); @@ -49,87 +49,87 @@ enum class TxMode { }; struct Segmentation { - u8 enabled{}; - u8 update_map{}; - u8 temporal_update{}; - u8 abs_delta{}; - std::array<u32, 8> feature_mask{}; - std::array<std::array<s16, 4>, 8> feature_data{}; + u8 enabled; + u8 update_map; + u8 temporal_update; + u8 abs_delta; + std::array<u32, 8> feature_mask; + std::array<std::array<s16, 4>, 8> feature_data; }; static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); struct LoopFilter { - u8 mode_ref_delta_enabled{}; - std::array<s8, 4> ref_deltas{}; - std::array<s8, 2> mode_deltas{}; + u8 mode_ref_delta_enabled; + std::array<s8, 4> ref_deltas; + std::array<s8, 2> mode_deltas; }; static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); struct Vp9EntropyProbs { - std::array<u8, 36> y_mode_prob{}; - std::array<u8, 64> partition_prob{}; - std::array<u8, 1728> coef_probs{}; - std::array<u8, 8> switchable_interp_prob{}; - std::array<u8, 28> inter_mode_prob{}; - std::array<u8, 4> intra_inter_prob{}; - std::array<u8, 5> comp_inter_prob{}; - std::array<u8, 10> single_ref_prob{}; - std::array<u8, 5> comp_ref_prob{}; - std::array<u8, 6> tx_32x32_prob{}; - std::array<u8, 4> tx_16x16_prob{}; - std::array<u8, 2> tx_8x8_prob{}; - std::array<u8, 3> skip_probs{}; - std::array<u8, 3> joints{}; - std::array<u8, 2> sign{}; - std::array<u8, 20> classes{}; - std::array<u8, 2> class_0{}; - std::array<u8, 20> prob_bits{}; - std::array<u8, 12> class_0_fr{}; - std::array<u8, 6> fr{}; - std::array<u8, 2> class_0_hp{}; - std::array<u8, 2> high_precision{}; + std::array<u8, 36> y_mode_prob; ///< 0x0000 + std::array<u8, 64> partition_prob; ///< 0x0024 + std::array<u8, 1728> coef_probs; ///< 0x0064 + std::array<u8, 8> switchable_interp_prob; ///< 0x0724 + std::array<u8, 28> inter_mode_prob; ///< 0x072C + std::array<u8, 4> intra_inter_prob; ///< 0x0748 + std::array<u8, 5> comp_inter_prob; ///< 0x074C + std::array<u8, 10> single_ref_prob; ///< 0x0751 + std::array<u8, 5> comp_ref_prob; ///< 0x075B + std::array<u8, 6> tx_32x32_prob; ///< 0x0760 + std::array<u8, 4> tx_16x16_prob; ///< 0x0766 + std::array<u8, 2> tx_8x8_prob; ///< 0x076A + std::array<u8, 3> skip_probs; ///< 0x076C + std::array<u8, 3> joints; ///< 0x076F + std::array<u8, 2> sign; ///< 0x0772 + std::array<u8, 20> classes; ///< 0x0774 + std::array<u8, 2> class_0; ///< 0x0788 + std::array<u8, 20> prob_bits; ///< 0x078A + std::array<u8, 12> class_0_fr; ///< 0x079E + std::array<u8, 6> fr; ///< 0x07AA + std::array<u8, 2> class_0_hp; ///< 0x07B0 + std::array<u8, 2> high_precision; ///< 0x07B2 }; static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); struct Vp9PictureInfo { - bool is_key_frame{}; - bool intra_only{}; - bool last_frame_was_key{}; - bool frame_size_changed{}; - bool error_resilient_mode{}; - bool last_frame_shown{}; - bool show_frame{}; - std::array<s8, 4> ref_frame_sign_bias{}; - s32 base_q_index{}; - s32 y_dc_delta_q{}; - s32 uv_dc_delta_q{}; - s32 uv_ac_delta_q{}; - bool lossless{}; - s32 transform_mode{}; - bool allow_high_precision_mv{}; - s32 interp_filter{}; - s32 reference_mode{}; - s8 comp_fixed_ref{}; - std::array<s8, 2> comp_var_ref{}; - s32 log2_tile_cols{}; - s32 log2_tile_rows{}; - bool segment_enabled{}; - bool segment_map_update{}; - bool segment_map_temporal_update{}; - s32 segment_abs_delta{}; - std::array<u32, 8> segment_feature_enable{}; - std::array<std::array<s16, 4>, 8> segment_feature_data{}; - bool mode_ref_delta_enabled{}; - bool use_prev_in_find_mv_refs{}; - std::array<s8, 4> ref_deltas{}; - std::array<s8, 2> mode_deltas{}; - Vp9EntropyProbs entropy{}; - Vp9FrameDimensions frame_size{}; - u8 first_level{}; - u8 sharpness_level{}; - u32 bitstream_size{}; - std::array<u64, 4> frame_offsets{}; - std::array<bool, 4> refresh_frame{}; + bool is_key_frame; + bool intra_only; + bool last_frame_was_key; + bool frame_size_changed; + bool error_resilient_mode; + bool last_frame_shown; + bool show_frame; + std::array<s8, 4> ref_frame_sign_bias; + s32 base_q_index; + s32 y_dc_delta_q; + s32 uv_dc_delta_q; + s32 uv_ac_delta_q; + bool lossless; + s32 transform_mode; + bool allow_high_precision_mv; + s32 interp_filter; + s32 reference_mode; + s8 comp_fixed_ref; + std::array<s8, 2> comp_var_ref; + s32 log2_tile_cols; + s32 log2_tile_rows; + bool segment_enabled; + bool segment_map_update; + bool segment_map_temporal_update; + s32 segment_abs_delta; + std::array<u32, 8> segment_feature_enable; + std::array<std::array<s16, 4>, 8> segment_feature_data; + bool mode_ref_delta_enabled; + bool use_prev_in_find_mv_refs; + std::array<s8, 4> ref_deltas; + std::array<s8, 2> mode_deltas; + Vp9EntropyProbs entropy; + Vp9FrameDimensions frame_size; + u8 first_level; + u8 sharpness_level; + u32 bitstream_size; + std::array<u64, 4> frame_offsets; + std::array<bool, 4> refresh_frame; }; struct Vp9FrameContainer { @@ -138,35 +138,35 @@ struct Vp9FrameContainer { }; struct PictureInfo { - INSERT_PADDING_WORDS(12); - u32 bitstream_size{}; - INSERT_PADDING_WORDS(5); - Vp9FrameDimensions last_frame_size{}; - Vp9FrameDimensions golden_frame_size{}; - Vp9FrameDimensions alt_frame_size{}; - Vp9FrameDimensions current_frame_size{}; - u32 vp9_flags{}; - std::array<s8, 4> ref_frame_sign_bias{}; - u8 first_level{}; - u8 sharpness_level{}; - u8 base_q_index{}; - u8 y_dc_delta_q{}; - u8 uv_ac_delta_q{}; - u8 uv_dc_delta_q{}; - u8 lossless{}; - u8 tx_mode{}; - u8 allow_high_precision_mv{}; - u8 interp_filter{}; - u8 reference_mode{}; - s8 comp_fixed_ref{}; - std::array<s8, 2> comp_var_ref{}; - u8 log2_tile_cols{}; - u8 log2_tile_rows{}; - Segmentation segmentation{}; - LoopFilter loop_filter{}; - INSERT_PADDING_BYTES(5); - u32 surface_params{}; - INSERT_PADDING_WORDS(3); + INSERT_PADDING_WORDS_NOINIT(12); ///< 0x00 + u32 bitstream_size; ///< 0x30 + INSERT_PADDING_WORDS_NOINIT(5); ///< 0x34 + Vp9FrameDimensions last_frame_size; ///< 0x48 + Vp9FrameDimensions golden_frame_size; ///< 0x50 + Vp9FrameDimensions alt_frame_size; ///< 0x58 + Vp9FrameDimensions current_frame_size; ///< 0x60 + u32 vp9_flags; ///< 0x68 + std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C + u8 first_level; ///< 0x70 + u8 sharpness_level; ///< 0x71 + u8 base_q_index; ///< 0x72 + u8 y_dc_delta_q; ///< 0x73 + u8 uv_ac_delta_q; ///< 0x74 + u8 uv_dc_delta_q; ///< 0x75 + u8 lossless; ///< 0x76 + u8 tx_mode; ///< 0x77 + u8 allow_high_precision_mv; ///< 0x78 + u8 interp_filter; ///< 0x79 + u8 reference_mode; ///< 0x7A + s8 comp_fixed_ref; ///< 0x7B + std::array<s8, 2> comp_var_ref; ///< 0x7C + u8 log2_tile_cols; ///< 0x7E + u8 log2_tile_rows; ///< 0x7F + Segmentation segmentation; ///< 0x80 + LoopFilter loop_filter; ///< 0xE4 + INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB + u32 surface_params; ///< 0xF0 + INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4 [[nodiscard]] Vp9PictureInfo Convert() const { return { @@ -176,6 +176,7 @@ struct PictureInfo { .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, + .show_frame = false, .ref_frame_sign_bias = ref_frame_sign_bias, .base_q_index = base_q_index, .y_dc_delta_q = y_dc_delta_q, @@ -204,45 +205,48 @@ struct PictureInfo { !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), .ref_deltas = loop_filter.ref_deltas, .mode_deltas = loop_filter.mode_deltas, + .entropy{}, .frame_size = current_frame_size, .first_level = first_level, .sharpness_level = sharpness_level, .bitstream_size = bitstream_size, + .frame_offsets{}, + .refresh_frame{}, }; } }; static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); struct EntropyProbs { - INSERT_PADDING_BYTES(1024); - std::array<u8, 28> inter_mode_prob{}; - std::array<u8, 4> intra_inter_prob{}; - INSERT_PADDING_BYTES(80); - std::array<u8, 2> tx_8x8_prob{}; - std::array<u8, 4> tx_16x16_prob{}; - std::array<u8, 6> tx_32x32_prob{}; - std::array<u8, 4> y_mode_prob_e8{}; - std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{}; - INSERT_PADDING_BYTES(64); - std::array<u8, 64> partition_prob{}; - INSERT_PADDING_BYTES(10); - std::array<u8, 8> switchable_interp_prob{}; - std::array<u8, 5> comp_inter_prob{}; - std::array<u8, 3> skip_probs{}; - INSERT_PADDING_BYTES(1); - std::array<u8, 3> joints{}; - std::array<u8, 2> sign{}; - std::array<u8, 2> class_0{}; - std::array<u8, 6> fr{}; - std::array<u8, 2> class_0_hp{}; - std::array<u8, 2> high_precision{}; - std::array<u8, 20> classes{}; - std::array<u8, 12> class_0_fr{}; - std::array<u8, 20> pred_bits{}; - std::array<u8, 10> single_ref_prob{}; - std::array<u8, 5> comp_ref_prob{}; - INSERT_PADDING_BYTES(17); - std::array<u8, 2304> coef_probs{}; + INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000 + std::array<u8, 28> inter_mode_prob; ///< 0x0400 + std::array<u8, 4> intra_inter_prob; ///< 0x041C + INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420 + std::array<u8, 2> tx_8x8_prob; ///< 0x0470 + std::array<u8, 4> tx_16x16_prob; ///< 0x0472 + std::array<u8, 6> tx_32x32_prob; ///< 0x0476 + std::array<u8, 4> y_mode_prob_e8; ///< 0x047C + std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480 + INSERT_PADDING_BYTES_NOINIT(64); ///< 0x04A0 + std::array<u8, 64> partition_prob; ///< 0x04E0 + INSERT_PADDING_BYTES_NOINIT(10); ///< 0x0520 + std::array<u8, 8> switchable_interp_prob; ///< 0x052A + std::array<u8, 5> comp_inter_prob; ///< 0x0532 + std::array<u8, 3> skip_probs; ///< 0x0537 + INSERT_PADDING_BYTES_NOINIT(1); ///< 0x053A + std::array<u8, 3> joints; ///< 0x053B + std::array<u8, 2> sign; ///< 0x053E + std::array<u8, 2> class_0; ///< 0x0540 + std::array<u8, 6> fr; ///< 0x0542 + std::array<u8, 2> class_0_hp; ///< 0x0548 + std::array<u8, 2> high_precision; ///< 0x054A + std::array<u8, 20> classes; ///< 0x054C + std::array<u8, 12> class_0_fr; ///< 0x0560 + std::array<u8, 20> pred_bits; ///< 0x056C + std::array<u8, 10> single_ref_prob; ///< 0x0580 + std::array<u8, 5> comp_ref_prob; ///< 0x058A + INSERT_PADDING_BYTES_NOINIT(17); ///< 0x058F + std::array<u8, 2304> coef_probs; ///< 0x05A0 void Convert(Vp9EntropyProbs& fc) { fc.inter_mode_prob = inter_mode_prob; @@ -293,10 +297,45 @@ struct RefPoolElement { }; struct FrameContexts { - s64 from{}; - bool adapted{}; - Vp9EntropyProbs probs{}; + s64 from; + bool adapted; + Vp9EntropyProbs probs; }; +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(partition_prob, 0x0024); +ASSERT_POSITION(switchable_interp_prob, 0x0724); +ASSERT_POSITION(sign, 0x0772); +ASSERT_POSITION(class_0_fr, 0x079E); +ASSERT_POSITION(high_precision, 0x07B2); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(PictureInfo, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(bitstream_size, 0x30); +ASSERT_POSITION(last_frame_size, 0x48); +ASSERT_POSITION(first_level, 0x70); +ASSERT_POSITION(segmentation, 0x80); +ASSERT_POSITION(loop_filter, 0xE4); +ASSERT_POSITION(surface_params, 0xF0); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(EntropyProbs, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(inter_mode_prob, 0x400); +ASSERT_POSITION(tx_8x8_prob, 0x470); +ASSERT_POSITION(partition_prob, 0x4E0); +ASSERT_POSITION(class_0, 0x540); +ASSERT_POSITION(class_0_fr, 0x560); +ASSERT_POSITION(coef_probs, 0x5A0); +#undef ASSERT_POSITION + }; // namespace Decoder }; // namespace Tegra diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp index e4f919afd..b5e3b70fc 100644 --- a/src/video_core/command_classes/nvdec.cpp +++ b/src/video_core/command_classes/nvdec.cpp @@ -8,22 +8,21 @@ namespace Tegra { -Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {} +#define NVDEC_REG_INDEX(field_name) \ + (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) + +Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {} Nvdec::~Nvdec() = default; -void Nvdec::ProcessMethod(Method method, u32 argument) { - if (method == Method::SetVideoCodec) { - codec->StateWrite(static_cast<u32>(method), argument); - } else { - codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8); - } +void Nvdec::ProcessMethod(u32 method, u32 argument) { + state.reg_array[method] = static_cast<u64>(argument) << 8; switch (method) { - case Method::SetVideoCodec: + case NVDEC_REG_INDEX(set_codec_id): codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument)); break; - case Method::Execute: + case NVDEC_REG_INDEX(execute): Execute(); break; } diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h index e66be80b8..6e1da0b04 100644 --- a/src/video_core/command_classes/nvdec.h +++ b/src/video_core/command_classes/nvdec.h @@ -14,16 +14,11 @@ class GPU; class Nvdec { public: - enum class Method : u32 { - SetVideoCodec = 0x80, - Execute = 0xc0, - }; - explicit Nvdec(GPU& gpu); ~Nvdec(); /// Writes the method into the state, Invoke Execute() if encountered - void ProcessMethod(Method method, u32 argument); + void ProcessMethod(u32 method, u32 argument); /// Return most recently decoded frame [[nodiscard]] AVFramePtr GetFrame(); @@ -33,6 +28,7 @@ private: void Execute(); GPU& gpu; + NvdecCommon::NvdecRegisters state; std::unique_ptr<Codec> codec; }; } // namespace Tegra diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h index 01b5e086d..6a24e00a0 100644 --- a/src/video_core/command_classes/nvdec_common.h +++ b/src/video_core/command_classes/nvdec_common.h @@ -4,40 +4,13 @@ #pragma once +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" namespace Tegra::NvdecCommon { -struct NvdecRegisters { - INSERT_PADDING_WORDS(256); - u64 set_codec_id{}; - INSERT_PADDING_WORDS(254); - u64 set_platform_id{}; - u64 picture_info_offset{}; - u64 frame_bitstream_offset{}; - u64 frame_number{}; - u64 h264_slice_data_offsets{}; - u64 h264_mv_dump_offset{}; - INSERT_PADDING_WORDS(6); - u64 frame_stats_offset{}; - u64 h264_last_surface_luma_offset{}; - u64 h264_last_surface_chroma_offset{}; - std::array<u64, 17> surface_luma_offset{}; - std::array<u64, 17> surface_chroma_offset{}; - INSERT_PADDING_WORDS(132); - u64 vp9_entropy_probs_offset{}; - u64 vp9_backward_updates_offset{}; - u64 vp9_last_frame_segmap_offset{}; - u64 vp9_curr_frame_segmap_offset{}; - INSERT_PADDING_WORDS(2); - u64 vp9_last_frame_mvs_offset{}; - u64 vp9_curr_frame_mvs_offset{}; - INSERT_PADDING_WORDS(2); -}; -static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); - -enum class VideoCodec : u32 { +enum class VideoCodec : u64 { None = 0x0, H264 = 0x3, Vp8 = 0x5, @@ -45,4 +18,76 @@ enum class VideoCodec : u32 { Vp9 = 0x9, }; +// NVDEC should use a 32-bit address space, but is mapped to 64-bit, +// doubling the sizes here is compensating for that. +struct NvdecRegisters { + static constexpr std::size_t NUM_REGS = 0x178; + + union { + struct { + INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000 + VideoCodec set_codec_id; ///< 0x0400 + INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408 + u64 execute; ///< 0x0600 + INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608 + struct { ///< 0x0800 + union { + BitField<0, 3, VideoCodec> codec; + BitField<4, 1, u64> gp_timer_on; + BitField<13, 1, u64> mb_timer_on; + BitField<14, 1, u64> intra_frame_pslc; + BitField<17, 1, u64> all_intra_frame; + }; + } control_params; + u64 picture_info_offset; ///< 0x0808 + u64 frame_bitstream_offset; ///< 0x0810 + u64 frame_number; ///< 0x0818 + u64 h264_slice_data_offsets; ///< 0x0820 + u64 h264_mv_dump_offset; ///< 0x0828 + INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830 + u64 frame_stats_offset; ///< 0x0848 + u64 h264_last_surface_luma_offset; ///< 0x0850 + u64 h264_last_surface_chroma_offset; ///< 0x0858 + std::array<u64, 17> surface_luma_offset; ///< 0x0860 + std::array<u64, 17> surface_chroma_offset; ///< 0x08E8 + INSERT_PADDING_WORDS_NOINIT(132); ///< 0x0970 + u64 vp9_entropy_probs_offset; ///< 0x0B80 + u64 vp9_backward_updates_offset; ///< 0x0B88 + u64 vp9_last_frame_segmap_offset; ///< 0x0B90 + u64 vp9_curr_frame_segmap_offset; ///< 0x0B98 + INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0 + u64 vp9_last_frame_mvs_offset; ///< 0x0BA8 + u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0 + INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8 + }; + std::array<u64, NUM_REGS> reg_array; + }; +}; +static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); + +#define ASSERT_REG_POSITION(field_name, position) \ + static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64), \ + "Field " #field_name " has invalid position") + +ASSERT_REG_POSITION(set_codec_id, 0x80); +ASSERT_REG_POSITION(execute, 0xC0); +ASSERT_REG_POSITION(control_params, 0x100); +ASSERT_REG_POSITION(picture_info_offset, 0x101); +ASSERT_REG_POSITION(frame_bitstream_offset, 0x102); +ASSERT_REG_POSITION(frame_number, 0x103); +ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104); +ASSERT_REG_POSITION(frame_stats_offset, 0x109); +ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A); +ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B); +ASSERT_REG_POSITION(surface_luma_offset, 0x10C); +ASSERT_REG_POSITION(surface_chroma_offset, 0x11D); +ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170); +ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171); +ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172); +ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173); +ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175); +ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176); + +#undef ASSERT_REG_POSITION + } // namespace Tegra::NvdecCommon diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 5faf8c0f1..ff3db0aee 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -9,17 +9,10 @@ extern "C" { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" #endif -#ifdef _MSC_VER -#pragma warning(disable : 4244) // conversion from 'type' to 'type', possible loss of data -#pragma warning(push) -#endif #include <libswscale/swscale.h> #if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop #endif -#ifdef _MSC_VER -#pragma warning(pop) -#endif } #include "common/assert.h" diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a2c1599f7..ff0f03e99 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -327,7 +327,8 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 if (format_info.is_compressed) { return false; } - if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) { + if (std::ranges::find(ACCELERATED_FORMATS, static_cast<int>(internal_format)) == + ACCELERATED_FORMATS.end()) { return false; } if (format_info.compatibility_by_size) { diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 2aae338b6..6052d148a 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -85,7 +85,7 @@ std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const if (info.type != ImageType::e3D) { const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride); const auto end = mip_level_offsets.begin() + info.resources.levels; - const auto it = std::find(mip_level_offsets.begin(), end, mip_offset); + const auto it = std::find(mip_level_offsets.begin(), end, static_cast<u32>(mip_offset)); if (layer > info.resources.layers || it == end) { return std::nullopt; } diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 10093a11d..c872517b8 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -394,7 +394,7 @@ template <u32 GOB_EXTENT> const s32 mip_offset = diff % layer_stride; const std::array offsets = CalculateMipLevelOffsets(new_info); const auto end = offsets.begin() + new_info.resources.levels; - const auto it = std::find(offsets.begin(), end, mip_offset); + const auto it = std::find(offsets.begin(), end, static_cast<u32>(mip_offset)); if (it == end) { // Mipmap is not aligned to any valid size return std::nullopt; diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 7b756ba41..3ab500760 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -1365,8 +1365,8 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, // each partition. // Determine partitions, partition index, and color endpoint modes - s32 planeIdx = -1; - u32 partitionIndex; + u32 planeIdx{UINT32_MAX}; + u32 partitionIndex{}; u32 colorEndpointMode[4] = {0, 0, 0, 0}; // Define color data. diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp index efdc6aa50..7a6f84d96 100644 --- a/src/yuzu/debugger/profiler.cpp +++ b/src/yuzu/debugger/profiler.cpp @@ -143,24 +143,25 @@ void MicroProfileWidget::hideEvent(QHideEvent* ev) { } void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) { - MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0); + MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0); ev->accept(); } void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) { - MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0); + MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0); MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); ev->accept(); } void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) { - MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0); + MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0); MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); ev->accept(); } void MicroProfileWidget::wheelEvent(QWheelEvent* ev) { - MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, ev->delta() / 120); + MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, + ev->angleDelta().y() / 120); ev->accept(); } |