From a5a94f52ffcbf3119d272a9369021a213ea6dad2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 9 Feb 2022 15:00:05 +0100 Subject: MacroHLE: Add MultidrawIndirect HLE Macro. --- src/video_core/engines/draw_manager.cpp | 21 +++++++++++++++++++++ src/video_core/engines/draw_manager.h | 20 ++++++++++++++++++++ src/video_core/engines/engine_interface.h | 2 ++ 3 files changed, 43 insertions(+) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index 3a78421f6..4fa77b684 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -91,6 +91,16 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind ProcessDraw(true, num_instances); } +void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count) { + const auto& regs{maxwell3d->regs}; + draw_state.topology = topology; + draw_state.index_buffer = regs.index_buffer; + draw_state.index_buffer.first = index_first; + draw_state.index_buffer.count = index_count; + + ProcessDrawIndirect(true); +} + void DrawManager::SetInlineIndexBuffer(u32 index) { draw_state.inline_index_draw_indexes.push_back(static_cast(index & 0x000000ff)); draw_state.inline_index_draw_indexes.push_back(static_cast((index & 0x0000ff00) >> 8)); @@ -198,4 +208,15 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) { maxwell3d->rasterizer->Draw(draw_indexed, instance_count); } } + +void DrawManager::ProcessDrawIndirect(bool draw_indexed) { + LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology, + draw_indexed ? 
draw_state.index_buffer.count : draw_state.vertex_buffer.count); + + UpdateTopology(); + + if (maxwell3d->ShouldExecute()) { + maxwell3d->rasterizer->DrawIndirect(draw_indexed); + } +} } // namespace Tegra::Engines diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 0e6930a9c..0cdb37f83 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -32,6 +32,13 @@ public: std::vector inline_index_draw_indexes; }; + struct IndirectParams { + GPUVAddr start_address; + size_t buffer_size; + size_t max_draw_counts; + size_t stride; + }; + explicit DrawManager(Maxwell3D* maxwell_3d); void ProcessMethodCall(u32 method, u32 argument); @@ -46,10 +53,20 @@ public: void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index, u32 base_instance, u32 num_instances); + void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count); + const State& GetDrawState() const { return draw_state; } + IndirectParams& GetIndirectParams() { + return indirect_state; + } + + const IndirectParams& GetIndirectParams() const { + return indirect_state; + } + private: void SetInlineIndexBuffer(u32 index); @@ -63,7 +80,10 @@ private: void ProcessDraw(bool draw_indexed, u32 instance_count); + void ProcessDrawIndirect(bool draw_indexed); + Maxwell3D* maxwell3d{}; State draw_state{}; + IndirectParams indirect_state{}; }; } // namespace Tegra::Engines diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index 26cde8584..76630272d 100644 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h @@ -17,6 +17,8 @@ public: /// Write multiple values to the register identified by method. 
virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) = 0; + + GPUVAddr current_dma_segment; }; } // namespace Tegra::Engines -- cgit v1.2.3 From 0f89828073a541eaa2cfd985483f839bd2f97b74 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 9 Feb 2022 15:39:40 +0100 Subject: MacroHLE: Implement DrawIndexedIndirect & DrawArraysIndirect. --- src/video_core/engines/draw_manager.cpp | 2 +- src/video_core/engines/draw_manager.h | 5 ++++- src/video_core/engines/maxwell_3d.cpp | 4 ++++ src/video_core/engines/maxwell_3d.h | 12 +++++++++--- 4 files changed, 18 insertions(+), 5 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index 4fa77b684..c60f32aad 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -216,7 +216,7 @@ void DrawManager::ProcessDrawIndirect(bool draw_indexed) { UpdateTopology(); if (maxwell3d->ShouldExecute()) { - maxwell3d->rasterizer->DrawIndirect(draw_indexed); + maxwell3d->rasterizer->DrawIndirect(); } } } // namespace Tegra::Engines diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 0cdb37f83..437990162 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -33,7 +33,10 @@ public: }; struct IndirectParams { - GPUVAddr start_address; + bool is_indexed; + bool include_count; + GPUVAddr count_start_address; + GPUVAddr indirect_start_address; size_t buffer_size; size_t max_draw_counts; size_t stride; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 9b182b653..cd6274a9b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -130,11 +130,15 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool } macro_params.insert(macro_params.end(), base_start, base_start + 
amount); + for (size_t i = 0; i < amount; i++) { + macro_addresses.push_back(current_dma_segment + i * sizeof(u32)); + } // Call the macro when there are no more parameters in the command buffer if (is_last_call) { CallMacroMethod(executing_macro, macro_params); macro_params.clear(); + macro_addresses.clear(); } } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 22b904319..ac5e87563 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3066,6 +3066,15 @@ public: std::unique_ptr draw_manager; friend class DrawManager; + + std::vector inline_index_draw_indexes; + std::vector macro_addresses; + + Core::System& system; + MemoryManager& memory_manager; + + /// Handles a write to the CLEAR_BUFFERS register. + void ProcessClearBuffers(u32 layer_count); private: void InitializeRegisterDefaults(); @@ -3126,9 +3135,6 @@ private: /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional GetQueryResult(); - Core::System& system; - MemoryManager& memory_manager; - VideoCore::RasterizerInterface* rasterizer = nullptr; /// Start offsets of each macro in macro_memory -- cgit v1.2.3 From c541559767c3912940ee3d73a122530b3edde9f1 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 5 Mar 2022 08:01:13 +0100 Subject: MacroHLE: Refactor MacroHLE system. 
--- src/video_core/engines/draw_manager.cpp | 6 ++++++ src/video_core/engines/draw_manager.h | 2 ++ src/video_core/engines/maxwell_3d.cpp | 37 +++++++++++++++++++++++++++++++++ src/video_core/engines/maxwell_3d.h | 21 ++++++++++++++++--- 4 files changed, 63 insertions(+), 3 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index c60f32aad..183d5403c 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -91,6 +91,12 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind ProcessDraw(true, num_instances); } +void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) { + draw_state.topology = topology; + + ProcessDrawIndirect(true); +} + void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count) { const auto& regs{maxwell3d->regs}; draw_state.topology = topology; diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 437990162..49a4fca48 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -56,6 +56,8 @@ public: void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index, u32 base_instance, u32 num_instances); + void DrawArrayIndirect(PrimitiveTopology topology); + void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count); const State& GetDrawState() const { diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index cd6274a9b..b998a8e69 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -133,15 +133,52 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool for (size_t i = 0; i < amount; i++) { macro_addresses.push_back(current_dma_segment + i * sizeof(u32)); } + 
macro_segments.emplace_back(current_dma_segment, amount); // Call the macro when there are no more parameters in the command buffer if (is_last_call) { CallMacroMethod(executing_macro, macro_params); macro_params.clear(); macro_addresses.clear(); + macro_segments.clear(); } } +void Maxwell3D::RefreshParameters() { + size_t current_index = 0; + for (auto& segment : macro_segments) { + if (segment.first == 0) { + current_index += segment.second; + continue; + } + memory_manager.ReadBlock(segment.first, &macro_params[current_index], + sizeof(u32) * segment.second); + current_index += segment.second; + } +} + +u32 Maxwell3D::GetMaxCurrentVertices() { + u32 num_vertices = 0; + for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { + const auto& array = regs.vertex_streams[index]; + if (array.enable == 0) { + continue; + } + const auto& attribute = regs.vertex_attrib_format[index]; + if (attribute.constant) { + num_vertices = std::max(num_vertices, 1U); + continue; + } + const auto& limit = regs.vertex_stream_limits[index]; + const GPUVAddr gpu_addr_begin = array.Address(); + const GPUVAddr gpu_addr_end = limit.Address() + 1; + const u32 address_size = static_cast(gpu_addr_end - gpu_addr_begin); + num_vertices = std::max( + num_vertices, address_size / std::max(attribute.SizeInBytes(), array.stride.Value())); + } + return num_vertices; +} + u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { // Keep track of the register value in shadow_state when requested. 
const auto control = shadow_state.shadow_ram_control; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ac5e87563..e2256594d 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3068,10 +3068,14 @@ public: friend class DrawManager; std::vector inline_index_draw_indexes; - std::vector macro_addresses; - Core::System& system; - MemoryManager& memory_manager; + GPUVAddr getMacroAddress(size_t index) const { + return macro_addresses[index]; + } + + void RefreshParameters(); + + u32 GetMaxCurrentVertices(); /// Handles a write to the CLEAR_BUFFERS register. void ProcessClearBuffers(u32 layer_count); @@ -3135,6 +3139,9 @@ private: /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional GetQueryResult(); + Core::System& system; + MemoryManager& memory_manager; + VideoCore::RasterizerInterface* rasterizer = nullptr; /// Start offsets of each macro in macro_memory @@ -3151,6 +3158,14 @@ private: Upload::State upload_state; bool execute_on{true}; + + std::array draw_command{}; + std::vector deferred_draw_method; + enum class DrawMode : u32 { General = 0, Instance, InlineIndex }; + DrawMode draw_mode{DrawMode::General}; + bool draw_indexed{}; + std::vector> macro_segments; + std::vector macro_addresses; }; #define ASSERT_REG_POSITION(field_name, position) \ -- cgit v1.2.3 From 93ac5a6a6d316966c1d288f8b83610bb48143a04 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 21 Oct 2022 01:46:51 +0200 Subject: MacroHLE: Add Index Buffer size estimation. 
--- src/video_core/engines/maxwell_3d.cpp | 7 +++++++ src/video_core/engines/maxwell_3d.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index b998a8e69..a0dd7400d 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -179,6 +179,13 @@ u32 Maxwell3D::GetMaxCurrentVertices() { return num_vertices; } +size_t Maxwell3D::EstimateIndexBufferSize() { + GPUVAddr start_address = regs.index_buffer.StartAddress(); + GPUVAddr end_address = regs.index_buffer.EndAddress(); + return std::min(memory_manager.GetMemoryLayoutSize(start_address), + static_cast(end_address - start_address)); +} + u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { // Keep track of the register value in shadow_state when requested. const auto control = shadow_state.shadow_ram_control; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index e2256594d..cfe1e4883 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3077,6 +3077,8 @@ public: u32 GetMaxCurrentVertices(); + size_t EstimateIndexBufferSize(); + /// Handles a write to the CLEAR_BUFFERS register. void ProcessClearBuffers(u32 layer_count); -- cgit v1.2.3 From aad0cbf024fb8077a9b375a093c60a7e2ab1db3d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 9 Nov 2022 17:58:10 +0100 Subject: MacroHLE: Add HLE replacement for base vertex and base instance. 
--- src/video_core/engines/maxwell_3d.cpp | 15 +++++++++++++-- src/video_core/engines/maxwell_3d.h | 17 +++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index a0dd7400d..50d8a94b1 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -182,8 +182,14 @@ u32 Maxwell3D::GetMaxCurrentVertices() { size_t Maxwell3D::EstimateIndexBufferSize() { GPUVAddr start_address = regs.index_buffer.StartAddress(); GPUVAddr end_address = regs.index_buffer.EndAddress(); - return std::min(memory_manager.GetMemoryLayoutSize(start_address), - static_cast(end_address - start_address)); + constexpr std::array max_sizes = { + std::numeric_limits::max(), std::numeric_limits::max(), + std::numeric_limits::max(), std::numeric_limits::max()}; + const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); + return std::min( + memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) / + byte_size, + static_cast(end_address - start_address)); } u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { @@ -572,4 +578,9 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const { return regs.reg_array[method]; } +void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) { + const u64 key = (static_cast(bank) << 32) | offset; + replace_table.emplace(key, name); +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cfe1e4883..397e88f67 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3020,6 +3020,23 @@ public: /// Store temporary hw register values, used by some calls to restore state after a operation Regs shadow_state; + // None Engine + enum class EngineHint : u32 { + None = 0x0, + OnHLEMacro = 0x1, + }; + + EngineHint 
engine_state{EngineHint::None}; + + enum class HLEReplaceName : u32 { + BaseVertex = 0x0, + BaseInstance = 0x1, + }; + + void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name); + + std::unordered_map replace_table; + static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); static_assert(std::is_trivially_copyable_v, "Maxwell3D Regs must be trivially copyable"); -- cgit v1.2.3 From 18637766efd1ff9a0c22967553983cfda69c96ca Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 17 Nov 2022 16:36:53 +0100 Subject: MacroHLE: Reduce massive calculations on sizing estimation. --- src/video_core/engines/maxwell_3d.cpp | 15 +++++++++++++++ src/video_core/engines/maxwell_3d.h | 2 ++ 2 files changed, 17 insertions(+) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 50d8a94b1..a9fd6d960 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -157,6 +157,21 @@ void Maxwell3D::RefreshParameters() { } } +bool Maxwell3D::AnyParametersDirty() { + size_t current_index = 0; + for (auto& segment : macro_segments) { + if (segment.first == 0) { + current_index += segment.second; + continue; + } + if (memory_manager.IsMemoryDirty(segment.first, sizeof(u32) * segment.second)) { + return true; + } + current_index += segment.second; + } + return false; +} + u32 Maxwell3D::GetMaxCurrentVertices() { u32 num_vertices = 0; for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 397e88f67..cd996413c 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3092,6 +3092,8 @@ public: void RefreshParameters(); + bool AnyParametersDirty(); + u32 GetMaxCurrentVertices(); size_t EstimateIndexBufferSize(); -- cgit v1.2.3 From ce448ce770b6c329caec7ad1ae00e01dddb67b03 Mon Sep 17 00:00:00 
2001 From: Fernando Sahmkow Date: Fri, 18 Nov 2022 00:21:13 +0100 Subject: Revert Buffer cache changes and setup additional macros. --- src/video_core/engines/engine_interface.h | 1 + src/video_core/engines/maxwell_3d.cpp | 26 +++++++++----------------- src/video_core/engines/maxwell_3d.h | 23 ++++++++++++++++++----- 3 files changed, 28 insertions(+), 22 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index 76630272d..38f1abdc4 100644 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h @@ -18,6 +18,7 @@ public: virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) = 0; + bool current_dirty{}; GPUVAddr current_dma_segment; }; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index a9fd6d960..bbe3202fe 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -4,6 +4,7 @@ #include #include #include "common/assert.h" +#include "common/settings.h" #include "core/core.h" #include "core/core_timing.h" #include "video_core/dirty_flags.h" @@ -14,6 +15,7 @@ #include "video_core/rasterizer_interface.h" #include "video_core/textures/texture.h" + namespace Tegra::Engines { using VideoCore::QueryType; @@ -134,6 +136,8 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool macro_addresses.push_back(current_dma_segment + i * sizeof(u32)); } macro_segments.emplace_back(current_dma_segment, amount); + current_macro_dirty |= current_dirty; + current_dirty = false; // Call the macro when there are no more parameters in the command buffer if (is_last_call) { @@ -141,10 +145,14 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool macro_params.clear(); macro_addresses.clear(); macro_segments.clear(); + current_macro_dirty = false; } } -void 
Maxwell3D::RefreshParameters() { +void Maxwell3D::RefreshParametersImpl() { + if (!Settings::IsGPULevelHigh()) { + return; + } size_t current_index = 0; for (auto& segment : macro_segments) { if (segment.first == 0) { @@ -157,21 +165,6 @@ void Maxwell3D::RefreshParameters() { } } -bool Maxwell3D::AnyParametersDirty() { - size_t current_index = 0; - for (auto& segment : macro_segments) { - if (segment.first == 0) { - current_index += segment.second; - continue; - } - if (memory_manager.IsMemoryDirty(segment.first, sizeof(u32) * segment.second)) { - return true; - } - current_index += segment.second; - } - return false; -} - u32 Maxwell3D::GetMaxCurrentVertices() { u32 num_vertices = 0; for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { @@ -332,7 +325,6 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { const u32 argument = ProcessShadowRam(method, method_argument); ProcessDirtyRegisters(method, argument); - ProcessMethodCall(method, argument, method_argument, is_last_call); } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cd996413c..f0a379801 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -272,6 +272,7 @@ public: }; union { + u32 raw; BitField<0, 1, Mode> mode; BitField<4, 8, u32> pad; }; @@ -1217,10 +1218,12 @@ public: struct Window { union { + u32 raw_1; BitField<0, 16, u32> x_min; BitField<16, 16, u32> x_max; }; union { + u32 raw_2; BitField<0, 16, u32> y_min; BitField<16, 16, u32> y_max; }; @@ -3090,9 +3093,16 @@ public: return macro_addresses[index]; } - void RefreshParameters(); + void RefreshParameters() { + if (!current_macro_dirty) { + return; + } + RefreshParametersImpl(); + } - bool AnyParametersDirty(); + bool AnyParametersDirty() { + return current_macro_dirty; + } u32 GetMaxCurrentVertices(); @@ -3101,6 +3111,9 @@ public: /// Handles a write to the CLEAR_BUFFERS register. 
void ProcessClearBuffers(u32 layer_count); + /// Handles a write to the CB_BIND register. + void ProcessCBBind(size_t stage_index); + private: void InitializeRegisterDefaults(); @@ -3154,12 +3167,11 @@ private: void ProcessCBData(u32 value); void ProcessCBMultiData(const u32* start_base, u32 amount); - /// Handles a write to the CB_BIND register. - void ProcessCBBind(size_t stage_index); - /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional GetQueryResult(); + void RefreshParametersImpl(); + Core::System& system; MemoryManager& memory_manager; @@ -3187,6 +3199,7 @@ private: bool draw_indexed{}; std::vector> macro_segments; std::vector macro_addresses; + bool current_macro_dirty{}; }; #define ASSERT_REG_POSITION(field_name, position) \ -- cgit v1.2.3 From cb1497d0d7711a1c0e527aaa3e1dc3f95e5a6644 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 27 Nov 2022 00:58:06 +0100 Subject: DMAPusher: Improve collection of non executing methods --- src/video_core/engines/engine_interface.h | 21 +++++++ src/video_core/engines/fermi_2d.cpp | 10 ++++ src/video_core/engines/fermi_2d.h | 2 + src/video_core/engines/kepler_compute.cpp | 14 ++++- src/video_core/engines/kepler_compute.h | 2 + src/video_core/engines/kepler_memory.cpp | 11 ++++ src/video_core/engines/kepler_memory.h | 2 + src/video_core/engines/maxwell_3d.cpp | 94 +++++++++++++++++++++++++++++++ src/video_core/engines/maxwell_3d.h | 4 ++ src/video_core/engines/maxwell_dma.cpp | 12 +++- src/video_core/engines/maxwell_dma.h | 2 + 11 files changed, 172 insertions(+), 2 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index 38f1abdc4..392322358 100644 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h @@ -3,6 +3,10 @@ #pragma once +#include +#include +#include + #include "common/common_types.h" namespace 
Tegra::Engines { @@ -18,8 +22,25 @@ public: virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) = 0; + void ConsumeSink() { + if (method_sink.empty()) { + return; + } + ConsumeSinkImpl(); + } + + std::bitset::max()> execution_mask{}; + std::vector> method_sink{}; bool current_dirty{}; GPUVAddr current_dma_segment; + +protected: + virtual void ConsumeSinkImpl() { + for (auto [method, value] : method_sink) { + CallMethod(method, value, true); + } + method_sink.clear(); + } }; } // namespace Tegra::Engines diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index c6478ae85..e655e7254 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -25,6 +25,9 @@ Fermi2D::Fermi2D(MemoryManager& memory_manager_) { // Nvidia's OpenGL driver seems to assume these values regs.src.depth = 1; regs.dst.depth = 1; + + execution_mask.reset(); + execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true; } Fermi2D::~Fermi2D() = default; @@ -49,6 +52,13 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 } } +void Fermi2D::ConsumeSinkImpl() { + for (auto [method, value] : method_sink) { + regs.reg_array[method] = value; + } + method_sink.clear(); +} + void Fermi2D::Blit() { MICROPROFILE_SCOPE(GPU_BlitEngine); LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 100b21bac..523fbdec2 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -309,6 +309,8 @@ private: /// Performs the copy from the source surface to the destination surface as configured in the /// registers. 
void Blit(); + + void ConsumeSinkImpl() override; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index e5c622155..601095f03 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -14,7 +14,12 @@ namespace Tegra::Engines { KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_) - : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {} + : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} { + execution_mask.reset(); + execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true; + execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true; + execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true; +} KeplerCompute::~KeplerCompute() = default; @@ -23,6 +28,13 @@ void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) upload_state.BindRasterizer(rasterizer); } +void KeplerCompute::ConsumeSinkImpl() { + for (auto [method, value] : method_sink) { + regs.reg_array[method] = value; + } + method_sink.clear(); +} + void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid KeplerCompute register, increase the size of the Regs structure"); diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index e154e3f06..2092e685f 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -204,6 +204,8 @@ public: private: void ProcessLaunch(); + void ConsumeSinkImpl() override; + /// Retrieves information about a specific TIC entry from the TIC buffer. 
Texture::TICEntry GetTICEntry(u32 tic_index) const; diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 08045d1cf..c026801a3 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -18,6 +18,17 @@ KeplerMemory::~KeplerMemory() = default; void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { upload_state.BindRasterizer(rasterizer_); + + execution_mask.reset(); + execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true; + execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true; +} + +void KeplerMemory::ConsumeSinkImpl() { + for (auto [method, value] : method_sink) { + regs.reg_array[method] = value; + } + method_sink.clear(); } void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) { diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 5fe7489f0..fb1eecbba 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -73,6 +73,8 @@ public: } regs{}; private: + void ConsumeSinkImpl() override; + Core::System& system; Upload::State upload_state; }; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index bbe3202fe..d44a5cabf 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -4,6 +4,7 @@ #include #include #include "common/assert.h" +#include "common/scope_exit.h" #include "common/settings.h" #include "core/core.h" #include "core/core_timing.h" @@ -30,6 +31,10 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) regs.upload} { dirty.flags.flip(); InitializeRegisterDefaults(); + execution_mask.reset(); + for (size_t i = 0; i < execution_mask.size(); i++) { + execution_mask[i] = IsMethodExecutable(static_cast(i)); + } } Maxwell3D::~Maxwell3D() = default; @@ -123,6 +128,71 @@ void Maxwell3D::InitializeRegisterDefaults() { shadow_state = 
regs; } +bool Maxwell3D::IsMethodExecutable(u32 method) { + if (method >= MacroRegistersStart) { + return true; + } + switch (method) { + case MAXWELL3D_REG_INDEX(draw.end): + case MAXWELL3D_REG_INDEX(draw.begin): + case MAXWELL3D_REG_INDEX(vertex_buffer.first): + case MAXWELL3D_REG_INDEX(vertex_buffer.count): + case MAXWELL3D_REG_INDEX(index_buffer.first): + case MAXWELL3D_REG_INDEX(index_buffer.count): + case MAXWELL3D_REG_INDEX(draw_inline_index): + case MAXWELL3D_REG_INDEX(index_buffer32_subsequent): + case MAXWELL3D_REG_INDEX(index_buffer16_subsequent): + case MAXWELL3D_REG_INDEX(index_buffer8_subsequent): + case MAXWELL3D_REG_INDEX(index_buffer32_first): + case MAXWELL3D_REG_INDEX(index_buffer16_first): + case MAXWELL3D_REG_INDEX(index_buffer8_first): + case MAXWELL3D_REG_INDEX(inline_index_2x16.even): + case MAXWELL3D_REG_INDEX(inline_index_4x8.index0): + case MAXWELL3D_REG_INDEX(vertex_array_instance_first): + case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): + case MAXWELL3D_REG_INDEX(wait_for_idle): + case MAXWELL3D_REG_INDEX(shadow_ram_control): + case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr): + case MAXWELL3D_REG_INDEX(load_mme.instruction): + case MAXWELL3D_REG_INDEX(load_mme.start_address): + case MAXWELL3D_REG_INDEX(falcon[4]): + case MAXWELL3D_REG_INDEX(const_buffer.buffer): + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 1: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 2: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 3: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 4: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 5: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 6: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 7: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 8: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 9: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 10: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 11: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 12: + case 
MAXWELL3D_REG_INDEX(const_buffer.buffer) + 13: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 14: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15: + case MAXWELL3D_REG_INDEX(bind_groups[0].raw_config): + case MAXWELL3D_REG_INDEX(bind_groups[1].raw_config): + case MAXWELL3D_REG_INDEX(bind_groups[2].raw_config): + case MAXWELL3D_REG_INDEX(bind_groups[3].raw_config): + case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config): + case MAXWELL3D_REG_INDEX(topology_override): + case MAXWELL3D_REG_INDEX(clear_surface): + case MAXWELL3D_REG_INDEX(report_semaphore.query): + case MAXWELL3D_REG_INDEX(render_enable.mode): + case MAXWELL3D_REG_INDEX(clear_report_value): + case MAXWELL3D_REG_INDEX(sync_info): + case MAXWELL3D_REG_INDEX(launch_dma): + case MAXWELL3D_REG_INDEX(inline_data): + case MAXWELL3D_REG_INDEX(fragment_barrier): + case MAXWELL3D_REG_INDEX(tiled_cache_barrier): + return true; + default: + return false; + } +} + void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) { if (executing_macro == 0) { // A macro call must begin by writing the macro method's register, not its argument. 
@@ -141,6 +211,7 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool // Call the macro when there are no more parameters in the command buffer if (is_last_call) { + ConsumeSink(); CallMacroMethod(executing_macro, macro_params); macro_params.clear(); macro_addresses.clear(); @@ -214,6 +285,29 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { return argument; } +void Maxwell3D::ConsumeSinkImpl() { + SCOPE_EXIT({ method_sink.clear(); }); + const auto control = shadow_state.shadow_ram_control; + if (control == Regs::ShadowRamControl::Track || + control == Regs::ShadowRamControl::TrackWithFilter) { + + for (auto [method, value] : method_sink) { + shadow_state.reg_array[method] = value; + ProcessDirtyRegisters(method, value); + } + return; + } + if (control == Regs::ShadowRamControl::Replay) { + for (auto [method, value] : method_sink) { + ProcessDirtyRegisters(method, shadow_state.reg_array[method]); + } + return; + } + for (auto [method, value] : method_sink) { + ProcessDirtyRegisters(method, value); + } +} + void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) { if (regs.reg_array[method] == argument) { return; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index f0a379801..478ba4dc7 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3123,6 +3123,8 @@ private: void ProcessDirtyRegisters(u32 method, u32 argument); + void ConsumeSinkImpl() override; + void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call); /// Retrieves information about a specific TIC entry from the TIC buffer. 
@@ -3172,6 +3174,8 @@ private: void RefreshParametersImpl(); + bool IsMethodExecutable(u32 method); + Core::System& system; MemoryManager& memory_manager; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index f73d7bf0f..01f70ea9e 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -21,7 +21,10 @@ namespace Tegra::Engines { using namespace Texture; MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_) - : system{system_}, memory_manager{memory_manager_} {} + : system{system_}, memory_manager{memory_manager_} { + execution_mask.reset(); + execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true; +} MaxwellDMA::~MaxwellDMA() = default; @@ -29,6 +32,13 @@ void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { rasterizer = rasterizer_; } +void MaxwellDMA::ConsumeSinkImpl() { + for (auto [method, value] : method_sink) { + regs.reg_array[method] = value; + } + method_sink.clear(); +} + void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index c88191a61..0e594fa74 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -231,6 +231,8 @@ private: void ReleaseSemaphore(); + void ConsumeSinkImpl() override; + Core::System& system; MemoryManager& memory_manager; -- cgit v1.2.3 From f800e485c9bcd98e08128db974540e7ba0324128 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 5 Dec 2022 17:14:34 +0100 Subject: Vulkan Implement Dynamic State 2 LogicOp and PatchVertices --- src/video_core/engines/maxwell_3d.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index d44a5cabf..7f406e171 100644 --- 
a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -124,6 +124,7 @@ void Maxwell3D::InitializeRegisterDefaults() { regs.gl_front_face = Maxwell3D::Regs::FrontFace::ClockWise; regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill; regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill; + regs.logic_op.op = Maxwell3D::Regs::LogicOp::Op::Clear; shadow_state = regs; } -- cgit v1.2.3 From d33251db9300e29ae830ec74e0b39ec0aa202b30 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 6 Dec 2022 00:40:01 +0100 Subject: Vulkan: Implement Dynamic State 3 --- src/video_core/engines/maxwell_3d.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7f406e171..d44a5cabf 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -124,7 +124,6 @@ void Maxwell3D::InitializeRegisterDefaults() { regs.gl_front_face = Maxwell3D::Regs::FrontFace::ClockWise; regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill; regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill; - regs.logic_op.op = Maxwell3D::Regs::LogicOp::Op::Clear; shadow_state = regs; } -- cgit v1.2.3 From 581a7d785bb4936c92d320f17d3d824e244eee5a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 7 Dec 2022 00:28:35 +0100 Subject: Rasterizer: Setup skeleton for Host Conditional rendering --- src/video_core/engines/maxwell_3d.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index d44a5cabf..943a69935 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -16,7 +16,6 @@ #include "video_core/rasterizer_interface.h" #include "video_core/textures/texture.h" - namespace Tegra::Engines { using VideoCore::QueryType; 
@@ -538,13 +537,17 @@ void Maxwell3D::ProcessQueryGet() { void Maxwell3D::ProcessQueryCondition() { const GPUVAddr condition_address{regs.render_enable.Address()}; switch (regs.render_enable_override) { - case Regs::RenderEnable::Override::AlwaysRender: + case Regs::RenderEnable::Override::AlwaysRender: { execute_on = true; break; case Regs::RenderEnable::Override::NeverRender: execute_on = false; break; - case Regs::RenderEnable::Override::UseRenderEnable: + case Regs::RenderEnable::Override::UseRenderEnable: { + if (rasterizer->AccelerateConditionalRendering()) { + execute_on = true; + return; + } switch (regs.render_enable.mode) { case Regs::RenderEnable::Mode::True: { execute_on = true; @@ -582,6 +585,8 @@ void Maxwell3D::ProcessQueryCondition() { } break; } + } + } } void Maxwell3D::ProcessCounterReset() { @@ -618,7 +623,8 @@ std::optional Maxwell3D::GetQueryResult() { } void Maxwell3D::ProcessCBBind(size_t stage_index) { - // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. + // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader + // stage. const auto& bind_data = regs.bind_groups[stage_index]; auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.shader_slot]; buffer.enabled = bind_data.valid.Value() != 0; -- cgit v1.2.3 From d09aa0182f18d1ac338ab47009b42fdeb67497a8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 24 Dec 2022 19:19:41 -0500 Subject: MacroHLE: Final cleanup and fixes. 
--- src/video_core/engines/draw_manager.cpp | 3 ++- src/video_core/engines/maxwell_3d.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index 183d5403c..feea89c0e 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -97,7 +97,8 @@ void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) { ProcessDrawIndirect(true); } -void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count) { +void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, + u32 index_count) { const auto& regs{maxwell3d->regs}; draw_state.topology = topology; draw_state.index_buffer = regs.index_buffer; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 478ba4dc7..dbefcd715 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3086,7 +3086,7 @@ public: std::unique_ptr draw_manager; friend class DrawManager; - + std::vector inline_index_draw_indexes; GPUVAddr getMacroAddress(size_t index) const { -- cgit v1.2.3 From b62ffb612dbd672371d163e3b511e81f0c2282e6 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 27 Dec 2022 16:40:28 -0500 Subject: Vulkan: rework stencil tracking. 
--- src/video_core/engines/maxwell_3d.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index dbefcd715..a2dff0350 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -2711,7 +2711,7 @@ public: u32 post_z_pixel_imask; ///< 0x0F1C INSERT_PADDING_BYTES_NOINIT(0x20); ConstantColorRendering const_color_rendering; ///< 0x0F40 - s32 stencil_back_ref; ///< 0x0F54 + u32 stencil_back_ref; ///< 0x0F54 u32 stencil_back_mask; ///< 0x0F58 u32 stencil_back_func_mask; ///< 0x0F5C INSERT_PADDING_BYTES_NOINIT(0x14); @@ -2835,9 +2835,9 @@ public: Blend blend; ///< 0x133C u32 stencil_enable; ///< 0x1380 StencilOp stencil_front_op; ///< 0x1384 - s32 stencil_front_ref; ///< 0x1394 - s32 stencil_front_func_mask; ///< 0x1398 - s32 stencil_front_mask; ///< 0x139C + u32 stencil_front_ref; ///< 0x1394 + u32 stencil_front_func_mask; ///< 0x1398 + u32 stencil_front_mask; ///< 0x139C INSERT_PADDING_BYTES_NOINIT(0x4); u32 draw_auto_start_byte_count; ///< 0x13A4 PsSaturate frag_color_clamp; ///< 0x13A8 -- cgit v1.2.3 From a0c697124ced080f58866825e2e323e8682bbd7f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 3 Jan 2023 10:01:25 -0500 Subject: Video_core: Address feedback --- src/video_core/engines/draw_manager.cpp | 13 ++++++++----- src/video_core/engines/draw_manager.h | 2 +- src/video_core/engines/maxwell_3d.cpp | 13 ++++++------- src/video_core/engines/maxwell_3d.h | 30 ++++++++++++------------------ 4 files changed, 27 insertions(+), 31 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index feea89c0e..2437121ce 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -94,7 +94,7 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind 
void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) { draw_state.topology = topology; - ProcessDrawIndirect(true); + ProcessDrawIndirect(); } void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, @@ -105,7 +105,7 @@ void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_firs draw_state.index_buffer.first = index_first; draw_state.index_buffer.count = index_count; - ProcessDrawIndirect(true); + ProcessDrawIndirect(); } void DrawManager::SetInlineIndexBuffer(u32 index) { @@ -216,9 +216,12 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) { } } -void DrawManager::ProcessDrawIndirect(bool draw_indexed) { - LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology, - draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count); +void DrawManager::ProcessDrawIndirect() { + LOG_TRACE( + HW_GPU, + "called, topology={}, is_indexed={}, includes_count={}, buffer_size={}, max_draw_count={}", + draw_state.topology, indirect_state.is_indexed, indirect_state.include_count, + indirect_state.buffer_size, indirect_state.max_draw_counts); UpdateTopology(); diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 49a4fca48..58d1b2d59 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -85,7 +85,7 @@ private: void ProcessDraw(bool draw_indexed, u32 instance_count); - void ProcessDrawIndirect(bool draw_indexed); + void ProcessDrawIndirect(); Maxwell3D* maxwell3d{}; State draw_state{}; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 943a69935..fbfd1ddd2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -220,9 +220,6 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool } void Maxwell3D::RefreshParametersImpl() { - if (!Settings::IsGPULevelHigh()) { - return; - } 
size_t current_index = 0; for (auto& segment : macro_segments) { if (segment.first == 0) { @@ -448,9 +445,11 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15: ProcessCBMultiData(base_start, amount); break; - case MAXWELL3D_REG_INDEX(inline_data): + case MAXWELL3D_REG_INDEX(inline_data): { + ASSERT(methods_pending == amount); upload_state.ProcessData(base_start, amount); return; + } default: for (u32 i = 0; i < amount; i++) { CallMethod(method, base_start[i], methods_pending - i <= 1); @@ -537,7 +536,7 @@ void Maxwell3D::ProcessQueryGet() { void Maxwell3D::ProcessQueryCondition() { const GPUVAddr condition_address{regs.render_enable.Address()}; switch (regs.render_enable_override) { - case Regs::RenderEnable::Override::AlwaysRender: { + case Regs::RenderEnable::Override::AlwaysRender: execute_on = true; break; case Regs::RenderEnable::Override::NeverRender: @@ -586,7 +585,6 @@ void Maxwell3D::ProcessQueryCondition() { break; } } - } } void Maxwell3D::ProcessCounterReset() { @@ -685,7 +683,8 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const { return regs.reg_array[method]; } -void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) { +void Maxwell3D::SetHLEReplacementAttributeType(u32 bank, u32 offset, + HLEReplacementAttributeType name) { const u64 key = (static_cast(bank) << 32) | offset; replace_table.emplace(key, name); } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index a2dff0350..0b2fd2928 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1218,12 +1218,12 @@ public: struct Window { union { - u32 raw_1; + u32 raw_x; BitField<0, 16, u32> x_min; BitField<16, 16, u32> x_max; }; union { - u32 raw_2; + u32 raw_y; BitField<0, 16, u32> y_min; BitField<16, 16, u32> y_max; }; @@ -3031,14 +3031,15 @@ public: EngineHint engine_state{EngineHint::None}; - enum class 
HLEReplaceName : u32 { + enum class HLEReplacementAttributeType : u32 { BaseVertex = 0x0, BaseInstance = 0x1, + DrawID = 0x2, }; - void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name); + void SetHLEReplacementAttributeType(u32 bank, u32 offset, HLEReplacementAttributeType name); - std::unordered_map replace_table; + std::unordered_map replace_table; static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); static_assert(std::is_trivially_copyable_v, "Maxwell3D Regs must be trivially copyable"); @@ -3087,9 +3088,7 @@ public: std::unique_ptr draw_manager; friend class DrawManager; - std::vector inline_index_draw_indexes; - - GPUVAddr getMacroAddress(size_t index) const { + GPUVAddr GetMacroAddress(size_t index) const { return macro_addresses[index]; } @@ -3100,7 +3099,7 @@ public: RefreshParametersImpl(); } - bool AnyParametersDirty() { + bool AnyParametersDirty() const { return current_macro_dirty; } @@ -3114,6 +3113,10 @@ public: /// Handles a write to the CB_BIND register. void ProcessCBBind(size_t stage_index); + /// Handles a write to the CB_DATA[i] register. + void ProcessCBData(u32 value); + void ProcessCBMultiData(const u32* start_base, u32 amount); + private: void InitializeRegisterDefaults(); @@ -3165,10 +3168,6 @@ private: /// Handles writes to syncing register. void ProcessSyncPoint(); - /// Handles a write to the CB_DATA[i] register. - void ProcessCBData(u32 value); - void ProcessCBMultiData(const u32* start_base, u32 amount); - /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional GetQueryResult(); @@ -3196,11 +3195,6 @@ private: bool execute_on{true}; - std::array draw_command{}; - std::vector deferred_draw_method; - enum class DrawMode : u32 { General = 0, Instance, InlineIndex }; - DrawMode draw_mode{DrawMode::General}; - bool draw_indexed{}; std::vector> macro_segments; std::vector macro_addresses; bool current_macro_dirty{}; -- cgit v1.2.3