diff options
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/engines/engine_interface.h | 21 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.h | 2 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 14 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.h | 2 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 11 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.h | 2 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 94 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 4 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 2 | ||||
| -rw-r--r-- | src/video_core/macro/macro_hle.cpp | 3 | 
13 files changed, 181 insertions, 2 deletions
| diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index b3e9cb82e..551929824 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -178,6 +178,11 @@ void DmaPusher::CallMethod(u32 argument) const {          });      } else {          auto subchannel = subchannels[dma_state.subchannel]; +        if (!subchannel->execution_mask[dma_state.method]) [[likely]] { +            subchannel->method_sink.emplace_back(dma_state.method, argument); +            return; +        } +        subchannel->ConsumeSink();          subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;          subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call);      } @@ -189,6 +194,7 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {                                 dma_state.method_count);      } else {          auto subchannel = subchannels[dma_state.subchannel]; +        subchannel->ConsumeSink();          subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;          subchannel->CallMultiMethod(dma_state.method, base_start, num_methods,                                      dma_state.method_count); diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index 38f1abdc4..392322358 100644 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h @@ -3,6 +3,10 @@  #pragma once +#include <bitset> +#include <limits> +#include <vector> +  #include "common/common_types.h"  namespace Tegra::Engines { @@ -18,8 +22,25 @@ public:      virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,                                   u32 methods_pending) = 0; +    void ConsumeSink() { +        if (method_sink.empty()) { +            return; +        } +        ConsumeSinkImpl(); +    } + +    std::bitset<std::numeric_limits<u16>::max()> execution_mask{}; +    std::vector<std::pair<u32, u32>> method_sink{};      bool current_dirty{};      GPUVAddr current_dma_segment; + +protected: +    virtual void ConsumeSinkImpl() { +        for (auto [method, value] : method_sink) { +            CallMethod(method, value, true); +        } +        method_sink.clear(); +    }  };  } // namespace Tegra::Engines diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index c6478ae85..e655e7254 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -25,6 +25,9 @@ Fermi2D::Fermi2D(MemoryManager& memory_manager_) {      // Nvidia's OpenGL driver seems to assume these values      regs.src.depth = 1;      regs.dst.depth = 1; + +    execution_mask.reset(); +    execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true;  }  Fermi2D::~Fermi2D() = default; @@ -49,6 +52,13 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32      }  } +void Fermi2D::ConsumeSinkImpl() { +    for (auto [method, value] : method_sink) { +        regs.reg_array[method] = value; +    } +    method_sink.clear(); +} +  void Fermi2D::Blit() {      MICROPROFILE_SCOPE(GPU_BlitEngine);      LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 100b21bac..523fbdec2 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -309,6 +309,8 @@ private:      /// Performs the copy from the source surface to the destination surface as configured in the      /// registers.      void Blit(); + +    void ConsumeSinkImpl() override;  };  #define ASSERT_REG_POSITION(field_name, position)                                                  \ diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index e5c622155..601095f03 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -14,7 +14,12 @@  namespace Tegra::Engines {  KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_) -    : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {} +    : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} { +    execution_mask.reset(); +    execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true; +    execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true; +    execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true; +}  KeplerCompute::~KeplerCompute() = default; @@ -23,6 +28,13 @@ void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)      upload_state.BindRasterizer(rasterizer);  } +void KeplerCompute::ConsumeSinkImpl() { +    for (auto [method, value] : method_sink) { +        regs.reg_array[method] = value; +    } +    method_sink.clear(); +} +  void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {      ASSERT_MSG(method < Regs::NUM_REGS,                 "Invalid KeplerCompute register, increase the size of the Regs structure"); diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index e154e3f06..2092e685f 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -204,6 +204,8 @@ public:  private:      void ProcessLaunch(); +    void ConsumeSinkImpl() override; +      /// Retrieves information about a specific TIC entry from the TIC buffer.      Texture::TICEntry GetTICEntry(u32 tic_index) const; diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 08045d1cf..c026801a3 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -18,6 +18,17 @@ KeplerMemory::~KeplerMemory() = default;  void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {      upload_state.BindRasterizer(rasterizer_); + +    execution_mask.reset(); +    execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true; +    execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true; +} + +void KeplerMemory::ConsumeSinkImpl() { +    for (auto [method, value] : method_sink) { +        regs.reg_array[method] = value; +    } +    method_sink.clear();  }  void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) { diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 5fe7489f0..fb1eecbba 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -73,6 +73,8 @@ public:      } regs{};  private: +    void ConsumeSinkImpl() override; +      Core::System& system;      Upload::State upload_state;  }; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index bbe3202fe..d44a5cabf 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -4,6 +4,7 @@  #include <cstring>  #include <optional>  #include "common/assert.h" +#include "common/scope_exit.h"  #include "common/settings.h"  #include "core/core.h"  #include "core/core_timing.h" @@ -30,6 +31,10 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)                                                                                  regs.upload} {      dirty.flags.flip();      InitializeRegisterDefaults(); +    execution_mask.reset(); +    for (size_t i = 0; i < execution_mask.size(); i++) { +        execution_mask[i] = IsMethodExecutable(static_cast<u32>(i)); +    }  }  Maxwell3D::~Maxwell3D() = default; @@ -123,6 +128,71 @@ void Maxwell3D::InitializeRegisterDefaults() {      shadow_state = regs;  } +bool Maxwell3D::IsMethodExecutable(u32 method) { +    if (method >= MacroRegistersStart) { +        return true; +    } +    switch (method) { +    case MAXWELL3D_REG_INDEX(draw.end): +    case MAXWELL3D_REG_INDEX(draw.begin): +    case MAXWELL3D_REG_INDEX(vertex_buffer.first): +    case MAXWELL3D_REG_INDEX(vertex_buffer.count): +    case MAXWELL3D_REG_INDEX(index_buffer.first): +    case MAXWELL3D_REG_INDEX(index_buffer.count): +    case MAXWELL3D_REG_INDEX(draw_inline_index): +    case MAXWELL3D_REG_INDEX(index_buffer32_subsequent): +    case MAXWELL3D_REG_INDEX(index_buffer16_subsequent): +    case MAXWELL3D_REG_INDEX(index_buffer8_subsequent): +    case MAXWELL3D_REG_INDEX(index_buffer32_first): +    case MAXWELL3D_REG_INDEX(index_buffer16_first): +    case MAXWELL3D_REG_INDEX(index_buffer8_first): +    case MAXWELL3D_REG_INDEX(inline_index_2x16.even): +    case MAXWELL3D_REG_INDEX(inline_index_4x8.index0): +    case MAXWELL3D_REG_INDEX(vertex_array_instance_first): +    case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): +    case MAXWELL3D_REG_INDEX(wait_for_idle): +    case MAXWELL3D_REG_INDEX(shadow_ram_control): +    case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr): +    case MAXWELL3D_REG_INDEX(load_mme.instruction): +    case MAXWELL3D_REG_INDEX(load_mme.start_address): +    case MAXWELL3D_REG_INDEX(falcon[4]): +    case MAXWELL3D_REG_INDEX(const_buffer.buffer): +    case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 1: +    case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 2: +    case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 3: +    case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 4: +    case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 5: +    case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 6: +    case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 7: +    case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 8: +    case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 9: +    case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 10: +    case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 11: +    case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 12: +    case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 13: +    case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 14: +    case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15: +    case MAXWELL3D_REG_INDEX(bind_groups[0].raw_config): +    case MAXWELL3D_REG_INDEX(bind_groups[1].raw_config): +    case MAXWELL3D_REG_INDEX(bind_groups[2].raw_config): +    case MAXWELL3D_REG_INDEX(bind_groups[3].raw_config): +    case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config): +    case MAXWELL3D_REG_INDEX(topology_override): +    case MAXWELL3D_REG_INDEX(clear_surface): +    case MAXWELL3D_REG_INDEX(report_semaphore.query): +    case MAXWELL3D_REG_INDEX(render_enable.mode): +    case MAXWELL3D_REG_INDEX(clear_report_value): +    case MAXWELL3D_REG_INDEX(sync_info): +    case MAXWELL3D_REG_INDEX(launch_dma): +    case MAXWELL3D_REG_INDEX(inline_data): +    case MAXWELL3D_REG_INDEX(fragment_barrier): +    case MAXWELL3D_REG_INDEX(tiled_cache_barrier): +        return true; +    default: +        return false; +    } +} +  void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {      if (executing_macro == 0) {          // A macro call must begin by writing the macro method's register, not its argument. @@ -141,6 +211,7 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool      // Call the macro when there are no more parameters in the command buffer      if (is_last_call) { +        ConsumeSink();          CallMacroMethod(executing_macro, macro_params);          macro_params.clear();          macro_addresses.clear(); @@ -214,6 +285,29 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {      return argument;  } +void Maxwell3D::ConsumeSinkImpl() { +    SCOPE_EXIT({ method_sink.clear(); }); +    const auto control = shadow_state.shadow_ram_control; +    if (control == Regs::ShadowRamControl::Track || +        control == Regs::ShadowRamControl::TrackWithFilter) { + +        for (auto [method, value] : method_sink) { +            shadow_state.reg_array[method] = value; +            ProcessDirtyRegisters(method, value); +        } +        return; +    } +    if (control == Regs::ShadowRamControl::Replay) { +        for (auto [method, value] : method_sink) { +            ProcessDirtyRegisters(method, shadow_state.reg_array[method]); +        } +        return; +    } +    for (auto [method, value] : method_sink) { +        ProcessDirtyRegisters(method, value); +    } +} +  void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {      if (regs.reg_array[method] == argument) {          return; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index f0a379801..478ba4dc7 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3123,6 +3123,8 @@ private:      void ProcessDirtyRegisters(u32 method, u32 argument); +    void ConsumeSinkImpl() override; +      void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call);      /// Retrieves information about a specific TIC entry from the TIC buffer. @@ -3172,6 +3174,8 @@ private:      void RefreshParametersImpl(); +    bool IsMethodExecutable(u32 method); +      Core::System& system;      MemoryManager& memory_manager; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index f73d7bf0f..01f70ea9e 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -21,7 +21,10 @@ namespace Tegra::Engines {  using namespace Texture;  MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_) -    : system{system_}, memory_manager{memory_manager_} {} +    : system{system_}, memory_manager{memory_manager_} { +    execution_mask.reset(); +    execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true; +}  MaxwellDMA::~MaxwellDMA() = default; @@ -29,6 +32,13 @@ void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {      rasterizer = rasterizer_;  } +void MaxwellDMA::ConsumeSinkImpl() { +    for (auto [method, value] : method_sink) { +        regs.reg_array[method] = value; +    } +    method_sink.clear(); +} +  void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {      ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index c88191a61..0e594fa74 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -231,6 +231,8 @@ private:      void ReleaseSemaphore(); +    void ConsumeSinkImpl() override; +      Core::System& system;      MemoryManager& memory_manager; diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 3eac50975..294a338d2 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -126,6 +126,7 @@ private:          const u32 vertex_first = parameters[3];          const u32 vertex_count = parameters[1]; +                  if (maxwell3d.AnyParametersDirty() &&              maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) { @@ -135,6 +136,7 @@ private:          const u32 base_instance = parameters[4];          if (extended) { +            maxwell3d.regs.global_base_instance_index = base_instance;              maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;              maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);          } @@ -144,6 +146,7 @@ private:              vertex_first, vertex_count, base_instance, instance_count);          if (extended) { +            maxwell3d.regs.global_base_instance_index = 0;              maxwell3d.engine_state = Maxwell::EngineHint::None;              maxwell3d.replace_table.clear();          } | 
