diff options
| author | bunnei <bunneidev@gmail.com> | 2020-04-27 00:18:46 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-04-27 00:18:46 -0400 | 
| commit | 6c7d8073be9ab0ce92d346742989800895beeffe (patch) | |
| tree | 298da9383d7f883102643f0ab146dda72d9f5358 /src/video_core | |
| parent | 378aed07e988f5a3c0f66b38edc22732d8b91eb6 (diff) | |
| parent | 5c9feaebb6bfa34bb275ffa59ca823003de20422 (diff) | |
Merge pull request #3742 from FernandoS27/command-list
Optimize GPU Command Lists and Introduce Fast GPU Time Option
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 30 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.h | 1 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.h | 3 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.h | 3 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.h | 3 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 74 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 4 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 3 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 55 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 10 | 
14 files changed, 198 insertions, 15 deletions
| diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 324dafdcd..16311f05e 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -71,16 +71,22 @@ bool DmaPusher::Step() {      gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),                                          command_list_header.size * sizeof(u32)); -    for (const CommandHeader& command_header : command_headers) { - -        // now, see if we're in the middle of a command -        if (dma_state.length_pending) { -            // Second word of long non-inc methods command - method count -            dma_state.length_pending = 0; -            dma_state.method_count = command_header.method_count_; -        } else if (dma_state.method_count) { +    for (std::size_t index = 0; index < command_headers.size();) { +        const CommandHeader& command_header = command_headers[index]; + +        if (dma_state.method_count) {              // Data word of methods command -            CallMethod(command_header.argument); +            if (dma_state.non_incrementing) { +                const u32 max_write = static_cast<u32>( +                    std::min<std::size_t>(index + dma_state.method_count, command_headers.size()) - +                    index); +                CallMultiMethod(&command_header.argument, max_write); +                dma_state.method_count -= max_write; +                index += max_write; +                continue; +            } else { +                CallMethod(command_header.argument); +            }              if (!dma_state.non_incrementing) {                  dma_state.method++; @@ -120,6 +126,7 @@ bool DmaPusher::Step() {                  break;              }          } +        index++;      }      if (!non_main) { @@ -140,4 +147,9 @@ void DmaPusher::CallMethod(u32 argument) const {      gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count});  } +void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { +    gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, +                        dma_state.method_count); +} +  } // namespace Tegra diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index d6188614a..6cef71306 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -75,6 +75,7 @@ private:      void SetState(const CommandHeader& command_header);      void CallMethod(u32 argument) const; +    void CallMultiMethod(const u32* base_start, u32 num_methods) const;      std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index bace6affb..8a47614d2 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -28,6 +28,12 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {      }  } +void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { +    for (std::size_t i = 0; i < amount; i++) { +        CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); +    } +} +  static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {      const u32 line_a = src_2 - src_1;      const u32 line_b = dst_2 - dst_1; diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index dba342c70..939a5966d 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -39,6 +39,9 @@ public:      /// Write the value to the register identified by method.      void CallMethod(const GPU::MethodCall& method_call); +    /// Write multiple values to the register identified by method. +    void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); +      enum class Origin : u32 {          Center = 0,          Corner = 1, diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 368c75a66..00a12175f 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -51,6 +51,13 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {      }  } +void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount, +                                    u32 methods_pending) { +    for (std::size_t i = 0; i < amount; i++) { +        CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); +    } +} +  Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {      const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();      ASSERT(cbuf_mask[regs.tex_cb_index]); diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index eeb79c56f..fe55fdfd0 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -202,6 +202,9 @@ public:      /// Write the value to the register identified by method.      void CallMethod(const GPU::MethodCall& method_call); +    /// Write multiple values to the register identified by method. +    void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); +      Texture::FullTextureInfo GetTexture(std::size_t offset) const;      /// Given a texture handle, returns the TSC and TIC entries. diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 597872e43..586ff15dc 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -41,4 +41,11 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {      }  } +void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount, +                                   u32 methods_pending) { +    for (std::size_t i = 0; i < amount; i++) { +        CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); +    } +} +  } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 396fb6e86..bb26fb030 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -40,6 +40,9 @@ public:      /// Write the value to the register identified by method.      void CallMethod(const GPU::MethodCall& method_call); +    /// Write multiple values to the register identified by method. +    void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); +      struct Regs {          static constexpr size_t NUM_REGS = 0x7F; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2824ed707..39e3b66a2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -280,6 +280,58 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {      }  } +void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, +                                u32 methods_pending) { +    // Methods after 0xE00 are special, they're actually triggers for some microcode that was +    // uploaded to the GPU during initialization. +    if (method >= MacroRegistersStart) { +        // We're trying to execute a macro +        if (executing_macro == 0) { +            // A macro call must begin by writing the macro method's register, not its argument. +            ASSERT_MSG((method % 2) == 0, +                       "Can't start macro execution by writing to the ARGS register"); +            executing_macro = method; +        } + +        for (std::size_t i = 0; i < amount; i++) { +            macro_params.push_back(base_start[i]); +        } + +        // Call the macro when there are no more parameters in the command buffer +        if (amount == methods_pending) { +            CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); +            macro_params.clear(); +        } +        return; +    } +    switch (method) { +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]): +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]): +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]): +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]): +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]): +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]): +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]): +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]): +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]): +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]): +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): +    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { +        ProcessCBMultiData(method, base_start, amount); +        break; +    } +    default: { +        for (std::size_t i = 0; i < amount; i++) { +            CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); +        } +    } +    } +} +  void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) {      if (mme_draw.current_mode == MMEDrawMode::Undefined) {          if (mme_draw.gl_begin_consume) { @@ -570,6 +622,28 @@ void Maxwell3D::StartCBData(u32 method) {      ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);  } +void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount) { +    if (cb_data_state.current != method) { +        if (cb_data_state.current != null_cb_data) { +            FinishCBData(); +        } +        constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]); +        cb_data_state.start_pos = regs.const_buffer.cb_pos; +        cb_data_state.id = method - first_cb_data; +        cb_data_state.current = method; +        cb_data_state.counter = 0; +    } +    const std::size_t id = cb_data_state.id; +    const std::size_t size = amount; +    std::size_t i = 0; +    for (; i < size; i++) { +        cb_data_state.buffer[id][cb_data_state.counter] = start_base[i]; +        cb_data_state.counter++; +    } +    // Increment the current buffer position. +    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4 * amount; +} +  void Maxwell3D::FinishCBData() {      // Write the input value to the current const buffer at the current position.      const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 7bbc6600b..3dfba8197 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1359,6 +1359,9 @@ public:      /// Write the value to the register identified by method.      void CallMethod(const GPU::MethodCall& method_call); +    /// Write multiple values to the register identified by method. +    void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); +      /// Write the value to the register identified by method.      void CallMethodFromMME(const GPU::MethodCall& method_call); @@ -1512,6 +1515,7 @@ private:      /// Handles a write to the CB_DATA[i] register.      void StartCBData(u32 method);      void ProcessCBData(u32 value); +    void ProcessCBMultiData(u32 method, const u32* start_base, u32 amount);      void FinishCBData();      /// Handles a write to the CB_BIND register. diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 3bfed6ab8..6630005b0 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -36,6 +36,13 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {  #undef MAXWELLDMA_REG_INDEX  } +void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount, +                                 u32 methods_pending) { +    for (std::size_t i = 0; i < amount; i++) { +        CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); +    } +} +  void MaxwellDMA::HandleCopy() {      LOG_TRACE(HW_GPU, "Requested a DMA copy"); diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 4f40d1d1f..c43ed8194 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -35,6 +35,9 @@ public:      /// Write the value to the register identified by method.      void CallMethod(const GPU::MethodCall& method_call); +    /// Write multiple values to the register identified by method. +    void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); +      struct Regs {          static constexpr std::size_t NUM_REGS = 0x1D6; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 3b7572d61..b87fd873d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -9,6 +9,7 @@  #include "core/core_timing_util.h"  #include "core/frontend/emu_window.h"  #include "core/memory.h" +#include "core/settings.h"  #include "video_core/engines/fermi_2d.h"  #include "video_core/engines/kepler_compute.h"  #include "video_core/engines/kepler_memory.h" @@ -154,7 +155,10 @@ u64 GPU::GetTicks() const {      constexpr u64 gpu_ticks_den = 625;      const u64 cpu_ticks = system.CoreTiming().GetTicks(); -    const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); +    u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); +    if (Settings::values.use_fast_gpu_time) { +        nanoseconds /= 256; +    }      const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;      const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;      return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; @@ -209,16 +213,32 @@ void GPU::CallMethod(const MethodCall& method_call) {      ASSERT(method_call.subchannel < bound_engines.size()); -    if (ExecuteMethodOnEngine(method_call)) { +    if (ExecuteMethodOnEngine(method_call.method)) {          CallEngineMethod(method_call);      } else {          CallPullerMethod(method_call);      }  } -bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) { -    const auto method = static_cast<BufferMethods>(method_call.method); -    return method >= BufferMethods::NonPullerMethods; +void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, +                          u32 methods_pending) { +    LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel); + +    ASSERT(subchannel < bound_engines.size()); + +    if (ExecuteMethodOnEngine(method)) { +        CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); +    } else { +        for (std::size_t i = 0; i < amount; i++) { +            CallPullerMethod( +                {method, base_start[i], subchannel, methods_pending - static_cast<u32>(i)}); +        } +    } +} + +bool GPU::ExecuteMethodOnEngine(u32 method) { +    const auto buffer_method = static_cast<BufferMethods>(method); +    return buffer_method >= BufferMethods::NonPullerMethods;  }  void GPU::CallPullerMethod(const MethodCall& method_call) { @@ -298,6 +318,31 @@ void GPU::CallEngineMethod(const MethodCall& method_call) {      }  } +void GPU::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, +                                u32 methods_pending) { +    const EngineID engine = bound_engines[subchannel]; + +    switch (engine) { +    case EngineID::FERMI_TWOD_A: +        fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending); +        break; +    case EngineID::MAXWELL_B: +        maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending); +        break; +    case EngineID::KEPLER_COMPUTE_B: +        kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending); +        break; +    case EngineID::MAXWELL_DMA_COPY_A: +        maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending); +        break; +    case EngineID::KEPLER_INLINE_TO_MEMORY_B: +        kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending); +        break; +    default: +        UNIMPLEMENTED_MSG("Unimplemented engine"); +    } +} +  void GPU::ProcessBindMethod(const MethodCall& method_call) {      // Bind the current subchannel to the desired engine id.      LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 5e3eb94e9..dd51c95b7 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -155,6 +155,10 @@ public:      /// Calls a GPU method.      void CallMethod(const MethodCall& method_call); +    /// Calls a GPU multivalue method. +    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, +                         u32 methods_pending); +      /// Flush all current written commands into the host GPU for execution.      void FlushCommands();      /// Synchronizes CPU writes with Host GPU memory. @@ -309,8 +313,12 @@ private:      /// Calls a GPU engine method.      void CallEngineMethod(const MethodCall& method_call); +    /// Calls a GPU engine multivalue method. +    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, +                               u32 methods_pending); +      /// Determines where the method should be executed. -    bool ExecuteMethodOnEngine(const MethodCall& method_call); +    bool ExecuteMethodOnEngine(u32 method);  protected:      std::unique_ptr<Tegra::DmaPusher> dma_pusher; | 
