diff options
| field | value | date |
|---|---|---|
| author | bunnei <bunneidev@gmail.com> | 2018-11-28 10:12:37 -0500 |
| committer | GitHub <noreply@github.com> | 2018-11-28 10:12:37 -0500 |
| commit | 6f849887c9e7fa51743809c3a4bcfd2e319dff8d (patch) | |
| tree | ba26ea7fcb626dacab4b7997149ad52c21ba526c | |
| parent | 881f5ad70f8500035d6e22000d010b58bfbe93b7 (diff) | |
| parent | ac74b71d7530452126792c5fa0bf01fe7378ba00 (diff) | |
Merge pull request #1792 from bunnei/dma-pusher
gpu: Rewrite GPU command list processing with DmaPusher class.
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/CMakeLists.txt | 4 | ||||
| -rw-r--r-- | src/video_core/command_processor.h | 53 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 123 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.h | 99 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.h | 2 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.h | 3 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 53 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 2 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_compute.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_compute.h | 3 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 2 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 53 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 27 | ||||
| -rw-r--r-- | src/video_core/macro_interpreter.cpp | 2 | 
18 files changed, 365 insertions, 110 deletions
| diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 874d5e1c3..2e2b0ae1c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -8,7 +8,6 @@  #include "core/core.h"  #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"  #include "core/memory.h" -#include "video_core/command_processor.h"  #include "video_core/gpu.h"  #include "video_core/memory_manager.h" @@ -129,6 +128,12 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<      return 0;  } +static void PushGPUEntries(Tegra::CommandList&& entries) { +    auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()}; +    dma_pusher.Push(std::move(entries)); +    dma_pusher.DispatchCalls(); +} +  u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {      if (input.size() < sizeof(IoctlSubmitGpfifo)) {          UNIMPLEMENTED(); @@ -142,11 +147,11 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp                                     params.num_entries * sizeof(Tegra::CommandListHeader),                 "Incorrect input size"); -    std::vector<Tegra::CommandListHeader> entries(params.num_entries); +    Tegra::CommandList entries(params.num_entries);      std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],                  params.num_entries * sizeof(Tegra::CommandListHeader)); -    Core::System::GetInstance().GPU().ProcessCommandLists(entries); +    PushGPUEntries(std::move(entries));      params.fence_out.id = 0;      params.fence_out.value = 0; @@ -163,11 +168,11 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)      LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",                  params.address, params.num_entries, params.flags); -    std::vector<Tegra::CommandListHeader> 
entries(params.num_entries); +    Tegra::CommandList entries(params.num_entries);      Memory::ReadBlock(params.address, entries.data(),                        params.num_entries * sizeof(Tegra::CommandListHeader)); -    Core::System::GetInstance().GPU().ProcessCommandLists(entries); +    PushGPUEntries(std::move(entries));      params.fence_out.id = 0;      params.fence_out.value = 0; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3f906a517..0406fbcd9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,6 +1,6 @@  add_library(video_core STATIC -    command_processor.cpp -    command_processor.h +    dma_pusher.cpp +    dma_pusher.h      debug_utils/debug_utils.cpp      debug_utils/debug_utils.h      engines/fermi_2d.cpp diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h deleted file mode 100644 index bd766e77a..000000000 --- a/src/video_core/command_processor.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <type_traits> -#include "common/bit_field.h" -#include "common/common_types.h" -#include "video_core/memory_manager.h" - -namespace Tegra { - -enum class SubmissionMode : u32 { -    IncreasingOld = 0, -    Increasing = 1, -    NonIncreasingOld = 2, -    NonIncreasing = 3, -    Inline = 4, -    IncreaseOnce = 5 -}; - -struct CommandListHeader { -    u32 entry0; // gpu_va_lo -    union { -        u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F) -        BitField<0, 8, u32> gpu_va_hi; -        BitField<8, 2, u32> unk1; -        BitField<10, 21, u32> sz; -        BitField<31, 1, u32> unk2; -    }; - -    GPUVAddr Address() const { -        return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0; -    } -}; -static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size"); - -union CommandHeader { -    u32 hex; - -    BitField<0, 13, u32> method; -    BitField<13, 3, u32> subchannel; - -    BitField<16, 13, u32> arg_count; -    BitField<16, 13, u32> inline_data; - -    BitField<29, 3, SubmissionMode> mode; -}; -static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); -static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); - -} // namespace Tegra diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp new file mode 100644 index 000000000..63a958f11 --- /dev/null +++ b/src/video_core/dma_pusher.cpp @@ -0,0 +1,123 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/microprofile.h" +#include "core/core.h" +#include "core/memory.h" +#include "video_core/dma_pusher.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" + +namespace Tegra { + +DmaPusher::DmaPusher(GPU& gpu) : gpu(gpu) {} + +DmaPusher::~DmaPusher() = default; + +MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128, 128, 192)); + +void DmaPusher::DispatchCalls() { +    MICROPROFILE_SCOPE(DispatchCalls); + +    // On entering GPU code, assume all memory may be touched by the ARM core. +    gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); + +    dma_pushbuffer_subindex = 0; + +    while (Core::System::GetInstance().IsPoweredOn()) { +        if (!Step()) { +            break; +        } +    } +} + +bool DmaPusher::Step() { +    if (dma_get != dma_put) { +        // Push buffer non-empty, read a word +        const CommandHeader command_header{ +            Memory::Read32(*gpu.MemoryManager().GpuToCpuAddress(dma_get))}; + +        dma_get += sizeof(u32); + +        if (!non_main) { +            dma_mget = dma_get; +        } + +        // now, see if we're in the middle of a command +        if (dma_state.length_pending) { +            // Second word of long non-inc methods command - method count +            dma_state.length_pending = 0; +            dma_state.method_count = command_header.method_count_; +        } else if (dma_state.method_count) { +            // Data word of methods command +            CallMethod(command_header.argument); + +            if (!dma_state.non_incrementing) { +                dma_state.method++; +            } + +            if (dma_increment_once) { +                dma_state.non_incrementing = true; +            } + +            dma_state.method_count--; +        } else { +            // No command active - this is the first word of a new one +            switch (command_header.mode) { +            case SubmissionMode::Increasing: +                
SetState(command_header); +                dma_state.non_incrementing = false; +                dma_increment_once = false; +                break; +            case SubmissionMode::NonIncreasing: +                SetState(command_header); +                dma_state.non_incrementing = true; +                dma_increment_once = false; +                break; +            case SubmissionMode::Inline: +                dma_state.method = command_header.method; +                dma_state.subchannel = command_header.subchannel; +                CallMethod(command_header.arg_count); +                dma_state.non_incrementing = true; +                dma_increment_once = false; +                break; +            case SubmissionMode::IncreaseOnce: +                SetState(command_header); +                dma_state.non_incrementing = false; +                dma_increment_once = true; +                break; +            } +        } +    } else if (ib_enable && !dma_pushbuffer.empty()) { +        // Current pushbuffer empty, but we have more IB entries to read +        const CommandList& command_list{dma_pushbuffer.front()}; +        const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; +        dma_get = command_list_header.addr; +        dma_put = dma_get + command_list_header.size * sizeof(u32); +        non_main = command_list_header.is_non_main; + +        if (dma_pushbuffer_subindex >= command_list.size()) { +            // We've gone through the current list, remove it from the queue +            dma_pushbuffer.pop(); +            dma_pushbuffer_subindex = 0; +        } +    } else { +        // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do +        return {}; +    } + +    return true; +} + +void DmaPusher::SetState(const CommandHeader& command_header) { +    dma_state.method = command_header.method; +    dma_state.subchannel = command_header.subchannel; +    dma_state.method_count = 
command_header.method_count; +} + +void DmaPusher::CallMethod(u32 argument) const { +    gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count}); +} + +} // namespace Tegra diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h new file mode 100644 index 000000000..16e0697c4 --- /dev/null +++ b/src/video_core/dma_pusher.h @@ -0,0 +1,99 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> +#include <queue> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "video_core/memory_manager.h" + +namespace Tegra { + +enum class SubmissionMode : u32 { +    IncreasingOld = 0, +    Increasing = 1, +    NonIncreasingOld = 2, +    NonIncreasing = 3, +    Inline = 4, +    IncreaseOnce = 5 +}; + +struct CommandListHeader { +    union { +        u64 raw; +        BitField<0, 40, GPUVAddr> addr; +        BitField<41, 1, u64> is_non_main; +        BitField<42, 21, u64> size; +    }; +}; +static_assert(sizeof(CommandListHeader) == sizeof(u64), "CommandListHeader is incorrect size"); + +union CommandHeader { +    u32 argument; +    BitField<0, 13, u32> method; +    BitField<0, 24, u32> method_count_; +    BitField<13, 3, u32> subchannel; +    BitField<16, 13, u32> arg_count; +    BitField<16, 13, u32> method_count; +    BitField<29, 3, SubmissionMode> mode; +}; +static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); +static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); + +class GPU; + +using CommandList = std::vector<Tegra::CommandListHeader>; + +/** + * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the + * emulated app fills with commands and tells PFIFO to process. 
The pushbuffers are then assembled + * into a "command stream" consisting of 32-bit words that make up "commands". + * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for + * details on this implementation. + */ +class DmaPusher { +public: +    explicit DmaPusher(GPU& gpu); +    ~DmaPusher(); + +    void Push(CommandList&& entries) { +        dma_pushbuffer.push(std::move(entries)); +    } + +    void DispatchCalls(); + +private: +    bool Step(); + +    void SetState(const CommandHeader& command_header); + +    void CallMethod(u32 argument) const; + +    GPU& gpu; + +    std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed +    std::size_t dma_pushbuffer_subindex{};  ///< Index within a command list within the pushbuffer + +    struct DmaState { +        u32 method;            ///< Current method +        u32 subchannel;        ///< Current subchannel +        u32 method_count;      ///< Current method count +        u32 length_pending;    ///< Large NI command length pending +        bool non_incrementing; ///< Current command’s NI flag +    }; + +    DmaState dma_state{}; +    bool dma_increment_once{}; + +    GPUVAddr dma_put{};   ///< pushbuffer current end address +    GPUVAddr dma_get{};   ///< pushbuffer current read address +    GPUVAddr dma_mget{};  ///< main pushbuffer last read address +    bool ib_enable{true}; ///< IB mode enabled +    bool non_main{};      ///< non-main pushbuffer active +}; + +} // namespace Tegra diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index e7721a2be..80f70e332 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -14,13 +14,13 @@ namespace Tegra::Engines {  Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)      : memory_manager(memory_manager), rasterizer{rasterizer} {} -void Fermi2D::WriteReg(u32 method, u32 value) { -    
ASSERT_MSG(method < Regs::NUM_REGS, +void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { +    ASSERT_MSG(method_call.method < Regs::NUM_REGS,                 "Invalid Fermi2D register, increase the size of the Regs structure"); -    regs.reg_array[method] = value; +    regs.reg_array[method_call.method] = method_call.argument; -    switch (method) { +    switch (method_call.method) {      case FERMI2D_REG_INDEX(trigger): {          HandleSurfaceCopy();          break; diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 2a6e8bbbb..50009bf75 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -27,7 +27,7 @@ public:      ~Fermi2D() = default;      /// Write the value to the register identified by method. -    void WriteReg(u32 method, u32 value); +    void CallMethod(const GPU::MethodCall& method_call);      struct Regs {          static constexpr std::size_t NUM_REGS = 0x258; diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 2adbc9eaf..4880191fc 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -17,19 +17,19 @@ KeplerMemory::KeplerMemory(VideoCore::RasterizerInterface& rasterizer,  KeplerMemory::~KeplerMemory() = default; -void KeplerMemory::WriteReg(u32 method, u32 value) { -    ASSERT_MSG(method < Regs::NUM_REGS, +void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { +    ASSERT_MSG(method_call.method < Regs::NUM_REGS,                 "Invalid KeplerMemory register, increase the size of the Regs structure"); -    regs.reg_array[method] = value; +    regs.reg_array[method_call.method] = method_call.argument; -    switch (method) { +    switch (method_call.method) {      case KEPLERMEMORY_REG_INDEX(exec): {          state.write_offset = 0;          break;      }      case KEPLERMEMORY_REG_INDEX(data): { -        ProcessData(value); +        
ProcessData(method_call.argument);          break;      }      } diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index bf4a13cff..fe9ebc5b9 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -9,6 +9,7 @@  #include "common/bit_field.h"  #include "common/common_funcs.h"  #include "common/common_types.h" +#include "video_core/gpu.h"  #include "video_core/memory_manager.h"  namespace VideoCore { @@ -26,7 +27,7 @@ public:      ~KeplerMemory();      /// Write the value to the register identified by method. -    void WriteReg(u32 method, u32 value); +    void CallMethod(const GPU::MethodCall& method_call);      struct Regs {          static constexpr size_t NUM_REGS = 0x7F; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index f0a5470b9..b19b3a75a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -97,71 +97,74 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {      macro_interpreter.Execute(search->second, std::move(parameters));  } -void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { +void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {      auto debug_context = Core::System::GetInstance().GetGPUDebugContext();      // It is an error to write to a register other than the current macro's ARG register before it      // has finished execution.      if (executing_macro != 0) { -        ASSERT(method == executing_macro + 1); +        ASSERT(method_call.method == executing_macro + 1);      }      // Methods after 0xE00 are special, they're actually triggers for some microcode that was      // uploaded to the GPU during initialization. 
-    if (method >= MacroRegistersStart) { +    if (method_call.method >= MacroRegistersStart) {          // We're trying to execute a macro          if (executing_macro == 0) {              // A macro call must begin by writing the macro method's register, not its argument. -            ASSERT_MSG((method % 2) == 0, +            ASSERT_MSG((method_call.method % 2) == 0,                         "Can't start macro execution by writing to the ARGS register"); -            executing_macro = method; +            executing_macro = method_call.method;          } -        macro_params.push_back(value); +        macro_params.push_back(method_call.argument);          // Call the macro when there are no more parameters in the command buffer -        if (remaining_params == 0) { +        if (method_call.IsLastCall()) {              CallMacroMethod(executing_macro, std::move(macro_params));          }          return;      } -    ASSERT_MSG(method < Regs::NUM_REGS, +    ASSERT_MSG(method_call.method < Regs::NUM_REGS,                 "Invalid Maxwell3D register, increase the size of the Regs structure");      if (debug_context) {          debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);      } -    if (regs.reg_array[method] != value) { -        regs.reg_array[method] = value; +    if (regs.reg_array[method_call.method] != method_call.argument) { +        regs.reg_array[method_call.method] = method_call.argument;          // Vertex format -        if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && -            method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { +        if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && +            method_call.method < +                MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {              dirty_flags.vertex_attrib_format = true;          }          // Vertex buffer -        if (method >= 
MAXWELL3D_REG_INDEX(vertex_array) && -            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { -            dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); -        } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && -                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { +        if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) && +            method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {              dirty_flags.vertex_array |= -                1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); -        } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && -                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { -            dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays)); +                1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); +        } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && +                   method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { +            dirty_flags.vertex_array |= +                1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); +        } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) && +                   method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { +            dirty_flags.vertex_array |= +                1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));          }      } -    switch (method) { +    switch (method_call.method) {      case MAXWELL3D_REG_INDEX(macros.data): { -        ProcessMacroUpload(value); +        ProcessMacroUpload(method_call.argument);          break;      }      case MAXWELL3D_REG_INDEX(macros.bind): { -        ProcessMacroBind(value); +        ProcessMacroBind(method_call.argument);          break;      }      case 
MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): @@ -180,7 +183,7 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {      case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):      case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):      case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { -        ProcessCBData(value); +        ProcessCBData(method_call.argument);          break;      }      case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 9324d9710..84471f181 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1080,7 +1080,7 @@ public:      u32 GetRegisterValue(u32 method) const;      /// Write the value to the register identified by method. -    void WriteReg(u32 method, u32 value, u32 remaining_params); +    void CallMethod(const GPU::MethodCall& method_call);      /// Returns a list of enabled textures for the specified shader stage.      
std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; diff --git a/src/video_core/engines/maxwell_compute.cpp b/src/video_core/engines/maxwell_compute.cpp index 8b5f08351..656db6a61 100644 --- a/src/video_core/engines/maxwell_compute.cpp +++ b/src/video_core/engines/maxwell_compute.cpp @@ -8,13 +8,13 @@  namespace Tegra::Engines { -void MaxwellCompute::WriteReg(u32 method, u32 value) { -    ASSERT_MSG(method < Regs::NUM_REGS, +void MaxwellCompute::CallMethod(const GPU::MethodCall& method_call) { +    ASSERT_MSG(method_call.method < Regs::NUM_REGS,                 "Invalid MaxwellCompute register, increase the size of the Regs structure"); -    regs.reg_array[method] = value; +    regs.reg_array[method_call.method] = method_call.argument; -    switch (method) { +    switch (method_call.method) {      case MAXWELL_COMPUTE_REG_INDEX(compute): {          LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented");          UNREACHABLE(); diff --git a/src/video_core/engines/maxwell_compute.h b/src/video_core/engines/maxwell_compute.h index 6ea934fb9..1d71f11bd 100644 --- a/src/video_core/engines/maxwell_compute.h +++ b/src/video_core/engines/maxwell_compute.h @@ -9,6 +9,7 @@  #include "common/bit_field.h"  #include "common/common_funcs.h"  #include "common/common_types.h" +#include "video_core/gpu.h"  namespace Tegra::Engines { @@ -42,7 +43,7 @@ public:                    "MaxwellCompute Regs has wrong size");      /// Write the value to the register identified by method. 
-    void WriteReg(u32 method, u32 value); +    void CallMethod(const GPU::MethodCall& method_call);  };  #define ASSERT_REG_POSITION(field_name, position)                                                  \ diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index a34e884fe..06462f570 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -14,16 +14,16 @@ namespace Tegra::Engines {  MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)      : memory_manager(memory_manager), rasterizer{rasterizer} {} -void MaxwellDMA::WriteReg(u32 method, u32 value) { -    ASSERT_MSG(method < Regs::NUM_REGS, +void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { +    ASSERT_MSG(method_call.method < Regs::NUM_REGS,                 "Invalid MaxwellDMA register, increase the size of the Regs structure"); -    regs.reg_array[method] = value; +    regs.reg_array[method_call.method] = method_call.argument;  #define MAXWELLDMA_REG_INDEX(field_name)                                                           \      (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32)) -    switch (method) { +    switch (method_call.method) {      case MAXWELLDMA_REG_INDEX(exec): {          HandleCopy();          break; diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 5f3704f05..1f8cd65d2 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -24,7 +24,7 @@ public:      ~MaxwellDMA() = default;      /// Write the value to the register identified by method. 
-    void WriteReg(u32 method, u32 value); +    void CallMethod(const GPU::MethodCall& method_call);      struct Regs {          static constexpr std::size_t NUM_REGS = 0x1D6; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 51b3904f6..6c81dee64 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -26,6 +26,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {  GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {      memory_manager = std::make_unique<Tegra::MemoryManager>(); +    dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);      maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager);      fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);      maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); @@ -51,6 +52,14 @@ const MemoryManager& GPU::MemoryManager() const {      return *memory_manager;  } +DmaPusher& GPU::DmaPusher() { +    return *dma_pusher; +} + +const DmaPusher& GPU::DmaPusher() const { +    return *dma_pusher; +} +  u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {      ASSERT(format != RenderTargetFormat::NONE); @@ -113,4 +122,48 @@ u32 DepthFormatBytesPerPixel(DepthFormat format) {      }  } +enum class BufferMethods { +    BindObject = 0, +    CountBufferMethods = 0x40, +}; + +void GPU::CallMethod(const MethodCall& method_call) { +    LOG_TRACE(HW_GPU, +              "Processing method {:08X} on subchannel {} value " +              "{:08X} remaining params {}", +              MethCall.method, MethCall.subchannel, value, remaining_params); + +    ASSERT(method_call.subchannel < bound_engines.size()); + +    if (method_call.method == static_cast<u32>(BufferMethods::BindObject)) { +        // Bind the current subchannel to the desired engine id. 
+        LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, +                  method_call.argument); +        bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument); +        return; +    } + +    const EngineID engine = bound_engines[method_call.subchannel]; + +    switch (engine) { +    case EngineID::FERMI_TWOD_A: +        fermi_2d->CallMethod(method_call); +        break; +    case EngineID::MAXWELL_B: +        maxwell_3d->CallMethod(method_call); +        break; +    case EngineID::MAXWELL_COMPUTE_B: +        maxwell_compute->CallMethod(method_call); +        break; +    case EngineID::MAXWELL_DMA_COPY_A: +        maxwell_dma->CallMethod(method_call); +        break; +    case EngineID::KEPLER_INLINE_TO_MEMORY_B: +        kepler_memory->CallMethod(method_call); +        break; +    default: +        UNIMPLEMENTED_MSG("Unimplemented engine"); +    } +} +  } // namespace Tegra diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 5cc1e19ca..af5ccd1e9 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -9,6 +9,7 @@  #include <vector>  #include "common/common_types.h"  #include "core/hle/service/nvflinger/buffer_queue.h" +#include "video_core/dma_pusher.h"  #include "video_core/memory_manager.h"  namespace VideoCore { @@ -119,8 +120,23 @@ public:      explicit GPU(VideoCore::RasterizerInterface& rasterizer);      ~GPU(); -    /// Processes a command list stored at the specified address in GPU memory. 
-    void ProcessCommandLists(const std::vector<CommandListHeader>& commands); +    struct MethodCall { +        u32 method{}; +        u32 argument{}; +        u32 subchannel{}; +        u32 method_count{}; + +        bool IsLastCall() const { +            return method_count <= 1; +        } + +        MethodCall(u32 method, u32 argument, u32 subchannel = 0, u32 method_count = 0) +            : method(method), argument(argument), subchannel(subchannel), +              method_count(method_count) {} +    }; + +    /// Calls a GPU method. +    void CallMethod(const MethodCall& method_call);      /// Returns a reference to the Maxwell3D GPU engine.      Engines::Maxwell3D& Maxwell3D(); @@ -134,7 +150,14 @@ public:      /// Returns a const reference to the GPU memory manager.      const Tegra::MemoryManager& MemoryManager() const; +    /// Returns a reference to the GPU DMA pusher. +    Tegra::DmaPusher& DmaPusher(); + +    /// Returns a const reference to the GPU DMA pusher. +    const Tegra::DmaPusher& DmaPusher() const; +  private: +    std::unique_ptr<Tegra::DmaPusher> dma_pusher;      std::unique_ptr<Tegra::MemoryManager> memory_manager;      /// Mapping of command subchannels to their bound engine ids. diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index 2b0dea5cd..9c55e9f1e 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -250,7 +250,7 @@ void MacroInterpreter::SetMethodAddress(u32 address) {  }  void MacroInterpreter::Send(u32 value) { -    maxwell3d.WriteReg(method_address.address, value, 0); +    maxwell3d.CallMethod({method_address.address, value});      // Increment the method address by the method increment.      method_address.address.Assign(method_address.address.Value() +                                    method_address.increment.Value()); | 
