diff options
Diffstat (limited to 'src/video_core/engines')
| -rw-r--r-- | src/video_core/engines/engine_upload.cpp | 48 | ||||
| -rw-r--r-- | src/video_core/engines/engine_upload.h | 75 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.h | 6 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 37 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.h | 175 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 45 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.h | 66 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 25 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 83 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 43 | 
11 files changed, 479 insertions, 140 deletions
| diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp new file mode 100644 index 000000000..f8aa4ff55 --- /dev/null +++ b/src/video_core/engines/engine_upload.cpp @@ -0,0 +1,48 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "video_core/engines/engine_upload.h" +#include "video_core/memory_manager.h" +#include "video_core/textures/decoders.h" + +namespace Tegra::Engines::Upload { + +State::State(MemoryManager& memory_manager, Registers& regs) +    : memory_manager(memory_manager), regs(regs) {} + +void State::ProcessExec(const bool is_linear) { +    write_offset = 0; +    copy_size = regs.line_length_in * regs.line_count; +    inner_buffer.resize(copy_size); +    this->is_linear = is_linear; +} + +void State::ProcessData(const u32 data, const bool is_last_call) { +    const u32 sub_copy_size = std::min(4U, copy_size - write_offset); +    std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size); +    write_offset += sub_copy_size; +    if (!is_last_call) { +        return; +    } +    const GPUVAddr address{regs.dest.Address()}; +    if (is_linear) { +        memory_manager.WriteBlock(address, inner_buffer.data(), copy_size); +    } else { +        UNIMPLEMENTED_IF(regs.dest.z != 0); +        UNIMPLEMENTED_IF(regs.dest.depth != 1); +        UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); +        UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); +        const std::size_t dst_size = Tegra::Texture::CalculateSize( +            true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); +        tmp_buffer.resize(dst_size); +        memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); +        Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, +                                      regs.dest.BlockHeight(), copy_size, 
inner_buffer.data(), +                                      tmp_buffer.data()); +        memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); +    } +} + +} // namespace Tegra::Engines::Upload diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h new file mode 100644 index 000000000..9c6e0d21c --- /dev/null +++ b/src/video_core/engines/engine_upload.h @@ -0,0 +1,75 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <vector> +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines::Upload { + +struct Registers { +    u32 line_length_in; +    u32 line_count; + +    struct { +        u32 address_high; +        u32 address_low; +        u32 pitch; +        union { +            BitField<0, 4, u32> block_width; +            BitField<4, 4, u32> block_height; +            BitField<8, 4, u32> block_depth; +        }; +        u32 width; +        u32 height; +        u32 depth; +        u32 z; +        u32 x; +        u32 y; + +        GPUVAddr Address() const { +            return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); +        } + +        u32 BlockWidth() const { +            return 1U << block_width.Value(); +        } + +        u32 BlockHeight() const { +            return 1U << block_height.Value(); +        } + +        u32 BlockDepth() const { +            return 1U << block_depth.Value(); +        } +    } dest; +}; + +class State { +public: +    State(MemoryManager& memory_manager, Registers& regs); +    ~State() = default; + +    void ProcessExec(const bool is_linear); +    void ProcessData(const u32 data, const bool is_last_call); + +private: +    u32 write_offset = 0; +    u32 copy_size = 0; +    std::vector<u8> inner_buffer; 
+    std::vector<u8> tmp_buffer; +    bool is_linear = false; +    Registers& regs; +    MemoryManager& memory_manager; +}; + +} // namespace Tegra::Engines::Upload diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 2e51b7f13..45f59a4d9 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -21,6 +21,12 @@ class RasterizerInterface;  namespace Tegra::Engines { +/** + * This Engine is known as G80_2D. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h + */ +  #define FERMI2D_REG_INDEX(field_name)                                                              \      (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index b1d950460..7404a8163 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -4,12 +4,21 @@  #include "common/assert.h"  #include "common/logging/log.h" +#include "core/core.h"  #include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h"  #include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_base.h" +#include "video_core/textures/decoders.h"  namespace Tegra::Engines { -KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {} +KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, +                             MemoryManager& memory_manager) +    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{ +                                                                                  memory_manager, +                                                                      
            regs.upload} {}  KeplerCompute::~KeplerCompute() = default; @@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {      regs.reg_array[method_call.method] = method_call.argument;      switch (method_call.method) { +    case KEPLER_COMPUTE_REG_INDEX(exec_upload): { +        upload_state.ProcessExec(regs.exec_upload.linear != 0); +        break; +    } +    case KEPLER_COMPUTE_REG_INDEX(data_upload): { +        const bool is_last_call = method_call.IsLastCall(); +        upload_state.ProcessData(method_call.argument, is_last_call); +        if (is_last_call) { +            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); +        } +        break; +    }      case KEPLER_COMPUTE_REG_INDEX(launch): -        // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA -        // kernels) -        UNREACHABLE_MSG("Compute shaders are not implemented"); +        ProcessLaunch();          break;      default:          break;      }  } +void KeplerCompute::ProcessLaunch() { + +    const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); +    memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, +                                   LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); + +    const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; +    LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); +} +  } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index fb6cdf432..5250b8d9b 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -6,22 +6,40 @@  #include <array>  #include <cstddef> +#include <vector> +#include "common/bit_field.h"  #include "common/common_funcs.h"  #include "common/common_types.h" +#include "video_core/engines/engine_upload.h"  #include "video_core/gpu.h" +namespace Core 
{ +class System; +} +  namespace Tegra {  class MemoryManager;  } +namespace VideoCore { +class RasterizerInterface; +} +  namespace Tegra::Engines { +/** + * This Engine is known as GK104_Compute. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h + */ +  #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \      (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))  class KeplerCompute final {  public: -    explicit KeplerCompute(MemoryManager& memory_manager); +    explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, +                           MemoryManager& memory_manager);      ~KeplerCompute();      static constexpr std::size_t NumConstBuffers = 8; @@ -31,30 +49,181 @@ public:          union {              struct { -                INSERT_PADDING_WORDS(0xAF); +                INSERT_PADDING_WORDS(0x60); + +                Upload::Registers upload; + +                struct { +                    union { +                        BitField<0, 1, u32> linear; +                    }; +                } exec_upload; + +                u32 data_upload; + +                INSERT_PADDING_WORDS(0x3F); + +                struct { +                    u32 address; +                    GPUVAddr Address() const { +                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8)); +                    } +                } launch_desc_loc; + +                INSERT_PADDING_WORDS(0x1);                  u32 launch; -                INSERT_PADDING_WORDS(0xC48); +                INSERT_PADDING_WORDS(0x4A7); + +                struct { +                    u32 address_high; +                    u32 address_low; +                    u32 limit; +                    GPUVAddr Address() const { 
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | +                                                     address_low); +                    } +                } tsc; + +                INSERT_PADDING_WORDS(0x3); + +                struct { +                    u32 address_high; +                    u32 address_low; +                    u32 limit; +                    GPUVAddr Address() const { +                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | +                                                     address_low); +                    } +                } tic; + +                INSERT_PADDING_WORDS(0x22); + +                struct { +                    u32 address_high; +                    u32 address_low; +                    GPUVAddr Address() const { +                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | +                                                     address_low); +                    } +                } code_loc; + +                INSERT_PADDING_WORDS(0x3FE); + +                u32 texture_const_buffer_index; + +                INSERT_PADDING_WORDS(0x374);              };              std::array<u32, NUM_REGS> reg_array;          };      } regs{}; + +    struct LaunchParams { +        static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40; + +        INSERT_PADDING_WORDS(0x8); + +        u32 program_start; + +        INSERT_PADDING_WORDS(0x2); + +        BitField<30, 1, u32> linked_tsc; + +        BitField<0, 31, u32> grid_dim_x; +        union { +            BitField<0, 16, u32> grid_dim_y; +            BitField<16, 16, u32> grid_dim_z; +        }; + +        INSERT_PADDING_WORDS(0x3); + +        BitField<0, 16, u32> shared_alloc; + +        BitField<0, 31, u32> block_dim_x; +        union { +            BitField<0, 16, u32> block_dim_y; +            BitField<16, 16, u32> block_dim_z; +        }; + +        
union { +            BitField<0, 8, u32> const_buffer_enable_mask; +            BitField<29, 2, u32> cache_layout; +        } memory_config; + +        INSERT_PADDING_WORDS(0x8); + +        struct { +            u32 address_low; +            union { +                BitField<0, 8, u32> address_high; +                BitField<15, 17, u32> size; +            }; +            GPUVAddr Address() const { +                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) | +                                             address_low); +            } +        } const_buffer_config[8]; + +        union { +            BitField<0, 20, u32> local_pos_alloc; +            BitField<27, 5, u32> barrier_alloc; +        }; + +        union { +            BitField<0, 20, u32> local_neg_alloc; +            BitField<24, 5, u32> gpr_alloc; +        }; + +        INSERT_PADDING_WORDS(0x11); +    } launch_description; + +    struct { +        u32 write_offset = 0; +        u32 copy_size = 0; +        std::vector<u8> inner_buffer; +    } state{}; +      static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),                    "KeplerCompute Regs has wrong size"); +    static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32), +                  "KeplerCompute LaunchParams has wrong size"); +      /// Write the value to the register identified by method.      
void CallMethod(const GPU::MethodCall& method_call);  private: +    Core::System& system; +    VideoCore::RasterizerInterface& rasterizer;      MemoryManager& memory_manager; +    Upload::State upload_state; + +    void ProcessLaunch();  };  #define ASSERT_REG_POSITION(field_name, position)                                                  \      static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4,                       \                    "Field " #field_name " has invalid position") +#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position)                                         \ +    static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4,               \ +                  "Field " #field_name " has invalid position") + +ASSERT_REG_POSITION(upload, 0x60); +ASSERT_REG_POSITION(exec_upload, 0x6C); +ASSERT_REG_POSITION(data_upload, 0x6D);  ASSERT_REG_POSITION(launch, 0xAF); +ASSERT_REG_POSITION(tsc, 0x557); +ASSERT_REG_POSITION(tic, 0x55D); +ASSERT_REG_POSITION(code_loc, 0x582); +ASSERT_REG_POSITION(texture_const_buffer_index, 0x982); +ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8); +ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC); +ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11); +ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12); +ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14); +ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);  #undef ASSERT_REG_POSITION diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 7387886a3..0561f676c 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -14,9 +14,8 @@  namespace Tegra::Engines { -KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, -                           MemoryManager& memory_manager) -    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {} +KeplerMemory::KeplerMemory(Core::System& system, 
MemoryManager& memory_manager) +    : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}  KeplerMemory::~KeplerMemory() = default; @@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {      switch (method_call.method) {      case KEPLERMEMORY_REG_INDEX(exec): { -        ProcessExec(); +        upload_state.ProcessExec(regs.exec.linear != 0);          break;      }      case KEPLERMEMORY_REG_INDEX(data): { -        ProcessData(method_call.argument, method_call.IsLastCall()); +        const bool is_last_call = method_call.IsLastCall(); +        upload_state.ProcessData(method_call.argument, is_last_call); +        if (is_last_call) { +            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); +        }          break;      }      }  } -void KeplerMemory::ProcessExec() { -    state.write_offset = 0; -    state.copy_size = regs.line_length_in * regs.line_count; -    state.inner_buffer.resize(state.copy_size); -} - -void KeplerMemory::ProcessData(u32 data, bool is_last_call) { -    const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); -    std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size); -    state.write_offset += sub_copy_size; -    if (is_last_call) { -        const GPUVAddr address{regs.dest.Address()}; -        if (regs.exec.linear != 0) { -            memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size); -        } else { -            UNIMPLEMENTED_IF(regs.dest.z != 0); -            UNIMPLEMENTED_IF(regs.dest.depth != 1); -            UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); -            UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); -            const std::size_t dst_size = Tegra::Texture::CalculateSize( -                true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); -            std::vector<u8> tmp_buffer(dst_size); -            memory_manager.ReadBlock(address, 
tmp_buffer.data(), dst_size); -            Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, -                                          regs.dest.y, regs.dest.BlockHeight(), state.copy_size, -                                          state.inner_buffer.data(), tmp_buffer.data()); -            memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); -        } -        system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); -    } -} -  } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 5f892ddad..f3bc675a9 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -10,6 +10,7 @@  #include "common/bit_field.h"  #include "common/common_funcs.h"  #include "common/common_types.h" +#include "video_core/engines/engine_upload.h"  #include "video_core/gpu.h"  namespace Core { @@ -20,19 +21,20 @@ namespace Tegra {  class MemoryManager;  } -namespace VideoCore { -class RasterizerInterface; -} -  namespace Tegra::Engines { +/** + * This Engine is known as P2MF. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h + */ +  #define KEPLERMEMORY_REG_INDEX(field_name)                                                         \      (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))  class KeplerMemory final {  public: -    KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, -                 MemoryManager& memory_manager); +    KeplerMemory(Core::System& system, MemoryManager& memory_manager);      ~KeplerMemory();      /// Write the value to the register identified by method. 
@@ -45,42 +47,7 @@ public:              struct {                  INSERT_PADDING_WORDS(0x60); -                u32 line_length_in; -                u32 line_count; - -                struct { -                    u32 address_high; -                    u32 address_low; -                    u32 pitch; -                    union { -                        BitField<0, 4, u32> block_width; -                        BitField<4, 4, u32> block_height; -                        BitField<8, 4, u32> block_depth; -                    }; -                    u32 width; -                    u32 height; -                    u32 depth; -                    u32 z; -                    u32 x; -                    u32 y; - -                    GPUVAddr Address() const { -                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | -                                                     address_low); -                    } - -                    u32 BlockWidth() const { -                        return 1U << block_width.Value(); -                    } - -                    u32 BlockHeight() const { -                        return 1U << block_height.Value(); -                    } - -                    u32 BlockDepth() const { -                        return 1U << block_depth.Value(); -                    } -                } dest; +                Upload::Registers upload;                  struct {                      union { @@ -96,28 +63,17 @@ public:          };      } regs{}; -    struct { -        u32 write_offset = 0; -        u32 copy_size = 0; -        std::vector<u8> inner_buffer; -    } state{}; -  private:      Core::System& system; -    VideoCore::RasterizerInterface& rasterizer;      MemoryManager& memory_manager; - -    void ProcessExec(); -    void ProcessData(u32 data, bool is_last_call); +    Upload::State upload_state;  };  #define ASSERT_REG_POSITION(field_name, position)                                                  \      
static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4,                        \                    "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(line_length_in, 0x60); -ASSERT_REG_POSITION(line_count, 0x61); -ASSERT_REG_POSITION(dest, 0x62); +ASSERT_REG_POSITION(upload, 0x60);  ASSERT_REG_POSITION(exec, 0x6C);  ASSERT_REG_POSITION(data, 0x6D);  #undef ASSERT_REG_POSITION diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 9780417f2..d7b586db9 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;  Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,                       MemoryManager& memory_manager) -    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{ -                                                                                  *this} { +    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, +      macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {      InitializeRegisterDefaults();  } @@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {          ProcessSyncPoint();          break;      } +    case MAXWELL3D_REG_INDEX(exec_upload): { +        upload_state.ProcessExec(regs.exec_upload.linear != 0); +        break; +    } +    case MAXWELL3D_REG_INDEX(data_upload): { +        const bool is_last_call = method_call.IsLastCall(); +        upload_state.ProcessData(method_call.argument, is_last_call); +        if (is_last_call) { +            dirty_flags.OnMemoryWrite(); +        } +        break; +    }      default:          break;      } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 85d309d9b..4883b582a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ 
b/src/video_core/engines/maxwell_3d.h @@ -14,6 +14,7 @@  #include "common/common_funcs.h"  #include "common/common_types.h"  #include "common/math_util.h" +#include "video_core/engines/engine_upload.h"  #include "video_core/gpu.h"  #include "video_core/macro_interpreter.h"  #include "video_core/textures/texture.h" @@ -32,6 +33,12 @@ class RasterizerInterface;  namespace Tegra::Engines { +/** + * This Engine is known as GF100_3D. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h + */ +  #define MAXWELL3D_REG_INDEX(field_name)                                                            \      (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) @@ -580,7 +587,18 @@ public:                      u32 bind;                  } macros; -                INSERT_PADDING_WORDS(0x69); +                INSERT_PADDING_WORDS(0x17); + +                Upload::Registers upload; +                struct { +                    union { +                        BitField<0, 1, u32> linear; +                    }; +                } exec_upload; + +                u32 data_upload; + +                INSERT_PADDING_WORDS(0x44);                  struct {                      union { @@ -1176,6 +1194,8 @@ private:      /// Interpreter for the macro codes uploaded to the GPU.      MacroInterpreter macro_interpreter; +    Upload::State upload_state; +      /// Retrieves information about a specific TIC entry from the TIC buffer.      
Texture::TICEntry GetTICEntry(u32 tic_index) const; @@ -1219,6 +1239,9 @@ private:                    "Field " #field_name " has invalid position")  ASSERT_REG_POSITION(macros, 0x45); +ASSERT_REG_POSITION(upload, 0x60); +ASSERT_REG_POSITION(exec_upload, 0x6C); +ASSERT_REG_POSITION(data_upload, 0x6D);  ASSERT_REG_POSITION(sync_info, 0xB2);  ASSERT_REG_POSITION(tfb_enabled, 0x1D1);  ASSERT_REG_POSITION(rt, 0x200); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 2426d0067..3a5dfef0c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() {      ASSERT(regs.exec.enable_2d == 1); -    const std::size_t copy_size = regs.x_count * regs.y_count; +    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { +        ASSERT(regs.src_params.size_z == 1); +        // If the input is tiled and the output is linear, deswizzle the input and copy it over. +        const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; +        const std::size_t src_size = Texture::CalculateSize( +            true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, +            regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); -    auto source_ptr{memory_manager.GetPointer(source)}; -    auto dst_ptr{memory_manager.GetPointer(dest)}; +        const std::size_t dst_size = regs.dst_pitch * regs.y_count; -    if (!source_ptr) { -        LOG_ERROR(HW_GPU, "source_ptr is invalid"); -        return; -    } +        if (read_buffer.size() < src_size) { +            read_buffer.resize(src_size); +        } -    if (!dst_ptr) { -        LOG_ERROR(HW_GPU, "dst_ptr is invalid"); -        return; -    } +        if (write_buffer.size() < dst_size) { +            write_buffer.resize(dst_size); +        } -    const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { -        // TODO(Subv): 
For now, manually flush the regions until we implement GPU-accelerated -        // copying. -        rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size); +        memory_manager.ReadBlock(source, read_buffer.data(), src_size); +        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); -        // We have to invalidate the destination region to evict any outdated surfaces from the -        // cache. We do this before actually writing the new data because the destination address -        // might contain a dirty surface that will have to be written back to memory. -        rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size); -    }; +        Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, +                                  regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(), +                                  write_buffer.data(), regs.src_params.BlockHeight(), +                                  regs.src_params.pos_x, regs.src_params.pos_y); -    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { -        ASSERT(regs.src_params.size_z == 1); -        // If the input is tiled and the output is linear, deswizzle the input and copy it over. 
+        memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); +    } else { +        ASSERT(regs.dst_params.BlockDepth() == 1); -        const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; +        const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; -        FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y, -                           copy_size * src_bytes_per_pixel); +        const std::size_t dst_size = Texture::CalculateSize( +            true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, +            regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); -        Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, -                                  regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr, -                                  regs.src_params.BlockHeight(), regs.src_params.pos_x, -                                  regs.src_params.pos_y); -    } else { -        ASSERT(regs.dst_params.size_z == 1); -        ASSERT(regs.src_pitch == regs.x_count); +        const std::size_t dst_layer_size = Texture::CalculateSize( +            true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, +            regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); -        const u32 src_bpp = regs.src_pitch / regs.x_count; +        const std::size_t src_size = regs.src_pitch * regs.y_count; -        FlushAndInvalidate(regs.src_pitch * regs.y_count, -                           regs.dst_params.size_x * regs.dst_params.size_y * src_bpp); +        if (read_buffer.size() < src_size) { +            read_buffer.resize(src_size); +        } + +        if (write_buffer.size() < dst_size) { +            write_buffer.resize(dst_size); +        } + +        memory_manager.ReadBlock(source, read_buffer.data(), src_size); +        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);          // If the input is 
linear and the output is tiled, swizzle the input and copy it over.          Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, -                                src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight()); +                                src_bytes_per_pixel, +                                write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, +                                read_buffer.data(), regs.dst_params.BlockHeight()); + +        memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);      }  } diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index c6b649842..e5942f671 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -6,6 +6,7 @@  #include <array>  #include <cstddef> +#include <vector>  #include "common/bit_field.h"  #include "common/common_funcs.h"  #include "common/common_types.h" @@ -25,6 +26,11 @@ class RasterizerInterface;  namespace Tegra::Engines { +/** + * This Engine is known as GK104_Copy. 
Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml + */ +  class MaxwellDMA final {  public:      explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, @@ -63,6 +69,16 @@ public:          static_assert(sizeof(Parameters) == 24, "Parameters has wrong size"); +        enum class ComponentMode : u32 { +            Src0 = 0, +            Src1 = 1, +            Src2 = 2, +            Src3 = 3, +            Const0 = 4, +            Const1 = 5, +            Zero = 6, +        }; +          enum class CopyMode : u32 {              None = 0,              Unk1 = 1, @@ -128,7 +144,26 @@ public:                  u32 x_count;                  u32 y_count; -                INSERT_PADDING_WORDS(0xBB); +                INSERT_PADDING_WORDS(0xB8); + +                u32 const0; +                u32 const1; +                union { +                    BitField<0, 4, ComponentMode> component0; +                    BitField<4, 4, ComponentMode> component1; +                    BitField<8, 4, ComponentMode> component2; +                    BitField<12, 4, ComponentMode> component3; +                    BitField<16, 2, u32> component_size; +                    BitField<20, 3, u32> src_num_components; +                    BitField<24, 3, u32> dst_num_components; + +                    u32 SrcBytePerPixel() const { +                        return src_num_components.Value() * component_size.Value(); +                    } +                    u32 DstBytePerPixel() const { +                        return dst_num_components.Value() * component_size.Value(); +                    } +                } swizzle_config;                  Parameters dst_params; @@ -149,6 +184,9 @@ private:      MemoryManager& memory_manager; +    std::vector<u8> read_buffer; +    std::vector<u8> write_buffer; +      /// Performs the copy from the source buffer to the destination buffer as configured in the      /// 
registers.      void HandleCopy(); @@ -165,6 +203,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104);  ASSERT_REG_POSITION(dst_pitch, 0x105);  ASSERT_REG_POSITION(x_count, 0x106);  ASSERT_REG_POSITION(y_count, 0x107); +ASSERT_REG_POSITION(const0, 0x1C0); +ASSERT_REG_POSITION(const1, 0x1C1); +ASSERT_REG_POSITION(swizzle_config, 0x1C2);  ASSERT_REG_POSITION(dst_params, 0x1C3);  ASSERT_REG_POSITION(src_params, 0x1CA); | 
