diff options
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 6 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 99 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 150 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 12 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 74 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 22 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.h | 33 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 39 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_util.cpp | 24 | 
15 files changed, 357 insertions, 140 deletions
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index e3d5fb8a9..08586d33c 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -50,13 +50,14 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {  }  void KeplerCompute::ProcessLaunch() { -      const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();      memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,                                     LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); -    const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; -    LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); +    const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; +    LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); + +    rasterizer.DispatchCompute(code_addr);  }  } // namespace Tegra::Engines diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 1b4975498..e25754e37 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -50,6 +50,14 @@ const Engines::Maxwell3D& GPU::Maxwell3D() const {      return *maxwell_3d;  } +Engines::KeplerCompute& GPU::KeplerCompute() { +    return *kepler_compute; +} + +const Engines::KeplerCompute& GPU::KeplerCompute() const { +    return *kepler_compute; +} +  MemoryManager& GPU::MemoryManager() {      return *memory_manager;  } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index fe6628923..0ace0ff4f 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -155,6 +155,12 @@ public:      /// Returns a const reference to the Maxwell3D GPU engine.      const Engines::Maxwell3D& Maxwell3D() const; +    /// Returns a reference to the KeplerCompute GPU engine. +    Engines::KeplerCompute& KeplerCompute(); + +    /// Returns a reference to the KeplerCompute GPU engine. +    const Engines::KeplerCompute& KeplerCompute() const; +      /// Returns a reference to the GPU memory manager.      Tegra::MemoryManager& MemoryManager(); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 2b7367568..9881df0d5 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -34,6 +34,9 @@ public:      /// Clear the current framebuffer      virtual void Clear() = 0; +    /// Dispatches a compute shader invocation +    virtual void DispatchCompute(GPUVAddr code_addr) = 0; +      /// Notify rasterizer that all caches should be flushed to Switch memory      virtual void FlushAll() = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0432a9e10..c59e687b6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -4,6 +4,7 @@  #include <algorithm>  #include <array> +#include <bitset>  #include <memory>  #include <string>  #include <string_view> @@ -19,6 +20,7 @@  #include "core/core.h"  #include "core/hle/kernel/process.h"  #include "core/settings.h" +#include "video_core/engines/kepler_compute.h"  #include "video_core/engines/maxwell_3d.h"  #include "video_core/memory_manager.h"  #include "video_core/renderer_opengl/gl_rasterizer.h" @@ -326,9 +328,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {          Shader shader{shader_cache.GetStageProgram(program)}; -        const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)}; +        const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);          SetupDrawConstBuffers(stage_enum, shader); -        SetupGlobalRegions(stage_enum, shader); +        SetupDrawGlobalMemory(stage_enum, shader);          const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};          const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; @@ -783,6 +785,45 @@ void RasterizerOpenGL::DrawArrays() {      gpu.dirty.memory_general = false;  } +void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { +    if (!GLAD_GL_ARB_compute_variable_group_size) { +        LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the " +                                 "lack of GL_ARB_compute_variable_group_size"); +        return; +    } + +    auto kernel = shader_cache.GetComputeKernel(code_addr); +    const auto [program, next_bindings] = kernel->GetProgramHandle({}); +    state.draw.shader_program = program; +    state.draw.program_pipeline = 0; + +    const std::size_t buffer_size = +        Tegra::Engines::KeplerCompute::NumConstBuffers * +        (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); +    buffer_cache.Map(buffer_size); + +    bind_ubo_pushbuffer.Setup(0); +    bind_ssbo_pushbuffer.Setup(0); + +    SetupComputeConstBuffers(kernel); +    SetupComputeGlobalMemory(kernel); + +    // TODO(Rodrigo): Bind images and samplers + +    buffer_cache.Unmap(); + +    bind_ubo_pushbuffer.Bind(); +    bind_ssbo_pushbuffer.Bind(); + +    state.ApplyShaderProgram(); +    state.ApplyProgramPipeline(); + +    const auto& launch_desc = system.GPU().KeplerCompute().launch_description; +    glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y, +                                  launch_desc.grid_dim_z, launch_desc.block_dim_x, +                                  launch_desc.block_dim_y, launch_desc.block_dim_z); +} +  void RasterizerOpenGL::FlushAll() {}  void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { @@ -856,12 +897,25 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,  void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,                                               const Shader& shader) {      MICROPROFILE_SCOPE(OpenGL_UBO); -    const auto stage_index = static_cast<std::size_t>(stage); -    const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; - -    // Upload only the enabled buffers from the 16 constbuffers of each shader stage +    const auto& stages = system.GPU().Maxwell3D().state.shader_stages; +    const auto& shader_stage = stages[static_cast<std::size_t>(stage)];      for (const auto& entry : shader->GetShaderEntries().const_buffers) { -        SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); +        const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; +        SetupConstBuffer(buffer, entry); +    } +} + +void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { +    MICROPROFILE_SCOPE(OpenGL_UBO); +    const auto& launch_desc = system.GPU().KeplerCompute().launch_description; +    for (const auto& entry : kernel->GetShaderEntries().const_buffers) { +        const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; +        const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value(); +        Tegra::Engines::ConstBufferInfo buffer; +        buffer.address = config.Address(); +        buffer.size = config.size; +        buffer.enabled = mask[entry.GetIndex()]; +        SetupConstBuffer(buffer, entry);      }  } @@ -882,24 +936,39 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b      bind_ubo_pushbuffer.Push(cbuf, offset, size);  } -void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, -                                          const Shader& shader) { +void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, +                                             const Shader& shader) {      auto& gpu{system.GPU()};      auto& memory_manager{gpu.MemoryManager()};      const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; -    const auto alignment{device.GetShaderStorageBufferAlignment()}; -      for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {          const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; -        const auto actual_addr{memory_manager.Read<u64>(addr)}; +        const auto gpu_addr{memory_manager.Read<u64>(addr)};          const auto size{memory_manager.Read<u32>(addr + 8)}; +        SetupGlobalMemory(entry, gpu_addr, size); +    } +} -        const auto [ssbo, buffer_offset] = -            buffer_cache.UploadMemory(actual_addr, size, alignment, true, entry.IsWritten()); -        bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); +void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { +    auto& gpu{system.GPU()}; +    auto& memory_manager{gpu.MemoryManager()}; +    const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; +    for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { +        const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; +        const auto gpu_addr{memory_manager.Read<u64>(addr)}; +        const auto size{memory_manager.Read<u32>(addr + 8)}; +        SetupGlobalMemory(entry, gpu_addr, size);      }  } +void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, +                                         GPUVAddr gpu_addr, std::size_t size) { +    const auto alignment{device.GetShaderStorageBufferAlignment()}; +    const auto [ssbo, buffer_offset] = +        buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten()); +    bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); +} +  TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,                                                     BaseBindings base_bindings) {      MICROPROFILE_SCOPE(OpenGL_Texture); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index ef34d3f54..8b123c48d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -58,6 +58,7 @@ public:      void DrawArrays() override;      void Clear() override; +    void DispatchCompute(GPUVAddr code_addr) override;      void FlushAll() override;      void FlushRegion(CacheAddr addr, u64 size) override;      void InvalidateRegion(CacheAddr addr, u64 size) override; @@ -115,13 +116,23 @@ private:      void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,                                 const Shader& shader); +    /// Configures the current constbuffers to use for the kernel invocation. +    void SetupComputeConstBuffers(const Shader& kernel); +      /// Configures a constant buffer.      void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,                            const GLShader::ConstBufferEntry& entry);      /// Configures the current global memory entries to use for the draw command. -    void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, -                            const Shader& shader); +    void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, +                               const Shader& shader); + +    /// Configures the current global memory entries to use for the kernel invocation. +    void SetupComputeGlobalMemory(const Shader& kernel); + +    /// Configures a constant buffer. +    void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, +                           std::size_t size);      /// Configures the current textures to use for the draw command. Returns shaders texture buffer      /// usage. diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 456ba0403..1c90facc3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -23,13 +23,13 @@ namespace OpenGL {  using VideoCommon::Shader::ProgramCode; -// One UBO is always reserved for emulation values -constexpr u32 RESERVED_UBOS = 1; +// One UBO is always reserved for emulation values on staged shaders +constexpr u32 STAGE_RESERVED_UBOS = 1;  struct UnspecializedShader {      std::string code;      GLShader::ShaderEntries entries; -    Maxwell::ShaderProgram program_type; +    ProgramType program_type;  };  namespace { @@ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g  }  /// Gets the shader type from a Maxwell program type -constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) { +constexpr GLenum GetShaderType(ProgramType program_type) {      switch (program_type) { -    case Maxwell::ShaderProgram::VertexA: -    case Maxwell::ShaderProgram::VertexB: +    case ProgramType::VertexA: +    case ProgramType::VertexB:          return GL_VERTEX_SHADER; -    case Maxwell::ShaderProgram::Geometry: +    case ProgramType::Geometry:          return GL_GEOMETRY_SHADER; -    case Maxwell::ShaderProgram::Fragment: +    case ProgramType::Fragment:          return GL_FRAGMENT_SHADER; +    case ProgramType::Compute: +        return GL_COMPUTE_SHADER;      default:          return GL_NONE;      } @@ -100,6 +102,25 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen      }  } +ProgramType GetProgramType(Maxwell::ShaderProgram program) { +    switch (program) { +    case Maxwell::ShaderProgram::VertexA: +        return ProgramType::VertexA; +    case Maxwell::ShaderProgram::VertexB: +        return ProgramType::VertexB; +    case Maxwell::ShaderProgram::TesselationControl: +        return ProgramType::TessellationControl; +    case Maxwell::ShaderProgram::TesselationEval: +        return ProgramType::TessellationEval; +    case Maxwell::ShaderProgram::Geometry: +        return ProgramType::Geometry; +    case Maxwell::ShaderProgram::Fragment: +        return ProgramType::Fragment; +    } +    UNREACHABLE(); +    return {}; +} +  /// Calculates the size of a program stream  std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {      constexpr std::size_t start_offset = 10; @@ -128,13 +149,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {  }  /// Hashes one (or two) program streams -u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, +u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,                          const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) {      if (size_a == 0) {          size_a = CalculateProgramSize(code);      }      u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); -    if (program_type != Maxwell::ShaderProgram::VertexA) { +    if (program_type != ProgramType::VertexA) {          return unique_identifier;      }      // VertexA programs include two programs @@ -152,12 +173,12 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode&  }  /// Creates an unspecialized program from code streams -GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, +GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type,                                        ProgramCode program_code, ProgramCode program_code_b) {      GLShader::ShaderSetup setup(program_code);      setup.program.size_a = CalculateProgramSize(program_code);      setup.program.size_b = 0; -    if (program_type == Maxwell::ShaderProgram::VertexA) { +    if (program_type == ProgramType::VertexA) {          // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.          // Conventional HW does not support this, so we combine VertexA and VertexB into one          // stage here. @@ -168,22 +189,23 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr          program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);      switch (program_type) { -    case Maxwell::ShaderProgram::VertexA: -    case Maxwell::ShaderProgram::VertexB: +    case ProgramType::VertexA: +    case ProgramType::VertexB:          return GLShader::GenerateVertexShader(device, setup); -    case Maxwell::ShaderProgram::Geometry: +    case ProgramType::Geometry:          return GLShader::GenerateGeometryShader(device, setup); -    case Maxwell::ShaderProgram::Fragment: +    case ProgramType::Fragment:          return GLShader::GenerateFragmentShader(device, setup); +    case ProgramType::Compute: +        return GLShader::GenerateComputeShader(device, setup);      default: -        LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); -        UNREACHABLE(); +        UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));          return {};      }  }  CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, -                               Maxwell::ShaderProgram program_type, const ProgramVariant& variant, +                               ProgramType program_type, const ProgramVariant& variant,                                 bool hint_retrievable = false) {      auto base_bindings{variant.base_bindings};      const auto primitive_mode{variant.primitive_mode}; @@ -194,7 +216,14 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn      if (entries.shader_viewport_layer_array) {          source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";      } -    source += fmt::format("\n#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); +    if (program_type == ProgramType::Compute) { +        source += "#extension GL_ARB_compute_variable_group_size : require\n"; +    } +    source += '\n'; + +    if (program_type != ProgramType::Compute) { +        source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); +    }      for (const auto& cbuf : entries.const_buffers) {          source += @@ -221,13 +250,16 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn          source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i);      } -    if (program_type == Maxwell::ShaderProgram::Geometry) { +    if (program_type == ProgramType::Geometry) {          const auto [glsl_topology, debug_name, max_vertices] =              GetPrimitiveDescription(primitive_mode);          source += "layout (" + std::string(glsl_topology) + ") in;\n";          source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';      } +    if (program_type == ProgramType::Compute) { +        source += "layout (local_size_variable) in;\n"; +    }      source += code; @@ -255,7 +287,7 @@ std::set<GLenum> GetSupportedFormats() {  } // Anonymous namespace -CachedShader::CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, +CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,                             GLShader::ProgramResult result)      : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr},        unique_identifier{params.unique_identifier}, program_type{program_type}, @@ -268,29 +300,50 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,                                             ProgramCode&& program_code_b) {      const auto code_size{CalculateProgramSize(program_code)};      const auto code_size_b{CalculateProgramSize(program_code_b)}; -    auto result{CreateProgram(params.device, program_type, program_code, program_code_b)}; +    auto result{ +        CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};      if (result.first.empty()) {          // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now          return {};      }      params.disk_cache.SaveRaw(ShaderDiskCacheRaw( -        params.unique_identifier, program_type, static_cast<u32>(code_size / sizeof(u64)), -        static_cast<u32>(code_size_b / sizeof(u64)), std::move(program_code), -        std::move(program_code_b))); +        params.unique_identifier, GetProgramType(program_type), +        static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), +        std::move(program_code), std::move(program_code_b))); -    return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); +    return std::shared_ptr<CachedShader>( +        new CachedShader(params, GetProgramType(program_type), std::move(result)));  }  Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,                                            Maxwell::ShaderProgram program_type,                                            GLShader::ProgramResult result) { -    return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); +    return std::shared_ptr<CachedShader>( +        new CachedShader(params, GetProgramType(program_type), std::move(result))); +} + +Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { +    auto result{CreateProgram(params.device, ProgramType::Compute, code, {})}; + +    const auto code_size{CalculateProgramSize(code)}; +    params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, +                                                 static_cast<u32>(code_size / sizeof(u64)), 0, +                                                 std::move(code), {})); + +    return std::shared_ptr<CachedShader>( +        new CachedShader(params, ProgramType::Compute, std::move(result))); +} + +Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, +                                           GLShader::ProgramResult result) { +    return std::shared_ptr<CachedShader>( +        new CachedShader(params, ProgramType::Compute, std::move(result)));  }  std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {      GLuint handle{}; -    if (program_type == Maxwell::ShaderProgram::Geometry) { +    if (program_type == ProgramType::Geometry) {          handle = GetGeometryShader(variant);      } else {          const auto [entry, is_cache_miss] = programs.try_emplace(variant); @@ -308,8 +361,11 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar          handle = program->handle;      } -    auto base_bindings{variant.base_bindings}; -    base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS; +    auto base_bindings = variant.base_bindings; +    base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()); +    if (program_type != ProgramType::Compute) { +        base_bindings.cbuf += STAGE_RESERVED_UBOS; +    }      base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());      base_bindings.sampler += static_cast<u32>(entries.samplers.size()); @@ -589,13 +645,15 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {      // No shader found - create a new one      ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};      ProgramCode program_code_b; -    if (program == Maxwell::ShaderProgram::VertexA) { +    const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; +    if (is_program_a) {          const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};          program_code_b = GetShaderCode(memory_manager, program_addr_b,                                         memory_manager.GetPointer(program_addr_b));      } -    const auto unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); +    const auto unique_identifier = +        GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);      const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};      const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,                                    host_ptr,   unique_identifier}; @@ -612,4 +670,30 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {      return last_shaders[static_cast<std::size_t>(program)] = shader;  } +Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { +    auto& memory_manager{system.GPU().MemoryManager()}; +    const auto host_ptr{memory_manager.GetPointer(code_addr)}; +    auto kernel = TryGet(host_ptr); +    if (kernel) { +        return kernel; +    } + +    // No kernel found - create a new one +    auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; +    const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; +    const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; +    const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, +                                  host_ptr,   unique_identifier}; + +    const auto found = precompiled_shaders.find(unique_identifier); +    if (found == precompiled_shaders.end()) { +        kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); +    } else { +        kernel = CachedShader::CreateKernelFromCache(params, found->second); +    } + +    Register(kernel); +    return kernel; +} +  } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index bbb53cdf4..a3106a0ff 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -61,6 +61,11 @@ public:                                         Maxwell::ShaderProgram program_type,                                         GLShader::ProgramResult result); +    static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); + +    static Shader CreateKernelFromCache(const ShaderParameters& params, +                                        GLShader::ProgramResult result); +      VAddr GetCpuAddr() const override {          return cpu_addr;      } @@ -78,7 +83,7 @@ public:      std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);  private: -    explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, +    explicit CachedShader(const ShaderParameters& params, ProgramType program_type,                            GLShader::ProgramResult result);      // Geometry programs. These are needed because GLSL needs an input topology but it's not @@ -104,7 +109,7 @@ private:      u8* host_ptr{};      VAddr cpu_addr{};      u64 unique_identifier{}; -    Maxwell::ShaderProgram program_type{}; +    ProgramType program_type{};      ShaderDiskCacheOpenGL& disk_cache;      const PrecompiledPrograms& precompiled_programs; @@ -132,6 +137,9 @@ public:      /// Gets the current specified shader stage program      Shader GetStageProgram(Maxwell::ShaderProgram program); +    /// Gets a compute kernel in the passed address +    Shader GetComputeKernel(GPUVAddr code_addr); +  protected:      // We do not have to flush this cache as things in it are never modified by us.      void FlushObjectInner(const Shader& object) override {} diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e19d502bc..ffe26b241 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -37,7 +37,6 @@ using namespace std::string_literals;  using namespace VideoCommon::Shader;  using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;  using Operation = const OperationNode&;  enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; @@ -162,9 +161,13 @@ std::string FlowStackTopName(MetaStackClass stack) {      return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));  } +constexpr bool IsVertexShader(ProgramType stage) { +    return stage == ProgramType::VertexA || stage == ProgramType::VertexB; +} +  class GLSLDecompiler final {  public: -    explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, +    explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage,                              std::string suffix)          : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} @@ -248,21 +251,21 @@ public:          }          entries.clip_distances = ir.GetClipDistances();          entries.shader_viewport_layer_array = -            stage == ShaderStage::Vertex && (ir.UsesLayer() || ir.UsesViewportIndex()); +            IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex());          entries.shader_length = ir.GetLength();          return entries;      }  private:      void DeclareVertex() { -        if (stage != ShaderStage::Vertex) +        if (!IsVertexShader(stage))              return;          DeclareVertexRedeclarations();      }      void DeclareGeometry() { -        if (stage != ShaderStage::Geometry) { +        if (stage != ProgramType::Geometry) {              return;          } @@ -293,14 +296,14 @@ private:                  break;              }          } -        if (stage != ShaderStage::Vertex || device.HasVertexViewportLayer()) { +        if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) {              if (ir.UsesLayer()) {                  code.AddLine("int gl_Layer;");              }              if (ir.UsesViewportIndex()) {                  code.AddLine("int gl_ViewportIndex;");              } -        } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderStage::Vertex && +        } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) &&                     !device.HasVertexViewportLayer()) {              LOG_ERROR(                  Render_OpenGL, @@ -337,11 +340,16 @@ private:      }      void DeclareLocalMemory() { -        if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { -            const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; -            code.AddLine("float {}[{}];", GetLocalMemory(), element_count); -            code.AddNewLine(); +        // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at +        // specialization time. +        const u64 local_memory_size = +            stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize(); +        if (local_memory_size == 0) { +            return;          } +        const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; +        code.AddLine("float {}[{}];", GetLocalMemory(), element_count); +        code.AddNewLine();      }      void DeclareInternalFlags() { @@ -395,12 +403,12 @@ private:          const u32 location{GetGenericAttributeIndex(index)};          std::string name{GetInputAttribute(index)}; -        if (stage == ShaderStage::Geometry) { +        if (stage == ProgramType::Geometry) {              name = "gs_" + name + "[]";          }          std::string suffix; -        if (stage == ShaderStage::Fragment) { +        if (stage == ProgramType::Fragment) {              const auto input_mode{header.ps.GetAttributeUse(location)};              if (skip_unused && input_mode == AttributeUse::Unused) {                  return; @@ -412,7 +420,7 @@ private:      }      void DeclareOutputAttributes() { -        if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) { +        if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) {              for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {                  DeclareOutputAttribute(ToGenericAttribute(i));              } @@ -534,7 +542,7 @@ private:                  constexpr u32 element_stride{4};                  const u32 address{generic_base + index * generic_stride + element * element_stride}; -                const bool declared{stage != ShaderStage::Fragment || +                const bool declared{stage != ProgramType::Fragment ||                                      header.ps.GetAttributeUse(index) != AttributeUse::Unused};                  const std::string value{declared ? ReadAttribute(attribute, element) : "0"};                  code.AddLine("case 0x{:x}: return {};", address, value); @@ -638,7 +646,7 @@ private:          }          if (const auto abuf = std::get_if<AbufNode>(&*node)) { -            UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, +            UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry,                                   "Physical attributes in geometry shaders are not implemented");              if (abuf->IsPhysicalBuffer()) {                  return fmt::format("readPhysicalAttribute(ftou({}))", @@ -693,6 +701,9 @@ private:          }          if (const auto lmem = std::get_if<LmemNode>(&*node)) { +            if (stage == ProgramType::Compute) { +                LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); +            }              return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));          } @@ -722,7 +733,7 @@ private:      std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {          const auto GeometryPass = [&](std::string_view name) { -            if (stage == ShaderStage::Geometry && buffer) { +            if (stage == ProgramType::Geometry && buffer) {                  // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games                  // set an 0x80000000 index for those and the shader fails to build. Find out why                  // this happens and what's its intent. @@ -734,10 +745,10 @@ private:          switch (attribute) {          case Attribute::Index::Position:              switch (stage) { -            case ShaderStage::Geometry: +            case ProgramType::Geometry:                  return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer),                                     GetSwizzle(element)); -            case ShaderStage::Fragment: +            case ProgramType::Fragment:                  return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element));              default:                  UNREACHABLE(); @@ -758,7 +769,7 @@ private:              // TODO(Subv): Find out what the values are for the first two elements when inside a              // vertex shader, and what's the value of the fourth element when inside a Tess Eval              // shader. -            ASSERT(stage == ShaderStage::Vertex); +            ASSERT(IsVertexShader(stage));              switch (element) {              case 2:                  // Config pack's first value is instance_id. @@ -770,7 +781,7 @@ private:              return "0";          case Attribute::Index::FrontFacing:              // TODO(Subv): Find out what the values are for the other elements. -            ASSERT(stage == ShaderStage::Fragment); +            ASSERT(stage == ProgramType::Fragment);              switch (element) {              case 3:                  return "itof(gl_FrontFacing ? -1 : 0)"; @@ -792,7 +803,7 @@ private:              return value;          }          // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders -        const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; +        const std::string precise = stage != ProgramType::Fragment ? "precise " : "";          const std::string temporary = code.GenerateTemporary();          code.AddLine("{}float {} = {};", precise, temporary, value); @@ -827,12 +838,12 @@ private:                  UNIMPLEMENTED();                  return {};              case 1: -                if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { +                if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {                      return {};                  }                  return std::make_pair("gl_Layer", true);              case 2: -                if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { +                if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {                      return {};                  }                  return std::make_pair("gl_ViewportIndex", true); @@ -1069,6 +1080,9 @@ private:              target = result->first;              is_integer = result->second;          } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { +            if (stage == ProgramType::Compute) { +                LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); +            }              target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));          } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {              const std::string real = Visit(gmem->GetRealAddress()); @@ -1622,7 +1636,7 @@ private:      }      std::string Exit(Operation operation) { -        if (stage != ShaderStage::Fragment) { +        if (stage != ProgramType::Fragment) {              code.AddLine("return;");              return {};          } @@ -1673,7 +1687,7 @@ private:      }      std::string EmitVertex(Operation operation) { -        ASSERT_MSG(stage == ShaderStage::Geometry, +        ASSERT_MSG(stage == ProgramType::Geometry,                     "EmitVertex is expected to be used in a geometry shader.");          // If a geometry shader is attached, it will always flip (it's the last stage before @@ -1684,7 +1698,7 @@ private:      }      std::string EndPrimitive(Operation operation) { -        ASSERT_MSG(stage == ShaderStage::Geometry, +        ASSERT_MSG(stage == ProgramType::Geometry,                     "EndPrimitive is expected to be used in a geometry shader.");          code.AddLine("EndPrimitive();"); @@ -1919,7 +1933,7 @@ private:      }      u32 GetNumPhysicalInputAttributes() const { -        return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); +        return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();      }      u32 GetNumPhysicalAttributes() const { @@ -1932,7 +1946,7 @@ private:      const Device& device;      const ShaderIR& ir; -    const ShaderStage stage; +    const ProgramType stage;      const std::string suffix;      const Header header; @@ -1963,7 +1977,7 @@ std::string GetCommonDeclarations() {          MAX_CONSTBUFFER_ELEMENTS);  } -ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, +ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,                          const std::string& suffix) {      GLSLDecompiler decompiler(device, ir, stage, suffix);      decompiler.Decompile(); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 02586736d..2ea02f5bf 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -12,14 +12,26 @@  #include "video_core/engines/maxwell_3d.h"  #include "video_core/shader/shader_ir.h" -namespace OpenGL { -class Device; -} -  namespace VideoCommon::Shader {  class ShaderIR;  } +namespace OpenGL { + +class Device; + +enum class ProgramType : u32 { +    VertexA = 0, +    VertexB = 1, +    TessellationControl = 2, +    TessellationEval = 3, +    Geometry = 4, +    Fragment = 5, +    Compute = 6 +}; + +} // namespace OpenGL +  namespace OpenGL::GLShader {  struct ShaderEntries; @@ -85,6 +97,6 @@ struct ShaderEntries {  std::string GetCommonDeclarations();  ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, -                        Maxwell::ShaderStage stage, const std::string& suffix); +                        ProgramType stage, const std::string& suffix);  } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 7893d1e26..969fe9ced 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -51,7 +51,7 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {  } // namespace -ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, +ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,                                         u32 program_code_size, u32 program_code_size_b,                                         ProgramCode program_code, ProgramCode program_code_b)      : unique_identifier{unique_identifier}, program_type{program_type}, diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 4f296dda6..cc8bbd61e 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -18,7 +18,6 @@  #include "common/assert.h"  #include "common/common_types.h"  #include "core/file_sys/vfs_vector.h" -#include "video_core/engines/maxwell_3d.h"  #include "video_core/renderer_opengl/gl_shader_gen.h"  namespace Core { @@ -34,14 +33,11 @@ namespace OpenGL {  struct ShaderDiskCacheUsage;  struct ShaderDiskCacheDump; -using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; -  using ProgramCode = std::vector<u64>; -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - +using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;  using TextureBufferUsage = std::bitset<64>; -/// Allocated bindings used by an OpenGL shader program. +/// Allocated bindings used by an OpenGL shader program  struct BaseBindings {      u32 cbuf{};      u32 gmem{}; @@ -126,7 +122,7 @@ namespace OpenGL {  /// Describes a shader how it's used by the guest GPU  class ShaderDiskCacheRaw {  public: -    explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, +    explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,                                  u32 program_code_size, u32 program_code_size_b,                                  ProgramCode program_code, ProgramCode program_code_b);      ShaderDiskCacheRaw(); @@ -141,30 +137,13 @@ public:      }      bool HasProgramA() const { -        return program_type == Maxwell::ShaderProgram::VertexA; +        return program_type == ProgramType::VertexA;      } -    Maxwell::ShaderProgram GetProgramType() const { +    ProgramType GetProgramType() const {          return program_type;      } -    Maxwell::ShaderStage GetProgramStage() const { -        switch (program_type) { -        case Maxwell::ShaderProgram::VertexA: -        case Maxwell::ShaderProgram::VertexB: -            return Maxwell::ShaderStage::Vertex; -        case Maxwell::ShaderProgram::TesselationControl: -            return Maxwell::ShaderStage::TesselationControl; -        case Maxwell::ShaderProgram::TesselationEval: -            return Maxwell::ShaderStage::TesselationEval; -        case Maxwell::ShaderProgram::Geometry: -            return Maxwell::ShaderStage::Geometry; -        case Maxwell::ShaderProgram::Fragment: -            return Maxwell::ShaderStage::Fragment; -        } -        UNREACHABLE(); -    } -      const ProgramCode& GetProgramCode() const {          return program_code;      } @@ -175,7 +154,7 @@ public:  private:      u64 unique_identifier{}; -    Maxwell::ShaderProgram program_type{}; +    ProgramType program_type{};      u32 program_code_size{};      u32 program_code_size_b{}; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index f9ee8429e..3a8d9e1da 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -14,7 +14,8 @@ using Tegra::Engines::Maxwell3D;  using VideoCommon::Shader::ProgramCode;  using VideoCommon::Shader::ShaderIR; -static constexpr u32 PROGRAM_OFFSET{10}; +static constexpr u32 PROGRAM_OFFSET = 10; +static constexpr u32 COMPUTE_OFFSET = 0;  ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {      const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); @@ -29,17 +30,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {  };  )"; -    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); -    ProgramResult program = -        Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); +    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); +    const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; +    ProgramResult program = Decompile(device, program_ir, stage, "vertex");      out += program.first;      if (setup.IsDualProgram()) {          const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); -        ProgramResult program_b = -            Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); - +        ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");          out += program_b.first;      } @@ -80,9 +79,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {  };  )"; +      const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); -    ProgramResult program = -        Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); +    ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");      out += program.first;      out += R"( @@ -116,9 +115,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {  )";      const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); -    ProgramResult program = -        Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); - +    ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");      out += program.first;      out += R"( @@ -130,4 +127,22 @@ void main() {      return {std::move(out), std::move(program.second)};  } +ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { +    const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); + +    std::string out = "// Shader Unique Id: CS" + id + "\n\n"; +    out += GetCommonDeclarations(); + +    const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a); +    ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); +    out += program.first; + +    out += R"( +void main() { +    execute_compute(); +} +)"; +    return {std::move(out), std::move(program.second)}; +} +  } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 7cbc590f8..3833e88ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -54,4 +54,7 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se  /// Generates the GLSL fragment shader program source code for the given FS program  ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); +/// Generates the GLSL compute shader program source code for the given CS program +ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup); +  } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 5f3fe067e..9e74eda0d 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -10,21 +10,25 @@  namespace OpenGL::GLShader { -GLuint LoadShader(const char* source, GLenum type) { -    const char* debug_type; +namespace { +const char* GetStageDebugName(GLenum type) {      switch (type) {      case GL_VERTEX_SHADER: -        debug_type = "vertex"; -        break; +        return "vertex";      case GL_GEOMETRY_SHADER: -        debug_type = "geometry"; -        break; +        return "geometry";      case GL_FRAGMENT_SHADER: -        debug_type = "fragment"; -        break; -    default: -        UNREACHABLE(); +        return "fragment"; +    case GL_COMPUTE_SHADER: +        return "compute";      } +    UNIMPLEMENTED(); +    return "unknown"; +} +} // Anonymous namespace + +GLuint LoadShader(const char* source, GLenum type) { +    const char* debug_type = GetStageDebugName(type);      const GLuint shader_id = glCreateShader(type);      glShaderSource(shader_id, 1, &source, nullptr);      LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);  | 
