diff options
author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2019-11-12 23:39:45 -0300 |
---|---|---|
committer | ReinUsesLisp <reinuseslisp@airmail.cc> | 2019-11-22 21:28:47 -0300 |
commit | dbeb52387979c7e28c0acb03dfc1468146947104 (patch) | |
tree | 8c8e681dcc11a137517839dd64d839541cb6f9ce /src/video_core | |
parent | 4f5d8e434278cd5999bf21e91f0923d55ec8d52b (diff) |
gl_shader_cache: Specialize shared memory size
Shared memory was being declared with an undefined size. Specialize the
compute shader's shared memory size from guest GPU parameters.
Diffstat (limited to 'src/video_core')
5 files changed, 25 insertions, 29 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index bd4e5f6e3..ebfe52e6d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -731,7 +731,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, - launch_desc.block_dim_z); + launch_desc.block_dim_z, launch_desc.shared_alloc); std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); state.draw.program_pipeline = 0; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index a5789b6d3..982c4e23a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -8,7 +8,9 @@ #include <thread> #include <unordered_set> #include <boost/functional/hash.hpp> +#include "common/alignment.h" #include "common/assert.h" +#include "common/logging/log.h" #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" @@ -322,6 +324,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy source += fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", variant.block_x, variant.block_y, variant.block_z); + + if (variant.shared_memory_size > 0) { + source += fmt::format("shared uint smem[{}];", + Common::AlignUp(variant.shared_memory_size, 4) / 4); + } } source += '\n'; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 51c80bf32..fb2ba0905 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -223,7 +223,7 @@ private: Type type{}; }; -constexpr const 
char* GetTypeString(Type type) { +const char* GetTypeString(Type type) { switch (type) { case Type::Bool: return "bool"; @@ -243,7 +243,7 @@ constexpr const char* GetTypeString(Type type) { } } -constexpr const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { +const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { switch (image_type) { case Tegra::Shader::ImageType::Texture1D: return "1D"; @@ -522,13 +522,6 @@ private: code.AddNewLine(); } - void DeclareSharedMemory() { - if (stage != ProgramType::Compute) { - return; - } - code.AddLine("shared uint {}[];", GetSharedMemory()); - } - void DeclareInternalFlags() { for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { const auto flag_code = static_cast<InternalFlag>(flag); @@ -867,9 +860,7 @@ private: } if (const auto smem = std::get_if<SmemNode>(&*node)) { - return { - fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), - Type::Uint}; + return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; } if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { @@ -1245,9 +1236,7 @@ private: Type::Uint}; } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { ASSERT(stage == ProgramType::Compute); - target = { - fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), - Type::Uint}; + target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { const std::string real = Visit(gmem->GetRealAddress()).AsUint(); const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); @@ -2170,10 +2159,6 @@ private: return "lmem_" + suffix; } - std::string GetSharedMemory() const { - return fmt::format("smem_{}", suffix); - } - std::string GetInternalFlag(InternalFlag flag) const { constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", "overflow_flag"}; diff 
--git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 9156f180a..d2bb8502a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -52,11 +52,11 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler{}; }; -constexpr u32 NativeVersion = 7; +constexpr u32 NativeVersion = 8; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 16); -static_assert(sizeof(ProgramVariant) == 28); +static_assert(sizeof(ProgramVariant) == 32); ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 4c7ca004d..6f8e51364 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -64,9 +64,10 @@ struct ProgramVariant final { : base_bindings{base_bindings}, primitive_mode{primitive_mode} {} /// Compute constructor. - explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z) noexcept - : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)} { - } + explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, + u32 shared_memory_size) noexcept + : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)}, + shared_memory_size{shared_memory_size} {} // Graphics specific parameters. 
BaseBindings base_bindings{}; @@ -76,11 +77,13 @@ struct ProgramVariant final { u32 block_x{}; u16 block_y{}; u16 block_z{}; + u32 shared_memory_size{}; bool operator==(const ProgramVariant& rhs) const noexcept { - return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z) == - std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y, - rhs.block_z); + return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z, + shared_memory_size) == std::tie(rhs.base_bindings, rhs.primitive_mode, + rhs.block_x, rhs.block_y, rhs.block_z, + rhs.shared_memory_size); } bool operator!=(const ProgramVariant& rhs) const noexcept { @@ -129,7 +132,8 @@ struct hash<OpenGL::ProgramVariant> { (static_cast<std::size_t>(variant.primitive_mode) << 6) ^ static_cast<std::size_t>(variant.block_x) ^ (static_cast<std::size_t>(variant.block_y) << 32) ^ - (static_cast<std::size_t>(variant.block_z) << 48); + (static_cast<std::size_t>(variant.block_z) << 48) ^ + (static_cast<std::size_t>(variant.shared_memory_size) << 16); } }; |