diff options
| author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2019-11-12 23:26:56 -0300 | 
|---|---|---|
| committer | ReinUsesLisp <reinuseslisp@airmail.cc> | 2019-11-22 21:28:47 -0300 | 
| commit | 4f5d8e434278cd5999bf21e91f0923d55ec8d52b (patch) | |
| tree | 542e261e7169cb9ba8ed129a53e0bbd181868752 /src/video_core | |
| parent | dc9961f341be64dcbc13097d4eb7b95db45f9fb9 (diff) | |
gl_shader_cache: Specialize shader workgroup
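Drop the usage of ARB_compute_variable_group_size and specialize compute
shaders on their workgroup size instead: the block dimensions are written
into the GLSL source as a fixed local_size layout qualifier and become
part of the ProgramVariant used to look up cached programs. This permits
compute to run on drivers that do not expose the extension, such as the
AMD and Intel proprietary drivers.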
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/engines/kepler_compute.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 21 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 63 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.h | 46 | 
6 files changed, 74 insertions, 68 deletions
| diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 5259d92bd..bd49c6627 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -140,7 +140,7 @@ public:          INSERT_PADDING_WORDS(0x3); -        BitField<0, 16, u32> shared_alloc; +        BitField<0, 18, u32> shared_alloc;          BitField<16, 16, u32> block_dim_x;          union { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b76de71ec..bd4e5f6e3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -273,8 +273,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {          SetupDrawGlobalMemory(stage, shader);          SetupDrawTextures(stage, shader, base_bindings); -        const ProgramVariant variant{base_bindings, primitive_mode}; -        const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant); +        const ProgramVariant variant(base_bindings, primitive_mode); +        const auto [program_handle, next_bindings] = shader->GetHandle(variant);          switch (program) {          case Maxwell::ShaderProgram::VertexA: @@ -725,18 +725,14 @@ bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) {  }  void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { -    if (!GLAD_GL_ARB_compute_variable_group_size) { -        LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the " -                                 "lack of GL_ARB_compute_variable_group_size"); -        return; -    } -      auto kernel = shader_cache.GetComputeKernel(code_addr);      SetupComputeTextures(kernel);      SetupComputeImages(kernel); -    const auto [program, next_bindings] = kernel->GetProgramHandle({}); -    state.draw.shader_program = program; +    const auto& launch_desc = system.GPU().KeplerCompute().launch_description; +    
const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, +                                 launch_desc.block_dim_z); +    std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant);      state.draw.program_pipeline = 0;      const std::size_t buffer_size = @@ -760,10 +756,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {      state.ApplyShaderProgram();      state.ApplyProgramPipeline(); -    const auto& launch_desc = system.GPU().KeplerCompute().launch_description; -    glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y, -                                  launch_desc.grid_dim_z, launch_desc.block_dim_x, -                                  launch_desc.block_dim_y, launch_desc.block_dim_z); +    glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);  }  void RasterizerOpenGL::FlushAll() {} diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7ce06a978..a5789b6d3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -255,7 +255,7 @@ void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {  CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type,                            const ProgramCode& program_code, const ProgramCode& program_code_b, -                          const ProgramVariant& variant, ConstBufferLocker& locker, +                          ConstBufferLocker& locker, const ProgramVariant& variant,                            bool hint_retrievable = false) {      LOG_INFO(Render_OpenGL, "called. 
{}", GetShaderId(unique_identifier, program_type)); @@ -268,17 +268,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy      }      const auto entries = GLShader::GetEntries(ir); -    auto base_bindings{variant.base_bindings}; -    const auto primitive_mode{variant.primitive_mode}; -      std::string source = fmt::format(R"(// {}  #version 430 core  #extension GL_ARB_separate_shader_objects : enable  )",                                       GetShaderId(unique_identifier, program_type)); -    if (is_compute) { -        source += "#extension GL_ARB_compute_variable_group_size : require\n"; -    }      if (device.HasShaderBallot()) {          source += "#extension GL_ARB_shader_ballot : require\n";      } @@ -295,6 +289,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy      }      source += '\n'; +    auto base_bindings = variant.base_bindings;      if (!is_compute) {          source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);      } @@ -318,13 +313,15 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy      if (program_type == ProgramType::Geometry) {          const auto [glsl_topology, debug_name, max_vertices] = -            GetPrimitiveDescription(primitive_mode); +            GetPrimitiveDescription(variant.primitive_mode); -        source += "layout (" + std::string(glsl_topology) + ") in;\n\n"; -        source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; +        source += fmt::format("layout ({}) in;\n\n", glsl_topology); +        source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices);      }      if (program_type == ProgramType::Compute) { -        source += "layout (local_size_variable) in;\n"; +        source += +            fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", +                        variant.block_x, variant.block_y, variant.block_z);  
    }      source += '\n'; @@ -422,58 +419,53 @@ Shader CachedShader::CreateFromCache(const ShaderParameters& params,                                                            unspecialized.code_b));  } -std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { -    UpdateVariant(); +std::tuple<GLuint, BaseBindings> CachedShader::GetHandle(const ProgramVariant& variant) { +    EnsureValidLockerVariant(); -    const auto [entry, is_cache_miss] = curr_variant->programs.try_emplace(variant); +    const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant);      auto& program = entry->second;      if (is_cache_miss) {          program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, -                              variant, *curr_variant->locker); -        disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker)); +                              *curr_locker_variant->locker, variant); +        disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker));          LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);      }      auto base_bindings = variant.base_bindings;      base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()); -    if (program_type != ProgramType::Compute) { -        base_bindings.cbuf += STAGE_RESERVED_UBOS; -    } +    base_bindings.cbuf += STAGE_RESERVED_UBOS;      base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());      base_bindings.sampler += static_cast<u32>(entries.samplers.size());      return {program->handle, base_bindings};  } -void CachedShader::UpdateVariant() { -    if (curr_variant && !curr_variant->locker->IsConsistent()) { -        curr_variant = nullptr; +bool CachedShader::EnsureValidLockerVariant() { +    const auto previous_variant = curr_locker_variant; +    if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) { +        curr_locker_variant = nullptr;     
 } -    if (!curr_variant) { +    if (!curr_locker_variant) {          for (auto& variant : locker_variants) {              if (variant->locker->IsConsistent()) { -                curr_variant = variant.get(); +                curr_locker_variant = variant.get();              }          }      } -    if (!curr_variant) { +    if (!curr_locker_variant) {          auto& new_variant = locker_variants.emplace_back();          new_variant = std::make_unique<LockerVariant>();          new_variant->locker = MakeLocker(system, program_type); -        curr_variant = new_variant.get(); +        curr_locker_variant = new_variant.get();      } +    return previous_variant == curr_locker_variant;  }  ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,                                              const ConstBufferLocker& locker) const { -    ShaderDiskCacheUsage usage; -    usage.unique_identifier = unique_identifier; -    usage.variant = variant; -    usage.keys = locker.GetKeys(); -    usage.bound_samplers = locker.GetBoundSamplers(); -    usage.bindless_samplers = locker.GetBindlessSamplers(); -    return usage; +    return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(), +                                locker.GetBoundSamplers(), locker.GetBindlessSamplers()};  }  ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, @@ -534,9 +526,10 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,              if (!shader) {                  auto locker{MakeLocker(system, unspecialized.program_type)};                  FillLocker(*locker, usage); +                  shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type, -                                     unspecialized.code, unspecialized.code_b, usage.variant, -                                     *locker, true); +                                     unspecialized.code, unspecialized.code_b, *locker, +       
                              usage.variant, true);              }              std::scoped_lock lock{mutex}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6bd7c9cf1..795b05a19 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -86,7 +86,7 @@ public:      }      /// Gets the GL program handle for the shader -    std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); +    std::tuple<GLuint, BaseBindings> GetHandle(const ProgramVariant& variant);  private:      struct LockerVariant { @@ -98,7 +98,7 @@ private:                            GLShader::ShaderEntries entries, ProgramCode program_code,                            ProgramCode program_code_b); -    void UpdateVariant(); +    bool EnsureValidLockerVariant();      ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,                                    const VideoCommon::Shader::ConstBufferLocker& locker) const; @@ -117,7 +117,7 @@ private:      ProgramCode program_code;      ProgramCode program_code_b; -    LockerVariant* curr_variant = nullptr; +    LockerVariant* curr_locker_variant = nullptr;      std::vector<std::unique_ptr<LockerVariant>> locker_variants;  }; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 3f4daf28d..9156f180a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -52,11 +52,11 @@ struct BindlessSamplerKey {      Tegra::Engines::SamplerDescriptor sampler{};  }; -constexpr u32 NativeVersion = 6; +constexpr u32 NativeVersion = 7;  // Making sure sizes doesn't change by accident  static_assert(sizeof(BaseBindings) == 16); -static_assert(sizeof(ProgramVariant) == 20); +static_assert(sizeof(ProgramVariant) == 28);  ShaderCacheVersionHash 
GetShaderCacheVersionHash() {      ShaderCacheVersionHash hash{}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 55311dc6d..4c7ca004d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -44,32 +44,49 @@ struct BaseBindings {      u32 sampler{};      u32 image{}; -    bool operator==(const BaseBindings& rhs) const { +    bool operator==(const BaseBindings& rhs) const noexcept {          return std::tie(cbuf, gmem, sampler, image) ==                 std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image);      } -    bool operator!=(const BaseBindings& rhs) const { +    bool operator!=(const BaseBindings& rhs) const noexcept {          return !operator==(rhs);      }  };  static_assert(std::is_trivially_copyable_v<BaseBindings>); -/// Describes the different variants a single program can be compiled. -struct ProgramVariant { -    BaseBindings base_bindings; +/// Describes the different variants a program can be compiled with. +struct ProgramVariant final { +    ProgramVariant() = default; + +    /// Graphics constructor. +    explicit constexpr ProgramVariant(BaseBindings base_bindings, GLenum primitive_mode) noexcept +        : base_bindings{base_bindings}, primitive_mode{primitive_mode} {} + +    /// Compute constructor. +    explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z) noexcept +        : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)} { +    } + +    // Graphics specific parameters. +    BaseBindings base_bindings{};      GLenum primitive_mode{}; -    bool operator==(const ProgramVariant& rhs) const { -        return std::tie(base_bindings, primitive_mode) == -               std::tie(rhs.base_bindings, rhs.primitive_mode); +    // Compute specific parameters. 
+    u32 block_x{}; +    u16 block_y{}; +    u16 block_z{}; + +    bool operator==(const ProgramVariant& rhs) const noexcept { +        return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z) == +               std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y, +                        rhs.block_z);      } -    bool operator!=(const ProgramVariant& rhs) const { +    bool operator!=(const ProgramVariant& rhs) const noexcept {          return !operator==(rhs);      }  }; -  static_assert(std::is_trivially_copyable_v<ProgramVariant>);  /// Describes how a shader is used. @@ -108,8 +125,11 @@ struct hash<OpenGL::BaseBindings> {  template <>  struct hash<OpenGL::ProgramVariant> {      std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { -        return std::hash<OpenGL::BaseBindings>()(variant.base_bindings) ^ -               (static_cast<std::size_t>(variant.primitive_mode) << 6); +        return std::hash<OpenGL::BaseBindings>{}(variant.base_bindings) ^ +               (static_cast<std::size_t>(variant.primitive_mode) << 6) ^ +               static_cast<std::size_t>(variant.block_x) ^ +               (static_cast<std::size_t>(variant.block_y) << 32) ^ +               (static_cast<std::size_t>(variant.block_z) << 48);      }  }; @@ -117,7 +137,7 @@ template <>  struct hash<OpenGL::ShaderDiskCacheUsage> {      std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {          return static_cast<std::size_t>(usage.unique_identifier) ^ -               std::hash<OpenGL::ProgramVariant>()(usage.variant); +               std::hash<OpenGL::ProgramVariant>{}(usage.variant);      }  }; | 
