diff options
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_context.cpp | 12 | ||||
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | 43 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 | 
5 files changed, 43 insertions, 20 deletions
|
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp
index 5456d4e5b..c6325e55f 100644
--- a/src/shader_recompiler/backend/glsl/emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_context.cpp
@@ -122,9 +122,11 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
 
 void EmitContext::SetupExtensions(std::string&) {
     header += "#extension GL_ARB_separate_shader_objects : enable\n";
-    header += "#extension GL_ARB_sparse_texture2 : enable\n";
-    header += "#extension GL_EXT_texture_shadow_lod : enable\n";
-    // header += "#extension GL_ARB_texture_cube_map_array : enable\n";
+    if (stage != Stage::Compute) {
+        // TODO: track this usage
+        header += "#extension GL_ARB_sparse_texture2 : enable\n";
+        header += "#extension GL_EXT_texture_shadow_lod : enable\n";
+    }
     if (info.uses_int64) {
         header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
     }
@@ -149,6 +151,10 @@ void EmitContext::SetupExtensions(std::string&) {
         info.uses_subgroup_shuffles || info.uses_fswzadd) {
         header += "#extension GL_ARB_shader_ballot : enable\n";
         header += "#extension GL_ARB_shader_group_vote : enable\n";
+        header += "#extension GL_KHR_shader_subgroup_basic : enable\n";
+        if (!info.uses_int64) {
+            header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
+        }
     }
 }
 
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
index e462c977c..8a018acb5 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -42,31 +42,42 @@ void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& in
 }
 
 void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
-    ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
-    // TODO:
-    // if (ctx.profile.warp_size_potentially_larger_than_guest) {
-    // }
+    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+        ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
+    } else {
+        const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")};
+        const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)};
+        ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
+    }
 }
 
 void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
-    ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
-    // TODO:
-    // if (ctx.profile.warp_size_potentially_larger_than_guest) {
-    // }
+    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+        ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
+    } else {
+        const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")};
+        const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)};
+        ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
+    }
 }
 
 void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
-    ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
-    // TODO:
-    // if (ctx.profile.warp_size_potentially_larger_than_guest) {
-    // }
+    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+        ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
+    } else {
+        const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")};
+        const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)};
+        const auto value{fmt::format("({}^{})", ballot, active_mask)};
+        ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
+    }
 }
 
 void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
-    ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
-    // TODO:
-    // if (ctx.profile.warp_size_potentially_larger_than_guest) {
-    // }
+    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+        ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
+    } else {
+        ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubgroupInvocationID];", inst, pred);
+    }
 }
 
 void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 071133781..20ea42cff 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -160,6 +160,7 @@ Device::Device() {
     has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
     has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5;
     has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
+    warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
 
     // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
     // uniform buffers as "push constants"
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 9b9402c29..ff0ff2b08 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -128,6 +128,10 @@ public:
         return has_amd_shader_half_float;
     }
 
+    bool IsWarpSizePotentiallyLargerThanGuest() const {
+        return warp_size_potentially_larger_than_guest;
+    }
+
 private:
     static bool TestVariableAoffi();
     static bool TestPreciseBug();
@@ -161,6 +165,7 @@ private:
     bool has_depth_buffer_float{};
     bool has_nv_gpu_shader_5{};
     bool has_amd_shader_half_float{};
+    bool warp_size_potentially_larger_than_guest{};
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 8a052851b..cd11ff653 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -220,7 +220,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
           .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(),
           .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(),
 
-          .warp_size_potentially_larger_than_guest = true,
+          .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),
 
           .lower_left_origin_mode = true,
           .need_declared_frag_colors = true,
|
