diff options
Diffstat (limited to 'src/shader_recompiler')
42 files changed, 668 insertions, 280 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 525b2363c..07e75f9d8 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -216,6 +216,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate_program.h host_translate_info.h ir_opt/collect_shader_info_pass.cpp + ir_opt/conditional_barrier_pass.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp ir_opt/dual_vertex_pass.cpp @@ -223,6 +224,7 @@ add_library(shader_recompiler STATIC ir_opt/identity_removal_pass.cpp ir_opt/layer_pass.cpp ir_opt/lower_fp16_to_fp32.cpp + ir_opt/lower_fp64_to_fp32.cpp ir_opt/lower_int64_to_int32.cpp ir_opt/passes.h ir_opt/position_pass.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 0cb1e193e..b795c0179 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -279,6 +279,8 @@ void SetupOptions(const IR::Program& program, const Profile& profile, header += "OPTION NV_internal;" "OPTION NV_shader_storage_buffer;" "OPTION NV_gpu_program_fp64;"; + // TODO: Enable only when MS is used + header += "OPTION NV_texture_multisample;"; if (info.uses_int64_bit_atomics) { header += "OPTION NV_shader_atomic_int64;"; } @@ -459,7 +461,7 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I header += fmt::format("R{},", index); } if (program.local_memory_size > 0) { - header += fmt::format("lmem[{}],", program.local_memory_size); + header += fmt::format("lmem[{}],", Common::DivCeil(program.local_memory_size, 4U)); } if (program.info.uses_fswzadd) { header += "FSWZA[4],FSWZB[4],"; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp index 5bfdecc09..2fc2a0ac6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -43,10 +43,6 @@ void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) { Alias(inst, value); } -void EmitBitCastS32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Alias(inst, value); -} - void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) { Alias(inst, value); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index e67e80fac..85ee27333 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -59,7 +59,14 @@ std::string Image(EmitContext& ctx, IR::TextureInstInfo info, } } -std::string_view TextureType(IR::TextureInstInfo info) { +bool IsTextureMsaa(EmitContext& ctx, const IR::TextureInstInfo& info) { + if (info.type == TextureType::Buffer) { + return false; + } + return ctx.info.texture_descriptors.at(info.descriptor_index).is_multisample; +} + +std::string_view TextureType(IR::TextureInstInfo info, bool is_ms = false) { if (info.is_depth) { switch (info.type) { case TextureType::Color1D: @@ -88,9 +95,9 @@ std::string_view TextureType(IR::TextureInstInfo info) { return "ARRAY1D"; case TextureType::Color2D: case TextureType::Color2DRect: - return "2D"; + return is_ms ? "2DMS" : "2D"; case TextureType::ColorArray2D: - return "ARRAY2D"; + return is_ms ? "ARRAY2DMS" : "ARRAY2D"; case TextureType::Color3D: return "3D"; case TextureType::ColorCube: @@ -510,15 +517,16 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) { const auto info{inst.Flags<IR::TextureInstInfo>()}; const auto sparse_inst{PrepareSparse(inst)}; + const bool is_multisample{ms.type != Type::Void}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; - const std::string_view type{TextureType(info)}; + const std::string_view type{TextureType(info, is_multisample)}; const std::string texture{Texture(ctx, info, index)}; const std::string offset_vec{Offset(ctx, offset)}; const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; const Register ret{ctx.reg_alloc.Define(inst)}; if (info.type == TextureType::Buffer) { ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec); - } else if (ms.type != Type::Void) { + } else if (is_multisample) { ctx.Add("MOV.S {}.w,{};" "TXFMS.F{} {},{},{},{}{};", coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec); @@ -531,10 +539,11 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, } void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - ScalarS32 lod) { + ScalarS32 lod, [[maybe_unused]] const IR::Value& skip_mips) { const auto info{inst.Flags<IR::TextureInstInfo>()}; const std::string texture{Texture(ctx, info, index)}; - const std::string_view type{TextureType(info)}; + const bool is_msaa{IsTextureMsaa(ctx, info)}; + const std::string_view type{TextureType(info, is_msaa)}; ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index eaaf9ba39..1a1ea61d5 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -197,7 +197,6 @@ void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Regist void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); -void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); @@ -582,7 +581,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms); void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - ScalarS32 lod); + ScalarS32 lod, const IR::Value& skip_mips); void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, const IR::Value& derivatives, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 911181c43..376a05827 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -398,162 +398,162 @@ void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value } void EmitGlobalAtomicIAdd32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicSMin32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicUMin32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicSMax32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicUMax32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicInc32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicDec32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicAnd32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicOr32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicXor32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicExchange32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicIAdd64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicSMin64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicUMin64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicSMax64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicUMax64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicInc64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicDec64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicAnd64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicOr64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicXor64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicExchange64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicIAdd32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicSMin32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicUMin32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicSMax32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicUMax32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicInc32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicDec32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicAnd32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicOr32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicXor32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicExchange32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicAddF32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicAddF16x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicAddF32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicMinF16x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicMinF32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicMaxF16x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicMaxF32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index 8e5e6cf1f..1be4a0f59 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -48,10 +48,6 @@ void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) ctx.AddU64("{}=doubleBitsToUint64({});", inst, value); } -void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF32("{}=ftoi({});", inst, value); -} - void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index cecdbb9d6..418505475 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -25,6 +25,13 @@ std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::V return fmt::format("img{}{}", def.binding, index_offset); } +bool IsTextureMsaa(EmitContext& ctx, const IR::TextureInstInfo& info) { + if (info.type == TextureType::Buffer) { + return false; + } + return ctx.info.texture_descriptors.at(info.descriptor_index).is_multisample; +} + std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) { switch (info.type) { case TextureType::Color1D: @@ -136,6 +143,21 @@ IR::Inst* PrepareSparse(IR::Inst& inst) { } return sparse_inst; } + +std::string ImageGatherSubpixelOffset(const IR::TextureInstInfo& info, std::string_view texture, + std::string_view coords) { + switch (info.type) { + case TextureType::Color2D: + case TextureType::Color2DRect: + return fmt::format("{}+vec2(0.001953125)/vec2(textureSize({}, 0))", coords, texture); + case TextureType::ColorArray2D: + case TextureType::ColorCube: + return fmt::format("vec3({0}.xy+vec2(0.001953125)/vec2(textureSize({1}, 0)),{0}.z)", coords, + texture); + default: + return std::string{coords}; + } +} } // Anonymous namespace void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, @@ -333,6 +355,13 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); ctx.AddU1("{}=true;", *sparse_inst); } + std::string coords_with_subpixel_offset; + if (ctx.profile.need_gather_subpixel_offset) { + // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on + // AMD hardware as on Maxwell or other Nvidia architectures. + coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords); + coords = coords_with_subpixel_offset; + } if (!sparse_inst || !supports_sparse) { if (offset.IsEmpty()) { ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, @@ -380,6 +409,13 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); ctx.AddU1("{}=true;", *sparse_inst); } + std::string coords_with_subpixel_offset; + if (ctx.profile.need_gather_subpixel_offset) { + // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on + // AMD hardware as on Maxwell or other Nvidia architectures. + coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords); + coords = coords_with_subpixel_offset; + } if (!sparse_inst || !supports_sparse) { if (offset.IsEmpty()) { ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); @@ -414,7 +450,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view offset, std::string_view lod, - [[maybe_unused]] std::string_view ms) { + std::string_view ms) { const auto info{inst.Flags<IR::TextureInstInfo>()}; if (info.has_bias) { throw NotImplementedException("EmitImageFetch Bias texture samples"); @@ -431,19 +467,24 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, ctx.AddU1("{}=true;", *sparse_inst); } if (!sparse_inst || !supports_sparse) { - if (!offset.empty()) { - ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, - CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info)); + const auto int_coords{CoordsCastToInt(coords, info)}; + if (!ms.empty()) { + ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, ms); + } else if (!offset.empty()) { + ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, int_coords, lod, + CoordsCastToInt(offset, info)); } else { if (info.type == TextureType::Buffer) { ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords); } else { - ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, - CoordsCastToInt(coords, info), lod); + ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, lod); } } return; } + if (!ms.empty()) { + throw NotImplementedException("EmitImageFetch Sparse MSAA samples"); + } if (!offset.empty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", *sparse_inst, texture, CastToIntVec(coords, info), lod, @@ -455,29 +496,36 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, } void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view lod) { + std::string_view lod, const IR::Value& skip_mips_val) { const auto info{inst.Flags<IR::TextureInstInfo>()}; const auto texture{Texture(ctx, info, index)}; + const bool is_msaa{IsTextureMsaa(ctx, info)}; + const bool skip_mips{skip_mips_val.U1()}; + const auto mips{skip_mips ? "0u" : fmt::format("uint(textureQueryLevels({}))", texture)}; + if (is_msaa && !skip_mips) { + throw NotImplementedException("EmitImageQueryDimensions MSAA QueryLevels"); + } + if (info.type == TextureType::Buffer && !skip_mips) { + throw NotImplementedException("EmitImageQueryDimensions TextureType::Buffer QueryLevels"); + } + const bool uses_lod{!is_msaa && info.type != TextureType::Buffer}; + const auto lod_str{uses_lod ? fmt::format(",int({})", lod) : ""}; switch (info.type) { case TextureType::Color1D: - return ctx.AddU32x4( - "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst, - texture, lod, texture); + return ctx.AddU32x4("{}=uvec4(uint(textureSize({}{})),0u,0u,{});", inst, texture, lod_str, + mips); case TextureType::ColorArray1D: case TextureType::Color2D: case TextureType::ColorCube: case TextureType::Color2DRect: - return ctx.AddU32x4( - "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst, - texture, lod, texture); + return ctx.AddU32x4("{}=uvec4(uvec2(textureSize({}{})),0u,{});", inst, texture, lod_str, + mips); case TextureType::ColorArray2D: case TextureType::Color3D: case TextureType::ColorArrayCube: - return ctx.AddU32x4( - "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture, - lod, texture); + return ctx.AddU32x4("{}=uvec4(uvec3(textureSize({}{})),{});", inst, texture, lod_str, mips); case TextureType::Buffer: - throw NotImplementedException("EmitImageQueryDimensions Texture buffers"); + return ctx.AddU32x4("{}=uvec4(uint(textureSize({})),0u,0u,{});", inst, texture, mips); } throw LogicError("Unspecified image type {}", info.type.Value()); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 4151c89de..8d0a65047 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -231,7 +231,6 @@ void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst); void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst); void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); @@ -655,7 +654,7 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view offset, std::string_view lod, std::string_view ms); void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view lod); + std::string_view lod, const IR::Value& skip_mips); void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords); void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp index 5d01ec0cd..9ff4028c2 100644 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp @@ -61,24 +61,28 @@ std::string OutputDecorator(Stage stage, u32 size) { } } -std::string_view SamplerType(TextureType type, bool is_depth) { - if (is_depth) { - switch (type) { - case TextureType::Color1D: - return "sampler1DShadow"; - case TextureType::ColorArray1D: - return "sampler1DArrayShadow"; - case TextureType::Color2D: - return "sampler2DShadow"; - case TextureType::ColorArray2D: - return "sampler2DArrayShadow"; - case TextureType::ColorCube: - return "samplerCubeShadow"; - case TextureType::ColorArrayCube: - return "samplerCubeArrayShadow"; - default: - throw NotImplementedException("Texture type: {}", type); - } +std::string_view DepthSamplerType(TextureType type) { + switch (type) { + case TextureType::Color1D: + return "sampler1DShadow"; + case TextureType::ColorArray1D: + return "sampler1DArrayShadow"; + case TextureType::Color2D: + return "sampler2DShadow"; + case TextureType::ColorArray2D: + return "sampler2DArrayShadow"; + case TextureType::ColorCube: + return "samplerCubeShadow"; + case TextureType::ColorArrayCube: + return "samplerCubeArrayShadow"; + default: + throw NotImplementedException("Texture type: {}", type); + } +} + +std::string_view ColorSamplerType(TextureType type, bool is_multisample = false) { + if (is_multisample) { + ASSERT(type == TextureType::Color2D || type == TextureType::ColorArray2D); } switch (type) { case TextureType::Color1D: @@ -87,9 +91,9 @@ std::string_view SamplerType(TextureType type, bool is_depth) { return "sampler1DArray"; case TextureType::Color2D: case TextureType::Color2DRect: - return "sampler2D"; + return is_multisample ? "sampler2DMS" : "sampler2D"; case TextureType::ColorArray2D: - return "sampler2DArray"; + return is_multisample ? "sampler2DMSArray" : "sampler2DArray"; case TextureType::Color3D: return "sampler3D"; case TextureType::ColorCube: @@ -306,12 +310,6 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (runtime_info.force_early_z) { header += "layout(early_fragment_tests)in;"; } - if (info.uses_sample_id) { - header += "in int gl_SampleID;"; - } - if (info.stores_sample_mask) { - header += "out int gl_SampleMask[];"; - } break; case Stage::Compute: stage_name = "cs"; @@ -481,7 +479,7 @@ void EmitContext::DefineGenericOutput(size_t index, u32 invocations) { const u32 remainder{4 - element}; const TransformFeedbackVarying* xfb_varying{}; const size_t xfb_varying_index{base_index + element}; - if (xfb_varying_index < runtime_info.xfb_varyings.size()) { + if (xfb_varying_index < runtime_info.xfb_count) { xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index]; xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr; } @@ -677,7 +675,7 @@ void EmitContext::SetupTextures(Bindings& bindings) { texture_buffers.reserve(info.texture_buffer_descriptors.size()); for (const auto& desc : info.texture_buffer_descriptors) { texture_buffers.push_back({bindings.texture, desc.count}); - const auto sampler_type{SamplerType(TextureType::Buffer, false)}; + const auto sampler_type{ColorSamplerType(TextureType::Buffer)}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, sampler_type, bindings.texture, array_decorator); @@ -686,7 +684,8 @@ void EmitContext::SetupTextures(Bindings& bindings) { textures.reserve(info.texture_descriptors.size()); for (const auto& desc : info.texture_descriptors) { textures.push_back({bindings.texture, desc.count}); - const auto sampler_type{SamplerType(desc.type, desc.is_depth)}; + const auto sampler_type{desc.is_depth ? DepthSamplerType(desc.type) + : ColorSamplerType(desc.type, desc.is_multisample)}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, sampler_type, bindings.texture, array_decorator); diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.h b/src/shader_recompiler/backend/glsl/glsl_emit_context.h index dfd10ac28..7587f7bab 100644 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.h +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.h @@ -49,7 +49,7 @@ public: void Add(const char* format_str, IR::Inst& inst, Args&&... args) { const auto var_def{var_alloc.AddDefine(inst, type)}; if (var_def.empty()) { - // skip assigment. + // skip assignment. code += fmt::format(fmt::runtime(format_str + 3), std::forward<Args>(args)...); } else { code += fmt::format(fmt::runtime(format_str), var_def, std::forward<Args>(args)...); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 0f86a8004..34592a01f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -387,7 +387,7 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr } void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) { - if (ctx.runtime_info.xfb_varyings.empty()) { + if (ctx.runtime_info.xfb_count == 0) { return; } ctx.AddCapability(spv::Capability::TransformFeedback); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 4b3043b65..0ce73f289 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -69,6 +69,11 @@ Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id), Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) { + if (!ctx.profile.support_descriptor_aliasing) { + LOG_WARNING(Shader_SPIRV, "Descriptor aliasing not supported, this cannot be atomic."); + return ctx.ConstantNull(ctx.U64); + } + if (ctx.profile.support_int64_atomics) { const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, binding, offset, sizeof(u64))}; @@ -86,6 +91,11 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& Id StorageAtomicU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) { + if (!ctx.profile.support_descriptor_aliasing) { + LOG_WARNING(Shader_SPIRV, "Descriptor aliasing not supported, this cannot be atomic."); + return ctx.ConstantNull(ctx.U32[2]); + } + LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, binding, offset, sizeof(u32[2]))}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index 50daacd95..c4ca28d11 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -18,10 +18,6 @@ void EmitBitCastU64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitBitCastS32F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - void EmitBitCastF16U16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 0cd87a48f..2868fc57d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -10,27 +10,6 @@ namespace Shader::Backend::SPIRV { namespace { -struct AttrInfo { - Id pointer; - Id id; - bool needs_cast; -}; - -std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { - const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; - switch (type) { - case AttributeType::Float: - return AttrInfo{ctx.input_f32, ctx.F32[1], false}; - case AttributeType::UnsignedInt: - return AttrInfo{ctx.input_u32, ctx.U32[1], true}; - case AttributeType::SignedInt: - return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; - case AttributeType::Disabled: - return std::nullopt; - } - throw InvalidArgument("Invalid attribute type {}", type); -} - template <typename... Args> Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) { switch (ctx.stage) { @@ -302,15 +281,26 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const u32 element{static_cast<u32>(attr) % 4}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - const std::optional<AttrInfo> type{AttrTypes(ctx, index)}; - if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index, element)) { + const auto& generic{ctx.input_generics.at(index)}; + if (!ValidId(generic.id)) { // Attribute is disabled or varying component is not written return ctx.Const(element == 3 ? 1.0f : 0.0f); } - const Id generic_id{ctx.input_generics.at(index)}; - const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))}; - const Id value{ctx.OpLoad(type->id, pointer)}; - return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; + const Id pointer{ + AttrPointer(ctx, generic.pointer_type, vertex, generic.id, ctx.Const(element))}; + const Id value{ctx.OpLoad(generic.component_type, pointer)}; + return [&ctx, generic, value]() { + switch (generic.load_op) { + case InputGenericLoadOp::Bitcast: + return ctx.OpBitcast(ctx.F32[1], value); + case InputGenericLoadOp::SToF: + return ctx.OpConvertSToF(ctx.F32[1], value); + case InputGenericLoadOp::UToF: + return ctx.OpConvertUToF(ctx.F32[1], value); + default: + return value; + }; + }(); } switch (attr) { case IR::Attribute::PrimitiveId: @@ -339,9 +329,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { if (ctx.profile.support_vertex_instance_id) { return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_id)); } else { - const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)}; - const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; - return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_index)); } case IR::Attribute::BaseInstance: return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance)); @@ -386,9 +374,7 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { if (ctx.profile.support_vertex_instance_id) { return ctx.OpLoad(ctx.U32[1], ctx.vertex_id); } else { - const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)}; - const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; - return ctx.OpISub(ctx.U32[1], index, base); + return ctx.OpLoad(ctx.U32[1], ctx.vertex_index); } case IR::Attribute::BaseInstance: return ctx.OpLoad(ctx.U32[1], ctx.base_instance); @@ -473,7 +459,8 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { } void EmitSetSampleMask(EmitContext& ctx, Id value) { - ctx.OpStore(ctx.sample_mask, value); + const Id pointer{ctx.OpAccessChain(ctx.output_u32, ctx.sample_mask, ctx.u32_zero_value)}; + ctx.OpStore(pointer, value); } void EmitSetFragDepth(EmitContext& ctx, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index fb5799c42..7d901c04b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -201,6 +201,13 @@ Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { } } +bool IsTextureMsaa(EmitContext& ctx, const IR::TextureInstInfo& info) { + if (info.type == TextureType::Buffer) { + return false; + } + return ctx.textures.at(info.descriptor_index).is_multisample; +} + Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) { const auto info{inst->Flags<IR::TextureInstInfo>()}; if (info.relaxed_precision != 0) { @@ -254,6 +261,30 @@ Id BitTest(EmitContext& ctx, Id mask, Id bit) { const Id bit_value{ctx.OpBitwiseAnd(ctx.U32[1], shifted, ctx.Const(1u))}; return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value); } + +Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info, Id texture, + Id coords) { + // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on + // AMD hardware as on Maxwell or other Nvidia architectures. + const auto calculate_coords{[&](size_t dim) { + const Id nudge{ctx.Const(0x1p-9f)}; + const Id image_size{ctx.OpImageQuerySizeLod(ctx.U32[dim], texture, ctx.u32_zero_value)}; + Id offset{dim == 2 ? ctx.ConstantComposite(ctx.F32[dim], nudge, nudge) + : ctx.ConstantComposite(ctx.F32[dim], nudge, nudge, ctx.f32_zero_value)}; + offset = ctx.OpFDiv(ctx.F32[dim], offset, ctx.OpConvertUToF(ctx.F32[dim], image_size)); + return ctx.OpFAdd(ctx.F32[dim], coords, offset); + }}; + switch (info.type) { + case TextureType::Color2D: + case TextureType::Color2DRect: + return calculate_coords(2); + case TextureType::ColorArray2D: + case TextureType::ColorCube: + return calculate_coords(3); + default: + return coords; + } +} } // Anonymous namespace Id EmitBindlessImageSampleImplicitLod(EmitContext&) { @@ -416,6 +447,9 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id const IR::Value& offset, const IR::Value& offset2) { const auto info{inst->Flags<IR::TextureInstInfo>()}; const ImageOperands operands(ctx, offset, offset2); + if (ctx.profile.need_gather_subpixel_offset) { + coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords); + } return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), operands.MaskOptional(), operands.Span()); @@ -425,6 +459,9 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, const IR::Value& offset, const IR::Value& offset2, Id dref) { const auto info{inst->Flags<IR::TextureInstInfo>()}; const ImageOperands operands(ctx, offset, offset2); + if (ctx.profile.need_gather_subpixel_offset) { + coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords); + } return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), operands.Span()); @@ -436,34 +473,42 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c if (info.type == TextureType::Buffer) { lod = Id{}; } + if (Sirit::ValidId(ms)) { + // This image is multisampled, lod must be implicit + lod = Id{}; + } const ImageOperands operands(offset, lod, ms); return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); } -Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, + const IR::Value& skip_mips_val) { const auto info{inst->Flags<IR::TextureInstInfo>()}; const Id image{TextureImage(ctx, info, index)}; const Id zero{ctx.u32_zero_value}; - const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; + const bool skip_mips{skip_mips_val.U1()}; + const auto mips{[&] { return skip_mips ? zero : ctx.OpImageQueryLevels(ctx.U32[1], image); }}; + const bool is_msaa{IsTextureMsaa(ctx, info)}; + const bool uses_lod{!is_msaa && info.type != TextureType::Buffer}; + const auto query{[&](Id type) { + return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod) + : ctx.OpImageQuerySize(type, image); + }}; switch (info.type) { case TextureType::Color1D: - return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod), - zero, zero, mips()); + return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips()); case TextureType::ColorArray1D: case TextureType::Color2D: case TextureType::ColorCube: case TextureType::Color2DRect: - return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod), - zero, mips()); + return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[2]), zero, mips()); case TextureType::ColorArray2D: case TextureType::Color3D: case TextureType::ColorArrayCube: - return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod), - mips()); + return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[3]), mips()); case TextureType::Buffer: - return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySize(ctx.U32[1], image), zero, - zero, mips()); + return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips()); } throw LogicError("Unspecified image type {}", info.type.Value()); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index e31cdc5e8..a440b557d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -179,7 +179,6 @@ Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); void EmitBitCastU16F16(EmitContext& ctx); Id EmitBitCastU32F32(EmitContext& ctx, Id value); void EmitBitCastU64F64(EmitContext& ctx); -void EmitBitCastS32F32(EmitContext& ctx); void EmitBitCastF16U16(EmitContext&); Id EmitBitCastF32U32(EmitContext& ctx, Id value); void EmitBitCastF64U64(EmitContext& ctx); @@ -540,7 +539,8 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, const IR::Value& offset, const IR::Value& offset2, Id dref); Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms); -Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, + const IR::Value& skip_mips); Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id derivates, Id offset, Id lod_clamp); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index c5db19d09..77ff8c573 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -17,7 +17,22 @@ Id GetThreadId(EmitContext& ctx) { Id WarpExtract(EmitContext& ctx, Id value) { const Id thread_id{GetThreadId(ctx)}; const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))}; - return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); + if (ctx.profile.has_broken_spirv_subgroup_mask_vector_extract_dynamic) { + const Id c0_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(0U)), + ctx.OpCompositeExtract(ctx.U32[1], value, 0U), ctx.Const(0U))}; + const Id c1_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(1U)), + ctx.OpCompositeExtract(ctx.U32[1], value, 1U), ctx.Const(0U))}; + const Id c2_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(2U)), + ctx.OpCompositeExtract(ctx.U32[1], value, 2U), ctx.Const(0U))}; + const Id c3_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(3U)), + ctx.OpCompositeExtract(ctx.U32[1], value, 3U), ctx.Const(0U))}; + const Id c0_or_c1{ctx.OpBitwiseOr(ctx.U32[1], c0_sel, c1_sel)}; + const Id c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c2_sel, c3_sel)}; + const Id c0_or_c1_or_c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c0_or_c1, c2_or_c3)}; + return c0_or_c1_or_c2_or_c3; + } else { + return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); + } } Id LoadMask(EmitContext& ctx, Id mask) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index a0c155fdb..bec5db173 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -25,16 +25,11 @@ enum class Operation { FPMax, }; -struct AttrInfo { - Id pointer; - Id id; - bool needs_cast; -}; - Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { const spv::ImageFormat format{spv::ImageFormat::Unknown}; const Id type{ctx.F32[1]}; const bool depth{desc.is_depth}; + const bool ms{desc.is_multisample}; switch (desc.type) { case TextureType::Color1D: return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format); @@ -42,9 +37,9 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format); case TextureType::Color2D: case TextureType::Color2DRect: - return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, ms, 1, format); case TextureType::ColorArray2D: - return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, ms, 1, format); case TextureType::Color3D: return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format); case TextureType::ColorCube: @@ -165,7 +160,7 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invo const u32 remainder{4 - element}; const TransformFeedbackVarying* xfb_varying{}; const size_t xfb_varying_index{base_attr_index + element}; - if (xfb_varying_index < ctx.runtime_info.xfb_varyings.size()) { + if (xfb_varying_index < ctx.runtime_info.xfb_count) { xfb_varying = &ctx.runtime_info.xfb_varyings[xfb_varying_index]; xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr; } @@ -205,23 +200,37 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) { return ctx.TypeVector(ctx.TypeInt(32, true), 4); case AttributeType::UnsignedInt: return ctx.U32[4]; + case AttributeType::SignedScaled: + return ctx.profile.support_scaled_attributes ? ctx.F32[4] + : ctx.TypeVector(ctx.TypeInt(32, true), 4); + case AttributeType::UnsignedScaled: + return ctx.profile.support_scaled_attributes ? ctx.F32[4] : ctx.U32[4]; case AttributeType::Disabled: break; } throw InvalidArgument("Invalid attribute type {}", type); } -std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { - const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; +InputGenericInfo GetAttributeInfo(EmitContext& ctx, AttributeType type, Id id) { switch (type) { case AttributeType::Float: - return AttrInfo{ctx.input_f32, ctx.F32[1], false}; + return InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None}; case AttributeType::UnsignedInt: - return AttrInfo{ctx.input_u32, ctx.U32[1], true}; + return InputGenericInfo{id, ctx.input_u32, ctx.U32[1], InputGenericLoadOp::Bitcast}; case AttributeType::SignedInt: - return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; + return InputGenericInfo{id, ctx.input_s32, ctx.TypeInt(32, true), + InputGenericLoadOp::Bitcast}; + case AttributeType::SignedScaled: + return ctx.profile.support_scaled_attributes + ? InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None} + : InputGenericInfo{id, ctx.input_s32, ctx.TypeInt(32, true), + InputGenericLoadOp::SToF}; + case AttributeType::UnsignedScaled: + return ctx.profile.support_scaled_attributes + ? InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None} + : InputGenericInfo{id, ctx.input_u32, ctx.U32[1], InputGenericLoadOp::UToF}; case AttributeType::Disabled: - return std::nullopt; + return InputGenericInfo{}; } throw InvalidArgument("Invalid attribute type {}", type); } @@ -745,18 +754,29 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { continue; } AddLabel(labels[label_index]); - const auto type{AttrTypes(*this, static_cast<u32>(index))}; - if (!type) { + const auto& generic{input_generics.at(index)}; + const Id generic_id{generic.id}; + if (!ValidId(generic_id)) { OpReturnValue(Const(0.0f)); ++label_index; continue; } - const Id generic_id{input_generics.at(index)}; - const Id pointer{is_array - ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) - : OpAccessChain(type->pointer, generic_id, masked_index)}; - const Id value{OpLoad(type->id, pointer)}; - const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value}; + const Id pointer{ + is_array ? OpAccessChain(generic.pointer_type, generic_id, vertex, masked_index) + : OpAccessChain(generic.pointer_type, generic_id, masked_index)}; + const Id value{OpLoad(generic.component_type, pointer)}; + const Id result{[this, generic, value]() { + switch (generic.load_op) { + case InputGenericLoadOp::Bitcast: + return OpBitcast(F32[1], value); + case InputGenericLoadOp::SToF: + return OpConvertSToF(F32[1], value); + case InputGenericLoadOp::UToF: + return OpConvertUToF(F32[1], value); + default: + return value; + }; + }()}; OpReturnValue(result); ++label_index; } @@ -1287,6 +1307,7 @@ void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_in .pointer_type = pointer_type, .image_type = image_type, .count = desc.count, + .is_multisample = desc.is_multisample, }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); @@ -1455,7 +1476,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { const Id id{DefineInput(*this, type, true)}; Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); Name(id, fmt::format("in_attr{}", index)); - input_generics[index] = id; + input_generics[index] = GetAttributeInfo(*this, input_type, id); if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) { Decorate(id, spv::Decoration::PassthroughNV); @@ -1570,7 +1591,8 @@ void EmitContext::DefineOutputs(const IR::Program& program) { Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); } if (info.stores_sample_mask) { - sample_mask = DefineOutput(*this, U32[1], std::nullopt); + const Id array_type{TypeArray(U32[1], Const(1U))}; + sample_mask = DefineOutput(*this, array_type, std::nullopt); Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask); } break; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index dbc5c55b9..e63330f11 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -35,6 +35,7 @@ struct TextureDefinition { Id pointer_type; Id image_type; u32 count; + bool is_multisample; }; struct TextureBufferDefinition { @@ -94,6 +95,20 @@ struct StorageDefinitions { Id U32x4{}; }; +enum class InputGenericLoadOp { + None, + Bitcast, + SToF, + UToF, +}; + +struct InputGenericInfo { + Id id; + Id pointer_type; + Id component_type; + InputGenericLoadOp load_op; +}; + struct GenericElementInfo { Id id{}; u32 first_element{}; @@ -282,7 +297,7 @@ public: bool need_input_position_indirect{}; Id input_position{}; - std::array<Id, 32> input_generics{}; + std::array<InputGenericInfo, 32> input_generics{}; Id output_point_size{}; Id output_position{}; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index eb2e49a68..b7caa4246 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -704,11 +704,6 @@ IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) { } template <> -IR::S32 IREmitter::BitCast<IR::S32, IR::F32>(const IR::F32& value) { - return Inst<IR::S32>(Opcode::BitCastS32F32, value); -} - -template <> IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) { return Inst<IR::F32>(Opcode::BitCastF32U32, value); } @@ -1851,15 +1846,16 @@ Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Valu return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling); } -Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) { +Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, + const IR::U1& skip_mips) { const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions : Opcode::BindlessImageQueryDimensions}; - return Inst(op, handle, lod); + return Inst(op, handle, lod, skip_mips); } Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, - TextureInstInfo info) { - return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod); + const IR::U1& skip_mips, TextureInstInfo info) { + return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod, skip_mips); } Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 7aaaa4ab0..f3c81dbe1 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -320,9 +320,10 @@ public: [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, const F32& lod, const Value& offset, TextureInstInfo info); - [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod); [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, - TextureInstInfo info); + const IR::U1& skip_mips); + [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, + const IR::U1& skip_mips, TextureInstInfo info); [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info); @@ -408,7 +409,8 @@ private: } template <typename T> - requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags { + requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) + struct Flags { Flags() = default; Flags(T proxy_) : proxy{proxy_} {} diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h index d155afd0f..e300714f3 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -38,7 +38,6 @@ constexpr Type U8{Type::U8}; constexpr Type U16{Type::U16}; constexpr Type U32{Type::U32}; constexpr Type U64{Type::U64}; -constexpr Type S32{Type::S32}; constexpr Type F16{Type::F16}; constexpr Type F32{Type::F32}; constexpr Type F64{Type::F64}; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 1fe3749cc..4447d67b0 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -175,7 +175,6 @@ OPCODE(SelectF64, F64, U1, OPCODE(BitCastU16F16, U16, F16, ) OPCODE(BitCastU32F32, U32, F32, ) OPCODE(BitCastU64F64, U64, F64, ) -OPCODE(BitCastS32F32, S32, F32, ) OPCODE(BitCastF16U16, F16, U16, ) OPCODE(BitCastF32U32, F32, U32, ) OPCODE(BitCastF64U64, F64, U64, ) @@ -483,7 +482,7 @@ OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) -OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, U1, ) OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageRead, U32x4, U32, Opaque, ) @@ -496,7 +495,7 @@ OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) -OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, U1, ) OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BoundImageRead, U32x4, U32, Opaque, ) @@ -509,7 +508,7 @@ OPCODE(ImageSampleDrefExplicitLod, F32, Opaq OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) -OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, ) +OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageRead, U32x4, Opaque, Opaque, ) diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index 5a7c706ad..04c8c4ddb 100644 --- a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h @@ -24,22 +24,21 @@ enum class Type { U16 = 1 << 7, U32 = 1 << 8, U64 = 1 << 9, - S32 = 1 << 10, - F16 = 1 << 11, - F32 = 1 << 12, - F64 = 1 << 13, - U32x2 = 1 << 14, - U32x3 = 1 << 15, - U32x4 = 1 << 16, - F16x2 = 1 << 17, - F16x3 = 1 << 18, - F16x4 = 1 << 19, - F32x2 = 1 << 20, - F32x3 = 1 << 21, - F32x4 = 1 << 22, - F64x2 = 1 << 23, - F64x3 = 1 << 24, - F64x4 = 1 << 25, + F16 = 1 << 10, + F32 = 1 << 11, + F64 = 1 << 12, + U32x2 = 1 << 13, + U32x3 = 1 << 14, + U32x4 = 1 << 15, + F16x2 = 1 << 16, + F16x3 = 1 << 17, + F16x4 = 1 << 18, + F32x2 = 1 << 19, + F32x3 = 1 << 20, + F32x4 = 1 << 21, + F64x2 = 1 << 22, + F64x3 = 1 << 23, + F64x4 = 1 << 24, }; DECLARE_ENUM_FLAG_OPERATORS(Type) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 30ba12316..346169328 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -23,8 +23,6 @@ Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {} Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} -Value::Value(s32 value) noexcept : type{Type::S32}, imm_s32{value} {} - Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {} Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} @@ -71,7 +69,6 @@ bool Value::operator==(const Value& other) const { return imm_u16 == other.imm_u16; case Type::U32: case Type::F32: - case Type::S32: return imm_u32 == other.imm_u32; case Type::U64: case Type::F64: diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 8b34356fd..c27546b0e 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -43,7 +43,6 @@ public: explicit Value(u8 value) noexcept; explicit Value(u16 value) noexcept; explicit Value(u32 value) noexcept; - explicit Value(s32 value) noexcept; explicit Value(f32 value) noexcept; explicit Value(u64 value) noexcept; explicit Value(f64 value) noexcept; @@ -66,7 +65,6 @@ public: [[nodiscard]] u8 U8() const; [[nodiscard]] u16 U16() const; [[nodiscard]] u32 U32() const; - [[nodiscard]] s32 S32() const; [[nodiscard]] f32 F32() const; [[nodiscard]] u64 U64() const; [[nodiscard]] f64 F64() const; @@ -86,7 +84,6 @@ private: u8 imm_u8; u16 imm_u16; u32 imm_u32; - s32 imm_s32; f32 imm_f32; u64 imm_u64; f64 imm_f64; @@ -101,9 +98,8 @@ public: TypedValue() = default; template <IR::Type other_type> - requires((other_type & type_) != IR::Type::Void) explicit(false) - TypedValue(const TypedValue<other_type>& value) - : Value(value) {} + requires((other_type & type_) != IR::Type::Void) + explicit(false) TypedValue(const TypedValue<other_type>& value) : Value(value) {} explicit TypedValue(const Value& value) : Value(value) { if ((value.Type() & type_) == IR::Type::Void) { @@ -194,16 +190,16 @@ public: void ReplaceOpcode(IR::Opcode opcode); template <typename FlagsType> - requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) - [[nodiscard]] FlagsType Flags() const noexcept { + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) + [[nodiscard]] FlagsType Flags() const noexcept { FlagsType ret; std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret)); return ret; } template <typename FlagsType> - requires(sizeof(FlagsType) <= sizeof(u32) && - std::is_trivially_copyable_v<FlagsType>) void SetFlags(FlagsType value) noexcept { + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) + void SetFlags(FlagsType value) noexcept { std::memcpy(&flags, &value, sizeof(value)); } @@ -268,7 +264,6 @@ using U8 = TypedValue<Type::U8>; using U16 = TypedValue<Type::U16>; using U32 = TypedValue<Type::U32>; using U64 = TypedValue<Type::U64>; -using S32 = TypedValue<Type::S32>; using F16 = TypedValue<Type::F16>; using F32 = TypedValue<Type::F32>; using F64 = TypedValue<Type::F64>; @@ -380,14 +375,6 @@ inline u32 Value::U32() const { return imm_u32; } -inline s32 Value::S32() const { - if (IsIdentity()) { - return inst->Arg(0).S32(); - } - DEBUG_ASSERT(type == Type::S32); - return imm_s32; -} - inline f32 Value::F32() const { if (IsIdentity()) { return inst->Arg(0).F32(); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp index 442365a26..c2a0ee6f1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp @@ -30,7 +30,7 @@ void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& hi union { u64 insn; BitField<0, 8, IR::Reg> dest_reg; - BitField<0, 8, IR::Reg> lo_bits_reg; + BitField<8, 8, IR::Reg> lo_bits_reg; BitField<37, 2, MaxShift> max_shift; BitField<47, 1, u64> cc; BitField<48, 2, u64> x_mode; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index 639da1e9c..eeb49444f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -102,12 +102,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::F32 value{v.ir.CompositeExtract(sample, element)}; if (element < 2) { - IR::U32 casted_value; - if (element == 0) { - casted_value = v.ir.ConvertFToU(32, value); - } else { - casted_value = v.ir.ConvertFToS(16, value); - } + IR::U32 casted_value = v.ir.ConvertFToU(32, value); v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8))); } else { v.F(dest_reg, value); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp index f8cfd4ab6..39af62559 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -15,11 +15,13 @@ enum class Mode : u64 { SamplePos = 5, }; -IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) { +IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg, u64 mask) { switch (mode) { case Mode::Dimension: { + const bool needs_num_mips{((mask >> 3) & 1) != 0}; + const IR::U1 skip_mips{v.ir.Imm1(!needs_num_mips)}; const IR::U32 lod{v.X(src_reg)}; - return v.ir.ImageQueryDimension(handle, lod); + return v.ir.ImageQueryDimension(handle, lod, skip_mips); } case Mode::TextureType: case Mode::SamplePos: @@ -46,7 +48,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) { handle = v.X(src_reg); ++src_reg; } - const IR::Value query{Query(v, handle, txq.mode, src_reg)}; + const IR::Value query{Query(v, handle, txq.mode, src_reg, txq.mask)}; IR::Reg dest_reg{txq.dest_reg}; for (int element = 0; element < 4; ++element) { if (((txq.mask >> element) & 1) == 0) { diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index a42453e90..928b35561 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -280,19 +280,25 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo RemoveUnreachableBlocks(program); // Replace instructions before the SSA rewrite + if (!host_info.support_float64) { + Optimization::LowerFp64ToFp32(program); + } if (!host_info.support_float16) { Optimization::LowerFp16ToFp32(program); } if (!host_info.support_int64) { Optimization::LowerInt64ToInt32(program); } + if (!host_info.support_conditional_barrier) { + Optimization::ConditionalBarrierPass(program); + } Optimization::SsaRewritePass(program); Optimization::ConstantPropagationPass(env, program); Optimization::PositionPass(env, program); - Optimization::GlobalMemoryToStorageBufferPass(program, host_info); + Optimization::GlobalMemoryToStorageBufferPass(program); Optimization::TexturePass(env, program, host_info); if (Settings::values.resolution_info.active) { diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 55fc48768..7d2ded907 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h @@ -10,14 +10,16 @@ namespace Shader { /// Misc information about the host struct HostTranslateInfo { + bool support_float64{}; ///< True when the device supports 64-bit floats bool support_float16{}; ///< True when the device supports 16-bit floats bool support_int64{}; ///< True when the device supports 64-bit integers bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS - u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry ///< passthrough shaders + bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional + ///< control flow }; } // namespace Shader diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 5a4195217..70292686f 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -424,6 +424,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2; info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4; break; + case IR::Opcode::LoadLocal: + case IR::Opcode::WriteLocal: + info.uses_local_memory = true; + break; default: break; } diff --git a/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp b/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp new file mode 100644 index 000000000..c3ed27f4f --- /dev/null +++ b/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp @@ -0,0 +1,44 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +void ConditionalBarrierPass(IR::Program& program) { + s32 conditional_control_flow_count{0}; + s32 conditional_return_count{0}; + for (IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::If: + case IR::AbstractSyntaxNode::Type::Loop: + conditional_control_flow_count++; + break; + case IR::AbstractSyntaxNode::Type::EndIf: + case IR::AbstractSyntaxNode::Type::Repeat: + conditional_control_flow_count--; + break; + case IR::AbstractSyntaxNode::Type::Unreachable: + case IR::AbstractSyntaxNode::Type::Return: + if (conditional_control_flow_count > 0) { + conditional_return_count++; + } + break; + case IR::AbstractSyntaxNode::Type::Block: + for (IR::Inst& inst : node.data.block->Instructions()) { + if ((conditional_control_flow_count > 0 || conditional_return_count > 0) && + inst.GetOpcode() == IR::Opcode::Barrier) { + LOG_WARNING(Shader, "Barrier within conditional control flow"); + inst.ReplaceOpcode(IR::Opcode::Identity); + } + } + break; + default: + break; + } + } + ASSERT(conditional_control_flow_count == 0); +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 9101722ba..d1e59f22e 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -11,7 +11,6 @@ #include "shader_recompiler/frontend/ir/breadth_first_search.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { @@ -36,6 +35,7 @@ struct Bias { u32 index; u32 offset_begin; u32 offset_end; + u32 alignment; }; using boost::container::flat_set; @@ -350,7 +350,8 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) .index = index.U32(), .offset = offset.U32(), }; - if (!Common::IsAligned(storage_buffer.offset, 16)) { + const u32 alignment{bias ? bias->alignment : 8U}; + if (!Common::IsAligned(storage_buffer.offset, alignment)) { // The SSBO pointer has to be aligned return std::nullopt; } @@ -372,6 +373,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) .index = 0, .offset_begin = 0x110, .offset_end = 0x610, + .alignment = 16, }; // Track the low address of the instruction const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)}; @@ -387,8 +389,11 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) storage_buffer = Track(low_addr, nullptr); if (!storage_buffer) { // If that also fails, use NVN fallbacks + LOG_WARNING(Shader, "Storage buffer failed to track, using global memory fallbacks"); return; } + LOG_WARNING(Shader, "Storage buffer tracked without bias, index {} offset {}", + storage_buffer->index, storage_buffer->offset); } // Collect storage buffer and the instruction if (IsGlobalMemoryWrite(inst)) { @@ -403,7 +408,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) } /// Returns the offset in indices (not bytes) for an equivalent storage instruction -IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) { +IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::U32 offset; if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { @@ -416,10 +421,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer } // Subtract the least significant 32 bits from the guest offset. The result is the storage // buffer offset in bytes. - IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; - - // Align the offset base to match the host alignment requirements - low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U))); + const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; return ir.ISub(offset, low_cbuf); } @@ -514,7 +516,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, } } // Anonymous namespace -void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) { +void GlobalMemoryToStorageBufferPass(IR::Program& program) { StorageInfo info; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { @@ -538,8 +540,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateIn const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; IR::Block* const block{storage_inst.block}; IR::Inst* const inst{storage_inst.inst}; - const IR::U32 offset{ - StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)}; + const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; Replace(*block, *inst, index, offset); } } diff --git a/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp new file mode 100644 index 000000000..5db7a38ad --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp @@ -0,0 +1,185 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/opcodes.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { + +constexpr s32 F64ToF32Exp = +1023 - 127; +constexpr s32 F32ToF64Exp = +127 - 1023; + +IR::F32 PackedF64ToF32(IR::IREmitter& ir, const IR::Value& packed) { + const IR::U32 lo{ir.CompositeExtract(packed, 0)}; + const IR::U32 hi{ir.CompositeExtract(packed, 1)}; + const IR::U32 sign{ir.BitFieldExtract(hi, ir.Imm32(31), ir.Imm32(1))}; + const IR::U32 exp{ir.BitFieldExtract(hi, ir.Imm32(20), ir.Imm32(11))}; + const IR::U32 mantissa_hi{ir.BitFieldExtract(hi, ir.Imm32(0), ir.Imm32(20))}; + const IR::U32 mantissa_lo{ir.BitFieldExtract(lo, ir.Imm32(29), ir.Imm32(3))}; + const IR::U32 mantissa{ + ir.BitwiseOr(ir.ShiftLeftLogical(mantissa_hi, ir.Imm32(3)), mantissa_lo)}; + const IR::U32 exp_if_subnorm{ + ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F64ToF32Exp)))}; + const IR::U32 exp_if_infnan{ + ir.Select(ir.IEqual(exp, ir.Imm32(0x7ff)), ir.Imm32(0xff), exp_if_subnorm)}; + const IR::U32 result{ + ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)), + ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(23)), mantissa))}; + return ir.BitCast<IR::F32>(result); +} + +IR::Value F32ToPackedF64(IR::IREmitter& ir, const IR::Value& raw) { + const IR::U32 value{ir.BitCast<IR::U32>(IR::F32(raw))}; + const IR::U32 sign{ir.BitFieldExtract(value, ir.Imm32(31), ir.Imm32(1))}; + const IR::U32 exp{ir.BitFieldExtract(value, ir.Imm32(23), ir.Imm32(8))}; + const IR::U32 mantissa{ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(23))}; + const IR::U32 mantissa_hi{ir.BitFieldExtract(mantissa, ir.Imm32(3), ir.Imm32(20))}; + const IR::U32 mantissa_lo{ir.BitFieldExtract(mantissa, ir.Imm32(0), ir.Imm32(3))}; + const IR::U32 exp_if_subnorm{ + ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F32ToF64Exp)))}; + const IR::U32 exp_if_infnan{ + ir.Select(ir.IEqual(exp, ir.Imm32(0xff)), ir.Imm32(0x7ff), exp_if_subnorm)}; + const IR::U32 lo{ir.ShiftLeftLogical(mantissa_lo, ir.Imm32(29))}; + const IR::U32 hi{ + ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)), + ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(20)), mantissa_hi))}; + return ir.CompositeConstruct(lo, hi); +} + +IR::Opcode Replace(IR::Opcode op) { + switch (op) { + case IR::Opcode::FPAbs64: + return IR::Opcode::FPAbs32; + case IR::Opcode::FPAdd64: + return IR::Opcode::FPAdd32; + case IR::Opcode::FPCeil64: + return IR::Opcode::FPCeil32; + case IR::Opcode::FPFloor64: + return IR::Opcode::FPFloor32; + case IR::Opcode::FPFma64: + return IR::Opcode::FPFma32; + case IR::Opcode::FPMul64: + return IR::Opcode::FPMul32; + case IR::Opcode::FPNeg64: + return IR::Opcode::FPNeg32; + case IR::Opcode::FPRoundEven64: + return IR::Opcode::FPRoundEven32; + case IR::Opcode::FPSaturate64: + return IR::Opcode::FPSaturate32; + case IR::Opcode::FPClamp64: + return IR::Opcode::FPClamp32; + case IR::Opcode::FPTrunc64: + return IR::Opcode::FPTrunc32; + case IR::Opcode::CompositeConstructF64x2: + return IR::Opcode::CompositeConstructF32x2; + case IR::Opcode::CompositeConstructF64x3: + return IR::Opcode::CompositeConstructF32x3; + case IR::Opcode::CompositeConstructF64x4: + return IR::Opcode::CompositeConstructF32x4; + case IR::Opcode::CompositeExtractF64x2: + return IR::Opcode::CompositeExtractF32x2; + case IR::Opcode::CompositeExtractF64x3: + return IR::Opcode::CompositeExtractF32x3; + case IR::Opcode::CompositeExtractF64x4: + return IR::Opcode::CompositeExtractF32x4; + case IR::Opcode::CompositeInsertF64x2: + return IR::Opcode::CompositeInsertF32x2; + case IR::Opcode::CompositeInsertF64x3: + return IR::Opcode::CompositeInsertF32x3; + case IR::Opcode::CompositeInsertF64x4: + return IR::Opcode::CompositeInsertF32x4; + case IR::Opcode::FPOrdEqual64: + return IR::Opcode::FPOrdEqual32; + case IR::Opcode::FPUnordEqual64: + return IR::Opcode::FPUnordEqual32; + case IR::Opcode::FPOrdNotEqual64: + return IR::Opcode::FPOrdNotEqual32; + case IR::Opcode::FPUnordNotEqual64: + return IR::Opcode::FPUnordNotEqual32; + case IR::Opcode::FPOrdLessThan64: + return IR::Opcode::FPOrdLessThan32; + case IR::Opcode::FPUnordLessThan64: + return IR::Opcode::FPUnordLessThan32; + case IR::Opcode::FPOrdGreaterThan64: + return IR::Opcode::FPOrdGreaterThan32; + case IR::Opcode::FPUnordGreaterThan64: + return IR::Opcode::FPUnordGreaterThan32; + case IR::Opcode::FPOrdLessThanEqual64: + return IR::Opcode::FPOrdLessThanEqual32; + case IR::Opcode::FPUnordLessThanEqual64: + return IR::Opcode::FPUnordLessThanEqual32; + case IR::Opcode::FPOrdGreaterThanEqual64: + return IR::Opcode::FPOrdGreaterThanEqual32; + case IR::Opcode::FPUnordGreaterThanEqual64: + return IR::Opcode::FPUnordGreaterThanEqual32; + case IR::Opcode::FPIsNan64: + return IR::Opcode::FPIsNan32; + case IR::Opcode::ConvertS16F64: + return IR::Opcode::ConvertS16F32; + case IR::Opcode::ConvertS32F64: + return IR::Opcode::ConvertS32F32; + case IR::Opcode::ConvertS64F64: + return IR::Opcode::ConvertS64F32; + case IR::Opcode::ConvertU16F64: + return IR::Opcode::ConvertU16F32; + case IR::Opcode::ConvertU32F64: + return IR::Opcode::ConvertU32F32; + case IR::Opcode::ConvertU64F64: + return IR::Opcode::ConvertU64F32; + case IR::Opcode::ConvertF32F64: + return IR::Opcode::Identity; + case IR::Opcode::ConvertF64F32: + return IR::Opcode::Identity; + case IR::Opcode::ConvertF64S8: + return IR::Opcode::ConvertF32S8; + case IR::Opcode::ConvertF64S16: + return IR::Opcode::ConvertF32S16; + case IR::Opcode::ConvertF64S32: + return IR::Opcode::ConvertF32S32; + case IR::Opcode::ConvertF64S64: + return IR::Opcode::ConvertF32S64; + case IR::Opcode::ConvertF64U8: + return IR::Opcode::ConvertF32U8; + case IR::Opcode::ConvertF64U16: + return IR::Opcode::ConvertF32U16; + case IR::Opcode::ConvertF64U32: + return IR::Opcode::ConvertF32U32; + case IR::Opcode::ConvertF64U64: + return IR::Opcode::ConvertF32U64; + default: + return op; + } +} + +void Lower(IR::Block& block, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::PackDouble2x32: { + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + inst.ReplaceUsesWith(PackedF64ToF32(ir, inst.Arg(0))); + break; + } + case IR::Opcode::UnpackDouble2x32: { + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + inst.ReplaceUsesWith(F32ToPackedF64(ir, inst.Arg(0))); + break; + } + default: + inst.ReplaceOpcode(Replace(inst.GetOpcode())); + break; + } +} + +} // Anonymous namespace + +void LowerFp64ToFp32(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + Lower(*block, inst); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 4ffad1172..629d18fa1 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -13,10 +13,12 @@ struct HostTranslateInfo; namespace Shader::Optimization { void CollectShaderInfoPass(Environment& env, IR::Program& program); +void ConditionalBarrierPass(IR::Program& program); void ConstantPropagationPass(Environment& env, IR::Program& program); void DeadCodeEliminationPass(IR::Program& program); -void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info); +void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Program& program); +void LowerFp64ToFp32(IR::Program& program); void LowerFp16ToFp32(IR::Program& program); void LowerInt64ToInt32(IR::Program& program); void RescalingPass(IR::Program& program); diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index f5c86fcb1..d374c976a 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -355,21 +355,21 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { }; } -TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { +u32 GetTextureHandle(Environment& env, const ConstBufferAddr& cbuf) { const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index}; const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset}; const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset) << cbuf.shift_left}; const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset) << cbuf.secondary_shift_left}; - return env.ReadTextureType(lhs_raw | rhs_raw); + return lhs_raw | rhs_raw; +} + +TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { + return env.ReadTextureType(GetTextureHandle(env, cbuf)); } TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAddr& cbuf) { - const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index}; - const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset}; - const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)}; - const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)}; - return env.ReadTexturePixelFormat(lhs_raw | rhs_raw); + return env.ReadTexturePixelFormat(GetTextureHandle(env, cbuf)); } class Descriptors { @@ -386,8 +386,10 @@ public: return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { return desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && + desc.shift_left == existing.shift_left && desc.secondary_cbuf_index == existing.secondary_cbuf_index && desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.secondary_shift_left == existing.secondary_shift_left && desc.count == existing.count && desc.size_shift == existing.size_shift && desc.has_secondary == existing.has_secondary; }); @@ -405,15 +407,20 @@ public: } u32 Add(const TextureDescriptor& desc) { - return Add(texture_descriptors, desc, [&desc](const auto& existing) { + const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) { return desc.type == existing.type && desc.is_depth == existing.is_depth && desc.has_secondary == existing.has_secondary && desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && + desc.shift_left == existing.shift_left && desc.secondary_cbuf_index == existing.secondary_cbuf_index && desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.secondary_shift_left == existing.secondary_shift_left && desc.count == existing.count && desc.size_shift == existing.size_shift; - }); + })}; + // TODO: Read this from TIC + texture_descriptors[index].is_multisample |= desc.is_multisample; + return index; } u32 Add(const ImageDescriptor& desc) { @@ -452,7 +459,8 @@ void PatchImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { const IR::Value coord(inst.Arg(1)); const IR::Value handle(ir.Imm32(0)); const IR::U32 lod{ir.Imm32(0)}; - const IR::Value texture_size = ir.ImageQueryDimension(handle, lod, info); + const IR::U1 skip_mips{ir.Imm1(true)}; + const IR::Value texture_size = ir.ImageQueryDimension(handle, lod, skip_mips, info); inst.SetArg( 1, ir.CompositeConstruct( ir.FPMul(IR::F32(ir.CompositeExtract(coord, 0)), @@ -486,10 +494,10 @@ void PatchTexelFetch(IR::Block& block, IR::Inst& inst, TexturePixelFormat pixel_ const IR::F32 w(ir.CompositeExtract(new_inst, 3)); const IR::F16F32F64 max_value(ir.Imm32(get_max_value())); const IR::Value converted = - ir.CompositeConstruct(ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(x)), max_value), - ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(y)), max_value), - ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(z)), max_value), - ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(w)), max_value)); + ir.CompositeConstruct(ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(x)), max_value), + ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(y)), max_value), + ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(z)), max_value), + ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(w)), max_value)); inst.ReplaceUsesWith(converted); } } // Anonymous namespace @@ -524,6 +532,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo const auto& cbuf{texture_inst.cbuf}; auto flags{inst->Flags<IR::TextureInstInfo>()}; + bool is_multisample{false}; switch (inst->GetOpcode()) { case IR::Opcode::ImageQueryDimensions: flags.type.Assign(ReadTextureType(env, cbuf)); @@ -538,6 +547,12 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo } break; case IR::Opcode::ImageFetch: + if (flags.type == TextureType::Color2D || flags.type == TextureType::Color2DRect || + flags.type == TextureType::ColorArray2D) { + is_multisample = !inst->Arg(4).IsEmpty(); + } else { + inst->SetArg(4, IR::U32{}); + } if (flags.type != TextureType::Color1D) { break; } @@ -613,6 +628,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo index = descriptors.Add(TextureDescriptor{ .type = flags.type, .is_depth = flags.is_depth != 0, + .is_multisample = is_multisample, .has_secondary = cbuf.has_secondary, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h index 2b42c4ba2..5d648b159 100644 --- a/src/shader_recompiler/object_pool.h +++ b/src/shader_recompiler/object_pool.h @@ -10,7 +10,7 @@ namespace Shader { template <typename T> -requires std::is_destructible_v<T> + requires std::is_destructible_v<T> class ObjectPool { public: explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} { @@ -18,7 +18,7 @@ public: } template <typename... Args> - requires std::is_constructible_v<T, Args...> + requires std::is_constructible_v<T, Args...> [[nodiscard]] T* Create(Args&&... args) { return std::construct_at(Memory(), std::forward<Args>(args)...); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 253e0d0bd..9ca97f6a4 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -43,6 +43,7 @@ struct Profile { bool support_gl_variable_aoffi{}; bool support_gl_sparse_textures{}; bool support_gl_derivative_control{}; + bool support_scaled_attributes{}; bool warp_size_potentially_larger_than_guest{}; @@ -52,6 +53,10 @@ struct Profile { bool need_declared_frag_colors{}; /// Prevents fast math optimizations that may cause inaccuracies bool need_fastmath_off{}; + /// Some GPU vendors use a different rounding precision when calculating texture pixel + /// coordinates with the 16.8 format in the ImageGather instruction than the Maxwell + /// architecture. Applying an offset does fix this mismatching rounding behaviour. + bool need_gather_subpixel_offset{}; /// OpFClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; @@ -73,6 +78,8 @@ struct Profile { bool has_gl_bool_ref_bug{}; /// Ignores SPIR-V ordered vs unordered using GLSL semantics bool ignore_nan_fp_comparisons{}; + /// Some drivers have broken support for OpVectorExtractDynamic on subgroup mask inputs + bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{}; u32 gl_max_compute_smem_size{}; }; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 549b81ef7..619c0b138 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -17,6 +17,8 @@ enum class AttributeType : u8 { Float, SignedInt, UnsignedInt, + SignedScaled, + UnsignedScaled, Disabled, }; @@ -82,7 +84,8 @@ struct RuntimeInfo { bool glasm_use_storage_buffers{}; /// Transform feedback state for each varying - std::vector<TransformFeedbackVarying> xfb_varyings; + std::array<TransformFeedbackVarying, 256> xfb_varyings{}; + u32 xfb_count{0}; }; } // namespace Shader diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index f93181e1e..b4b4afd37 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -109,6 +109,7 @@ using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescrip struct TextureDescriptor { TextureType type; bool is_depth; + bool is_multisample; bool has_secondary; u32 cbuf_index; u32 cbuf_offset; @@ -171,6 +172,7 @@ struct Info { bool stores_indexed_attributes{}; bool stores_global_memory{}; + bool uses_local_memory{}; bool uses_fp16{}; bool uses_fp64{}; |