diff options
Diffstat (limited to 'src/shader_recompiler/backend')
19 files changed, 323 insertions, 185 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 0cb1e193e..b795c0179 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -279,6 +279,8 @@ void SetupOptions(const IR::Program& program, const Profile& profile, header += "OPTION NV_internal;" "OPTION NV_shader_storage_buffer;" "OPTION NV_gpu_program_fp64;"; + // TODO: Enable only when MS is used + header += "OPTION NV_texture_multisample;"; if (info.uses_int64_bit_atomics) { header += "OPTION NV_shader_atomic_int64;"; } @@ -459,7 +461,7 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I header += fmt::format("R{},", index); } if (program.local_memory_size > 0) { - header += fmt::format("lmem[{}],", program.local_memory_size); + header += fmt::format("lmem[{}],", Common::DivCeil(program.local_memory_size, 4U)); } if (program.info.uses_fswzadd) { header += "FSWZA[4],FSWZB[4],"; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp index 5bfdecc09..2fc2a0ac6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -43,10 +43,6 @@ void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) { Alias(inst, value); } -void EmitBitCastS32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Alias(inst, value); -} - void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) { Alias(inst, value); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index e67e80fac..85ee27333 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -59,7 +59,14 @@ std::string Image(EmitContext& ctx, IR::TextureInstInfo info, } } -std::string_view TextureType(IR::TextureInstInfo info) { +bool IsTextureMsaa(EmitContext& ctx, const IR::TextureInstInfo& info) { + if (info.type == TextureType::Buffer) { + return false; + } + return ctx.info.texture_descriptors.at(info.descriptor_index).is_multisample; +} + +std::string_view TextureType(IR::TextureInstInfo info, bool is_ms = false) { if (info.is_depth) { switch (info.type) { case TextureType::Color1D: @@ -88,9 +95,9 @@ std::string_view TextureType(IR::TextureInstInfo info) { return "ARRAY1D"; case TextureType::Color2D: case TextureType::Color2DRect: - return "2D"; + return is_ms ? "2DMS" : "2D"; case TextureType::ColorArray2D: - return "ARRAY2D"; + return is_ms ? "ARRAY2DMS" : "ARRAY2D"; case TextureType::Color3D: return "3D"; case TextureType::ColorCube: @@ -510,15 +517,16 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) { const auto info{inst.Flags<IR::TextureInstInfo>()}; const auto sparse_inst{PrepareSparse(inst)}; + const bool is_multisample{ms.type != Type::Void}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; - const std::string_view type{TextureType(info)}; + const std::string_view type{TextureType(info, is_multisample)}; const std::string texture{Texture(ctx, info, index)}; const std::string offset_vec{Offset(ctx, offset)}; const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; const Register ret{ctx.reg_alloc.Define(inst)}; if (info.type == TextureType::Buffer) { ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec); - } else if (ms.type != Type::Void) { + } else if (is_multisample) { ctx.Add("MOV.S {}.w,{};" "TXFMS.F{} {},{},{},{}{};", coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec); @@ -531,10 +539,11 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, } void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - ScalarS32 lod) { + ScalarS32 lod, [[maybe_unused]] const IR::Value& skip_mips) { const auto info{inst.Flags<IR::TextureInstInfo>()}; const std::string texture{Texture(ctx, info, index)}; - const std::string_view type{TextureType(info)}; + const bool is_msaa{IsTextureMsaa(ctx, info)}; + const std::string_view type{TextureType(info, is_msaa)}; ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index eaaf9ba39..1a1ea61d5 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -197,7 +197,6 @@ void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Regist void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); -void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); @@ -582,7 +581,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms); void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - ScalarS32 lod); + ScalarS32 lod, const IR::Value& skip_mips); void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, const IR::Value& derivatives, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 911181c43..376a05827 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -398,162 +398,162 @@ void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value } void EmitGlobalAtomicIAdd32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicSMin32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicUMin32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicSMax32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicUMax32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicInc32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicDec32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicAnd32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicOr32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicXor32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicExchange32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicIAdd64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicSMin64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicUMin64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicSMax64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicUMax64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicInc64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicDec64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicAnd64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicOr64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicXor64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicExchange64(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicIAdd32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicSMin32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicUMin32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicSMax32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicUMax32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicInc32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicDec32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicAnd32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicOr32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicXor32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicExchange32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicAddF32(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicAddF16x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicAddF32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicMinF16x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicMinF32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicMaxF16x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } void EmitGlobalAtomicMaxF32x2(EmitContext&) { - throw NotImplementedException("GLSL Instrucion"); + throw NotImplementedException("GLSL Instruction"); } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index 8e5e6cf1f..1be4a0f59 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -48,10 +48,6 @@ void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) ctx.AddU64("{}=doubleBitsToUint64({});", inst, value); } -void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF32("{}=ftoi({});", inst, value); -} - void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index cecdbb9d6..418505475 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -25,6 +25,13 @@ std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::V return fmt::format("img{}{}", def.binding, index_offset); } +bool IsTextureMsaa(EmitContext& ctx, const IR::TextureInstInfo& info) { + if (info.type == TextureType::Buffer) { + return false; + } + return ctx.info.texture_descriptors.at(info.descriptor_index).is_multisample; +} + std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) { switch (info.type) { case TextureType::Color1D: @@ -136,6 +143,21 @@ IR::Inst* PrepareSparse(IR::Inst& inst) { } return sparse_inst; } + +std::string ImageGatherSubpixelOffset(const IR::TextureInstInfo& info, std::string_view texture, + std::string_view coords) { + switch (info.type) { + case TextureType::Color2D: + case TextureType::Color2DRect: + return fmt::format("{}+vec2(0.001953125)/vec2(textureSize({}, 0))", coords, texture); + case TextureType::ColorArray2D: + case TextureType::ColorCube: + return fmt::format("vec3({0}.xy+vec2(0.001953125)/vec2(textureSize({1}, 0)),{0}.z)", coords, + texture); + default: + return std::string{coords}; + } +} } // Anonymous namespace void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, @@ -333,6 +355,13 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); ctx.AddU1("{}=true;", *sparse_inst); } + std::string coords_with_subpixel_offset; + if (ctx.profile.need_gather_subpixel_offset) { + // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on + // AMD hardware as on Maxwell or other Nvidia architectures. + coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords); + coords = coords_with_subpixel_offset; + } if (!sparse_inst || !supports_sparse) { if (offset.IsEmpty()) { ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, @@ -380,6 +409,13 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); ctx.AddU1("{}=true;", *sparse_inst); } + std::string coords_with_subpixel_offset; + if (ctx.profile.need_gather_subpixel_offset) { + // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on + // AMD hardware as on Maxwell or other Nvidia architectures. + coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords); + coords = coords_with_subpixel_offset; + } if (!sparse_inst || !supports_sparse) { if (offset.IsEmpty()) { ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); @@ -414,7 +450,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view offset, std::string_view lod, - [[maybe_unused]] std::string_view ms) { + std::string_view ms) { const auto info{inst.Flags<IR::TextureInstInfo>()}; if (info.has_bias) { throw NotImplementedException("EmitImageFetch Bias texture samples"); @@ -431,19 +467,24 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, ctx.AddU1("{}=true;", *sparse_inst); } if (!sparse_inst || !supports_sparse) { - if (!offset.empty()) { - ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, - CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info)); + const auto int_coords{CoordsCastToInt(coords, info)}; + if (!ms.empty()) { + ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, ms); + } else if (!offset.empty()) { + ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, int_coords, lod, + CoordsCastToInt(offset, info)); } else { if (info.type == TextureType::Buffer) { ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords); } else { - ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, - CoordsCastToInt(coords, info), lod); + ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, lod); } } return; } + if (!ms.empty()) { + throw NotImplementedException("EmitImageFetch Sparse MSAA samples"); + } if (!offset.empty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", *sparse_inst, texture, CastToIntVec(coords, info), lod, @@ -455,29 +496,36 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, } void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view lod) { + std::string_view lod, const IR::Value& skip_mips_val) { const auto info{inst.Flags<IR::TextureInstInfo>()}; const auto texture{Texture(ctx, info, index)}; + const bool is_msaa{IsTextureMsaa(ctx, info)}; + const bool skip_mips{skip_mips_val.U1()}; + const auto mips{skip_mips ? "0u" : fmt::format("uint(textureQueryLevels({}))", texture)}; + if (is_msaa && !skip_mips) { + throw NotImplementedException("EmitImageQueryDimensions MSAA QueryLevels"); + } + if (info.type == TextureType::Buffer && !skip_mips) { + throw NotImplementedException("EmitImageQueryDimensions TextureType::Buffer QueryLevels"); + } + const bool uses_lod{!is_msaa && info.type != TextureType::Buffer}; + const auto lod_str{uses_lod ? fmt::format(",int({})", lod) : ""}; switch (info.type) { case TextureType::Color1D: - return ctx.AddU32x4( - "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst, - texture, lod, texture); + return ctx.AddU32x4("{}=uvec4(uint(textureSize({}{})),0u,0u,{});", inst, texture, lod_str, + mips); case TextureType::ColorArray1D: case TextureType::Color2D: case TextureType::ColorCube: case TextureType::Color2DRect: - return ctx.AddU32x4( - "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst, - texture, lod, texture); + return ctx.AddU32x4("{}=uvec4(uvec2(textureSize({}{})),0u,{});", inst, texture, lod_str, + mips); case TextureType::ColorArray2D: case TextureType::Color3D: case TextureType::ColorArrayCube: - return ctx.AddU32x4( - "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture, - lod, texture); + return ctx.AddU32x4("{}=uvec4(uvec3(textureSize({}{})),{});", inst, texture, lod_str, mips); case TextureType::Buffer: - throw NotImplementedException("EmitImageQueryDimensions Texture buffers"); + return ctx.AddU32x4("{}=uvec4(uint(textureSize({})),0u,0u,{});", inst, texture, mips); } throw LogicError("Unspecified image type {}", info.type.Value()); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 4151c89de..8d0a65047 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -231,7 +231,6 @@ void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst); void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst); void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); @@ -655,7 +654,7 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view offset, std::string_view lod, std::string_view ms); void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view lod); + std::string_view lod, const IR::Value& skip_mips); void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords); void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp index 5d01ec0cd..9ff4028c2 100644 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp @@ -61,24 +61,28 @@ std::string OutputDecorator(Stage stage, u32 size) { } } -std::string_view SamplerType(TextureType type, bool is_depth) { - if (is_depth) { - switch (type) { - case TextureType::Color1D: - return "sampler1DShadow"; - case TextureType::ColorArray1D: - return "sampler1DArrayShadow"; - case TextureType::Color2D: - return "sampler2DShadow"; - case TextureType::ColorArray2D: - return "sampler2DArrayShadow"; - case TextureType::ColorCube: - return "samplerCubeShadow"; - case TextureType::ColorArrayCube: - return "samplerCubeArrayShadow"; - default: - throw NotImplementedException("Texture type: {}", type); - } +std::string_view DepthSamplerType(TextureType type) { + switch (type) { + case TextureType::Color1D: + return "sampler1DShadow"; + case TextureType::ColorArray1D: + return "sampler1DArrayShadow"; + case TextureType::Color2D: + return "sampler2DShadow"; + case TextureType::ColorArray2D: + return "sampler2DArrayShadow"; + case TextureType::ColorCube: + return "samplerCubeShadow"; + case TextureType::ColorArrayCube: + return "samplerCubeArrayShadow"; + default: + throw NotImplementedException("Texture type: {}", type); + } +} + +std::string_view ColorSamplerType(TextureType type, bool is_multisample = false) { + if (is_multisample) { + ASSERT(type == TextureType::Color2D || type == TextureType::ColorArray2D); } switch (type) { case TextureType::Color1D: @@ -87,9 +91,9 @@ std::string_view SamplerType(TextureType type, bool is_depth) { return "sampler1DArray"; case TextureType::Color2D: case TextureType::Color2DRect: - return "sampler2D"; + return is_multisample ? "sampler2DMS" : "sampler2D"; case TextureType::ColorArray2D: - return "sampler2DArray"; + return is_multisample ? "sampler2DMSArray" : "sampler2DArray"; case TextureType::Color3D: return "sampler3D"; case TextureType::ColorCube: @@ -306,12 +310,6 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (runtime_info.force_early_z) { header += "layout(early_fragment_tests)in;"; } - if (info.uses_sample_id) { - header += "in int gl_SampleID;"; - } - if (info.stores_sample_mask) { - header += "out int gl_SampleMask[];"; - } break; case Stage::Compute: stage_name = "cs"; @@ -481,7 +479,7 @@ void EmitContext::DefineGenericOutput(size_t index, u32 invocations) { const u32 remainder{4 - element}; const TransformFeedbackVarying* xfb_varying{}; const size_t xfb_varying_index{base_index + element}; - if (xfb_varying_index < runtime_info.xfb_varyings.size()) { + if (xfb_varying_index < runtime_info.xfb_count) { xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index]; xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr; } @@ -677,7 +675,7 @@ void EmitContext::SetupTextures(Bindings& bindings) { texture_buffers.reserve(info.texture_buffer_descriptors.size()); for (const auto& desc : info.texture_buffer_descriptors) { texture_buffers.push_back({bindings.texture, desc.count}); - const auto sampler_type{SamplerType(TextureType::Buffer, false)}; + const auto sampler_type{ColorSamplerType(TextureType::Buffer)}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, sampler_type, bindings.texture, array_decorator); @@ -686,7 +684,8 @@ void EmitContext::SetupTextures(Bindings& bindings) { textures.reserve(info.texture_descriptors.size()); for (const auto& desc : info.texture_descriptors) { textures.push_back({bindings.texture, desc.count}); - const auto sampler_type{SamplerType(desc.type, desc.is_depth)}; + const auto sampler_type{desc.is_depth ? DepthSamplerType(desc.type) + : ColorSamplerType(desc.type, desc.is_multisample)}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, sampler_type, bindings.texture, array_decorator); diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.h b/src/shader_recompiler/backend/glsl/glsl_emit_context.h index dfd10ac28..7587f7bab 100644 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.h +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.h @@ -49,7 +49,7 @@ public: void Add(const char* format_str, IR::Inst& inst, Args&&... args) { const auto var_def{var_alloc.AddDefine(inst, type)}; if (var_def.empty()) { - // skip assigment. + // skip assignment. code += fmt::format(fmt::runtime(format_str + 3), std::forward<Args>(args)...); } else { code += fmt::format(fmt::runtime(format_str), var_def, std::forward<Args>(args)...); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 0f86a8004..34592a01f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -387,7 +387,7 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr } void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) { - if (ctx.runtime_info.xfb_varyings.empty()) { + if (ctx.runtime_info.xfb_count == 0) { return; } ctx.AddCapability(spv::Capability::TransformFeedback); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 4b3043b65..0ce73f289 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -69,6 +69,11 @@ Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id), Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) { + if (!ctx.profile.support_descriptor_aliasing) { + LOG_WARNING(Shader_SPIRV, "Descriptor aliasing not supported, this cannot be atomic."); + return ctx.ConstantNull(ctx.U64); + } + if (ctx.profile.support_int64_atomics) { const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, binding, offset, sizeof(u64))}; @@ -86,6 +91,11 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& Id StorageAtomicU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) { + if (!ctx.profile.support_descriptor_aliasing) { + LOG_WARNING(Shader_SPIRV, "Descriptor aliasing not supported, this cannot be atomic."); + return ctx.ConstantNull(ctx.U32[2]); + } + LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, binding, offset, sizeof(u32[2]))}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index 50daacd95..c4ca28d11 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -18,10 +18,6 @@ void EmitBitCastU64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitBitCastS32F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - void EmitBitCastF16U16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 0cd87a48f..2868fc57d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -10,27 +10,6 @@ namespace Shader::Backend::SPIRV { namespace { -struct AttrInfo { - Id pointer; - Id id; - bool needs_cast; -}; - -std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { - const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; - switch (type) { - case AttributeType::Float: - return AttrInfo{ctx.input_f32, ctx.F32[1], false}; - case AttributeType::UnsignedInt: - return AttrInfo{ctx.input_u32, ctx.U32[1], true}; - case AttributeType::SignedInt: - return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; - case AttributeType::Disabled: - return std::nullopt; - } - throw InvalidArgument("Invalid attribute type {}", type); -} - template <typename... Args> Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) { switch (ctx.stage) { @@ -302,15 +281,26 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const u32 element{static_cast<u32>(attr) % 4}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - const std::optional<AttrInfo> type{AttrTypes(ctx, index)}; - if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index, element)) { + const auto& generic{ctx.input_generics.at(index)}; + if (!ValidId(generic.id)) { // Attribute is disabled or varying component is not written return ctx.Const(element == 3 ? 1.0f : 0.0f); } - const Id generic_id{ctx.input_generics.at(index)}; - const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))}; - const Id value{ctx.OpLoad(type->id, pointer)}; - return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; + const Id pointer{ + AttrPointer(ctx, generic.pointer_type, vertex, generic.id, ctx.Const(element))}; + const Id value{ctx.OpLoad(generic.component_type, pointer)}; + return [&ctx, generic, value]() { + switch (generic.load_op) { + case InputGenericLoadOp::Bitcast: + return ctx.OpBitcast(ctx.F32[1], value); + case InputGenericLoadOp::SToF: + return ctx.OpConvertSToF(ctx.F32[1], value); + case InputGenericLoadOp::UToF: + return ctx.OpConvertUToF(ctx.F32[1], value); + default: + return value; + }; + }(); } switch (attr) { case IR::Attribute::PrimitiveId: @@ -339,9 +329,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { if (ctx.profile.support_vertex_instance_id) { return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_id)); } else { - const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)}; - const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; - return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_index)); } case IR::Attribute::BaseInstance: return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance)); @@ -386,9 +374,7 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { if (ctx.profile.support_vertex_instance_id) { return ctx.OpLoad(ctx.U32[1], ctx.vertex_id); } else { - const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)}; - const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; - return ctx.OpISub(ctx.U32[1], index, base); + return ctx.OpLoad(ctx.U32[1], ctx.vertex_index); } case IR::Attribute::BaseInstance: return ctx.OpLoad(ctx.U32[1], ctx.base_instance); @@ -473,7 +459,8 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { } void EmitSetSampleMask(EmitContext& ctx, Id value) { - ctx.OpStore(ctx.sample_mask, value); + const Id pointer{ctx.OpAccessChain(ctx.output_u32, ctx.sample_mask, ctx.u32_zero_value)}; + ctx.OpStore(pointer, value); } void EmitSetFragDepth(EmitContext& ctx, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index fb5799c42..7d901c04b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -201,6 +201,13 @@ Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { } } +bool IsTextureMsaa(EmitContext& ctx, const IR::TextureInstInfo& info) { + if (info.type == TextureType::Buffer) { + return false; + } + return ctx.textures.at(info.descriptor_index).is_multisample; +} + Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) { const auto info{inst->Flags<IR::TextureInstInfo>()}; if (info.relaxed_precision != 0) { @@ -254,6 +261,30 @@ Id BitTest(EmitContext& ctx, Id mask, Id bit) { const Id bit_value{ctx.OpBitwiseAnd(ctx.U32[1], shifted, ctx.Const(1u))}; return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value); } + +Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info, Id texture, + Id coords) { + // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on + // AMD hardware as on Maxwell or other Nvidia architectures. + const auto calculate_coords{[&](size_t dim) { + const Id nudge{ctx.Const(0x1p-9f)}; + const Id image_size{ctx.OpImageQuerySizeLod(ctx.U32[dim], texture, ctx.u32_zero_value)}; + Id offset{dim == 2 ? ctx.ConstantComposite(ctx.F32[dim], nudge, nudge) + : ctx.ConstantComposite(ctx.F32[dim], nudge, nudge, ctx.f32_zero_value)}; + offset = ctx.OpFDiv(ctx.F32[dim], offset, ctx.OpConvertUToF(ctx.F32[dim], image_size)); + return ctx.OpFAdd(ctx.F32[dim], coords, offset); + }}; + switch (info.type) { + case TextureType::Color2D: + case TextureType::Color2DRect: + return calculate_coords(2); + case TextureType::ColorArray2D: + case TextureType::ColorCube: + return calculate_coords(3); + default: + return coords; + } +} } // Anonymous namespace Id EmitBindlessImageSampleImplicitLod(EmitContext&) { @@ -416,6 +447,9 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id const IR::Value& offset, const IR::Value& offset2) { const auto info{inst->Flags<IR::TextureInstInfo>()}; const ImageOperands operands(ctx, offset, offset2); + if (ctx.profile.need_gather_subpixel_offset) { + coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords); + } return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), operands.MaskOptional(), operands.Span()); @@ -425,6 +459,9 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, const IR::Value& offset, const IR::Value& offset2, Id dref) { const auto info{inst->Flags<IR::TextureInstInfo>()}; const ImageOperands operands(ctx, offset, offset2); + if (ctx.profile.need_gather_subpixel_offset) { + coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords); + } return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), operands.Span()); @@ -436,34 +473,42 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c if (info.type == TextureType::Buffer) { lod = Id{}; } + if (Sirit::ValidId(ms)) { + // This image is multisampled, lod must be implicit + lod = Id{}; + } const ImageOperands operands(offset, lod, ms); return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); } -Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, + const IR::Value& skip_mips_val) { const auto info{inst->Flags<IR::TextureInstInfo>()}; const Id image{TextureImage(ctx, info, index)}; const Id zero{ctx.u32_zero_value}; - const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; + const bool skip_mips{skip_mips_val.U1()}; + const auto mips{[&] { return skip_mips ? zero : ctx.OpImageQueryLevels(ctx.U32[1], image); }}; + const bool is_msaa{IsTextureMsaa(ctx, info)}; + const bool uses_lod{!is_msaa && info.type != TextureType::Buffer}; + const auto query{[&](Id type) { + return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod) + : ctx.OpImageQuerySize(type, image); + }}; switch (info.type) { case TextureType::Color1D: - return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod), - zero, zero, mips()); + return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips()); case TextureType::ColorArray1D: case TextureType::Color2D: case TextureType::ColorCube: case TextureType::Color2DRect: - return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod), - zero, mips()); + return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[2]), zero, mips()); case TextureType::ColorArray2D: case TextureType::Color3D: case TextureType::ColorArrayCube: - return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod), - mips()); + return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[3]), mips()); case TextureType::Buffer: - return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySize(ctx.U32[1], image), zero, - zero, mips()); + return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips()); } throw LogicError("Unspecified image type {}", info.type.Value()); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index e31cdc5e8..a440b557d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -179,7 +179,6 @@ Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); void EmitBitCastU16F16(EmitContext& ctx); Id EmitBitCastU32F32(EmitContext& ctx, Id value); void EmitBitCastU64F64(EmitContext& ctx); -void EmitBitCastS32F32(EmitContext& ctx); void EmitBitCastF16U16(EmitContext&); Id EmitBitCastF32U32(EmitContext& ctx, Id value); void EmitBitCastF64U64(EmitContext& ctx); @@ -540,7 +539,8 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, const IR::Value& offset, const IR::Value& offset2, Id dref); Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms); -Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, + const IR::Value& skip_mips); Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id derivates, Id offset, Id lod_clamp); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index c5db19d09..77ff8c573 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -17,7 +17,22 @@ Id GetThreadId(EmitContext& ctx) { Id WarpExtract(EmitContext& ctx, Id value) { const Id thread_id{GetThreadId(ctx)}; const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))}; - return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); + if (ctx.profile.has_broken_spirv_subgroup_mask_vector_extract_dynamic) { + const Id c0_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(0U)), + ctx.OpCompositeExtract(ctx.U32[1], value, 0U), ctx.Const(0U))}; + const Id c1_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(1U)), + ctx.OpCompositeExtract(ctx.U32[1], value, 1U), ctx.Const(0U))}; + const Id c2_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(2U)), + ctx.OpCompositeExtract(ctx.U32[1], value, 2U), ctx.Const(0U))}; + const Id c3_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(3U)), + ctx.OpCompositeExtract(ctx.U32[1], value, 3U), ctx.Const(0U))}; + const Id c0_or_c1{ctx.OpBitwiseOr(ctx.U32[1], c0_sel, c1_sel)}; + const Id c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c2_sel, c3_sel)}; + const Id c0_or_c1_or_c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c0_or_c1, c2_or_c3)}; + return c0_or_c1_or_c2_or_c3; + } else { + return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); + } } Id LoadMask(EmitContext& ctx, Id mask) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index a0c155fdb..bec5db173 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -25,16 +25,11 @@ enum class Operation { FPMax, }; -struct AttrInfo { - Id pointer; - Id id; - bool needs_cast; -}; - Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { const spv::ImageFormat format{spv::ImageFormat::Unknown}; const Id type{ctx.F32[1]}; const bool depth{desc.is_depth}; + const bool ms{desc.is_multisample}; switch (desc.type) { case TextureType::Color1D: return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format); @@ -42,9 +37,9 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format); case TextureType::Color2D: case TextureType::Color2DRect: - return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, ms, 1, format); case TextureType::ColorArray2D: - return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, ms, 1, format); case TextureType::Color3D: return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format); case TextureType::ColorCube: @@ -165,7 +160,7 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invo const u32 remainder{4 - element}; const TransformFeedbackVarying* xfb_varying{}; const size_t xfb_varying_index{base_attr_index + element}; - if (xfb_varying_index < ctx.runtime_info.xfb_varyings.size()) { + if (xfb_varying_index < ctx.runtime_info.xfb_count) { xfb_varying = &ctx.runtime_info.xfb_varyings[xfb_varying_index]; xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr; } @@ -205,23 +200,37 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) { return ctx.TypeVector(ctx.TypeInt(32, true), 4); case AttributeType::UnsignedInt: return ctx.U32[4]; + case AttributeType::SignedScaled: + return ctx.profile.support_scaled_attributes ? ctx.F32[4] + : ctx.TypeVector(ctx.TypeInt(32, true), 4); + case AttributeType::UnsignedScaled: + return ctx.profile.support_scaled_attributes ? ctx.F32[4] : ctx.U32[4]; case AttributeType::Disabled: break; } throw InvalidArgument("Invalid attribute type {}", type); } -std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { - const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; +InputGenericInfo GetAttributeInfo(EmitContext& ctx, AttributeType type, Id id) { switch (type) { case AttributeType::Float: - return AttrInfo{ctx.input_f32, ctx.F32[1], false}; + return InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None}; case AttributeType::UnsignedInt: - return AttrInfo{ctx.input_u32, ctx.U32[1], true}; + return InputGenericInfo{id, ctx.input_u32, ctx.U32[1], InputGenericLoadOp::Bitcast}; case AttributeType::SignedInt: - return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; + return InputGenericInfo{id, ctx.input_s32, ctx.TypeInt(32, true), + InputGenericLoadOp::Bitcast}; + case AttributeType::SignedScaled: + return ctx.profile.support_scaled_attributes + ? InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None} + : InputGenericInfo{id, ctx.input_s32, ctx.TypeInt(32, true), + InputGenericLoadOp::SToF}; + case AttributeType::UnsignedScaled: + return ctx.profile.support_scaled_attributes + ? InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None} + : InputGenericInfo{id, ctx.input_u32, ctx.U32[1], InputGenericLoadOp::UToF}; case AttributeType::Disabled: - return std::nullopt; + return InputGenericInfo{}; } throw InvalidArgument("Invalid attribute type {}", type); } @@ -745,18 +754,29 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { continue; } AddLabel(labels[label_index]); - const auto type{AttrTypes(*this, static_cast<u32>(index))}; - if (!type) { + const auto& generic{input_generics.at(index)}; + const Id generic_id{generic.id}; + if (!ValidId(generic_id)) { OpReturnValue(Const(0.0f)); ++label_index; continue; } - const Id generic_id{input_generics.at(index)}; - const Id pointer{is_array - ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) - : OpAccessChain(type->pointer, generic_id, masked_index)}; - const Id value{OpLoad(type->id, pointer)}; - const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value}; + const Id pointer{ + is_array ? OpAccessChain(generic.pointer_type, generic_id, vertex, masked_index) + : OpAccessChain(generic.pointer_type, generic_id, masked_index)}; + const Id value{OpLoad(generic.component_type, pointer)}; + const Id result{[this, generic, value]() { + switch (generic.load_op) { + case InputGenericLoadOp::Bitcast: + return OpBitcast(F32[1], value); + case InputGenericLoadOp::SToF: + return OpConvertSToF(F32[1], value); + case InputGenericLoadOp::UToF: + return OpConvertUToF(F32[1], value); + default: + return value; + }; + }()}; OpReturnValue(result); ++label_index; } @@ -1287,6 +1307,7 @@ void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_in .pointer_type = pointer_type, .image_type = image_type, .count = desc.count, + .is_multisample = desc.is_multisample, }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); @@ -1455,7 +1476,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { const Id id{DefineInput(*this, type, true)}; Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); Name(id, fmt::format("in_attr{}", index)); - input_generics[index] = id; + input_generics[index] = GetAttributeInfo(*this, input_type, id); if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) { Decorate(id, spv::Decoration::PassthroughNV); @@ -1570,7 +1591,8 @@ void EmitContext::DefineOutputs(const IR::Program& program) { Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); } if (info.stores_sample_mask) { - sample_mask = DefineOutput(*this, U32[1], std::nullopt); + const Id array_type{TypeArray(U32[1], Const(1U))}; + sample_mask = DefineOutput(*this, array_type, std::nullopt); Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask); } break; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index dbc5c55b9..e63330f11 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -35,6 +35,7 @@ struct TextureDefinition { Id pointer_type; Id image_type; u32 count; + bool is_multisample; }; struct TextureBufferDefinition { @@ -94,6 +95,20 @@ struct StorageDefinitions { Id U32x4{}; }; +enum class InputGenericLoadOp { + None, + Bitcast, + SToF, + UToF, +}; + +struct InputGenericInfo { + Id id; + Id pointer_type; + Id component_type; + InputGenericLoadOp load_op; +}; + struct GenericElementInfo { Id id{}; u32 first_element{}; @@ -282,7 +297,7 @@ public: bool need_input_position_indirect{}; Id input_position{}; - std::array<Id, 32> input_generics{}; + std::array<InputGenericInfo, 32> input_generics{}; Id output_point_size{}; Id output_position{}; |