Diffstat (limited to 'src/shader_recompiler/backend')
9 files changed, 53 insertions, 41 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index d0e308124..64e7bad75 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -559,12 +559,12 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                        const IR::Value& offset, const IR::Value& lod_clamp) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
     ScopedRegister dpdx, dpdy, coords;
-    const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+    const bool multi_component{info.num_derivatives > 1 || info.has_lod_clamp};
     if (multi_component) {
         // Allocate this early to avoid aliasing other registers
         dpdx = ScopedRegister{ctx.reg_alloc};
         dpdy = ScopedRegister{ctx.reg_alloc};
-        if (info.num_derivates >= 3) {
+        if (info.num_derivatives >= 3) {
             coords = ScopedRegister{ctx.reg_alloc};
         }
     }
@@ -584,7 +584,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                 dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
                 dpdy.reg, derivatives_vec);
         Register final_coord;
-        if (info.num_derivates >= 3) {
+        if (info.num_derivatives >= 3) {
             ctx.Add("MOV.F {}.z,{}.x;"
                     "MOV.F {}.z,{}.y;",
                     dpdx.reg, coord_vec, dpdy.reg, coord_vec);
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
index 2705ab140..9319ea007 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
@@ -5,6 +5,7 @@
 #include "shader_recompiler/backend/glasm/glasm_emit_context.h"
 #include "shader_recompiler/frontend/ir/program.h"
 #include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
 #include "shader_recompiler/runtime_info.h"
 
 namespace Shader::Backend::GLASM {
@@ -35,7 +36,9 @@ void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std
             continue;
         }
         const auto& ssbo{ctx.info.storage_buffers_descriptors[index]};
-        ctx.Add("LDC.U64 DC.x,c{}[{}];"     // ssbo_addr
+        const u64 ssbo_align_mask{~(ctx.profile.min_ssbo_alignment - 1U)};
+        ctx.Add("LDC.U64 DC.x,c{}[{}];"     // unaligned_ssbo_addr
+                "AND.U64 DC.x,DC.x,{};"     // ssbo_addr = unaligned_ssbo_addr & ssbo_align_mask
                 "LDC.U32 RC.x,c{}[{}];"     // ssbo_size_u32
                 "CVT.U64.U32 DC.y,RC.x;"    // ssbo_size = ssbo_size_u32
                 "ADD.U64 DC.y,DC.y,DC.x;"   // ssbo_end = ssbo_addr + ssbo_size
@@ -44,8 +47,8 @@ void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std
                 "AND.U.CC RC.x,RC.x,RC.y;"  // cond = a && b
                 "IF NE.x;"                  // if cond
                 "SUB.U64 DC.x,{}.x,DC.x;",  // offset = input_addr - ssbo_addr
-                ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address,
-                address, address);
+                ssbo.cbuf_index, ssbo.cbuf_offset, ssbo_align_mask, ssbo.cbuf_index,
+                ssbo.cbuf_offset + 8, address, address, address);
         if (pointer_based) {
             ctx.Add("PK64.U DC.y,c[{}];"    // host_ssbo = cbuf
                     "ADD.U64 DC.x,DC.x,DC.y;"  // host_addr = host_ssbo + offset
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
index d9872ecc2..6e940bd5a 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -548,15 +548,15 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
     if (sparse_inst) {
         throw NotImplementedException("EmitImageGradient Sparse");
     }
-    if (!offset.IsEmpty() && info.num_derivates <= 2) {
+    if (!offset.IsEmpty() && info.num_derivatives <= 2) {
         throw NotImplementedException("EmitImageGradient offset");
     }
     const auto texture{Texture(ctx, info, index)};
     const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
-    const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+    const bool multi_component{info.num_derivatives > 1 || info.has_lod_clamp};
     const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)};
     if (multi_component) {
-        if (info.num_derivates >= 3) {
+        if (info.num_derivatives >= 3) {
             const auto offset_vec{ctx.var_alloc.Consume(offset)};
             ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yw, {}.y));", texel, texture,
                     coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec);
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index 9ff4028c2..fd9a99449 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -601,7 +601,10 @@ std::string EmitContext::DefineGlobalMemoryFunctions() {
         addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc));
         size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc));
     }
-    const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])};
+    const u32 ssbo_align_mask{~(static_cast<u32>(profile.min_ssbo_alignment) - 1U)};
+    const auto aligned_low_addr{fmt::format("{}&{}", addr_xy[0], ssbo_align_mask)};
+    const auto aligned_addr{fmt::format("uvec2({},{})", aligned_low_addr, addr_xy[1])};
+    const auto addr_pack{fmt::format("packUint2x32({})", aligned_addr)};
     const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)};
     func += addr_statment;
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 34592a01f..0031fa5fb 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -407,7 +407,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
         }
         ctx.AddCapability(spv::Capability::DemoteToHelperInvocation);
     }
-    if (info.stores[IR::Attribute::ViewportIndex]) {
+    if (info.stores[IR::Attribute::ViewportIndex] && profile.support_multi_viewport) {
         ctx.AddCapability(spv::Capability::MultiViewport);
     }
     if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 1d77426e0..e5a78a914 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -84,6 +84,10 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
         }
         return std::nullopt;
     case IR::Attribute::ViewportIndex:
+        if (!ctx.profile.support_multi_viewport) {
+            LOG_WARNING(Shader, "Ignoring viewport index store on non-supporting driver");
+            return std::nullopt;
+        }
         if (ctx.profile.support_viewport_index_layer_non_geometry ||
             ctx.stage == Shader::Stage::Geometry) {
             return OutAttr{ctx.viewport_index, ctx.U32[1]};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 8decdf399..22ceca19c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -67,22 +67,22 @@ public:
         }
     }
 
-    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates,
-                           Id offset, Id lod_clamp) {
-        if (!Sirit::ValidId(derivates)) {
-            throw LogicError("Derivates must be present");
+    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives,
+                           u32 num_derivatives, Id offset, Id lod_clamp) {
+        if (!Sirit::ValidId(derivatives)) {
+            throw LogicError("Derivatives must be present");
         }
         boost::container::static_vector<Id, 3> deriv_x_accum;
         boost::container::static_vector<Id, 3> deriv_y_accum;
-        for (u32 i = 0; i < num_derivates; ++i) {
-            deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2));
-            deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1));
+        for (u32 i = 0; i < num_derivatives; ++i) {
+            deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2));
+            deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2 + 1));
         }
-        const Id derivates_X{ctx.OpCompositeConstruct(
-            ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
-        const Id derivates_Y{ctx.OpCompositeConstruct(
-            ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
-        Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y);
+        const Id derivatives_X{ctx.OpCompositeConstruct(
+            ctx.F32[num_derivatives], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
+        const Id derivatives_Y{ctx.OpCompositeConstruct(
+            ctx.F32[num_derivatives], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
+        Add(spv::ImageOperandsMask::Grad, derivatives_X, derivatives_Y);
         if (Sirit::ValidId(offset)) {
             Add(spv::ImageOperandsMask::Offset, offset);
         }
@@ -91,26 +91,26 @@ public:
         }
     }
 
-    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates_1, Id derivates_2,
+    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives_1, Id derivatives_2,
                            Id offset, Id lod_clamp) {
-        if (!Sirit::ValidId(derivates_1) || !Sirit::ValidId(derivates_2)) {
-            throw LogicError("Derivates must be present");
+        if (!Sirit::ValidId(derivatives_1) || !Sirit::ValidId(derivatives_2)) {
+            throw LogicError("Derivatives must be present");
         }
         boost::container::static_vector<Id, 3> deriv_1_accum{
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 0),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 2),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 0),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 0),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 2),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_2, 0),
         };
         boost::container::static_vector<Id, 3> deriv_2_accum{
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 1),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 3),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 1),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 1),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 3),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_2, 1),
         };
-        const Id derivates_id1{ctx.OpCompositeConstruct(
+        const Id derivatives_id1{ctx.OpCompositeConstruct(
             ctx.F32[3], std::span{deriv_1_accum.data(), deriv_1_accum.size()})};
-        const Id derivates_id2{ctx.OpCompositeConstruct(
+        const Id derivatives_id2{ctx.OpCompositeConstruct(
             ctx.F32[3], std::span{deriv_2_accum.data(), deriv_2_accum.size()})};
-        Add(spv::ImageOperandsMask::Grad, derivates_id1, derivates_id2);
+        Add(spv::ImageOperandsMask::Grad, derivatives_id1, derivatives_id2);
         if (Sirit::ValidId(offset)) {
             Add(spv::ImageOperandsMask::Offset, offset);
         }
@@ -548,12 +548,12 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
 }
 
 Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                     Id derivates, Id offset, Id lod_clamp) {
+                     Id derivatives, Id offset, Id lod_clamp) {
     const auto info{inst->Flags<IR::TextureInstInfo>()};
     const auto operands =
-        info.num_derivates == 3
-            ? ImageOperands(ctx, info.has_lod_clamp != 0, derivates, offset, {}, lod_clamp)
-            : ImageOperands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset,
+        info.num_derivatives == 3
+            ? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, offset, {}, lod_clamp)
+            : ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, info.num_derivatives, offset,
                             lod_clamp);
     return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
                 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index a440b557d..7d34575c8 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -543,7 +543,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
                             const IR::Value& skip_mips);
 Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
 Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                     Id derivates, Id offset, Id lod_clamp);
+                     Id derivatives, Id offset, Id lod_clamp);
 Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
 void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
 Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 57df6fc34..3350f1f85 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -891,7 +891,9 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
         const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32,
                                                  zero, ssbo_size_cbuf_offset)};
 
-        const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
+        const u64 ssbo_align_mask{~(profile.min_ssbo_alignment - 1U)};
+        const Id unaligned_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
+        const Id ssbo_addr{OpBitwiseAnd(U64, unaligned_addr, Constant(U64, ssbo_align_mask))};
         const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))};
         const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)};
         const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr),
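
Editor's note: for readers tracing the new SSBO base masking (the ssbo_align_mask lines in the GLASM, GLSL, and SPIR-V hunks above), here is a minimal standalone sketch of the arithmetic. AlignedSsboAddr is a hypothetical name, not part of the patch, and the sketch assumes min_ssbo_alignment is a power of two, which the ~(x - 1U) construction requires. The apparent intent is to round the guest SSBO base down to the host's minimum storage-buffer alignment so the bounds check and offset computation agree with the aligned range the buffer cache binds.

    #include <cstdint>

    using u64 = std::uint64_t;

    // Hypothetical helper mirroring the masking added in all three backends.
    // Example: min_ssbo_alignment == 256 gives mask 0xFFFF'FFFF'FFFF'FF00,
    // so an unaligned base 0x1234'5679 rounds down to 0x1234'5600.
    u64 AlignedSsboAddr(u64 unaligned_ssbo_addr, u64 min_ssbo_alignment) {
        const u64 ssbo_align_mask = ~(min_ssbo_alignment - 1U);
        return unaligned_ssbo_addr & ssbo_align_mask;
    }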
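
Editor's note: the two-vector ImageOperands constructor in emit_spirv_image.cpp regroups interleaved 3D derivatives into the two gradient vectors passed to the Grad image operand. A rough plain-C++ illustration of that component shuffle follows; Gradients and UnpackGradients are hypothetical names inferred from the OpCompositeExtract indices above.

    #include <array>

    struct Gradients {
        std::array<float, 3> ddx;
        std::array<float, 3> ddy;
    };

    // The first vector carries (dx0, dy0, dx1, dy1) and the second (dx2, dy2);
    // the emitter regroups them into ddx = (dx0, dx1, dx2) and ddy = (dy0, dy1, dy2).
    Gradients UnpackGradients(const std::array<float, 4>& d1, const std::array<float, 2>& d2) {
        return Gradients{
            .ddx = {d1[0], d1[2], d2[0]},  // components 0, 2 of d1 and 0 of d2 (deriv_1_accum)
            .ddy = {d1[1], d1[3], d2[1]},  // components 1, 3 of d1 and 1 of d2 (deriv_2_accum)
        };
    }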