path: root/src/shader_recompiler
Diffstat (limited to 'src/shader_recompiler')
-rw-r--r--  src/shader_recompiler/CMakeLists.txt                                       |  1
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_image.cpp                   |  6
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp                  |  9
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_image.cpp                     |  6
-rw-r--r--  src/shader_recompiler/backend/glsl/glsl_emit_context.cpp                   |  5
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv.cpp                         |  2
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp         |  4
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_image.cpp                   | 56
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_instructions.h              |  2
-rw-r--r--  src/shader_recompiler/backend/spirv/spirv_emit_context.cpp                 |  4
-rw-r--r--  src/shader_recompiler/frontend/ir/ir_emitter.cpp                           |  4
-rw-r--r--  src/shader_recompiler/frontend/ir/ir_emitter.h                             |  2
-rw-r--r--  src/shader_recompiler/frontend/ir/modifiers.h                              |  2
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp | 29
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate_program.cpp               |  3
-rw-r--r--  src/shader_recompiler/host_translate_info.h                                |  1
-rw-r--r--  src/shader_recompiler/ir_opt/constant_propagation_pass.cpp                 | 10
-rw-r--r--  src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp      | 13
-rw-r--r--  src/shader_recompiler/ir_opt/passes.h                                      |  3
-rw-r--r--  src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp                    | 79
-rw-r--r--  src/shader_recompiler/profile.h                                            |  3
21 files changed, 174 insertions, 70 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 83b763447..19db17c6d 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -231,6 +231,7 @@ add_library(shader_recompiler STATIC
ir_opt/rescaling_pass.cpp
ir_opt/ssa_rewrite_pass.cpp
ir_opt/texture_pass.cpp
+ ir_opt/vendor_workaround_pass.cpp
ir_opt/verification_pass.cpp
object_pool.h
precompiled_headers.h
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index d0e308124..64e7bad75 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -559,12 +559,12 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& offset, const IR::Value& lod_clamp) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
ScopedRegister dpdx, dpdy, coords;
- const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+ const bool multi_component{info.num_derivatives > 1 || info.has_lod_clamp};
if (multi_component) {
// Allocate this early to avoid aliasing other registers
dpdx = ScopedRegister{ctx.reg_alloc};
dpdy = ScopedRegister{ctx.reg_alloc};
- if (info.num_derivates >= 3) {
+ if (info.num_derivatives >= 3) {
coords = ScopedRegister{ctx.reg_alloc};
}
}
@@ -584,7 +584,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
dpdy.reg, derivatives_vec);
Register final_coord;
- if (info.num_derivates >= 3) {
+ if (info.num_derivatives >= 3) {
ctx.Add("MOV.F {}.z,{}.x;"
"MOV.F {}.z,{}.y;",
dpdx.reg, coord_vec, dpdy.reg, coord_vec);
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
index 2705ab140..9319ea007 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
@@ -5,6 +5,7 @@
#include "shader_recompiler/backend/glasm/glasm_emit_context.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader::Backend::GLASM {
@@ -35,7 +36,9 @@ void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std
continue;
}
const auto& ssbo{ctx.info.storage_buffers_descriptors[index]};
- ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr
+ const u64 ssbo_align_mask{~(ctx.profile.min_ssbo_alignment - 1U)};
+ ctx.Add("LDC.U64 DC.x,c{}[{}];" // unaligned_ssbo_addr
+ "AND.U64 DC.x,DC.x,{};" // ssbo_addr = unaligned_ssbo_addr & ssbo_align_mask
"LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32
"CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32
"ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size
@@ -44,8 +47,8 @@ void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std
"AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b
"IF NE.x;" // if cond
"SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr
- ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address,
- address, address);
+ ssbo.cbuf_index, ssbo.cbuf_offset, ssbo_align_mask, ssbo.cbuf_index,
+ ssbo.cbuf_offset + 8, address, address, address);
if (pointer_based) {
ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf
"ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
index d9872ecc2..6e940bd5a 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -548,15 +548,15 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
if (sparse_inst) {
throw NotImplementedException("EmitImageGradient Sparse");
}
- if (!offset.IsEmpty() && info.num_derivates <= 2) {
+ if (!offset.IsEmpty() && info.num_derivatives <= 2) {
throw NotImplementedException("EmitImageGradient offset");
}
const auto texture{Texture(ctx, info, index)};
const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
- const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+ const bool multi_component{info.num_derivatives > 1 || info.has_lod_clamp};
const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)};
if (multi_component) {
- if (info.num_derivates >= 3) {
+ if (info.num_derivatives >= 3) {
const auto offset_vec{ctx.var_alloc.Consume(offset)};
ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yw, {}.y));", texel, texture,
coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec);
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index 9ff4028c2..fd9a99449 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -601,7 +601,10 @@ std::string EmitContext::DefineGlobalMemoryFunctions() {
addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc));
size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc));
}
- const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])};
+ const u32 ssbo_align_mask{~(static_cast<u32>(profile.min_ssbo_alignment) - 1U)};
+ const auto aligned_low_addr{fmt::format("{}&{}", addr_xy[0], ssbo_align_mask)};
+ const auto aligned_addr{fmt::format("uvec2({},{})", aligned_low_addr, addr_xy[1])};
+ const auto addr_pack{fmt::format("packUint2x32({})", aligned_addr)};
const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)};
func += addr_statment;
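
Note that the GLSL variant masks only the low 32 bits of the packed address. That is equivalent to a full 64-bit mask whenever the alignment is a power of two that fits in 32 bits, since the mask's upper word would be all ones anyway. A sketch under that assumption (values illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
        const uint64_t align = 64; // power of two, fits in 32 bits
        const uint32_t lo_mask = ~(static_cast<uint32_t>(align) - 1U);

        const uint64_t addr = 0xABCDEF1234ULL;
        const uint32_t lo = static_cast<uint32_t>(addr) & lo_mask; // mask low word only
        const uint32_t hi = static_cast<uint32_t>(addr >> 32);     // high word untouched
        const uint64_t repacked = (static_cast<uint64_t>(hi) << 32) | lo;

        assert(repacked == (addr & ~(align - 1U))); // matches the 64-bit mask
        return 0;
    }
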
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 34592a01f..0031fa5fb 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -407,7 +407,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
}
ctx.AddCapability(spv::Capability::DemoteToHelperInvocation);
}
- if (info.stores[IR::Attribute::ViewportIndex]) {
+ if (info.stores[IR::Attribute::ViewportIndex] && profile.support_multi_viewport) {
ctx.AddCapability(spv::Capability::MultiViewport);
}
if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 1d77426e0..e5a78a914 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -84,6 +84,10 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
}
return std::nullopt;
case IR::Attribute::ViewportIndex:
+ if (!ctx.profile.support_multi_viewport) {
+ LOG_WARNING(Shader, "Ignoring viewport index store on non-supporting driver");
+ return std::nullopt;
+ }
if (ctx.profile.support_viewport_index_layer_non_geometry ||
ctx.stage == Shader::Stage::Geometry) {
return OutAttr{ctx.viewport_index, ctx.U32[1]};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 8decdf399..22ceca19c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -67,22 +67,22 @@ public:
}
}
- explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates,
- Id offset, Id lod_clamp) {
- if (!Sirit::ValidId(derivates)) {
- throw LogicError("Derivates must be present");
+ explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives,
+ u32 num_derivatives, Id offset, Id lod_clamp) {
+ if (!Sirit::ValidId(derivatives)) {
+ throw LogicError("Derivatives must be present");
}
boost::container::static_vector<Id, 3> deriv_x_accum;
boost::container::static_vector<Id, 3> deriv_y_accum;
- for (u32 i = 0; i < num_derivates; ++i) {
- deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2));
- deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1));
+ for (u32 i = 0; i < num_derivatives; ++i) {
+ deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2));
+ deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2 + 1));
}
- const Id derivates_X{ctx.OpCompositeConstruct(
- ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
- const Id derivates_Y{ctx.OpCompositeConstruct(
- ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
- Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y);
+ const Id derivatives_X{ctx.OpCompositeConstruct(
+ ctx.F32[num_derivatives], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
+ const Id derivatives_Y{ctx.OpCompositeConstruct(
+ ctx.F32[num_derivatives], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
+ Add(spv::ImageOperandsMask::Grad, derivatives_X, derivatives_Y);
if (Sirit::ValidId(offset)) {
Add(spv::ImageOperandsMask::Offset, offset);
}
@@ -91,26 +91,26 @@ public:
}
}
- explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates_1, Id derivates_2,
+ explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives_1, Id derivatives_2,
Id offset, Id lod_clamp) {
- if (!Sirit::ValidId(derivates_1) || !Sirit::ValidId(derivates_2)) {
- throw LogicError("Derivates must be present");
+ if (!Sirit::ValidId(derivatives_1) || !Sirit::ValidId(derivatives_2)) {
+ throw LogicError("Derivatives must be present");
}
boost::container::static_vector<Id, 3> deriv_1_accum{
- ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 0),
- ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 2),
- ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 0),
+ ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 0),
+ ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 2),
+ ctx.OpCompositeExtract(ctx.F32[1], derivatives_2, 0),
};
boost::container::static_vector<Id, 3> deriv_2_accum{
- ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 1),
- ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 3),
- ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 1),
+ ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 1),
+ ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 3),
+ ctx.OpCompositeExtract(ctx.F32[1], derivatives_2, 1),
};
- const Id derivates_id1{ctx.OpCompositeConstruct(
+ const Id derivatives_id1{ctx.OpCompositeConstruct(
ctx.F32[3], std::span{deriv_1_accum.data(), deriv_1_accum.size()})};
- const Id derivates_id2{ctx.OpCompositeConstruct(
+ const Id derivatives_id2{ctx.OpCompositeConstruct(
ctx.F32[3], std::span{deriv_2_accum.data(), deriv_2_accum.size()})};
- Add(spv::ImageOperandsMask::Grad, derivates_id1, derivates_id2);
+ Add(spv::ImageOperandsMask::Grad, derivatives_id1, derivatives_id2);
if (Sirit::ValidId(offset)) {
Add(spv::ImageOperandsMask::Offset, offset);
}
@@ -548,12 +548,12 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
}
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
- Id derivates, Id offset, Id lod_clamp) {
+ Id derivatives, Id offset, Id lod_clamp) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const auto operands =
- info.num_derivates == 3
- ? ImageOperands(ctx, info.has_lod_clamp != 0, derivates, offset, {}, lod_clamp)
- : ImageOperands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset,
+ info.num_derivatives == 3
+ ? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, offset, {}, lod_clamp)
+ : ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, info.num_derivatives, offset,
lod_clamp);
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
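
The renamed ImageOperands constructor still performs the same deinterleave: the IR packs derivatives as (dPdx.c0, dPdy.c0, dPdx.c1, dPdy.c1, ...), while the SPIR-V Grad operands take one vector per direction. A standalone sketch of that shuffle on plain floats (the function name here is hypothetical):

    #include <cstdint>
    #include <vector>

    // Split interleaved (x0, y0, x1, y1, ...) derivatives into the two
    // per-direction vectors that the Grad image operands expect.
    void Deinterleave(const std::vector<float>& packed, uint32_t num_derivatives,
                      std::vector<float>& grad_x, std::vector<float>& grad_y) {
        for (uint32_t i = 0; i < num_derivatives; ++i) {
            grad_x.push_back(packed[i * 2]);
            grad_y.push_back(packed[i * 2 + 1]);
        }
    }

    int main() {
        const std::vector<float> packed{0.1f, 0.2f, 0.3f, 0.4f}; // (dx.x, dy.x, dx.y, dy.y)
        std::vector<float> grad_x, grad_y;
        Deinterleave(packed, 2, grad_x, grad_y);
        return grad_x[1] == 0.3f && grad_y[1] == 0.4f ? 0 : 1;
    }
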
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index a440b557d..7d34575c8 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -543,7 +543,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
const IR::Value& skip_mips);
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
- Id derivates, Id offset, Id lod_clamp);
+ Id derivatives, Id offset, Id lod_clamp);
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 57df6fc34..3350f1f85 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -891,7 +891,9 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32,
zero, ssbo_size_cbuf_offset)};
- const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
+ const u64 ssbo_align_mask{~(profile.min_ssbo_alignment - 1U)};
+ const Id unaligned_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
+ const Id ssbo_addr{OpBitwiseAnd(U64, unaligned_addr, Constant(U64, ssbo_align_mask))};
const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))};
const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)};
const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr),
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index b7caa4246..49171c470 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -1864,11 +1864,11 @@ Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, Texture
return Inst(op, Flags{info}, handle, coords);
}
-Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates,
+Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivatives,
const Value& offset, const F32& lod_clamp, TextureInstInfo info) {
const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient
: Opcode::BindlessImageGradient};
- return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp);
+ return Inst(op, Flags{info}, handle, coords, derivatives, offset, lod_clamp);
}
Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index f3c81dbe1..6c30897f4 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -335,7 +335,7 @@ public:
[[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
const U32& lod, const U32& multisampling, TextureInstInfo info);
[[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
- const Value& derivates, const Value& offset,
+ const Value& derivatives, const Value& offset,
const F32& lod_clamp, TextureInstInfo info);
[[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
void ImageWrite(const Value& handle, const Value& coords, const Value& color,
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
index 1e9e8c8f5..c20c2401f 100644
--- a/src/shader_recompiler/frontend/ir/modifiers.h
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -40,7 +40,7 @@ union TextureInstInfo {
BitField<21, 1, u32> has_lod_clamp;
BitField<22, 1, u32> relaxed_precision;
BitField<23, 2, u32> gather_component;
- BitField<25, 2, u32> num_derivates;
+ BitField<25, 2, u32> num_derivatives;
BitField<27, 3, ImageFormat> image_format;
BitField<30, 1, u32> ndv_is_active;
};
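
num_derivatives occupies a 2-bit field, so its maximum value is 3; the constant-propagation change further down relies on the value 3 to select the 3D-gradient emission path. A hand-rolled sketch of the packing (yuzu's real BitField helper lives in the common library; this only mirrors its arithmetic):

    #include <cstdint>

    int main() {
        // TextureInstInfo stores num_derivatives in bits [25, 27).
        uint32_t raw = 0;
        const uint32_t num_derivatives = 3;    // maximum for a 2-bit field
        raw |= (num_derivatives & 0x3U) << 25; // BitField::Assign
        const uint32_t read_back = (raw >> 25) & 0x3U;
        return read_back == 3 ? 0 : 1;
    }
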
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
index dd34507bc..4ce3dd0cd 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
@@ -59,7 +59,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
BitField<51, 3, IR::Pred> sparse_pred;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 8, IR::Reg> coord_reg;
- BitField<20, 8, IR::Reg> derivate_reg;
+ BitField<20, 8, IR::Reg> derivative_reg;
BitField<28, 3, TextureType> type;
BitField<31, 4, u64> mask;
BitField<36, 13, u64> cbuf_offset;
@@ -71,7 +71,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
}
IR::Value coords;
- u32 num_derivates{};
+ u32 num_derivatives{};
IR::Reg base_reg{txd.coord_reg};
IR::Reg last_reg;
IR::Value handle;
@@ -90,42 +90,42 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
switch (txd.type) {
case TextureType::_1D: {
coords = v.F(base_reg);
- num_derivates = 1;
+ num_derivatives = 1;
last_reg = base_reg + 1;
break;
}
case TextureType::ARRAY_1D: {
last_reg = base_reg + 1;
coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
- num_derivates = 1;
+ num_derivatives = 1;
break;
}
case TextureType::_2D: {
last_reg = base_reg + 2;
coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
- num_derivates = 2;
+ num_derivatives = 2;
break;
}
case TextureType::ARRAY_2D: {
last_reg = base_reg + 2;
coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
- num_derivates = 2;
+ num_derivatives = 2;
break;
}
default:
throw NotImplementedException("Invalid texture type");
}
- const IR::Reg derivate_reg{txd.derivate_reg};
- IR::Value derivates;
- switch (num_derivates) {
+ const IR::Reg derivative_reg{txd.derivative_reg};
+ IR::Value derivatives;
+ switch (num_derivatives) {
case 1: {
- derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
+ derivatives = v.ir.CompositeConstruct(v.F(derivative_reg), v.F(derivative_reg + 1));
break;
}
case 2: {
- derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
- v.F(derivate_reg + 2), v.F(derivate_reg + 3));
+ derivatives = v.ir.CompositeConstruct(v.F(derivative_reg), v.F(derivative_reg + 1),
+ v.F(derivative_reg + 2), v.F(derivative_reg + 3));
break;
}
default:
@@ -150,9 +150,10 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
IR::TextureInstInfo info{};
info.type.Assign(GetType(txd.type));
- info.num_derivates.Assign(num_derivates);
+ info.num_derivatives.Assign(num_derivatives);
info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
- const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};
+ const IR::Value sample{
+ v.ir.ImageGradient(handle, coords, derivatives, offset, lod_clamp, info)};
IR::Reg dest_reg{txd.dest_reg};
for (size_t element = 0; element < 4; ++element) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 928b35561..321ea625b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -298,7 +298,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
Optimization::PositionPass(env, program);
- Optimization::GlobalMemoryToStorageBufferPass(program);
+ Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
Optimization::TexturePass(env, program, host_info);
if (Settings::values.resolution_info.active) {
@@ -310,6 +310,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
}
Optimization::CollectShaderInfoPass(env, program);
Optimization::LayerPass(program, host_info);
+ Optimization::VendorWorkaroundPass(program);
CollectInterpolationInfo(env, program);
AddNVNStorageBuffers(program);
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 7d2ded907..1b53404fc 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -16,6 +16,7 @@ struct HostTranslateInfo {
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
+ u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
///< passthrough shaders
bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index f46e55122..ec12c843a 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -428,7 +428,7 @@ void FoldFPAdd32(IR::Inst& inst) {
}
}
-bool FoldDerivateYFromCorrection(IR::Inst& inst) {
+bool FoldDerivativeYFromCorrection(IR::Inst& inst) {
const IR::Value lhs_value{inst.Arg(0)};
const IR::Value rhs_value{inst.Arg(1)};
IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -464,7 +464,7 @@ void FoldFPMul32(IR::Inst& inst) {
if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
return;
}
- if (FoldDerivateYFromCorrection(inst)) {
+ if (FoldDerivativeYFromCorrection(inst)) {
return;
}
IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -699,7 +699,7 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
}
}
-bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
+bool FindGradient3DDerivatives(std::array<IR::Value, 3>& results, IR::Value coord) {
if (coord.IsImmediate()) {
return false;
}
@@ -834,7 +834,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
IR::Inst* const inst2 = coords.InstRecursive();
std::array<std::array<IR::Value, 3>, 3> results_matrix;
for (size_t i = 0; i < 3; i++) {
- if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
+ if (!FindGradient3DDerivatives(results_matrix[i], inst2->Arg(i).Resolve())) {
return;
}
}
@@ -852,7 +852,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
results_matrix[1][1], results_matrix[1][2]);
IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
- info.num_derivates.Assign(3);
+ info.num_derivatives.Assign(3);
IR::Value new_gradient_instruction =
ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
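
For the folded 3D gradient, the two composites split the derivative matrix so the three-argument ImageOperands constructor above can regroup them: derivatives_1 carries (dPdx.x, dPdy.x, dPdx.y, dPdy.y) and derivatives_2 carries (dPdx.z, dPdy.z). A sketch of that layout, assuming results_matrix rows are {coord, dPdx, dPdy} (values illustrative):

    #include <array>

    int main() {
        // results_matrix[i] = {coord_i, dPdx_i, dPdy_i} for i in {x, y, z}
        const std::array<std::array<float, 3>, 3> m{{
            {0.50f, 0.01f, 0.02f},
            {0.25f, 0.03f, 0.04f},
            {0.75f, 0.05f, 0.06f},
        }};
        // derivatives_1 = (dPdx.x, dPdy.x, dPdx.y, dPdy.y)
        const std::array<float, 4> derivatives_1{m[0][1], m[0][2], m[1][1], m[1][2]};
        // derivatives_2 = (dPdx.z, dPdy.z); the emitter gathers even slots into
        // grad_x and odd slots into grad_y, as in emit_spirv_image.cpp above.
        const std::array<float, 2> derivatives_2{m[2][1], m[2][2]};
        return derivatives_1[3] == m[1][2] && derivatives_2[0] == m[2][1] ? 0 : 1;
    }
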
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index d1e59f22e..0cea79945 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,6 +11,7 @@
#include "shader_recompiler/frontend/ir/breadth_first_search.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
@@ -408,7 +409,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
}
/// Returns the offset in indices (not bytes) for an equivalent storage instruction
-IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
+IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
IR::U32 offset;
if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@@ -421,7 +422,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
}
// Subtract the least significant 32 bits from the guest offset. The result is the storage
// buffer offset in bytes.
- const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+ IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+
+ // Align the offset base to match the host alignment requirements
+ low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
return ir.ISub(offset, low_cbuf);
}
@@ -516,7 +520,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
}
} // Anonymous namespace
-void GlobalMemoryToStorageBufferPass(IR::Program& program) {
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
StorageInfo info;
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
@@ -540,7 +544,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
IR::Block* const block{storage_inst.block};
IR::Inst* const inst{storage_inst.inst};
- const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
+ const IR::U32 offset{
+ StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
Replace(*block, *inst, index, offset);
}
}
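
Aligning low_cbuf before the subtraction keeps the IR-side offset consistent with the backends, which resolve accesses against the aligned base: without it, every access would land low_cbuf % alignment bytes too low. A sketch of the resulting offset computation (values illustrative):

    #include <cstdint>

    int main() {
        const uint32_t alignment = 256;        // host_info.min_ssbo_alignment
        const uint32_t low_cbuf = 0x10000040;  // guest SSBO base, low 32 bits
        const uint32_t guest_addr = 0x10000050;

        const uint32_t aligned_base = low_cbuf & ~(alignment - 1U); // 0x10000000
        const uint32_t offset = guest_addr - aligned_base;          // 0x50
        return offset == 0x50 ? 0 : 1;
    }
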
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 629d18fa1..1e637cb23 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -16,7 +16,7 @@ void CollectShaderInfoPass(Environment& env, IR::Program& program);
void ConditionalBarrierPass(IR::Program& program);
void ConstantPropagationPass(Environment& env, IR::Program& program);
void DeadCodeEliminationPass(IR::Program& program);
-void GlobalMemoryToStorageBufferPass(IR::Program& program);
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
void IdentityRemovalPass(IR::Program& program);
void LowerFp64ToFp32(IR::Program& program);
void LowerFp16ToFp32(IR::Program& program);
@@ -26,6 +26,7 @@ void SsaRewritePass(IR::Program& program);
void PositionPass(Environment& env, IR::Program& program);
void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info);
void LayerPass(IR::Program& program, const HostTranslateInfo& host_info);
+void VendorWorkaroundPass(IR::Program& program);
void VerificationPass(const IR::Program& program);
// Dual Vertex
diff --git a/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
new file mode 100644
index 000000000..08c658cb8
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+namespace {
+void AddingByteSwapsWorkaround(IR::Block& block, IR::Inst& inst) {
+ /*
+ * Workaround for an NVIDIA bug seen in Super Mario RPG
+ *
+ * We are looking for this pattern:
+ * %lhs_bfe = BitFieldUExtract %factor_a, #0, #16
+ * %lhs_mul = IMul32 %lhs_bfe, %factor_b // potentially optional?
+ * %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
+ * %rhs_bfe = BitFieldUExtract %factor_a, #16, #16
+ * %result = IAdd32 %lhs_shl, %rhs_bfe
+ *
+ * And replacing the IAdd32 with a BitwiseOr32
+ * %result = BitwiseOr32 %lhs_shl, %rhs_bfe
+ *
+ */
+ IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()};
+ IR::Inst* const rhs_bfe{inst.Arg(1).TryInstRecursive()};
+ if (!lhs_shl || !rhs_bfe) {
+ return;
+ }
+ if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
+ lhs_shl->Arg(1) != IR::Value{16U}) {
+ return;
+ }
+ if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract || rhs_bfe->Arg(1) != IR::Value{16U} ||
+ rhs_bfe->Arg(2) != IR::Value{16U}) {
+ return;
+ }
+ IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()};
+ if (!lhs_mul) {
+ return;
+ }
+ const bool lhs_mul_optional{lhs_mul->GetOpcode() == IR::Opcode::BitFieldUExtract};
+ if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 &&
+ lhs_mul->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+ return;
+ }
+ IR::Inst* const lhs_bfe{lhs_mul_optional ? lhs_mul : lhs_mul->Arg(0).TryInstRecursive()};
+ if (!lhs_bfe) {
+ return;
+ }
+ if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+ return;
+ }
+ if (lhs_bfe->Arg(1) != IR::Value{0U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
+ return;
+ }
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ inst.ReplaceUsesWith(ir.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)}));
+}
+
+} // Anonymous namespace
+
+void VendorWorkaroundPass(IR::Program& program) {
+ for (IR::Block* const block : program.post_order_blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::IAdd32:
+ AddingByteSwapsWorkaround(*block, inst);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+}
+
+} // namespace Shader::Optimization
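
The rewrite is sound because the IAdd32 operands occupy disjoint bit ranges: the left operand ends in ShiftLeftLogical32 by 16, so its low 16 bits are zero, and the right operand is a 16-bit BitFieldUExtract, so its high 16 bits are zero. With no carries possible, addition and bitwise OR produce identical results, and the OR form avoids the NVIDIA miscompilation seen in Super Mario RPG. A quick demonstration:

    #include <cassert>
    #include <cstdint>

    int main() {
        const uint32_t factor_a = 0xDEADBEEF;
        const uint32_t lhs = (factor_a & 0xFFFFU) << 16; // low 16 bits are zero
        const uint32_t rhs = factor_a >> 16;             // high 16 bits are zero
        // Disjoint bit ranges: no carry can propagate, so + and | coincide.
        assert((lhs + rhs) == (lhs | rhs));
        return 0;
    }
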
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 38d820db2..66901a965 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -43,6 +43,7 @@ struct Profile {
bool support_gl_sparse_textures{};
bool support_gl_derivative_control{};
bool support_scaled_attributes{};
+ bool support_multi_viewport{};
bool warp_size_potentially_larger_than_guest{};
@@ -84,6 +85,8 @@ struct Profile {
/// Maxwell and earlier nVidia architectures have broken robust support
bool has_broken_robust{};
+
+ u64 min_ssbo_alignment{};
};
} // namespace Shader