summaryrefslogtreecommitdiff
path: root/src/shader_recompiler/ir_opt
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler/ir_opt')
-rw-r--r--src/shader_recompiler/ir_opt/constant_propagation_pass.cpp10
-rw-r--r--src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp13
-rw-r--r--src/shader_recompiler/ir_opt/passes.h3
-rw-r--r--src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp79
4 files changed, 95 insertions, 10 deletions
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index f46e55122..ec12c843a 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -428,7 +428,7 @@ void FoldFPAdd32(IR::Inst& inst) {
}
}
-bool FoldDerivateYFromCorrection(IR::Inst& inst) {
+bool FoldDerivativeYFromCorrection(IR::Inst& inst) {
const IR::Value lhs_value{inst.Arg(0)};
const IR::Value rhs_value{inst.Arg(1)};
IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -464,7 +464,7 @@ void FoldFPMul32(IR::Inst& inst) {
if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
return;
}
- if (FoldDerivateYFromCorrection(inst)) {
+ if (FoldDerivativeYFromCorrection(inst)) {
return;
}
IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -699,7 +699,7 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
}
}
-bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
+bool FindGradient3DDerivatives(std::array<IR::Value, 3>& results, IR::Value coord) {
if (coord.IsImmediate()) {
return false;
}
@@ -834,7 +834,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
IR::Inst* const inst2 = coords.InstRecursive();
std::array<std::array<IR::Value, 3>, 3> results_matrix;
for (size_t i = 0; i < 3; i++) {
- if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
+ if (!FindGradient3DDerivatives(results_matrix[i], inst2->Arg(i).Resolve())) {
return;
}
}
@@ -852,7 +852,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
results_matrix[1][1], results_matrix[1][2]);
IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
- info.num_derivates.Assign(3);
+ info.num_derivatives.Assign(3);
IR::Value new_gradient_instruction =
ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index d1e59f22e..0cea79945 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,6 +11,7 @@
#include "shader_recompiler/frontend/ir/breadth_first_search.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
@@ -408,7 +409,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
}
/// Returns the offset in indices (not bytes) for an equivalent storage instruction
-IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
+IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
IR::U32 offset;
if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@@ -421,7 +422,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
}
// Subtract the least significant 32 bits from the guest offset. The result is the storage
// buffer offset in bytes.
- const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+ IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+
+ // Align the offset base to match the host alignment requirements
+ low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
return ir.ISub(offset, low_cbuf);
}
@@ -516,7 +520,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
}
} // Anonymous namespace
-void GlobalMemoryToStorageBufferPass(IR::Program& program) {
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
StorageInfo info;
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
@@ -540,7 +544,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
IR::Block* const block{storage_inst.block};
IR::Inst* const inst{storage_inst.inst};
- const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
+ const IR::U32 offset{
+ StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
Replace(*block, *inst, index, offset);
}
}
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 629d18fa1..1e637cb23 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -16,7 +16,7 @@ void CollectShaderInfoPass(Environment& env, IR::Program& program);
void ConditionalBarrierPass(IR::Program& program);
void ConstantPropagationPass(Environment& env, IR::Program& program);
void DeadCodeEliminationPass(IR::Program& program);
-void GlobalMemoryToStorageBufferPass(IR::Program& program);
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
void IdentityRemovalPass(IR::Program& program);
void LowerFp64ToFp32(IR::Program& program);
void LowerFp16ToFp32(IR::Program& program);
@@ -26,6 +26,7 @@ void SsaRewritePass(IR::Program& program);
void PositionPass(Environment& env, IR::Program& program);
void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info);
void LayerPass(IR::Program& program, const HostTranslateInfo& host_info);
+void VendorWorkaroundPass(IR::Program& program);
void VerificationPass(const IR::Program& program);
// Dual Vertex
diff --git a/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
new file mode 100644
index 000000000..08c658cb8
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+namespace {
+void AddingByteSwapsWorkaround(IR::Block& block, IR::Inst& inst) {
+ /*
+ * Workaround for an NVIDIA bug seen in Super Mario RPG
+ *
+ * We are looking for this pattern:
+ * %lhs_bfe = BitFieldUExtract %factor_a, #0, #16
+ * %lhs_mul = IMul32 %lhs_bfe, %factor_b // potentially optional?
+ * %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
+ * %rhs_bfe = BitFieldUExtract %factor_a, #16, #16
+ * %result = IAdd32 %lhs_shl, %rhs_bfe
+ *
+ * And replacing the IAdd32 with a BitwiseOr32
+ * %result = BitwiseOr32 %lhs_shl, %rhs_bfe
+ *
+ */
+ IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()};
+ IR::Inst* const rhs_bfe{inst.Arg(1).TryInstRecursive()};
+ if (!lhs_shl || !rhs_bfe) {
+ return;
+ }
+ if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
+ lhs_shl->Arg(1) != IR::Value{16U}) {
+ return;
+ }
+ if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract || rhs_bfe->Arg(1) != IR::Value{16U} ||
+ rhs_bfe->Arg(2) != IR::Value{16U}) {
+ return;
+ }
+ IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()};
+ if (!lhs_mul) {
+ return;
+ }
+ const bool lhs_mul_optional{lhs_mul->GetOpcode() == IR::Opcode::BitFieldUExtract};
+ if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 &&
+ lhs_mul->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+ return;
+ }
+ IR::Inst* const lhs_bfe{lhs_mul_optional ? lhs_mul : lhs_mul->Arg(0).TryInstRecursive()};
+ if (!lhs_bfe) {
+ return;
+ }
+ if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+ return;
+ }
+ if (lhs_bfe->Arg(1) != IR::Value{0U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
+ return;
+ }
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ inst.ReplaceUsesWith(ir.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)}));
+}
+
+} // Anonymous namespace
+
+void VendorWorkaroundPass(IR::Program& program) {
+ for (IR::Block* const block : program.post_order_blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::IAdd32:
+ AddingByteSwapsWorkaround(*block, inst);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+}
+
+} // namespace Shader::Optimization