Diffstat (limited to 'src/shader_recompiler/ir_opt')
4 files changed, 95 insertions, 10 deletions
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index f46e55122..ec12c843a 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -428,7 +428,7 @@ void FoldFPAdd32(IR::Inst& inst) {
     }
 }
 
-bool FoldDerivateYFromCorrection(IR::Inst& inst) {
+bool FoldDerivativeYFromCorrection(IR::Inst& inst) {
     const IR::Value lhs_value{inst.Arg(0)};
     const IR::Value rhs_value{inst.Arg(1)};
     IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -464,7 +464,7 @@ void FoldFPMul32(IR::Inst& inst) {
     if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
         return;
     }
-    if (FoldDerivateYFromCorrection(inst)) {
+    if (FoldDerivativeYFromCorrection(inst)) {
         return;
     }
     IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -699,7 +699,7 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
     }
 }
 
-bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
+bool FindGradient3DDerivatives(std::array<IR::Value, 3>& results, IR::Value coord) {
     if (coord.IsImmediate()) {
         return false;
     }
@@ -834,7 +834,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
     IR::Inst* const inst2 = coords.InstRecursive();
     std::array<std::array<IR::Value, 3>, 3> results_matrix;
     for (size_t i = 0; i < 3; i++) {
-        if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
+        if (!FindGradient3DDerivatives(results_matrix[i], inst2->Arg(i).Resolve())) {
             return;
         }
     }
@@ -852,7 +852,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
     IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
                                                     results_matrix[1][1], results_matrix[1][2]);
     IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
-    info.num_derivates.Assign(3);
+    info.num_derivatives.Assign(3);
     IR::Value new_gradient_instruction =
         ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
     IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index d1e59f22e..0cea79945 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,6 +11,7 @@
 #include "shader_recompiler/frontend/ir/breadth_first_search.h"
 #include "shader_recompiler/frontend/ir/ir_emitter.h"
 #include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/host_translate_info.h"
 #include "shader_recompiler/ir_opt/passes.h"
 
 namespace Shader::Optimization {
@@ -408,7 +409,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
 }
 
 /// Returns the offset in indices (not bytes) for an equivalent storage instruction
-IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
+IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
     IR::U32 offset;
     if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@@ -421,7 +422,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
     }
     // Subtract the least significant 32 bits from the guest offset. The result is the storage
     // buffer offset in bytes.
-    const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+    IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+
+    // Align the offset base to match the host alignment requirements
+    low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
     return ir.ISub(offset, low_cbuf);
 }
 
@@ -516,7 +520,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
 }
 } // Anonymous namespace
 
-void GlobalMemoryToStorageBufferPass(IR::Program& program) {
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
     StorageInfo info;
     for (IR::Block* const block : program.post_order_blocks) {
         for (IR::Inst& inst : block->Instructions()) {
@@ -540,7 +544,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
         const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
         IR::Block* const block{storage_inst.block};
         IR::Inst* const inst{storage_inst.inst};
-        const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
+        const IR::U32 offset{
+            StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
         Replace(*block, *inst, index, offset);
     }
 }
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 629d18fa1..1e637cb23 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -16,7 +16,7 @@ void CollectShaderInfoPass(Environment& env, IR::Program& program);
 void ConditionalBarrierPass(IR::Program& program);
 void ConstantPropagationPass(Environment& env, IR::Program& program);
 void DeadCodeEliminationPass(IR::Program& program);
-void GlobalMemoryToStorageBufferPass(IR::Program& program);
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
 void IdentityRemovalPass(IR::Program& program);
 void LowerFp64ToFp32(IR::Program& program);
 void LowerFp16ToFp32(IR::Program& program);
@@ -26,6 +26,7 @@ void SsaRewritePass(IR::Program& program);
 void PositionPass(Environment& env, IR::Program& program);
 void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info);
 void LayerPass(IR::Program& program, const HostTranslateInfo& host_info);
+void VendorWorkaroundPass(IR::Program& program);
 void VerificationPass(const IR::Program& program);
 
 // Dual Vertex
diff --git a/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
new file mode 100644
index 000000000..08c658cb8
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+namespace {
+void AddingByteSwapsWorkaround(IR::Block& block, IR::Inst& inst) {
+    /*
+     * Workaround for an NVIDIA bug seen in Super Mario RPG
+     *
+     * We are looking for this pattern:
+     *   %lhs_bfe = BitFieldUExtract %factor_a, #0, #16
+     *   %lhs_mul = IMul32 %lhs_bfe, %factor_b // potentially optional?
+     *   %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
+     *   %rhs_bfe = BitFieldUExtract %factor_a, #16, #16
+     *   %result = IAdd32 %lhs_shl, %rhs_bfe
+     *
+     * And replacing the IAdd32 with a BitwiseOr32
+     *   %result = BitwiseOr32 %lhs_shl, %rhs_bfe
+     *
+     */
+    IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()};
+    IR::Inst* const rhs_bfe{inst.Arg(1).TryInstRecursive()};
+    if (!lhs_shl || !rhs_bfe) {
+        return;
+    }
+    if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
+        lhs_shl->Arg(1) != IR::Value{16U}) {
+        return;
+    }
+    if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract || rhs_bfe->Arg(1) != IR::Value{16U} ||
+        rhs_bfe->Arg(2) != IR::Value{16U}) {
+        return;
+    }
+    IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()};
+    if (!lhs_mul) {
+        return;
+    }
+    const bool lhs_mul_optional{lhs_mul->GetOpcode() == IR::Opcode::BitFieldUExtract};
+    if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 &&
+        lhs_mul->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return;
+    }
+    IR::Inst* const lhs_bfe{lhs_mul_optional ? lhs_mul : lhs_mul->Arg(0).TryInstRecursive()};
+    if (!lhs_bfe) {
+        return;
+    }
+    if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return;
+    }
+    if (lhs_bfe->Arg(1) != IR::Value{0U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
+        return;
+    }
+    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+    inst.ReplaceUsesWith(ir.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)}));
+}
+
+} // Anonymous namespace
+
+void VendorWorkaroundPass(IR::Program& program) {
+    for (IR::Block* const block : program.post_order_blocks) {
+        for (IR::Inst& inst : block->Instructions()) {
+            switch (inst.GetOpcode()) {
+            case IR::Opcode::IAdd32:
+                AddingByteSwapsWorkaround(*block, inst);
+                break;
+            default:
+                break;
+            }
+        }
+    }
+}
+
+} // namespace Shader::Optimization
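A note on the new mask in StorageOffset: ~(alignment - 1U) rounds the cbuf base down to the previous multiple of the host's minimum SSBO alignment, and because ISub(offset, low_cbuf) now subtracts that smaller, aligned base, each per-access byte offset grows by the base's misalignment (which the corresponding host-side buffer binding presumably compensates for). A minimal standalone sketch of the arithmetic, assuming the alignment is a power of two; AlignDown is a hypothetical helper, not part of the pass:

#include <cassert>
#include <cstdint>

// Hypothetical helper mirroring ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U))).
// Only valid for power-of-two alignments, which is what a minimum SSBO alignment is.
constexpr std::uint32_t AlignDown(std::uint32_t value, std::uint32_t alignment) {
    return value & ~(alignment - 1U);
}

int main() {
    assert(AlignDown(0x1234, 0x100) == 0x1200); // base snapped down to the host alignment
    assert(AlignDown(0x1200, 0x100) == 0x1200); // an already aligned base is unchanged
    // The dropped 0x34 is absorbed by the per-access offset: subtracting the smaller,
    // aligned base makes every computed byte offset larger by exactly that amount.
    return 0;
}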
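Similarly, the IAdd32 to BitwiseOr32 rewrite in vendor_workaround_pass.cpp is value-preserving for the matched pattern because the operands occupy disjoint bit ranges: the left operand has its low 16 bits cleared by the shift and the right operand is a 16-bit extracted field, so no carry can occur. A small sketch of that equivalence in plain C++, with the optional IMul32 left out:

#include <cassert>
#include <cstdint>

int main() {
    const std::uint32_t factor_a = 0xCAFEBABE;        // arbitrary example value
    const std::uint32_t lhs_bfe = factor_a & 0xFFFFu; // BitFieldUExtract %factor_a, #0, #16
    const std::uint32_t lhs_shl = lhs_bfe << 16;      // ShiftLeftLogical32 %lhs_bfe, #16
    const std::uint32_t rhs_bfe = factor_a >> 16;     // BitFieldUExtract %factor_a, #16, #16
    // Low 16 bits of lhs_shl are zero and rhs_bfe fits in 16 bits, so add equals or.
    assert((lhs_shl + rhs_bfe) == (lhs_shl | rhs_bfe));
    assert((lhs_shl | rhs_bfe) == 0xBABECAFEu); // the pattern swaps the two half-words
    return 0;
}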