From 2d48a7b4d0666ad16d03a22d85712617a0849046 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sat, 9 Jan 2021 03:30:07 -0300
Subject: shader: Initial recompiler work

---
 .../ir_opt/dead_code_elimination_pass.cpp          | 23 ++++++
 .../ir_opt/get_set_elimination_pass.cpp            | 87 ++++++++++++++++++++++
 .../ir_opt/identity_removal_pass.cpp               | 37 +++++++++
 src/shader_recompiler/ir_opt/passes.h              | 16 ++++
 src/shader_recompiler/ir_opt/verification_pass.cpp | 50 +++++++++++++
 5 files changed, 213 insertions(+)
 create mode 100644 src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
 create mode 100644 src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp
 create mode 100644 src/shader_recompiler/ir_opt/identity_removal_pass.cpp
 create mode 100644 src/shader_recompiler/ir_opt/passes.h
 create mode 100644 src/shader_recompiler/ir_opt/verification_pass.cpp
(limited to 'src/shader_recompiler/ir_opt')

diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
new file mode 100644
index 000000000..bbaa412f6
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
@@ -0,0 +1,23 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <ranges>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+void DeadCodeEliminationPass(IR::Block& block) {
+    // We iterate over the instructions in reverse order.
+    // This is because removing an instruction reduces the number of uses for earlier instructions.
+    for (IR::Inst& inst : std::views::reverse(block)) {
+        if (!inst.HasUses() && !inst.MayHaveSideEffects()) {
+            inst.Invalidate();
+        }
+    }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp b/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp
new file mode 100644
index 000000000..21b8526cd
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp
@@ -0,0 +1,87 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+using Iterator = IR::Block::iterator;
+
+enum class TrackingType {
+    Reg,
+};
+
+struct RegisterInfo {
+    IR::Value register_value;
+    TrackingType tracking_type;
+    Iterator last_set_instruction;
+    bool set_instruction_present = false;
+};
+
+void DoSet(IR::Block& block, RegisterInfo& info, IR::Value value, Iterator set_inst,
+           TrackingType tracking_type) {
+    if (info.set_instruction_present) {
+        info.last_set_instruction->Invalidate();
+        block.Instructions().erase(info.last_set_instruction);
+    }
+    info.register_value = value;
+    info.tracking_type = tracking_type;
+    info.set_instruction_present = true;
+    info.last_set_instruction = set_inst;
+}
+
+RegisterInfo Nothing(Iterator get_inst, TrackingType tracking_type) {
+    RegisterInfo info{};
+    info.register_value = IR::Value{&*get_inst};
+    info.tracking_type = tracking_type;
+    return info;
+}
+
+void DoGet(RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) {
+    if (info.register_value.IsEmpty()) {
+        info = Nothing(get_inst, tracking_type);
+        return;
+    }
+    if (info.tracking_type == tracking_type) {
+        get_inst->ReplaceUsesWith(info.register_value);
+        return;
+    }
+    info = Nothing(get_inst, tracking_type);
+}
+} // Anonymous namespace
+
+void GetSetElimination(IR::Block& block) {
+    std::array<RegisterInfo, 255> reg_info;
+
+    for (Iterator inst = block.begin(); inst != block.end(); ++inst) {
+        switch (inst->Opcode()) {
+        case IR::Opcode::GetRegister: {
+            const IR::Reg reg{inst->Arg(0).Reg()};
+            if (reg == IR::Reg::RZ) {
+                break;
+            }
+            const size_t index{static_cast<size_t>(reg)};
+            DoGet(reg_info.at(index), inst, TrackingType::Reg);
+            break;
+        }
+        case IR::Opcode::SetRegister: {
+            const IR::Reg reg{inst->Arg(0).Reg()};
+            if (reg == IR::Reg::RZ) {
+                break;
+            }
+            const size_t index{static_cast<size_t>(reg)};
+            DoSet(block, reg_info.at(index), inst->Arg(1), inst, TrackingType::Reg);
+            break;
+        }
+        default:
+            break;
+        }
+    }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
new file mode 100644
index 000000000..f9bb063fb
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -0,0 +1,37 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <vector>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+void IdentityRemovalPass(IR::Block& block) {
+    std::vector<IR::Inst*> to_invalidate;
+
+    for (auto inst = block.begin(); inst != block.end();) {
+        const size_t num_args{inst->NumArgs()};
+        for (size_t i = 0; i < num_args; ++i) {
+            IR::Value arg;
+            while ((arg = inst->Arg(i)).IsIdentity()) {
+                inst->SetArg(i, arg.Inst()->Arg(0));
+            }
+        }
+        if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
+            to_invalidate.push_back(&*inst);
+            inst = block.Instructions().erase(inst);
+        } else {
+            ++inst;
+        }
+    }
+
+    for (IR::Inst* const inst : to_invalidate) {
+        inst->Invalidate();
+    }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
new file mode 100644
index 000000000..fe5454e9a
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -0,0 +1,16 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+
+namespace Shader::Optimization {
+
+void DeadCodeEliminationPass(IR::Block& block);
+void GetSetElimination(IR::Block& block);
+void IdentityRemovalPass(IR::Block& block);
+void VerificationPass(const IR::Block& block);
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
new file mode 100644
index 000000000..36d9ae39b
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -0,0 +1,50 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+ +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +static void ValidateTypes(const IR::Block& block) { + for (const IR::Inst& inst : block) { + const size_t num_args{inst.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + const IR::Type t1{inst.Arg(i).Type()}; + const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)}; + if (!IR::AreTypesCompatible(t1, t2)) { + throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(block)); + } + } + } +} + +static void ValidateUses(const IR::Block& block) { + std::map actual_uses; + for (const IR::Inst& inst : block) { + const size_t num_args{inst.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + const IR::Value arg{inst.Arg(i)}; + if (!arg.IsImmediate()) { + ++actual_uses[arg.Inst()]; + } + } + } + for (const auto [inst, uses] : actual_uses) { + if (inst->UseCount() != uses) { + throw LogicError("Invalid uses in block:\n{}", IR::DumpBlock(block)); + } + } +} + +void VerificationPass(const IR::Block& block) { + ValidateTypes(block); + ValidateUses(block); +} + +} // namespace Shader::Optimization -- cgit v1.2.3 From 6c4cc0cd062fbbba5349da1108d3c23cb330ca8a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 2 Feb 2021 21:07:00 -0300 Subject: shader: SSA and dominance --- .../ir_opt/identity_removal_pass.cpp | 1 - src/shader_recompiler/ir_opt/passes.h | 9 ++ src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 155 +++++++++++++++++++++ 3 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index f9bb063fb..7f8500087 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -28,7 +28,6 @@ void IdentityRemovalPass(IR::Block& block) { ++inst; } } - for (IR::Inst* const inst : to_invalidate) { inst->Invalidate(); } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index fe5454e9a..83f094d73 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -5,12 +5,21 @@ #pragma once #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/function.h" namespace Shader::Optimization { +template +void Invoke(Func&& func, IR::Function& function) { + for (const auto& block : function.blocks) { + func(*block); + } +} + void DeadCodeEliminationPass(IR::Block& block); void GetSetElimination(IR::Block& block); void IdentityRemovalPass(IR::Block& block); +void SsaRewritePass(IR::Function& function); void VerificationPass(const IR::Block& block); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp new file mode 100644 index 000000000..a4b256a40 --- /dev/null +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -0,0 +1,155 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file implements the SSA rewriting algorithm proposed in +// +// Simple and Efficient Construction of Static Single Assignment Form. 
+// Braun M., Buchwald S., Hack S., Leißa R., Mallon C., Zwinkau A. (2013) +// In: Jhala R., De Bosschere K. (eds) +// Compiler Construction. CC 2013. +// Lecture Notes in Computer Science, vol 7791. +// Springer, Berlin, Heidelberg +// +// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 +// + +#include + +#include + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/pred.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +using ValueMap = boost::container::flat_map>; + +struct DefTable { + [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { + return regs[IR::RegIndex(variable)]; + } + + [[nodiscard]] ValueMap& operator[](IR::Pred variable) noexcept { + return preds[IR::PredIndex(variable)]; + } + + std::array regs; + std::array preds; +}; + +IR::Opcode UndefOpcode(IR::Reg) noexcept { + return IR::Opcode::Undef32; +} + +IR::Opcode UndefOpcode(IR::Pred) noexcept { + return IR::Opcode::Undef1; +} + +[[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { + return inst.Opcode() == IR::Opcode::Phi; +} + +class Pass { +public: + void WriteVariable(auto variable, IR::Block* block, const IR::Value& value) { + current_def[variable].insert_or_assign(block, value); + } + + IR::Value ReadVariable(auto variable, IR::Block* block) { + auto& def{current_def[variable]}; + if (const auto it{def.find(block)}; it != def.end()) { + return it->second; + } + return ReadVariableRecursive(variable, block); + } + +private: + IR::Value ReadVariableRecursive(auto variable, IR::Block* block) { + IR::Value val; + if (const std::span preds{block->ImmediatePredecessors()}; preds.size() == 1) { + val = ReadVariable(variable, preds.front()); + } else { + // Break potential cycles with operandless phi + val = IR::Value{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + WriteVariable(variable, block, val); + val = AddPhiOperands(variable, val, block); + } + WriteVariable(variable, block, val); + return val; + } + + IR::Value AddPhiOperands(auto variable, const IR::Value& phi, IR::Block* block) { + for (IR::Block* const pred : block->ImmediatePredecessors()) { + phi.Inst()->AddPhiOperand(pred, ReadVariable(variable, pred)); + } + return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); + } + + IR::Value TryRemoveTrivialPhi(const IR::Value& phi, IR::Block* block, IR::Opcode undef_opcode) { + IR::Value same; + for (const auto& pair : phi.Inst()->PhiOperands()) { + const IR::Value& op{pair.second}; + if (op == same || op == phi) { + // Unique value or self-reference + continue; + } + if (!same.IsEmpty()) { + // The phi merges at least two values: not trivial + return phi; + } + same = op; + } + if (same.IsEmpty()) { + // The phi is unreachable or in the start block + const auto first_not_phi{std::ranges::find_if_not(block->Instructions(), IsPhi)}; + same = IR::Value{&*block->PrependNewInst(first_not_phi, undef_opcode)}; + } + // Reroute all uses of phi to same and remove phi + phi.Inst()->ReplaceUsesWith(same); + // TODO: Try to recursively remove all phi users, which might have become trivial + return same; + } + + DefTable current_def; +}; +} // Anonymous namespace + +void SsaRewritePass(IR::Function& function) { + Pass pass; + for (const auto& block : function.blocks) { + for 
(IR::Inst& inst : block->Instructions()) { + switch (inst.Opcode()) { + case IR::Opcode::SetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + pass.WriteVariable(reg, block.get(), inst.Arg(1)); + } + break; + case IR::Opcode::SetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + pass.WriteVariable(pred, block.get(), inst.Arg(1)); + } + break; + case IR::Opcode::GetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get())); + } + break; + case IR::Opcode::GetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get())); + } + break; + default: + break; + } + } + } +} + +} // namespace Shader::Optimization -- cgit v1.2.3 From d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 3 Feb 2021 16:43:04 -0300 Subject: shader: Initial instruction support --- .../ir_opt/get_set_elimination_pass.cpp | 87 ---------------------- src/shader_recompiler/ir_opt/passes.h | 1 - 2 files changed, 88 deletions(-) delete mode 100644 src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp b/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp deleted file mode 100644 index 21b8526cd..000000000 --- a/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" -#include "shader_recompiler/ir_opt/passes.h" - -namespace Shader::Optimization { -namespace { -using Iterator = IR::Block::iterator; - -enum class TrackingType { - Reg, -}; - -struct RegisterInfo { - IR::Value register_value; - TrackingType tracking_type; - Iterator last_set_instruction; - bool set_instruction_present = false; -}; - -void DoSet(IR::Block& block, RegisterInfo& info, IR::Value value, Iterator set_inst, - TrackingType tracking_type) { - if (info.set_instruction_present) { - info.last_set_instruction->Invalidate(); - block.Instructions().erase(info.last_set_instruction); - } - info.register_value = value; - info.tracking_type = tracking_type; - info.set_instruction_present = true; - info.last_set_instruction = set_inst; -} - -RegisterInfo Nothing(Iterator get_inst, TrackingType tracking_type) { - RegisterInfo info{}; - info.register_value = IR::Value{&*get_inst}; - info.tracking_type = tracking_type; - return info; -} - -void DoGet(RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { - if (info.register_value.IsEmpty()) { - info = Nothing(get_inst, tracking_type); - return; - } - if (info.tracking_type == tracking_type) { - get_inst->ReplaceUsesWith(info.register_value); - return; - } - info = Nothing(get_inst, tracking_type); -} -} // Anonymous namespace - -void GetSetElimination(IR::Block& block) { - std::array reg_info; - - for (Iterator inst = block.begin(); inst != block.end(); ++inst) { - switch (inst->Opcode()) { - case IR::Opcode::GetRegister: { - const IR::Reg reg{inst->Arg(0).Reg()}; - if (reg == IR::Reg::RZ) { - break; - } - const size_t index{static_cast(reg)}; - DoGet(reg_info.at(index), inst, TrackingType::Reg); - break; - } - case IR::Opcode::SetRegister: { - 
const IR::Reg reg{inst->Arg(0).Reg()}; - if (reg == IR::Reg::RZ) { - break; - } - const size_t index{static_cast(reg)}; - DoSet(block, reg_info.at(index), inst->Arg(1), inst, TrackingType::Reg); - break; - } - default: - break; - } - } -} - -} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 83f094d73..7ed4005ed 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -17,7 +17,6 @@ void Invoke(Func&& func, IR::Function& function) { } void DeadCodeEliminationPass(IR::Block& block); -void GetSetElimination(IR::Block& block); void IdentityRemovalPass(IR::Block& block); void SsaRewritePass(IR::Function& function); void VerificationPass(const IR::Block& block); -- cgit v1.2.3 From e81739493a0cacc1efe3295f9d287d5d31b1a989 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 5 Feb 2021 05:58:02 -0300 Subject: shader: Constant propagation and global memory to storage buffer --- .../ir_opt/constant_propagation_pass.cpp | 146 +++++++++ .../global_memory_to_storage_buffer_pass.cpp | 331 +++++++++++++++++++++ .../ir_opt/identity_removal_pass.cpp | 28 +- src/shader_recompiler/ir_opt/passes.h | 6 +- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 56 +++- src/shader_recompiler/ir_opt/verification_pass.cpp | 42 +-- 6 files changed, 573 insertions(+), 36 deletions(-) create mode 100644 src/shader_recompiler/ir_opt/constant_propagation_pass.cpp create mode 100644 src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp new file mode 100644 index 000000000..02f5b653d --- /dev/null +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -0,0 +1,146 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "common/bit_util.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +[[nodiscard]] u32 BitFieldUExtract(u32 base, u32 shift, u32 count) { + if (static_cast(shift) + static_cast(count) > Common::BitSize()) { + throw LogicError("Undefined result in BitFieldUExtract({}, {}, {})", base, shift, count); + } + return (base >> shift) & ((1U << count) - 1); +} + +template +[[nodiscard]] T Arg(const IR::Value& value) { + if constexpr (std::is_same_v) { + return value.U1(); + } else if constexpr (std::is_same_v) { + return value.U32(); + } else if constexpr (std::is_same_v) { + return value.U64(); + } +} + +template +bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { + const auto arg = [](const IR::Value& value) { + if constexpr (std::is_invocable_r_v) { + return value.U1(); + } else if constexpr (std::is_invocable_r_v) { + return value.U32(); + } else if constexpr (std::is_invocable_r_v) { + return value.U64(); + } + }; + const IR::Value lhs{inst.Arg(0)}; + const IR::Value rhs{inst.Arg(1)}; + + const bool is_lhs_immediate{lhs.IsImmediate()}; + const bool is_rhs_immediate{rhs.IsImmediate()}; + + if (is_lhs_immediate && is_rhs_immediate) { + const auto result{imm_fn(arg(lhs), arg(rhs))}; + inst.ReplaceUsesWith(IR::Value{result}); + return false; + } + if (is_lhs_immediate && !is_rhs_immediate) { + IR::Inst* const rhs_inst{rhs.InstRecursive()}; + if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) { + const auto combined{imm_fn(arg(lhs), arg(rhs_inst->Arg(1)))}; + inst.SetArg(0, rhs_inst->Arg(0)); + inst.SetArg(1, IR::Value{combined}); + } else { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + } + } + if (!is_lhs_immediate && is_rhs_immediate) { + const IR::Inst* const lhs_inst{lhs.InstRecursive()}; + if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) { + const auto combined{imm_fn(arg(rhs), arg(lhs_inst->Arg(1)))}; + inst.SetArg(0, lhs_inst->Arg(0)); + inst.SetArg(1, IR::Value{combined}); + } + } + return true; +} + +void FoldGetRegister(IR::Inst& inst) { + if (inst.Arg(0).Reg() == IR::Reg::RZ) { + inst.ReplaceUsesWith(IR::Value{u32{0}}); + } +} + +void FoldGetPred(IR::Inst& inst) { + if (inst.Arg(0).Pred() == IR::Pred::PT) { + inst.ReplaceUsesWith(IR::Value{true}); + } +} + +template +void FoldAdd(IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + return; + } + if (!FoldCommutative(inst, [](T a, T b) { return a + b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate() && Arg(rhs) == 0) { + inst.ReplaceUsesWith(inst.Arg(0)); + } +} + +void FoldLogicalAnd(IR::Inst& inst) { + if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate()) { + if (rhs.U1()) { + inst.ReplaceUsesWith(inst.Arg(0)); + } else { + inst.ReplaceUsesWith(IR::Value{false}); + } + } +} + +void ConstantPropagation(IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::GetRegister: + return FoldGetRegister(inst); + case IR::Opcode::GetPred: + return FoldGetPred(inst); + case IR::Opcode::IAdd32: + return FoldAdd(inst); + case IR::Opcode::IAdd64: + return FoldAdd(inst); + case IR::Opcode::BitFieldUExtract: + if (inst.AreAllArgsImmediates() && !inst.HasAssociatedPseudoOperation()) { + inst.ReplaceUsesWith(IR::Value{ + BitFieldUExtract(inst.Arg(0).U32(), 
inst.Arg(1).U32(), inst.Arg(2).U32())}); + } + break; + case IR::Opcode::LogicalAnd: + return FoldLogicalAnd(inst); + default: + break; + } +} +} // Anonymous namespace + +void ConstantPropagationPass(IR::Block& block) { + std::ranges::for_each(block, ConstantPropagation); +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp new file mode 100644 index 000000000..ee69a5c9d --- /dev/null +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -0,0 +1,331 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include + +#include +#include + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +/// Address in constant buffers to the storage buffer descriptor +struct StorageBufferAddr { + auto operator<=>(const StorageBufferAddr&) const noexcept = default; + + u32 index; + u32 offset; +}; + +/// Block iterator to a global memory instruction and the storage buffer it uses +struct StorageInst { + StorageBufferAddr storage_buffer; + IR::Block::iterator inst; +}; + +/// Bias towards a certain range of constant buffers when looking for storage buffers +struct Bias { + u32 index; + u32 offset_begin; + u32 offset_end; +}; + +using StorageBufferSet = + boost::container::flat_set, + boost::container::small_vector>; +using StorageInstVector = boost::container::small_vector; + +/// Returns true when the instruction is a global memory instruction +bool IsGlobalMemory(const IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + return true; + default: + return false; + } +} + +/// Converts a global memory opcode to its storage buffer equivalent +IR::Opcode GlobalToStorage(IR::Opcode opcode) { + switch (opcode) { + case IR::Opcode::LoadGlobalS8: + return IR::Opcode::LoadStorageS8; + case IR::Opcode::LoadGlobalU8: + return IR::Opcode::LoadStorageU8; + case IR::Opcode::LoadGlobalS16: + return IR::Opcode::LoadStorageS16; + case IR::Opcode::LoadGlobalU16: + return IR::Opcode::LoadStorageU16; + case IR::Opcode::LoadGlobal32: + return IR::Opcode::LoadStorage32; + case IR::Opcode::LoadGlobal64: + return IR::Opcode::LoadStorage64; + case IR::Opcode::LoadGlobal128: + return IR::Opcode::LoadStorage128; + case IR::Opcode::WriteGlobalS8: + return IR::Opcode::WriteStorageS8; + case IR::Opcode::WriteGlobalU8: + return IR::Opcode::WriteStorageU8; + case IR::Opcode::WriteGlobalS16: + return IR::Opcode::WriteStorageS16; + case IR::Opcode::WriteGlobalU16: + return IR::Opcode::WriteStorageU16; + case IR::Opcode::WriteGlobal32: + return IR::Opcode::WriteStorage32; + case IR::Opcode::WriteGlobal64: + return IR::Opcode::WriteStorage64; + case IR::Opcode::WriteGlobal128: + return 
IR::Opcode::WriteStorage128; + default: + throw InvalidArgument("Invalid global memory opcode {}", opcode); + } +} + +/// Returns true when a storage buffer address satisfies a bias +bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept { + return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin && + storage_buffer.offset < bias.offset_end; +} + +/// Ignores a global memory operation, reads return zero and writes are ignored +void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) { + const IR::Value zero{u32{0}}; + switch (inst->Opcode()) { + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobal32: + inst->ReplaceUsesWith(zero); + break; + case IR::Opcode::LoadGlobal64: + inst->ReplaceUsesWith( + IR::Value{&*block.PrependNewInst(inst, IR::Opcode::CompositeConstruct2, {zero, zero})}); + break; + case IR::Opcode::LoadGlobal128: + inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst( + inst, IR::Opcode::CompositeConstruct4, {zero, zero, zero, zero})}); + break; + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + inst->Invalidate(); + break; + default: + throw LogicError("Invalid opcode to ignore its global memory operation {}", inst->Opcode()); + } +} + +/// Recursively tries to track the storage buffer address used by a global memory instruction +std::optional Track(const IR::Value& value, const Bias* bias) { + if (value.IsImmediate()) { + // Immediates can't be a storage buffer + return std::nullopt; + } + const IR::Inst* const inst{value.InstRecursive()}; + if (inst->Opcode() == IR::Opcode::GetCbuf) { + const IR::Value index{inst->Arg(0)}; + const IR::Value offset{inst->Arg(1)}; + if (!index.IsImmediate()) { + // Definitely not a storage buffer if it's read from a non-immediate index + return std::nullopt; + } + if (!offset.IsImmediate()) { + // TODO: Support SSBO arrays + return std::nullopt; + } + const StorageBufferAddr storage_buffer{ + .index = index.U32(), + .offset = offset.U32(), + }; + if (bias && !MeetsBias(storage_buffer, *bias)) { + // We have to blacklist some addresses in case we wrongly point to them + return std::nullopt; + } + return storage_buffer; + } + // Reversed loops are more likely to find the right result + for (size_t arg = inst->NumArgs(); arg--;) { + if (const std::optional storage_buffer{Track(inst->Arg(arg), bias)}) { + return *storage_buffer; + } + } + return std::nullopt; +} + +/// Collects the storage buffer used by a global memory instruction and the instruction itself +void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, + StorageBufferSet& storage_buffer_set, StorageInstVector& to_replace) { + // NVN puts storage buffers in a specific range, we have to bias towards these addresses to + // avoid getting false positives + static constexpr Bias nvn_bias{ + .index{0}, + .offset_begin{0x110}, + .offset_end{0x610}, + }; + // First try to find storage buffers in the NVN address + const IR::U64 addr{inst->Arg(0)}; + std::optional storage_buffer{Track(addr, &nvn_bias)}; + if (!storage_buffer) { + // If it fails, track without a bias + storage_buffer = Track(addr, nullptr); + if (!storage_buffer) { + // If that also failed, drop the global memory usage + 
IgnoreGlobalMemory(block, inst); + } + } + // Collect storage buffer and the instruction + storage_buffer_set.insert(*storage_buffer); + to_replace.push_back(StorageInst{ + .storage_buffer{*storage_buffer}, + .inst{inst}, + }); +} + +/// Tries to track the first 32-bits of a global memory instruction +std::optional TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) { + // The first argument is the low level GPU pointer to the global memory instruction + const IR::U64 addr{inst->Arg(0)}; + if (addr.IsImmediate()) { + // Not much we can do if it's an immediate + return std::nullopt; + } + // This address is expected to either be a PackUint2x32 or a IAdd64 + IR::Inst* addr_inst{addr.InstRecursive()}; + s32 imm_offset{0}; + if (addr_inst->Opcode() == IR::Opcode::IAdd64) { + // If it's an IAdd64, get the immediate offset it is applying and grab the address + // instruction. This expects for the instruction to be canonicalized having the address on + // the first argument and the immediate offset on the second one. + const IR::U64 imm_offset_value{addr_inst->Arg(1)}; + if (!imm_offset_value.IsImmediate()) { + return std::nullopt; + } + imm_offset = static_cast(static_cast(imm_offset_value.U64())); + const IR::U64 iadd_addr{addr_inst->Arg(0)}; + if (iadd_addr.IsImmediate()) { + return std::nullopt; + } + addr_inst = iadd_addr.Inst(); + } + // With IAdd64 handled, now PackUint2x32 is expected without exceptions + if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { + return std::nullopt; + } + // PackUint2x32 is expected to be generated from a vector + const IR::Value vector{addr_inst->Arg(0)}; + if (vector.IsImmediate()) { + return std::nullopt; + } + // This vector is expected to be a CompositeConstruct2 + IR::Inst* const vector_inst{vector.InstRecursive()}; + if (vector_inst->Opcode() != IR::Opcode::CompositeConstruct2) { + return std::nullopt; + } + // Grab the first argument from the CompositeConstruct2, this is the low address. + // Re-apply the offset in case we found one. + const IR::U32 low_addr{vector_inst->Arg(0)}; + return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr; +} + +/// Returns the offset in indices (not bytes) for an equivalent storage instruction +IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) { + IR::IREmitter ir{block, inst}; + IR::U32 offset; + if (const std::optional low_addr{TrackLowAddress(ir, &*inst)}) { + offset = *low_addr; + } else { + offset = ir.ConvertU(32, IR::U64{inst->Arg(0)}); + } + // Subtract the least significant 32 bits from the guest offset. The result is the storage + // buffer offset in bytes. 
+ const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; + return ir.ISub(offset, low_cbuf); +} + +/// Replace a global memory load instruction with its storage buffer equivalent +void ReplaceLoad(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, + const IR::U32& offset) { + const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())}; + const IR::Value value{&*block.PrependNewInst(inst, new_opcode, {storage_index, offset})}; + inst->ReplaceUsesWith(value); +} + +/// Replace a global memory write instruction with its storage buffer equivalent +void ReplaceWrite(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, + const IR::U32& offset) { + const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())}; + block.PrependNewInst(inst, new_opcode, {storage_index, offset, inst->Arg(1)}); + inst->Invalidate(); +} + +/// Replace a global memory instruction with its storage buffer equivalent +void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, + const IR::U32& offset) { + switch (inst->Opcode()) { + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: + return ReplaceLoad(block, inst, storage_index, offset); + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + return ReplaceWrite(block, inst, storage_index, offset); + default: + throw InvalidArgument("Invalid global memory opcode {}", inst->Opcode()); + } +} +} // Anonymous namespace + +void GlobalMemoryToStorageBufferPass(IR::Block& block) { + StorageBufferSet storage_buffers; + StorageInstVector to_replace; + + for (IR::Block::iterator inst{block.begin()}; inst != block.end(); ++inst) { + if (!IsGlobalMemory(*inst)) { + continue; + } + CollectStorageBuffers(block, inst, storage_buffers, to_replace); + } + for (const auto [storage_buffer, inst] : to_replace) { + const auto it{storage_buffers.find(storage_buffer)}; + const IR::U32 storage_index{IR::Value{static_cast(storage_buffers.index_of(it))}}; + const IR::U32 offset{StorageOffset(block, inst, storage_buffer)}; + Replace(block, inst, storage_index, offset); + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 7f8500087..39a972919 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -10,22 +10,24 @@ namespace Shader::Optimization { -void IdentityRemovalPass(IR::Block& block) { +void IdentityRemovalPass(IR::Function& function) { std::vector to_invalidate; - for (auto inst = block.begin(); inst != block.end();) { - const size_t num_args{inst->NumArgs()}; - for (size_t i = 0; i < num_args; ++i) { - IR::Value arg; - while ((arg = inst->Arg(i)).IsIdentity()) { - inst->SetArg(i, arg.Inst()->Arg(0)); + for (auto& block : function.blocks) { + for (auto inst = block->begin(); inst != block->end();) { + const size_t num_args{inst->NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + IR::Value arg; + while ((arg = inst->Arg(i)).IsIdentity()) { + inst->SetArg(i, arg.Inst()->Arg(0)); + } + } + if (inst->Opcode() == IR::Opcode::Identity 
|| inst->Opcode() == IR::Opcode::Void) { + to_invalidate.push_back(&*inst); + inst = block->Instructions().erase(inst); + } else { + ++inst; } - } - if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) { - to_invalidate.push_back(&*inst); - inst = block.Instructions().erase(inst); - } else { - ++inst; } } for (IR::Inst* const inst : to_invalidate) { diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 7ed4005ed..578a24d89 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -16,9 +16,11 @@ void Invoke(Func&& func, IR::Function& function) { } } +void ConstantPropagationPass(IR::Block& block); void DeadCodeEliminationPass(IR::Block& block); -void IdentityRemovalPass(IR::Block& block); +void GlobalMemoryToStorageBufferPass(IR::Block& block); +void IdentityRemovalPass(IR::Function& function); void SsaRewritePass(IR::Function& function); -void VerificationPass(const IR::Block& block); +void VerificationPass(const IR::Function& function); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index a4b256a40..3c9b020e0 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -14,8 +14,6 @@ // https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 // -#include - #include #include "shader_recompiler/frontend/ir/basic_block.h" @@ -30,6 +28,12 @@ namespace Shader::Optimization { namespace { using ValueMap = boost::container::flat_map>; +struct FlagTag {}; +struct ZeroFlagTag : FlagTag {}; +struct SignFlagTag : FlagTag {}; +struct CarryFlagTag : FlagTag {}; +struct OverflowFlagTag : FlagTag {}; + struct DefTable { [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { return regs[IR::RegIndex(variable)]; @@ -39,8 +43,28 @@ struct DefTable { return preds[IR::PredIndex(variable)]; } + [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept { + return zero_flag; + } + + [[nodiscard]] ValueMap& operator[](SignFlagTag) noexcept { + return sign_flag; + } + + [[nodiscard]] ValueMap& operator[](CarryFlagTag) noexcept { + return carry_flag; + } + + [[nodiscard]] ValueMap& operator[](OverflowFlagTag) noexcept { + return overflow_flag; + } + std::array regs; std::array preds; + ValueMap zero_flag; + ValueMap sign_flag; + ValueMap carry_flag; + ValueMap overflow_flag; }; IR::Opcode UndefOpcode(IR::Reg) noexcept { @@ -51,6 +75,10 @@ IR::Opcode UndefOpcode(IR::Pred) noexcept { return IR::Opcode::Undef1; } +IR::Opcode UndefOpcode(const FlagTag&) noexcept { + return IR::Opcode::Undef1; +} + [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { return inst.Opcode() == IR::Opcode::Phi; } @@ -135,6 +163,18 @@ void SsaRewritePass(IR::Function& function) { pass.WriteVariable(pred, block.get(), inst.Arg(1)); } break; + case IR::Opcode::SetZFlag: + pass.WriteVariable(ZeroFlagTag{}, block.get(), inst.Arg(0)); + break; + case IR::Opcode::SetSFlag: + pass.WriteVariable(SignFlagTag{}, block.get(), inst.Arg(0)); + break; + case IR::Opcode::SetCFlag: + pass.WriteVariable(CarryFlagTag{}, block.get(), inst.Arg(0)); + break; + case IR::Opcode::SetOFlag: + pass.WriteVariable(OverflowFlagTag{}, block.get(), inst.Arg(0)); + break; case IR::Opcode::GetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get())); @@ -145,6 +185,18 @@ void SsaRewritePass(IR::Function& 
function) { inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get())); } break; + case IR::Opcode::GetZFlag: + inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block.get())); + break; + case IR::Opcode::GetSFlag: + inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block.get())); + break; + case IR::Opcode::GetCFlag: + inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block.get())); + break; + case IR::Opcode::GetOFlag: + inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block.get())); + break; default: break; } diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 36d9ae39b..8a5adf5a2 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -11,40 +11,44 @@ namespace Shader::Optimization { -static void ValidateTypes(const IR::Block& block) { - for (const IR::Inst& inst : block) { - const size_t num_args{inst.NumArgs()}; - for (size_t i = 0; i < num_args; ++i) { - const IR::Type t1{inst.Arg(i).Type()}; - const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)}; - if (!IR::AreTypesCompatible(t1, t2)) { - throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(block)); +static void ValidateTypes(const IR::Function& function) { + for (const auto& block : function.blocks) { + for (const IR::Inst& inst : *block) { + const size_t num_args{inst.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + const IR::Type t1{inst.Arg(i).Type()}; + const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)}; + if (!IR::AreTypesCompatible(t1, t2)) { + throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block)); + } } } } } -static void ValidateUses(const IR::Block& block) { +static void ValidateUses(const IR::Function& function) { std::map actual_uses; - for (const IR::Inst& inst : block) { - const size_t num_args{inst.NumArgs()}; - for (size_t i = 0; i < num_args; ++i) { - const IR::Value arg{inst.Arg(i)}; - if (!arg.IsImmediate()) { - ++actual_uses[arg.Inst()]; + for (const auto& block : function.blocks) { + for (const IR::Inst& inst : *block) { + const size_t num_args{inst.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + const IR::Value arg{inst.Arg(i)}; + if (!arg.IsImmediate()) { + ++actual_uses[arg.Inst()]; + } } } } for (const auto [inst, uses] : actual_uses) { if (inst->UseCount() != uses) { - throw LogicError("Invalid uses in block:\n{}", IR::DumpBlock(block)); + throw LogicError("Invalid uses in block:" /*, IR::DumpFunction(function)*/); } } } -void VerificationPass(const IR::Block& block) { - ValidateTypes(block); - ValidateUses(block); +void VerificationPass(const IR::Function& function) { + ValidateTypes(function); + ValidateUses(function); } } // namespace Shader::Optimization -- cgit v1.2.3 From dc04a50ac2aa0bc71db701d0eea857765c2581f0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Jul 2021 17:11:47 -0300 Subject: shader: Remove illegal character in SSA pass --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 3c9b020e0..a62d3f56b 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -5,7 +5,7 @@ // This file implements the SSA rewriting algorithm proposed in // // Simple and Efficient Construction of Static Single Assignment Form. 
-// Braun M., Buchwald S., Hack S., Leißa R., Mallon C., Zwinkau A. (2013) +// Braun M., Buchwald S., Hack S., Leiba R., Mallon C., Zwinkau A. (2013) // In: Jhala R., De Bosschere K. (eds) // Compiler Construction. CC 2013. // Lecture Notes in Computer Science, vol 7791. -- cgit v1.2.3 From be94ee88d227d0d3dbeabe9ade98bacd910c7a7e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 5 Feb 2021 19:19:36 -0300 Subject: shader: Make typed IR --- .../ir_opt/constant_propagation_pass.cpp | 20 ++++++++++++++++++++ .../ir_opt/global_memory_to_storage_buffer_pass.cpp | 21 +++++++++++---------- 2 files changed, 31 insertions(+), 10 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 02f5b653d..7fb3192d8 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -5,6 +5,7 @@ #include #include +#include "common/bit_cast.h" #include "common/bit_util.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/microinstruction.h" @@ -25,6 +26,8 @@ template return value.U1(); } else if constexpr (std::is_same_v) { return value.U32(); + } else if constexpr (std::is_same_v) { + return value.F32(); } else if constexpr (std::is_same_v) { return value.U64(); } @@ -115,6 +118,19 @@ void FoldLogicalAnd(IR::Inst& inst) { } } +template +void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { + const IR::Value value{inst.Arg(0)}; + if (value.IsImmediate()) { + inst.ReplaceUsesWith(IR::Value{Common::BitCast(Arg(value))}); + return; + } + IR::Inst* const arg_inst{value.InstRecursive()}; + if (value.InstRecursive()->Opcode() == reverse) { + inst.ReplaceUsesWith(arg_inst->Arg(0)); + } +} + void ConstantPropagation(IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: @@ -123,6 +139,10 @@ void ConstantPropagation(IR::Inst& inst) { return FoldGetPred(inst); case IR::Opcode::IAdd32: return FoldAdd(inst); + case IR::Opcode::BitCastF32U32: + return FoldBitCast(inst, IR::Opcode::BitCastU32F32); + case IR::Opcode::BitCastU32F32: + return FoldBitCast(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: return FoldAdd(inst); case IR::Opcode::BitFieldUExtract: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index ee69a5c9d..34393e1d5 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -108,8 +108,8 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce storage_buffer.offset < bias.offset_end; } -/// Ignores a global memory operation, reads return zero and writes are ignored -void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) { +/// Discards a global memory operation, reads return zero and writes are ignored +void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) { const IR::Value zero{u32{0}}; switch (inst->Opcode()) { case IR::Opcode::LoadGlobalS8: @@ -120,12 +120,12 @@ void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) { inst->ReplaceUsesWith(zero); break; case IR::Opcode::LoadGlobal64: - inst->ReplaceUsesWith( - IR::Value{&*block.PrependNewInst(inst, IR::Opcode::CompositeConstruct2, {zero, zero})}); + inst->ReplaceUsesWith(IR::Value{ + &*block.PrependNewInst(inst, 
IR::Opcode::CompositeConstructU32x2, {zero, zero})}); break; case IR::Opcode::LoadGlobal128: inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst( - inst, IR::Opcode::CompositeConstruct4, {zero, zero, zero, zero})}); + inst, IR::Opcode::CompositeConstructU32x4, {zero, zero, zero, zero})}); break; case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalU8: @@ -137,7 +137,8 @@ void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) { inst->Invalidate(); break; default: - throw LogicError("Invalid opcode to ignore its global memory operation {}", inst->Opcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", + inst->Opcode()); } } @@ -196,7 +197,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, storage_buffer = Track(addr, nullptr); if (!storage_buffer) { // If that also failed, drop the global memory usage - IgnoreGlobalMemory(block, inst); + DiscardGlobalMemory(block, inst); } } // Collect storage buffer and the instruction @@ -242,12 +243,12 @@ std::optional TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) { if (vector.IsImmediate()) { return std::nullopt; } - // This vector is expected to be a CompositeConstruct2 + // This vector is expected to be a CompositeConstructU32x2 IR::Inst* const vector_inst{vector.InstRecursive()}; - if (vector_inst->Opcode() != IR::Opcode::CompositeConstruct2) { + if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { return std::nullopt; } - // Grab the first argument from the CompositeConstruct2, this is the low address. + // Grab the first argument from the CompositeConstructU32x2, this is the low address. // Re-apply the offset in case we found one. const IR::U32 low_addr{vector_inst->Arg(0)}; return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr; -- cgit v1.2.3 From 16cb00c521cae6e93ec49d10e15b575b7bc4857e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 5 Feb 2021 23:11:23 -0300 Subject: shader: Add pools and rename files --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 28 +++++++++++------------ 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index a62d3f56b..7713e3ba9 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -19,7 +19,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/ir/microinstruction.h" -#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/pred.h" #include "shader_recompiler/frontend/ir/reg.h" #include "shader_recompiler/ir_opt/passes.h" @@ -150,52 +150,52 @@ private: void SsaRewritePass(IR::Function& function) { Pass pass; - for (const auto& block : function.blocks) { + for (IR::Block* const block : function.blocks) { for (IR::Inst& inst : block->Instructions()) { switch (inst.Opcode()) { case IR::Opcode::SetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { - pass.WriteVariable(reg, block.get(), inst.Arg(1)); + pass.WriteVariable(reg, block, inst.Arg(1)); } break; case IR::Opcode::SetPred: if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { - pass.WriteVariable(pred, block.get(), inst.Arg(1)); + pass.WriteVariable(pred, block, inst.Arg(1)); } break; case 
IR::Opcode::SetZFlag: - pass.WriteVariable(ZeroFlagTag{}, block.get(), inst.Arg(0)); + pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); break; case IR::Opcode::SetSFlag: - pass.WriteVariable(SignFlagTag{}, block.get(), inst.Arg(0)); + pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0)); break; case IR::Opcode::SetCFlag: - pass.WriteVariable(CarryFlagTag{}, block.get(), inst.Arg(0)); + pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0)); break; case IR::Opcode::SetOFlag: - pass.WriteVariable(OverflowFlagTag{}, block.get(), inst.Arg(0)); + pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); break; case IR::Opcode::GetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { - inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get())); + inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); } break; case IR::Opcode::GetPred: if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { - inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get())); + inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); } break; case IR::Opcode::GetZFlag: - inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block.get())); + inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); break; case IR::Opcode::GetSFlag: - inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block.get())); + inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block)); break; case IR::Opcode::GetCFlag: - inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block.get())); + inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block)); break; case IR::Opcode::GetOFlag: - inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block.get())); + inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); break; default: break; -- cgit v1.2.3 From da8096e6e35af250dcc56a1af76b8a211df63a90 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 6 Feb 2021 02:38:22 -0300 Subject: shader: Properly store phi on Inst --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 7713e3ba9..15a9db90a 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -104,32 +104,34 @@ private: val = ReadVariable(variable, preds.front()); } else { // Break potential cycles with operandless phi - val = IR::Value{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + IR::Inst& phi_inst{*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + val = IR::Value{&phi_inst}; WriteVariable(variable, block, val); - val = AddPhiOperands(variable, val, block); + val = AddPhiOperands(variable, phi_inst, block); } WriteVariable(variable, block, val); return val; } - IR::Value AddPhiOperands(auto variable, const IR::Value& phi, IR::Block* block) { + IR::Value AddPhiOperands(auto variable, IR::Inst& phi, IR::Block* block) { for (IR::Block* const pred : block->ImmediatePredecessors()) { - phi.Inst()->AddPhiOperand(pred, ReadVariable(variable, pred)); + phi.AddPhiOperand(pred, ReadVariable(variable, pred)); } return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); } - IR::Value TryRemoveTrivialPhi(const IR::Value& phi, IR::Block* block, IR::Opcode undef_opcode) { + IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) { IR::Value same; - for (const auto& pair : 
phi.Inst()->PhiOperands()) { - const IR::Value& op{pair.second}; - if (op == same || op == phi) { + const size_t num_args{phi.NumArgs()}; + for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { + const IR::Value& op{phi.Arg(arg_index)}; + if (op == same || op == IR::Value{&phi}) { // Unique value or self-reference continue; } if (!same.IsEmpty()) { // The phi merges at least two values: not trivial - return phi; + return IR::Value{&phi}; } same = op; } @@ -139,7 +141,7 @@ private: same = IR::Value{&*block->PrependNewInst(first_not_phi, undef_opcode)}; } // Reroute all uses of phi to same and remove phi - phi.Inst()->ReplaceUsesWith(same); + phi.ReplaceUsesWith(same); // TODO: Try to recursively remove all phi users, which might have become trivial return same; } -- cgit v1.2.3 From 6dafb08f52ac78119669a698c4b9a39bffd48f8f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 6 Feb 2021 04:47:53 -0300 Subject: shader: Better constant folding --- .../ir_opt/constant_propagation_pass.cpp | 59 +++++++++++++++++----- 1 file changed, 47 insertions(+), 12 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 7fb3192d8..f1170c61e 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include #include "common/bit_cast.h" @@ -13,12 +14,17 @@ namespace Shader::Optimization { namespace { -[[nodiscard]] u32 BitFieldUExtract(u32 base, u32 shift, u32 count) { - if (static_cast(shift) + static_cast(count) > Common::BitSize()) { - throw LogicError("Undefined result in BitFieldUExtract({}, {}, {})", base, shift, count); - } - return (base >> shift) & ((1U << count) - 1); -} +// Metaprogramming stuff to get arguments information out of a lambda +template +struct LambdaTraits : LambdaTraits::operator())> {}; + +template +struct LambdaTraits { + template + using ArgType = std::tuple_element_t>; + + static constexpr size_t NUM_ARGS{sizeof...(Args)}; +}; template [[nodiscard]] T Arg(const IR::Value& value) { @@ -104,6 +110,14 @@ void FoldAdd(IR::Inst& inst) { } } +template +void FoldSelect(IR::Inst& inst) { + const IR::Value cond{inst.Arg(0)}; + if (cond.IsImmediate()) { + inst.ReplaceUsesWith(cond.U1() ? 
inst.Arg(1) : inst.Arg(2)); + } +} + void FoldLogicalAnd(IR::Inst& inst) { if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) { return; @@ -131,6 +145,21 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { } } +template +IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence) { + using Traits = LambdaTraits; + return IR::Value{func(Arg>(inst.Arg(I))...)}; +} + +template +void FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return; + } + using Indices = std::make_index_sequence::NUM_ARGS>; + inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); +} + void ConstantPropagation(IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: @@ -145,14 +174,20 @@ void ConstantPropagation(IR::Inst& inst) { return FoldBitCast(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: return FoldAdd(inst); - case IR::Opcode::BitFieldUExtract: - if (inst.AreAllArgsImmediates() && !inst.HasAssociatedPseudoOperation()) { - inst.ReplaceUsesWith(IR::Value{ - BitFieldUExtract(inst.Arg(0).U32(), inst.Arg(1).U32(), inst.Arg(2).U32())}); - } - break; + case IR::Opcode::Select32: + return FoldSelect(inst); case IR::Opcode::LogicalAnd: return FoldLogicalAnd(inst); + case IR::Opcode::ULessThan: + return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); + case IR::Opcode::BitFieldUExtract: + return FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { + if (static_cast(shift) + static_cast(count) > Common::BitSize()) { + throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, + base, shift, count); + } + return (base >> shift) & ((1U << count) - 1); + }); default: break; } -- cgit v1.2.3 From 2930dccecc933d6748772e9f51a5724fe1e6771b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Feb 2021 02:54:35 -0300 Subject: spirv: Initial SPIR-V support --- src/shader_recompiler/ir_opt/identity_removal_pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 39a972919..593efde39 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -13,7 +13,7 @@ namespace Shader::Optimization { void IdentityRemovalPass(IR::Function& function) { std::vector to_invalidate; - for (auto& block : function.blocks) { + for (IR::Block* const block : function.blocks) { for (auto inst = block->begin(); inst != block->end();) { const size_t num_args{inst->NumArgs()}; for (size_t i = 0; i < num_args; ++i) { -- cgit v1.2.3 From 9170200a11715d131645d1ffb92e86e6ef0d7e88 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 11 Feb 2021 16:39:06 -0300 Subject: shader: Initial implementation of an AST --- .../ir_opt/constant_propagation_pass.cpp | 50 ++++++++++++++++++++++ src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 24 +++++++++-- src/shader_recompiler/ir_opt/verification_pass.cpp | 4 ++ 3 files changed, 75 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index f1170c61e..9fba6ac23 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -132,6 
+132,32 @@ void FoldLogicalAnd(IR::Inst& inst) { } } +void FoldLogicalOr(IR::Inst& inst) { + if (!FoldCommutative(inst, [](bool a, bool b) { return a || b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate()) { + if (rhs.U1()) { + inst.ReplaceUsesWith(IR::Value{true}); + } else { + inst.ReplaceUsesWith(inst.Arg(0)); + } + } +} + +void FoldLogicalNot(IR::Inst& inst) { + const IR::U1 value{inst.Arg(0)}; + if (value.IsImmediate()) { + inst.ReplaceUsesWith(IR::Value{!value.U1()}); + return; + } + IR::Inst* const arg{value.InstRecursive()}; + if (arg->Opcode() == IR::Opcode::LogicalNot) { + inst.ReplaceUsesWith(arg->Arg(0)); + } +} + template void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { const IR::Value value{inst.Arg(0)}; @@ -160,6 +186,24 @@ void FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); } +void FoldBranchConditional(IR::Inst& inst) { + const IR::U1 cond{inst.Arg(0)}; + if (cond.IsImmediate()) { + // TODO: Convert to Branch + return; + } + const IR::Inst* cond_inst{cond.InstRecursive()}; + if (cond_inst->Opcode() == IR::Opcode::LogicalNot) { + const IR::Value true_label{inst.Arg(1)}; + const IR::Value false_label{inst.Arg(2)}; + // Remove negation on the conditional (take the parameter out of LogicalNot) and swap + // the branches + inst.SetArg(0, cond_inst->Arg(0)); + inst.SetArg(1, false_label); + inst.SetArg(2, true_label); + } +} + void ConstantPropagation(IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: @@ -178,6 +222,10 @@ void ConstantPropagation(IR::Inst& inst) { return FoldSelect(inst); case IR::Opcode::LogicalAnd: return FoldLogicalAnd(inst); + case IR::Opcode::LogicalOr: + return FoldLogicalOr(inst); + case IR::Opcode::LogicalNot: + return FoldLogicalNot(inst); case IR::Opcode::ULessThan: return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); case IR::Opcode::BitFieldUExtract: @@ -188,6 +236,8 @@ void ConstantPropagation(IR::Inst& inst) { } return (base >> shift) & ((1U << count) - 1); }); + case IR::Opcode::BranchConditional: + return FoldBranchConditional(inst); default: break; } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 15a9db90a..8ca996e93 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -34,6 +34,13 @@ struct SignFlagTag : FlagTag {}; struct CarryFlagTag : FlagTag {}; struct OverflowFlagTag : FlagTag {}; +struct GotoVariable : FlagTag { + GotoVariable() = default; + explicit GotoVariable(u32 index_) : index{index_} {} + + u32 index; +}; + struct DefTable { [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { return regs[IR::RegIndex(variable)]; @@ -43,6 +50,10 @@ struct DefTable { return preds[IR::PredIndex(variable)]; } + [[nodiscard]] ValueMap& operator[](GotoVariable goto_variable) { + return goto_vars[goto_variable.index]; + } + [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept { return zero_flag; } @@ -61,6 +72,7 @@ struct DefTable { std::array regs; std::array preds; + boost::container::flat_map goto_vars; ValueMap zero_flag; ValueMap sign_flag; ValueMap carry_flag; @@ -68,15 +80,15 @@ struct DefTable { }; IR::Opcode UndefOpcode(IR::Reg) noexcept { - return IR::Opcode::Undef32; + return IR::Opcode::UndefU32; } IR::Opcode UndefOpcode(IR::Pred) noexcept { - return IR::Opcode::Undef1; + return IR::Opcode::UndefU1; } IR::Opcode UndefOpcode(const FlagTag&) 
noexcept { - return IR::Opcode::Undef1; + return IR::Opcode::UndefU1; } [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { @@ -165,6 +177,9 @@ void SsaRewritePass(IR::Function& function) { pass.WriteVariable(pred, block, inst.Arg(1)); } break; + case IR::Opcode::SetGotoVariable: + pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); + break; case IR::Opcode::SetZFlag: pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); break; @@ -187,6 +202,9 @@ void SsaRewritePass(IR::Function& function) { inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); } break; + case IR::Opcode::GetGotoVariable: + inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); + break; case IR::Opcode::GetZFlag: inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); break; diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 8a5adf5a2..32b56eb57 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -14,6 +14,10 @@ namespace Shader::Optimization { static void ValidateTypes(const IR::Function& function) { for (const auto& block : function.blocks) { for (const IR::Inst& inst : *block) { + if (inst.Opcode() == IR::Opcode::Phi) { + // Skip validation on phi nodes + continue; + } const size_t num_args{inst.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { const IR::Type t1{inst.Arg(i).Type()}; -- cgit v1.2.3 From 8af9297f0972d0aaa8306369c5d04926b886a89e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Feb 2021 01:24:32 -0300 Subject: shader: Misc fixes --- .../ir_opt/constant_propagation_pass.cpp | 27 ++--- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 113 +++++++++++---------- 2 files changed, 70 insertions(+), 70 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 9fba6ac23..cbde65b9b 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -32,6 +32,8 @@ template return value.U1(); } else if constexpr (std::is_same_v) { return value.U32(); + } else if constexpr (std::is_same_v) { + return static_cast(value.U32()); } else if constexpr (std::is_same_v) { return value.F32(); } else if constexpr (std::is_same_v) { @@ -39,17 +41,8 @@ template } } -template +template bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { - const auto arg = [](const IR::Value& value) { - if constexpr (std::is_invocable_r_v) { - return value.U1(); - } else if constexpr (std::is_invocable_r_v) { - return value.U32(); - } else if constexpr (std::is_invocable_r_v) { - return value.U64(); - } - }; const IR::Value lhs{inst.Arg(0)}; const IR::Value rhs{inst.Arg(1)}; @@ -57,14 +50,14 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { const bool is_rhs_immediate{rhs.IsImmediate()}; if (is_lhs_immediate && is_rhs_immediate) { - const auto result{imm_fn(arg(lhs), arg(rhs))}; + const auto result{imm_fn(Arg(lhs), Arg(rhs))}; inst.ReplaceUsesWith(IR::Value{result}); return false; } if (is_lhs_immediate && !is_rhs_immediate) { IR::Inst* const rhs_inst{rhs.InstRecursive()}; if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) { - const auto combined{imm_fn(arg(lhs), arg(rhs_inst->Arg(1)))}; + const auto combined{imm_fn(Arg(lhs), Arg(rhs_inst->Arg(1)))}; inst.SetArg(0, rhs_inst->Arg(0)); 
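
The Arg<T> rewrite above leans on the LambdaTraits metaprogramming introduced in the earlier constant-folding commit: the folder inspects the folding lambda's parameter types and converts each IR immediate to the matching C++ type before calling it. Below is a minimal, self-contained sketch of that idea; ToyValue, AsArg, Eval and FoldIfAllImmediates are illustrative names assumed for the sketch and are not part of the recompiler.

#include <cstddef>
#include <cstdint>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

// Hypothetical stand-in for an IR value that may or may not hold a 32-bit immediate.
struct ToyValue {
    bool is_immediate{};
    std::uint32_t raw{};
};

// Deduce a lambda's parameter list from its call operator, mirroring LambdaTraits.
template <typename Func>
struct LambdaTraits : LambdaTraits<decltype(&std::remove_reference_t<Func>::operator())> {};

template <typename ReturnType, typename LambdaType, typename... Args>
struct LambdaTraits<ReturnType (LambdaType::*)(Args...) const> {
    template <std::size_t I>
    using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
    static constexpr std::size_t NUM_ARGS{sizeof...(Args)};
};

// Convert an immediate to the C++ type the folding lambda expects for that parameter.
template <typename T>
T AsArg(const ToyValue& value) {
    if constexpr (std::is_same_v<T, bool>) {
        return value.raw != 0;
    } else {
        return static_cast<T>(value.raw);
    }
}

template <typename Func, std::size_t... I>
std::uint32_t Eval(const std::vector<ToyValue>& args, Func&& func, std::index_sequence<I...>) {
    using Traits = LambdaTraits<Func>;
    return static_cast<std::uint32_t>(func(AsArg<typename Traits::template ArgType<I>>(args[I])...));
}

// Evaluates func over args only when every argument is an immediate.
template <typename Func>
bool FoldIfAllImmediates(const std::vector<ToyValue>& args, Func&& func, std::uint32_t& result) {
    for (const ToyValue& value : args) {
        if (!value.is_immediate) {
            return false;
        }
    }
    using Indices = std::make_index_sequence<LambdaTraits<Func>::NUM_ARGS>;
    result = Eval(args, func, Indices{});
    return true;
}

Under these assumptions, a call such as FoldIfAllImmediates(args, [](std::uint32_t base, std::uint32_t shift) { return base >> shift; }, result) evaluates the operation at IR time whenever every operand is constant, which is the same shape the FoldWhenAllImmediates helper gives opcodes like ULessThan and BitFieldUExtract.
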
inst.SetArg(1, IR::Value{combined}); } else { @@ -76,7 +69,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { if (!is_lhs_immediate && is_rhs_immediate) { const IR::Inst* const lhs_inst{lhs.InstRecursive()}; if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) { - const auto combined{imm_fn(arg(rhs), arg(lhs_inst->Arg(1)))}; + const auto combined{imm_fn(Arg(rhs), Arg(lhs_inst->Arg(1)))}; inst.SetArg(0, lhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); } @@ -101,7 +94,7 @@ void FoldAdd(IR::Inst& inst) { if (inst.HasAssociatedPseudoOperation()) { return; } - if (!FoldCommutative(inst, [](T a, T b) { return a + b; })) { + if (!FoldCommutative(inst, [](T a, T b) { return a + b; })) { return; } const IR::Value rhs{inst.Arg(1)}; @@ -119,7 +112,7 @@ void FoldSelect(IR::Inst& inst) { } void FoldLogicalAnd(IR::Inst& inst) { - if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) { + if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) { return; } const IR::Value rhs{inst.Arg(1)}; @@ -133,7 +126,7 @@ void FoldLogicalAnd(IR::Inst& inst) { } void FoldLogicalOr(IR::Inst& inst) { - if (!FoldCommutative(inst, [](bool a, bool b) { return a || b; })) { + if (!FoldCommutative(inst, [](bool a, bool b) { return a || b; })) { return; } const IR::Value rhs{inst.Arg(1)}; @@ -226,6 +219,8 @@ void ConstantPropagation(IR::Inst& inst) { return FoldLogicalOr(inst); case IR::Opcode::LogicalNot: return FoldLogicalNot(inst); + case IR::Opcode::SLessThan: + return FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); case IR::Opcode::ULessThan: return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); case IR::Opcode::BitFieldUExtract: diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 8ca996e93..7eaf719c4 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -113,6 +113,7 @@ private: IR::Value ReadVariableRecursive(auto variable, IR::Block* block) { IR::Value val; if (const std::span preds{block->ImmediatePredecessors()}; preds.size() == 1) { + // Optimize the common case of one predecessor: no phi needed val = ReadVariable(variable, preds.front()); } else { // Break potential cycles with operandless phi @@ -160,66 +161,70 @@ private: DefTable current_def; }; + +void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::SetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + pass.WriteVariable(reg, block, inst.Arg(1)); + } + break; + case IR::Opcode::SetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + pass.WriteVariable(pred, block, inst.Arg(1)); + } + break; + case IR::Opcode::SetGotoVariable: + pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); + break; + case IR::Opcode::SetZFlag: + pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetSFlag: + pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetCFlag: + pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetOFlag: + pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::GetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); + } + break; + case IR::Opcode::GetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; 
pred != IR::Pred::PT) { + inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); + } + break; + case IR::Opcode::GetGotoVariable: + inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); + break; + case IR::Opcode::GetZFlag: + inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); + break; + case IR::Opcode::GetSFlag: + inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block)); + break; + case IR::Opcode::GetCFlag: + inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block)); + break; + case IR::Opcode::GetOFlag: + inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); + break; + default: + break; + } +} } // Anonymous namespace void SsaRewritePass(IR::Function& function) { Pass pass; for (IR::Block* const block : function.blocks) { for (IR::Inst& inst : block->Instructions()) { - switch (inst.Opcode()) { - case IR::Opcode::SetRegister: - if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { - pass.WriteVariable(reg, block, inst.Arg(1)); - } - break; - case IR::Opcode::SetPred: - if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { - pass.WriteVariable(pred, block, inst.Arg(1)); - } - break; - case IR::Opcode::SetGotoVariable: - pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); - break; - case IR::Opcode::SetZFlag: - pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::SetSFlag: - pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::SetCFlag: - pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::SetOFlag: - pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::GetRegister: - if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { - inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); - } - break; - case IR::Opcode::GetPred: - if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { - inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); - } - break; - case IR::Opcode::GetGotoVariable: - inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); - break; - case IR::Opcode::GetZFlag: - inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); - break; - case IR::Opcode::GetSFlag: - inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block)); - break; - case IR::Opcode::GetCFlag: - inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block)); - break; - case IR::Opcode::GetOFlag: - inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); - break; - default: - break; - } + VisitInst(pass, block, inst); } } } -- cgit v1.2.3 From cbfb7d182a4e90e4e263696d1fca35e47d3eabb4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Feb 2021 20:15:42 -0300 Subject: shader: Support SSA loops on IR --- .../ir_opt/dead_code_elimination_pass.cpp | 2 +- src/shader_recompiler/ir_opt/passes.h | 8 +-- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 62 +++++++++++++++++----- 3 files changed, 55 insertions(+), 17 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp index bbaa412f6..132b2012a 100644 --- a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -13,7 +13,7 @@ namespace Shader::Optimization { void DeadCodeEliminationPass(IR::Block& block) { // We iterate over the instructions in reverse order. 
// This is because removing an instruction reduces the number of uses for earlier instructions. - for (IR::Inst& inst : std::views::reverse(block)) { + for (IR::Inst& inst : block | std::views::reverse) { if (!inst.HasUses() && !inst.MayHaveSideEffects()) { inst.Invalidate(); } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 578a24d89..30eb31588 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -4,14 +4,16 @@ #pragma once +#include + #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/function.h" namespace Shader::Optimization { template -void Invoke(Func&& func, IR::Function& function) { - for (const auto& block : function.blocks) { +void PostOrderInvoke(Func&& func, IR::Function& function) { + for (const auto& block : function.post_order_blocks) { func(*block); } } @@ -20,7 +22,7 @@ void ConstantPropagationPass(IR::Block& block); void DeadCodeEliminationPass(IR::Block& block); void GlobalMemoryToStorageBufferPass(IR::Block& block); void IdentityRemovalPass(IR::Function& function); -void SsaRewritePass(IR::Function& function); +void SsaRewritePass(std::span post_order_blocks); void VerificationPass(const IR::Function& function); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 7eaf719c4..13f9c914a 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -14,7 +14,13 @@ // https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 // +#include +#include +#include +#include + #include +#include #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/function.h" @@ -26,9 +32,9 @@ namespace Shader::Optimization { namespace { -using ValueMap = boost::container::flat_map>; - -struct FlagTag {}; +struct FlagTag { + auto operator<=>(const FlagTag&) const noexcept = default; +}; struct ZeroFlagTag : FlagTag {}; struct SignFlagTag : FlagTag {}; struct CarryFlagTag : FlagTag {}; @@ -38,9 +44,15 @@ struct GotoVariable : FlagTag { GotoVariable() = default; explicit GotoVariable(u32 index_) : index{index_} {} + auto operator<=>(const GotoVariable&) const noexcept = default; + u32 index; }; +using Variant = std::variant; +using ValueMap = boost::container::flat_map>; + struct DefTable { [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { return regs[IR::RegIndex(variable)]; @@ -102,19 +114,35 @@ public: } IR::Value ReadVariable(auto variable, IR::Block* block) { - auto& def{current_def[variable]}; + const ValueMap& def{current_def[variable]}; if (const auto it{def.find(block)}; it != def.end()) { return it->second; } return ReadVariableRecursive(variable, block); } + void SealBlock(IR::Block* block) { + const auto it{incomplete_phis.find(block)}; + if (it != incomplete_phis.end()) { + for (auto& [variant, phi] : it->second) { + std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); + } + } + sealed_blocks.insert(block); + } + private: IR::Value ReadVariableRecursive(auto variable, IR::Block* block) { IR::Value val; - if (const std::span preds{block->ImmediatePredecessors()}; preds.size() == 1) { + if (!sealed_blocks.contains(block)) { + // Incomplete CFG + IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + incomplete_phis[block].insert_or_assign(variable, phi); + val = IR::Value{&*phi}; + } 
else if (const std::span imm_preds{block->ImmediatePredecessors()}; + imm_preds.size() == 1) { // Optimize the common case of one predecessor: no phi needed - val = ReadVariable(variable, preds.front()); + val = ReadVariable(variable, imm_preds.front()); } else { // Break potential cycles with operandless phi IR::Inst& phi_inst{*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; @@ -127,8 +155,8 @@ private: } IR::Value AddPhiOperands(auto variable, IR::Inst& phi, IR::Block* block) { - for (IR::Block* const pred : block->ImmediatePredecessors()) { - phi.AddPhiOperand(pred, ReadVariable(variable, pred)); + for (IR::Block* const imm_pred : block->ImmediatePredecessors()) { + phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred)); } return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); } @@ -159,6 +187,9 @@ private: return same; } + boost::container::flat_set sealed_blocks; + boost::container::flat_map> + incomplete_phis; DefTable current_def; }; @@ -218,14 +249,19 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { break; } } + +void VisitBlock(Pass& pass, IR::Block* block) { + for (IR::Inst& inst : block->Instructions()) { + VisitInst(pass, block, inst); + } + pass.SealBlock(block); +} } // Anonymous namespace -void SsaRewritePass(IR::Function& function) { +void SsaRewritePass(std::span post_order_blocks) { Pass pass; - for (IR::Block* const block : function.blocks) { - for (IR::Inst& inst : block->Instructions()) { - VisitInst(pass, block, inst); - } + for (IR::Block* const block : post_order_blocks | std::views::reverse) { + VisitBlock(pass, block); } } -- cgit v1.2.3 From 1b0cf2309c760c1cb97a230a1572f8e87f84444a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Feb 2021 22:46:40 -0300 Subject: shader: Add support for forward declarations --- src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 34393e1d5..08fd364bb 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -161,8 +161,8 @@ std::optional Track(const IR::Value& value, const Bias* bias) return std::nullopt; } const StorageBufferAddr storage_buffer{ - .index = index.U32(), - .offset = offset.U32(), + .index{index.U32()}, + .offset{offset.U32()}, }; if (bias && !MeetsBias(storage_buffer, *bias)) { // We have to blacklist some addresses in case we wrongly point to them -- cgit v1.2.3 From 1c0b8bca5e1d2af65dff66c19b7ebb3060ce1229 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Feb 2021 00:07:52 -0300 Subject: shader: Fix tracking --- .../global_memory_to_storage_buffer_pass.cpp | 122 ++++++++++++--------- 1 file changed, 72 insertions(+), 50 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 08fd364bb..b40c0c57b 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -142,6 +142,58 @@ void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) { } } +struct LowAddrInfo { + IR::U32 value; + s32 imm_offset; +}; + +/// Tries to 
track the first 32-bits of a global memory instruction +std::optional TrackLowAddress(IR::Inst* inst) { + // The first argument is the low level GPU pointer to the global memory instruction + const IR::U64 addr{inst->Arg(0)}; + if (addr.IsImmediate()) { + // Not much we can do if it's an immediate + return std::nullopt; + } + // This address is expected to either be a PackUint2x32 or a IAdd64 + IR::Inst* addr_inst{addr.InstRecursive()}; + s32 imm_offset{0}; + if (addr_inst->Opcode() == IR::Opcode::IAdd64) { + // If it's an IAdd64, get the immediate offset it is applying and grab the address + // instruction. This expects for the instruction to be canonicalized having the address on + // the first argument and the immediate offset on the second one. + const IR::U64 imm_offset_value{addr_inst->Arg(1)}; + if (!imm_offset_value.IsImmediate()) { + return std::nullopt; + } + imm_offset = static_cast(static_cast(imm_offset_value.U64())); + const IR::U64 iadd_addr{addr_inst->Arg(0)}; + if (iadd_addr.IsImmediate()) { + return std::nullopt; + } + addr_inst = iadd_addr.Inst(); + } + // With IAdd64 handled, now PackUint2x32 is expected without exceptions + if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { + return std::nullopt; + } + // PackUint2x32 is expected to be generated from a vector + const IR::Value vector{addr_inst->Arg(0)}; + if (vector.IsImmediate()) { + return std::nullopt; + } + // This vector is expected to be a CompositeConstructU32x2 + IR::Inst* const vector_inst{vector.InstRecursive()}; + if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { + return std::nullopt; + } + // Grab the first argument from the CompositeConstructU32x2, this is the low address. + return LowAddrInfo{ + .value{IR::U32{vector_inst->Arg(0)}}, + .imm_offset{imm_offset}, + }; +} + /// Recursively tries to track the storage buffer address used by a global memory instruction std::optional Track(const IR::Value& value, const Bias* bias) { if (value.IsImmediate()) { @@ -191,13 +243,26 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, }; // First try to find storage buffers in the NVN address const IR::U64 addr{inst->Arg(0)}; - std::optional storage_buffer{Track(addr, &nvn_bias)}; + if (addr.IsImmediate()) { + // Immediate addresses can't be lowered to a storage buffer + DiscardGlobalMemory(block, inst); + return; + } + // Track the low address of the instruction + const std::optional low_addr_info{TrackLowAddress(addr.InstRecursive())}; + if (!low_addr_info) { + DiscardGlobalMemory(block, inst); + return; + } + const IR::U32 low_addr{low_addr_info->value}; + std::optional storage_buffer{Track(low_addr, &nvn_bias)}; if (!storage_buffer) { // If it fails, track without a bias - storage_buffer = Track(addr, nullptr); + storage_buffer = Track(low_addr, nullptr); if (!storage_buffer) { // If that also failed, drop the global memory usage DiscardGlobalMemory(block, inst); + return; } } // Collect storage buffer and the instruction @@ -208,58 +273,15 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, }); } -/// Tries to track the first 32-bits of a global memory instruction -std::optional TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) { - // The first argument is the low level GPU pointer to the global memory instruction - const IR::U64 addr{inst->Arg(0)}; - if (addr.IsImmediate()) { - // Not much we can do if it's an immediate - return std::nullopt; - } - // This address is expected to either be a PackUint2x32 or a IAdd64 - IR::Inst* 
addr_inst{addr.InstRecursive()}; - s32 imm_offset{0}; - if (addr_inst->Opcode() == IR::Opcode::IAdd64) { - // If it's an IAdd64, get the immediate offset it is applying and grab the address - // instruction. This expects for the instruction to be canonicalized having the address on - // the first argument and the immediate offset on the second one. - const IR::U64 imm_offset_value{addr_inst->Arg(1)}; - if (!imm_offset_value.IsImmediate()) { - return std::nullopt; - } - imm_offset = static_cast(static_cast(imm_offset_value.U64())); - const IR::U64 iadd_addr{addr_inst->Arg(0)}; - if (iadd_addr.IsImmediate()) { - return std::nullopt; - } - addr_inst = iadd_addr.Inst(); - } - // With IAdd64 handled, now PackUint2x32 is expected without exceptions - if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { - return std::nullopt; - } - // PackUint2x32 is expected to be generated from a vector - const IR::Value vector{addr_inst->Arg(0)}; - if (vector.IsImmediate()) { - return std::nullopt; - } - // This vector is expected to be a CompositeConstructU32x2 - IR::Inst* const vector_inst{vector.InstRecursive()}; - if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { - return std::nullopt; - } - // Grab the first argument from the CompositeConstructU32x2, this is the low address. - // Re-apply the offset in case we found one. - const IR::U32 low_addr{vector_inst->Arg(0)}; - return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr; -} - /// Returns the offset in indices (not bytes) for an equivalent storage instruction IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) { IR::IREmitter ir{block, inst}; IR::U32 offset; - if (const std::optional low_addr{TrackLowAddress(ir, &*inst)}) { - offset = *low_addr; + if (const std::optional low_addr{TrackLowAddress(&*inst)}) { + offset = low_addr->value; + if (low_addr->imm_offset != 0) { + offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); + } } else { offset = ir.ConvertU(32, IR::U64{inst->Arg(0)}); } -- cgit v1.2.3 From b5d7279d878211654b4abb165d94af763a365f47 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 04:10:22 -0300 Subject: spirv: Initial bindings support --- .../ir_opt/collect_shader_info_pass.cpp | 81 +++++++++++++++ .../ir_opt/constant_propagation_pass.cpp | 76 +++++++++++--- .../global_memory_to_storage_buffer_pass.cpp | 110 ++++++++++++--------- src/shader_recompiler/ir_opt/passes.h | 4 +- 4 files changed, 210 insertions(+), 61 deletions(-) create mode 100644 src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp new file mode 100644 index 000000000..f2326dea1 --- /dev/null +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -0,0 +1,81 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
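
The new collect-shader-info pass introduced below walks every instruction in the program and records which features and resources the shader touches, so the backend only has to declare what is actually used. A rough standalone sketch of that visitation pattern, with ToyOpcode, ToyInst and ToyInfo as assumed stand-ins for the real IR and Info types:

#include <bitset>
#include <cstdint>
#include <vector>

// Illustrative stand-ins; the real pass operates on IR::Inst and the Info struct.
enum class ToyOpcode { WorkgroupId, LocalInvocationId, FPAdd16, GetCbuf, Other };

struct ToyInst {
    ToyOpcode opcode{};
    std::uint32_t cbuf_index{};  // only meaningful for GetCbuf in this sketch
};

struct ToyInfo {
    bool uses_workgroup_id{};
    bool uses_local_invocation_id{};
    bool uses_fp16{};
    std::bitset<18> constant_buffer_mask;  // assuming 18 constant buffer slots
};

void CollectInfo(ToyInfo& info, const std::vector<ToyInst>& block) {
    for (const ToyInst& inst : block) {
        switch (inst.opcode) {
        case ToyOpcode::WorkgroupId:
            info.uses_workgroup_id = true;
            break;
        case ToyOpcode::LocalInvocationId:
            info.uses_local_invocation_id = true;
            break;
        case ToyOpcode::FPAdd16:
            info.uses_fp16 = true;
            break;
        case ToyOpcode::GetCbuf:
            info.constant_buffer_mask.set(inst.cbuf_index);
            break;
        default:
            break;
        }
    }
}

The real pass additionally records floating-point mode usage and builds descriptor lists, but the shape is the same: one switch over the opcode per visited instruction.
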
+ +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Optimization { +namespace { +void AddConstantBufferDescriptor(Info& info, u32 index) { + auto& descriptor{info.constant_buffers.at(index)}; + if (descriptor) { + return; + } + descriptor = &info.constant_buffer_descriptors.emplace_back(Info::ConstantBufferDescriptor{ + .index{index}, + .count{1}, + }); +} + +void Visit(Info& info, IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::WorkgroupId: + info.uses_workgroup_id = true; + break; + case IR::Opcode::LocalInvocationId: + info.uses_local_invocation_id = true; + break; + case IR::Opcode::FPAbs16: + case IR::Opcode::FPAdd16: + case IR::Opcode::FPCeil16: + case IR::Opcode::FPFloor16: + case IR::Opcode::FPFma16: + case IR::Opcode::FPMul16: + case IR::Opcode::FPNeg16: + case IR::Opcode::FPRoundEven16: + case IR::Opcode::FPSaturate16: + case IR::Opcode::FPTrunc16: + info.uses_fp16; + break; + case IR::Opcode::FPAbs64: + case IR::Opcode::FPAdd64: + case IR::Opcode::FPCeil64: + case IR::Opcode::FPFloor64: + case IR::Opcode::FPFma64: + case IR::Opcode::FPMax64: + case IR::Opcode::FPMin64: + case IR::Opcode::FPMul64: + case IR::Opcode::FPNeg64: + case IR::Opcode::FPRecip64: + case IR::Opcode::FPRecipSqrt64: + case IR::Opcode::FPRoundEven64: + case IR::Opcode::FPSaturate64: + case IR::Opcode::FPTrunc64: + info.uses_fp64 = true; + break; + case IR::Opcode::GetCbuf: + if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { + AddConstantBufferDescriptor(info, index.U32()); + } else { + throw NotImplementedException("Constant buffer with non-immediate index"); + } + break; + default: + break; + } +} +} // Anonymous namespace + +void CollectShaderInfoPass(IR::Program& program) { + Info& info{program.info}; + for (IR::Function& function : program.functions) { + for (IR::Block* const block : function.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + Visit(info, inst); + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index cbde65b9b..f1ad16d60 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -77,6 +77,16 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { return true; } +template +bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return false; + } + using Indices = std::make_index_sequence::NUM_ARGS>; + inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); + return true; +} + void FoldGetRegister(IR::Inst& inst) { if (inst.Arg(0).Reg() == IR::Reg::RZ) { inst.ReplaceUsesWith(IR::Value{u32{0}}); @@ -103,6 +113,52 @@ void FoldAdd(IR::Inst& inst) { } } +void FoldISub32(IR::Inst& inst) { + if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; })) { + return; + } + if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) { + return; + } + // ISub32 is generally used to subtract two constant buffers, compare and replace this with + // zero if they equal. 
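
The comment above is the heart of the FoldISub32 rule: two GetCbuf reads of the same buffer index and offset always produce the same value, so subtracting one from the other is statically zero. A tiny sketch of that core check, using hypothetical ToyCbufRead and FoldCbufDifference names rather than the recompiler's API:

#include <optional>

// Hypothetical description of a GetCbuf read: which buffer and offset it loads.
struct ToyCbufRead {
    unsigned buffer_index{};
    unsigned buffer_offset{};
    bool operator==(const ToyCbufRead&) const = default;
};

// x - x is statically zero when both operands are reads of the same cbuf slot.
std::optional<unsigned> FoldCbufDifference(const ToyCbufRead& lhs, const ToyCbufRead& rhs) {
    if (lhs == rhs) {
        return 0u;
    }
    return std::nullopt;  // different slots: the fold does not apply
}

The pass extends the same reasoning to the (cbuf + x) - cbuf shape handled further down, canonicalizing operands so the added value x survives the fold.
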
+ const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { + return a->Opcode() == IR::Opcode::GetCbuf && b->Opcode() == IR::Opcode::GetCbuf && + a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1); + }}; + IR::Inst* op_a{inst.Arg(0).InstRecursive()}; + IR::Inst* op_b{inst.Arg(1).InstRecursive()}; + if (equal_cbuf(op_a, op_b)) { + inst.ReplaceUsesWith(IR::Value{u32{0}}); + return; + } + // It's also possible a value is being added to a cbuf and then subtracted + if (op_b->Opcode() == IR::Opcode::IAdd32) { + // Canonicalize local variables to simplify the following logic + std::swap(op_a, op_b); + } + if (op_b->Opcode() != IR::Opcode::GetCbuf) { + return; + } + IR::Inst* const inst_cbuf{op_b}; + if (op_a->Opcode() != IR::Opcode::IAdd32) { + return; + } + IR::Value add_op_a{op_a->Arg(0)}; + IR::Value add_op_b{op_a->Arg(1)}; + if (add_op_b.IsImmediate()) { + // Canonicalize + std::swap(add_op_a, add_op_b); + } + if (add_op_b.IsImmediate()) { + return; + } + IR::Inst* const add_cbuf{add_op_b.InstRecursive()}; + if (equal_cbuf(add_cbuf, inst_cbuf)) { + inst.ReplaceUsesWith(add_op_a); + } +} + template void FoldSelect(IR::Inst& inst) { const IR::Value cond{inst.Arg(0)}; @@ -170,15 +226,6 @@ IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence< return IR::Value{func(Arg>(inst.Arg(I))...)}; } -template -void FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { - if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { - return; - } - using Indices = std::make_index_sequence::NUM_ARGS>; - inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); -} - void FoldBranchConditional(IR::Inst& inst) { const IR::U1 cond{inst.Arg(0)}; if (cond.IsImmediate()) { @@ -205,6 +252,8 @@ void ConstantPropagation(IR::Inst& inst) { return FoldGetPred(inst); case IR::Opcode::IAdd32: return FoldAdd(inst); + case IR::Opcode::ISub32: + return FoldISub32(inst); case IR::Opcode::BitCastF32U32: return FoldBitCast(inst, IR::Opcode::BitCastU32F32); case IR::Opcode::BitCastU32F32: @@ -220,17 +269,20 @@ void ConstantPropagation(IR::Inst& inst) { case IR::Opcode::LogicalNot: return FoldLogicalNot(inst); case IR::Opcode::SLessThan: - return FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); + return; case IR::Opcode::ULessThan: - return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); + return; case IR::Opcode::BitFieldUExtract: - return FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { + FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { if (static_cast(shift) + static_cast(count) > Common::BitSize()) { throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, base, shift, count); } return (base >> shift) & ((1U << count) - 1); }); + return; case IR::Opcode::BranchConditional: return FoldBranchConditional(inst); default: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index b40c0c57b..bf230a850 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -28,7 +28,8 @@ struct StorageBufferAddr { /// Block iterator to a global memory instruction and the storage buffer it uses struct StorageInst { StorageBufferAddr storage_buffer; - IR::Block::iterator inst; + IR::Inst* 
inst; + IR::Block* block; }; /// Bias towards a certain range of constant buffers when looking for storage buffers @@ -41,7 +42,7 @@ struct Bias { using StorageBufferSet = boost::container::flat_set, boost::container::small_vector>; -using StorageInstVector = boost::container::small_vector; +using StorageInstVector = boost::container::small_vector; /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { @@ -109,23 +110,22 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce } /// Discards a global memory operation, reads return zero and writes are ignored -void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) { +void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value zero{u32{0}}; - switch (inst->Opcode()) { + switch (inst.Opcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: case IR::Opcode::LoadGlobalU16: case IR::Opcode::LoadGlobal32: - inst->ReplaceUsesWith(zero); + inst.ReplaceUsesWith(zero); break; case IR::Opcode::LoadGlobal64: - inst->ReplaceUsesWith(IR::Value{ - &*block.PrependNewInst(inst, IR::Opcode::CompositeConstructU32x2, {zero, zero})}); + inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero)}); break; case IR::Opcode::LoadGlobal128: - inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst( - inst, IR::Opcode::CompositeConstructU32x4, {zero, zero, zero, zero})}); + inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero, zero, zero)}); break; case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalU8: @@ -134,11 +134,10 @@ void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) { case IR::Opcode::WriteGlobal32: case IR::Opcode::WriteGlobal64: case IR::Opcode::WriteGlobal128: - inst->Invalidate(); + inst.Invalidate(); break; default: - throw LogicError("Invalid opcode to discard its global memory operation {}", - inst->Opcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", inst.Opcode()); } } @@ -232,8 +231,8 @@ std::optional Track(const IR::Value& value, const Bias* bias) } /// Collects the storage buffer used by a global memory instruction and the instruction itself -void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, - StorageBufferSet& storage_buffer_set, StorageInstVector& to_replace) { +void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set, + StorageInstVector& to_replace) { // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ @@ -241,19 +240,13 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, .offset_begin{0x110}, .offset_end{0x610}, }; - // First try to find storage buffers in the NVN address - const IR::U64 addr{inst->Arg(0)}; - if (addr.IsImmediate()) { - // Immediate addresses can't be lowered to a storage buffer - DiscardGlobalMemory(block, inst); - return; - } // Track the low address of the instruction - const std::optional low_addr_info{TrackLowAddress(addr.InstRecursive())}; + const std::optional low_addr_info{TrackLowAddress(&inst)}; if (!low_addr_info) { DiscardGlobalMemory(block, inst); return; } + // First try to find storage buffers in the NVN address const IR::U32 low_addr{low_addr_info->value}; std::optional storage_buffer{Track(low_addr, 
&nvn_bias)}; if (!storage_buffer) { @@ -269,21 +262,22 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, storage_buffer_set.insert(*storage_buffer); to_replace.push_back(StorageInst{ .storage_buffer{*storage_buffer}, - .inst{inst}, + .inst{&inst}, + .block{&block}, }); } /// Returns the offset in indices (not bytes) for an equivalent storage instruction -IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) { - IR::IREmitter ir{block, inst}; +IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::U32 offset; - if (const std::optional low_addr{TrackLowAddress(&*inst)}) { + if (const std::optional low_addr{TrackLowAddress(&inst)}) { offset = low_addr->value; if (low_addr->imm_offset != 0) { offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); } } else { - offset = ir.ConvertU(32, IR::U64{inst->Arg(0)}); + offset = ir.ConvertU(32, IR::U64{inst.Arg(0)}); } // Subtract the least significant 32 bits from the guest offset. The result is the storage // buffer offset in bytes. @@ -292,25 +286,27 @@ IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferA } /// Replace a global memory load instruction with its storage buffer equivalent -void ReplaceLoad(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, +void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())}; - const IR::Value value{&*block.PrependNewInst(inst, new_opcode, {storage_index, offset})}; - inst->ReplaceUsesWith(value); + const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})}; + inst.ReplaceUsesWith(value); } /// Replace a global memory write instruction with its storage buffer equivalent -void ReplaceWrite(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, +void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())}; - block.PrependNewInst(inst, new_opcode, {storage_index, offset, inst->Arg(1)}); - inst->Invalidate(); + const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)}); + inst.Invalidate(); } /// Replace a global memory instruction with its storage buffer equivalent -void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, +void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - switch (inst->Opcode()) { + switch (inst.Opcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -328,26 +324,44 @@ void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_ case IR::Opcode::WriteGlobal128: return ReplaceWrite(block, inst, storage_index, offset); default: - throw InvalidArgument("Invalid global memory opcode {}", inst->Opcode()); + throw InvalidArgument("Invalid global memory opcode {}", inst.Opcode()); } } } // Anonymous namespace -void GlobalMemoryToStorageBufferPass(IR::Block& block) { +void 
GlobalMemoryToStorageBufferPass(IR::Program& program) { StorageBufferSet storage_buffers; StorageInstVector to_replace; - for (IR::Block::iterator inst{block.begin()}; inst != block.end(); ++inst) { - if (!IsGlobalMemory(*inst)) { - continue; + for (IR::Function& function : program.functions) { + for (IR::Block* const block : function.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsGlobalMemory(inst)) { + continue; + } + CollectStorageBuffers(*block, inst, storage_buffers, to_replace); + } } - CollectStorageBuffers(block, inst, storage_buffers, to_replace); } - for (const auto [storage_buffer, inst] : to_replace) { - const auto it{storage_buffers.find(storage_buffer)}; - const IR::U32 storage_index{IR::Value{static_cast(storage_buffers.index_of(it))}}; - const IR::U32 offset{StorageOffset(block, inst, storage_buffer)}; - Replace(block, inst, storage_index, offset); + Info& info{program.info}; + u32 storage_index{}; + for (const StorageBufferAddr& storage_buffer : storage_buffers) { + info.storage_buffers_descriptors.push_back({ + .cbuf_index{storage_buffer.index}, + .cbuf_offset{storage_buffer.offset}, + .count{1}, + }); + info.storage_buffers[storage_index] = &info.storage_buffers_descriptors.back(); + ++storage_index; + } + for (const StorageInst& storage_inst : to_replace) { + const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; + const auto it{storage_buffers.find(storage_inst.storage_buffer)}; + const IR::U32 index{IR::Value{static_cast(storage_buffers.index_of(it))}}; + IR::Block* const block{storage_inst.block}; + IR::Inst* const inst{storage_inst.inst}; + const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; + Replace(*block, *inst, index, offset); } } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 30eb31588..89e5811d3 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -8,6 +8,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/program.h" namespace Shader::Optimization { @@ -18,9 +19,10 @@ void PostOrderInvoke(Func&& func, IR::Function& function) { } } +void CollectShaderInfoPass(IR::Program& program); void ConstantPropagationPass(IR::Block& block); void DeadCodeEliminationPass(IR::Block& block); -void GlobalMemoryToStorageBufferPass(IR::Block& block); +void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Function& function); void SsaRewritePass(std::span post_order_blocks); void VerificationPass(const IR::Function& function); -- cgit v1.2.3 From 58914796c06662f4f901a4f195057ee1327cf055 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 19:50:23 -0300 Subject: shader: Add XMAD multiplication folding optimization --- .../ir_opt/constant_propagation_pass.cpp | 82 ++++++++++++++++++++-- 1 file changed, 77 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index f1ad16d60..9eb61b54c 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -9,6 +9,7 @@ #include "common/bit_cast.h" #include "common/bit_util.h" #include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" #include 
"shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/ir_opt/passes.h" @@ -99,8 +100,71 @@ void FoldGetPred(IR::Inst& inst) { } } +/// Replaces the pattern generated by two XMAD multiplications +bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { + /* + * We are looking for this pattern: + * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 (uses: 1) + * %rhs_mul = IMul32 %rhs_bfe, %factor_b (uses: 1) + * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 (uses: 1) + * %rhs_mul = IMul32 %lhs_bfe, %factor_b (uses: 1) + * %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 (uses: 1) + * %result = IAdd32 %lhs_shl, %rhs_mul (uses: 10) + * + * And replacing it with + * %result = IMul32 %factor_a, %factor_b + * + * This optimization has been proven safe by LLVM and MSVC. + */ + const IR::Value lhs_arg{inst.Arg(0)}; + const IR::Value rhs_arg{inst.Arg(1)}; + if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) { + return false; + } + IR::Inst* const lhs_shl{lhs_arg.InstRecursive()}; + if (lhs_shl->Opcode() != IR::Opcode::ShiftLeftLogical32 || lhs_shl->Arg(1) != IR::Value{16U}) { + return false; + } + if (lhs_shl->Arg(0).IsImmediate()) { + return false; + } + IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()}; + IR::Inst* const rhs_mul{rhs_arg.InstRecursive()}; + if (lhs_mul->Opcode() != IR::Opcode::IMul32 || rhs_mul->Opcode() != IR::Opcode::IMul32) { + return false; + } + if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) { + return false; + } + const IR::U32 factor_b{lhs_mul->Arg(1)}; + if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) { + return false; + } + IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()}; + IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()}; + if (lhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + return false; + } + if (rhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + return false; + } + if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) { + return false; + } + if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) { + return false; + } + if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) { + return false; + } + const IR::U32 factor_a{lhs_bfe->Arg(0)}; + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b)); + return true; +} + template -void FoldAdd(IR::Inst& inst) { +void FoldAdd(IR::Block& block, IR::Inst& inst) { if (inst.HasAssociatedPseudoOperation()) { return; } @@ -110,6 +174,12 @@ void FoldAdd(IR::Inst& inst) { const IR::Value rhs{inst.Arg(1)}; if (rhs.IsImmediate() && Arg(rhs) == 0) { inst.ReplaceUsesWith(inst.Arg(0)); + return; + } + if constexpr (std::is_same_v) { + if (FoldXmadMultiply(block, inst)) { + return; + } } } @@ -244,14 +314,14 @@ void FoldBranchConditional(IR::Inst& inst) { } } -void ConstantPropagation(IR::Inst& inst) { +void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: return FoldGetRegister(inst); case IR::Opcode::GetPred: return FoldGetPred(inst); case IR::Opcode::IAdd32: - return FoldAdd(inst); + return FoldAdd(block, inst); case IR::Opcode::ISub32: return FoldISub32(inst); case IR::Opcode::BitCastF32U32: @@ -259,7 +329,7 @@ void ConstantPropagation(IR::Inst& inst) { case IR::Opcode::BitCastU32F32: return FoldBitCast(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: - return FoldAdd(inst); + return FoldAdd(block, inst); case IR::Opcode::Select32: return FoldSelect(inst); case 
IR::Opcode::LogicalAnd: @@ -292,7 +362,9 @@ void ConstantPropagation(IR::Inst& inst) { } // Anonymous namespace void ConstantPropagationPass(IR::Block& block) { - std::ranges::for_each(block, ConstantPropagation); + for (IR::Inst& inst : block) { + ConstantPropagation(block, inst); + } } } // namespace Shader::Optimization -- cgit v1.2.3 From 6db69990da9f232e6d982cdcb69c2e27d93075cf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Feb 2021 18:10:18 -0300 Subject: spirv: Add lower fp16 to fp32 pass --- .../ir_opt/collect_shader_info_pass.cpp | 18 ++++- .../ir_opt/constant_propagation_pass.cpp | 12 ++-- .../ir_opt/lower_fp16_to_fp32.cpp | 79 ++++++++++++++++++++++ src/shader_recompiler/ir_opt/passes.h | 1 + 4 files changed, 103 insertions(+), 7 deletions(-) create mode 100644 src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f2326dea1..f7f102f53 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -26,6 +26,22 @@ void Visit(Info& info, IR::Inst& inst) { case IR::Opcode::LocalInvocationId: info.uses_local_invocation_id = true; break; + case IR::Opcode::CompositeConstructF16x2: + case IR::Opcode::CompositeConstructF16x3: + case IR::Opcode::CompositeConstructF16x4: + case IR::Opcode::CompositeExtractF16x2: + case IR::Opcode::CompositeExtractF16x3: + case IR::Opcode::CompositeExtractF16x4: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::PackFloat2x16: + case IR::Opcode::UnpackFloat2x16: + case IR::Opcode::ConvertS16F16: + case IR::Opcode::ConvertS32F16: + case IR::Opcode::ConvertS64F16: + case IR::Opcode::ConvertU16F16: + case IR::Opcode::ConvertU32F16: + case IR::Opcode::ConvertU64F16: case IR::Opcode::FPAbs16: case IR::Opcode::FPAdd16: case IR::Opcode::FPCeil16: @@ -36,7 +52,7 @@ void Visit(Info& info, IR::Inst& inst) { case IR::Opcode::FPRoundEven16: case IR::Opcode::FPSaturate16: case IR::Opcode::FPTrunc16: - info.uses_fp16; + info.uses_fp16 = true; break; case IR::Opcode::FPAbs64: case IR::Opcode::FPAdd64: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 9eb61b54c..4d4e88259 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -104,12 +104,12 @@ void FoldGetPred(IR::Inst& inst) { bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { /* * We are looking for this pattern: - * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 (uses: 1) - * %rhs_mul = IMul32 %rhs_bfe, %factor_b (uses: 1) - * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 (uses: 1) - * %rhs_mul = IMul32 %lhs_bfe, %factor_b (uses: 1) - * %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 (uses: 1) - * %result = IAdd32 %lhs_shl, %rhs_mul (uses: 10) + * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 + * %rhs_mul = IMul32 %rhs_bfe, %factor_b + * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 + * %rhs_mul = IMul32 %lhs_bfe, %factor_b + * %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 + * %result = IAdd32 %lhs_shl, %rhs_mul * * And replacing it with * %result = IMul32 %factor_a, %factor_b diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp new file mode 100644 index 000000000..c7032f168 --- 
/dev/null +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -0,0 +1,79 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +IR::Opcode Replace(IR::Opcode op) { + switch (op) { + case IR::Opcode::FPAbs16: + return IR::Opcode::FPAbs32; + case IR::Opcode::FPAdd16: + return IR::Opcode::FPAdd32; + case IR::Opcode::FPCeil16: + return IR::Opcode::FPCeil32; + case IR::Opcode::FPFloor16: + return IR::Opcode::FPFloor32; + case IR::Opcode::FPFma16: + return IR::Opcode::FPFma32; + case IR::Opcode::FPMul16: + return IR::Opcode::FPMul32; + case IR::Opcode::FPNeg16: + return IR::Opcode::FPNeg32; + case IR::Opcode::FPRoundEven16: + return IR::Opcode::FPRoundEven32; + case IR::Opcode::FPSaturate16: + return IR::Opcode::FPSaturate32; + case IR::Opcode::FPTrunc16: + return IR::Opcode::FPTrunc32; + case IR::Opcode::CompositeConstructF16x2: + return IR::Opcode::CompositeConstructF32x2; + case IR::Opcode::CompositeConstructF16x3: + return IR::Opcode::CompositeConstructF32x3; + case IR::Opcode::CompositeConstructF16x4: + return IR::Opcode::CompositeConstructF32x4; + case IR::Opcode::CompositeExtractF16x2: + return IR::Opcode::CompositeExtractF32x2; + case IR::Opcode::CompositeExtractF16x3: + return IR::Opcode::CompositeExtractF32x3; + case IR::Opcode::CompositeExtractF16x4: + return IR::Opcode::CompositeExtractF32x4; + case IR::Opcode::ConvertS16F16: + return IR::Opcode::ConvertS16F32; + case IR::Opcode::ConvertS32F16: + return IR::Opcode::ConvertS32F32; + case IR::Opcode::ConvertS64F16: + return IR::Opcode::ConvertS64F32; + case IR::Opcode::ConvertU16F16: + return IR::Opcode::ConvertU16F32; + case IR::Opcode::ConvertU32F16: + return IR::Opcode::ConvertU32F32; + case IR::Opcode::ConvertU64F16: + return IR::Opcode::ConvertU64F32; + case IR::Opcode::PackFloat2x16: + return IR::Opcode::PackHalf2x16; + case IR::Opcode::UnpackFloat2x16: + return IR::Opcode::UnpackHalf2x16; + default: + return op; + } +} +} // Anonymous namespace + +void LowerFp16ToFp32(IR::Program& program) { + for (IR::Function& function : program.functions) { + for (IR::Block* const block : function.blocks) { + for (IR::Inst& inst : block->Instructions()) { + inst.ReplaceOpcode(Replace(inst.Opcode())); + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 89e5811d3..38106308c 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -24,6 +24,7 @@ void ConstantPropagationPass(IR::Block& block); void DeadCodeEliminationPass(IR::Block& block); void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Function& function); +void LowerFp16ToFp32(IR::Program& program); void SsaRewritePass(std::span post_order_blocks); void VerificationPass(const IR::Function& function); -- cgit v1.2.3 From e2bc05b17d91854cbb9c0ce3647141bf7d33143e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Feb 2021 03:30:13 -0300 Subject: shader: Add denorm flush support --- .../ir_opt/collect_shader_info_pass.cpp | 71 ++++++++++++++++++++-- .../global_memory_to_storage_buffer_pass.cpp | 1 - 2 files changed, 65 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git 
a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f7f102f53..6662ef4cd 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -2,23 +2,28 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/shader_info.h" namespace Shader::Optimization { namespace { -void AddConstantBufferDescriptor(Info& info, u32 index) { - auto& descriptor{info.constant_buffers.at(index)}; - if (descriptor) { +void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { + if (count != 1) { + throw NotImplementedException("Constant buffer descriptor indexing"); + } + if ((info.constant_buffer_mask & (1U << index)) != 0) { return; } - descriptor = &info.constant_buffer_descriptors.emplace_back(Info::ConstantBufferDescriptor{ + info.constant_buffer_mask |= 1U << index; + info.constant_buffer_descriptors.push_back({ .index{index}, .count{1}, }); } -void Visit(Info& info, IR::Inst& inst) { +void VisitUsages(Info& info, IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::WorkgroupId: info.uses_workgroup_id = true; @@ -72,7 +77,7 @@ void Visit(Info& info, IR::Inst& inst) { break; case IR::Opcode::GetCbuf: if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { - AddConstantBufferDescriptor(info, index.U32()); + AddConstantBufferDescriptor(info, index.U32(), 1); } else { throw NotImplementedException("Constant buffer with non-immediate index"); } @@ -81,6 +86,60 @@ void Visit(Info& info, IR::Inst& inst) { break; } } + +void VisitFpModifiers(Info& info, IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::FPAdd16: + case IR::Opcode::FPFma16: + case IR::Opcode::FPMul16: + case IR::Opcode::FPRoundEven16: + case IR::Opcode::FPFloor16: + case IR::Opcode::FPCeil16: + case IR::Opcode::FPTrunc16: { + const auto control{inst.Flags()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp16_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp16_denorms_preserve = true; + break; + } + break; + } + case IR::Opcode::FPAdd32: + case IR::Opcode::FPFma32: + case IR::Opcode::FPMul32: + case IR::Opcode::FPRoundEven32: + case IR::Opcode::FPFloor32: + case IR::Opcode::FPCeil32: + case IR::Opcode::FPTrunc32: { + const auto control{inst.Flags()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp32_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp32_denorms_preserve = true; + break; + } + break; + } + default: + break; + } +} + +void Visit(Info& info, IR::Inst& inst) { + VisitUsages(info, inst); + VisitFpModifiers(info, inst); +} } // Anonymous namespace void CollectShaderInfoPass(IR::Program& program) { diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index bf230a850..03bd547b7 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -351,7 +351,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { 
.cbuf_offset{storage_buffer.offset}, .count{1}, }); - info.storage_buffers[storage_index] = &info.storage_buffers_descriptors.back(); ++storage_index; } for (const StorageInst& storage_inst : to_replace) { -- cgit v1.2.3 From e44752ddc8804961eb84f8c225bb36d5b4c77bc1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Feb 2021 22:59:16 -0300 Subject: shader: FMUL, select, RRO, and MUFU fixes --- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 4d4e88259..ae3d5a7d6 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -330,7 +330,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return FoldBitCast(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: return FoldAdd(block, inst); - case IR::Opcode::Select32: + case IR::Opcode::SelectU32: return FoldSelect(inst); case IR::Opcode::LogicalAnd: return FoldLogicalAnd(inst); -- cgit v1.2.3 From 3bc857f2f34b2959a545d3b4e26f27ca9751f788 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Feb 2021 18:33:07 -0300 Subject: shader: Avoid infinite recursion when tracking global memory --- .../global_memory_to_storage_buffer_pass.cpp | 31 ++++++++++++++++++---- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 03bd547b7..98e3dfef7 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -43,6 +43,8 @@ using StorageBufferSet = boost::container::flat_set, boost::container::small_vector>; using StorageInstVector = boost::container::small_vector; +using VisitedBlocks = boost::container::flat_set, + boost::container::small_vector>; /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { @@ -194,7 +196,8 @@ std::optional TrackLowAddress(IR::Inst* inst) { } /// Recursively tries to track the storage buffer address used by a global memory instruction -std::optional Track(const IR::Value& value, const Bias* bias) { +std::optional Track(IR::Block* block, const IR::Value& value, const Bias* bias, + VisitedBlocks& visited) { if (value.IsImmediate()) { // Immediates can't be a storage buffer return std::nullopt; @@ -223,8 +226,24 @@ std::optional Track(const IR::Value& value, const Bias* bias) } // Reversed loops are more likely to find the right result for (size_t arg = inst->NumArgs(); arg--;) { - if (const std::optional storage_buffer{Track(inst->Arg(arg), bias)}) { - return *storage_buffer; + if (inst->Opcode() == IR::Opcode::Phi) { + // If we are going through a phi node, mark the current block as visited + visited.insert(block); + // and skip already visited blocks to avoid looping forever + IR::Block* const phi_block{inst->PhiBlock(arg)}; + if (visited.contains(phi_block)) { + // Already visited, skip + continue; + } + const std::optional storage_buffer{Track(phi_block, inst->Arg(arg), bias, visited)}; + if (storage_buffer) { + return *storage_buffer; + } + } else { + const std::optional storage_buffer{Track(block, inst->Arg(arg), bias, 
visited)}; + if (storage_buffer) { + return *storage_buffer; + } } } return std::nullopt; @@ -248,10 +267,12 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& s } // First try to find storage buffers in the NVN address const IR::U32 low_addr{low_addr_info->value}; - std::optional storage_buffer{Track(low_addr, &nvn_bias)}; + VisitedBlocks visited_blocks; + std::optional storage_buffer{Track(&block, low_addr, &nvn_bias, visited_blocks)}; if (!storage_buffer) { // If it fails, track without a bias - storage_buffer = Track(low_addr, nullptr); + visited_blocks.clear(); + storage_buffer = Track(&block, low_addr, nullptr, visited_blocks); if (!storage_buffer) { // If that also failed, drop the global memory usage DiscardGlobalMemory(block, inst); -- cgit v1.2.3 From 8810c88b7e3de2766bf47e07e941fb2c58c6b4b0 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 24 Feb 2021 20:31:15 -0500 Subject: shader: Implement SEL --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 13f9c914a..19d35b1f8 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -109,11 +109,13 @@ IR::Opcode UndefOpcode(const FlagTag&) noexcept { class Pass { public: - void WriteVariable(auto variable, IR::Block* block, const IR::Value& value) { + template + void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) { current_def[variable].insert_or_assign(block, value); } - IR::Value ReadVariable(auto variable, IR::Block* block) { + template + IR::Value ReadVariable(Type variable, IR::Block* block) { const ValueMap& def{current_def[variable]}; if (const auto it{def.find(block)}; it != def.end()) { return it->second; @@ -132,7 +134,8 @@ public: } private: - IR::Value ReadVariableRecursive(auto variable, IR::Block* block) { + template + IR::Value ReadVariableRecursive(Type variable, IR::Block* block) { IR::Value val; if (!sealed_blocks.contains(block)) { // Incomplete CFG @@ -154,7 +157,8 @@ private: return val; } - IR::Value AddPhiOperands(auto variable, IR::Inst& phi, IR::Block* block) { + template + IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) { for (IR::Block* const imm_pred : block->ImmediatePredecessors()) { phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred)); } -- cgit v1.2.3 From 4006929c986a2e0e52429fe21201a7ad5ca3fea9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 3 Mar 2021 03:07:19 -0300 Subject: shader: Implement HADD2 --- .../ir_opt/global_memory_to_storage_buffer_pass.cpp | 2 +- src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 98e3dfef7..965e52135 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -298,7 +298,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); } } else { - offset = ir.ConvertU(32, IR::U64{inst.Arg(0)}); + 
offset = ir.UConvert(32, IR::U64{inst.Arg(0)}); } // Subtract the least significant 32 bits from the guest offset. The result is the storage // buffer offset in bytes. diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index c7032f168..14a5cb50f 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -44,6 +44,12 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::CompositeExtractF32x3; case IR::Opcode::CompositeExtractF16x4: return IR::Opcode::CompositeExtractF32x4; + case IR::Opcode::CompositeInsertF16x2: + return IR::Opcode::CompositeInsertF32x2; + case IR::Opcode::CompositeInsertF16x3: + return IR::Opcode::CompositeInsertF32x3; + case IR::Opcode::CompositeInsertF16x4: + return IR::Opcode::CompositeInsertF32x4; case IR::Opcode::ConvertS16F16: return IR::Opcode::ConvertS16F32; case IR::Opcode::ConvertS32F16: @@ -60,6 +66,10 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::PackHalf2x16; case IR::Opcode::UnpackFloat2x16: return IR::Opcode::UnpackHalf2x16; + case IR::Opcode::ConvertF32F16: + return IR::Opcode::Identity; + case IR::Opcode::ConvertF16F32: + return IR::Opcode::Identity; default: return op; } -- cgit v1.2.3 From ab463712474de5f99eec137a9c6233e55fe184f0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Mar 2021 18:31:53 -0300 Subject: shader: Initial support for textures and TEX --- .../ir_opt/collect_shader_info_pass.cpp | 19 ++ .../global_memory_to_storage_buffer_pass.cpp | 15 +- src/shader_recompiler/ir_opt/passes.h | 2 + src/shader_recompiler/ir_opt/texture_pass.cpp | 199 +++++++++++++++++++++ 4 files changed, 226 insertions(+), 9 deletions(-) create mode 100644 src/shader_recompiler/ir_opt/texture_pass.cpp (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 6662ef4cd..960beadd4 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -82,6 +82,25 @@ void VisitUsages(Info& info, IR::Inst& inst) { throw NotImplementedException("Constant buffer with non-immediate index"); } break; + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + case IR::Opcode::BoundImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::ImageSampleImplicitLod: + case IR::Opcode::ImageSampleExplicitLod: + case IR::Opcode::ImageSampleDrefImplicitLod: + case IR::Opcode::ImageSampleDrefExplicitLod: { + const TextureType type{inst.Flags().type}; + info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || + type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; + info.uses_sparse_residency |= + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; + break; + } default: break; } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 965e52135..2625c0bb2 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ 
b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -226,6 +226,7 @@ std::optional Track(IR::Block* block, const IR::Value& value, } // Reversed loops are more likely to find the right result for (size_t arg = inst->NumArgs(); arg--;) { + IR::Block* inst_block{block}; if (inst->Opcode() == IR::Opcode::Phi) { // If we are going through a phi node, mark the current block as visited visited.insert(block); @@ -235,15 +236,11 @@ std::optional Track(IR::Block* block, const IR::Value& value, // Already visited, skip continue; } - const std::optional storage_buffer{Track(phi_block, inst->Arg(arg), bias, visited)}; - if (storage_buffer) { - return *storage_buffer; - } - } else { - const std::optional storage_buffer{Track(block, inst->Arg(arg), bias, visited)}; - if (storage_buffer) { - return *storage_buffer; - } + inst_block = phi_block; + } + const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), bias, visited)}; + if (storage_buffer) { + return *storage_buffer; } } return std::nullopt; diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 38106308c..3b7e7306b 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -6,6 +6,7 @@ #include +#include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/ir/program.h" @@ -26,6 +27,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Function& function); void LowerFp16ToFp32(IR::Program& program); void SsaRewritePass(std::span post_order_blocks); +void TexturePass(Environment& env, IR::Program& program); void VerificationPass(const IR::Function& function); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp new file mode 100644 index 000000000..80e4ad6a9 --- /dev/null +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -0,0 +1,199 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
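// A note on the pass that follows: every texture instruction is traced back to
// the constant buffer slot holding its handle (bindless handles are tracked
// through phi nodes with a visited-block set, the same cycle guard used by the
// global memory pass), the resulting descriptors are de-duplicated, and the
// instruction is rewritten to its indexed opcode. The registration step boils
// down to "reuse an existing descriptor with the same cbuf index/offset,
// otherwise append one"; a stripped-down sketch of that idea over a plain
// std::vector, with illustrative names rather than the real types:
//
//     struct Desc { u32 cbuf_index; u32 cbuf_offset; };
//     u32 AddOrReuse(std::vector<Desc>& descs, const Desc& d) {
//         for (u32 i = 0; i < static_cast<u32>(descs.size()); ++i) {
//             if (descs[i].cbuf_index == d.cbuf_index &&
//                 descs[i].cbuf_offset == d.cbuf_offset) {
//                 return i; // reuse the existing binding slot
//             }
//         }
//         descs.push_back(d);
//         return static_cast<u32>(descs.size() - 1);
//     }
//
// The real pass below also compares the texture type and writes the resulting
// index back into the instruction's first argument.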
+ +#include + +#include +#include + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/ir_opt/passes.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Optimization { +namespace { +struct ConstBufferAddr { + u32 index; + u32 offset; +}; + +struct TextureInst { + ConstBufferAddr cbuf; + IR::Inst* inst; + IR::Block* block; +}; + +using TextureInstVector = boost::container::small_vector; + +using VisitedBlocks = boost::container::flat_set, + boost::container::small_vector>; + +IR::Opcode IndexedInstruction(const IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleImplicitLod: + return IR::Opcode::ImageSampleImplicitLod; + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + return IR::Opcode::ImageSampleExplicitLod; + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + return IR::Opcode::ImageSampleDrefImplicitLod; + case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + return IR::Opcode::ImageSampleDrefExplicitLod; + default: + return IR::Opcode::Void; + } +} + +bool IsBindless(const IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + return true; + case IR::Opcode::BoundImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BoundImageSampleDrefExplicitLod: + return false; + default: + throw InvalidArgument("Invalid opcode {}", inst.Opcode()); + } +} + +bool IsTextureInstruction(const IR::Inst& inst) { + return IndexedInstruction(inst) != IR::Opcode::Void; +} + +std::optional Track(IR::Block* block, const IR::Value& value, + VisitedBlocks& visited) { + if (value.IsImmediate()) { + // Immediates can't be a storage buffer + return std::nullopt; + } + const IR::Inst* const inst{value.InstRecursive()}; + if (inst->Opcode() == IR::Opcode::GetCbuf) { + const IR::Value index{inst->Arg(0)}; + const IR::Value offset{inst->Arg(1)}; + if (!index.IsImmediate()) { + // Reading a bindless texture from variable indices is valid + // but not supported here at the moment + return std::nullopt; + } + if (!offset.IsImmediate()) { + // TODO: Support arrays of textures + return std::nullopt; + } + return ConstBufferAddr{ + .index{index.U32()}, + .offset{offset.U32()}, + }; + } + // Reversed loops are more likely to find the right result + for (size_t arg = inst->NumArgs(); arg--;) { + IR::Block* inst_block{block}; + if (inst->Opcode() == IR::Opcode::Phi) { + // If we are going through a phi node, mark the current block as visited + visited.insert(block); + // and skip already visited blocks to avoid looping forever + IR::Block* const phi_block{inst->PhiBlock(arg)}; + if (visited.contains(phi_block)) { + // Already visited, skip + continue; + } + inst_block = phi_block; + } + const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), visited)}; + if (storage_buffer) { + return *storage_buffer; + } + } + return std::nullopt; +} + +TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { + ConstBufferAddr addr; + if 
(IsBindless(inst)) { + VisitedBlocks visited; + const std::optional track_addr{Track(block, IR::Value{&inst}, visited)}; + if (!track_addr) { + throw NotImplementedException("Failed to track bindless texture constant buffer"); + } + addr = *track_addr; + } else { + addr = ConstBufferAddr{ + .index{env.TextureBoundBuffer()}, + .offset{inst.Arg(0).U32()}, + }; + } + return TextureInst{ + .cbuf{addr}, + .inst{&inst}, + .block{block}, + }; +} + +class Descriptors { +public: + explicit Descriptors(TextureDescriptors& descriptors_) : descriptors{descriptors_} {} + + u32 Add(const TextureDescriptor& descriptor) { + // TODO: Handle arrays + auto it{std::ranges::find_if(descriptors, [&descriptor](const TextureDescriptor& existing) { + return descriptor.cbuf_index == existing.cbuf_index && + descriptor.cbuf_offset == existing.cbuf_offset && + descriptor.type == existing.type; + })}; + if (it != descriptors.end()) { + return static_cast(std::distance(descriptors.begin(), it)); + } + descriptors.push_back(descriptor); + return static_cast(descriptors.size()) - 1; + } + +private: + TextureDescriptors& descriptors; +}; +} // Anonymous namespace + +void TexturePass(Environment& env, IR::Program& program) { + TextureInstVector to_replace; + for (IR::Function& function : program.functions) { + for (IR::Block* const block : function.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsTextureInstruction(inst)) { + continue; + } + to_replace.push_back(MakeInst(env, block, inst)); + } + } + } + // Sort instructions to visit textures by constant buffer index, then by offset + std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) { + return lhs.cbuf.offset < rhs.cbuf.offset; + }); + std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) { + return lhs.cbuf.index < rhs.cbuf.index; + }); + Descriptors descriptors{program.info.texture_descriptors}; + for (TextureInst& texture_inst : to_replace) { + // TODO: Handle arrays + IR::Inst* const inst{texture_inst.inst}; + const u32 index{descriptors.Add(TextureDescriptor{ + .type{inst->Flags().type}, + .cbuf_index{texture_inst.cbuf.index}, + .cbuf_offset{texture_inst.cbuf.offset}, + .count{1}, + })}; + inst->ReplaceOpcode(IndexedInstruction(*inst)); + inst->SetArg(0, IR::Value{index}); + } +} + +} // namespace Shader::Optimization -- cgit v1.2.3 From 3a63fa0477ea8297c80133d35494e1dfdc012f95 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 9 Mar 2021 17:14:57 -0300 Subject: shader: Partial implementation of LDC --- .../ir_opt/collect_shader_info_pass.cpp | 135 +++++++++++++++++++-- .../ir_opt/constant_propagation_pass.cpp | 22 +++- .../global_memory_to_storage_buffer_pass.cpp | 2 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 2 +- 4 files changed, 146 insertions(+), 15 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 960beadd4..cdbe85221 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -25,18 +25,13 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { void VisitUsages(Info& info, IR::Inst& inst) { switch (inst.Opcode()) { - case IR::Opcode::WorkgroupId: - info.uses_workgroup_id = true; - break; - case IR::Opcode::LocalInvocationId: - info.uses_local_invocation_id = true; - break; case IR::Opcode::CompositeConstructF16x2: case 
IR::Opcode::CompositeConstructF16x3: case IR::Opcode::CompositeConstructF16x4: case IR::Opcode::CompositeExtractF16x2: case IR::Opcode::CompositeExtractF16x3: case IR::Opcode::CompositeExtractF16x4: + case IR::Opcode::SelectF16: case IR::Opcode::BitCastU16F16: case IR::Opcode::BitCastF16U16: case IR::Opcode::PackFloat2x16: @@ -75,13 +70,139 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPTrunc64: info.uses_fp64 = true; break; - case IR::Opcode::GetCbuf: + default: + break; + } + switch (inst.Opcode()) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + case IR::Opcode::UndefU8: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::LoadStorageU8: + case IR::Opcode::LoadStorageS8: + case IR::Opcode::WriteStorageU8: + case IR::Opcode::WriteStorageS8: + case IR::Opcode::SelectU8: + info.uses_int8 = true; + break; + default: + break; + } + switch (inst.Opcode()) { + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + case IR::Opcode::UndefU16: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::LoadStorageU16: + case IR::Opcode::LoadStorageS16: + case IR::Opcode::WriteStorageU16: + case IR::Opcode::WriteStorageS16: + case IR::Opcode::SelectU16: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::ConvertS16F16: + case IR::Opcode::ConvertS16F32: + case IR::Opcode::ConvertS16F64: + case IR::Opcode::ConvertU16F16: + case IR::Opcode::ConvertU16F32: + case IR::Opcode::ConvertU16F64: + info.uses_int16 = true; + break; + default: + break; + } + switch (inst.Opcode()) { + case IR::Opcode::GetCbufU64: + case IR::Opcode::UndefU64: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + case IR::Opcode::SelectU64: + case IR::Opcode::BitCastU64F64: + case IR::Opcode::BitCastF64U64: + case IR::Opcode::PackUint2x32: + case IR::Opcode::UnpackUint2x32: + case IR::Opcode::IAdd64: + case IR::Opcode::ISub64: + case IR::Opcode::INeg64: + case IR::Opcode::ShiftLeftLogical64: + case IR::Opcode::ShiftRightLogical64: + case IR::Opcode::ShiftRightArithmetic64: + case IR::Opcode::ConvertS64F16: + case IR::Opcode::ConvertS64F32: + case IR::Opcode::ConvertS64F64: + case IR::Opcode::ConvertU64F16: + case IR::Opcode::ConvertU64F32: + case IR::Opcode::ConvertU64F64: + case IR::Opcode::ConvertU64U32: + case IR::Opcode::ConvertU32U64: + case IR::Opcode::ConvertF16U64: + case IR::Opcode::ConvertF32U64: + case IR::Opcode::ConvertF64U64: + info.uses_int64 = true; + break; + default: + break; + } + switch (inst.Opcode()) { + case IR::Opcode::WorkgroupId: + info.uses_workgroup_id = true; + break; + case IR::Opcode::LocalInvocationId: + info.uses_local_invocation_id = true; + break; + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + case IR::Opcode::GetCbufU32: + case IR::Opcode::GetCbufF32: + case IR::Opcode::GetCbufU64: { if (const IR::Value index{inst.Arg(0)}; 
index.IsImmediate()) { AddConstantBufferDescriptor(info, index.U32(), 1); } else { throw NotImplementedException("Constant buffer with non-immediate index"); } + switch (inst.Opcode()) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + info.used_constant_buffer_types |= IR::Type::U8; + break; + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + info.used_constant_buffer_types |= IR::Type::U16; + break; + case IR::Opcode::GetCbufU32: + info.used_constant_buffer_types |= IR::Type::U32; + break; + case IR::Opcode::GetCbufF32: + info.used_constant_buffer_types |= IR::Type::F32; + break; + case IR::Opcode::GetCbufU64: + info.used_constant_buffer_types |= IR::Type::U64; + break; + default: + break; + } break; + } case IR::Opcode::BindlessImageSampleImplicitLod: case IR::Opcode::BindlessImageSampleExplicitLod: case IR::Opcode::BindlessImageSampleDrefImplicitLod: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index ae3d5a7d6..7ba9ebe9b 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -193,7 +193,7 @@ void FoldISub32(IR::Inst& inst) { // ISub32 is generally used to subtract two constant buffers, compare and replace this with // zero if they equal. const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { - return a->Opcode() == IR::Opcode::GetCbuf && b->Opcode() == IR::Opcode::GetCbuf && + return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1); }}; IR::Inst* op_a{inst.Arg(0).InstRecursive()}; @@ -207,7 +207,7 @@ void FoldISub32(IR::Inst& inst) { // Canonicalize local variables to simplify the following logic std::swap(op_a, op_b); } - if (op_b->Opcode() != IR::Opcode::GetCbuf) { + if (op_b->Opcode() != IR::Opcode::GetCbufU32) { return; } IR::Inst* const inst_cbuf{op_b}; @@ -277,7 +277,7 @@ void FoldLogicalNot(IR::Inst& inst) { } } -template +template void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { const IR::Value value{inst.Arg(0)}; if (value.IsImmediate()) { @@ -285,8 +285,18 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { return; } IR::Inst* const arg_inst{value.InstRecursive()}; - if (value.InstRecursive()->Opcode() == reverse) { + if (arg_inst->Opcode() == reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); + return; + } + if constexpr (op == IR::Opcode::BitCastF32U32) { + if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) { + // Replace the bitcast with a typed constant buffer read + inst.ReplaceOpcode(IR::Opcode::GetCbufF32); + inst.SetArg(0, arg_inst->Arg(0)); + inst.SetArg(1, arg_inst->Arg(1)); + return; + } } } @@ -325,9 +335,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::ISub32: return FoldISub32(inst); case IR::Opcode::BitCastF32U32: - return FoldBitCast(inst, IR::Opcode::BitCastU32F32); + return FoldBitCast(inst, IR::Opcode::BitCastU32F32); case IR::Opcode::BitCastU32F32: - return FoldBitCast(inst, IR::Opcode::BitCastF32U32); + return FoldBitCast(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: return FoldAdd(block, inst); case IR::Opcode::SelectU32: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 2625c0bb2..5d98d278e 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ 
b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -203,7 +203,7 @@ std::optional Track(IR::Block* block, const IR::Value& value, return std::nullopt; } const IR::Inst* const inst{value.InstRecursive()}; - if (inst->Opcode() == IR::Opcode::GetCbuf) { + if (inst->Opcode() == IR::Opcode::GetCbufU32) { const IR::Value index{inst->Arg(0)}; const IR::Value offset{inst->Arg(1)}; if (!index.IsImmediate()) { diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 80e4ad6a9..ec802e02c 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -78,7 +78,7 @@ std::optional Track(IR::Block* block, const IR::Value& value, return std::nullopt; } const IR::Inst* const inst{value.InstRecursive()}; - if (inst->Opcode() == IR::Opcode::GetCbuf) { + if (inst->Opcode() == IR::Opcode::GetCbufU32) { const IR::Value index{inst->Arg(0)}; const IR::Value offset{inst->Arg(1)}; if (!index.IsImmediate()) { -- cgit v1.2.3 From ba8c1d2eb479d04b2b0d847efd67468b688765d4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 10 Mar 2021 22:42:17 -0500 Subject: shader: Implement FCMP still need to configure some settings for NV denorm flush and intel NaN --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index cdbe85221..70d75ad6c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -256,7 +256,19 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) { case IR::Opcode::FPRoundEven32: case IR::Opcode::FPFloor32: case IR::Opcode::FPCeil32: - case IR::Opcode::FPTrunc32: { + case IR::Opcode::FPTrunc32: + case IR::Opcode::FPOrdEqual32: + case IR::Opcode::FPUnordEqual32: + case IR::Opcode::FPOrdNotEqual32: + case IR::Opcode::FPUnordNotEqual32: + case IR::Opcode::FPOrdLessThan32: + case IR::Opcode::FPUnordLessThan32: + case IR::Opcode::FPOrdGreaterThan32: + case IR::Opcode::FPUnordGreaterThan32: + case IR::Opcode::FPOrdLessThanEqual32: + case IR::Opcode::FPUnordLessThanEqual32: + case IR::Opcode::FPOrdGreaterThanEqual32: + case IR::Opcode::FPUnordGreaterThanEqual32: { const auto control{inst.Flags()}; switch (control.fmz_mode) { case IR::FmzMode::DontCare: -- cgit v1.2.3 From 71f96fa6366dc6dd306a953bca1b958fb32bc55a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Mar 2021 03:41:05 -0300 Subject: shader: Implement CAL inlining function calls --- .../ir_opt/collect_shader_info_pass.cpp | 8 +++----- .../ir_opt/constant_propagation_pass.cpp | 8 +++++--- .../ir_opt/dead_code_elimination_pass.cpp | 10 ++++++---- .../ir_opt/global_memory_to_storage_buffer_pass.cpp | 12 +++++------- src/shader_recompiler/ir_opt/identity_removal_pass.cpp | 4 ++-- src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp | 8 +++----- src/shader_recompiler/ir_opt/passes.h | 18 +++++------------- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 5 ++--- src/shader_recompiler/ir_opt/texture_pass.cpp | 12 +++++------- src/shader_recompiler/ir_opt/verification_pass.cpp | 16 ++++++++-------- 10 files changed, 44 insertions(+), 57 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp 
b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 70d75ad6c..708b6b267 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -296,11 +296,9 @@ void Visit(Info& info, IR::Inst& inst) { void CollectShaderInfoPass(IR::Program& program) { Info& info{program.info}; - for (IR::Function& function : program.functions) { - for (IR::Block* const block : function.post_order_blocks) { - for (IR::Inst& inst : block->Instructions()) { - Visit(info, inst); - } + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + Visit(info, inst); } } } diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 7ba9ebe9b..a39db2bf1 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -371,9 +371,11 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { } } // Anonymous namespace -void ConstantPropagationPass(IR::Block& block) { - for (IR::Inst& inst : block) { - ConstantPropagation(block, inst); +void ConstantPropagationPass(IR::Program& program) { + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + ConstantPropagation(*block, inst); + } } } diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp index 132b2012a..8ad59f42e 100644 --- a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -10,12 +10,14 @@ namespace Shader::Optimization { -void DeadCodeEliminationPass(IR::Block& block) { +void DeadCodeEliminationPass(IR::Program& program) { // We iterate over the instructions in reverse order. // This is because removing an instruction reduces the number of uses for earlier instructions. 
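// A minimal illustration of why the reverse sweep pays off (hypothetical IR
// values, not taken from this pass):
//     %1 = IAdd32 %a, %b
//     %2 = IMul32 %1, %c      // no uses, no side effects
// Visiting %2 first invalidates it, which drops %1's use count to zero, so the
// same sweep can remove %1 as well; a forward sweep would still see %2 as a
// live use when it reaches %1 and would need another pass to converge.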
- for (IR::Inst& inst : block | std::views::reverse) { - if (!inst.HasUses() && !inst.MayHaveSideEffects()) { - inst.Invalidate(); + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions() | std::views::reverse) { + if (!inst.HasUses() && !inst.MayHaveSideEffects()) { + inst.Invalidate(); + } } } } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 5d98d278e..1faa1ec88 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -351,14 +351,12 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { StorageBufferSet storage_buffers; StorageInstVector to_replace; - for (IR::Function& function : program.functions) { - for (IR::Block* const block : function.post_order_blocks) { - for (IR::Inst& inst : block->Instructions()) { - if (!IsGlobalMemory(inst)) { - continue; - } - CollectStorageBuffers(*block, inst, storage_buffers, to_replace); + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsGlobalMemory(inst)) { + continue; } + CollectStorageBuffers(*block, inst, storage_buffers, to_replace); } } Info& info{program.info}; diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 593efde39..8790b48f2 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -10,10 +10,10 @@ namespace Shader::Optimization { -void IdentityRemovalPass(IR::Function& function) { +void IdentityRemovalPass(IR::Program& program) { std::vector to_invalidate; - for (IR::Block* const block : function.blocks) { + for (IR::Block* const block : program.blocks) { for (auto inst = block->begin(); inst != block->end();) { const size_t num_args{inst->NumArgs()}; for (size_t i = 0; i < num_args; ++i) { diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 14a5cb50f..74acb8bb6 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -77,11 +77,9 @@ IR::Opcode Replace(IR::Opcode op) { } // Anonymous namespace void LowerFp16ToFp32(IR::Program& program) { - for (IR::Function& function : program.functions) { - for (IR::Block* const block : function.blocks) { - for (IR::Inst& inst : block->Instructions()) { - inst.ReplaceOpcode(Replace(inst.Opcode())); - } + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + inst.ReplaceOpcode(Replace(inst.Opcode())); } } } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 3b7e7306b..5c1fc166c 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -8,26 +8,18 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Optimization { -template -void PostOrderInvoke(Func&& func, IR::Function& function) { - for (const auto& block : function.post_order_blocks) { - func(*block); - } -} - void CollectShaderInfoPass(IR::Program& program); -void ConstantPropagationPass(IR::Block& block); -void 
DeadCodeEliminationPass(IR::Block& block); +void ConstantPropagationPass(IR::Program& program); +void DeadCodeEliminationPass(IR::Program& program); void GlobalMemoryToStorageBufferPass(IR::Program& program); -void IdentityRemovalPass(IR::Function& function); +void IdentityRemovalPass(IR::Program& program); void LowerFp16ToFp32(IR::Program& program); -void SsaRewritePass(std::span post_order_blocks); +void SsaRewritePass(IR::Program& program); void TexturePass(Environment& env, IR::Program& program); -void VerificationPass(const IR::Function& function); +void VerificationPass(const IR::Program& program); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 19d35b1f8..f89fd51c8 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -23,7 +23,6 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/pred.h" @@ -262,9 +261,9 @@ void VisitBlock(Pass& pass, IR::Block* block) { } } // Anonymous namespace -void SsaRewritePass(std::span post_order_blocks) { +void SsaRewritePass(IR::Program& program) { Pass pass; - for (IR::Block* const block : post_order_blocks | std::views::reverse) { + for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { VisitBlock(pass, block); } } diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index ec802e02c..de9d633e2 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -164,14 +164,12 @@ private: void TexturePass(Environment& env, IR::Program& program) { TextureInstVector to_replace; - for (IR::Function& function : program.functions) { - for (IR::Block* const block : function.post_order_blocks) { - for (IR::Inst& inst : block->Instructions()) { - if (!IsTextureInstruction(inst)) { - continue; - } - to_replace.push_back(MakeInst(env, block, inst)); + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsTextureInstruction(inst)) { + continue; } + to_replace.push_back(MakeInst(env, block, inst)); } } // Sort instructions to visit textures by constant buffer index, then by offset diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 32b56eb57..4080b37cc 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -11,8 +11,8 @@ namespace Shader::Optimization { -static void ValidateTypes(const IR::Function& function) { - for (const auto& block : function.blocks) { +static void ValidateTypes(const IR::Program& program) { + for (const auto& block : program.blocks) { for (const IR::Inst& inst : *block) { if (inst.Opcode() == IR::Opcode::Phi) { // Skip validation on phi nodes @@ -30,9 +30,9 @@ static void ValidateTypes(const IR::Function& function) { } } -static void ValidateUses(const IR::Function& function) { +static void ValidateUses(const IR::Program& program) { std::map actual_uses; - for (const auto& block : function.blocks) { + for (const auto& block : program.blocks) { for (const IR::Inst& inst : *block) { const size_t num_args{inst.NumArgs()}; for (size_t i 
= 0; i < num_args; ++i) { @@ -45,14 +45,14 @@ static void ValidateUses(const IR::Function& function) { } for (const auto [inst, uses] : actual_uses) { if (inst->UseCount() != uses) { - throw LogicError("Invalid uses in block:" /*, IR::DumpFunction(function)*/); + throw LogicError("Invalid uses in block: {}", IR::DumpProgram(program)); } } } -void VerificationPass(const IR::Function& function) { - ValidateTypes(function); - ValidateUses(function); +void VerificationPass(const IR::Program& program) { + ValidateTypes(program); + ValidateUses(program); } } // namespace Shader::Optimization -- cgit v1.2.3 From 32b6c63485626f10b3bc8efb0239064cc781115e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 17 Mar 2021 01:33:25 -0300 Subject: shader: Reorder phi nodes when redefined as undefined opcodes --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index f89fd51c8..d09bcec36 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -181,8 +181,16 @@ private: } if (same.IsEmpty()) { // The phi is unreachable or in the start block - const auto first_not_phi{std::ranges::find_if_not(block->Instructions(), IsPhi)}; + // First remove the phi node from the block, it will be reinserted + IR::Block::InstructionList& list{block->Instructions()}; + list.erase(IR::Block::InstructionList::s_iterator_to(phi)); + + // Insert an undef instruction after all phi nodes (to keep phi instructions on top) + const auto first_not_phi{std::ranges::find_if_not(list, IsPhi)}; same = IR::Value{&*block->PrependNewInst(first_not_phi, undef_opcode)}; + + // Insert the phi node after the undef opcode, this will be replaced with an identity + list.insert(first_not_phi, phi); } // Reroute all uses of phi to same and remove phi phi.ReplaceUsesWith(same); -- cgit v1.2.3 From 260743f371236f7c57b01334b1c3474b15a47c39 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Mar 2021 19:28:31 -0300 Subject: shader: Add partial rasterizer integration --- .../ir_opt/collect_shader_info_pass.cpp | 60 ++++++++++++++++++++-- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 2 +- 2 files changed, 57 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 708b6b267..fbbe28632 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -17,10 +17,47 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { return; } info.constant_buffer_mask |= 1U << index; - info.constant_buffer_descriptors.push_back({ - .index{index}, - .count{1}, - }); + + auto& cbufs{info.constant_buffer_descriptors}; + cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), + ConstantBufferDescriptor{ + .index{index}, + .count{1}, + }); +} + +void GetAttribute(Info& info, IR::Attribute attribute) { + if (IR::IsGeneric(attribute)) { + info.loads_generics.at(IR::GenericAttributeIndex(attribute)) = true; + return; + } + switch (attribute) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + info.loads_position = true; + 
break; + default: + throw NotImplementedException("Get attribute {}", attribute); + } +} + +void SetAttribute(Info& info, IR::Attribute attribute) { + if (IR::IsGeneric(attribute)) { + info.stores_generics.at(IR::GenericAttributeIndex(attribute)) = true; + return; + } + switch (attribute) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + info.stores_position = true; + break; + default: + throw NotImplementedException("Set attribute {}", attribute); + } } void VisitUsages(Info& info, IR::Inst& inst) { @@ -162,6 +199,21 @@ void VisitUsages(Info& info, IR::Inst& inst) { break; } switch (inst.Opcode()) { + case IR::Opcode::DemoteToHelperInvocation: + info.uses_demote_to_helper_invocation = true; + break; + case IR::Opcode::GetAttribute: + GetAttribute(info, inst.Arg(0).Attribute()); + break; + case IR::Opcode::SetAttribute: + SetAttribute(info, inst.Arg(0).Attribute()); + break; + case IR::Opcode::SetFragColor: + info.stores_frag_color[inst.Arg(0).U32()] = true; + break; + case IR::Opcode::SetFragDepth: + info.stores_frag_depth = true; + break; case IR::Opcode::WorkgroupId: info.uses_workgroup_id = true; break; diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index d09bcec36..bab7ca186 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -169,7 +169,7 @@ private: const size_t num_args{phi.NumArgs()}; for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { const IR::Value& op{phi.Arg(arg_index)}; - if (op == same || op == IR::Value{&phi}) { + if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) { // Unique value or self-reference continue; } -- cgit v1.2.3 From f91859efd259995806c2944f7941b105b58300d3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Mar 2021 05:04:12 -0300 Subject: shader: Implement I2F --- .../ir_opt/collect_shader_info_pass.cpp | 28 ++++++++++++++++++++++ .../ir_opt/lower_fp16_to_fp32.cpp | 16 +++++++++++++ 2 files changed, 44 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index fbbe28632..e72505d61 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -79,6 +79,14 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ConvertU16F16: case IR::Opcode::ConvertU32F16: case IR::Opcode::ConvertU64F16: + case IR::Opcode::ConvertF16S8: + case IR::Opcode::ConvertF16S16: + case IR::Opcode::ConvertF16S32: + case IR::Opcode::ConvertF16S64: + case IR::Opcode::ConvertF16U8: + case IR::Opcode::ConvertF16U16: + case IR::Opcode::ConvertF16U32: + case IR::Opcode::ConvertF16U64: case IR::Opcode::FPAbs16: case IR::Opcode::FPAdd16: case IR::Opcode::FPCeil16: @@ -105,6 +113,14 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPRoundEven64: case IR::Opcode::FPSaturate64: case IR::Opcode::FPTrunc64: + case IR::Opcode::ConvertF64S8: + case IR::Opcode::ConvertF64S16: + case IR::Opcode::ConvertF64S32: + case IR::Opcode::ConvertF64S64: + case IR::Opcode::ConvertF64U8: + case IR::Opcode::ConvertF64U16: + case IR::Opcode::ConvertF64U32: + case IR::Opcode::ConvertF64U64: info.uses_fp64 = true; break; default: @@ -123,6 +139,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::WriteStorageU8: case 
IR::Opcode::WriteStorageS8: case IR::Opcode::SelectU8: + case IR::Opcode::ConvertF16S8: + case IR::Opcode::ConvertF16U8: + case IR::Opcode::ConvertF32S8: + case IR::Opcode::ConvertF32U8: + case IR::Opcode::ConvertF64S8: + case IR::Opcode::ConvertF64U8: info.uses_int8 = true; break; default: @@ -149,6 +171,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ConvertU16F16: case IR::Opcode::ConvertU16F32: case IR::Opcode::ConvertU16F64: + case IR::Opcode::ConvertF16S16: + case IR::Opcode::ConvertF16U16: + case IR::Opcode::ConvertF32S16: + case IR::Opcode::ConvertF32U16: + case IR::Opcode::ConvertF64S16: + case IR::Opcode::ConvertF64U16: info.uses_int16 = true; break; default: diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 74acb8bb6..baa3d22df 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -70,6 +70,22 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::Identity; case IR::Opcode::ConvertF16F32: return IR::Opcode::Identity; + case IR::Opcode::ConvertF16S8: + return IR::Opcode::ConvertF32S8; + case IR::Opcode::ConvertF16S16: + return IR::Opcode::ConvertF32S16; + case IR::Opcode::ConvertF16S32: + return IR::Opcode::ConvertF32S32; + case IR::Opcode::ConvertF16S64: + return IR::Opcode::ConvertF32S64; + case IR::Opcode::ConvertF16U8: + return IR::Opcode::ConvertF32U8; + case IR::Opcode::ConvertF16U16: + return IR::Opcode::ConvertF32U16; + case IR::Opcode::ConvertF16U32: + return IR::Opcode::ConvertF32U32; + case IR::Opcode::ConvertF16U64: + return IR::Opcode::ConvertF32U64; default: return op; } -- cgit v1.2.3 From 76c8a962ac4eae77e71d66a72c448930240339f9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Mar 2021 19:11:56 -0300 Subject: spirv: Implement VertexId and InstanceId, refactor code --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index e72505d61..e7fa3fce0 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -38,6 +38,12 @@ void GetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::PositionW: info.loads_position = true; break; + case IR::Attribute::InstanceId: + info.loads_instance_id = true; + break; + case IR::Attribute::VertexId: + info.loads_vertex_id = true; + break; default: throw NotImplementedException("Get attribute {}", attribute); } -- cgit v1.2.3 From 27fb97377eeb40849260ea866a90519521c6f59b Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 04:33:19 +0100 Subject: shader: Fix floating point comparison for FP16 --- .../ir_opt/lower_fp16_to_fp32.cpp | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index baa3d22df..7723c9a57 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -50,6 +50,30 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::CompositeInsertF32x3; case IR::Opcode::CompositeInsertF16x4: return IR::Opcode::CompositeInsertF32x4; + case IR::Opcode::FPOrdEqual16: + return 
IR::Opcode::FPOrdEqual32; + case IR::Opcode::FPUnordEqual16: + return IR::Opcode::FPUnordEqual32; + case IR::Opcode::FPOrdNotEqual16: + return IR::Opcode::FPOrdNotEqual32; + case IR::Opcode::FPUnordNotEqual16: + return IR::Opcode::FPUnordNotEqual32; + case IR::Opcode::FPOrdLessThan16: + return IR::Opcode::FPOrdLessThan32; + case IR::Opcode::FPUnordLessThan16: + return IR::Opcode::FPUnordLessThan32; + case IR::Opcode::FPOrdGreaterThan16: + return IR::Opcode::FPOrdGreaterThan32; + case IR::Opcode::FPUnordGreaterThan16: + return IR::Opcode::FPUnordGreaterThan32; + case IR::Opcode::FPOrdLessThanEqual16: + return IR::Opcode::FPOrdLessThanEqual32; + case IR::Opcode::FPUnordLessThanEqual16: + return IR::Opcode::FPUnordLessThanEqual32; + case IR::Opcode::FPOrdGreaterThanEqual16: + return IR::Opcode::FPOrdGreaterThanEqual32; + case IR::Opcode::FPUnordGreaterThanEqual16: + return IR::Opcode::FPUnordGreaterThanEqual32; case IR::Opcode::ConvertS16F16: return IR::Opcode::ConvertS16F32; case IR::Opcode::ConvertS32F16: -- cgit v1.2.3 From a77e764726938a26803fa90a9c69ccdd32ab09cd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Mar 2021 00:42:56 -0300 Subject: shader: Add support for fp16 comparisons and misc fixes --- .../ir_opt/collect_shader_info_pass.cpp | 16 ++++++++++++++++ src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp | 2 ++ 2 files changed, 18 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index e7fa3fce0..fd6069c65 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -74,6 +74,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::CompositeExtractF16x2: case IR::Opcode::CompositeExtractF16x3: case IR::Opcode::CompositeExtractF16x4: + case IR::Opcode::CompositeInsertF16x2: + case IR::Opcode::CompositeInsertF16x3: + case IR::Opcode::CompositeInsertF16x4: case IR::Opcode::SelectF16: case IR::Opcode::BitCastU16F16: case IR::Opcode::BitCastF16U16: @@ -103,6 +106,19 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPRoundEven16: case IR::Opcode::FPSaturate16: case IR::Opcode::FPTrunc16: + case IR::Opcode::FPOrdEqual16: + case IR::Opcode::FPUnordEqual16: + case IR::Opcode::FPOrdNotEqual16: + case IR::Opcode::FPUnordNotEqual16: + case IR::Opcode::FPOrdLessThan16: + case IR::Opcode::FPUnordLessThan16: + case IR::Opcode::FPOrdGreaterThan16: + case IR::Opcode::FPUnordGreaterThan16: + case IR::Opcode::FPOrdLessThanEqual16: + case IR::Opcode::FPUnordLessThanEqual16: + case IR::Opcode::FPOrdGreaterThanEqual16: + case IR::Opcode::FPUnordGreaterThanEqual16: + case IR::Opcode::FPIsNan16: info.uses_fp16 = true; break; case IR::Opcode::FPAbs64: diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 7723c9a57..0e8862f45 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -74,6 +74,8 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::FPOrdGreaterThanEqual32; case IR::Opcode::FPUnordGreaterThanEqual16: return IR::Opcode::FPUnordGreaterThanEqual32; + case IR::Opcode::FPIsNan16: + return IR::Opcode::FPIsNan32; case IR::Opcode::ConvertS16F16: return IR::Opcode::ConvertS16F32; case IR::Opcode::ConvertS32F16: -- cgit v1.2.3 From 8b3b9c3371626c217a3865adae26191fce31ccce Mon Sep 17 00:00:00 2001 From: 
ReinUsesLisp Date: Sun, 21 Mar 2021 17:27:44 -0300 Subject: shader: Add missing fp64 usage flags --- .../ir_opt/collect_shader_info_pass.cpp | 34 ++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index fd6069c65..2a66403de 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -121,6 +121,19 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPIsNan16: info.uses_fp16 = true; break; + case IR::Opcode::CompositeConstructF64x2: + case IR::Opcode::CompositeConstructF64x3: + case IR::Opcode::CompositeConstructF64x4: + case IR::Opcode::CompositeExtractF64x2: + case IR::Opcode::CompositeExtractF64x3: + case IR::Opcode::CompositeExtractF64x4: + case IR::Opcode::CompositeInsertF64x2: + case IR::Opcode::CompositeInsertF64x3: + case IR::Opcode::CompositeInsertF64x4: + case IR::Opcode::BitCastU64F64: + case IR::Opcode::BitCastF64U64: + case IR::Opcode::PackDouble2x32: + case IR::Opcode::UnpackDouble2x32: case IR::Opcode::FPAbs64: case IR::Opcode::FPAdd64: case IR::Opcode::FPCeil64: @@ -135,6 +148,27 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPRoundEven64: case IR::Opcode::FPSaturate64: case IR::Opcode::FPTrunc64: + case IR::Opcode::FPOrdEqual64: + case IR::Opcode::FPUnordEqual64: + case IR::Opcode::FPOrdNotEqual64: + case IR::Opcode::FPUnordNotEqual64: + case IR::Opcode::FPOrdLessThan64: + case IR::Opcode::FPUnordLessThan64: + case IR::Opcode::FPOrdGreaterThan64: + case IR::Opcode::FPUnordGreaterThan64: + case IR::Opcode::FPOrdLessThanEqual64: + case IR::Opcode::FPUnordLessThanEqual64: + case IR::Opcode::FPOrdGreaterThanEqual64: + case IR::Opcode::FPUnordGreaterThanEqual64: + case IR::Opcode::FPIsNan64: + case IR::Opcode::ConvertS16F64: + case IR::Opcode::ConvertS32F64: + case IR::Opcode::ConvertS64F64: + case IR::Opcode::ConvertU16F64: + case IR::Opcode::ConvertU32F64: + case IR::Opcode::ConvertU64F64: + case IR::Opcode::ConvertF32F64: + case IR::Opcode::ConvertF64F32: case IR::Opcode::ConvertF64S8: case IR::Opcode::ConvertF64S16: case IR::Opcode::ConvertF64S32: -- cgit v1.2.3 From a62f04efab4331eeabd4441962f86a5e87db3f2d Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 09:32:16 +0100 Subject: shader: Implement F2F --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 2a66403de..e9f64cf3f 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -404,7 +404,9 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) { case IR::Opcode::FPOrdLessThanEqual32: case IR::Opcode::FPUnordLessThanEqual32: case IR::Opcode::FPOrdGreaterThanEqual32: - case IR::Opcode::FPUnordGreaterThanEqual32: { + case IR::Opcode::FPUnordGreaterThanEqual32: + case IR::Opcode::ConvertF16F32: + case IR::Opcode::ConvertF64F32: { const auto control{inst.Flags()}; switch (control.fmz_mode) { case IR::FmzMode::DontCare: -- cgit v1.2.3 From e4e1cc11b8f7649171fe922b2899e57120bfba53 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 21 Mar 2021 19:28:37 -0400 
Subject: shader: Implement DMNMX, DSET, DSETP --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 1 + src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index e9f64cf3f..f44eac5d8 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -130,6 +130,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::CompositeInsertF64x2: case IR::Opcode::CompositeInsertF64x3: case IR::Opcode::CompositeInsertF64x4: + case IR::Opcode::SelectF64: case IR::Opcode::BitCastU64F64: case IR::Opcode::BitCastF64U64: case IR::Opcode::PackDouble2x32: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index a39db2bf1..ef7766d22 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -229,7 +229,6 @@ void FoldISub32(IR::Inst& inst) { } } -template void FoldSelect(IR::Inst& inst) { const IR::Value cond{inst.Arg(0)}; if (cond.IsImmediate()) { @@ -340,8 +339,15 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return FoldBitCast(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: return FoldAdd(block, inst); + case IR::Opcode::SelectU1: + case IR::Opcode::SelectU8: + case IR::Opcode::SelectU16: case IR::Opcode::SelectU32: - return FoldSelect(inst); + case IR::Opcode::SelectU64: + case IR::Opcode::SelectF16: + case IR::Opcode::SelectF32: + case IR::Opcode::SelectF64: + return FoldSelect(inst); case IR::Opcode::LogicalAnd: return FoldLogicalAnd(inst); case IR::Opcode::LogicalOr: -- cgit v1.2.3 From 2be5c7eff40647344da7951dc5e20a7deebf78aa Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Mar 2021 21:56:27 -0300 Subject: shader: Fold interpolation multiplications --- .../ir_opt/constant_propagation_pass.cpp | 34 ++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index ef7766d22..3dab424f6 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -236,6 +236,38 @@ void FoldSelect(IR::Inst& inst) { } } +void FoldFPMul32(IR::Inst& inst) { + const auto control{inst.Flags()}; + if (control.no_contraction) { + return; + } + // Fold interpolation operations + const IR::Value lhs_value{inst.Arg(0)}; + const IR::Value rhs_value{inst.Arg(1)}; + if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { + return; + } + IR::Inst* const lhs_op{lhs_value.InstRecursive()}; + IR::Inst* const rhs_op{rhs_value.InstRecursive()}; + if (lhs_op->Opcode() != IR::Opcode::FPMul32 || rhs_op->Opcode() != IR::Opcode::FPRecip32) { + return; + } + const IR::Value recip_source{rhs_op->Arg(0)}; + const IR::Value lhs_mul_source{lhs_op->Arg(1).Resolve()}; + if (recip_source.IsImmediate() || lhs_mul_source.IsImmediate()) { + return; + } + IR::Inst* const attr_a{recip_source.InstRecursive()}; + IR::Inst* const attr_b{lhs_mul_source.InstRecursive()}; + if (attr_a->Opcode() != IR::Opcode::GetAttribute || + attr_b->Opcode() != IR::Opcode::GetAttribute) { + return; 
+ } + if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) { + inst.ReplaceUsesWith(lhs_op->Arg(0)); + } +} + void FoldLogicalAnd(IR::Inst& inst) { if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) { return; @@ -348,6 +380,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::SelectF32: case IR::Opcode::SelectF64: return FoldSelect(inst); + case IR::Opcode::FPMul32: + return FoldFPMul32(inst); case IR::Opcode::LogicalAnd: return FoldLogicalAnd(inst); case IR::Opcode::LogicalOr: -- cgit v1.2.3 From 3d07cef009cf9e287744c7771c67166ef5761ce8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 23 Mar 2021 20:27:17 -0400 Subject: shader: Implement VOTE --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f44eac5d8..db5138e4d 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -359,6 +359,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; } + case IR::Opcode::VoteAll: + case IR::Opcode::VoteAny: + case IR::Opcode::VoteEqual: + case IR::Opcode::SubgroupBallot: + info.uses_subgroup_vote = true; + break; default: break; } -- cgit v1.2.3 From 8cb9443cb99c4510e6ef26a91d09a31a8fa6281f Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 24 Mar 2021 00:02:30 +0100 Subject: shader: Fix F2I --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 2 ++ src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp | 2 ++ 2 files changed, 4 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index db5138e4d..32f276f3b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -105,6 +105,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPNeg16: case IR::Opcode::FPRoundEven16: case IR::Opcode::FPSaturate16: + case IR::Opcode::FPClamp16: case IR::Opcode::FPTrunc16: case IR::Opcode::FPOrdEqual16: case IR::Opcode::FPUnordEqual16: @@ -148,6 +149,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPRecipSqrt64: case IR::Opcode::FPRoundEven64: case IR::Opcode::FPSaturate64: + case IR::Opcode::FPClamp64: case IR::Opcode::FPTrunc64: case IR::Opcode::FPOrdEqual64: case IR::Opcode::FPUnordEqual64: diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 0e8862f45..0d2c91ed6 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -30,6 +30,8 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::FPRoundEven32; case IR::Opcode::FPSaturate16: return IR::Opcode::FPSaturate32; + case IR::Opcode::FPClamp16: + return IR::Opcode::FPClamp32; case IR::Opcode::FPTrunc16: return IR::Opcode::FPTrunc32; case IR::Opcode::CompositeConstructF16x2: -- cgit v1.2.3 From 49e87ea8ab86f94239a6830666f3a8f897a0167a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Mar 2021 19:38:37 -0300 Subject: shader: Track first bindless argument instead of the instruction 
itself --- src/shader_recompiler/ir_opt/texture_pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index de9d633e2..2c8164b8a 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -121,7 +121,7 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { ConstBufferAddr addr; if (IsBindless(inst)) { VisitedBlocks visited; - const std::optional track_addr{Track(block, IR::Value{&inst}, visited)}; + const std::optional track_addr{Track(block, inst.Arg(0), visited)}; if (!track_addr) { throw NotImplementedException("Failed to track bindless texture constant buffer"); } -- cgit v1.2.3 From 32c5483beb2f79f5d55eb2906f2bfdfa1698bca3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 25 Mar 2021 11:31:37 -0400 Subject: shader: Implement SHFL --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 32f276f3b..61cc314c7 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -307,6 +307,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LocalInvocationId: info.uses_local_invocation_id = true; break; + case IR::Opcode::ShuffleIndex: + case IR::Opcode::ShuffleUp: + case IR::Opcode::ShuffleDown: + case IR::Opcode::ShuffleButterfly: + info.uses_subgroup_invocation_id = true; + break; case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: case IR::Opcode::GetCbufU16: -- cgit v1.2.3 From c7c518e280d1ac04adb08d45145690fd06ac7b18 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 24 Mar 2021 23:41:55 +0100 Subject: shader: Implement TLD4 and TLD4_B --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 8 +++++++- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 12 ++++++++++++ src/shader_recompiler/ir_opt/texture_pass.cpp | 10 ++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 61cc314c7..6fe06fda8 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -352,14 +352,20 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::BindlessImageSampleExplicitLod: case IR::Opcode::BindlessImageSampleDrefImplicitLod: case IR::Opcode::BindlessImageSampleDrefExplicitLod: + case IR::Opcode::BindlessImageGather: + case IR::Opcode::BindlessImageGatherDref: case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: case IR::Opcode::BoundImageSampleDrefImplicitLod: case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::BoundImageGather: + case IR::Opcode::BoundImageGatherDref: case IR::Opcode::ImageSampleImplicitLod: case IR::Opcode::ImageSampleExplicitLod: case IR::Opcode::ImageSampleDrefImplicitLod: - case IR::Opcode::ImageSampleDrefExplicitLod: { + case IR::Opcode::ImageSampleDrefExplicitLod: + case IR::Opcode::ImageGather: + case IR::Opcode::ImageGatherDref: { const 
TextureType type{inst.Flags().type}; info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 3dab424f6..28060dccf 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -403,6 +403,18 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return (base >> shift) & ((1U << count) - 1); }); return; + case IR::Opcode::BitFieldSExtract: + FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) { + const size_t back_shift = static_cast(shift) + static_cast(count); + if (back_shift > Common::BitSize()) { + throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract, + base, shift, count); + } + const size_t left_shift = Common::BitSize() - back_shift; + return static_cast(static_cast(base << left_shift) >> + static_cast(Common::BitSize() - count)); + }); + return; case IR::Opcode::BranchConditional: return FoldBranchConditional(inst); default: diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 2c8164b8a..454ac3e71 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -45,6 +45,12 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { case IR::Opcode::BoundImageSampleDrefExplicitLod: case IR::Opcode::BindlessImageSampleDrefExplicitLod: return IR::Opcode::ImageSampleDrefExplicitLod; + case IR::Opcode::BindlessImageGather: + case IR::Opcode::BoundImageGather: + return IR::Opcode::ImageGather; + case IR::Opcode::BindlessImageGatherDref: + case IR::Opcode::BoundImageGatherDref: + return IR::Opcode::ImageGatherDref; default: return IR::Opcode::Void; } @@ -56,11 +62,15 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BindlessImageSampleExplicitLod: case IR::Opcode::BindlessImageSampleDrefImplicitLod: case IR::Opcode::BindlessImageSampleDrefExplicitLod: + case IR::Opcode::BindlessImageGather: + case IR::Opcode::BindlessImageGatherDref: return true; case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: case IR::Opcode::BoundImageSampleDrefImplicitLod: case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::BoundImageGather: + case IR::Opcode::BoundImageGatherDref: return false; default: throw InvalidArgument("Invalid opcode {}", inst.Opcode()); -- cgit v1.2.3 From 742d11c2ad948c8630be15901514ec9e5e5fcd20 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 16:02:04 +0100 Subject: shader: Implement TLD4.PTP --- .../ir_opt/constant_propagation_pass.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 28060dccf..12159e738 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -355,6 +355,17 @@ void FoldBranchConditional(IR::Inst& inst) { } } +void FoldConstantComposite(IR::Inst& inst, size_t amount = 2) { + for (size_t i = 0; i < amount; i++) { + if (!inst.Arg(i).IsConstantContainer()) { + return; + } + } + auto info{inst.Flags()}; + info.is_constant = true; + 
inst.SetFlags(info); +} + void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: @@ -380,6 +391,13 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::SelectF32: case IR::Opcode::SelectF64: return FoldSelect(inst); + case IR::Opcode::CompositeConstructU32x2: + case IR::Opcode::CompositeConstructF16x2: + case IR::Opcode::CompositeConstructF32x2: + case IR::Opcode::CompositeConstructF64x2: + return FoldConstantComposite(inst, 2); + case IR::Opcode::CompositeConstructArrayU32x2: + return FoldConstantComposite(inst, 4); case IR::Opcode::FPMul32: return FoldFPMul32(inst); case IR::Opcode::LogicalAnd: -- cgit v1.2.3 From b5db38f50e9f81964bf0cc946e4ed5b00fe564d0 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 19:24:50 +0100 Subject: shader: Add IR opcode for ImageFetch --- src/shader_recompiler/ir_opt/texture_pass.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 454ac3e71..0167dd06e 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -51,6 +51,9 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { case IR::Opcode::BindlessImageGatherDref: case IR::Opcode::BoundImageGatherDref: return IR::Opcode::ImageGatherDref; + case IR::Opcode::BindlessImageFetch: + case IR::Opcode::BoundImageFetch: + return IR::Opcode::ImageFetch; default: return IR::Opcode::Void; } @@ -64,6 +67,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BindlessImageSampleDrefExplicitLod: case IR::Opcode::BindlessImageGather: case IR::Opcode::BindlessImageGatherDref: + case IR::Opcode::BindlessImageFetch: return true; case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: @@ -71,6 +75,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageSampleDrefExplicitLod: case IR::Opcode::BoundImageGather: case IR::Opcode::BoundImageGatherDref: + case IR::Opcode::BoundImageFetch: return false; default: throw InvalidArgument("Invalid opcode {}", inst.Opcode()); -- cgit v1.2.3 From d9c5bd9509e82fcde72c18663989931f97ed6518 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Mar 2021 16:46:07 -0300 Subject: shader: Refactor PTP and other minor changes --- .../ir_opt/constant_propagation_pass.cpp | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 12159e738..052f1609b 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -355,17 +355,6 @@ void FoldBranchConditional(IR::Inst& inst) { } } -void FoldConstantComposite(IR::Inst& inst, size_t amount = 2) { - for (size_t i = 0; i < amount; i++) { - if (!inst.Arg(i).IsConstantContainer()) { - return; - } - } - auto info{inst.Flags()}; - info.is_constant = true; - inst.SetFlags(info); -} - void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: @@ -391,13 +380,6 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::SelectF32: case IR::Opcode::SelectF64: return FoldSelect(inst); - case IR::Opcode::CompositeConstructU32x2: - case 
IR::Opcode::CompositeConstructF16x2: - case IR::Opcode::CompositeConstructF32x2: - case IR::Opcode::CompositeConstructF64x2: - return FoldConstantComposite(inst, 2); - case IR::Opcode::CompositeConstructArrayU32x2: - return FoldConstantComposite(inst, 4); case IR::Opcode::FPMul32: return FoldFPMul32(inst); case IR::Opcode::LogicalAnd: @@ -423,12 +405,12 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return; case IR::Opcode::BitFieldSExtract: FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) { - const size_t back_shift = static_cast(shift) + static_cast(count); + const size_t back_shift{static_cast(shift) + static_cast(count)}; if (back_shift > Common::BitSize()) { throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract, base, shift, count); } - const size_t left_shift = Common::BitSize() - back_shift; + const size_t left_shift{Common::BitSize() - back_shift}; return static_cast(static_cast(base << left_shift) >> static_cast(Common::BitSize() - count)); }); -- cgit v1.2.3 From 17063d16a3cfe6542e74265739191e1d018fc456 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Mar 2021 18:45:38 -0300 Subject: shader: Implement TXQ and fix FragDepth --- .../ir_opt/collect_shader_info_pass.cpp | 3 ++- src/shader_recompiler/ir_opt/texture_pass.cpp | 20 ++++++++++++++++---- 2 files changed, 18 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 6fe06fda8..80ca8db26 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -365,7 +365,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageSampleDrefImplicitLod: case IR::Opcode::ImageSampleDrefExplicitLod: case IR::Opcode::ImageGather: - case IR::Opcode::ImageGatherDref: { + case IR::Opcode::ImageGatherDref: + case IR::Opcode::ImageQueryDimensions: { const TextureType type{inst.Flags().type}; info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 0167dd06e..dfacf848f 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -54,6 +54,9 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { case IR::Opcode::BindlessImageFetch: case IR::Opcode::BoundImageFetch: return IR::Opcode::ImageFetch; + case IR::Opcode::BoundImageQueryDimensions: + case IR::Opcode::BindlessImageQueryDimensions: + return IR::Opcode::ImageQueryDimensions; default: return IR::Opcode::Void; } @@ -68,6 +71,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BindlessImageGather: case IR::Opcode::BindlessImageGatherDref: case IR::Opcode::BindlessImageFetch: + case IR::Opcode::BindlessImageQueryDimensions: return true; case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: @@ -76,6 +80,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageGather: case IR::Opcode::BoundImageGatherDref: case IR::Opcode::BoundImageFetch: + case IR::Opcode::BoundImageQueryDimensions: return false; default: throw InvalidArgument("Invalid opcode {}", inst.Opcode()); @@ -198,13 +203,20 @@ void TexturePass(Environment& env, IR::Program& program) { for 
(TextureInst& texture_inst : to_replace) { // TODO: Handle arrays IR::Inst* const inst{texture_inst.inst}; + inst->ReplaceOpcode(IndexedInstruction(*inst)); + + const auto& cbuf{texture_inst.cbuf}; + auto flags{inst->Flags()}; + if (inst->Opcode() == IR::Opcode::ImageQueryDimensions) { + flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); + inst->SetFlags(flags); + } const u32 index{descriptors.Add(TextureDescriptor{ - .type{inst->Flags().type}, - .cbuf_index{texture_inst.cbuf.index}, - .cbuf_offset{texture_inst.cbuf.offset}, + .type{flags.type}, + .cbuf_index{cbuf.index}, + .cbuf_offset{cbuf.offset}, .count{1}, })}; - inst->ReplaceOpcode(IndexedInstruction(*inst)); inst->SetArg(0, IR::Value{index}); } } -- cgit v1.2.3 From f0031babeb3ed04aef2468840aa37f4da13b2524 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 02:55:37 -0300 Subject: shader: Implement front face --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 80ca8db26..0ec0d4c01 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -44,6 +44,9 @@ void GetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::VertexId: info.loads_vertex_id = true; break; + case IR::Attribute::FrontFace: + info.loads_front_face = true; + break; default: throw NotImplementedException("Get attribute {}", attribute); } -- cgit v1.2.3 From dbd882ddeb1a1a9233c0085d0b8ccb022db385b2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 04:59:58 -0300 Subject: shader: Better interpolation and disabled attributes support --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 0ec0d4c01..60be67228 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -28,7 +28,7 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { void GetAttribute(Info& info, IR::Attribute attribute) { if (IR::IsGeneric(attribute)) { - info.loads_generics.at(IR::GenericAttributeIndex(attribute)) = true; + info.input_generics.at(IR::GenericAttributeIndex(attribute)).used = true; return; } switch (attribute) { -- cgit v1.2.3 From e860870dd2244cd87645190c89244f1d2c4c775b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 28 Mar 2021 19:53:34 -0300 Subject: shader: Implement LDS, STS, LDL, and STS and use SPIR-V 1.4 when available --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 60be67228..c932c307b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -200,6 +200,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LoadStorageS8: case IR::Opcode::WriteStorageU8: case IR::Opcode::WriteStorageS8: + case IR::Opcode::LoadSharedU8: + case IR::Opcode::LoadSharedS8: 
+ case IR::Opcode::WriteSharedU8: case IR::Opcode::SelectU8: case IR::Opcode::ConvertF16S8: case IR::Opcode::ConvertF16U8: @@ -224,6 +227,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LoadStorageS16: case IR::Opcode::WriteStorageU16: case IR::Opcode::WriteStorageS16: + case IR::Opcode::LoadSharedU16: + case IR::Opcode::LoadSharedS16: + case IR::Opcode::WriteSharedU16: case IR::Opcode::SelectU16: case IR::Opcode::BitCastU16F16: case IR::Opcode::BitCastF16U16: -- cgit v1.2.3 From 34aba9627a8fad20b3b173180e2f3d679dd32293 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 27 Mar 2021 22:30:24 +0100 Subject: shader: Implement BRX --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index bab7ca186..259233746 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -48,8 +48,12 @@ struct GotoVariable : FlagTag { u32 index; }; +struct IndirectBranchVariable { + auto operator<=>(const IndirectBranchVariable&) const noexcept = default; +}; + using Variant = std::variant; + OverflowFlagTag, GotoVariable, IndirectBranchVariable>; using ValueMap = boost::container::flat_map>; struct DefTable { @@ -65,6 +69,10 @@ struct DefTable { return goto_vars[goto_variable.index]; } + [[nodiscard]] ValueMap& operator[](IndirectBranchVariable) { + return indirect_branch_var; + } + [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept { return zero_flag; } @@ -84,6 +92,7 @@ struct DefTable { std::array regs; std::array preds; boost::container::flat_map goto_vars; + ValueMap indirect_branch_var; ValueMap zero_flag; ValueMap sign_flag; ValueMap carry_flag; @@ -102,6 +111,10 @@ IR::Opcode UndefOpcode(const FlagTag&) noexcept { return IR::Opcode::UndefU1; } +IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { + return IR::Opcode::UndefU32; +} + [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { return inst.Opcode() == IR::Opcode::Phi; } @@ -219,6 +232,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetGotoVariable: pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); break; + case IR::Opcode::SetIndirectBranchVariable: + pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); + break; case IR::Opcode::SetZFlag: pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); break; @@ -244,6 +260,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetGotoVariable: inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); break; + case IR::Opcode::GetIndirectBranchVariable: + inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block)); + break; case IR::Opcode::GetZFlag: inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); break; -- cgit v1.2.3 From 6c51f496320f698e123207c09ca61e55180a31b5 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 28 Mar 2021 22:23:45 -0400 Subject: shader: Implement FSWZADD --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index c932c307b..81090335f 100644 --- 
a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -389,6 +389,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::SubgroupBallot: info.uses_subgroup_vote = true; break; + case IR::Opcode::FSwizzleAdd: + info.uses_fswzadd = true; + break; default: break; } -- cgit v1.2.3 From b7589fe1154d9e810e83f6c609dad1d646ec0359 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 26 Mar 2021 18:52:06 -0400 Subject: shader: Add PointSize attribute --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 81090335f..a47d54b9c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -58,6 +58,9 @@ void SetAttribute(Info& info, IR::Attribute attribute) { return; } switch (attribute) { + case IR::Attribute::PointSize: + info.stores_point_size = true; + break; case IR::Attribute::PositionX: case IR::Attribute::PositionY: case IR::Attribute::PositionZ: -- cgit v1.2.3 From 9d7422d967a97fea7888449652ad93da88e92b54 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 29 Mar 2021 20:05:38 +0200 Subject: shader: Add PointCoord attribute --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index a47d54b9c..eb3d1343f 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -47,6 +47,10 @@ void GetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::FrontFace: info.loads_front_face = true; break; + case IR::Attribute::PointSpriteS: + case IR::Attribute::PointSpriteT: + info.loads_point_coord = true; + break; default: throw NotImplementedException("Get attribute {}", attribute); } -- cgit v1.2.3 From dc1a9a3bed2aa9b0851f07976b0c687172aa3edc Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 20:51:05 +0100 Subject: shader: Implement TLD --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index eb3d1343f..3b00d7c8c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -382,6 +382,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageSampleDrefExplicitLod: case IR::Opcode::ImageGather: case IR::Opcode::ImageGatherDref: + case IR::Opcode::ImageFetch: case IR::Opcode::ImageQueryDimensions: { const TextureType type{inst.Flags().type}; info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || -- cgit v1.2.3 From 613b48c4a2ce71a0d0eaba17fe164f4a2e4a3db5 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 28 Mar 2021 19:47:52 +0200 Subject: shader,spirv: Implement ImageQueryLod. 
--- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 ++- src/shader_recompiler/ir_opt/texture_pass.cpp | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 3b00d7c8c..04e3a4f53 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -383,7 +383,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageGather: case IR::Opcode::ImageGatherDref: case IR::Opcode::ImageFetch: - case IR::Opcode::ImageQueryDimensions: { + case IR::Opcode::ImageQueryDimensions: + case IR::Opcode::ImageQueryLod: { const TextureType type{inst.Flags().type}; info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index dfacf848f..6eb286b83 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -57,6 +57,9 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { case IR::Opcode::BoundImageQueryDimensions: case IR::Opcode::BindlessImageQueryDimensions: return IR::Opcode::ImageQueryDimensions; + case IR::Opcode::BoundImageQueryLod: + case IR::Opcode::BindlessImageQueryLod: + return IR::Opcode::ImageQueryLod; default: return IR::Opcode::Void; } @@ -72,6 +75,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BindlessImageGatherDref: case IR::Opcode::BindlessImageFetch: case IR::Opcode::BindlessImageQueryDimensions: + case IR::Opcode::BindlessImageQueryLod: return true; case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: @@ -81,6 +85,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageGatherDref: case IR::Opcode::BoundImageFetch: case IR::Opcode::BoundImageQueryDimensions: + case IR::Opcode::BoundImageQueryLod: return false; default: throw InvalidArgument("Invalid opcode {}", inst.Opcode()); -- cgit v1.2.3 From d5bfc630886d98ed77959a9771c67293244aff0e Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 29 Mar 2021 02:00:43 +0200 Subject: shader: Implement ImageGradient --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 11 ++++++++++- src/shader_recompiler/ir_opt/texture_pass.cpp | 5 +++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 04e3a4f53..730d3e91e 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -370,12 +370,20 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::BindlessImageSampleDrefExplicitLod: case IR::Opcode::BindlessImageGather: case IR::Opcode::BindlessImageGatherDref: + case IR::Opcode::BindlessImageFetch: + case IR::Opcode::BindlessImageQueryDimensions: + case IR::Opcode::BindlessImageQueryLod: + case IR::Opcode::BindlessImageGradient: case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: case IR::Opcode::BoundImageSampleDrefImplicitLod: case IR::Opcode::BoundImageSampleDrefExplicitLod: case IR::Opcode::BoundImageGather: case 
IR::Opcode::BoundImageGatherDref: + case IR::Opcode::BoundImageFetch: + case IR::Opcode::BoundImageQueryDimensions: + case IR::Opcode::BoundImageQueryLod: + case IR::Opcode::BoundImageGradient: case IR::Opcode::ImageSampleImplicitLod: case IR::Opcode::ImageSampleExplicitLod: case IR::Opcode::ImageSampleDrefImplicitLod: @@ -384,7 +392,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageGatherDref: case IR::Opcode::ImageFetch: case IR::Opcode::ImageQueryDimensions: - case IR::Opcode::ImageQueryLod: { + case IR::Opcode::ImageQueryLod: + case IR::Opcode::ImageGradient: { const TextureType type{inst.Flags().type}; info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 6eb286b83..da8977b76 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -60,6 +60,9 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { case IR::Opcode::BoundImageQueryLod: case IR::Opcode::BindlessImageQueryLod: return IR::Opcode::ImageQueryLod; + case IR::Opcode::BoundImageGradient: + case IR::Opcode::BindlessImageGradient: + return IR::Opcode::ImageGradient; default: return IR::Opcode::Void; } @@ -76,6 +79,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BindlessImageFetch: case IR::Opcode::BindlessImageQueryDimensions: case IR::Opcode::BindlessImageQueryLod: + case IR::Opcode::BindlessImageGradient: return true; case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: @@ -86,6 +90,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageFetch: case IR::Opcode::BoundImageQueryDimensions: case IR::Opcode::BoundImageQueryLod: + case IR::Opcode::BoundImageGradient: return false; default: throw InvalidArgument("Invalid opcode {}", inst.Opcode()); -- cgit v1.2.3 From 0c4cf3b9eb7de6624a844ae9ac9d2ba6b2ef3ac2 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Tue, 30 Mar 2021 21:52:06 +0200 Subject: shader: Implement ClipDistance --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 730d3e91e..50ffc4c19 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -71,6 +71,16 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::PositionW: info.stores_position = true; break; + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: + info.stores_clip_distance = true; + break; default: throw NotImplementedException("Set attribute {}", attribute); } -- cgit v1.2.3 From 5f22cd89e20cd39c1395b7bd3b8e667a40f53751 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 31 Mar 2021 19:46:38 -0300 Subject: shader: Fix constant propagation to use reverse post order --- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 
'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 052f1609b..7da4d50ef 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include "common/bit_cast.h" #include "common/bit_util.h" @@ -424,7 +425,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { } // Anonymous namespace void ConstantPropagationPass(IR::Program& program) { - for (IR::Block* const block : program.post_order_blocks) { + for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { for (IR::Inst& inst : block->Instructions()) { ConstantPropagation(*block, inst); } -- cgit v1.2.3 From c3bace756f2b21057e89d104ad18a34b2ad9083c Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 06:39:47 +0200 Subject: shader: Fold comparisons and Pack/Unpack16 --- .../ir_opt/constant_propagation_pass.cpp | 42 +++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 7da4d50ef..15e16956e 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -3,9 +3,9 @@ // Refer to the license.txt file included. #include +#include #include #include -#include #include "common/bit_cast.h" #include "common/bit_util.h" @@ -332,6 +332,18 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { } } +void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { + const IR::Value value{inst.Arg(0)}; + if (value.IsImmediate()) { + return; + } + IR::Inst* const arg_inst{value.InstRecursive()}; + if (arg_inst->Opcode() == reverse) { + inst.ReplaceUsesWith(arg_inst->Arg(0)); + return; + } +} + template IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence) { using Traits = LambdaTraits; @@ -372,6 +384,10 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return FoldBitCast(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: return FoldAdd(block, inst); + case IR::Opcode::PackHalf2x16: + return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16); + case IR::Opcode::UnpackHalf2x16: + return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16); case IR::Opcode::SelectU1: case IR::Opcode::SelectU8: case IR::Opcode::SelectU16: @@ -395,6 +411,30 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::ULessThan: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); return; + case IR::Opcode::SLessThanEqual: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; }); + return; + case IR::Opcode::ULessThanEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; }); + return; + case IR::Opcode::SGreaterThan: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; }); + return; + case IR::Opcode::UGreaterThan: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; }); + return; + case IR::Opcode::SGreaterThanEqual: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; }); + return; + case IR::Opcode::UGreaterThanEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; }); + return; + case IR::Opcode::IEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; }); + 
return; + case IR::Opcode::INotEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; }); + return; case IR::Opcode::BitFieldUExtract: FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { if (static_cast(shift) + static_cast(count) > Common::BitSize()) { -- cgit v1.2.3 From bee81887998070d213c984d4ae4e5ae35de6fd96 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 07:42:58 +0200 Subject: shader: Fold composite extract --- .../ir_opt/constant_propagation_pass.cpp | 62 ++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 15e16956e..8999c3a3d 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -368,6 +368,50 @@ void FoldBranchConditional(IR::Inst& inst) { } } +std::optional FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, + IR::Opcode construct, u32 first_index) { + IR::Inst* const inst{inst_value.InstRecursive()}; + if (inst->Opcode() == construct) { + return inst->Arg(first_index); + } + + if (inst->Opcode() != insert) { + return std::nullopt; + } + + IR::Value value_index{inst->Arg(2)}; + if (!value_index.IsImmediate()) { + return std::nullopt; + } + + const u32 second_index = value_index.U32(); + if (first_index != second_index) { + IR::Value value_composite{inst->Arg(0)}; + if (value_composite.IsImmediate()) { + return std::nullopt; + } + return FoldCompositeExtractImpl(value_composite, insert, construct, first_index); + } + return inst->Arg(1); +} + +void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode insert) { + const IR::Value value_1{inst.Arg(0)}; + const IR::Value value_2{inst.Arg(1)}; + if (value_1.IsImmediate()) { + return; + } + if (!value_2.IsImmediate()) { + return; + } + const u32 first_index = value_2.U32(); + auto result = FoldCompositeExtractImpl(value_1, insert, construct, first_index); + if (!result) { + return; + } + inst.ReplaceUsesWith(*result); +} + void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: @@ -458,6 +502,24 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return; case IR::Opcode::BranchConditional: return FoldBranchConditional(inst); + case IR::Opcode::CompositeExtractF32x2: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2, + IR::Opcode::CompositeInsertF32x2); + case IR::Opcode::CompositeExtractF32x3: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x3, + IR::Opcode::CompositeInsertF32x3); + case IR::Opcode::CompositeExtractF32x4: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x4, + IR::Opcode::CompositeInsertF32x4); + case IR::Opcode::CompositeExtractF16x2: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x2, + IR::Opcode::CompositeInsertF16x2); + case IR::Opcode::CompositeExtractF16x3: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x3, + IR::Opcode::CompositeInsertF16x3); + case IR::Opcode::CompositeExtractF16x4: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4, + IR::Opcode::CompositeInsertF16x4); default: break; } -- cgit v1.2.3 From d819ba4489b90955286341c739083e638173b938 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 08:34:45 +0200 Subject: shader: Implement ViewportIndex --- 
src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 50ffc4c19..514de6838 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -81,6 +81,9 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::ClipDistance7: info.stores_clip_distance = true; break; + case IR::Attribute::ViewportIndex: + info.stores_viewport_index = true; + break; default: throw NotImplementedException("Set attribute {}", attribute); } -- cgit v1.2.3 From 12f5f320985824d1ebad587ebecb0f8406143ebc Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 09:21:53 +0200 Subject: shader: Mark SSBOs as written when they are --- .../global_memory_to_storage_buffer_pass.cpp | 31 ++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 1faa1ec88..d4bae249b 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,7 @@ using StorageBufferSet = using StorageInstVector = boost::container::small_vector; using VisitedBlocks = boost::container::flat_set, boost::container::small_vector>; +using StorageWritesMap = std::map; /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { @@ -69,6 +71,22 @@ bool IsGlobalMemory(const IR::Inst& inst) { } } +/// Returns true when the instruction is a global memory instruction +bool IsGlobalMemoryWrite(const IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + return true; + default: + return false; + } +} + /// Converts a global memory opcode to its storage buffer equivalent IR::Opcode GlobalToStorage(IR::Opcode opcode) { switch (opcode) { @@ -248,7 +266,7 @@ std::optional Track(IR::Block* block, const IR::Value& value, /// Collects the storage buffer used by a global memory instruction and the instruction itself void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set, - StorageInstVector& to_replace) { + StorageInstVector& to_replace, StorageWritesMap& writes_map) { // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ @@ -277,6 +295,13 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& s } } // Collect storage buffer and the instruction + const bool is_a_write = IsGlobalMemoryWrite(inst); + auto it = writes_map.find(*storage_buffer); + if (it == writes_map.end()) { + writes_map[*storage_buffer] = is_a_write; + } else { + it->second = it->second || is_a_write; + } storage_buffer_set.insert(*storage_buffer); to_replace.push_back(StorageInst{ .storage_buffer{*storage_buffer}, @@ -350,13 +375,14 @@ void 
Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, void GlobalMemoryToStorageBufferPass(IR::Program& program) { StorageBufferSet storage_buffers; StorageInstVector to_replace; + StorageWritesMap writes_map; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { if (!IsGlobalMemory(inst)) { continue; } - CollectStorageBuffers(*block, inst, storage_buffers, to_replace); + CollectStorageBuffers(*block, inst, storage_buffers, to_replace, writes_map); } } Info& info{program.info}; @@ -366,6 +392,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { .cbuf_index{storage_buffer.index}, .cbuf_offset{storage_buffer.offset}, .count{1}, + .is_written{writes_map[storage_buffer]}, }); ++storage_index; } -- cgit v1.2.3 From ecb30c907266921818d5b6b03e341028fa2ea082 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 22:20:57 +0200 Subject: shader: Improve VOTE.VTG stub --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 51 ++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 259233746..7dab33034 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -38,6 +38,10 @@ struct ZeroFlagTag : FlagTag {}; struct SignFlagTag : FlagTag {}; struct CarryFlagTag : FlagTag {}; struct OverflowFlagTag : FlagTag {}; +struct FCSMFlagTag : FlagTag {}; +struct TAFlagTag : FlagTag {}; +struct TRFlagTag : FlagTag {}; +struct MXFlagTag : FlagTag {}; struct GotoVariable : FlagTag { GotoVariable() = default; @@ -53,7 +57,8 @@ struct IndirectBranchVariable { }; using Variant = std::variant; + OverflowFlagTag, FCSMFlagTag, TAFlagTag, TRFlagTag, MXFlagTag, + GotoVariable, IndirectBranchVariable>; using ValueMap = boost::container::flat_map>; struct DefTable { @@ -89,6 +94,22 @@ struct DefTable { return overflow_flag; } + [[nodiscard]] ValueMap& operator[](FCSMFlagTag) noexcept { + return fcsm_flag; + } + + [[nodiscard]] ValueMap& operator[](TAFlagTag) noexcept { + return ta_flag; + } + + [[nodiscard]] ValueMap& operator[](TRFlagTag) noexcept { + return tr_flag; + } + + [[nodiscard]] ValueMap& operator[](MXFlagTag) noexcept { + return mr_flag; + } + std::array regs; std::array preds; boost::container::flat_map goto_vars; @@ -97,6 +118,10 @@ struct DefTable { ValueMap sign_flag; ValueMap carry_flag; ValueMap overflow_flag; + ValueMap fcsm_flag; + ValueMap ta_flag; + ValueMap tr_flag; + ValueMap mr_flag; }; IR::Opcode UndefOpcode(IR::Reg) noexcept { @@ -247,6 +272,18 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetOFlag: pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); break; + case IR::Opcode::SetFCSMFlag: + pass.WriteVariable(FCSMFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetTAFlag: + pass.WriteVariable(TAFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetTRFlag: + pass.WriteVariable(TRFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetMXFlag: + pass.WriteVariable(MXFlagTag{}, block, inst.Arg(0)); + break; case IR::Opcode::GetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); @@ -275,6 +312,18 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetOFlag: 
inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); break; + case IR::Opcode::GetFCSMFlag: + inst.ReplaceUsesWith(pass.ReadVariable(FCSMFlagTag{}, block)); + break; + case IR::Opcode::GetTAFlag: + inst.ReplaceUsesWith(pass.ReadVariable(TAFlagTag{}, block)); + break; + case IR::Opcode::GetTRFlag: + inst.ReplaceUsesWith(pass.ReadVariable(TRFlagTag{}, block)); + break; + case IR::Opcode::GetMXFlag: + inst.ReplaceUsesWith(pass.ReadVariable(MXFlagTag{}, block)); + break; default: break; } -- cgit v1.2.3 From 45d547af11a18434ea17e4427db7286856a19537 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 2 Apr 2021 23:05:47 +0200 Subject: shader: Implement SR_LaneId --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 514de6838..5c1b81638 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -340,6 +340,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ShuffleUp: case IR::Opcode::ShuffleDown: case IR::Opcode::ShuffleButterfly: + case IR::Opcode::LaneId: info.uses_subgroup_invocation_id = true; break; case IR::Opcode::GetCbufU8: -- cgit v1.2.3 From baec84247fe815199595d9e8077b71f3b5c8317e Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 Apr 2021 01:48:39 +0200 Subject: shader: Address Feedback --- .../ir_opt/constant_propagation_pass.cpp | 9 ++-- .../global_memory_to_storage_buffer_pass.cpp | 10 ++--- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 50 +--------------------- 3 files changed, 9 insertions(+), 60 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 8999c3a3d..1720d7a09 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -374,17 +374,14 @@ std::optional FoldCompositeExtractImpl(IR::Value inst_value, IR::Opco if (inst->Opcode() == construct) { return inst->Arg(first_index); } - if (inst->Opcode() != insert) { return std::nullopt; } - IR::Value value_index{inst->Arg(2)}; if (!value_index.IsImmediate()) { return std::nullopt; } - - const u32 second_index = value_index.U32(); + const u32 second_index{value_index.U32()}; if (first_index != second_index) { IR::Value value_composite{inst->Arg(0)}; if (value_composite.IsImmediate()) { @@ -404,8 +401,8 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser if (!value_2.IsImmediate()) { return; } - const u32 first_index = value_2.U32(); - auto result = FoldCompositeExtractImpl(value_1, insert, construct, first_index); + const u32 first_index{value_2.U32()}; + const std::optional result{FoldCompositeExtractImpl(value_1, insert, construct, first_index)}; if (!result) { return; } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index d4bae249b..8876a5c33 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -4,9 +4,9 @@ #include #include +#include #include #include -#include #include #include @@ -295,12 +295,12 @@ void 
CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& s } } // Collect storage buffer and the instruction - const bool is_a_write = IsGlobalMemoryWrite(inst); - auto it = writes_map.find(*storage_buffer); + const bool is_a_write{IsGlobalMemoryWrite(inst)}; + auto it{writes_map.find(*storage_buffer)}; if (it == writes_map.end()) { - writes_map[*storage_buffer] = is_a_write; + writes_map[*storage_buffer] = is_a_write; } else { - it->second = it->second || is_a_write; + it->second = it->second || is_a_write; } storage_buffer_set.insert(*storage_buffer); to_replace.push_back(StorageInst{ diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 7dab33034..72d4abb77 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -38,10 +38,6 @@ struct ZeroFlagTag : FlagTag {}; struct SignFlagTag : FlagTag {}; struct CarryFlagTag : FlagTag {}; struct OverflowFlagTag : FlagTag {}; -struct FCSMFlagTag : FlagTag {}; -struct TAFlagTag : FlagTag {}; -struct TRFlagTag : FlagTag {}; -struct MXFlagTag : FlagTag {}; struct GotoVariable : FlagTag { GotoVariable() = default; @@ -57,8 +53,7 @@ struct IndirectBranchVariable { }; using Variant = std::variant; + OverflowFlagTag, GotoVariable, IndirectBranchVariable>; using ValueMap = boost::container::flat_map>; struct DefTable { @@ -94,22 +89,6 @@ struct DefTable { return overflow_flag; } - [[nodiscard]] ValueMap& operator[](FCSMFlagTag) noexcept { - return fcsm_flag; - } - - [[nodiscard]] ValueMap& operator[](TAFlagTag) noexcept { - return ta_flag; - } - - [[nodiscard]] ValueMap& operator[](TRFlagTag) noexcept { - return tr_flag; - } - - [[nodiscard]] ValueMap& operator[](MXFlagTag) noexcept { - return mr_flag; - } - std::array regs; std::array preds; boost::container::flat_map goto_vars; @@ -118,10 +97,6 @@ struct DefTable { ValueMap sign_flag; ValueMap carry_flag; ValueMap overflow_flag; - ValueMap fcsm_flag; - ValueMap ta_flag; - ValueMap tr_flag; - ValueMap mr_flag; }; IR::Opcode UndefOpcode(IR::Reg) noexcept { @@ -272,18 +247,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetOFlag: pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); break; - case IR::Opcode::SetFCSMFlag: - pass.WriteVariable(FCSMFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::SetTAFlag: - pass.WriteVariable(TAFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::SetTRFlag: - pass.WriteVariable(TRFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::SetMXFlag: - pass.WriteVariable(MXFlagTag{}, block, inst.Arg(0)); - break; case IR::Opcode::GetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); @@ -312,17 +275,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetOFlag: inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); break; - case IR::Opcode::GetFCSMFlag: - inst.ReplaceUsesWith(pass.ReadVariable(FCSMFlagTag{}, block)); - break; - case IR::Opcode::GetTAFlag: - inst.ReplaceUsesWith(pass.ReadVariable(TAFlagTag{}, block)); - break; - case IR::Opcode::GetTRFlag: - inst.ReplaceUsesWith(pass.ReadVariable(TRFlagTag{}, block)); - break; - case IR::Opcode::GetMXFlag: - inst.ReplaceUsesWith(pass.ReadVariable(MXFlagTag{}, block)); break; default: break; -- cgit v1.2.3 From ed6a1b1a3def4b8ed8c8fd1a7774a0a14edefc70 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 
Apr 2021 02:34:07 +0200 Subject: shader: Address feedback --- .../ir_opt/global_memory_to_storage_buffer_pass.cpp | 20 +++++++++----------- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 1 - 2 files changed, 9 insertions(+), 12 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 8876a5c33..c8bd7b329 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -46,7 +46,9 @@ using StorageBufferSet = using StorageInstVector = boost::container::small_vector; using VisitedBlocks = boost::container::flat_set, boost::container::small_vector>; -using StorageWritesMap = std::map; +using StorageWritesSet = + boost::container::flat_set, + boost::container::small_vector>; /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { @@ -266,7 +268,7 @@ std::optional Track(IR::Block* block, const IR::Value& value, /// Collects the storage buffer used by a global memory instruction and the instruction itself void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set, - StorageInstVector& to_replace, StorageWritesMap& writes_map) { + StorageInstVector& to_replace, StorageWritesSet& writes_set) { // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ @@ -295,12 +297,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& s } } // Collect storage buffer and the instruction - const bool is_a_write{IsGlobalMemoryWrite(inst)}; - auto it{writes_map.find(*storage_buffer)}; - if (it == writes_map.end()) { - writes_map[*storage_buffer] = is_a_write; - } else { - it->second = it->second || is_a_write; + if (IsGlobalMemoryWrite(inst)) { + writes_set.insert(*storage_buffer); } storage_buffer_set.insert(*storage_buffer); to_replace.push_back(StorageInst{ @@ -375,14 +373,14 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, void GlobalMemoryToStorageBufferPass(IR::Program& program) { StorageBufferSet storage_buffers; StorageInstVector to_replace; - StorageWritesMap writes_map; + StorageWritesSet writes_set; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { if (!IsGlobalMemory(inst)) { continue; } - CollectStorageBuffers(*block, inst, storage_buffers, to_replace, writes_map); + CollectStorageBuffers(*block, inst, storage_buffers, to_replace, writes_set); } } Info& info{program.info}; @@ -392,7 +390,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { .cbuf_index{storage_buffer.index}, .cbuf_offset{storage_buffer.offset}, .count{1}, - .is_written{writes_map[storage_buffer]}, + .is_written{writes_set.contains(storage_buffer)}, }); ++storage_index; } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 72d4abb77..259233746 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -275,7 +275,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetOFlag: inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); break; - break; default: break; } -- cgit v1.2.3 From 
9a342f5605aef385612053d7d8b564f541952eae Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 05:18:12 -0300 Subject: shader: Rework global memory tracking to use breadth-first search --- .../global_memory_to_storage_buffer_pass.cpp | 149 +++++++++++---------- 1 file changed, 80 insertions(+), 69 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index c8bd7b329..f94c82e21 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -4,9 +4,9 @@ #include #include -#include #include #include +#include #include #include @@ -40,15 +40,19 @@ struct Bias { u32 offset_end; }; +using boost::container::flat_set; +using boost::container::small_vector; using StorageBufferSet = - boost::container::flat_set, - boost::container::small_vector>; -using StorageInstVector = boost::container::small_vector; -using VisitedBlocks = boost::container::flat_set, - boost::container::small_vector>; + flat_set, small_vector>; +using StorageInstVector = small_vector; using StorageWritesSet = - boost::container::flat_set, - boost::container::small_vector>; + flat_set, small_vector>; + +struct StorageInfo { + StorageBufferSet set; + StorageInstVector to_replace; + StorageWritesSet writes; +}; /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { @@ -215,60 +219,72 @@ std::optional TrackLowAddress(IR::Inst* inst) { }; } -/// Recursively tries to track the storage buffer address used by a global memory instruction -std::optional Track(IR::Block* block, const IR::Value& value, const Bias* bias, - VisitedBlocks& visited) { +/// Tries to get the storage buffer out of a constant buffer read instruction +std::optional TryGetStorageBuffer(const IR::Inst* inst, const Bias* bias) { + if (inst->Opcode() != IR::Opcode::GetCbufU32) { + return std::nullopt; + } + const IR::Value index{inst->Arg(0)}; + const IR::Value offset{inst->Arg(1)}; + if (!index.IsImmediate()) { + // Definitely not a storage buffer if it's read from a non-immediate index + return std::nullopt; + } + if (!offset.IsImmediate()) { + // TODO: Support SSBO arrays + return std::nullopt; + } + const StorageBufferAddr storage_buffer{ + .index{index.U32()}, + .offset{offset.U32()}, + }; + if (bias && !MeetsBias(storage_buffer, *bias)) { + // We have to blacklist some addresses in case we wrongly point to them + return std::nullopt; + } + return storage_buffer; +} + +/// Tries to track the storage buffer address used by a global memory instruction +std::optional Track(const IR::Value& value, const Bias* bias) { if (value.IsImmediate()) { - // Immediates can't be a storage buffer + // Nothing to do with immediates return std::nullopt; } - const IR::Inst* const inst{value.InstRecursive()}; - if (inst->Opcode() == IR::Opcode::GetCbufU32) { - const IR::Value index{inst->Arg(0)}; - const IR::Value offset{inst->Arg(1)}; - if (!index.IsImmediate()) { - // Definitely not a storage buffer if it's read from a non-immediate index - return std::nullopt; - } - if (!offset.IsImmediate()) { - // TODO: Support SSBO arrays - return std::nullopt; - } - const StorageBufferAddr storage_buffer{ - .index{index.U32()}, - .offset{offset.U32()}, - }; - if (bias && !MeetsBias(storage_buffer, *bias)) { - // We have to blacklist some addresses in case we wrongly point 
to them - return std::nullopt; + // Breadth-first search visiting the right most arguments first + // Small vector has been determined from shaders in Super Smash Bros. Ultimate + small_vector visited; + std::queue queue; + queue.push(value.InstRecursive()); + + while (!queue.empty()) { + // Pop one instruction from the queue + const IR::Inst* const inst{queue.front()}; + queue.pop(); + if (const std::optional result = TryGetStorageBuffer(inst, bias)) { + // This is the instruction we were looking for + return result; } - return storage_buffer; - } - // Reversed loops are more likely to find the right result - for (size_t arg = inst->NumArgs(); arg--;) { - IR::Block* inst_block{block}; - if (inst->Opcode() == IR::Opcode::Phi) { - // If we are going through a phi node, mark the current block as visited - visited.insert(block); - // and skip already visited blocks to avoid looping forever - IR::Block* const phi_block{inst->PhiBlock(arg)}; - if (visited.contains(phi_block)) { - // Already visited, skip + // Visit the right most arguments first + for (size_t arg = inst->NumArgs(); arg--;) { + const IR::Value arg_value{inst->Arg(arg)}; + if (arg_value.IsImmediate()) { continue; } - inst_block = phi_block; - } - const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), bias, visited)}; - if (storage_buffer) { - return *storage_buffer; + // Queue instruction if it hasn't been visited + const IR::Inst* const arg_inst{arg_value.InstRecursive()}; + if (std::ranges::find(visited, arg_inst) == visited.end()) { + visited.push_back(arg_inst); + queue.push(arg_inst); + } } } + // SSA tree has been traversed and the origin hasn't been found return std::nullopt; } /// Collects the storage buffer used by a global memory instruction and the instruction itself -void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set, - StorageInstVector& to_replace, StorageWritesSet& writes_set) { +void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) { // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ @@ -284,24 +300,23 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& s } // First try to find storage buffers in the NVN address const IR::U32 low_addr{low_addr_info->value}; - VisitedBlocks visited_blocks; - std::optional storage_buffer{Track(&block, low_addr, &nvn_bias, visited_blocks)}; + std::optional storage_buffer{Track(low_addr, &nvn_bias)}; if (!storage_buffer) { // If it fails, track without a bias - visited_blocks.clear(); - storage_buffer = Track(&block, low_addr, nullptr, visited_blocks); + storage_buffer = Track(low_addr, nullptr); if (!storage_buffer) { // If that also failed, drop the global memory usage + // LOG_ERROR DiscardGlobalMemory(block, inst); return; } } // Collect storage buffer and the instruction if (IsGlobalMemoryWrite(inst)) { - writes_set.insert(*storage_buffer); + info.writes.insert(*storage_buffer); } - storage_buffer_set.insert(*storage_buffer); - to_replace.push_back(StorageInst{ + info.set.insert(*storage_buffer); + info.to_replace.push_back(StorageInst{ .storage_buffer{*storage_buffer}, .inst{&inst}, .block{&block}, @@ -371,33 +386,29 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, } // Anonymous namespace void GlobalMemoryToStorageBufferPass(IR::Program& program) { - StorageBufferSet storage_buffers; - StorageInstVector to_replace; - 
StorageWritesSet writes_set; - + StorageInfo info; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { if (!IsGlobalMemory(inst)) { continue; } - CollectStorageBuffers(*block, inst, storage_buffers, to_replace, writes_set); + CollectStorageBuffers(*block, inst, info); } } - Info& info{program.info}; u32 storage_index{}; - for (const StorageBufferAddr& storage_buffer : storage_buffers) { - info.storage_buffers_descriptors.push_back({ + for (const StorageBufferAddr& storage_buffer : info.set) { + program.info.storage_buffers_descriptors.push_back({ .cbuf_index{storage_buffer.index}, .cbuf_offset{storage_buffer.offset}, .count{1}, - .is_written{writes_set.contains(storage_buffer)}, + .is_written{info.writes.contains(storage_buffer)}, }); ++storage_index; } - for (const StorageInst& storage_inst : to_replace) { + for (const StorageInst& storage_inst : info.to_replace) { const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; - const auto it{storage_buffers.find(storage_inst.storage_buffer)}; - const IR::U32 index{IR::Value{static_cast(storage_buffers.index_of(it))}}; + const auto it{info.set.find(storage_inst.storage_buffer)}; + const IR::U32 index{IR::Value{static_cast(info.set.index_of(it))}}; IR::Block* const block{storage_inst.block}; IR::Inst* const inst{storage_inst.inst}; const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; -- cgit v1.2.3 From 3f594dd86bd1ee1b178109132482c7d6b43e66dd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 02:31:09 -0300 Subject: shader: Reimplement GetCbufU64 as GetCbufU32x2 It may generate better code on some compilers and it's easier to handle. --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 5c1b81638..07f031ea6 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -271,7 +271,6 @@ void VisitUsages(Info& info, IR::Inst& inst) { break; } switch (inst.Opcode()) { - case IR::Opcode::GetCbufU64: case IR::Opcode::UndefU64: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS8: @@ -349,7 +348,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::GetCbufS16: case IR::Opcode::GetCbufU32: case IR::Opcode::GetCbufF32: - case IR::Opcode::GetCbufU64: { + case IR::Opcode::GetCbufU32x2: { if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { AddConstantBufferDescriptor(info, index.U32(), 1); } else { @@ -370,8 +369,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::GetCbufF32: info.used_constant_buffer_types |= IR::Type::F32; break; - case IR::Opcode::GetCbufU64: - info.used_constant_buffer_types |= IR::Type::U64; + case IR::Opcode::GetCbufU32x2: + info.used_constant_buffer_types |= IR::Type::U32x2; break; default: break; -- cgit v1.2.3 From 85795de99f27e57ddf97696e7915ddd4bdf02976 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 03:00:41 -0300 Subject: shader: Abstract breadth searches and use the abstraction --- .../global_memory_to_storage_buffer_pass.cpp | 84 +++++++--------------- src/shader_recompiler/ir_opt/texture_pass.cpp | 68 ++++++------------ 2 files changed, 48 insertions(+), 104 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git 
a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index f94c82e21..0858a0bdd 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -12,6 +12,7 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/breadth_first_search.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/ir_opt/passes.h" @@ -219,68 +220,35 @@ std::optional TrackLowAddress(IR::Inst* inst) { }; } -/// Tries to get the storage buffer out of a constant buffer read instruction -std::optional TryGetStorageBuffer(const IR::Inst* inst, const Bias* bias) { - if (inst->Opcode() != IR::Opcode::GetCbufU32) { - return std::nullopt; - } - const IR::Value index{inst->Arg(0)}; - const IR::Value offset{inst->Arg(1)}; - if (!index.IsImmediate()) { - // Definitely not a storage buffer if it's read from a non-immediate index - return std::nullopt; - } - if (!offset.IsImmediate()) { - // TODO: Support SSBO arrays - return std::nullopt; - } - const StorageBufferAddr storage_buffer{ - .index{index.U32()}, - .offset{offset.U32()}, - }; - if (bias && !MeetsBias(storage_buffer, *bias)) { - // We have to blacklist some addresses in case we wrongly point to them - return std::nullopt; - } - return storage_buffer; -} - /// Tries to track the storage buffer address used by a global memory instruction std::optional Track(const IR::Value& value, const Bias* bias) { - if (value.IsImmediate()) { - // Nothing to do with immediates - return std::nullopt; - } - // Breadth-first search visiting the right most arguments first - // Small vector has been determined from shaders in Super Smash Bros. 
Ultimate - small_vector visited; - std::queue queue; - queue.push(value.InstRecursive()); - - while (!queue.empty()) { - // Pop one instruction from the queue - const IR::Inst* const inst{queue.front()}; - queue.pop(); - if (const std::optional result = TryGetStorageBuffer(inst, bias)) { - // This is the instruction we were looking for - return result; + const auto pred{[bias](const IR::Inst* inst) -> std::optional { + if (inst->Opcode() != IR::Opcode::GetCbufU32) { + return std::nullopt; } - // Visit the right most arguments first - for (size_t arg = inst->NumArgs(); arg--;) { - const IR::Value arg_value{inst->Arg(arg)}; - if (arg_value.IsImmediate()) { - continue; - } - // Queue instruction if it hasn't been visited - const IR::Inst* const arg_inst{arg_value.InstRecursive()}; - if (std::ranges::find(visited, arg_inst) == visited.end()) { - visited.push_back(arg_inst); - queue.push(arg_inst); - } + const IR::Value index{inst->Arg(0)}; + const IR::Value offset{inst->Arg(1)}; + if (!index.IsImmediate()) { + // Definitely not a storage buffer if it's read from a + // non-immediate index + return std::nullopt; } - } - // SSA tree has been traversed and the origin hasn't been found - return std::nullopt; + if (!offset.IsImmediate()) { + // TODO: Support SSBO arrays + return std::nullopt; + } + const StorageBufferAddr storage_buffer{ + .index{index.U32()}, + .offset{offset.U32()}, + }; + if (bias && !MeetsBias(storage_buffer, *bias)) { + // We have to blacklist some addresses in case we wrongly + // point to them + return std::nullopt; + } + return storage_buffer; + }}; + return BreadthFirstSearch(value, pred); } /// Collects the storage buffer used by a global memory instruction and the instruction itself diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index da8977b76..bcb94ce4d 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -2,13 +2,14 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
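For readers following the refactor, the shared helper that replaces the hand-rolled loops behaves roughly like the sketch below. It is a minimal illustration distilled from the code this commit removes, not the actual contents of breadth_first_search.h; the template shape, the visited-container type, and the return-type deduction are assumptions.

// Illustrative sketch only: a worklist search over the defining instructions
// of a value, stopping when the caller-supplied predicate recognizes one.
template <typename Pred>
auto BreadthFirstSearch(const IR::Value& value, Pred&& pred)
    -> std::invoke_result_t<Pred, const IR::Inst*> {
    if (value.IsImmediate()) {
        // Immediates have no defining instruction to walk
        return std::nullopt;
    }
    boost::container::small_vector<const IR::Inst*, 8> visited;
    std::queue<const IR::Inst*> queue;
    queue.push(value.InstRecursive());
    while (!queue.empty()) {
        const IR::Inst* const inst{queue.front()};
        queue.pop();
        if (const auto result{pred(inst)}) {
            // The predicate found the origin it was looking for
            return result;
        }
        // Visit the rightmost arguments first, queuing unvisited instructions
        for (size_t arg = inst->NumArgs(); arg--;) {
            const IR::Value arg_value{inst->Arg(arg)};
            if (arg_value.IsImmediate()) {
                continue;
            }
            const IR::Inst* const arg_inst{arg_value.InstRecursive()};
            if (std::ranges::find(visited, arg_inst) == visited.end()) {
                visited.push_back(arg_inst);
                queue.push(arg_inst);
            }
        }
    }
    // The reachable SSA graph was exhausted without a match
    return std::nullopt;
}

Both the storage-buffer pass above and the texture pass below then express their old recursive Track functions as predicates handed to this single helper, as the remaining hunks of this commit show.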
+#include #include -#include #include #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/breadth_first_search.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/shader_info.h" @@ -28,9 +29,6 @@ struct TextureInst { using TextureInstVector = boost::container::small_vector; -using VisitedBlocks = boost::container::flat_set, - boost::container::small_vector>; - IR::Opcode IndexedInstruction(const IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: @@ -101,57 +99,35 @@ bool IsTextureInstruction(const IR::Inst& inst) { return IndexedInstruction(inst) != IR::Opcode::Void; } -std::optional Track(IR::Block* block, const IR::Value& value, - VisitedBlocks& visited) { - if (value.IsImmediate()) { - // Immediates can't be a storage buffer +std::optional TryGetConstBuffer(const IR::Inst* inst) { + if (inst->Opcode() != IR::Opcode::GetCbufU32) { return std::nullopt; } - const IR::Inst* const inst{value.InstRecursive()}; - if (inst->Opcode() == IR::Opcode::GetCbufU32) { - const IR::Value index{inst->Arg(0)}; - const IR::Value offset{inst->Arg(1)}; - if (!index.IsImmediate()) { - // Reading a bindless texture from variable indices is valid - // but not supported here at the moment - return std::nullopt; - } - if (!offset.IsImmediate()) { - // TODO: Support arrays of textures - return std::nullopt; - } - return ConstBufferAddr{ - .index{index.U32()}, - .offset{offset.U32()}, - }; + const IR::Value index{inst->Arg(0)}; + const IR::Value offset{inst->Arg(1)}; + if (!index.IsImmediate()) { + // Reading a bindless texture from variable indices is valid + // but not supported here at the moment + return std::nullopt; } - // Reversed loops are more likely to find the right result - for (size_t arg = inst->NumArgs(); arg--;) { - IR::Block* inst_block{block}; - if (inst->Opcode() == IR::Opcode::Phi) { - // If we are going through a phi node, mark the current block as visited - visited.insert(block); - // and skip already visited blocks to avoid looping forever - IR::Block* const phi_block{inst->PhiBlock(arg)}; - if (visited.contains(phi_block)) { - // Already visited, skip - continue; - } - inst_block = phi_block; - } - const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), visited)}; - if (storage_buffer) { - return *storage_buffer; - } + if (!offset.IsImmediate()) { + // TODO: Support arrays of textures + return std::nullopt; } - return std::nullopt; + return ConstBufferAddr{ + .index{index.U32()}, + .offset{offset.U32()}, + }; +} + +std::optional Track(const IR::Value& value) { + return IR::BreadthFirstSearch(value, TryGetConstBuffer); } TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { ConstBufferAddr addr; if (IsBindless(inst)) { - VisitedBlocks visited; - const std::optional track_addr{Track(block, inst.Arg(0), visited)}; + const std::optional track_addr{Track(inst.Arg(0))}; if (!track_addr) { throw NotImplementedException("Failed to track bindless texture constant buffer"); } -- cgit v1.2.3 From da6cf2632cd4dc0d2b0278353fcaee0789b418c0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 05:17:17 -0300 Subject: shader: Add subgroup masks --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git 
a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 07f031ea6..0f870535b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -414,6 +414,13 @@ void VisitUsages(Info& info, IR::Inst& inst) { inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; } + case IR::Opcode::SubgroupEqMask: + case IR::Opcode::SubgroupLtMask: + case IR::Opcode::SubgroupLeMask: + case IR::Opcode::SubgroupGtMask: + case IR::Opcode::SubgroupGeMask: + info.uses_subgroup_mask = true; + break; case IR::Opcode::VoteAll: case IR::Opcode::VoteAny: case IR::Opcode::VoteEqual: -- cgit v1.2.3 From 417fb5d385daa0fb40329709e6b4a53937580989 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 5 Apr 2021 19:10:55 -0300 Subject: shader: Move recursive SSA rewrite to the heap --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 118 ++++++++++++++++------ 1 file changed, 89 insertions(+), 29 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 259233746..ca36253d1 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -119,6 +119,26 @@ IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { return inst.Opcode() == IR::Opcode::Phi; } +enum class Status { + Start, + SetValue, + PreparePhiArgument, + PushPhiArgument, +}; + +template +struct ReadState { + ReadState(IR::Block* block_) : block{block_} {} + ReadState() = default; + + IR::Block* block{}; + IR::Value result{}; + IR::Inst* phi{}; + IR::Block* const* pred_it{}; + IR::Block* const* pred_end{}; + Status pc{Status::Start}; +}; + class Pass { public: template @@ -127,12 +147,75 @@ public: } template - IR::Value ReadVariable(Type variable, IR::Block* block) { - const ValueMap& def{current_def[variable]}; - if (const auto it{def.find(block)}; it != def.end()) { - return it->second; - } - return ReadVariableRecursive(variable, block); + IR::Value ReadVariable(Type variable, IR::Block* root_block) { + boost::container::small_vector, 64> stack{ + ReadState(nullptr), + ReadState(root_block), + }; + const auto prepare_phi_operand{[&] { + if (stack.back().pred_it == stack.back().pred_end) { + IR::Inst* const phi{stack.back().phi}; + IR::Block* const block{stack.back().block}; + const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))}; + stack.pop_back(); + stack.back().result = result; + WriteVariable(variable, block, result); + } else { + IR::Block* const imm_pred{*stack.back().pred_it}; + stack.back().pc = Status::PushPhiArgument; + stack.emplace_back(imm_pred); + } + }}; + do { + IR::Block* const block{stack.back().block}; + switch (stack.back().pc) { + case Status::Start: { + const ValueMap& def{current_def[variable]}; + if (const auto it{def.find(block)}; it != def.end()) { + stack.back().result = it->second; + } else if (!sealed_blocks.contains(block)) { + // Incomplete CFG + IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + incomplete_phis[block].insert_or_assign(variable, phi); + stack.back().result = IR::Value{&*phi}; + } else if (const std::span imm_preds{block->ImmediatePredecessors()}; + imm_preds.size() == 1) { + // Optimize the common case of one predecessor: no phi needed + stack.back().pc = Status::SetValue; + stack.emplace_back(imm_preds.front()); + 
break; + } else { + // Break potential cycles with operandless phi + IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + WriteVariable(variable, block, IR::Value{phi}); + + stack.back().phi = phi; + stack.back().pred_it = imm_preds.data(); + stack.back().pred_end = imm_preds.data() + imm_preds.size(); + prepare_phi_operand(); + break; + } + } + [[fallthrough]]; + case Status::SetValue: { + const IR::Value result{stack.back().result}; + WriteVariable(variable, block, result); + stack.pop_back(); + stack.back().result = result; + break; + } + case Status::PushPhiArgument: { + IR::Inst* const phi{stack.back().phi}; + phi->AddPhiOperand(*stack.back().pred_it, stack.back().result); + ++stack.back().pred_it; + } + [[fallthrough]]; + case Status::PreparePhiArgument: + prepare_phi_operand(); + break; + } + } while (stack.size() > 1); + return stack.back().result; } void SealBlock(IR::Block* block) { @@ -146,29 +229,6 @@ public: } private: - template - IR::Value ReadVariableRecursive(Type variable, IR::Block* block) { - IR::Value val; - if (!sealed_blocks.contains(block)) { - // Incomplete CFG - IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; - incomplete_phis[block].insert_or_assign(variable, phi); - val = IR::Value{&*phi}; - } else if (const std::span imm_preds{block->ImmediatePredecessors()}; - imm_preds.size() == 1) { - // Optimize the common case of one predecessor: no phi needed - val = ReadVariable(variable, imm_preds.front()); - } else { - // Break potential cycles with operandless phi - IR::Inst& phi_inst{*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; - val = IR::Value{&phi_inst}; - WriteVariable(variable, block, val); - val = AddPhiOperands(variable, phi_inst, block); - } - WriteVariable(variable, block, val); - return val; - } - template IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) { for (IR::Block* const imm_pred : block->ImmediatePredecessors()) { -- cgit v1.2.3 From 1d51803169f72f79e19995072fb9e8a371dbdcbf Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 4 Apr 2021 06:47:14 +0200 Subject: shader: Implement indexed attributes --- .../ir_opt/collect_shader_info_pass.cpp | 36 +++++++++++++++++++++- src/shader_recompiler/ir_opt/passes.h | 2 +- 2 files changed, 36 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 0f870535b..dbe9f1f40 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
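Before the hunk below, a note on why a header pass is needed at all: a direct attribute access names its generic slot at compile time, while GetAttributeIndexed and SetAttributeIndexed compute the slot at runtime, so per-attribute usage can no longer be derived from the IR alone and has to be widened with the IMAP/OMAP bits of the shader program header (SPH). A rough illustration follows; the emitter calls are paraphrased from how they are used elsewhere in the recompiler, and their exact signatures are an assumption.

// Hypothetical fragment contrasting the two attribute access forms.
void AttributeExample(IR::IREmitter& ir, const IR::U32& dynamic_index) {
    // Direct access: Generic0.X is known statically, so the info collector can
    // mark exactly that entry of input_generics as used.
    [[maybe_unused]] const IR::F32 fixed{ir.GetAttribute(IR::Attribute::Generic0X)};

    // Indexed access: the slot depends on a runtime value, so the collector can
    // only set loads_indexed_attributes; GatherInfoFromHeader later marks every
    // generic the SPH declares active.
    [[maybe_unused]] const IR::F32 dynamic{ir.GetAttributeIndexed(dynamic_index)};
}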
+#include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" @@ -323,6 +324,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::SetAttribute: SetAttribute(info, inst.Arg(0).Attribute()); break; + case IR::Opcode::GetAttributeIndexed: + info.loads_indexed_attributes = true; + break; + case IR::Opcode::SetAttributeIndexed: + info.stores_indexed_attributes = true; + break; case IR::Opcode::SetFragColor: info.stores_frag_color[inst.Arg(0).U32()] = true; break; @@ -502,15 +509,42 @@ void Visit(Info& info, IR::Inst& inst) { VisitUsages(info, inst); VisitFpModifiers(info, inst); } + +void GatherInfoFromHeader(Environment& env, Info& info) { + auto stage = env.ShaderStage(); + if (stage == Stage::Compute) { + return; + } + const auto& header = env.SPH(); + if (stage == Stage::Fragment) { + for (size_t i = 0; i < info.input_generics.size(); i++) { + info.input_generics[i].used = + info.input_generics[i].used || header.ps.IsGenericVectorActive(i); + } + return; + } + for (size_t i = 0; i < info.input_generics.size(); i++) { + info.input_generics[i].used = + info.input_generics[i].used || header.vtg.IsInputGenericVectorActive(i); + } + for (size_t i = 0; i < info.stores_generics.size(); i++) { + info.stores_generics[i] = + info.stores_generics[i] || header.vtg.IsOutputGenericVectorActive(i); + } + info.stores_clip_distance = + info.stores_clip_distance || header.vtg.omap_systemc.clip_distances != 0; +} + } // Anonymous namespace -void CollectShaderInfoPass(IR::Program& program) { +void CollectShaderInfoPass(Environment& env, IR::Program& program) { Info& info{program.info}; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { Visit(info, inst); } } + GatherInfoFromHeader(env, info); } } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 5c1fc166c..186104713 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -12,7 +12,7 @@ namespace Shader::Optimization { -void CollectShaderInfoPass(IR::Program& program); +void CollectShaderInfoPass(Environment& env, IR::Program& program); void ConstantPropagationPass(IR::Program& program); void DeadCodeEliminationPass(IR::Program& program); void GlobalMemoryToStorageBufferPass(IR::Program& program); -- cgit v1.2.3 From 73cb17f41bf019df504d2d2af4ebdf45aa3201c6 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 4 Apr 2021 09:38:15 +0200 Subject: shader: Implement indexed Position and ClipDistances --- .../ir_opt/collect_shader_info_pass.cpp | 26 +++++++++++++++------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index dbe9f1f40..a14465598 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -517,22 +517,32 @@ void GatherInfoFromHeader(Environment& env, Info& info) { } const auto& header = env.SPH(); if (stage == Stage::Fragment) { + if (!info.loads_indexed_attributes) { + return; + } for (size_t i = 0; i < info.input_generics.size(); i++) { info.input_generics[i].used = info.input_generics[i].used || header.ps.IsGenericVectorActive(i); } + info.loads_position = 
info.loads_position || header.ps.imap_systemb.position != 0; return; } - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used = - info.input_generics[i].used || header.vtg.IsInputGenericVectorActive(i); + if (info.loads_indexed_attributes) { + for (size_t i = 0; i < info.input_generics.size(); i++) { + info.input_generics[i].used = + info.input_generics[i].used || header.vtg.IsInputGenericVectorActive(i); + } } - for (size_t i = 0; i < info.stores_generics.size(); i++) { - info.stores_generics[i] = - info.stores_generics[i] || header.vtg.IsOutputGenericVectorActive(i); + if (info.stores_indexed_attributes) { + info.loads_position = info.loads_position || header.vtg.imap_systemb.position != 0; + for (size_t i = 0; i < info.stores_generics.size(); i++) { + info.stores_generics[i] = + info.stores_generics[i] || header.vtg.IsOutputGenericVectorActive(i); + } + info.stores_clip_distance = + info.stores_clip_distance || header.vtg.omap_systemc.clip_distances != 0; + info.stores_position = info.stores_position || header.vtg.omap_systemb.position != 0; } - info.stores_clip_distance = - info.stores_clip_distance || header.vtg.omap_systemc.clip_distances != 0; } } // Anonymous namespace -- cgit v1.2.3 From dcaf0e91508562a70a49db7011ad09f13f811d71 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 5 Apr 2021 04:03:12 +0200 Subject: shader: Address feedback --- .../ir_opt/collect_shader_info_pass.cpp | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index a14465598..1c03ee82a 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -511,37 +511,33 @@ void Visit(Info& info, IR::Inst& inst) { } void GatherInfoFromHeader(Environment& env, Info& info) { - auto stage = env.ShaderStage(); + Stage stage{env.ShaderStage()}; if (stage == Stage::Compute) { return; } - const auto& header = env.SPH(); + const auto& header{env.SPH()}; if (stage == Stage::Fragment) { if (!info.loads_indexed_attributes) { return; } for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used = - info.input_generics[i].used || header.ps.IsGenericVectorActive(i); + info.input_generics[i].used |= header.ps.IsGenericVectorActive(i); } - info.loads_position = info.loads_position || header.ps.imap_systemb.position != 0; + info.loads_position |= header.ps.imap_systemb.position != 0; return; } if (info.loads_indexed_attributes) { for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used = - info.input_generics[i].used || header.vtg.IsInputGenericVectorActive(i); + info.input_generics[i].used |= header.vtg.IsInputGenericVectorActive(i); } + info.loads_position |= header.vtg.imap_systemb.position != 0; } if (info.stores_indexed_attributes) { - info.loads_position = info.loads_position || header.vtg.imap_systemb.position != 0; for (size_t i = 0; i < info.stores_generics.size(); i++) { - info.stores_generics[i] = - info.stores_generics[i] || header.vtg.IsOutputGenericVectorActive(i); + info.stores_generics[i] |= header.vtg.IsOutputGenericVectorActive(i); } - info.stores_clip_distance = - info.stores_clip_distance || header.vtg.omap_systemc.clip_distances != 0; - info.stores_position = info.stores_position || header.vtg.omap_systemb.position != 0; + info.stores_clip_distance 
|= header.vtg.omap_systemc.clip_distances != 0; + info.stores_position |= header.vtg.omap_systemb.position != 0; } } -- cgit v1.2.3 From 1f3eb601acdcdfa4c119cffbf36b5792147b893f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 6 Apr 2021 02:56:15 -0300 Subject: shader: Implement texture buffers --- src/shader_recompiler/ir_opt/texture_pass.cpp | 80 +++++++++++++++++++++------ 1 file changed, 62 insertions(+), 18 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index bcb94ce4d..290ce4179 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -147,24 +147,39 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { class Descriptors { public: - explicit Descriptors(TextureDescriptors& descriptors_) : descriptors{descriptors_} {} + explicit Descriptors(TextureDescriptors& texture_descriptors_, + TextureBufferDescriptors& texture_buffer_descriptors_) + : texture_descriptors{texture_descriptors_}, texture_buffer_descriptors{ + texture_buffer_descriptors_} {} + + u32 Add(const TextureDescriptor& desc) { + return Add(texture_descriptors, desc, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && desc.type == existing.type; + }); + } + + u32 Add(const TextureBufferDescriptor& desc) { + return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset; + }); + } - u32 Add(const TextureDescriptor& descriptor) { +private: + template + static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { // TODO: Handle arrays - auto it{std::ranges::find_if(descriptors, [&descriptor](const TextureDescriptor& existing) { - return descriptor.cbuf_index == existing.cbuf_index && - descriptor.cbuf_offset == existing.cbuf_offset && - descriptor.type == existing.type; - })}; + const auto it{std::ranges::find_if(descriptors, pred)}; if (it != descriptors.end()) { return static_cast(std::distance(descriptors.begin(), it)); } - descriptors.push_back(descriptor); + descriptors.push_back(desc); return static_cast(descriptors.size()) - 1; } -private: - TextureDescriptors& descriptors; + TextureDescriptors& texture_descriptors; + TextureBufferDescriptors& texture_buffer_descriptors; }; } // Anonymous namespace @@ -185,7 +200,10 @@ void TexturePass(Environment& env, IR::Program& program) { std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) { return lhs.cbuf.index < rhs.cbuf.index; }); - Descriptors descriptors{program.info.texture_descriptors}; + Descriptors descriptors{ + program.info.texture_descriptors, + program.info.texture_buffer_descriptors, + }; for (TextureInst& texture_inst : to_replace) { // TODO: Handle arrays IR::Inst* const inst{texture_inst.inst}; @@ -193,16 +211,42 @@ void TexturePass(Environment& env, IR::Program& program) { const auto& cbuf{texture_inst.cbuf}; auto flags{inst->Flags()}; - if (inst->Opcode() == IR::Opcode::ImageQueryDimensions) { + switch (inst->Opcode()) { + case IR::Opcode::ImageQueryDimensions: flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); inst->SetFlags(flags); + break; + case IR::Opcode::ImageFetch: + if (flags.type != TextureType::Color1D) { + break; + } + if (env.ReadTextureType(cbuf.index, cbuf.offset) == TextureType::Buffer) { + 
// Replace with the bound texture type only when it's a texture buffer + // If the instruction is 1D and the bound type is 2D, don't change the code and let + // the rasterizer robustness handle it + // This happens on Fire Emblem: Three Houses + flags.type.Assign(TextureType::Buffer); + } + inst->SetFlags(flags); + break; + default: + break; + } + u32 index; + if (flags.type == TextureType::Buffer) { + index = descriptors.Add(TextureBufferDescriptor{ + .cbuf_index{cbuf.index}, + .cbuf_offset{cbuf.offset}, + .count{1}, + }); + } else { + index = descriptors.Add(TextureDescriptor{ + .type{flags.type}, + .cbuf_index{cbuf.index}, + .cbuf_offset{cbuf.offset}, + .count{1}, + }); } - const u32 index{descriptors.Add(TextureDescriptor{ - .type{flags.type}, - .cbuf_index{cbuf.index}, - .cbuf_offset{cbuf.offset}, - .count{1}, - })}; inst->SetArg(0, IR::Value{index}); } } -- cgit v1.2.3 From 0bb85f6a753c769266c95c4ba146b25b9eaaaffd Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Mon, 5 Apr 2021 22:25:22 -0400 Subject: shader_recompiler,video_core: Cleanup some GCC and Clang errors Mostly fixing unused *, implicit conversion, braced scalar init, fpermissive, and some others. Some Clang errors likely remain in video_core, and std::ranges is still a pertinent issue in shader_recompiler shader_recompiler: cmake: Force bracket depth to 1024 on Clang Increases the maximum fold expression depth thread_worker: Include condition_variable Don't use list initializers in control flow Co-authored-by: ReinUsesLisp --- .../ir_opt/collect_shader_info_pass.cpp | 20 ++++----- .../ir_opt/constant_propagation_pass.cpp | 49 ++++++++++++---------- .../global_memory_to_storage_buffer_pass.cpp | 42 +++++++++---------- .../ir_opt/identity_removal_pass.cpp | 3 +- .../ir_opt/lower_fp16_to_fp32.cpp | 2 +- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 4 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 32 +++++++------- src/shader_recompiler/ir_opt/verification_pass.cpp | 4 +- 8 files changed, 80 insertions(+), 76 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 1c03ee82a..edbfcd308 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/shader_info.h" namespace Shader::Optimization { @@ -22,8 +23,8 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { auto& cbufs{info.constant_buffer_descriptors}; cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), ConstantBufferDescriptor{ - .index{index}, - .count{1}, + .index = index, + .count = 1, }); } @@ -91,7 +92,7 @@ void SetAttribute(Info& info, IR::Attribute attribute) { } void VisitUsages(Info& info, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::CompositeConstructF16x2: case IR::Opcode::CompositeConstructF16x3: case IR::Opcode::CompositeConstructF16x4: @@ -209,7 +210,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: case 
IR::Opcode::UndefU8: @@ -236,7 +237,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU16: case IR::Opcode::GetCbufS16: case IR::Opcode::UndefU16: @@ -271,7 +272,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::UndefU64: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS8: @@ -314,7 +315,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::DemoteToHelperInvocation: info.uses_demote_to_helper_invocation = true; break; @@ -361,7 +362,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { } else { throw NotImplementedException("Constant buffer with non-immediate index"); } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: info.used_constant_buffer_types |= IR::Type::U8; @@ -443,7 +444,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { } void VisitFpModifiers(Info& info, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::FPAdd16: case IR::Opcode::FPFma16: case IR::Opcode::FPMul16: @@ -540,7 +541,6 @@ void GatherInfoFromHeader(Environment& env, Info& info) { info.stores_position |= header.vtg.omap_systemb.position != 0; } } - } // Anonymous namespace void CollectShaderInfoPass(Environment& env, IR::Program& program) { diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 1720d7a09..61fbbe04c 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -58,7 +58,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { } if (is_lhs_immediate && !is_rhs_immediate) { IR::Inst* const rhs_inst{rhs.InstRecursive()}; - if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) { + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) { const auto combined{imm_fn(Arg(lhs), Arg(rhs_inst->Arg(1)))}; inst.SetArg(0, rhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); @@ -70,7 +70,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { } if (!is_lhs_immediate && is_rhs_immediate) { const IR::Inst* const lhs_inst{lhs.InstRecursive()}; - if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) { + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) { const auto combined{imm_fn(Arg(rhs), Arg(lhs_inst->Arg(1)))}; inst.SetArg(0, lhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); @@ -123,7 +123,8 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { return false; } IR::Inst* const lhs_shl{lhs_arg.InstRecursive()}; - if (lhs_shl->Opcode() != IR::Opcode::ShiftLeftLogical32 || lhs_shl->Arg(1) != IR::Value{16U}) { + if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || + lhs_shl->Arg(1) != IR::Value{16U}) { return false; } if (lhs_shl->Arg(0).IsImmediate()) { @@ -131,7 +132,7 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { } IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()}; IR::Inst* const rhs_mul{rhs_arg.InstRecursive()}; - if (lhs_mul->Opcode() != IR::Opcode::IMul32 || rhs_mul->Opcode() != IR::Opcode::IMul32) { + if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) { return false; } if 
(lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) { @@ -143,10 +144,10 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { } IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()}; IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()}; - if (lhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { return false; } - if (rhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { return false; } if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) { @@ -194,8 +195,9 @@ void FoldISub32(IR::Inst& inst) { // ISub32 is generally used to subtract two constant buffers, compare and replace this with // zero if they equal. const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { - return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 && - a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1); + return a->GetOpcode() == IR::Opcode::GetCbufU32 && + b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) && + a->Arg(1) == b->Arg(1); }}; IR::Inst* op_a{inst.Arg(0).InstRecursive()}; IR::Inst* op_b{inst.Arg(1).InstRecursive()}; @@ -204,15 +206,15 @@ void FoldISub32(IR::Inst& inst) { return; } // It's also possible a value is being added to a cbuf and then subtracted - if (op_b->Opcode() == IR::Opcode::IAdd32) { + if (op_b->GetOpcode() == IR::Opcode::IAdd32) { // Canonicalize local variables to simplify the following logic std::swap(op_a, op_b); } - if (op_b->Opcode() != IR::Opcode::GetCbufU32) { + if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) { return; } IR::Inst* const inst_cbuf{op_b}; - if (op_a->Opcode() != IR::Opcode::IAdd32) { + if (op_a->GetOpcode() != IR::Opcode::IAdd32) { return; } IR::Value add_op_a{op_a->Arg(0)}; @@ -250,7 +252,8 @@ void FoldFPMul32(IR::Inst& inst) { } IR::Inst* const lhs_op{lhs_value.InstRecursive()}; IR::Inst* const rhs_op{rhs_value.InstRecursive()}; - if (lhs_op->Opcode() != IR::Opcode::FPMul32 || rhs_op->Opcode() != IR::Opcode::FPRecip32) { + if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || + rhs_op->GetOpcode() != IR::Opcode::FPRecip32) { return; } const IR::Value recip_source{rhs_op->Arg(0)}; @@ -260,8 +263,8 @@ void FoldFPMul32(IR::Inst& inst) { } IR::Inst* const attr_a{recip_source.InstRecursive()}; IR::Inst* const attr_b{lhs_mul_source.InstRecursive()}; - if (attr_a->Opcode() != IR::Opcode::GetAttribute || - attr_b->Opcode() != IR::Opcode::GetAttribute) { + if (attr_a->GetOpcode() != IR::Opcode::GetAttribute || + attr_b->GetOpcode() != IR::Opcode::GetAttribute) { return; } if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) { @@ -304,7 +307,7 @@ void FoldLogicalNot(IR::Inst& inst) { return; } IR::Inst* const arg{value.InstRecursive()}; - if (arg->Opcode() == IR::Opcode::LogicalNot) { + if (arg->GetOpcode() == IR::Opcode::LogicalNot) { inst.ReplaceUsesWith(arg->Arg(0)); } } @@ -317,12 +320,12 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { return; } IR::Inst* const arg_inst{value.InstRecursive()}; - if (arg_inst->Opcode() == reverse) { + if (arg_inst->GetOpcode() == reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); return; } if constexpr (op == IR::Opcode::BitCastF32U32) { - if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) { + if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) { // Replace the bitcast with a typed constant buffer read inst.ReplaceOpcode(IR::Opcode::GetCbufF32); inst.SetArg(0, arg_inst->Arg(0)); @@ -338,7 +341,7 @@ void 
FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { return; } IR::Inst* const arg_inst{value.InstRecursive()}; - if (arg_inst->Opcode() == reverse) { + if (arg_inst->GetOpcode() == reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); return; } @@ -347,7 +350,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { template IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence) { using Traits = LambdaTraits; - return IR::Value{func(Arg>(inst.Arg(I))...)}; + return IR::Value{func(Arg>(inst.Arg(I))...)}; } void FoldBranchConditional(IR::Inst& inst) { @@ -357,7 +360,7 @@ void FoldBranchConditional(IR::Inst& inst) { return; } const IR::Inst* cond_inst{cond.InstRecursive()}; - if (cond_inst->Opcode() == IR::Opcode::LogicalNot) { + if (cond_inst->GetOpcode() == IR::Opcode::LogicalNot) { const IR::Value true_label{inst.Arg(1)}; const IR::Value false_label{inst.Arg(2)}; // Remove negation on the conditional (take the parameter out of LogicalNot) and swap @@ -371,10 +374,10 @@ void FoldBranchConditional(IR::Inst& inst) { std::optional FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, IR::Opcode construct, u32 first_index) { IR::Inst* const inst{inst_value.InstRecursive()}; - if (inst->Opcode() == construct) { + if (inst->GetOpcode() == construct) { return inst->Arg(first_index); } - if (inst->Opcode() != insert) { + if (inst->GetOpcode() != insert) { return std::nullopt; } IR::Value value_index{inst->Arg(2)}; @@ -410,7 +413,7 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser } void ConstantPropagation(IR::Block& block, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetRegister: return FoldGetRegister(inst); case IR::Opcode::GetPred: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 0858a0bdd..90a65dd16 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -57,7 +57,7 @@ struct StorageInfo { /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -80,7 +80,7 @@ bool IsGlobalMemory(const IR::Inst& inst) { /// Returns true when the instruction is a global memory instruction bool IsGlobalMemoryWrite(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalU8: case IR::Opcode::WriteGlobalS16: @@ -140,7 +140,7 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value zero{u32{0}}; - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -164,7 +164,7 @@ void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { inst.Invalidate(); break; default: - throw LogicError("Invalid opcode to discard its global memory operation {}", inst.Opcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", inst.GetOpcode()); } } @@ -184,7 +184,7 @@ std::optional TrackLowAddress(IR::Inst* inst) { 
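One concrete example of the "braced scalar init" warning named in the commit message above, since the hunks around here replace .field{value} initializers with .field = value: Clang diagnoses braces around a scalar member inside a designated initializer (-Wbraced-scalar-init). The struct below is hypothetical and only stands in for the descriptor types used by these passes.

// Hypothetical descriptor illustrating the braced-scalar-init cleanup.
struct ExampleDescriptor {
    u32 index;
    u32 count;
};

ExampleDescriptor MakeExampleDescriptor(u32 index) {
    // Before: ExampleDescriptor{.index{index}, .count{1}} draws the Clang
    // warning mentioned in the commit message.
    // After: the '=' form below is equivalent and warning-free, matching the
    // changes in the following hunks.
    return ExampleDescriptor{
        .index = index,
        .count = 1,
    };
}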
// This address is expected to either be a PackUint2x32 or a IAdd64 IR::Inst* addr_inst{addr.InstRecursive()}; s32 imm_offset{0}; - if (addr_inst->Opcode() == IR::Opcode::IAdd64) { + if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) { // If it's an IAdd64, get the immediate offset it is applying and grab the address // instruction. This expects for the instruction to be canonicalized having the address on // the first argument and the immediate offset on the second one. @@ -200,7 +200,7 @@ std::optional TrackLowAddress(IR::Inst* inst) { addr_inst = iadd_addr.Inst(); } // With IAdd64 handled, now PackUint2x32 is expected without exceptions - if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { + if (addr_inst->GetOpcode() != IR::Opcode::PackUint2x32) { return std::nullopt; } // PackUint2x32 is expected to be generated from a vector @@ -210,20 +210,20 @@ std::optional TrackLowAddress(IR::Inst* inst) { } // This vector is expected to be a CompositeConstructU32x2 IR::Inst* const vector_inst{vector.InstRecursive()}; - if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { + if (vector_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) { return std::nullopt; } // Grab the first argument from the CompositeConstructU32x2, this is the low address. return LowAddrInfo{ .value{IR::U32{vector_inst->Arg(0)}}, - .imm_offset{imm_offset}, + .imm_offset = imm_offset, }; } /// Tries to track the storage buffer address used by a global memory instruction std::optional Track(const IR::Value& value, const Bias* bias) { const auto pred{[bias](const IR::Inst* inst) -> std::optional { - if (inst->Opcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; @@ -256,9 +256,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ - .index{0}, - .offset_begin{0x110}, - .offset_end{0x610}, + .index = 0, + .offset_begin = 0x110, + .offset_end = 0x610, }; // Track the low address of the instruction const std::optional low_addr_info{TrackLowAddress(&inst)}; @@ -286,8 +286,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) info.set.insert(*storage_buffer); info.to_replace.push_back(StorageInst{ .storage_buffer{*storage_buffer}, - .inst{&inst}, - .block{&block}, + .inst = &inst, + .block = &block, }); } @@ -312,7 +312,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer /// Replace a global memory load instruction with its storage buffer equivalent void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})}; inst.ReplaceUsesWith(value); @@ -321,7 +321,7 @@ void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, /// Replace a global memory write instruction with its storage buffer equivalent void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; 
const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)}); inst.Invalidate(); @@ -330,7 +330,7 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index /// Replace a global memory instruction with its storage buffer equivalent void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -348,7 +348,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, case IR::Opcode::WriteGlobal128: return ReplaceWrite(block, inst, storage_index, offset); default: - throw InvalidArgument("Invalid global memory opcode {}", inst.Opcode()); + throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); } } } // Anonymous namespace @@ -366,9 +366,9 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { u32 storage_index{}; for (const StorageBufferAddr& storage_buffer : info.set) { program.info.storage_buffers_descriptors.push_back({ - .cbuf_index{storage_buffer.index}, - .cbuf_offset{storage_buffer.offset}, - .count{1}, + .cbuf_index = storage_buffer.index, + .cbuf_offset = storage_buffer.offset, + .count = 1, .is_written{info.writes.contains(storage_buffer)}, }); ++storage_index; diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 8790b48f2..38af72dfe 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -22,7 +22,8 @@ void IdentityRemovalPass(IR::Program& program) { inst->SetArg(i, arg.Inst()->Arg(0)); } } - if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) { + if (inst->GetOpcode() == IR::Opcode::Identity || + inst->GetOpcode() == IR::Opcode::Void) { to_invalidate.push_back(&*inst); inst = block->Instructions().erase(inst); } else { diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 0d2c91ed6..52576b07f 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -123,7 +123,7 @@ IR::Opcode Replace(IR::Opcode op) { void LowerFp16ToFp32(IR::Program& program) { for (IR::Block* const block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { - inst.ReplaceOpcode(Replace(inst.Opcode())); + inst.ReplaceOpcode(Replace(inst.GetOpcode())); } } } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index ca36253d1..346fcc377 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -116,7 +116,7 @@ IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { } [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { - return inst.Opcode() == IR::Opcode::Phi; + return inst.GetOpcode() == IR::Opcode::Phi; } enum class Status { @@ -278,7 +278,7 @@ private: }; void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::SetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { pass.WriteVariable(reg, block, inst.Arg(1)); diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp 
b/src/shader_recompiler/ir_opt/texture_pass.cpp index 290ce4179..c8aee3d3d 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -30,7 +30,7 @@ struct TextureInst { using TextureInstVector = boost::container::small_vector; IR::Opcode IndexedInstruction(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: case IR::Opcode::BoundImageSampleImplicitLod: return IR::Opcode::ImageSampleImplicitLod; @@ -67,7 +67,7 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { } bool IsBindless(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: case IR::Opcode::BindlessImageSampleExplicitLod: case IR::Opcode::BindlessImageSampleDrefImplicitLod: @@ -91,7 +91,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageGradient: return false; default: - throw InvalidArgument("Invalid opcode {}", inst.Opcode()); + throw InvalidArgument("Invalid opcode {}", inst.GetOpcode()); } } @@ -100,7 +100,7 @@ bool IsTextureInstruction(const IR::Inst& inst) { } std::optional TryGetConstBuffer(const IR::Inst* inst) { - if (inst->Opcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; @@ -134,14 +134,14 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { addr = *track_addr; } else { addr = ConstBufferAddr{ - .index{env.TextureBoundBuffer()}, - .offset{inst.Arg(0).U32()}, + .index = env.TextureBoundBuffer(), + .offset = inst.Arg(0).U32(), }; } return TextureInst{ .cbuf{addr}, - .inst{&inst}, - .block{block}, + .inst = &inst, + .block = block, }; } @@ -211,7 +211,7 @@ void TexturePass(Environment& env, IR::Program& program) { const auto& cbuf{texture_inst.cbuf}; auto flags{inst->Flags()}; - switch (inst->Opcode()) { + switch (inst->GetOpcode()) { case IR::Opcode::ImageQueryDimensions: flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); inst->SetFlags(flags); @@ -235,16 +235,16 @@ void TexturePass(Environment& env, IR::Program& program) { u32 index; if (flags.type == TextureType::Buffer) { index = descriptors.Add(TextureBufferDescriptor{ - .cbuf_index{cbuf.index}, - .cbuf_offset{cbuf.offset}, - .count{1}, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, }); } else { index = descriptors.Add(TextureDescriptor{ - .type{flags.type}, - .cbuf_index{cbuf.index}, - .cbuf_offset{cbuf.offset}, - .count{1}, + .type = flags.type, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, }); } inst->SetArg(0, IR::Value{index}); diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 4080b37cc..dbec96d84 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -14,14 +14,14 @@ namespace Shader::Optimization { static void ValidateTypes(const IR::Program& program) { for (const auto& block : program.blocks) { for (const IR::Inst& inst : *block) { - if (inst.Opcode() == IR::Opcode::Phi) { + if (inst.GetOpcode() == IR::Opcode::Phi) { // Skip validation on phi nodes continue; } const size_t num_args{inst.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { const IR::Type t1{inst.Arg(i).Type()}; - const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)}; + const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)}; if 
(!IR::AreTypesCompatible(t1, t2)) { throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block)); } -- cgit v1.2.3 From 5bfcafa0a21619e8cd82c38ec51e260838f42042 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sat, 10 Apr 2021 02:32:55 -0400 Subject: shader: Address feedback + clang format --- src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 90a65dd16..afe871505 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -164,7 +164,8 @@ void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { inst.Invalidate(); break; default: - throw LogicError("Invalid opcode to discard its global memory operation {}", inst.GetOpcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", + inst.GetOpcode()); } } -- cgit v1.2.3 From 7cb2ab358517d95ebcd35c94c72b9e91762906c3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Apr 2021 01:45:39 -0300 Subject: shader: Implement SULD and SUST --- .../ir_opt/collect_shader_info_pass.cpp | 3 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 91 ++++++++++++++++------ 2 files changed, 69 insertions(+), 25 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index edbfcd308..bc23b0211 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -416,8 +416,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageQueryLod: case IR::Opcode::ImageGradient: { const TextureType type{inst.Flags().type}; - info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || - type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; + info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; info.uses_sparse_residency |= inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index c8aee3d3d..a7b1fcfad 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -61,6 +61,12 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { case IR::Opcode::BoundImageGradient: case IR::Opcode::BindlessImageGradient: return IR::Opcode::ImageGradient; + case IR::Opcode::BoundImageRead: + case IR::Opcode::BindlessImageRead: + return IR::Opcode::ImageRead; + case IR::Opcode::BoundImageWrite: + case IR::Opcode::BindlessImageWrite: + return IR::Opcode::ImageWrite; default: return IR::Opcode::Void; } @@ -78,6 +84,8 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BindlessImageQueryDimensions: case IR::Opcode::BindlessImageQueryLod: case IR::Opcode::BindlessImageGradient: + case IR::Opcode::BindlessImageRead: + case IR::Opcode::BindlessImageWrite: return true; case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: @@ -89,6 +97,8 @@ bool IsBindless(const IR::Inst& inst) { case 
IR::Opcode::BoundImageQueryDimensions: case IR::Opcode::BoundImageQueryLod: case IR::Opcode::BoundImageGradient: + case IR::Opcode::BoundImageRead: + case IR::Opcode::BoundImageWrite: return false; default: throw InvalidArgument("Invalid opcode {}", inst.GetOpcode()); @@ -147,10 +157,18 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { class Descriptors { public: - explicit Descriptors(TextureDescriptors& texture_descriptors_, - TextureBufferDescriptors& texture_buffer_descriptors_) - : texture_descriptors{texture_descriptors_}, texture_buffer_descriptors{ - texture_buffer_descriptors_} {} + explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, + TextureDescriptors& texture_descriptors_, + ImageDescriptors& image_descriptors_) + : texture_buffer_descriptors{texture_buffer_descriptors_}, + texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {} + + u32 Add(const TextureBufferDescriptor& desc) { + return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset; + }); + } u32 Add(const TextureDescriptor& desc) { return Add(texture_descriptors, desc, [&desc](const auto& existing) { @@ -159,11 +177,14 @@ public: }); } - u32 Add(const TextureBufferDescriptor& desc) { - return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { - return desc.cbuf_index == existing.cbuf_index && + u32 Add(const ImageDescriptor& desc) { + const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) { + return desc.type == existing.type && desc.format == existing.format && + desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset; - }); + })}; + image_descriptors[index].is_written |= desc.is_written; + return index; } private: @@ -178,8 +199,9 @@ private: return static_cast(descriptors.size()) - 1; } - TextureDescriptors& texture_descriptors; TextureBufferDescriptors& texture_buffer_descriptors; + TextureDescriptors& texture_descriptors; + ImageDescriptors& image_descriptors; }; } // Anonymous namespace @@ -201,8 +223,9 @@ void TexturePass(Environment& env, IR::Program& program) { return lhs.cbuf.index < rhs.cbuf.index; }); Descriptors descriptors{ - program.info.texture_descriptors, program.info.texture_buffer_descriptors, + program.info.texture_descriptors, + program.info.image_descriptors, }; for (TextureInst& texture_inst : to_replace) { // TODO: Handle arrays @@ -233,19 +256,41 @@ void TexturePass(Environment& env, IR::Program& program) { break; } u32 index; - if (flags.type == TextureType::Buffer) { - index = descriptors.Add(TextureBufferDescriptor{ - .cbuf_index = cbuf.index, - .cbuf_offset = cbuf.offset, - .count = 1, - }); - } else { - index = descriptors.Add(TextureDescriptor{ - .type = flags.type, - .cbuf_index = cbuf.index, - .cbuf_offset = cbuf.offset, - .count = 1, - }); + switch (inst->GetOpcode()) { + case IR::Opcode::ImageRead: + case IR::Opcode::ImageWrite: { + const bool is_written{inst->GetOpcode() == IR::Opcode::ImageWrite}; + if (flags.type == TextureType::Buffer) { + throw NotImplementedException("Image buffer"); + } else { + index = descriptors.Add(ImageDescriptor{ + .type = flags.type, + .format = flags.image_format, + .is_written = is_written, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, + }); + } + break; + } + default: + if (flags.type == TextureType::Buffer) { + index = descriptors.Add(TextureBufferDescriptor{ + 
.cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, + }); + } else { + index = descriptors.Add(TextureDescriptor{ + .type = flags.type, + .is_depth = flags.is_depth != 0, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, + }); + } + break; } inst->SetArg(0, IR::Value{index}); } -- cgit v1.2.3 From 9280cd649a9c4cd53b929643377547db598bf5f0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:22:20 -0300 Subject: shader: Move LaneId to the warp emission file and fix AMD --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index bc23b0211..8c63c9876 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -343,11 +343,11 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LocalInvocationId: info.uses_local_invocation_id = true; break; + case IR::Opcode::LaneId: case IR::Opcode::ShuffleIndex: case IR::Opcode::ShuffleUp: case IR::Opcode::ShuffleDown: case IR::Opcode::ShuffleButterfly: - case IR::Opcode::LaneId: info.uses_subgroup_invocation_id = true; break; case IR::Opcode::GetCbufU8: -- cgit v1.2.3 From ab543f18213133b3076b81f30df386d5cb470e49 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:37:03 -0300 Subject: spirv: Guard against typeless image reads on unsupported devices --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 8c63c9876..9ef8688c9 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -421,6 +421,13 @@ void VisitUsages(Info& info, IR::Inst& inst) { inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; } + case IR::Opcode::ImageRead: { + const auto flags{inst.Flags()}; + info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless; + info.uses_sparse_residency |= + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; + break; + } case IR::Opcode::SubgroupEqMask: case IR::Opcode::SubgroupLtMask: case IR::Opcode::SubgroupLeMask: -- cgit v1.2.3 From 3db2b3effa953ae66457b7a19b419fc4db2c4801 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 11 Apr 2021 02:07:02 -0400 Subject: shader: Implement ATOM/S and RED --- .../ir_opt/collect_shader_info_pass.cpp | 70 ++++++++++++ .../global_memory_to_storage_buffer_pass.cpp | 121 ++++++++++++++++++++- .../ir_opt/lower_fp16_to_fp32.cpp | 12 ++ 3 files changed, 201 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 9ef8688c9..73373576b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -145,6 +145,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPOrdGreaterThanEqual16: case IR::Opcode::FPUnordGreaterThanEqual16: case IR::Opcode::FPIsNan16: + case 
IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::StorageAtomicAddF16x2: + case IR::Opcode::StorageAtomicMinF16x2: + case IR::Opcode::StorageAtomicMaxF16x2: info.uses_fp16 = true; break; case IR::Opcode::CompositeConstructF64x2: @@ -310,6 +314,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ConvertF16U64: case IR::Opcode::ConvertF32U64: case IR::Opcode::ConvertF64U64: + case IR::Opcode::SharedAtomicExchange64: info.uses_int64 = true; break; default: @@ -444,6 +449,71 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FSwizzleAdd: info.uses_fswzadd = true; break; + case IR::Opcode::SharedAtomicInc32: + info.uses_shared_increment = true; + break; + case IR::Opcode::SharedAtomicDec32: + info.uses_shared_decrement = true; + break; + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::StorageAtomicInc32: + info.uses_global_increment = true; + break; + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::StorageAtomicDec32: + info.uses_global_decrement = true; + break; + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::StorageAtomicAddF32: + info.uses_atomic_f32_add = true; + break; + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::StorageAtomicAddF16x2: + info.uses_atomic_f16x2_add = true; + break; + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::StorageAtomicAddF32x2: + info.uses_atomic_f32x2_add = true; + break; + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::StorageAtomicMinF16x2: + info.uses_atomic_f16x2_min = true; + break; + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::StorageAtomicMinF32x2: + info.uses_atomic_f32x2_min = true; + break; + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::StorageAtomicMaxF16x2: + info.uses_atomic_f16x2_max = true; + break; + case IR::Opcode::GlobalAtomicMaxF32x2: + case IR::Opcode::StorageAtomicMaxF32x2: + info.uses_atomic_f32x2_max = true; + break; + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::StorageAtomicIAdd64: + case IR::Opcode::StorageAtomicSMin64: + case IR::Opcode::StorageAtomicUMin64: + case IR::Opcode::StorageAtomicSMax64: + case IR::Opcode::StorageAtomicUMax64: + case IR::Opcode::StorageAtomicAnd64: + case IR::Opcode::StorageAtomicOr64: + case IR::Opcode::StorageAtomicXor64: + info.uses_64_bit_atomics = true; + break; + case IR::Opcode::SharedAtomicExchange64: + info.uses_64_bit_atomics = true; + info.uses_shared_memory_u32x2 = true; + break; default: break; } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index afe871505..0d4f266c3 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -72,6 +72,33 @@ bool IsGlobalMemory(const IR::Inst& inst) { case IR::Opcode::WriteGlobal32: case IR::Opcode::WriteGlobal64: case IR::Opcode::WriteGlobal128: + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case 
IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: return true; default: return false; @@ -125,6 +152,60 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) { return IR::Opcode::WriteStorage64; case IR::Opcode::WriteGlobal128: return IR::Opcode::WriteStorage128; + case IR::Opcode::GlobalAtomicIAdd32: + return IR::Opcode::StorageAtomicIAdd32; + case IR::Opcode::GlobalAtomicSMin32: + return IR::Opcode::StorageAtomicSMin32; + case IR::Opcode::GlobalAtomicUMin32: + return IR::Opcode::StorageAtomicUMin32; + case IR::Opcode::GlobalAtomicSMax32: + return IR::Opcode::StorageAtomicSMax32; + case IR::Opcode::GlobalAtomicUMax32: + return IR::Opcode::StorageAtomicUMax32; + case IR::Opcode::GlobalAtomicInc32: + return IR::Opcode::StorageAtomicInc32; + case IR::Opcode::GlobalAtomicDec32: + return IR::Opcode::StorageAtomicDec32; + case IR::Opcode::GlobalAtomicAnd32: + return IR::Opcode::StorageAtomicAnd32; + case IR::Opcode::GlobalAtomicOr32: + return IR::Opcode::StorageAtomicOr32; + case IR::Opcode::GlobalAtomicXor32: + return IR::Opcode::StorageAtomicXor32; + case IR::Opcode::GlobalAtomicIAdd64: + return IR::Opcode::StorageAtomicIAdd64; + case IR::Opcode::GlobalAtomicSMin64: + return IR::Opcode::StorageAtomicSMin64; + case IR::Opcode::GlobalAtomicUMin64: + return IR::Opcode::StorageAtomicUMin64; + case IR::Opcode::GlobalAtomicSMax64: + return IR::Opcode::StorageAtomicSMax64; + case IR::Opcode::GlobalAtomicUMax64: + return IR::Opcode::StorageAtomicUMax64; + case IR::Opcode::GlobalAtomicAnd64: + return IR::Opcode::StorageAtomicAnd64; + case IR::Opcode::GlobalAtomicOr64: + return IR::Opcode::StorageAtomicOr64; + case IR::Opcode::GlobalAtomicXor64: + return IR::Opcode::StorageAtomicXor64; + case IR::Opcode::GlobalAtomicExchange32: + return IR::Opcode::StorageAtomicExchange32; + case IR::Opcode::GlobalAtomicExchange64: + return IR::Opcode::StorageAtomicExchange64; + case IR::Opcode::GlobalAtomicAddF32: + return IR::Opcode::StorageAtomicAddF32; + case IR::Opcode::GlobalAtomicAddF16x2: + return IR::Opcode::StorageAtomicAddF16x2; + case IR::Opcode::GlobalAtomicMinF16x2: + return IR::Opcode::StorageAtomicMinF16x2; + case IR::Opcode::GlobalAtomicMaxF16x2: + return IR::Opcode::StorageAtomicMaxF16x2; + case IR::Opcode::GlobalAtomicAddF32x2: + return IR::Opcode::StorageAtomicAddF32x2; + case IR::Opcode::GlobalAtomicMinF32x2: + return IR::Opcode::StorageAtomicMinF32x2; + case IR::Opcode::GlobalAtomicMaxF32x2: + return IR::Opcode::StorageAtomicMaxF32x2; default: throw InvalidArgument("Invalid global memory opcode {}", opcode); } @@ -328,6 +409,16 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index inst.Invalidate(); } +/// Replace an atomic operation on global memory instruction with its storage buffer equivalent +void ReplaceAtomic(IR::Block& block, IR::Inst& inst, 
const IR::U32& storage_index, + const IR::U32& offset) { + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::Value value{ + &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})}; + inst.ReplaceUsesWith(value); +} + /// Replace a global memory instruction with its storage buffer equivalent void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { @@ -348,6 +439,34 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, case IR::Opcode::WriteGlobal64: case IR::Opcode::WriteGlobal128: return ReplaceWrite(block, inst, storage_index, offset); + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: + return ReplaceAtomic(block, inst, storage_index, offset); default: throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); } @@ -364,7 +483,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { CollectStorageBuffers(*block, inst, info); } } - u32 storage_index{}; for (const StorageBufferAddr& storage_buffer : info.set) { program.info.storage_buffers_descriptors.push_back({ .cbuf_index = storage_buffer.index, @@ -372,7 +490,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { .count = 1, .is_written{info.writes.contains(storage_buffer)}, }); - ++storage_index; } for (const StorageInst& storage_inst : info.to_replace) { const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 52576b07f..62e73d52d 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -114,6 +114,18 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::ConvertF32U32; case IR::Opcode::ConvertF16U64: return IR::Opcode::ConvertF32U64; + case IR::Opcode::GlobalAtomicAddF16x2: + return IR::Opcode::GlobalAtomicAddF32x2; + case IR::Opcode::StorageAtomicAddF16x2: + return IR::Opcode::StorageAtomicAddF32x2; + case IR::Opcode::GlobalAtomicMinF16x2: + return IR::Opcode::GlobalAtomicMinF32x2; + case IR::Opcode::StorageAtomicMinF16x2: + return IR::Opcode::StorageAtomicMinF32x2; + case IR::Opcode::GlobalAtomicMaxF16x2: + return IR::Opcode::GlobalAtomicMaxF32x2; + case IR::Opcode::StorageAtomicMaxF16x2: + return IR::Opcode::StorageAtomicMaxF32x2; default: return op; } -- cgit v1.2.3 From 5c61e860e4f83524ffce10ca447398e83de81640 
Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 11 Apr 2021 19:16:12 -0300
Subject: shader: Implement SR_THREAD_KILL

---
 src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/shader_recompiler/ir_opt')

diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 73373576b..c80d2d29c 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -348,6 +348,9 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::LocalInvocationId:
         info.uses_local_invocation_id = true;
         break;
+    case IR::Opcode::IsHelperInvocation:
+        info.uses_is_helper_invocation = true;
+        break;
     case IR::Opcode::LaneId:
     case IR::Opcode::ShuffleIndex:
     case IR::Opcode::ShuffleUp:
--
cgit v1.2.3

From 23b87147321d02abf47868f231f00f29b0d3b87d Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 11 Apr 2021 21:02:44 -0300
Subject: spirv: Define StorageImageWriteWithoutFormat capability when used

---
 src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'src/shader_recompiler/ir_opt')

diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index c80d2d29c..ab529e86d 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -436,6 +436,11 @@ void VisitUsages(Info& info, IR::Inst& inst) {
             inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
         break;
     }
+    case IR::Opcode::ImageWrite: {
+        const auto flags{inst.Flags()};
+        info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless;
+        break;
+    }
     case IR::Opcode::SubgroupEqMask:
     case IR::Opcode::SubgroupLtMask:
     case IR::Opcode::SubgroupLeMask:
--
cgit v1.2.3

From 2597cee85b74be40bfecf0dc9cda90263d6cce40 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Mon, 12 Apr 2021 19:41:53 -0300
Subject: shader: Add constant propagation for *&^| binary operations

---
 src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'src/shader_recompiler/ir_opt')

diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 61fbbe04c..ee73b5b60 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -422,6 +422,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
         return FoldAdd(block, inst);
     case IR::Opcode::ISub32:
         return FoldISub32(inst);
+    case IR::Opcode::IMul32:
+        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
+        return;
     case IR::Opcode::BitCastF32U32:
         return FoldBitCast(inst, IR::Opcode::BitCastU32F32);
     case IR::Opcode::BitCastU32F32:
@@ -479,6 +482,15 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
     case IR::Opcode::INotEqual:
         FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; });
         return;
+    case IR::Opcode::BitwiseAnd32:
+        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; });
+        return;
+    case IR::Opcode::BitwiseOr32:
+        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; });
+        return;
+    case IR::Opcode::BitwiseXor32:
+        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; });
+        return;
     case IR::Opcode::BitFieldUExtract:
FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { if (static_cast(shift) + static_cast(count) > Common::BitSize()) { -- cgit v1.2.3 From fa75b9b0626c8e118e27207dd1e82e2f415fc0bc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 13 Apr 2021 05:32:21 -0300 Subject: spirv: Rework storage buffers and shader memory --- .../ir_opt/collect_shader_info_pass.cpp | 69 ++++++++++++++++++++-- 1 file changed, 64 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index ab529e86d..116d93c1c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -315,6 +315,23 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ConvertF32U64: case IR::Opcode::ConvertF64U64: case IR::Opcode::SharedAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::StorageAtomicIAdd64: + case IR::Opcode::StorageAtomicSMin64: + case IR::Opcode::StorageAtomicUMin64: + case IR::Opcode::StorageAtomicSMax64: + case IR::Opcode::StorageAtomicUMax64: + case IR::Opcode::StorageAtomicAnd64: + case IR::Opcode::StorageAtomicOr64: + case IR::Opcode::StorageAtomicXor64: info.uses_int64 = true; break; default: @@ -457,46 +474,91 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FSwizzleAdd: info.uses_fswzadd = true; break; + case IR::Opcode::LoadStorageU8: + case IR::Opcode::LoadStorageS8: + case IR::Opcode::WriteStorageU8: + case IR::Opcode::WriteStorageS8: + info.used_storage_buffer_types |= IR::Type::U8; + break; + case IR::Opcode::LoadStorageU16: + case IR::Opcode::LoadStorageS16: + case IR::Opcode::WriteStorageU16: + case IR::Opcode::WriteStorageS16: + info.used_storage_buffer_types |= IR::Type::U16; + break; + case IR::Opcode::LoadStorage32: + case IR::Opcode::WriteStorage32: + case IR::Opcode::StorageAtomicIAdd32: + case IR::Opcode::StorageAtomicSMin32: + case IR::Opcode::StorageAtomicUMin32: + case IR::Opcode::StorageAtomicSMax32: + case IR::Opcode::StorageAtomicUMax32: + case IR::Opcode::StorageAtomicAnd32: + case IR::Opcode::StorageAtomicOr32: + case IR::Opcode::StorageAtomicXor32: + case IR::Opcode::StorageAtomicExchange32: + info.used_storage_buffer_types |= IR::Type::U32; + break; + case IR::Opcode::LoadStorage64: + case IR::Opcode::WriteStorage64: + info.used_storage_buffer_types |= IR::Type::U32x2; + break; + case IR::Opcode::LoadStorage128: + case IR::Opcode::WriteStorage128: + info.used_storage_buffer_types |= IR::Type::U32x4; + break; case IR::Opcode::SharedAtomicInc32: info.uses_shared_increment = true; break; case IR::Opcode::SharedAtomicDec32: info.uses_shared_decrement = true; break; + case IR::Opcode::SharedAtomicExchange64: + info.uses_int64_bit_atomics = true; + break; case IR::Opcode::GlobalAtomicInc32: case IR::Opcode::StorageAtomicInc32: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_global_increment = true; break; case IR::Opcode::GlobalAtomicDec32: case IR::Opcode::StorageAtomicDec32: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_global_decrement = true; break; case 
IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::StorageAtomicAddF32: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32_add = true; break; case IR::Opcode::GlobalAtomicAddF16x2: case IR::Opcode::StorageAtomicAddF16x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f16x2_add = true; break; case IR::Opcode::GlobalAtomicAddF32x2: case IR::Opcode::StorageAtomicAddF32x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32x2_add = true; break; case IR::Opcode::GlobalAtomicMinF16x2: case IR::Opcode::StorageAtomicMinF16x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f16x2_min = true; break; case IR::Opcode::GlobalAtomicMinF32x2: case IR::Opcode::StorageAtomicMinF32x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32x2_min = true; break; case IR::Opcode::GlobalAtomicMaxF16x2: case IR::Opcode::StorageAtomicMaxF16x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f16x2_max = true; break; case IR::Opcode::GlobalAtomicMaxF32x2: case IR::Opcode::StorageAtomicMaxF32x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32x2_max = true; break; case IR::Opcode::GlobalAtomicIAdd64: @@ -516,11 +578,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::StorageAtomicAnd64: case IR::Opcode::StorageAtomicOr64: case IR::Opcode::StorageAtomicXor64: - info.uses_64_bit_atomics = true; - break; - case IR::Opcode::SharedAtomicExchange64: - info.uses_64_bit_atomics = true; - info.uses_shared_memory_u32x2 = true; + info.used_storage_buffer_types |= IR::Type::U64; + info.uses_int64_bit_atomics = true; break; default: break; -- cgit v1.2.3 From d8ec99dadaa033aa440671572ed38e2614815e11 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Apr 2021 18:09:18 -0300 Subject: spirv: Implement Layer stores --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 116d93c1c..617ec05ce 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -83,6 +83,9 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::ClipDistance7: info.stores_clip_distance = true; break; + case IR::Attribute::Layer: + info.stores_layer = true; + break; case IR::Attribute::ViewportIndex: info.stores_viewport_index = true; break; -- cgit v1.2.3 From 416e1b7441d34512fcb0ffed014daf7ca4bb62bd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Apr 2021 21:36:36 -0300 Subject: spirv: Implement image buffers --- src/shader_recompiler/ir_opt/texture_pass.cpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index a7b1fcfad..e1d5a2ce1 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -158,9 +158,11 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { class Descriptors { public: explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, + ImageBufferDescriptors& image_buffer_descriptors_, TextureDescriptors& texture_descriptors_, ImageDescriptors& image_descriptors_) : 
texture_buffer_descriptors{texture_buffer_descriptors_}, + image_buffer_descriptors{image_buffer_descriptors_}, texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {} u32 Add(const TextureBufferDescriptor& desc) { @@ -170,6 +172,13 @@ public: }); } + u32 Add(const ImageBufferDescriptor& desc) { + return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { + return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset; + }); + } + u32 Add(const TextureDescriptor& desc) { return Add(texture_descriptors, desc, [&desc](const auto& existing) { return desc.cbuf_index == existing.cbuf_index && @@ -200,6 +209,7 @@ private: } TextureBufferDescriptors& texture_buffer_descriptors; + ImageBufferDescriptors& image_buffer_descriptors; TextureDescriptors& texture_descriptors; ImageDescriptors& image_descriptors; }; @@ -224,6 +234,7 @@ void TexturePass(Environment& env, IR::Program& program) { }); Descriptors descriptors{ program.info.texture_buffer_descriptors, + program.info.image_buffer_descriptors, program.info.texture_descriptors, program.info.image_descriptors, }; @@ -261,7 +272,13 @@ void TexturePass(Environment& env, IR::Program& program) { case IR::Opcode::ImageWrite: { const bool is_written{inst->GetOpcode() == IR::Opcode::ImageWrite}; if (flags.type == TextureType::Buffer) { - throw NotImplementedException("Image buffer"); + index = descriptors.Add(ImageBufferDescriptor{ + .format = flags.image_format, + .is_written = is_written, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, + }); } else { index = descriptors.Add(ImageDescriptor{ .type = flags.type, -- cgit v1.2.3 From 34519d3fc637fa921c0105e01dbbc50cfacd924d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Apr 2021 21:40:30 -0300 Subject: shader: Mark atomic instructions as writes --- .../global_memory_to_storage_buffer_pass.cpp | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 0d4f266c3..378a3a915 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -115,6 +115,33 @@ bool IsGlobalMemoryWrite(const IR::Inst& inst) { case IR::Opcode::WriteGlobal32: case IR::Opcode::WriteGlobal64: case IR::Opcode::WriteGlobal128: + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case 
IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: return true; default: return false; -- cgit v1.2.3 From 183855e396cc6918d36fbf3e38ea426e934b4e3e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 15 Apr 2021 22:46:11 -0300 Subject: shader: Implement tessellation shaders, polygon mode and invocation id --- .../ir_opt/collect_shader_info_pass.cpp | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 617ec05ce..aadcf7999 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -53,6 +53,10 @@ void GetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::PointSpriteT: info.loads_point_coord = true; break; + case IR::Attribute::TessellationEvaluationPointU: + case IR::Attribute::TessellationEvaluationPointV: + info.loads_tess_coord = true; + break; default: throw NotImplementedException("Get attribute {}", attribute); } @@ -94,6 +98,34 @@ void SetAttribute(Info& info, IR::Attribute attribute) { } } +void GetPatch(Info& info, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Reading non-generic patch {}", patch); + } + info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; +} + +void SetPatch(Info& info, IR::Patch patch) { + if (IR::IsGeneric(patch)) { + info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; + return; + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodBottom: + info.stores_tess_level_outer = true; + break; + case IR::Patch::TessellationLodInteriorU: + case IR::Patch::TessellationLodInteriorV: + info.stores_tess_level_inner = true; + break; + default: + throw NotImplementedException("Set patch {}", patch); + } +} + void VisitUsages(Info& info, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::CompositeConstructF16x2: @@ -350,6 +382,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::SetAttribute: SetAttribute(info, inst.Arg(0).Attribute()); break; + case IR::Opcode::GetPatch: + GetPatch(info, inst.Arg(0).Patch()); + break; + case IR::Opcode::SetPatch: + SetPatch(info, inst.Arg(0).Patch()); + break; case IR::Opcode::GetAttributeIndexed: info.loads_indexed_attributes = true; break; @@ -368,6 +406,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LocalInvocationId: info.uses_local_invocation_id = true; break; + case IR::Opcode::InvocationId: + info.uses_invocation_id = true; + break; case IR::Opcode::IsHelperInvocation: info.uses_is_helper_invocation = true; break; -- cgit v1.2.3 From b0f1255c8cb800e9f336be66b3f16c3d958673d2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 01:55:06 -0300 Subject: shader: Implement PrimitiveId --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index aadcf7999..c84bf211f 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -34,6 +34,9 @@ void GetAttribute(Info& info, IR::Attribute attribute) { return; } switch 
(attribute) { + case IR::Attribute::PrimitiveId: + info.loads_primitive_id = true; + break; case IR::Attribute::PositionX: case IR::Attribute::PositionY: case IR::Attribute::PositionZ: -- cgit v1.2.3 From e3514bcd6b09f623da14c4f3c4ffd988e75577ed Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 16:31:15 -0300 Subject: spirv: Implement ViewportMask with NV_viewport_array2 --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index c84bf211f..9631a445e 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -96,6 +96,9 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::ViewportIndex: info.stores_viewport_index = true; break; + case IR::Attribute::ViewportMask: + info.stores_viewport_mask = true; + break; default: throw NotImplementedException("Set attribute {}", attribute); } -- cgit v1.2.3 From 95815a3883d708f71db5119f42243e183f32f9a2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 17:22:59 -0300 Subject: shader: Implement PIXLD.MY_INDEX --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 9631a445e..5d1310466 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -415,6 +415,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::InvocationId: info.uses_invocation_id = true; break; + case IR::Opcode::SampleId: + info.uses_sample_id = true; + break; case IR::Opcode::IsHelperInvocation: info.uses_is_helper_invocation = true; break; -- cgit v1.2.3 From 80940b17069f6baa733a9b572445b27bc7509137 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 18:47:26 -0300 Subject: shader: Implement SampleMask --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 5d1310466..60b7d3a36 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -403,6 +403,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::SetFragColor: info.stores_frag_color[inst.Arg(0).U32()] = true; break; + case IR::Opcode::SetSampleMask: + info.stores_sample_mask = true; + break; case IR::Opcode::SetFragDepth: info.stores_frag_depth = true; break; -- cgit v1.2.3 From 50f8007172ce143a632270510f96093c82018952 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 17 Apr 2021 16:40:35 -0300 Subject: shader: Fix Phi node types --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 346fcc377..ddd679e39 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ 
-176,6 +176,8 @@ public: } else if (!sealed_blocks.contains(block)) { // Incomplete CFG IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); + incomplete_phis[block].insert_or_assign(variable, phi); stack.back().result = IR::Value{&*phi}; } else if (const std::span imm_preds{block->ImmediatePredecessors()}; @@ -187,6 +189,8 @@ public: } else { // Break potential cycles with operandless phi IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); + WriteVariable(variable, block, IR::Value{phi}); stack.back().phi = phi; -- cgit v1.2.3 From 04c459fc8d99b41fa8a03c49523599e9bf797f9d Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 17 Apr 2021 11:56:45 +0200 Subject: shader: Implement fine derivates constant propagation --- .../ir_opt/collect_shader_info_pass.cpp | 4 ++ .../ir_opt/constant_propagation_pass.cpp | 67 ++++++++++++++++++++++ 2 files changed, 71 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 60b7d3a36..e5688667b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -530,6 +530,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FSwizzleAdd: info.uses_fswzadd = true; break; + case IR::Opcode::DPdxFine: + case IR::Opcode::DPdyFine: + info.uses_derivates = true; + break; case IR::Opcode::LoadStorageU8: case IR::Opcode::LoadStorageS8: case IR::Opcode::WriteStorageU8: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index ee73b5b60..983fb20ab 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -412,6 +412,71 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser inst.ReplaceUsesWith(*result); } +IR::Value GetThroughCast(IR::Value value, IR::Opcode expected_cast) { + if (value.IsImmediate()) { + return value; + } + IR::Inst* const inst{value.InstRecursive()}; + if (inst->GetOpcode() == expected_cast) { + return inst->Arg(0).Resolve(); + } + return value; +} + +void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { + const IR::Value swizzle{inst.Arg(2)}; + if (!swizzle.IsImmediate()) { + return; + } + + const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)}; + const IR::Value value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)}; + + if (value_1.IsImmediate()) { + return; + } + + const u32 swizzle_value{swizzle.U32()}; + if (swizzle_value != 0x99 && swizzle_value != 0xA5) { + return; + } + + IR::Inst* const inst2{value_1.InstRecursive()}; + if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) { + return; + } + const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; + if (value_2 != value_3) { + return; + } + + const IR::Value index{inst2->Arg(1)}; + const IR::Value clamp{inst2->Arg(2)}; + const IR::Value segmentation_mask{inst2->Arg(3)}; + + if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) { + return; + } + + if (clamp.U32() != 3 || segmentation_mask.U32() != 28) { + return; + } + + if (swizzle_value == 0x99) { + // DPdxFine + if (index.U32() == 1) { + IR::IREmitter 
ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{value_2})); + } + } else if (swizzle_value == 0xA5) { + // DPdyFine + if (index.U32() == 2) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{value_2})); + } + } +} + void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::GetRegister: @@ -532,6 +597,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::CompositeExtractF16x4: return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4, IR::Opcode::CompositeInsertF16x4); + case IR::Opcode::FSwizzleAdd: + return FoldFSwizzleAdd(block, inst); default: break; } -- cgit v1.2.3 From 080857b60e78836901cf6e9601f48613812fcd04 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 17 Apr 2021 12:51:43 +0200 Subject: shader: Add coarse derivatives --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 2 ++ src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 8 -------- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index e5688667b..7473e0bc2 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -532,6 +532,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { break; case IR::Opcode::DPdxFine: case IR::Opcode::DPdyFine: + case IR::Opcode::DPdxCoarse: + case IR::Opcode::DPdyCoarse: info.uses_derivates = true; break; case IR::Opcode::LoadStorageU8: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 983fb20ab..7e86f64a8 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -428,19 +428,15 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { if (!swizzle.IsImmediate()) { return; } - const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)}; const IR::Value value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)}; - if (value_1.IsImmediate()) { return; } - const u32 swizzle_value{swizzle.U32()}; if (swizzle_value != 0x99 && swizzle_value != 0xA5) { return; } - IR::Inst* const inst2{value_1.InstRecursive()}; if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) { return; @@ -449,19 +445,15 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { if (value_2 != value_3) { return; } - const IR::Value index{inst2->Arg(1)}; const IR::Value clamp{inst2->Arg(2)}; const IR::Value segmentation_mask{inst2->Arg(3)}; - if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) { return; } - if (clamp.U32() != 3 || segmentation_mask.U32() != 28) { return; } - if (swizzle_value == 0x99) { // DPdxFine if (index.U32() == 1) { -- cgit v1.2.3 From f69d0b91ffad7d9ab827f55a9297b8f6da815cc9 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 18 Apr 2021 09:07:48 +0200 Subject: shader: Address feedback --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp 
index 7473e0bc2..0500a5141 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -534,7 +534,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::DPdyFine:
     case IR::Opcode::DPdxCoarse:
     case IR::Opcode::DPdyCoarse:
-        info.uses_derivates = true;
+        info.uses_derivatives = true;
         break;
     case IR::Opcode::LoadStorageU8:
     case IR::Opcode::LoadStorageS8:
--
cgit v1.2.3

From 7018e524f5e6217b3259333acc4ea09ad036d331 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Mon, 19 Apr 2021 16:33:23 -0300
Subject: shader: Add NVN storage buffer fallbacks

When we can't track the SSBO origin of a global memory instruction,
leave it as a global memory operation and assume these pointers are in
the NVN storage buffer slots, then apply a linear search in the
shader's runtime.

---
 .../ir_opt/collect_shader_info_pass.cpp            | 53 ++++++++++++++++++++++
 .../global_memory_to_storage_buffer_pass.cpp       | 46 ++++---------------
 2 files changed, 61 insertions(+), 38 deletions(-)

(limited to 'src/shader_recompiler/ir_opt')

diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 0500a5141..cccf0909d 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -187,6 +187,8 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::FPUnordGreaterThanEqual16:
     case IR::Opcode::FPIsNan16:
     case IR::Opcode::GlobalAtomicAddF16x2:
+    case IR::Opcode::GlobalAtomicMinF16x2:
+    case IR::Opcode::GlobalAtomicMaxF16x2:
     case IR::Opcode::StorageAtomicAddF16x2:
     case IR::Opcode::StorageAtomicMinF16x2:
     case IR::Opcode::StorageAtomicMaxF16x2:
@@ -373,7 +375,58 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::StorageAtomicAnd64:
     case IR::Opcode::StorageAtomicOr64:
     case IR::Opcode::StorageAtomicXor64:
+    case IR::Opcode::StorageAtomicExchange64:
+        info.uses_int64 = true;
+        break;
+    default:
+        break;
+    }
+    switch (inst.GetOpcode()) {
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::LoadGlobal32:
+    case IR::Opcode::LoadGlobal64:
+    case IR::Opcode::LoadGlobal128:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::WriteGlobal32:
+    case IR::Opcode::WriteGlobal64:
+    case IR::Opcode::WriteGlobal128:
+    case IR::Opcode::GlobalAtomicIAdd32:
+    case IR::Opcode::GlobalAtomicSMin32:
+    case IR::Opcode::GlobalAtomicUMin32:
+    case IR::Opcode::GlobalAtomicSMax32:
+    case IR::Opcode::GlobalAtomicUMax32:
+    case IR::Opcode::GlobalAtomicInc32:
+    case IR::Opcode::GlobalAtomicDec32:
+    case IR::Opcode::GlobalAtomicAnd32:
+    case IR::Opcode::GlobalAtomicOr32:
+    case IR::Opcode::GlobalAtomicXor32:
+    case IR::Opcode::GlobalAtomicExchange32:
+    case IR::Opcode::GlobalAtomicIAdd64:
+    case IR::Opcode::GlobalAtomicSMin64:
+    case IR::Opcode::GlobalAtomicUMin64:
+    case IR::Opcode::GlobalAtomicSMax64:
+    case IR::Opcode::GlobalAtomicUMax64:
+    case IR::Opcode::GlobalAtomicAnd64:
+    case IR::Opcode::GlobalAtomicOr64:
+    case IR::Opcode::GlobalAtomicXor64:
+    case IR::Opcode::GlobalAtomicExchange64:
+    case IR::Opcode::GlobalAtomicAddF32:
+    case IR::Opcode::GlobalAtomicAddF16x2:
+    case IR::Opcode::GlobalAtomicAddF32x2:
+    case IR::Opcode::GlobalAtomicMinF16x2:
+    case IR::Opcode::GlobalAtomicMinF32x2:
+    case
IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: info.uses_int64 = true; + info.uses_global_memory = true; + info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2; + info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4; break; default: break; diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 378a3a915..f294d297f 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -11,6 +11,7 @@ #include #include +#include "common/alignment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/breadth_first_search.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -244,39 +245,6 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce storage_buffer.offset < bias.offset_end; } -/// Discards a global memory operation, reads return zero and writes are ignored -void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { - IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - const IR::Value zero{u32{0}}; - switch (inst.GetOpcode()) { - case IR::Opcode::LoadGlobalS8: - case IR::Opcode::LoadGlobalU8: - case IR::Opcode::LoadGlobalS16: - case IR::Opcode::LoadGlobalU16: - case IR::Opcode::LoadGlobal32: - inst.ReplaceUsesWith(zero); - break; - case IR::Opcode::LoadGlobal64: - inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero)}); - break; - case IR::Opcode::LoadGlobal128: - inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero, zero, zero)}); - break; - case IR::Opcode::WriteGlobalS8: - case IR::Opcode::WriteGlobalU8: - case IR::Opcode::WriteGlobalS16: - case IR::Opcode::WriteGlobalU16: - case IR::Opcode::WriteGlobal32: - case IR::Opcode::WriteGlobal64: - case IR::Opcode::WriteGlobal128: - inst.Invalidate(); - break; - default: - throw LogicError("Invalid opcode to discard its global memory operation {}", - inst.GetOpcode()); - } -} - struct LowAddrInfo { IR::U32 value; s32 imm_offset; @@ -350,6 +318,10 @@ std::optional Track(const IR::Value& value, const Bias* bias) .index{index.U32()}, .offset{offset.U32()}, }; + if (!Common::IsAligned(storage_buffer.offset, 16)) { + // The SSBO pointer has to be aligned + return std::nullopt; + } if (bias && !MeetsBias(storage_buffer, *bias)) { // We have to blacklist some addresses in case we wrongly // point to them @@ -372,19 +344,17 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) // Track the low address of the instruction const std::optional low_addr_info{TrackLowAddress(&inst)}; if (!low_addr_info) { - DiscardGlobalMemory(block, inst); + // Failed to track the low address, use NVN fallbacks return; } // First try to find storage buffers in the NVN address const IR::U32 low_addr{low_addr_info->value}; - std::optional storage_buffer{Track(low_addr, &nvn_bias)}; + std::optional storage_buffer{Track(low_addr, &nvn_bias)}; if (!storage_buffer) { // If it fails, track without a bias storage_buffer = Track(low_addr, nullptr); if (!storage_buffer) { - // If that also failed, drop the global memory usage - // LOG_ERROR - DiscardGlobalMemory(block, inst); + // If that also fails, use NVN fallbacks return; } } -- cgit v1.2.3 From a8d46a5eae4e0d65c08f6574cffc48bc55d00940 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 19 Apr 2021 
16:36:57 -0300 Subject: shader: Add constant propagation for arithmetic right shifts --- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 7e86f64a8..2206f93c2 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -482,6 +482,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::IMul32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; }); return; + case IR::Opcode::ShiftRightArithmetic32: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast(a >> b); }); + return; case IR::Opcode::BitCastF32U32: return FoldBitCast(inst, IR::Opcode::BitCastU32F32); case IR::Opcode::BitCastU32F32: -- cgit v1.2.3 From dd860b684c7695097107c1186e96a70e754e5990 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Apr 2021 19:48:45 -0300 Subject: shader: Implement D3D samplers --- src/shader_recompiler/ir_opt/texture_pass.cpp | 80 +++++++++++++++++++++++---- 1 file changed, 69 insertions(+), 11 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index e1d5a2ce1..5ac485522 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -19,6 +19,9 @@ namespace { struct ConstBufferAddr { u32 index; u32 offset; + u32 secondary_index; + u32 secondary_offset; + bool has_secondary; }; struct TextureInst { @@ -109,9 +112,38 @@ bool IsTextureInstruction(const IR::Inst& inst) { return IndexedInstruction(inst) != IR::Opcode::Void; } +std::optional TryGetConstBuffer(const IR::Inst* inst); + +std::optional Track(const IR::Value& value) { + return IR::BreadthFirstSearch(value, TryGetConstBuffer); +} + std::optional TryGetConstBuffer(const IR::Inst* inst) { - if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { + switch (inst->GetOpcode()) { + default: return std::nullopt; + case IR::Opcode::BitwiseOr32: { + std::optional lhs{Track(inst->Arg(0))}; + std::optional rhs{Track(inst->Arg(1))}; + if (!lhs || !rhs) { + return std::nullopt; + } + if (lhs->has_secondary || rhs->has_secondary) { + return std::nullopt; + } + if (lhs->index > rhs->index || lhs->offset > rhs->offset) { + std::swap(lhs, rhs); + } + return ConstBufferAddr{ + .index = lhs->index, + .offset = lhs->offset, + .secondary_index = rhs->index, + .secondary_offset = rhs->offset, + .has_secondary = true, + }; + } + case IR::Opcode::GetCbufU32: + break; } const IR::Value index{inst->Arg(0)}; const IR::Value offset{inst->Arg(1)}; @@ -127,13 +159,12 @@ std::optional TryGetConstBuffer(const IR::Inst* inst) { return ConstBufferAddr{ .index{index.U32()}, .offset{offset.U32()}, + .secondary_index = 0, + .secondary_offset = 0, + .has_secondary = false, }; } -std::optional Track(const IR::Value& value) { - return IR::BreadthFirstSearch(value, TryGetConstBuffer); -} - TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { ConstBufferAddr addr; if (IsBindless(inst)) { @@ -146,6 +177,9 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { addr = ConstBufferAddr{ .index = env.TextureBoundBuffer(), .offset = inst.Arg(0).U32(), + .secondary_index = 0, + .secondary_offset = 0, + .has_secondary = false, }; } 
return TextureInst{ @@ -155,6 +189,14 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { }; } +TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { + const u32 secondary_index{cbuf.has_secondary ? cbuf.index : cbuf.secondary_index}; + const u32 secondary_offset{cbuf.has_secondary ? cbuf.offset : cbuf.secondary_offset}; + const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)}; + const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)}; + return env.ReadTextureType(lhs_raw | rhs_raw); +} + class Descriptors { public: explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, @@ -167,8 +209,11 @@ public: u32 Add(const TextureBufferDescriptor& desc) { return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { - return desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == existing.cbuf_offset; + return desc.has_secondary == existing.has_secondary && + desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && + desc.secondary_cbuf_index == existing.secondary_cbuf_index && + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; }); } @@ -181,8 +226,12 @@ public: u32 Add(const TextureDescriptor& desc) { return Add(texture_descriptors, desc, [&desc](const auto& existing) { - return desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == existing.cbuf_offset && desc.type == existing.type; + return desc.type == existing.type && desc.is_depth == existing.is_depth && + desc.has_secondary == existing.has_secondary && + desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && + desc.secondary_cbuf_index == existing.secondary_cbuf_index && + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; }); } @@ -247,14 +296,14 @@ void TexturePass(Environment& env, IR::Program& program) { auto flags{inst->Flags()}; switch (inst->GetOpcode()) { case IR::Opcode::ImageQueryDimensions: - flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); + flags.type.Assign(ReadTextureType(env, cbuf)); inst->SetFlags(flags); break; case IR::Opcode::ImageFetch: if (flags.type != TextureType::Color1D) { break; } - if (env.ReadTextureType(cbuf.index, cbuf.offset) == TextureType::Buffer) { + if (ReadTextureType(env, cbuf) == TextureType::Buffer) { // Replace with the bound texture type only when it's a texture buffer // If the instruction is 1D and the bound type is 2D, don't change the code and let // the rasterizer robustness handle it @@ -270,6 +319,9 @@ void TexturePass(Environment& env, IR::Program& program) { switch (inst->GetOpcode()) { case IR::Opcode::ImageRead: case IR::Opcode::ImageWrite: { + if (cbuf.has_secondary) { + throw NotImplementedException("Unexpected separate sampler"); + } const bool is_written{inst->GetOpcode() == IR::Opcode::ImageWrite}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ @@ -294,16 +346,22 @@ void TexturePass(Environment& env, IR::Program& program) { default: if (flags.type == TextureType::Buffer) { index = descriptors.Add(TextureBufferDescriptor{ + .has_secondary = cbuf.has_secondary, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, + .secondary_cbuf_index = cbuf.secondary_index, + .secondary_cbuf_offset = cbuf.secondary_offset, .count = 1, }); } else { index = descriptors.Add(TextureDescriptor{ .type = flags.type, .is_depth = flags.is_depth != 0, + .has_secondary = cbuf.has_secondary, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, + 
.secondary_cbuf_index = cbuf.secondary_index, + .secondary_cbuf_offset = cbuf.secondary_offset, .count = 1, }); } -- cgit v1.2.3 From 420982864634a5e52cea42c43f8623f75483fbcc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 00:27:55 -0300 Subject: shader: Intrusively store register values in block for SSA pass --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 64 +++++++++++++++-------- 1 file changed, 43 insertions(+), 21 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index ddd679e39..bb1a90004 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -57,39 +57,62 @@ using Variant = std::variant>; struct DefTable { - [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { - return regs[IR::RegIndex(variable)]; + const IR::Value& Def(IR::Block* block, IR::Reg variable) noexcept { + return block->SsaRegValue(variable); + } + void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) noexcept { + block->SetSsaRegValue(variable, value); } - [[nodiscard]] ValueMap& operator[](IR::Pred variable) noexcept { - return preds[IR::PredIndex(variable)]; + const IR::Value& Def(IR::Block* block, IR::Pred variable) noexcept { + return preds[IR::PredIndex(variable)][block]; + } + void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) noexcept { + preds[IR::PredIndex(variable)].insert_or_assign(block, value); } - [[nodiscard]] ValueMap& operator[](GotoVariable goto_variable) { - return goto_vars[goto_variable.index]; + const IR::Value& Def(IR::Block* block, GotoVariable variable) noexcept { + return goto_vars[variable.index][block]; + } + void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) noexcept { + goto_vars[variable.index].insert_or_assign(block, value); } - [[nodiscard]] ValueMap& operator[](IndirectBranchVariable) { - return indirect_branch_var; + const IR::Value& Def(IR::Block* block, IndirectBranchVariable) noexcept { + return indirect_branch_var[block]; + } + void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) noexcept { + indirect_branch_var.insert_or_assign(block, value); } - [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept { - return zero_flag; + const IR::Value& Def(IR::Block* block, ZeroFlagTag) noexcept { + return zero_flag[block]; + } + void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) noexcept { + zero_flag.insert_or_assign(block, value); } - [[nodiscard]] ValueMap& operator[](SignFlagTag) noexcept { - return sign_flag; + const IR::Value& Def(IR::Block* block, SignFlagTag) noexcept { + return sign_flag[block]; + } + void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) noexcept { + sign_flag.insert_or_assign(block, value); } - [[nodiscard]] ValueMap& operator[](CarryFlagTag) noexcept { - return carry_flag; + const IR::Value& Def(IR::Block* block, CarryFlagTag) noexcept { + return carry_flag[block]; + } + void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) noexcept { + carry_flag.insert_or_assign(block, value); } - [[nodiscard]] ValueMap& operator[](OverflowFlagTag) noexcept { - return overflow_flag; + const IR::Value& Def(IR::Block* block, OverflowFlagTag) noexcept { + return overflow_flag[block]; + } + void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) noexcept { + overflow_flag.insert_or_assign(block, value); } - 
std::array regs; std::array preds; boost::container::flat_map goto_vars; ValueMap indirect_branch_var; @@ -143,7 +166,7 @@ class Pass { public: template void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) { - current_def[variable].insert_or_assign(block, value); + current_def.SetDef(block, variable, value); } template @@ -170,9 +193,8 @@ public: IR::Block* const block{stack.back().block}; switch (stack.back().pc) { case Status::Start: { - const ValueMap& def{current_def[variable]}; - if (const auto it{def.find(block)}; it != def.end()) { - stack.back().result = it->second; + if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) { + stack.back().result = def; } else if (!sealed_blocks.contains(block)) { // Incomplete CFG IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; -- cgit v1.2.3 From 050e81500c002f304d581f28700de549b828a2bc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 00:35:47 -0300 Subject: shader: Move microinstruction header to the value header --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 2 +- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 2 +- src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp | 2 +- src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | 2 +- src/shader_recompiler/ir_opt/identity_removal_pass.cpp | 2 +- src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp | 2 +- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 2 +- src/shader_recompiler/ir_opt/verification_pass.cpp | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index cccf0909d..bb4aeb57c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -3,9 +3,9 @@ // Refer to the license.txt file included. 
#include "shader_recompiler/environment.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/shader_info.h" diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 2206f93c2..770d3de61 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -11,7 +11,7 @@ #include "common/bit_util.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp index 8ad59f42e..f9c5334b5 100644 --- a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -5,7 +5,7 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index f294d297f..87eca2a0d 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -15,7 +15,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/breadth_first_search.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 38af72dfe..6afbe24f7 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -5,7 +5,7 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 62e73d52d..773e1f961 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -5,7 +5,7 @@ #include #include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index bb1a90004..fe86a164b 100644 --- 
a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -23,10 +23,10 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/pred.h" #include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index dbec96d84..62bf5f8ff 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -6,7 +6,7 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { -- cgit v1.2.3 From 23182fa59c45a88b706022c1373e307ba4636cca Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 04:58:23 -0300 Subject: shader: Intrusively store in a block if it's sealed or not --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index fe86a164b..3bab742e7 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -195,7 +195,7 @@ public: case Status::Start: { if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) { stack.back().result = def; - } else if (!sealed_blocks.contains(block)) { + } else if (!block->IsSsaSealed()) { // Incomplete CFG IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); @@ -251,7 +251,7 @@ public: std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); } } - sealed_blocks.insert(block); + block->SsaSeal(); } private: @@ -297,7 +297,6 @@ private: return same; } - boost::container::flat_set sealed_blocks; boost::container::flat_map> incomplete_phis; DefTable current_def; -- cgit v1.2.3 From d10cf55353175b13bed4cf18791e080ecb7fd95b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 16:17:59 -0300 Subject: shader: Implement indexed textures --- src/shader_recompiler/ir_opt/texture_pass.cpp | 89 ++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 17 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 5ac485522..cfa6b34b9 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include +#include #include #include @@ -21,6 +22,8 @@ struct ConstBufferAddr { u32 offset; u32 secondary_index; u32 secondary_offset; + IR::U32 dynamic_offset; + u32 count; bool has_secondary; }; @@ -32,6 +35,9 @@ struct TextureInst { using TextureInstVector = boost::container::small_vector; +constexpr u32 DESCRIPTOR_SIZE = 8; +constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast(std::countr_zero(DESCRIPTOR_SIZE)); + IR::Opcode IndexedInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: @@ -131,6 +137,9 @@ std::optional TryGetConstBuffer(const IR::Inst* inst) { if (lhs->has_secondary || rhs->has_secondary) { return std::nullopt; } + if (lhs->count > 1 || rhs->count > 1) { + return std::nullopt; + } if (lhs->index > rhs->index || lhs->offset > rhs->offset) { std::swap(lhs, rhs); } @@ -139,9 +148,12 @@ std::optional TryGetConstBuffer(const IR::Inst* inst) { .offset = lhs->offset, .secondary_index = rhs->index, .secondary_offset = rhs->offset, + .dynamic_offset = {}, + .count = 1, .has_secondary = true, }; } + case IR::Opcode::GetCbufU32x2: case IR::Opcode::GetCbufU32: break; } @@ -152,15 +164,39 @@ std::optional TryGetConstBuffer(const IR::Inst* inst) { // but not supported here at the moment return std::nullopt; } - if (!offset.IsImmediate()) { - // TODO: Support arrays of textures + if (offset.IsImmediate()) { + return ConstBufferAddr{ + .index = index.U32(), + .offset = offset.U32(), + .secondary_index = 0, + .secondary_offset = 0, + .dynamic_offset = {}, + .count = 1, + .has_secondary = false, + }; + } + IR::Inst* const offset_inst{offset.InstRecursive()}; + if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) { + return std::nullopt; + } + u32 base_offset{}; + IR::U32 dynamic_offset; + if (offset_inst->Arg(0).IsImmediate()) { + base_offset = offset_inst->Arg(0).U32(); + dynamic_offset = IR::U32{offset_inst->Arg(1)}; + } else if (offset_inst->Arg(1).IsImmediate()) { + base_offset = offset_inst->Arg(1).U32(); + dynamic_offset = IR::U32{offset_inst->Arg(0)}; + } else { return std::nullopt; } return ConstBufferAddr{ - .index{index.U32()}, - .offset{offset.U32()}, + .index = index.U32(), + .offset = base_offset, .secondary_index = 0, .secondary_offset = 0, + .dynamic_offset = dynamic_offset, + .count = 8, .has_secondary = false, }; } @@ -179,11 +215,13 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { .offset = inst.Arg(0).U32(), .secondary_index = 0, .secondary_offset = 0, + .dynamic_offset = {}, + .count = 1, .has_secondary = false, }; } return TextureInst{ - .cbuf{addr}, + .cbuf = addr, .inst = &inst, .block = block, }; @@ -209,18 +247,20 @@ public: u32 Add(const TextureBufferDescriptor& desc) { return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { - return desc.has_secondary == existing.has_secondary && - desc.cbuf_index == existing.cbuf_index && + return desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.secondary_cbuf_index == existing.secondary_cbuf_index && - desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.count == existing.count && desc.size_shift == existing.size_shift && + desc.has_secondary == existing.has_secondary; }); } u32 Add(const ImageBufferDescriptor& desc) { return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == 
existing.cbuf_offset; + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && + desc.size_shift == existing.size_shift; }); } @@ -231,7 +271,8 @@ public: desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.secondary_cbuf_index == existing.secondary_cbuf_index && - desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.count == existing.count && desc.size_shift == existing.size_shift; }); } @@ -239,7 +280,8 @@ public: const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) { return desc.type == existing.type && desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == existing.cbuf_offset; + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && + desc.size_shift == existing.size_shift; })}; image_descriptors[index].is_written |= desc.is_written; return index; @@ -310,7 +352,6 @@ void TexturePass(Environment& env, IR::Program& program) { // This happens on Fire Emblem: Three Houses flags.type.Assign(TextureType::Buffer); } - inst->SetFlags(flags); break; default: break; @@ -329,7 +370,8 @@ void TexturePass(Environment& env, IR::Program& program) { .is_written = is_written, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } else { index = descriptors.Add(ImageDescriptor{ @@ -338,7 +380,8 @@ void TexturePass(Environment& env, IR::Program& program) { .is_written = is_written, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } break; @@ -351,7 +394,8 @@ void TexturePass(Environment& env, IR::Program& program) { .cbuf_offset = cbuf.offset, .secondary_cbuf_index = cbuf.secondary_index, .secondary_cbuf_offset = cbuf.secondary_offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } else { index = descriptors.Add(TextureDescriptor{ @@ -362,12 +406,23 @@ void TexturePass(Environment& env, IR::Program& program) { .cbuf_offset = cbuf.offset, .secondary_cbuf_index = cbuf.secondary_index, .secondary_cbuf_offset = cbuf.secondary_offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } break; } - inst->SetArg(0, IR::Value{index}); + flags.descriptor_index.Assign(index); + inst->SetFlags(flags); + + if (cbuf.count > 1) { + const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)}; + IR::IREmitter ir{*texture_inst.block, insert_point}; + const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))}; + inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift)); + } else { + inst->SetArg(0, IR::Value{}); + } } } -- cgit v1.2.3 From 92a01984e6315f3c214990926c8fa5b4474ed339 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 18:29:59 -0300 Subject: shader: Remove invalidated blocks in dead code elimination pass --- src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp index f9c5334b5..1e4a3fdae 100644 --- a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -14,9 +14,12 @@ void 
DeadCodeEliminationPass(IR::Program& program) { // We iterate over the instructions in reverse order. // This is because removing an instruction reduces the number of uses for earlier instructions. for (IR::Block* const block : program.post_order_blocks) { - for (IR::Inst& inst : block->Instructions() | std::views::reverse) { - if (!inst.HasUses() && !inst.MayHaveSideEffects()) { - inst.Invalidate(); + auto it{block->end()}; + while (it != block->begin()) { + --it; + if (!it->HasUses() && !it->MayHaveSideEffects()) { + it->Invalidate(); + it = block->Instructions().erase(it); } } } -- cgit v1.2.3 From 25949b864c40405946d434ecc85d6c167f323a24 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 18:33:49 -0300 Subject: shader: Fix forward referencing identity instructions when inserting phi --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 24 ++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 3bab742e7..a8064a5d0 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -278,20 +278,22 @@ private: } same = op; } + // Remove the phi node from the block, it will be reinserted + IR::Block::InstructionList& list{block->Instructions()}; + list.erase(IR::Block::InstructionList::s_iterator_to(phi)); + + // Find the first non-phi instruction and use it as an insertion point + IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IsPhi)}; if (same.IsEmpty()) { // The phi is unreachable or in the start block - // First remove the phi node from the block, it will be reinserted - IR::Block::InstructionList& list{block->Instructions()}; - list.erase(IR::Block::InstructionList::s_iterator_to(phi)); - - // Insert an undef instruction after all phi nodes (to keep phi instructions on top) - const auto first_not_phi{std::ranges::find_if_not(list, IsPhi)}; - same = IR::Value{&*block->PrependNewInst(first_not_phi, undef_opcode)}; - - // Insert the phi node after the undef opcode, this will be replaced with an identity - list.insert(first_not_phi, phi); + // Insert an undefined instruction and make it the phi node replacement + // The "phi" node reinsertion point is specified after this instruction + reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode); + same = IR::Value{&*reinsert_point}; + ++reinsert_point; } - // Reroute all uses of phi to same and remove phi + // Reinsert the phi node and reroute all its uses to the "same" value + list.insert(reinsert_point, phi); phi.ReplaceUsesWith(same); // TODO: Try to recursively remove all phi users, which might have become trivial return same; -- cgit v1.2.3 From 0c7230a606ae705a28c8a14590808d6bfd3656cf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 18:34:34 -0300 Subject: shader: Add more strict validation the pass --- src/shader_recompiler/ir_opt/verification_pass.cpp | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 62bf5f8ff..207355ecc 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include +#include #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/basic_block.h" @@ -50,9 +51,50 @@ static void ValidateUses(const IR::Program& program) { } } +static void ValidateForwardDeclarations(const IR::Program& program) { + std::set definitions; + for (const IR::Block* const block : program.blocks) { + for (const IR::Inst& inst : *block) { + definitions.emplace(&inst); + if (inst.GetOpcode() == IR::Opcode::Phi) { + // Phi nodes can have forward declarations + continue; + } + const size_t num_args{inst.NumArgs()}; + for (size_t arg = 0; arg < num_args; ++arg) { + if (inst.Arg(arg).IsImmediate()) { + continue; + } + if (!definitions.contains(inst.Arg(arg).Inst())) { + fmt::print("{}\n", IR::DumpBlock(*block)); + throw LogicError("Forward declaration in block: {}", IR::DumpBlock(*block)); + } + } + } + } +} + +static void ValidatePhiNodes(const IR::Program& program) { + for (const IR::Block* const block : program.blocks) { + bool no_more_phis{false}; + for (const IR::Inst& inst : *block) { + if (inst.GetOpcode() == IR::Opcode::Phi) { + if (no_more_phis) { + fmt::print("{}\n", IR::DumpBlock(*block)); + throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block)); + } + } else { + no_more_phis = true; + } + } + } +} + void VerificationPass(const IR::Program& program) { ValidateTypes(program); ValidateUses(program); + ValidateForwardDeclarations(program); + ValidatePhiNodes(program); } } // namespace Shader::Optimization -- cgit v1.2.3 From 50eb03382e8ac8eb4aeb7cdc488a7ee097fec39d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 08:17:53 -0300 Subject: shader: Fix error checking in bitfieldExtract and implement bitfieldInsert folding --- .../ir_opt/constant_propagation_pass.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 770d3de61..f16c5e8f6 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -553,7 +553,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return; case IR::Opcode::BitFieldUExtract: FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { - if (static_cast(shift) + static_cast(count) > Common::BitSize()) { + if (static_cast(shift) + static_cast(count) > 32) { throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, base, shift, count); } @@ -563,13 +563,22 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::BitFieldSExtract: FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) { const size_t back_shift{static_cast(shift) + static_cast(count)}; - if (back_shift > Common::BitSize()) { + const size_t left_shift{32 - back_shift}; + const size_t right_shift{static_cast(32 - count)}; + if (back_shift >= 32 || left_shift >= 32 || right_shift >= 32) { throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract, base, shift, count); } - const size_t left_shift{Common::BitSize() - back_shift}; - return static_cast(static_cast(base << left_shift) >> - static_cast(Common::BitSize() - count)); + return static_cast((base << left_shift) >> right_shift); + }); + return; + case IR::Opcode::BitFieldInsert: + FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) { + if (bits >= 32 || offset >= 32) { + 
throw LogicError("Undefined result in {}({}, {}, {}, {})", + IR::Opcode::BitFieldInsert, base, insert, offset, bits); + } + return (base & ~(~(~0u << bits) << offset)) | (insert << offset); }); return; case IR::Opcode::BranchConditional: -- cgit v1.2.3 From 20e86fd61512626e267824c1a5469084c2d36c7a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 24 Apr 2021 00:49:14 -0400 Subject: shader: Fix BFE s32 undefined check Our unit tests were hitting this exception. --- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index f16c5e8f6..b1c45d13a 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -565,7 +565,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { const size_t back_shift{static_cast(shift) + static_cast(count)}; const size_t left_shift{32 - back_shift}; const size_t right_shift{static_cast(32 - count)}; - if (back_shift >= 32 || left_shift >= 32 || right_shift >= 32) { + if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) { throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract, base, shift, count); } -- cgit v1.2.3 From b541f5e5e333a8ec8c3569e02d67e59ad14217c2 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 19 Apr 2021 01:03:38 +0200 Subject: shader: Implement VertexA stage --- src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | 74 ++++++++++++++++++++++ .../global_memory_to_storage_buffer_pass.cpp | 26 ++++++++ src/shader_recompiler/ir_opt/passes.h | 7 ++ src/shader_recompiler/ir_opt/texture_pass.cpp | 21 ++++++ 4 files changed, 128 insertions(+) create mode 100644 src/shader_recompiler/ir_opt/dual_vertex_pass.cpp (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp new file mode 100644 index 000000000..f35c6478a --- /dev/null +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -0,0 +1,74 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include + +#include "common/bit_cast.h" +#include "common/bit_util.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +void VertexATransformPass(IR::Program& program) { + bool replaced_join{}; + bool eliminated_epilogue{}; + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + switch (inst.GetOpcode()) { + case IR::Opcode::Return: + inst.ReplaceOpcode(IR::Opcode::Join); + replaced_join = true; + break; + case IR::Opcode::Epilogue: + inst.Invalidate(); + eliminated_epilogue = true; + break; + default: + break; + } + if (replaced_join && eliminated_epilogue) { + return; + } + } + } +} + +void VertexBTransformPass(IR::Program& program) { + for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Prologue) { + return inst.Invalidate(); + } + } + } +} + +void DualVertexJoinPass(IR::Program& program) { + const auto& blocks = program.blocks; + s64 s = static_cast(blocks.size()) - 1; + if (s < 1) { + throw NotImplementedException("Dual Vertex Join pass failed, expected atleast 2 blocks!"); + } + for (s64 index = 0; index < s; index++) { + IR::Block* const current_block = blocks[index]; + IR::Block* const next_block = blocks[index + 1]; + for (IR::Inst& inst : current_block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Join) { + IR::IREmitter ir{*current_block, IR::Block::InstructionList::s_iterator_to(inst)}; + ir.Branch(next_block); + inst.Invalidate(); + // only 1 join should exist + return; + } + } + } + throw NotImplementedException("Dual Vertex Join pass failed, no join present!"); +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 87eca2a0d..1d11a00d8 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -499,4 +499,30 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { } } +template +static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { + // TODO: Handle arrays + const auto it{std::ranges::find_if(descriptors, pred)}; + if (it != descriptors.end()) { + return static_cast(std::distance(descriptors.begin(), it)); + } + descriptors.push_back(desc); + return static_cast(descriptors.size()) - 1; +} + +void JoinStorageInfo(Info& base, Info& source) { + auto& descriptors = base.storage_buffers_descriptors; + for (auto& desc : source.storage_buffers_descriptors) { + auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count; + })}; + if (it != descriptors.end()) { + it->is_written |= desc.is_written; + continue; + } + descriptors.push_back(desc); + } +} + } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 186104713..e9cb8546a 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -22,4 +22,11 @@ void SsaRewritePass(IR::Program& program); void TexturePass(Environment& env, IR::Program& program); void 
VerificationPass(const IR::Program& program); +// Dual Vertex +void VertexATransformPass(IR::Program& program); +void VertexBTransformPass(IR::Program& program); +void DualVertexJoinPass(IR::Program& program); +void JoinTextureInfo(Info& base, Info& source); +void JoinStorageInfo(Info& base, Info& source); + } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index cfa6b34b9..2b38bcf42 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -426,4 +426,25 @@ void TexturePass(Environment& env, IR::Program& program) { } } +void JoinTextureInfo(Info& base, Info& source) { + Descriptors descriptors{ + base.texture_buffer_descriptors, + base.image_buffer_descriptors, + base.texture_descriptors, + base.image_descriptors, + }; + for (auto& desc : source.texture_buffer_descriptors) { + descriptors.Add(desc); + } + for (auto& desc : source.image_buffer_descriptors) { + descriptors.Add(desc); + } + for (auto& desc : source.texture_descriptors) { + descriptors.Add(desc); + } + for (auto& desc : source.image_descriptors) { + descriptors.Add(desc); + } +} + } // namespace Shader::Optimization -- cgit v1.2.3 From c49d56c931471f21d475a31272164fbfae5ea64a Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 1 May 2021 14:56:25 +0200 Subject: shader: Address feedback --- src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp index f35c6478a..f2d7db0e6 100644 --- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -4,8 +4,6 @@ #include #include -#include -#include #include "common/bit_cast.h" #include "common/bit_util.h" @@ -40,7 +38,7 @@ void VertexATransformPass(IR::Program& program) { } void VertexBTransformPass(IR::Program& program) { - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + for (IR::Block* const block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { if (inst.GetOpcode() == IR::Opcode::Prologue) { return inst.Invalidate(); @@ -51,24 +49,24 @@ void VertexBTransformPass(IR::Program& program) { void DualVertexJoinPass(IR::Program& program) { const auto& blocks = program.blocks; - s64 s = static_cast(blocks.size()) - 1; - if (s < 1) { - throw NotImplementedException("Dual Vertex Join pass failed, expected atleast 2 blocks!"); + const s64 sub_size = static_cast(blocks.size()) - 1; + if (sub_size < 1) { + throw LogicError("Dual Vertex Join pass failed, expected atleast 2 blocks"); } - for (s64 index = 0; index < s; index++) { - IR::Block* const current_block = blocks[index]; - IR::Block* const next_block = blocks[index + 1]; + for (s64 index = 0; index < sub_size; ++index) { + IR::Block* const current_block{blocks[index]}; + IR::Block* const next_block{blocks[index + 1]}; for (IR::Inst& inst : current_block->Instructions()) { if (inst.GetOpcode() == IR::Opcode::Join) { IR::IREmitter ir{*current_block, IR::Block::InstructionList::s_iterator_to(inst)}; ir.Branch(next_block); inst.Invalidate(); - // only 1 join should exist + // Only 1 join should exist return; } } } - throw NotImplementedException("Dual Vertex Join pass failed, no join present!"); + throw LogicError("Dual Vertex Join pass failed, no join present"); } } 
// namespace Shader::Optimization -- cgit v1.2.3 From 7ecc6de56ae01602b25408db8b6658d7a41a419a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 23 Apr 2021 17:47:54 -0400 Subject: shader: Implement Int32 SUATOM/SURED --- .../ir_opt/collect_shader_info_pass.cpp | 36 ++++++++++++ src/shader_recompiler/ir_opt/texture_pass.cpp | 68 +++++++++++++++++++++- 2 files changed, 103 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index bb4aeb57c..7d8794a7e 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -565,6 +565,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageWrite: { const auto flags{inst.Flags()}; info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless; + info.uses_image_buffers |= flags.type == TextureType::Buffer; break; } case IR::Opcode::SubgroupEqMask: @@ -696,6 +697,41 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.used_storage_buffer_types |= IR::Type::U64; info.uses_int64_bit_atomics = true; break; + case IR::Opcode::BindlessImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicExchange32: + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BoundImageAtomicUMin32: + case IR::Opcode::BoundImageAtomicSMax32: + case IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BoundImageAtomicInc32: + case IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BoundImageAtomicExchange32: + case IR::Opcode::ImageAtomicIAdd32: + case IR::Opcode::ImageAtomicSMin32: + case IR::Opcode::ImageAtomicUMin32: + case IR::Opcode::ImageAtomicSMax32: + case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicInc32: + case IR::Opcode::ImageAtomicDec32: + case IR::Opcode::ImageAtomicAnd32: + case IR::Opcode::ImageAtomicOr32: + case IR::Opcode::ImageAtomicXor32: + case IR::Opcode::ImageAtomicExchange32: + info.uses_atomic_image_u32 = true; + break; default: break; } diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 2b38bcf42..9e0a2fb09 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -76,6 +76,39 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { case IR::Opcode::BoundImageWrite: case IR::Opcode::BindlessImageWrite: return IR::Opcode::ImageWrite; + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicIAdd32: + return IR::Opcode::ImageAtomicIAdd32; + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicSMin32: + return IR::Opcode::ImageAtomicSMin32; + case IR::Opcode::BoundImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + return IR::Opcode::ImageAtomicUMin32; + case 
IR::Opcode::BoundImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicSMax32: + return IR::Opcode::ImageAtomicSMax32; + case IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + return IR::Opcode::ImageAtomicUMax32; + case IR::Opcode::BoundImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicInc32: + return IR::Opcode::ImageAtomicInc32; + case IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicDec32: + return IR::Opcode::ImageAtomicDec32; + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicAnd32: + return IR::Opcode::ImageAtomicAnd32; + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicOr32: + return IR::Opcode::ImageAtomicOr32; + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicXor32: + return IR::Opcode::ImageAtomicXor32; + case IR::Opcode::BoundImageAtomicExchange32: + case IR::Opcode::BindlessImageAtomicExchange32: + return IR::Opcode::ImageAtomicExchange32; default: return IR::Opcode::Void; } @@ -95,6 +128,17 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BindlessImageGradient: case IR::Opcode::BindlessImageRead: case IR::Opcode::BindlessImageWrite: + case IR::Opcode::BindlessImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicExchange32: return true; case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: @@ -108,6 +152,17 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageGradient: case IR::Opcode::BoundImageRead: case IR::Opcode::BoundImageWrite: + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BoundImageAtomicUMin32: + case IR::Opcode::BoundImageAtomicSMax32: + case IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BoundImageAtomicInc32: + case IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BoundImageAtomicExchange32: return false; default: throw InvalidArgument("Invalid opcode {}", inst.GetOpcode()); @@ -359,11 +414,22 @@ void TexturePass(Environment& env, IR::Program& program) { u32 index; switch (inst->GetOpcode()) { case IR::Opcode::ImageRead: + case IR::Opcode::ImageAtomicIAdd32: + case IR::Opcode::ImageAtomicSMin32: + case IR::Opcode::ImageAtomicUMin32: + case IR::Opcode::ImageAtomicSMax32: + case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicInc32: + case IR::Opcode::ImageAtomicDec32: + case IR::Opcode::ImageAtomicAnd32: + case IR::Opcode::ImageAtomicOr32: + case IR::Opcode::ImageAtomicXor32: + case IR::Opcode::ImageAtomicExchange32: case IR::Opcode::ImageWrite: { if (cbuf.has_secondary) { throw NotImplementedException("Unexpected separate sampler"); } - const bool is_written{inst->GetOpcode() == IR::Opcode::ImageWrite}; + const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ .format = flags.image_format, -- cgit v1.2.3 From 
ee61ec2c39e6db53c56e7ac761a2223d99f06908 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 2 May 2021 01:50:27 +0200 Subject: shader: Optimize NVN Fallthrough --- .../ir_opt/collect_shader_info_pass.cpp | 78 ++++++++++++++++++++-- 1 file changed, 71 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 7d8794a7e..13b793d57 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -132,6 +132,30 @@ void SetPatch(Info& info, IR::Patch patch) { } } +void CheckCBufNVN(Info& info, IR::Inst& inst) { + const IR::Value cbuf_index{inst.Arg(0)}; + if (!cbuf_index.IsImmediate()) { + info.nvn_buffer_used.set(); + return; + } + const u32 index{cbuf_index.U32()}; + if (index != 0) { + return; + } + const IR::Value cbuf_offset{inst.Arg(1)}; + if (!cbuf_offset.IsImmediate()) { + info.nvn_buffer_used.set(); + return; + } + const u32 offset{cbuf_offset.U32()}; + const u32 descriptor_size{0x10}; + const u32 upper_limit{info.nvn_buffer_base + descriptor_size * 16}; + if (offset >= info.nvn_buffer_base && offset < upper_limit) { + const std::size_t nvn_index{(offset - info.nvn_buffer_base) / descriptor_size}; + info.nvn_buffer_used.set(nvn_index, true); + } +} + void VisitUsages(Info& info, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::CompositeConstructF16x2: @@ -382,13 +406,6 @@ void VisitUsages(Info& info, IR::Inst& inst) { break; } switch (inst.GetOpcode()) { - case IR::Opcode::LoadGlobalU8: - case IR::Opcode::LoadGlobalS8: - case IR::Opcode::LoadGlobalU16: - case IR::Opcode::LoadGlobalS16: - case IR::Opcode::LoadGlobal32: - case IR::Opcode::LoadGlobal64: - case IR::Opcode::LoadGlobal128: case IR::Opcode::WriteGlobalU8: case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalU16: @@ -423,6 +440,15 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::GlobalAtomicMinF32x2: case IR::Opcode::GlobalAtomicMaxF16x2: case IR::Opcode::GlobalAtomicMaxF32x2: + info.stores_global_memory = true; + [[fallthrough]]; + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: info.uses_int64 = true; info.uses_global_memory = true; info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2; @@ -800,9 +826,27 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) { } } +void VisitCbufs(Info& info, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + case IR::Opcode::GetCbufU32: + case IR::Opcode::GetCbufF32: + case IR::Opcode::GetCbufU32x2: { + CheckCBufNVN(info, inst); + break; + } + default: + break; + } +} + void Visit(Info& info, IR::Inst& inst) { VisitUsages(info, inst); VisitFpModifiers(info, inst); + VisitCbufs(info, inst); } void GatherInfoFromHeader(Environment& env, Info& info) { @@ -839,6 +883,26 @@ void GatherInfoFromHeader(Environment& env, Info& info) { void CollectShaderInfoPass(Environment& env, IR::Program& program) { Info& info{program.info}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + 
return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + info.nvn_buffer_base = base; + for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { Visit(info, inst); -- cgit v1.2.3 From c4fd6b55bc9acd06b2fc89f84fd175d78e14110a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 10 May 2021 18:21:28 -0300 Subject: glasm: Implement shuffle and vote instructions on GLASM --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 13b793d57..ea08aacc3 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -504,11 +504,13 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.uses_is_helper_invocation = true; break; case IR::Opcode::LaneId: + info.uses_subgroup_invocation_id = true; + break; case IR::Opcode::ShuffleIndex: case IR::Opcode::ShuffleUp: case IR::Opcode::ShuffleDown: case IR::Opcode::ShuffleButterfly: - info.uses_subgroup_invocation_id = true; + info.uses_subgroup_shuffles = true; break; case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: -- cgit v1.2.3 From d54d7de40e7295827b0e4e4026441b53d3fc9569 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 14 May 2021 00:40:54 -0300 Subject: glasm: Rework control flow introducing a syntax list This commit regresses VertexA shaders, their transformation pass has to be adapted to the new control flow. 
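For clarity, the NVN fallthrough detection added to collect_shader_info_pass.cpp above (commit ee61ec2c) amounts to checking whether an immediate constant-buffer offset lands inside the driver-reserved descriptor table: 16 entries of 0x10 bytes each, starting at the per-stage base. A minimal standalone sketch of that check follows; the free function, its name, and the std::optional return type are illustrative only and are not part of the patch.

#include <cstddef>
#include <cstdint>
#include <optional>

// Returns the NVN storage-buffer slot index selected by a constant-buffer
// offset, or std::nullopt when the offset is outside the reserved table.
// Mirrors the bounds used by the pass: 16 descriptors of 0x10 bytes starting
// at the per-stage base (0x110 vertex, 0x210 TCS, 0x310 TES and compute,
// 0x410 geometry, 0x510 fragment).
std::optional<std::size_t> NvnSlotIndex(std::uint32_t nvn_buffer_base, std::uint32_t offset) {
    constexpr std::uint32_t descriptor_size{0x10};
    const std::uint32_t upper_limit{nvn_buffer_base + descriptor_size * 16};
    if (offset < nvn_buffer_base || offset >= upper_limit) {
        return std::nullopt;
    }
    return (offset - nvn_buffer_base) / descriptor_size;
}

For example, an immediate offset of base + 0x25 would map to slot 2, which the pass then marks in nvn_buffer_used; non-immediate indices or offsets conservatively mark every slot as used.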
--- .../ir_opt/constant_propagation_pass.cpp | 20 -------- src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | 56 +++------------------- .../ir_opt/identity_removal_pass.cpp | 1 - src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 4 +- 4 files changed, 8 insertions(+), 73 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index b1c45d13a..66f1391db 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -353,24 +353,6 @@ IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence< return IR::Value{func(Arg>(inst.Arg(I))...)}; } -void FoldBranchConditional(IR::Inst& inst) { - const IR::U1 cond{inst.Arg(0)}; - if (cond.IsImmediate()) { - // TODO: Convert to Branch - return; - } - const IR::Inst* cond_inst{cond.InstRecursive()}; - if (cond_inst->GetOpcode() == IR::Opcode::LogicalNot) { - const IR::Value true_label{inst.Arg(1)}; - const IR::Value false_label{inst.Arg(2)}; - // Remove negation on the conditional (take the parameter out of LogicalNot) and swap - // the branches - inst.SetArg(0, cond_inst->Arg(0)); - inst.SetArg(1, false_label); - inst.SetArg(2, true_label); - } -} - std::optional FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, IR::Opcode construct, u32 first_index) { IR::Inst* const inst{inst_value.InstRecursive()}; @@ -581,8 +563,6 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return (base & ~(~(~0u << bits) << offset)) | (insert << offset); }); return; - case IR::Opcode::BranchConditional: - return FoldBranchConditional(inst); case IR::Opcode::CompositeExtractF32x2: return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2, IR::Opcode::CompositeInsertF32x2); diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp index f2d7db0e6..b0a9f5258 100644 --- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -13,60 +13,16 @@ namespace Shader::Optimization { -void VertexATransformPass(IR::Program& program) { - bool replaced_join{}; - bool eliminated_epilogue{}; - for (IR::Block* const block : program.post_order_blocks) { - for (IR::Inst& inst : block->Instructions()) { - switch (inst.GetOpcode()) { - case IR::Opcode::Return: - inst.ReplaceOpcode(IR::Opcode::Join); - replaced_join = true; - break; - case IR::Opcode::Epilogue: - inst.Invalidate(); - eliminated_epilogue = true; - break; - default: - break; - } - if (replaced_join && eliminated_epilogue) { - return; - } - } - } +void VertexATransformPass(IR::Program&) { + throw NotImplementedException("VertexA pass"); } -void VertexBTransformPass(IR::Program& program) { - for (IR::Block* const block : program.blocks) { - for (IR::Inst& inst : block->Instructions()) { - if (inst.GetOpcode() == IR::Opcode::Prologue) { - return inst.Invalidate(); - } - } - } +void VertexBTransformPass(IR::Program&) { + throw NotImplementedException("VertexA pass"); } -void DualVertexJoinPass(IR::Program& program) { - const auto& blocks = program.blocks; - const s64 sub_size = static_cast(blocks.size()) - 1; - if (sub_size < 1) { - throw LogicError("Dual Vertex Join pass failed, expected atleast 2 blocks"); - } - for (s64 index = 0; index < sub_size; ++index) { - IR::Block* const current_block{blocks[index]}; - IR::Block* const next_block{blocks[index + 
1]}; - for (IR::Inst& inst : current_block->Instructions()) { - if (inst.GetOpcode() == IR::Opcode::Join) { - IR::IREmitter ir{*current_block, IR::Block::InstructionList::s_iterator_to(inst)}; - ir.Branch(next_block); - inst.Invalidate(); - // Only 1 join should exist - return; - } - } - } - throw LogicError("Dual Vertex Join pass failed, no join present"); +void DualVertexJoinPass(IR::Program&) { + throw NotImplementedException("VertexA pass"); } } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 6afbe24f7..e9b55f835 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -12,7 +12,6 @@ namespace Shader::Optimization { void IdentityRemovalPass(IR::Program& program) { std::vector to_invalidate; - for (IR::Block* const block : program.blocks) { for (auto inst = block->begin(); inst != block->end();) { const size_t num_args{inst->NumArgs()}; diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index a8064a5d0..26eb3a3ab 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -202,7 +202,7 @@ public: incomplete_phis[block].insert_or_assign(variable, phi); stack.back().result = IR::Value{&*phi}; - } else if (const std::span imm_preds{block->ImmediatePredecessors()}; + } else if (const std::span imm_preds = block->ImmPredecessors(); imm_preds.size() == 1) { // Optimize the common case of one predecessor: no phi needed stack.back().pc = Status::SetValue; @@ -257,7 +257,7 @@ public: private: template IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) { - for (IR::Block* const imm_pred : block->ImmediatePredecessors()) { + for (IR::Block* const imm_pred : block->ImmPredecessors()) { phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred)); } return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); -- cgit v1.2.3 From bf5e48ffe4bd48ea681f2a01c8919c97125e88df Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 14 May 2021 04:48:46 -0300 Subject: glasm: Initial implementation of phi nodes on GLASM --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 26eb3a3ab..e54499ba5 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -138,10 +138,6 @@ IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { return IR::Opcode::UndefU32; } -[[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { - return inst.GetOpcode() == IR::Opcode::Phi; -} - enum class Status { Start, SetValue, @@ -283,7 +279,7 @@ private: list.erase(IR::Block::InstructionList::s_iterator_to(phi)); // Find the first non-phi instruction and use it as an insertion point - IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IsPhi)}; + IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)}; if (same.IsEmpty()) { // The phi is unreachable or in the start block // Insert an undefined instruction and make it the phi node replacement -- cgit v1.2.3 From 77ee733c3ac8b78194c4214330237f641712c1d6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 03:20:37 -0300 
Subject: glasm: Remove unintentionally committed fmt::prints --- src/shader_recompiler/ir_opt/verification_pass.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 207355ecc..975d5aadf 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -66,7 +66,6 @@ static void ValidateForwardDeclarations(const IR::Program& program) { continue; } if (!definitions.contains(inst.Arg(arg).Inst())) { - fmt::print("{}\n", IR::DumpBlock(*block)); throw LogicError("Forward declaration in block: {}", IR::DumpBlock(*block)); } } @@ -80,7 +79,6 @@ static void ValidatePhiNodes(const IR::Program& program) { for (const IR::Inst& inst : *block) { if (inst.GetOpcode() == IR::Opcode::Phi) { if (no_more_phis) { - fmt::print("{}\n", IR::DumpBlock(*block)); throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block)); } } else { -- cgit v1.2.3 From fbf5cdcba0f3b03e8cd3019fb285a96037b05f26 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 19:04:26 -0300 Subject: shader: Fix FSwizzleAdd folding when going through phi nodes --- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 66f1391db..796b4122d 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -440,13 +440,13 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { // DPdxFine if (index.U32() == 1) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{value_2})); + inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{inst.Arg(1)})); } } else if (swizzle_value == 0xA5) { // DPdyFine if (index.U32() == 2) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{value_2})); + inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{inst.Arg(1)})); } } } -- cgit v1.2.3 From 627161c38e2ee986ea3cc9c7e46e09f54390d701 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 28 May 2021 01:34:27 -0300 Subject: shader: Fix secondary textures --- src/shader_recompiler/ir_opt/texture_pass.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 9e0a2fb09..76cab04c2 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -283,8 +283,8 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { } TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { - const u32 secondary_index{cbuf.has_secondary ? cbuf.index : cbuf.secondary_index}; - const u32 secondary_offset{cbuf.has_secondary ? cbuf.offset : cbuf.secondary_offset}; + const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index }; + const u32 secondary_offset{cbuf.has_secondary ? 
cbuf.secondary_offset : cbuf.offset }; const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)}; const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)}; return env.ReadTextureType(lhs_raw | rhs_raw); -- cgit v1.2.3 From 457dda69ccdec4002e794ad1a810b577af66f79f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 28 May 2021 01:36:25 -0300 Subject: shader: Clang-format secondary textures --- src/shader_recompiler/ir_opt/texture_pass.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 76cab04c2..e9098239d 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -283,8 +283,8 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { } TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { - const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index }; - const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset }; + const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index}; + const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset}; const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)}; const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)}; return env.ReadTextureType(lhs_raw | rhs_raw); -- cgit v1.2.3 From ac0f5d2ab6cf0843c9ac6179b52b1e069a78069e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 31 May 2021 04:19:31 -0300 Subject: shader: Track legacy varyings --- .../ir_opt/collect_shader_info_pass.cpp | 101 ++++++++++++++++++--- 1 file changed, 88 insertions(+), 13 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index ea08aacc3..b343f0429 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -43,14 +43,11 @@ void GetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::PositionW: info.loads_position = true; break; - case IR::Attribute::InstanceId: - info.loads_instance_id = true; - break; - case IR::Attribute::VertexId: - info.loads_vertex_id = true; - break; - case IR::Attribute::FrontFace: - info.loads_front_face = true; + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + info.loads_color_front_diffuse = true; break; case IR::Attribute::PointSpriteS: case IR::Attribute::PointSpriteT: @@ -60,6 +57,15 @@ void GetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::TessellationEvaluationPointV: info.loads_tess_coord = true; break; + case IR::Attribute::InstanceId: + info.loads_instance_id = true; + break; + case IR::Attribute::VertexId: + info.loads_vertex_id = true; + break; + case IR::Attribute::FrontFace: + info.loads_front_face = true; + break; default: throw NotImplementedException("Get attribute {}", attribute); } @@ -71,6 +77,12 @@ void SetAttribute(Info& info, IR::Attribute attribute) { return; } switch (attribute) { + case IR::Attribute::Layer: + info.stores_layer = true; + break; + case IR::Attribute::ViewportIndex: + info.stores_viewport_index = true; + break; case IR::Attribute::PointSize: 
info.stores_point_size = true; break; @@ -80,6 +92,72 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::PositionW: info.stores_position = true; break; + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + info.stores_color_front_diffuse = true; + break; + case IR::Attribute::ColorFrontSpecularR: + case IR::Attribute::ColorFrontSpecularG: + case IR::Attribute::ColorFrontSpecularB: + case IR::Attribute::ColorFrontSpecularA: + info.stores_color_front_specular = true; + break; + case IR::Attribute::ColorBackDiffuseR: + case IR::Attribute::ColorBackDiffuseG: + case IR::Attribute::ColorBackDiffuseB: + case IR::Attribute::ColorBackDiffuseA: + info.stores_color_back_diffuse = true; + break; + case IR::Attribute::ColorBackSpecularR: + case IR::Attribute::ColorBackSpecularG: + case IR::Attribute::ColorBackSpecularB: + case IR::Attribute::ColorBackSpecularA: + info.stores_color_front_specular = true; + break; + case IR::Attribute::FixedFncTexture0S: + case IR::Attribute::FixedFncTexture0T: + case IR::Attribute::FixedFncTexture0R: + case IR::Attribute::FixedFncTexture0Q: + case IR::Attribute::FixedFncTexture1S: + case IR::Attribute::FixedFncTexture1T: + case IR::Attribute::FixedFncTexture1R: + case IR::Attribute::FixedFncTexture1Q: + case IR::Attribute::FixedFncTexture2S: + case IR::Attribute::FixedFncTexture2T: + case IR::Attribute::FixedFncTexture2R: + case IR::Attribute::FixedFncTexture2Q: + case IR::Attribute::FixedFncTexture3S: + case IR::Attribute::FixedFncTexture3T: + case IR::Attribute::FixedFncTexture3R: + case IR::Attribute::FixedFncTexture3Q: + case IR::Attribute::FixedFncTexture4S: + case IR::Attribute::FixedFncTexture4T: + case IR::Attribute::FixedFncTexture4R: + case IR::Attribute::FixedFncTexture4Q: + case IR::Attribute::FixedFncTexture5S: + case IR::Attribute::FixedFncTexture5T: + case IR::Attribute::FixedFncTexture5R: + case IR::Attribute::FixedFncTexture5Q: + case IR::Attribute::FixedFncTexture6S: + case IR::Attribute::FixedFncTexture6T: + case IR::Attribute::FixedFncTexture6R: + case IR::Attribute::FixedFncTexture6Q: + case IR::Attribute::FixedFncTexture7S: + case IR::Attribute::FixedFncTexture7T: + case IR::Attribute::FixedFncTexture7R: + case IR::Attribute::FixedFncTexture7Q: + case IR::Attribute::FixedFncTexture8S: + case IR::Attribute::FixedFncTexture8T: + case IR::Attribute::FixedFncTexture8R: + case IR::Attribute::FixedFncTexture8Q: + case IR::Attribute::FixedFncTexture9S: + case IR::Attribute::FixedFncTexture9T: + case IR::Attribute::FixedFncTexture9R: + case IR::Attribute::FixedFncTexture9Q: + info.stores_fixed_fnc_textures = true; + break; case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: case IR::Attribute::ClipDistance2: @@ -90,11 +168,8 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::ClipDistance7: info.stores_clip_distance = true; break; - case IR::Attribute::Layer: - info.stores_layer = true; - break; - case IR::Attribute::ViewportIndex: - info.stores_viewport_index = true; + case IR::Attribute::FogCoordinate: + info.stores_fog_coordinate = true; break; case IR::Attribute::ViewportMask: info.stores_viewport_mask = true; -- cgit v1.2.3 From 5539b13c5a696de553be86bb7c4ae61a0cdcc754 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 31 May 2021 05:34:53 -0300 Subject: shader,glasm: Implement legacy texcoord loads --- .../ir_opt/collect_shader_info_pass.cpp | 70 
++++++---------------- 1 file changed, 18 insertions(+), 52 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index b343f0429..6a5243c9f 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -28,12 +28,16 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { }); } -void GetAttribute(Info& info, IR::Attribute attribute) { - if (IR::IsGeneric(attribute)) { - info.input_generics.at(IR::GenericAttributeIndex(attribute)).used = true; +void GetAttribute(Info& info, IR::Attribute attr) { + if (IR::IsGeneric(attr)) { + info.input_generics.at(IR::GenericAttributeIndex(attr)).used = true; return; } - switch (attribute) { + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { + info.loads_fixed_fnc_textures = true; + return; + } + switch (attr) { case IR::Attribute::PrimitiveId: info.loads_primitive_id = true; break; @@ -67,16 +71,20 @@ void GetAttribute(Info& info, IR::Attribute attribute) { info.loads_front_face = true; break; default: - throw NotImplementedException("Get attribute {}", attribute); + throw NotImplementedException("Get attribute {}", attr); } } -void SetAttribute(Info& info, IR::Attribute attribute) { - if (IR::IsGeneric(attribute)) { - info.stores_generics.at(IR::GenericAttributeIndex(attribute)) = true; +void SetAttribute(Info& info, IR::Attribute attr) { + if (IR::IsGeneric(attr)) { + info.stores_generics.at(IR::GenericAttributeIndex(attr)) = true; return; } - switch (attribute) { + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { + info.stores_fixed_fnc_textures = true; + return; + } + switch (attr) { case IR::Attribute::Layer: info.stores_layer = true; break; @@ -116,48 +124,6 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::ColorBackSpecularA: info.stores_color_front_specular = true; break; - case IR::Attribute::FixedFncTexture0S: - case IR::Attribute::FixedFncTexture0T: - case IR::Attribute::FixedFncTexture0R: - case IR::Attribute::FixedFncTexture0Q: - case IR::Attribute::FixedFncTexture1S: - case IR::Attribute::FixedFncTexture1T: - case IR::Attribute::FixedFncTexture1R: - case IR::Attribute::FixedFncTexture1Q: - case IR::Attribute::FixedFncTexture2S: - case IR::Attribute::FixedFncTexture2T: - case IR::Attribute::FixedFncTexture2R: - case IR::Attribute::FixedFncTexture2Q: - case IR::Attribute::FixedFncTexture3S: - case IR::Attribute::FixedFncTexture3T: - case IR::Attribute::FixedFncTexture3R: - case IR::Attribute::FixedFncTexture3Q: - case IR::Attribute::FixedFncTexture4S: - case IR::Attribute::FixedFncTexture4T: - case IR::Attribute::FixedFncTexture4R: - case IR::Attribute::FixedFncTexture4Q: - case IR::Attribute::FixedFncTexture5S: - case IR::Attribute::FixedFncTexture5T: - case IR::Attribute::FixedFncTexture5R: - case IR::Attribute::FixedFncTexture5Q: - case IR::Attribute::FixedFncTexture6S: - case IR::Attribute::FixedFncTexture6T: - case IR::Attribute::FixedFncTexture6R: - case IR::Attribute::FixedFncTexture6Q: - case IR::Attribute::FixedFncTexture7S: - case IR::Attribute::FixedFncTexture7T: - case IR::Attribute::FixedFncTexture7R: - case IR::Attribute::FixedFncTexture7Q: - case IR::Attribute::FixedFncTexture8S: - case IR::Attribute::FixedFncTexture8T: - case IR::Attribute::FixedFncTexture8R: - case 
IR::Attribute::FixedFncTexture8Q: - case IR::Attribute::FixedFncTexture9S: - case IR::Attribute::FixedFncTexture9T: - case IR::Attribute::FixedFncTexture9R: - case IR::Attribute::FixedFncTexture9Q: - info.stores_fixed_fnc_textures = true; - break; case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: case IR::Attribute::ClipDistance2: @@ -175,7 +141,7 @@ void SetAttribute(Info& info, IR::Attribute attribute) { info.stores_viewport_mask = true; break; default: - throw NotImplementedException("Set attribute {}", attribute); + throw NotImplementedException("Set attribute {}", attr); } } -- cgit v1.2.3 From 4a2361a1e2271727f3259e8e4a60869165537253 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 02:15:07 -0300 Subject: buffer_cache: Reduce uniform buffer size from shader usage Increases performance significantly on certain titles. --- .../ir_opt/collect_shader_info_pass.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 6a5243c9f..fb2031fc8 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -560,32 +560,45 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::GetCbufU32: case IR::Opcode::GetCbufF32: case IR::Opcode::GetCbufU32x2: { - if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { - AddConstantBufferDescriptor(info, index.U32(), 1); - } else { + const IR::Value index{inst.Arg(0)}; + const IR::Value offset{inst.Arg(1)}; + if (!index.IsImmediate()) { throw NotImplementedException("Constant buffer with non-immediate index"); } + AddConstantBufferDescriptor(info, index.U32(), 1); + u32 element_size{}; switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: info.used_constant_buffer_types |= IR::Type::U8; + element_size = 1; break; case IR::Opcode::GetCbufU16: case IR::Opcode::GetCbufS16: info.used_constant_buffer_types |= IR::Type::U16; + element_size = 2; break; case IR::Opcode::GetCbufU32: info.used_constant_buffer_types |= IR::Type::U32; + element_size = 4; break; case IR::Opcode::GetCbufF32: info.used_constant_buffer_types |= IR::Type::F32; + element_size = 4; break; case IR::Opcode::GetCbufU32x2: info.used_constant_buffer_types |= IR::Type::U32x2; + element_size = 8; break; default: break; } + u32& size{info.constant_buffer_used_sizes[index.U32()]}; + if (offset.IsImmediate()) { + size = std::max(size, offset.U32() + element_size); + } else { + size = 0x10'000; + } break; } case IR::Opcode::BindlessImageSampleImplicitLod: -- cgit v1.2.3 From 562af301819227d65a251a2c29c997bf798da7ba Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 4 Jun 2021 00:11:16 +0200 Subject: shader: Fix VertexA Shaders. 
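For context on the restored passes below, here is a simplified sketch using a toy program representation rather than the recompiler's real IR: the VertexA half loses its Epilogue marker and the VertexB half loses its Prologue marker, so the two programs can be concatenated and execution falls through from A into B.

    // Illustrative sketch only; a "program" is modeled as a flat list of opcodes.
    #include <algorithm>
    #include <vector>

    enum class Op { Prologue, Code, Epilogue };
    using Program = std::vector<Op>;

    // Drop the end-of-shader marker from the first half.
    void VertexATransform(Program& a) {
        a.erase(std::remove(a.begin(), a.end(), Op::Epilogue), a.end());
    }

    // Drop the start-of-shader marker from the second half.
    void VertexBTransform(Program& b) {
        b.erase(std::remove(b.begin(), b.end(), Op::Prologue), b.end());
    }

    Program JoinDualVertex(Program a, Program b) {
        VertexATransform(a);
        VertexBTransform(b);
        a.insert(a.end(), b.begin(), b.end());
        return a;
    }

    int main() {
        const Program joined = JoinDualVertex({Op::Prologue, Op::Code, Op::Epilogue},
                                              {Op::Prologue, Op::Code, Op::Epilogue});
        return joined.size() == 4 ? 0 : 1;
    }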
--- src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | 24 +++++++++++++++-------- src/shader_recompiler/ir_opt/passes.h | 1 - 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp index b0a9f5258..a926123f2 100644 --- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -13,16 +13,24 @@ namespace Shader::Optimization { -void VertexATransformPass(IR::Program&) { - throw NotImplementedException("VertexA pass"); +void VertexATransformPass(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + return inst.Invalidate(); + } + } + } } -void VertexBTransformPass(IR::Program&) { - throw NotImplementedException("VertexA pass"); -} - -void DualVertexJoinPass(IR::Program&) { - throw NotImplementedException("VertexA pass"); +void VertexBTransformPass(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Prologue) { + return inst.Invalidate(); + } + } + } } } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index e9cb8546a..5ebde49ea 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -25,7 +25,6 @@ void VerificationPass(const IR::Program& program); // Dual Vertex void VertexATransformPass(IR::Program& program); void VertexBTransformPass(IR::Program& program); -void DualVertexJoinPass(IR::Program& program); void JoinTextureInfo(Info& base, Info& source); void JoinStorageInfo(Info& base, Info& source); -- cgit v1.2.3 From 373f75d944473731408d7a72c967d5c4b37af5bb Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 8 Jul 2021 17:22:31 -0400 Subject: shader: Add shader loop safety check settings Also add a setting for enable Nsight Aftermath. 
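The loop safety variables threaded through the SSA pass below back a per-loop iteration guard. A minimal host-side sketch of the technique, assuming an illustrative threshold (the real limit comes from the new settings, and the guard is emitted as IR rather than written in C++):

    // Illustrative only: a counter is initialised before the loop, decremented
    // each iteration, and forces an exit once it reaches zero so a malformed
    // shader loop cannot hang the GPU.
    #include <cstdio>

    constexpr unsigned kLoopSafetyThreshold = 0x1000; // example value

    int RunGuardedLoop() {
        unsigned safety_counter = kLoopSafetyThreshold;
        int value = 0;
        while (true) {
            ++value;
            if (value > 1'000'000) { // original, possibly never-taken exit
                break;
            }
            if (--safety_counter == 0) { // injected guard: force an exit
                std::printf("loop safety limit hit\n");
                break;
            }
        }
        return value;
    }

    int main() {
        std::printf("%d\n", RunGuardedLoop());
    }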
--- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 66 ++++++++++++++++------- 1 file changed, 47 insertions(+), 19 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index e54499ba5..a4ba393ef 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -48,73 +48,91 @@ struct GotoVariable : FlagTag { u32 index; }; +struct LoopSafetyVariable { + LoopSafetyVariable() = default; + explicit LoopSafetyVariable(u32 index_) : index{index_} {} + + auto operator<=>(const LoopSafetyVariable&) const noexcept = default; + + u32 index; +}; + struct IndirectBranchVariable { auto operator<=>(const IndirectBranchVariable&) const noexcept = default; }; -using Variant = std::variant; -using ValueMap = boost::container::flat_map>; +using Variant = + std::variant; +using ValueMap = boost::container::flat_map; struct DefTable { - const IR::Value& Def(IR::Block* block, IR::Reg variable) noexcept { + const IR::Value& Def(IR::Block* block, IR::Reg variable) { return block->SsaRegValue(variable); } - void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) { block->SetSsaRegValue(variable, value); } - const IR::Value& Def(IR::Block* block, IR::Pred variable) noexcept { + const IR::Value& Def(IR::Block* block, IR::Pred variable) { return preds[IR::PredIndex(variable)][block]; } - void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) { preds[IR::PredIndex(variable)].insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, GotoVariable variable) noexcept { + const IR::Value& Def(IR::Block* block, GotoVariable variable) { return goto_vars[variable.index][block]; } - void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) { goto_vars[variable.index].insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, IndirectBranchVariable) noexcept { + const IR::Value& Def(IR::Block* block, LoopSafetyVariable variable) { + return loop_safety_vars[variable.index][block]; + } + void SetDef(IR::Block* block, LoopSafetyVariable variable, const IR::Value& value) { + loop_safety_vars[variable.index].insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, IndirectBranchVariable) { return indirect_branch_var[block]; } - void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) { indirect_branch_var.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, ZeroFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, ZeroFlagTag) { return zero_flag[block]; } - void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) { zero_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, SignFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, SignFlagTag) { return sign_flag[block]; } - void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) { 
sign_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, CarryFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, CarryFlagTag) { return carry_flag[block]; } - void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) { carry_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, OverflowFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, OverflowFlagTag) { return overflow_flag[block]; } - void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) { overflow_flag.insert_or_assign(block, value); } std::array preds; boost::container::flat_map goto_vars; + boost::container::flat_map loop_safety_vars; ValueMap indirect_branch_var; ValueMap zero_flag; ValueMap sign_flag; @@ -134,6 +152,10 @@ IR::Opcode UndefOpcode(const FlagTag&) noexcept { return IR::Opcode::UndefU1; } +IR::Opcode UndefOpcode(const LoopSafetyVariable&) noexcept { + return IR::Opcode::UndefU32; +} + IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { return IR::Opcode::UndefU32; } @@ -315,6 +337,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetGotoVariable: pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); break; + case IR::Opcode::SetLoopSafetyVariable: + pass.WriteVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block, inst.Arg(0)); + break; case IR::Opcode::SetIndirectBranchVariable: pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); break; @@ -343,6 +368,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetGotoVariable: inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); break; + case IR::Opcode::GetLoopSafetyVariable: + inst.ReplaceUsesWith(pass.ReadVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block)); + break; case IR::Opcode::GetIndirectBranchVariable: inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block)); break; -- cgit v1.2.3 From 7ac55c2a750f00b41582a86eba5a44dcd781ae98 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 15 Jun 2021 17:00:07 -0300 Subject: shader: Fix loop safety to SSA pass --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index a4ba393ef..fff25c4a2 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -338,7 +338,7 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); break; case IR::Opcode::SetLoopSafetyVariable: - pass.WriteVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block, inst.Arg(0)); + pass.WriteVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); break; case IR::Opcode::SetIndirectBranchVariable: pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); -- cgit v1.2.3 From 3d9ecbe99844c44074c26f2db4db376059f50534 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 24 May 2021 18:35:37 -0400 Subject: glsl: Wip storage atomic ops --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 
deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index fb2031fc8..884ade004 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -687,9 +687,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LoadStorage32: case IR::Opcode::WriteStorage32: case IR::Opcode::StorageAtomicIAdd32: - case IR::Opcode::StorageAtomicSMin32: case IR::Opcode::StorageAtomicUMin32: - case IR::Opcode::StorageAtomicSMax32: case IR::Opcode::StorageAtomicUMax32: case IR::Opcode::StorageAtomicAnd32: case IR::Opcode::StorageAtomicOr32: @@ -714,6 +712,13 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::SharedAtomicExchange64: info.uses_int64_bit_atomics = true; break; + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::StorageAtomicSMin32: + case IR::Opcode::StorageAtomicSMax32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_s32_atomics = true; + break; case IR::Opcode::GlobalAtomicInc32: case IR::Opcode::StorageAtomicInc32: info.used_storage_buffer_types |= IR::Type::U32; -- cgit v1.2.3 From 11ba190462c7b69a47598b2d1572fac3bccc4adc Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 25 May 2021 01:35:30 -0400 Subject: glsl: Revert ssbo aliasing. Storage Atomics impl --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 884ade004..fb2031fc8 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -687,7 +687,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LoadStorage32: case IR::Opcode::WriteStorage32: case IR::Opcode::StorageAtomicIAdd32: + case IR::Opcode::StorageAtomicSMin32: case IR::Opcode::StorageAtomicUMin32: + case IR::Opcode::StorageAtomicSMax32: case IR::Opcode::StorageAtomicUMax32: case IR::Opcode::StorageAtomicAnd32: case IR::Opcode::StorageAtomicOr32: @@ -712,13 +714,6 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::SharedAtomicExchange64: info.uses_int64_bit_atomics = true; break; - case IR::Opcode::GlobalAtomicSMin32: - case IR::Opcode::GlobalAtomicSMax32: - case IR::Opcode::StorageAtomicSMin32: - case IR::Opcode::StorageAtomicSMax32: - info.used_storage_buffer_types |= IR::Type::U32; - info.uses_s32_atomics = true; - break; case IR::Opcode::GlobalAtomicInc32: case IR::Opcode::StorageAtomicInc32: info.used_storage_buffer_types |= IR::Type::U32; -- cgit v1.2.3 From 5399906c26292634ab3eec5fce88640092e9c4c2 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 25 May 2021 22:13:50 -0400 Subject: glsl: Track S32 atomics --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index fb2031fc8..c22e5992a 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ 
b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -687,9 +687,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LoadStorage32: case IR::Opcode::WriteStorage32: case IR::Opcode::StorageAtomicIAdd32: - case IR::Opcode::StorageAtomicSMin32: case IR::Opcode::StorageAtomicUMin32: - case IR::Opcode::StorageAtomicSMax32: case IR::Opcode::StorageAtomicUMax32: case IR::Opcode::StorageAtomicAnd32: case IR::Opcode::StorageAtomicOr32: @@ -759,6 +757,14 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32x2_max = true; break; + case IR::Opcode::StorageAtomicSMin32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_s32_min = true; + break; + case IR::Opcode::StorageAtomicSMax32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_s32_max = true; + break; case IR::Opcode::GlobalAtomicIAdd64: case IR::Opcode::GlobalAtomicSMin64: case IR::Opcode::GlobalAtomicUMin64: -- cgit v1.2.3 From 9ccbd749914a3371893ee3d6c1bdcb50c7f777ab Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 30 May 2021 14:31:59 -0400 Subject: glsl: Fix ATOM and implement ATOMS --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index c22e5992a..dc78cdefb 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -703,6 +703,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::WriteStorage128: info.used_storage_buffer_types |= IR::Type::U32x4; break; + case IR::Opcode::SharedAtomicSMin32: + info.uses_atomic_s32_min = true; + break; + case IR::Opcode::SharedAtomicSMax32: + info.uses_atomic_s32_max = true; + break; case IR::Opcode::SharedAtomicInc32: info.uses_shared_increment = true; break; -- cgit v1.2.3 From 6aa1bf7b6ff86bb7325e5b50709ddf5477b1e855 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 12 Jun 2021 20:14:56 -0400 Subject: glsl: Implement legacy varyings --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index dc78cdefb..10d2822ae 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -35,6 +35,7 @@ void GetAttribute(Info& info, IR::Attribute attr) { } if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { info.loads_fixed_fnc_textures = true; + info.loads_legacy_varyings = true; return; } switch (attr) { @@ -52,6 +53,7 @@ void GetAttribute(Info& info, IR::Attribute attr) { case IR::Attribute::ColorFrontDiffuseB: case IR::Attribute::ColorFrontDiffuseA: info.loads_color_front_diffuse = true; + info.loads_legacy_varyings = true; break; case IR::Attribute::PointSpriteS: case IR::Attribute::PointSpriteT: @@ -82,6 +84,7 @@ void SetAttribute(Info& info, IR::Attribute attr) { } if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { info.stores_fixed_fnc_textures = true; + info.stores_legacy_varyings = 
true; return; } switch (attr) { @@ -105,24 +108,28 @@ void SetAttribute(Info& info, IR::Attribute attr) { case IR::Attribute::ColorFrontDiffuseB: case IR::Attribute::ColorFrontDiffuseA: info.stores_color_front_diffuse = true; + info.stores_legacy_varyings = true; break; case IR::Attribute::ColorFrontSpecularR: case IR::Attribute::ColorFrontSpecularG: case IR::Attribute::ColorFrontSpecularB: case IR::Attribute::ColorFrontSpecularA: info.stores_color_front_specular = true; + info.stores_legacy_varyings = true; break; case IR::Attribute::ColorBackDiffuseR: case IR::Attribute::ColorBackDiffuseG: case IR::Attribute::ColorBackDiffuseB: case IR::Attribute::ColorBackDiffuseA: info.stores_color_back_diffuse = true; + info.stores_legacy_varyings = true; break; case IR::Attribute::ColorBackSpecularR: case IR::Attribute::ColorBackSpecularG: case IR::Attribute::ColorBackSpecularB: case IR::Attribute::ColorBackSpecularA: - info.stores_color_front_specular = true; + info.stores_color_back_specular = true; + info.stores_legacy_varyings = true; break; case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: -- cgit v1.2.3 From a0d0704affa0f86ba29ef59d90fa06c1b7c974da Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 13 Jun 2021 19:12:03 -0400 Subject: glsl: Conditionally add EXT_texture_shadow_lod --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 10d2822ae..47933df97 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -636,7 +636,6 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageGatherDref: case IR::Opcode::ImageFetch: case IR::Opcode::ImageQueryDimensions: - case IR::Opcode::ImageQueryLod: case IR::Opcode::ImageGradient: { const TextureType type{inst.Flags().type}; info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; @@ -644,6 +643,15 @@ void VisitUsages(Info& info, IR::Inst& inst) { inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; } + case IR::Opcode::ImageQueryLod: { + const auto flags{inst.Flags()}; + const TextureType type{flags.type}; + info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; + info.uses_shadow_lod |= flags.is_depth != 0; + info.uses_sparse_residency |= + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; + break; + } case IR::Opcode::ImageRead: { const auto flags{inst.Flags()}; info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless; -- cgit v1.2.3 From d36f667bc0adaa9f50d53efb4c908aadc38921a6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 15 Jun 2021 17:23:57 -0400 Subject: glsl: Address rest of feedback --- src/shader_recompiler/ir_opt/texture_pass.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index e9098239d..737f186ab 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -312,11 +312,14 @@ public: } u32 Add(const ImageBufferDescriptor& desc) { 
- return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { + const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && desc.size_shift == existing.size_shift; - }); + })}; + image_buffer_descriptors[index].is_written |= desc.is_written; + image_buffer_descriptors[index].is_read |= desc.is_read; + return index; } u32 Add(const TextureDescriptor& desc) { @@ -339,6 +342,7 @@ public: desc.size_shift == existing.size_shift; })}; image_descriptors[index].is_written |= desc.is_written; + image_descriptors[index].is_read |= desc.is_read; return index; } @@ -430,10 +434,12 @@ void TexturePass(Environment& env, IR::Program& program) { throw NotImplementedException("Unexpected separate sampler"); } const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; + const bool is_read{inst->GetOpcode() == IR::Opcode::ImageRead}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ .format = flags.image_format, .is_written = is_written, + .is_read = is_read, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .count = cbuf.count, @@ -444,6 +450,7 @@ void TexturePass(Environment& env, IR::Program& program) { .type = flags.type, .format = flags.image_format, .is_written = is_written, + .is_read = is_read, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .count = cbuf.count, -- cgit v1.2.3 From 374eeda1a35f6a1dc81cf22122c701be68e89c0f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 04:59:30 -0300 Subject: shader: Properly manage attributes not written from previous stages --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 47933df97..bab32b58b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -79,7 +79,7 @@ void GetAttribute(Info& info, IR::Attribute attr) { void SetAttribute(Info& info, IR::Attribute attr) { if (IR::IsGeneric(attr)) { - info.stores_generics.at(IR::GenericAttributeIndex(attr)) = true; + info.stores_generics[IR::GenericAttributeIndex(attr)] = true; return; } if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { @@ -956,7 +956,9 @@ void GatherInfoFromHeader(Environment& env, Info& info) { } if (info.stores_indexed_attributes) { for (size_t i = 0; i < info.stores_generics.size(); i++) { - info.stores_generics[i] |= header.vtg.IsOutputGenericVectorActive(i); + if (header.vtg.IsOutputGenericVectorActive(i)) { + info.stores_generics[i] = true; + } } info.stores_clip_distance |= header.vtg.omap_systemc.clip_distances != 0; info.stores_position |= header.vtg.omap_systemb.position != 0; -- cgit v1.2.3 From 0cd08b3e72ed042ae0bf97c62fb6b54580b0dfc9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 17 Jun 2021 20:35:25 -0300 Subject: shader: Align constant buffer sizes to 16 bytes WAR for AMD reading zeroes on uniform buffers of size 2. 
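A quick worked example of the change below; the AlignUp helper here is a generic reimplementation written for the example, standing in for Common::AlignUp. A constant buffer whose tracked usage previously ended at 2 bytes is now reported as 16 bytes:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Power-of-two alignment helper, equivalent in spirit to Common::AlignUp.
    constexpr std::uint32_t AlignUp(std::uint32_t value, std::uint32_t align) {
        return (value + align - 1) & ~(align - 1);
    }

    int main() {
        std::uint32_t size = 0;
        const std::uint32_t offset = 0;       // immediate cbuf offset
        const std::uint32_t element_size = 2; // e.g. a 16-bit load
        size = AlignUp(std::max(size, offset + element_size), 16u);
        assert(size == 16); // previously this would have stayed at 2
        return 0;
    }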
--- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index bab32b58b..a82472152 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/alignment.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" @@ -602,7 +603,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { } u32& size{info.constant_buffer_used_sizes[index.U32()]}; if (offset.IsImmediate()) { - size = std::max(size, offset.U32() + element_size); + size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u); } else { size = 0x10'000; } -- cgit v1.2.3 From a0365217f5b2ec783738af396ebd82f12cffc0b4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 17 Jun 2021 21:44:12 -0400 Subject: texture_pass: Fix is_read image qualification Atomic operations are considered to have both read and write access. This was not being accounted for. --- src/shader_recompiler/ir_opt/texture_pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 737f186ab..44ad10d43 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -434,7 +434,7 @@ void TexturePass(Environment& env, IR::Program& program) { throw NotImplementedException("Unexpected separate sampler"); } const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; - const bool is_read{inst->GetOpcode() == IR::Opcode::ImageRead}; + const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ .format = flags.image_format, -- cgit v1.2.3 From 808ef97a086e7cc58a3ceded1de516ad6a6be5d3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 21 Jun 2021 01:07:10 -0300 Subject: shader: Move loop safety tests to code emission --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 32 ++--------------------- 1 file changed, 2 insertions(+), 30 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index fff25c4a2..dcaced83f 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -48,22 +48,12 @@ struct GotoVariable : FlagTag { u32 index; }; -struct LoopSafetyVariable { - LoopSafetyVariable() = default; - explicit LoopSafetyVariable(u32 index_) : index{index_} {} - - auto operator<=>(const LoopSafetyVariable&) const noexcept = default; - - u32 index; -}; - struct IndirectBranchVariable { auto operator<=>(const IndirectBranchVariable&) const noexcept = default; }; -using Variant = - std::variant; +using Variant = std::variant; using ValueMap = boost::container::flat_map; struct DefTable { @@ -88,13 +78,6 @@ struct DefTable { goto_vars[variable.index].insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, LoopSafetyVariable 
variable) { - return loop_safety_vars[variable.index][block]; - } - void SetDef(IR::Block* block, LoopSafetyVariable variable, const IR::Value& value) { - loop_safety_vars[variable.index].insert_or_assign(block, value); - } - const IR::Value& Def(IR::Block* block, IndirectBranchVariable) { return indirect_branch_var[block]; } @@ -132,7 +115,6 @@ struct DefTable { std::array preds; boost::container::flat_map goto_vars; - boost::container::flat_map loop_safety_vars; ValueMap indirect_branch_var; ValueMap zero_flag; ValueMap sign_flag; @@ -152,10 +134,6 @@ IR::Opcode UndefOpcode(const FlagTag&) noexcept { return IR::Opcode::UndefU1; } -IR::Opcode UndefOpcode(const LoopSafetyVariable&) noexcept { - return IR::Opcode::UndefU32; -} - IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { return IR::Opcode::UndefU32; } @@ -337,9 +315,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetGotoVariable: pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); break; - case IR::Opcode::SetLoopSafetyVariable: - pass.WriteVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); - break; case IR::Opcode::SetIndirectBranchVariable: pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); break; @@ -368,9 +343,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetGotoVariable: inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); break; - case IR::Opcode::GetLoopSafetyVariable: - inst.ReplaceUsesWith(pass.ReadVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block)); - break; case IR::Opcode::GetIndirectBranchVariable: inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block)); break; -- cgit v1.2.3 From 97e80dda55aec0ff791e4990f09c85e7a7067730 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 01:32:09 -0300 Subject: shader: Add constant propagation to integer vectors --- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 796b4122d..3c72203ad 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -563,6 +563,15 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return (base & ~(~(~0u << bits) << offset)) | (insert << offset); }); return; + case IR::Opcode::CompositeExtractU32x2: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x2, + IR::Opcode::CompositeInsertU32x2); + case IR::Opcode::CompositeExtractU32x3: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x3, + IR::Opcode::CompositeInsertU32x3); + case IR::Opcode::CompositeExtractU32x4: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x4, + IR::Opcode::CompositeInsertU32x4); case IR::Opcode::CompositeExtractF32x2: return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2, IR::Opcode::CompositeInsertF32x2); -- cgit v1.2.3 From 04ef2160f9e164dbf7c2ab2f37de5533a8d5c450 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 01:32:47 -0300 Subject: shader: Teach global memory base tracker to follow vectors --- .../global_memory_to_storage_buffer_pass.cpp | 29 +++++++++++----------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git 
a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 1d11a00d8..70449eeca 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -253,12 +253,12 @@ struct LowAddrInfo { /// Tries to track the first 32-bits of a global memory instruction std::optional TrackLowAddress(IR::Inst* inst) { // The first argument is the low level GPU pointer to the global memory instruction - const IR::U64 addr{inst->Arg(0)}; + const IR::Value addr{inst->Arg(0)}; if (addr.IsImmediate()) { // Not much we can do if it's an immediate return std::nullopt; } - // This address is expected to either be a PackUint2x32 or a IAdd64 + // This address is expected to either be a PackUint2x32, a IAdd64, or a CompositeConstructU32x2 IR::Inst* addr_inst{addr.InstRecursive()}; s32 imm_offset{0}; if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) { @@ -274,25 +274,24 @@ std::optional TrackLowAddress(IR::Inst* inst) { if (iadd_addr.IsImmediate()) { return std::nullopt; } - addr_inst = iadd_addr.Inst(); + addr_inst = iadd_addr.InstRecursive(); } - // With IAdd64 handled, now PackUint2x32 is expected without exceptions - if (addr_inst->GetOpcode() != IR::Opcode::PackUint2x32) { - return std::nullopt; - } - // PackUint2x32 is expected to be generated from a vector - const IR::Value vector{addr_inst->Arg(0)}; - if (vector.IsImmediate()) { - return std::nullopt; + // With IAdd64 handled, now PackUint2x32 is expected + if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) { + // PackUint2x32 is expected to be generated from a vector + const IR::Value vector{addr_inst->Arg(0)}; + if (vector.IsImmediate()) { + return std::nullopt; + } + addr_inst = vector.InstRecursive(); } - // This vector is expected to be a CompositeConstructU32x2 - IR::Inst* const vector_inst{vector.InstRecursive()}; - if (vector_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) { + // The vector is expected to be a CompositeConstructU32x2 + if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) { return std::nullopt; } // Grab the first argument from the CompositeConstructU32x2, this is the low address. return LowAddrInfo{ - .value{IR::U32{vector_inst->Arg(0)}}, + .value{IR::U32{addr_inst->Arg(0)}}, .imm_offset = imm_offset, }; } -- cgit v1.2.3 From d8d5501459d6c8b4c39307d293b0f40834dce8f3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 01:33:42 -0300 Subject: shader: Add int64 to int32 lowering pass --- .../ir_opt/lower_int64_to_int32.cpp | 216 +++++++++++++++++++++ src/shader_recompiler/ir_opt/passes.h | 1 + 2 files changed, 217 insertions(+) create mode 100644 src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp new file mode 100644 index 000000000..787a64f93 --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp @@ -0,0 +1,216 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Optimization { +namespace { +std::pair Unpack(IR::IREmitter& ir, const IR::Value& packed) { + if (packed.IsImmediate()) { + const u64 value{packed.U64()}; + return { + ir.Imm32(static_cast(value)), + ir.Imm32(static_cast(value >> 32)), + }; + } else { + return std::pair{ + ir.CompositeExtract(packed, 0u), + ir.CompositeExtract(packed, 1u), + }; + } +} + +void IAdd64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("IAdd64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))}; + const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))}; + + const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)}; + const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))}; + + const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void ISub64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ISub64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))}; + const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))}; + + const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)}; + const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)}; + const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))}; + + const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void INeg64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("INeg64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + lo = ir.BitwiseNot(lo); + hi = ir.BitwiseNot(hi); + + lo = ir.IAdd(lo, ir.Imm32(1)); + + const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))}; + hi = ir.IAdd(hi, carry); + + inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi)); +} + +void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + const IR::U32 shift{inst.Arg(1)}; + + const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)}; + const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)}; + + const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; + const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; + const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; + + const IR::U32 long_ret_lo{ir.Imm32(0)}; + const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)}; + + const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; + const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, false)}; + const IR::U32 short_ret_lo{shifted_lo}; + const IR::U32 
short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)}; + + const IR::U32 zero_ret_lo{lo}; + const IR::U32 zero_ret_hi{hi}; + + const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; + const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; + + const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; + const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + const IR::U32 shift{inst.Arg(1)}; + + const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)}; + const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)}; + + const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; + const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; + const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; + + const IR::U32 long_ret_hi{ir.Imm32(0)}; + const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)}; + + const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; + const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)}; + const IR::U32 short_ret_hi{shifted_hi}; + const IR::U32 short_ret_lo{ + ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)}; + + const IR::U32 zero_ret_lo{lo}; + const IR::U32 zero_ret_hi{hi}; + + const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; + const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; + + const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; + const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + const IR::U32 shift{inst.Arg(1)}; + + const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)}; + const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)}; + + const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))}; + + const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; + const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; + const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; + + const IR::U32 long_ret_hi{sign_extension}; + const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)}; + + const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; + const IR::U32 short_hi_extract(ir.BitFieldExtract(hi, ir.Imm32(0), shift)); + const IR::U32 short_ret_hi{shifted_hi}; + const IR::U32 short_ret_lo{ + ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)}; + + const IR::U32 zero_ret_lo{lo}; + const IR::U32 zero_ret_hi{hi}; + + const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; + const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; + + const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; + const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; + 
inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void Lower(IR::Block& block, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::PackUint2x32: + case IR::Opcode::UnpackUint2x32: + return inst.ReplaceOpcode(IR::Opcode::Identity); + case IR::Opcode::IAdd64: + return IAdd64To32(block, inst); + case IR::Opcode::ISub64: + return ISub64To32(block, inst); + case IR::Opcode::INeg64: + return INeg64To32(block, inst); + case IR::Opcode::ShiftLeftLogical64: + return ShiftLeftLogical64To32(block, inst); + case IR::Opcode::ShiftRightLogical64: + return ShiftRightLogical64To32(block, inst); + case IR::Opcode::ShiftRightArithmetic64: + return ShiftRightArithmetic64To32(block, inst); + default: + break; + } +} +} // Anonymous namespace + +void LowerInt64ToInt32(IR::Program& program) { + for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + for (IR::Inst& inst : block->Instructions()) { + Lower(*block, inst); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 5ebde49ea..2f89b1ea0 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -18,6 +18,7 @@ void DeadCodeEliminationPass(IR::Program& program); void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Program& program); void LowerFp16ToFp32(IR::Program& program); +void LowerInt64ToInt32(IR::Program& program); void SsaRewritePass(IR::Program& program); void TexturePass(Environment& env, IR::Program& program); void VerificationPass(const IR::Program& program); -- cgit v1.2.3 From 257d2aab7469ba0ea6cf26e6aec99043b5ed2f59 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 24 Jun 2021 00:19:31 -0400 Subject: lower_int64_to_int32: Add missing include --- src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp index 787a64f93..abf7c87c7 100644 --- a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp +++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp @@ -10,6 +10,7 @@ #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { namespace { -- cgit v1.2.3 From 7dafa96ab59892b7f1fbffdb61e4326e6443955f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Jun 2021 02:41:09 -0300 Subject: shader: Rework varyings and implement passthrough geometry shaders Put all varyings into a single std::bitset with helpers to access it. Implement passthrough geometry shaders using host's. 
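For reference, the rework replaces the per-varying booleans in Info with two attribute-indexed bitsets, info.loads and info.stores. The hunks below only touch their public surface, a raw `mask` plus a `Set(attribute, bool)` helper, so the following is a minimal sketch of such a tracker rather than the real header (the struct name, bitset width, include path and the extra helper are assumptions made for illustration):

    // Sketch only: relies on the existing IR::Attribute enum; names are hypothetical.
    #include <bitset>
    #include <cstddef>

    #include "shader_recompiler/frontend/ir/attribute.h" // assumed location of IR::Attribute

    namespace Shader {

    struct VaryingBits {
        std::bitset<256> mask; // one bit per attribute component (width assumed)

        // Mirrors the info.loads.Set(...) / info.stores.Set(...) calls in the pass.
        void Set(IR::Attribute attribute, bool state = true) {
            mask[static_cast<size_t>(attribute)] = state;
        }

        bool operator[](IR::Attribute attribute) const {
            return mask[static_cast<size_t>(attribute)];
        }

        // True when any of the four components of a generic vector is used.
        bool AnyComponent(IR::Attribute base) const {
            const size_t index = static_cast<size_t>(base);
            return mask[index] || mask[index + 1] || mask[index + 2] || mask[index + 3];
        }
    };

    } // namespace Shader

With that shape, GatherInfoFromHeader can translate each header flag into a single Set call, which is the pattern the new hunks below follow.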
--- .../ir_opt/collect_shader_info_pass.cpp | 202 +++++++-------------- 1 file changed, 65 insertions(+), 137 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index a82472152..5e32ac784 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -29,130 +29,6 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { }); } -void GetAttribute(Info& info, IR::Attribute attr) { - if (IR::IsGeneric(attr)) { - info.input_generics.at(IR::GenericAttributeIndex(attr)).used = true; - return; - } - if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { - info.loads_fixed_fnc_textures = true; - info.loads_legacy_varyings = true; - return; - } - switch (attr) { - case IR::Attribute::PrimitiveId: - info.loads_primitive_id = true; - break; - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - info.loads_position = true; - break; - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: - info.loads_color_front_diffuse = true; - info.loads_legacy_varyings = true; - break; - case IR::Attribute::PointSpriteS: - case IR::Attribute::PointSpriteT: - info.loads_point_coord = true; - break; - case IR::Attribute::TessellationEvaluationPointU: - case IR::Attribute::TessellationEvaluationPointV: - info.loads_tess_coord = true; - break; - case IR::Attribute::InstanceId: - info.loads_instance_id = true; - break; - case IR::Attribute::VertexId: - info.loads_vertex_id = true; - break; - case IR::Attribute::FrontFace: - info.loads_front_face = true; - break; - default: - throw NotImplementedException("Get attribute {}", attr); - } -} - -void SetAttribute(Info& info, IR::Attribute attr) { - if (IR::IsGeneric(attr)) { - info.stores_generics[IR::GenericAttributeIndex(attr)] = true; - return; - } - if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { - info.stores_fixed_fnc_textures = true; - info.stores_legacy_varyings = true; - return; - } - switch (attr) { - case IR::Attribute::Layer: - info.stores_layer = true; - break; - case IR::Attribute::ViewportIndex: - info.stores_viewport_index = true; - break; - case IR::Attribute::PointSize: - info.stores_point_size = true; - break; - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - info.stores_position = true; - break; - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: - info.stores_color_front_diffuse = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorFrontSpecularR: - case IR::Attribute::ColorFrontSpecularG: - case IR::Attribute::ColorFrontSpecularB: - case IR::Attribute::ColorFrontSpecularA: - info.stores_color_front_specular = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorBackDiffuseR: - case IR::Attribute::ColorBackDiffuseG: - case IR::Attribute::ColorBackDiffuseB: - case IR::Attribute::ColorBackDiffuseA: - info.stores_color_back_diffuse = true; - info.stores_legacy_varyings = true; - break; - case 
IR::Attribute::ColorBackSpecularR: - case IR::Attribute::ColorBackSpecularG: - case IR::Attribute::ColorBackSpecularB: - case IR::Attribute::ColorBackSpecularA: - info.stores_color_back_specular = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ClipDistance0: - case IR::Attribute::ClipDistance1: - case IR::Attribute::ClipDistance2: - case IR::Attribute::ClipDistance3: - case IR::Attribute::ClipDistance4: - case IR::Attribute::ClipDistance5: - case IR::Attribute::ClipDistance6: - case IR::Attribute::ClipDistance7: - info.stores_clip_distance = true; - break; - case IR::Attribute::FogCoordinate: - info.stores_fog_coordinate = true; - break; - case IR::Attribute::ViewportMask: - info.stores_viewport_mask = true; - break; - default: - throw NotImplementedException("Set attribute {}", attr); - } -} - void GetPatch(Info& info, IR::Patch patch) { if (!IR::IsGeneric(patch)) { throw NotImplementedException("Reading non-generic patch {}", patch); @@ -511,10 +387,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.uses_demote_to_helper_invocation = true; break; case IR::Opcode::GetAttribute: - GetAttribute(info, inst.Arg(0).Attribute()); + info.loads.mask[static_cast(inst.Arg(0).Attribute())] = true; break; case IR::Opcode::SetAttribute: - SetAttribute(info, inst.Arg(0).Attribute()); + info.stores.mask[static_cast(inst.Arg(0).Attribute())] = true; break; case IR::Opcode::GetPatch: GetPatch(info, inst.Arg(0).Patch()); @@ -943,26 +819,78 @@ void GatherInfoFromHeader(Environment& env, Info& info) { if (!info.loads_indexed_attributes) { return; } - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used |= header.ps.IsGenericVectorActive(i); + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const size_t offset{static_cast(IR::Attribute::Generic0X) + index * 4}; + const auto vector{header.ps.imap_generic_vector[index]}; + info.loads.mask[offset + 0] = vector.x != PixelImap::Unused; + info.loads.mask[offset + 1] = vector.y != PixelImap::Unused; + info.loads.mask[offset + 2] = vector.z != PixelImap::Unused; + info.loads.mask[offset + 3] = vector.w != PixelImap::Unused; } - info.loads_position |= header.ps.imap_systemb.position != 0; return; } if (info.loads_indexed_attributes) { - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used |= header.vtg.IsInputGenericVectorActive(i); + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask = header.vtg.InputGeneric(index); + for (size_t i = 0; i < 4; ++i) { + info.loads.Set(attribute + i, mask[i]); + } + } + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.clip_distances}; + info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); } - info.loads_position |= header.vtg.imap_systemb.position != 0; + info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0); + info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0); + info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0); + info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0); + info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0); + info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0); + info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0); + 
info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0); + info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0); + info.loads.Set(IR::Attribute::PointSpriteT, header.vtg.point_sprite_t != 0); + info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.tessellation_eval_point_u != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.tessellation_eval_point_v != 0); + info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 0); + info.loads.Set(IR::Attribute::VertexId, header.vtg.vertex_id != 0); + // TODO: Legacy varyings } if (info.stores_indexed_attributes) { - for (size_t i = 0; i < info.stores_generics.size(); i++) { - if (header.vtg.IsOutputGenericVectorActive(i)) { - info.stores_generics[i] = true; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask{header.vtg.OutputGeneric(index)}; + for (size_t i = 0; i < 4; ++i) { + info.stores.Set(attribute + i, mask[i]); } } - info.stores_clip_distance |= header.vtg.omap_systemc.clip_distances != 0; - info.stores_position |= header.vtg.omap_systemb.position != 0; + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.omap_systemc.clip_distances}; + info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); + } + info.stores.Set(IR::Attribute::PrimitiveId, + header.vtg.omap_systemb.primitive_array_id != 0); + info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0); + info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0); + info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0); + info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0); + info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0); + info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0); + info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0); + info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0); + info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0); + info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.omap_systemc.tessellation_eval_point_u != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.omap_systemc.tessellation_eval_point_v != 0); + info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0); + info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0); + // TODO: Legacy varyings } } } // Anonymous namespace -- cgit v1.2.3 From 57f222c56e6facb623dccfe1abdc2bdeba8535ec Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 2 Jul 2021 12:45:23 -0400 Subject: dual_vertex_pass: Clang format --- src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | 28 +++++++++++------------ 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp index a926123f2..3d2c205c2 100644 --- 
a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -14,23 +14,23 @@ namespace Shader::Optimization { void VertexATransformPass(IR::Program& program) { - for (IR::Block* const block : program.blocks) { - for (IR::Inst& inst : block->Instructions()) { - if (inst.GetOpcode() == IR::Opcode::Epilogue) { - return inst.Invalidate(); - } - } - } + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + return inst.Invalidate(); + } + } + } } void VertexBTransformPass(IR::Program& program) { - for (IR::Block* const block : program.blocks) { - for (IR::Inst& inst : block->Instructions()) { - if (inst.GetOpcode() == IR::Opcode::Prologue) { - return inst.Invalidate(); - } - } - } + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Prologue) { + return inst.Invalidate(); + } + } + } } } // namespace Shader::Optimization -- cgit v1.2.3 From 41c6cb70f909d7b223824f3879e0009521e9142f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 4 Jul 2021 20:48:54 -0400 Subject: glsl: Fix tracking of info.uses_shadow_lod --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 5e32ac784..5ead930f1 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -505,10 +505,6 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::BoundImageQueryDimensions: case IR::Opcode::BoundImageQueryLod: case IR::Opcode::BoundImageGradient: - case IR::Opcode::ImageSampleImplicitLod: - case IR::Opcode::ImageSampleExplicitLod: - case IR::Opcode::ImageSampleDrefImplicitLod: - case IR::Opcode::ImageSampleDrefExplicitLod: case IR::Opcode::ImageGather: case IR::Opcode::ImageGatherDref: case IR::Opcode::ImageFetch: @@ -520,6 +516,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; } + case IR::Opcode::ImageSampleImplicitLod: + case IR::Opcode::ImageSampleExplicitLod: + case IR::Opcode::ImageSampleDrefImplicitLod: + case IR::Opcode::ImageSampleDrefExplicitLod: case IR::Opcode::ImageQueryLod: { const auto flags{inst.Flags()}; const TextureType type{flags.type}; -- cgit v1.2.3 From 49946cf780c317b4c5ccabb52ec433eba01c1970 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sun, 11 Jul 2021 22:10:38 -0400 Subject: shader_recompiler, video_core: Resolve clang errors Silences the following warnings-turned-errors: -Wsign-conversion -Wunused-private-field -Wbraced-scalar-init -Wunused-variable And some other errors --- .../ir_opt/global_memory_to_storage_buffer_pass.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 70449eeca..f9de17b25 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -314,8 +314,8 @@ 
std::optional Track(const IR::Value& value, const Bias* bias) return std::nullopt; } const StorageBufferAddr storage_buffer{ - .index{index.U32()}, - .offset{offset.U32()}, + .index = index.U32(), + .offset = offset.U32(), }; if (!Common::IsAligned(storage_buffer.offset, 16)) { // The SSBO pointer has to be aligned @@ -484,7 +484,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { .cbuf_index = storage_buffer.index, .cbuf_offset = storage_buffer.offset, .count = 1, - .is_written{info.writes.contains(storage_buffer)}, + .is_written = info.writes.contains(storage_buffer), }); } for (const StorageInst& storage_inst : info.to_replace) { -- cgit v1.2.3 From bf2956d77ab0ad06c4b5505cc9906e51e5878274 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 12 Jul 2021 05:22:01 -0300 Subject: shader: Avoid usage of C++20 ranges to build in clang --- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 5 +++-- src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp | 2 -- src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | 6 ------ .../ir_opt/global_memory_to_storage_buffer_pass.cpp | 1 - src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp | 5 +++-- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 10 ++++++---- 6 files changed, 12 insertions(+), 17 deletions(-) (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 3c72203ad..8dd6d6c2c 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include -#include #include #include @@ -599,7 +598,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { } // Anonymous namespace void ConstantPropagationPass(IR::Program& program) { - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + const auto end{program.post_order_blocks.rend()}; + for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { + IR::Block* const block{*it}; for (IR::Inst& inst : block->Instructions()) { ConstantPropagation(*block, inst); } diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp index 1e4a3fdae..400836301 100644 --- a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include - #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp index 3d2c205c2..055ba9c54 100644 --- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -2,12 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include -#include - -#include "common/bit_cast.h" -#include "common/bit_util.h" -#include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/ir_opt/passes.h" diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index f9de17b25..4197b0095 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp index abf7c87c7..e80d3d1d9 100644 --- a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp +++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include #include #include "shader_recompiler/exception.h" @@ -207,7 +206,9 @@ void Lower(IR::Block& block, IR::Inst& inst) { } // Anonymous namespace void LowerInt64ToInt32(IR::Program& program) { - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + const auto end{program.post_order_blocks.rend()}; + for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { + IR::Block* const block{*it}; for (IR::Inst& inst : block->Instructions()) { Lower(*block, inst); } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index dcaced83f..53145fb5e 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -14,7 +14,6 @@ // https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 // -#include #include #include #include @@ -243,7 +242,9 @@ public: void SealBlock(IR::Block* block) { const auto it{incomplete_phis.find(block)}; if (it != incomplete_phis.end()) { - for (auto& [variant, phi] : it->second) { + for (auto& pair : it->second) { + auto& variant{pair.first}; + auto& phi{pair.second}; std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); } } @@ -373,8 +374,9 @@ void VisitBlock(Pass& pass, IR::Block* block) { void SsaRewritePass(IR::Program& program) { Pass pass; - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { - VisitBlock(pass, block); + const auto end{program.post_order_blocks.rend()}; + for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) { + VisitBlock(pass, *block); } } -- cgit v1.2.3
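The final change swaps `program.post_order_blocks | std::views::reverse` for explicit reverse iterators because Clang's standard library at the time did not implement the C++20 ranges pieces the code relied on. Walking post_order_blocks backwards still yields reverse post order, so predecessors (back edges aside) are processed before their successors, which is the ordering the constant propagation, int64 lowering and SSA passes depend on. A small sketch of the equivalent loop, with the helper name invented for illustration:

    // Same traversal the patch writes inline in each pass: rbegin/rend over
    // the post-order block list instead of std::views::reverse.
    template <typename Func>
    void ForEachBlockReversePostOrder(IR::Program& program, Func&& func) {
        const auto end = program.post_order_blocks.rend();
        for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
            func(**it); // *it is an IR::Block* const, so **it is the block itself
        }
    }

    // Usage, mirroring LowerInt64ToInt32:
    // ForEachBlockReversePostOrder(program, [](IR::Block& block) {
    //     for (IR::Inst& inst : block.Instructions()) {
    //         Lower(block, inst);
    //     }
    // });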