Diffstat (limited to 'src/shader_recompiler/ir_opt')
6 files changed, 573 insertions, 36 deletions
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
new file mode 100644
index 000000000..02f5b653d
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -0,0 +1,146 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <type_traits>
+
+#include "common/bit_util.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+[[nodiscard]] u32 BitFieldUExtract(u32 base, u32 shift, u32 count) {
+    if (static_cast<size_t>(shift) + static_cast<size_t>(count) > Common::BitSize<u32>()) {
+        throw LogicError("Undefined result in BitFieldUExtract({}, {}, {})", base, shift, count);
+    }
+    return (base >> shift) & ((1U << count) - 1);
+}
+
+template <typename T>
+[[nodiscard]] T Arg(const IR::Value& value) {
+    if constexpr (std::is_same_v<T, bool>) {
+        return value.U1();
+    } else if constexpr (std::is_same_v<T, u32>) {
+        return value.U32();
+    } else if constexpr (std::is_same_v<T, u64>) {
+        return value.U64();
+    }
+}
+
+template <typename ImmFn>
+bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
+    const auto arg = [](const IR::Value& value) {
+        if constexpr (std::is_invocable_r_v<bool, ImmFn, bool, bool>) {
+            return value.U1();
+        } else if constexpr (std::is_invocable_r_v<u32, ImmFn, u32, u32>) {
+            return value.U32();
+        } else if constexpr (std::is_invocable_r_v<u64, ImmFn, u64, u64>) {
+            return value.U64();
+        }
+    };
+    const IR::Value lhs{inst.Arg(0)};
+    const IR::Value rhs{inst.Arg(1)};
+
+    const bool is_lhs_immediate{lhs.IsImmediate()};
+    const bool is_rhs_immediate{rhs.IsImmediate()};
+
+    if (is_lhs_immediate && is_rhs_immediate) {
+        const auto result{imm_fn(arg(lhs), arg(rhs))};
+        inst.ReplaceUsesWith(IR::Value{result});
+        return false;
+    }
+    if (is_lhs_immediate && !is_rhs_immediate) {
+        IR::Inst* const rhs_inst{rhs.InstRecursive()};
+        if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) {
+            const auto combined{imm_fn(arg(lhs), arg(rhs_inst->Arg(1)))};
+            inst.SetArg(0, rhs_inst->Arg(0));
+            inst.SetArg(1, IR::Value{combined});
+        } else {
+            // Normalize
+            inst.SetArg(0, rhs);
+            inst.SetArg(1, lhs);
+        }
+    }
+    if (!is_lhs_immediate && is_rhs_immediate) {
+        const IR::Inst* const lhs_inst{lhs.InstRecursive()};
+        if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) {
+            const auto combined{imm_fn(arg(rhs), arg(lhs_inst->Arg(1)))};
+            inst.SetArg(0, lhs_inst->Arg(0));
+            inst.SetArg(1, IR::Value{combined});
+        }
+    }
+    return true;
+}
+
+void FoldGetRegister(IR::Inst& inst) {
+    if (inst.Arg(0).Reg() == IR::Reg::RZ) {
+        inst.ReplaceUsesWith(IR::Value{u32{0}});
+    }
+}
+
+void FoldGetPred(IR::Inst& inst) {
+    if (inst.Arg(0).Pred() == IR::Pred::PT) {
+        inst.ReplaceUsesWith(IR::Value{true});
+    }
+}
+
+template <typename T>
+void FoldAdd(IR::Inst& inst) {
+    if (inst.HasAssociatedPseudoOperation()) {
+        return;
+    }
+    if (!FoldCommutative(inst, [](T a, T b) { return a + b; })) {
+        return;
+    }
+    const IR::Value rhs{inst.Arg(1)};
+    if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
+        inst.ReplaceUsesWith(inst.Arg(0));
+    }
+}
+
+void FoldLogicalAnd(IR::Inst& inst) {
+    if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) {
+        return;
+    }
+    const IR::Value rhs{inst.Arg(1)};
+    if (rhs.IsImmediate()) {
+        if (rhs.U1()) {
+            inst.ReplaceUsesWith(inst.Arg(0));
+        } else {
+            inst.ReplaceUsesWith(IR::Value{false});
+        }
+    }
+}
+
+void ConstantPropagation(IR::Inst& inst) {
+    switch (inst.Opcode()) {
+    case IR::Opcode::GetRegister:
+        return FoldGetRegister(inst);
+    case IR::Opcode::GetPred:
+        return FoldGetPred(inst);
+    case IR::Opcode::IAdd32:
+        return FoldAdd<u32>(inst);
+    case IR::Opcode::IAdd64:
+        return FoldAdd<u64>(inst);
+    case IR::Opcode::BitFieldUExtract:
+        if (inst.AreAllArgsImmediates() && !inst.HasAssociatedPseudoOperation()) {
+            inst.ReplaceUsesWith(IR::Value{
+                BitFieldUExtract(inst.Arg(0).U32(), inst.Arg(1).U32(), inst.Arg(2).U32())});
+        }
+        break;
+    case IR::Opcode::LogicalAnd:
+        return FoldLogicalAnd(inst);
+    default:
+        break;
+    }
+}
+} // Anonymous namespace
+
+void ConstantPropagationPass(IR::Block& block) {
+    std::ranges::for_each(block, ConstantPropagation);
+}
+
+} // namespace Shader::Optimization
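As a quick illustration of the folding rule the new pass implements, here is a standalone sketch; the Operand/AddInst types and FoldAdd helper are hypothetical stand-ins, not the yuzu IR API. It mirrors FoldCommutative: an add of two immediates is folded outright, and a lone immediate is normalized onto the right-hand side so the later "x + 0" check only has to look at the second argument.

// Sketch only: hypothetical types, not the yuzu IR.
#include <cstdint>
#include <iostream>
#include <optional>
#include <utility>

struct Operand {
    std::optional<std::uint32_t> imm; // engaged when the operand is an immediate
    const char* name = "";            // symbolic name otherwise
};

struct AddInst {
    Operand lhs;
    Operand rhs;
};

// Fold when both operands are immediates; otherwise move the immediate (if any)
// to the right-hand side, mirroring the "Normalize" branch of FoldCommutative.
std::optional<std::uint32_t> FoldAdd(AddInst& inst) {
    if (inst.lhs.imm && inst.rhs.imm) {
        return *inst.lhs.imm + *inst.rhs.imm; // both immediate: fold completely
    }
    if (inst.lhs.imm && !inst.rhs.imm) {
        std::swap(inst.lhs, inst.rhs); // canonical form: immediate on the right
    }
    // With the immediate canonicalized to the right, an "x + 0" simplification
    // only needs to inspect inst.rhs, as FoldAdd<T> does with Arg(1) above.
    return std::nullopt;
}

int main() {
    AddInst both{{.imm = 3}, {.imm = 4}};
    AddInst mixed{{.imm = 5}, {.name = "r0"}};
    std::cout << *FoldAdd(both) << '\n';                             // 7
    FoldAdd(mixed);                                                  // normalizes
    std::cout << mixed.lhs.name << " + " << *mixed.rhs.imm << '\n';  // r0 + 5
}

Under these assumptions the example prints "7" and then "r0 + 5".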
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
new file mode 100644
index 000000000..ee69a5c9d
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -0,0 +1,331 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <compare>
+#include <optional>
+#include <ranges>
+
+#include <boost/container/flat_set.hpp>
+#include <boost/container/small_vector.hpp>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+/// Address in constant buffers to the storage buffer descriptor
+struct StorageBufferAddr {
+    auto operator<=>(const StorageBufferAddr&) const noexcept = default;
+
+    u32 index;
+    u32 offset;
+};
+
+/// Block iterator to a global memory instruction and the storage buffer it uses
+struct StorageInst {
+    StorageBufferAddr storage_buffer;
+    IR::Block::iterator inst;
+};
+
+/// Bias towards a certain range of constant buffers when looking for storage buffers
+struct Bias {
+    u32 index;
+    u32 offset_begin;
+    u32 offset_end;
+};
+
+using StorageBufferSet =
+    boost::container::flat_set<StorageBufferAddr, std::less<StorageBufferAddr>,
+                               boost::container::small_vector<StorageBufferAddr, 16>>;
+using StorageInstVector = boost::container::small_vector<StorageInst, 32>;
+
+/// Returns true when the instruction is a global memory instruction
+bool IsGlobalMemory(const IR::Inst& inst) {
+    switch (inst.Opcode()) {
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobal32:
+    case IR::Opcode::LoadGlobal64:
+    case IR::Opcode::LoadGlobal128:
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobal32:
+    case IR::Opcode::WriteGlobal64:
+    case IR::Opcode::WriteGlobal128:
+        return true;
+    default:
+        return false;
+    }
+}
+
+/// Converts a global memory opcode to its storage buffer equivalent
+IR::Opcode GlobalToStorage(IR::Opcode opcode) {
+    switch (opcode) {
+    case IR::Opcode::LoadGlobalS8:
+        return IR::Opcode::LoadStorageS8;
+    case IR::Opcode::LoadGlobalU8:
+        return IR::Opcode::LoadStorageU8;
+    case IR::Opcode::LoadGlobalS16:
+        return IR::Opcode::LoadStorageS16;
+    case IR::Opcode::LoadGlobalU16:
+        return IR::Opcode::LoadStorageU16;
+    case IR::Opcode::LoadGlobal32:
+        return IR::Opcode::LoadStorage32;
+    case IR::Opcode::LoadGlobal64:
+        return IR::Opcode::LoadStorage64;
+    case IR::Opcode::LoadGlobal128:
+        return IR::Opcode::LoadStorage128;
+    case IR::Opcode::WriteGlobalS8:
+        return IR::Opcode::WriteStorageS8;
+    case IR::Opcode::WriteGlobalU8:
+        return IR::Opcode::WriteStorageU8;
+    case IR::Opcode::WriteGlobalS16:
+        return IR::Opcode::WriteStorageS16;
+    case IR::Opcode::WriteGlobalU16:
+        return IR::Opcode::WriteStorageU16;
+    case IR::Opcode::WriteGlobal32:
+        return IR::Opcode::WriteStorage32;
+    case IR::Opcode::WriteGlobal64:
+        return IR::Opcode::WriteStorage64;
+    case IR::Opcode::WriteGlobal128:
+        return IR::Opcode::WriteStorage128;
+    default:
+        throw InvalidArgument("Invalid global memory opcode {}", opcode);
+    }
+}
+
+/// Returns true when a storage buffer address satisfies a bias
+bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
+    return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
+           storage_buffer.offset < bias.offset_end;
+}
+
+/// Ignores a global memory operation, reads return zero and writes are ignored
+void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
+    const IR::Value zero{u32{0}};
+    switch (inst->Opcode()) {
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobal32:
+        inst->ReplaceUsesWith(zero);
+        break;
+    case IR::Opcode::LoadGlobal64:
+        inst->ReplaceUsesWith(
+            IR::Value{&*block.PrependNewInst(inst, IR::Opcode::CompositeConstruct2, {zero, zero})});
+        break;
+    case IR::Opcode::LoadGlobal128:
+        inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst(
+            inst, IR::Opcode::CompositeConstruct4, {zero, zero, zero, zero})});
+        break;
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobal32:
+    case IR::Opcode::WriteGlobal64:
+    case IR::Opcode::WriteGlobal128:
+        inst->Invalidate();
+        break;
+    default:
+        throw LogicError("Invalid opcode to ignore its global memory operation {}", inst->Opcode());
+    }
+}
+
+/// Recursively tries to track the storage buffer address used by a global memory instruction
+std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
+    if (value.IsImmediate()) {
+        // Immediates can't be a storage buffer
+        return std::nullopt;
+    }
+    const IR::Inst* const inst{value.InstRecursive()};
+    if (inst->Opcode() == IR::Opcode::GetCbuf) {
+        const IR::Value index{inst->Arg(0)};
+        const IR::Value offset{inst->Arg(1)};
+        if (!index.IsImmediate()) {
+            // Definitely not a storage buffer if it's read from a non-immediate index
+            return std::nullopt;
+        }
+        if (!offset.IsImmediate()) {
+            // TODO: Support SSBO arrays
+            return std::nullopt;
+        }
+        const StorageBufferAddr storage_buffer{
+            .index = index.U32(),
+            .offset = offset.U32(),
+        };
+        if (bias && !MeetsBias(storage_buffer, *bias)) {
+            // We have to blacklist some addresses in case we wrongly point to them
+            return std::nullopt;
+        }
+        return storage_buffer;
+    }
+    // Reversed loops are more likely to find the right result
+    for (size_t arg = inst->NumArgs(); arg--;) {
+        if (const std::optional storage_buffer{Track(inst->Arg(arg), bias)}) {
+            return *storage_buffer;
+        }
+    }
+    return std::nullopt;
+}
+
+/// Collects the storage buffer used by a global memory instruction and the instruction itself
+void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst,
+                           StorageBufferSet& storage_buffer_set, StorageInstVector& to_replace) {
+    // NVN puts storage buffers in a specific range, we have to bias towards these addresses to
+    // avoid getting false positives
+    static constexpr Bias nvn_bias{
+        .index{0},
+        .offset_begin{0x110},
+        .offset_end{0x610},
+    };
+    // First try to find storage buffers in the NVN address
+    const IR::U64 addr{inst->Arg(0)};
+    std::optional<StorageBufferAddr> storage_buffer{Track(addr, &nvn_bias)};
+    if (!storage_buffer) {
+        // If it fails, track without a bias
+        storage_buffer = Track(addr, nullptr);
+        if (!storage_buffer) {
+            // If that also failed, drop the global memory usage
+            IgnoreGlobalMemory(block, inst);
+        }
+    }
+    // Collect storage buffer and the instruction
+    storage_buffer_set.insert(*storage_buffer);
+    to_replace.push_back(StorageInst{
+        .storage_buffer{*storage_buffer},
+        .inst{inst},
+    });
+}
+
+/// Tries to track the first 32-bits of a global memory instruction
+std::optional<IR::U32> TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) {
+    // The first argument is the low level GPU pointer to the global memory instruction
+    const IR::U64 addr{inst->Arg(0)};
+    if (addr.IsImmediate()) {
+        // Not much we can do if it's an immediate
+        return std::nullopt;
+    }
+    // This address is expected to either be a PackUint2x32 or a IAdd64
+    IR::Inst* addr_inst{addr.InstRecursive()};
+    s32 imm_offset{0};
+    if (addr_inst->Opcode() == IR::Opcode::IAdd64) {
+        // If it's an IAdd64, get the immediate offset it is applying and grab the address
+        // instruction. This expects for the instruction to be canonicalized having the address on
+        // the first argument and the immediate offset on the second one.
+        const IR::U64 imm_offset_value{addr_inst->Arg(1)};
+        if (!imm_offset_value.IsImmediate()) {
+            return std::nullopt;
+        }
+        imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64()));
+        const IR::U64 iadd_addr{addr_inst->Arg(0)};
+        if (iadd_addr.IsImmediate()) {
+            return std::nullopt;
+        }
+        addr_inst = iadd_addr.Inst();
+    }
+    // With IAdd64 handled, now PackUint2x32 is expected without exceptions
+    if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) {
+        return std::nullopt;
+    }
+    // PackUint2x32 is expected to be generated from a vector
+    const IR::Value vector{addr_inst->Arg(0)};
+    if (vector.IsImmediate()) {
+        return std::nullopt;
+    }
+    // This vector is expected to be a CompositeConstruct2
+    IR::Inst* const vector_inst{vector.InstRecursive()};
+    if (vector_inst->Opcode() != IR::Opcode::CompositeConstruct2) {
+        return std::nullopt;
+    }
+    // Grab the first argument from the CompositeConstruct2, this is the low address.
+    // Re-apply the offset in case we found one.
+    const IR::U32 low_addr{vector_inst->Arg(0)};
+    return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr;
+}
+
+/// Returns the offset in indices (not bytes) for an equivalent storage instruction
+IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) {
+    IR::IREmitter ir{block, inst};
+    IR::U32 offset;
+    if (const std::optional<IR::U32> low_addr{TrackLowAddress(ir, &*inst)}) {
+        offset = *low_addr;
+    } else {
+        offset = ir.ConvertU(32, IR::U64{inst->Arg(0)});
+    }
+    // Subtract the least significant 32 bits from the guest offset. The result is the storage
+    // buffer offset in bytes.
+    const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+    return ir.ISub(offset, low_cbuf);
+}
+
+/// Replace a global memory load instruction with its storage buffer equivalent
+void ReplaceLoad(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+                 const IR::U32& offset) {
+    const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())};
+    const IR::Value value{&*block.PrependNewInst(inst, new_opcode, {storage_index, offset})};
+    inst->ReplaceUsesWith(value);
+}
+
+/// Replace a global memory write instruction with its storage buffer equivalent
+void ReplaceWrite(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+                  const IR::U32& offset) {
+    const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())};
+    block.PrependNewInst(inst, new_opcode, {storage_index, offset, inst->Arg(1)});
+    inst->Invalidate();
+}
+
+/// Replace a global memory instruction with its storage buffer equivalent
+void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+             const IR::U32& offset) {
+    switch (inst->Opcode()) {
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobal32:
+    case IR::Opcode::LoadGlobal64:
+    case IR::Opcode::LoadGlobal128:
+        return ReplaceLoad(block, inst, storage_index, offset);
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobal32:
+    case IR::Opcode::WriteGlobal64:
+    case IR::Opcode::WriteGlobal128:
+        return ReplaceWrite(block, inst, storage_index, offset);
+    default:
+        throw InvalidArgument("Invalid global memory opcode {}", inst->Opcode());
    }
+}
+} // Anonymous namespace
+
+void GlobalMemoryToStorageBufferPass(IR::Block& block) {
+    StorageBufferSet storage_buffers;
+    StorageInstVector to_replace;
+
+    for (IR::Block::iterator inst{block.begin()}; inst != block.end(); ++inst) {
+        if (!IsGlobalMemory(*inst)) {
+            continue;
+        }
+        CollectStorageBuffers(block, inst, storage_buffers, to_replace);
+    }
+    for (const auto [storage_buffer, inst] : to_replace) {
+        const auto it{storage_buffers.find(storage_buffer)};
+        const IR::U32 storage_index{IR::Value{static_cast<u32>(storage_buffers.index_of(it))}};
+        const IR::U32 offset{StorageOffset(block, inst, storage_buffer)};
+        Replace(block, inst, storage_index, offset);
+    }
+}
+
+} // namespace Shader::Optimization
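To make the NVN bias heuristic above concrete, here is a standalone sketch with simplified stand-in types: StorageBufferAddr and Bias mirror the structs in the pass, while the Track stand-in is hypothetical and just scans a list instead of walking the IR. The point it shows is the two-step search: a biased pass restricted to c0[0x110, 0x610) runs first, and only if it finds nothing is an unbiased search tried.

// Sketch only: hypothetical stand-ins, not the actual pass.
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

struct StorageBufferAddr {
    uint32_t index;  // constant buffer index
    uint32_t offset; // byte offset of the pointer inside that buffer
};

struct Bias {
    uint32_t index;
    uint32_t offset_begin;
    uint32_t offset_end;
};

bool MeetsBias(const StorageBufferAddr& addr, const Bias& bias) {
    return addr.index == bias.index && addr.offset >= bias.offset_begin &&
           addr.offset < bias.offset_end;
}

// Stand-in for Track(): pick the first candidate that satisfies the bias, if any.
std::optional<StorageBufferAddr> Track(const std::vector<StorageBufferAddr>& candidates,
                                       const Bias* bias) {
    for (const StorageBufferAddr& addr : candidates) {
        if (!bias || MeetsBias(addr, *bias)) {
            return addr;
        }
    }
    return std::nullopt;
}

int main() {
    constexpr Bias nvn_bias{.index = 0, .offset_begin = 0x110, .offset_end = 0x610};
    const std::vector<StorageBufferAddr> candidates{{0, 0x20}, {0, 0x140}};

    // The biased pass skips the 0x20 pointer and finds 0x140; an unbiased pass
    // would have settled for 0x20 and risked a false positive.
    if (const auto addr = Track(candidates, &nvn_bias)) {
        std::cout << "cbuf" << addr->index << "[0x" << std::hex << addr->offset << "]\n";
    }
}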
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
index 7f8500087..39a972919 100644
--- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -10,22 +10,24 @@
 
 namespace Shader::Optimization {
 
-void IdentityRemovalPass(IR::Block& block) {
+void IdentityRemovalPass(IR::Function& function) {
     std::vector<IR::Inst*> to_invalidate;
 
-    for (auto inst = block.begin(); inst != block.end();) {
-        const size_t num_args{inst->NumArgs()};
-        for (size_t i = 0; i < num_args; ++i) {
-            IR::Value arg;
-            while ((arg = inst->Arg(i)).IsIdentity()) {
-                inst->SetArg(i, arg.Inst()->Arg(0));
+    for (auto& block : function.blocks) {
+        for (auto inst = block->begin(); inst != block->end();) {
+            const size_t num_args{inst->NumArgs()};
+            for (size_t i = 0; i < num_args; ++i) {
+                IR::Value arg;
+                while ((arg = inst->Arg(i)).IsIdentity()) {
+                    inst->SetArg(i, arg.Inst()->Arg(0));
+                }
+            }
+            if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
+                to_invalidate.push_back(&*inst);
+                inst = block->Instructions().erase(inst);
+            } else {
+                ++inst;
             }
-        }
-        if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
-            to_invalidate.push_back(&*inst);
-            inst = block.Instructions().erase(inst);
-        } else {
-            ++inst;
         }
     }
     for (IR::Inst* const inst : to_invalidate) {
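A minimal sketch of the identity-chasing loop in the pass above, using a toy Node type rather than the real IR::Inst/IR::Value classes: every argument is rewritten to the value at the end of its identity chain, after which the identity nodes themselves can be erased.

// Sketch only: toy data structures, not the yuzu IR.
#include <iostream>
#include <string>
#include <vector>

struct Node {
    std::string op;        // "identity" nodes simply forward args[0]
    std::vector<Node*> args;
};

Node* StripIdentities(Node* value) {
    while (value->op == "identity") {
        value = value->args[0];
    }
    return value;
}

int main() {
    Node c{"const42", {}};
    Node id1{"identity", {&c}};
    Node id2{"identity", {&id1}};
    Node add{"add", {&id2, &c}};

    for (Node*& arg : add.args) {
        arg = StripIdentities(arg); // both operands now point at c directly
    }
    std::cout << add.args[0]->op << ", " << add.args[1]->op << '\n'; // const42, const42
}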
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 7ed4005ed..578a24d89 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -16,9 +16,11 @@ void Invoke(Func&& func, IR::Function& function) {
     }
 }
 
+void ConstantPropagationPass(IR::Block& block);
 void DeadCodeEliminationPass(IR::Block& block);
-void IdentityRemovalPass(IR::Block& block);
+void GlobalMemoryToStorageBufferPass(IR::Block& block);
+void IdentityRemovalPass(IR::Function& function);
 void SsaRewritePass(IR::Function& function);
-void VerificationPass(const IR::Block& block);
+void VerificationPass(const IR::Function& function);
 
 } // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index a4b256a40..3c9b020e0 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -14,8 +14,6 @@
 //      https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
 //
 
-#include <map>
-
 #include <boost/container/flat_map.hpp>
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
@@ -30,6 +28,12 @@ namespace Shader::Optimization {
 namespace {
 using ValueMap = boost::container::flat_map<IR::Block*, IR::Value, std::less<IR::Block*>>;
 
+struct FlagTag {};
+struct ZeroFlagTag : FlagTag {};
+struct SignFlagTag : FlagTag {};
+struct CarryFlagTag : FlagTag {};
+struct OverflowFlagTag : FlagTag {};
+
 struct DefTable {
     [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept {
         return regs[IR::RegIndex(variable)];
@@ -39,8 +43,28 @@
         return preds[IR::PredIndex(variable)];
     }
 
+    [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept {
+        return zero_flag;
+    }
+
+    [[nodiscard]] ValueMap& operator[](SignFlagTag) noexcept {
+        return sign_flag;
+    }
+
+    [[nodiscard]] ValueMap& operator[](CarryFlagTag) noexcept {
+        return carry_flag;
+    }
+
+    [[nodiscard]] ValueMap& operator[](OverflowFlagTag) noexcept {
+        return overflow_flag;
+    }
+
     std::array<ValueMap, IR::NUM_USER_REGS> regs;
     std::array<ValueMap, IR::NUM_USER_PREDS> preds;
+    ValueMap zero_flag;
+    ValueMap sign_flag;
+    ValueMap carry_flag;
+    ValueMap overflow_flag;
 };
 
 IR::Opcode UndefOpcode(IR::Reg) noexcept {
@@ -51,6 +75,10 @@
     return IR::Opcode::Undef1;
 }
 
+IR::Opcode UndefOpcode(const FlagTag&) noexcept {
+    return IR::Opcode::Undef1;
+}
+
 [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept {
     return inst.Opcode() == IR::Opcode::Phi;
 }
@@ -135,6 +163,18 @@
                     pass.WriteVariable(pred, block.get(), inst.Arg(1));
                 }
                 break;
+            case IR::Opcode::SetZFlag:
+                pass.WriteVariable(ZeroFlagTag{}, block.get(), inst.Arg(0));
+                break;
+            case IR::Opcode::SetSFlag:
+                pass.WriteVariable(SignFlagTag{}, block.get(), inst.Arg(0));
+                break;
+            case IR::Opcode::SetCFlag:
+                pass.WriteVariable(CarryFlagTag{}, block.get(), inst.Arg(0));
+                break;
+            case IR::Opcode::SetOFlag:
+                pass.WriteVariable(OverflowFlagTag{}, block.get(), inst.Arg(0));
+                break;
             case IR::Opcode::GetRegister:
                 if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
                     inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get()));
@@ -145,6 +185,18 @@
                     inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get()));
                 }
                 break;
+            case IR::Opcode::GetZFlag:
+                inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block.get()));
+                break;
+            case IR::Opcode::GetSFlag:
+                inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block.get()));
+                break;
+            case IR::Opcode::GetCFlag:
+                inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block.get()));
+                break;
+            case IR::Opcode::GetOFlag:
+                inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block.get()));
+                break;
             default:
                 break;
             }
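The FlagTag structs added above are empty types used purely for overload resolution. The standalone sketch below (simplified DefTable and a hypothetical WriteVariable helper, not the actual pass code) shows the idea: overloading operator[] on tag types lets one templated helper store register and flag definitions in separate tables with no runtime dispatch.

// Sketch only: simplified types, not the yuzu classes.
#include <iostream>
#include <string>
#include <unordered_map>

struct ZeroFlagTag {};
struct CarryFlagTag {};

using ValueMap = std::unordered_map<std::string, int>; // block name -> definition

struct DefTable {
    ValueMap& operator[](int reg) { return regs[reg]; }
    ValueMap& operator[](ZeroFlagTag) { return zero_flag; }
    ValueMap& operator[](CarryFlagTag) { return carry_flag; }

    std::unordered_map<int, ValueMap> regs;
    ValueMap zero_flag;
    ValueMap carry_flag;
};

// One generic writer covers every variable kind, in the spirit of the pass's WriteVariable.
template <typename Variable>
void WriteVariable(DefTable& defs, Variable var, const std::string& block, int value) {
    defs[var][block] = value;
}

int main() {
    DefTable defs;
    WriteVariable(defs, 3, "entry", 7);             // register r3
    WriteVariable(defs, ZeroFlagTag{}, "entry", 1); // zero flag
    std::cout << defs[3]["entry"] << ' ' << defs[ZeroFlagTag{}]["entry"] << '\n'; // 7 1
}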
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
index 36d9ae39b..8a5adf5a2 100644
--- a/src/shader_recompiler/ir_opt/verification_pass.cpp
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -11,40 +11,44 @@
 
 namespace Shader::Optimization {
 
-static void ValidateTypes(const IR::Block& block) {
-    for (const IR::Inst& inst : block) {
-        const size_t num_args{inst.NumArgs()};
-        for (size_t i = 0; i < num_args; ++i) {
-            const IR::Type t1{inst.Arg(i).Type()};
-            const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)};
-            if (!IR::AreTypesCompatible(t1, t2)) {
-                throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(block));
+static void ValidateTypes(const IR::Function& function) {
+    for (const auto& block : function.blocks) {
+        for (const IR::Inst& inst : *block) {
+            const size_t num_args{inst.NumArgs()};
+            for (size_t i = 0; i < num_args; ++i) {
+                const IR::Type t1{inst.Arg(i).Type()};
+                const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)};
+                if (!IR::AreTypesCompatible(t1, t2)) {
+                    throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
+                }
             }
         }
     }
 }
 
-static void ValidateUses(const IR::Block& block) {
+static void ValidateUses(const IR::Function& function) {
     std::map<IR::Inst*, int> actual_uses;
-    for (const IR::Inst& inst : block) {
-        const size_t num_args{inst.NumArgs()};
-        for (size_t i = 0; i < num_args; ++i) {
-            const IR::Value arg{inst.Arg(i)};
-            if (!arg.IsImmediate()) {
-                ++actual_uses[arg.Inst()];
+    for (const auto& block : function.blocks) {
+        for (const IR::Inst& inst : *block) {
+            const size_t num_args{inst.NumArgs()};
+            for (size_t i = 0; i < num_args; ++i) {
+                const IR::Value arg{inst.Arg(i)};
+                if (!arg.IsImmediate()) {
+                    ++actual_uses[arg.Inst()];
+                }
             }
         }
     }
     for (const auto [inst, uses] : actual_uses) {
         if (inst->UseCount() != uses) {
-            throw LogicError("Invalid uses in block:\n{}", IR::DumpBlock(block));
+            throw LogicError("Invalid uses in block:" /*, IR::DumpFunction(function)*/);
         }
     }
 }
 
-void VerificationPass(const IR::Block& block) {
-    ValidateTypes(block);
-    ValidateUses(block);
+void VerificationPass(const IR::Function& function) {
+    ValidateTypes(function);
+    ValidateUses(function);
 }
 
 } // namespace Shader::Optimization
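For reference, a toy version of the use-count check performed by ValidateUses above (hypothetical Inst type, not the yuzu IR): recount how often each instruction appears as an argument and compare against the cached counter, which is how dangling or stale references are caught after the other passes have run.

// Sketch only: toy instruction type, not the yuzu IR.
#include <iostream>
#include <map>
#include <vector>

struct Inst {
    int use_count{};          // counter maintained incrementally by the IR
    std::vector<Inst*> args;  // non-immediate arguments
};

bool VerifyUses(const std::vector<Inst*>& block) {
    std::map<const Inst*, int> actual_uses;
    for (const Inst* inst : block) {
        for (const Inst* arg : inst->args) {
            ++actual_uses[arg];
        }
    }
    for (const auto& [inst, uses] : actual_uses) {
        if (inst->use_count != uses) {
            return false; // the real pass throws LogicError here
        }
    }
    return true;
}

int main() {
    Inst a{}, b{};
    b.args = {&a};
    a.use_count = 1;
    const std::vector<Inst*> block{&a, &b};
    std::cout << (VerifyUses(block) ? "ok" : "mismatch") << '\n'; // ok
}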
