| author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2021-06-23 01:33:42 -0300 | 
|---|---|---|
| committer | ameerj <52414509+ameerj@users.noreply.github.com> | 2021-07-22 21:51:39 -0400 | 
| commit | d8d5501459d6c8b4c39307d293b0f40834dce8f3 (patch) | |
| tree | 5c44ce2b967f66b1362c8a00b154b7fb1bc2b3ce | |
| parent | 04ef2160f9e164dbf7c2ab2f37de5533a8d5c450 (diff) | |
shader: Add int64 to int32 lowering pass
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | src/shader_recompiler/CMakeLists.txt | 1 |
| -rw-r--r-- | src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp | 216 |
| -rw-r--r-- | src/shader_recompiler/ir_opt/passes.h | 1 |

3 files changed, 218 insertions, 0 deletions
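The new pass models each 64-bit integer value as a pair of 32-bit words (built and taken apart with CompositeConstruct/CompositeExtract) and rewrites the 64-bit opcodes into 32-bit sequences. As a rough standalone sketch of the add, subtract, and negate lowerings in the diff below, written against plain `uint32_t` pairs rather than the recompiler's IR types (the helper names and the `{lo, hi}` pair layout are assumptions of this sketch):

```cpp
#include <cstdint>
#include <utility>

using U32Pair = std::pair<std::uint32_t, std::uint32_t>; // {lo, hi}

U32Pair Add64(U32Pair a, U32Pair b) {
    const std::uint32_t lo = a.first + b.first;
    const std::uint32_t carry = lo < a.first ? 1u : 0u; // unsigned wrap-around signals a carry
    const std::uint32_t hi = a.second + b.second + carry;
    return {lo, hi};
}

U32Pair Sub64(U32Pair a, U32Pair b) {
    const std::uint32_t lo = a.first - b.first;
    const std::uint32_t borrow = lo > a.first ? 1u : 0u; // result larger than the minuend means a borrow
    const std::uint32_t hi = a.second - b.second - borrow;
    return {lo, hi};
}

U32Pair Neg64(U32Pair v) {
    // Two's complement: invert both words, add 1 to the low word, propagate the carry
    const std::uint32_t lo = ~v.first + 1u;
    const std::uint32_t carry = lo == 0u ? 1u : 0u; // adding 1 only carries when ~lo was 0xFFFFFFFF
    const std::uint32_t hi = ~v.second + carry;
    return {lo, hi};
}
```

The committed IR version reads the low-word carry via GetCarryFromOp instead of comparing, and detects the subtraction borrow with an unsigned IGreaterThan on the low words, but the arithmetic is the same.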
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index f6719ad9d..3b5708cb9 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -219,6 +219,7 @@ add_library(shader_recompiler STATIC
     ir_opt/global_memory_to_storage_buffer_pass.cpp
     ir_opt/identity_removal_pass.cpp
     ir_opt/lower_fp16_to_fp32.cpp
+    ir_opt/lower_int64_to_int32.cpp
     ir_opt/passes.h
     ir_opt/ssa_rewrite_pass.cpp
     ir_opt/texture_pass.cpp
diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
new file mode 100644
index 000000000..787a64f93
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
@@ -0,0 +1,216 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <ranges>
+#include <utility>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Optimization {
+namespace {
+std::pair<IR::U32, IR::U32> Unpack(IR::IREmitter& ir, const IR::Value& packed) {
+    if (packed.IsImmediate()) {
+        const u64 value{packed.U64()};
+        return {
+            ir.Imm32(static_cast<u32>(value)),
+            ir.Imm32(static_cast<u32>(value >> 32)),
+        };
+    } else {
+        return std::pair<IR::U32, IR::U32>{
+            ir.CompositeExtract(packed, 0u),
+            ir.CompositeExtract(packed, 1u),
+        };
+    }
+}
+
+void IAdd64To32(IR::Block& block, IR::Inst& inst) {
+    if (inst.HasAssociatedPseudoOperation()) {
+        throw NotImplementedException("IAdd64 emulation with pseudo instructions");
+    }
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
+    const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
+
+    const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)};
+    const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))};
+
+    const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)};
+    inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
+}
+
+void ISub64To32(IR::Block& block, IR::Inst& inst) {
+    if (inst.HasAssociatedPseudoOperation()) {
+        throw NotImplementedException("ISub64 emulation with pseudo instructions");
+    }
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
+    const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
+
+    const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)};
+    const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)};
+    const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))};
+
+    const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)};
+    inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
+}
+
+void INeg64To32(IR::Block& block, IR::Inst& inst) {
+    if (inst.HasAssociatedPseudoOperation()) {
+        throw NotImplementedException("INeg64 emulation with pseudo instructions");
+    }
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    auto [lo, hi]{Unpack(ir, inst.Arg(0))};
+    lo = ir.BitwiseNot(lo);
+    hi = ir.BitwiseNot(hi);
+
+    lo = ir.IAdd(lo, ir.Imm32(1));
+
+    const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))};
+    hi = ir.IAdd(hi, carry);
+
+    inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi));
+}
+
+void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) {
+    if (inst.HasAssociatedPseudoOperation()) {
+        throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions");
+    }
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
+    const IR::U32 shift{inst.Arg(1)};
+
+    const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)};
+    const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)};
+
+    const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
+    const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
+    const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
+
+    const IR::U32 long_ret_lo{ir.Imm32(0)};
+    const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)};
+
+    const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
+    const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, false)};
+    const IR::U32 short_ret_lo{shifted_lo};
+    const IR::U32 short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)};
+
+    const IR::U32 zero_ret_lo{lo};
+    const IR::U32 zero_ret_hi{hi};
+
+    const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
+    const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
+
+    const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
+    const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
+    inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
+}
+
+void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) {
+    if (inst.HasAssociatedPseudoOperation()) {
+        throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions");
+    }
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
+    const IR::U32 shift{inst.Arg(1)};
+
+    const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
+    const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)};
+
+    const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
+    const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
+    const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
+
+    const IR::U32 long_ret_hi{ir.Imm32(0)};
+    const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)};
+
+    const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
+    const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)};
+    const IR::U32 short_ret_hi{shifted_hi};
+    const IR::U32 short_ret_lo{
+        ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
+
+    const IR::U32 zero_ret_lo{lo};
+    const IR::U32 zero_ret_hi{hi};
+
+    const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
+    const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
+
+    const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
+    const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
+    inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
+}
+
+void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) {
+    if (inst.HasAssociatedPseudoOperation()) {
+        throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions");
+    }
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
+    const IR::U32 shift{inst.Arg(1)};
+
+    const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
+    const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)};
+
+    const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))};
+
+    const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
+    const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
+    const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
+
+    const IR::U32 long_ret_hi{sign_extension};
+    const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)};
+
+    const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
+    const IR::U32 short_hi_extract(ir.BitFieldExtract(hi, ir.Imm32(0), shift));
+    const IR::U32 short_ret_hi{shifted_hi};
+    const IR::U32 short_ret_lo{
+        ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
+
+    const IR::U32 zero_ret_lo{lo};
+    const IR::U32 zero_ret_hi{hi};
+
+    const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
+    const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
+
+    const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
+    const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
+    inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
+}
+
+void Lower(IR::Block& block, IR::Inst& inst) {
+    switch (inst.GetOpcode()) {
+    case IR::Opcode::PackUint2x32:
+    case IR::Opcode::UnpackUint2x32:
+        return inst.ReplaceOpcode(IR::Opcode::Identity);
+    case IR::Opcode::IAdd64:
+        return IAdd64To32(block, inst);
+    case IR::Opcode::ISub64:
+        return ISub64To32(block, inst);
+    case IR::Opcode::INeg64:
+        return INeg64To32(block, inst);
+    case IR::Opcode::ShiftLeftLogical64:
+        return ShiftLeftLogical64To32(block, inst);
+    case IR::Opcode::ShiftRightLogical64:
+        return ShiftRightLogical64To32(block, inst);
+    case IR::Opcode::ShiftRightArithmetic64:
+        return ShiftRightArithmetic64To32(block, inst);
+    default:
+        break;
+    }
+}
+} // Anonymous namespace
+
+void LowerInt64ToInt32(IR::Program& program) {
+    for (IR::Block* const block : program.post_order_blocks | std::views::reverse) {
+        for (IR::Inst& inst : block->Instructions()) {
+            Lower(*block, inst);
+        }
+    }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 5ebde49ea..2f89b1ea0 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -18,6 +18,7 @@ void DeadCodeEliminationPass(IR::Program& program);
 void GlobalMemoryToStorageBufferPass(IR::Program& program);
 void IdentityRemovalPass(IR::Program& program);
 void LowerFp16ToFp32(IR::Program& program);
+void LowerInt64ToInt32(IR::Program& program);
 void SsaRewritePass(IR::Program& program);
 void TexturePass(Environment& env, IR::Program& program);
 void VerificationPass(const IR::Program& program);
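The three shift lowerings above all follow the same three-way split on the shift amount: shift == 0 passes the value through, shift >= 32 moves data across the word boundary, and 0 < shift < 32 combines the shifted halves with the bits that cross between words. A minimal standalone sketch of the logical left-shift case, again on plain `uint32_t` pairs rather than the IR emitter (the helper name and `{lo, hi}` layout are assumptions of this sketch, and shift amounts are assumed to be below 64):

```cpp
#include <cstdint>
#include <utility>

using U32Pair = std::pair<std::uint32_t, std::uint32_t>; // {lo, hi}

U32Pair Shl64(U32Pair v, std::uint32_t shift) {
    if (shift == 0) {
        return v; // zero case: pass the value through untouched
    }
    if (shift >= 32) {
        // "long" case: everything moves out of the low word into the high word
        return {0u, v.first << (shift - 32)};
    }
    // "short" case: bits shifted out of lo are OR'd into the freshly shifted hi
    const std::uint32_t lo = v.first << shift;
    const std::uint32_t hi = (v.second << shift) | (v.first >> (32 - shift));
    return {lo, hi};
}
```

The committed pass expresses the cross-word bits with BitFieldExtract/BitFieldInsert and picks between the three candidate results with Select, which keeps the generated IR free of branches.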
