diff options
| author | Nguyen Dac Nam <nam.kazt.91@gmail.com> | 2020-02-21 19:08:07 +0700 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-02-21 19:08:07 +0700 | 
| commit | 1956a34ee554d641b8a066c4882bc3b0ae77e104 (patch) | |
| tree | d065944be0b5ec1a3f3878a699924dcc30b63da6 | |
| parent | fe8e5d8ae48072d47649f872cee793b714034a5d (diff) | |
shader: replace the old per-bit LOP3 implementation with a fast LUT-driven one
ref: https://devtalk.nvidia.com/default/topic/1070081/cuda-programming-and-performance/reverse-lut-for-lop3-lut/
| -rw-r--r-- | src/video_core/shader/decode/arithmetic_integer.cpp | 94 | 
1 file changed, 58 insertions, 36 deletions
| diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 21366869d..83d3944ac 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -293,44 +293,66 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {  void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,                                      Node imm_lut, bool sets_cc) { -    constexpr u32 lop_iterations = 32; -    const Node one = Immediate(1); -    const Node two = Immediate(2); - -    Node value; -    for (u32 i = 0; i < lop_iterations; ++i) { -        const Node shift_amount = Immediate(i); - -        const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount); -        const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one); - -        const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount); -        const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one); -        const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one); - -        const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount); -        const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one); -        const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two); - -        const Node pack_01 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_0, pack_1); -        const Node pack_012 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_01, pack_2); - -        const Node shifted_bit = -            Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012); -        const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one); - -        const Node right = -            
Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount); - -        if (i > 0) { -            value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right); -        } else { -            value = right; +    const Node lop3_fast = [&](Node na, Node nb, Node nc, Node ttbl) { +        Node value = Immediate(0); +        ImmediateNode imm = std::get<ImmediateNode>(*ttbl); +        if (imm.GetValue() & 0x01) { +            Node a = Operation(OperationCode::IBitwiseNot, na); +            Node b = Operation(OperationCode::IBitwiseNot, nb); +            Node c = Operation(OperationCode::IBitwiseNot, nc); +            Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); +            r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); +            value = Operation(OperationCode::IBitwiseOr, value, r);          } -    } +        if (imm.GetValue() & 0x02) { +            Node a = Operation(OperationCode::IBitwiseNot, na); +            Node b = Operation(OperationCode::IBitwiseNot, nb); +            Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); +            r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); +            value = Operation(OperationCode::IBitwiseOr, value, r); +        } +        if (imm.GetValue() & 0x04) { +            Node a = Operation(OperationCode::IBitwiseNot, na); +            Node c = Operation(OperationCode::IBitwiseNot, nc); +            Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); +            r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); +            value = Operation(OperationCode::IBitwiseOr, value, r); +        } +        if (imm.GetValue() & 0x08) { +            Node a = Operation(OperationCode::IBitwiseNot, na); +            Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); +            r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); +            value = Operation(OperationCode::IBitwiseOr, value, 
r); +        } +        if (imm.GetValue() & 0x10) { +            Node b = Operation(OperationCode::IBitwiseNot, nb); +            Node c = Operation(OperationCode::IBitwiseNot, nc); +            Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); +            r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); +            value = Operation(OperationCode::IBitwiseOr, value, r); +        } +        if (imm.GetValue() & 0x20) { +            Node b = Operation(OperationCode::IBitwiseNot, nb); +            Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); +            r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); +            value = Operation(OperationCode::IBitwiseOr, value, r); +        } +        if (imm.GetValue() & 0x40) { +            Node c = Operation(OperationCode::IBitwiseNot, nc); +            Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); +            r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); +            value = Operation(OperationCode::IBitwiseOr, value, r); +        } +        if (imm.GetValue() & 0x80) { +            Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); +            r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); +            value = Operation(OperationCode::IBitwiseOr, value, r); +        } +        return value; +    }(op_a, op_b, op_c, imm_lut); -    SetInternalFlagsFromInteger(bb, value, sets_cc); -    SetRegister(bb, dest, value); +    SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); +    SetRegister(bb, dest, lop3_fast);  }  } // namespace VideoCommon::Shader | 
