diff options
| author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2018-12-23 02:26:35 -0300 | 
|---|---|---|
| committer | ReinUsesLisp <reinuseslisp@airmail.cc> | 2019-01-15 17:54:52 -0300 | 
| commit | dd91650aaf217196a2b1ced17df24bd74349843d (patch) | |
| tree | 537f6098b72cd7fa7ccccc0c44f294faf95e96e9 /src/video_core | |
| parent | d6f76307febaa2deb05112bb2c29ed667210ee2b (diff) | |
shader_decode: Implement HFMA2
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 1 | ||||
| -rw-r--r-- | src/video_core/shader/decode/hfma2.cpp | 54 | ||||
| -rw-r--r-- | src/video_core/shader/glsl_decompiler.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 1 | 
4 files changed, 60 insertions, 5 deletions
```diff
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c4987b682..9cb23f375 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -648,6 +648,7 @@ union Instruction {
             BitField<37, 2, HalfPrecision> precision;
             BitField<32, 1, u64> saturate;
 
+            BitField<31, 1, u64> negate_b;
             BitField<30, 1, u64> negate_c;
             BitField<35, 2, HalfType> type_c;
         } rr;
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index 5ce08481e..bf7491804 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <tuple>
+
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
@@ -9,6 +11,8 @@
 
 namespace VideoCommon::Shader {
 
+using Tegra::Shader::HalfPrecision;
+using Tegra::Shader::HalfType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
@@ -16,7 +20,55 @@ u32 ShaderIR::DecodeHfma2(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
+        UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
+    } else {
+        UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
+    }
+
+    constexpr auto identity = HalfType::H0_H1;
+
+    const HalfType type_a = instr.hfma2.type_a;
+    const Node op_a = GetRegister(instr.gpr8);
+
+    bool neg_b{}, neg_c{};
+    auto [saturate, type_b, op_b, type_c,
+          op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HFMA2_CR:
+            neg_b = instr.hfma2.negate_b;
+            neg_c = instr.hfma2.negate_c;
+            return {instr.hfma2.saturate, instr.hfma2.type_b,
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39,
+                    GetRegister(instr.gpr39)};
+        case OpCode::Id::HFMA2_RC:
+            neg_b = instr.hfma2.negate_b;
+            neg_c = instr.hfma2.negate_c;
+            return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
+                    instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+        case OpCode::Id::HFMA2_RR:
+            neg_b = instr.hfma2.rr.negate_b;
+            neg_c = instr.hfma2.rr.negate_c;
+            return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
+                    instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
+        case OpCode::Id::HFMA2_IMM_R:
+            neg_c = instr.hfma2.negate_c;
+            return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
+                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
+        default:
+            return {false, identity, Immediate(0), identity, Immediate(0)};
+        }
+    }();
+    UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
+
+    op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
+    op_c = GetOperandAbsNegHalf(op_c, false, neg_c);
+
+    MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
+    Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
+    value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
+
+    SetRegister(bb, instr.gpr0, value);
 
     return pc;
 }
diff --git a/src/video_core/shader/glsl_decompiler.cpp b/src/video_core/shader/glsl_decompiler.cpp
index abc9a556d..c364a43ce 100644
--- a/src/video_core/shader/glsl_decompiler.cpp
+++ b/src/video_core/shader/glsl_decompiler.cpp
@@ -762,9 +762,9 @@ private:
         return GenerateBinaryInfix(operation, "/", type, type, type);
     }
 
-    std::string FFma(Operation operation) {
-        return GenerateTernary(operation, "fma", Type::Float, Type::Float, Type::Float,
-                               Type::Float);
+    template <Type type>
+    std::string Fma(Operation operation) {
+        return GenerateTernary(operation, "fma", type, type, type, type);
     }
 
     template <Type type>
@@ -1231,7 +1231,7 @@ private:
         &Add<Type::Float>,
         &Mul<Type::Float>,
         &Div<Type::Float>,
-        &FFma,
+        &Fma<Type::Float>,
         &Negate<Type::Float>,
         &Absolute<Type::Float>,
         &FClamp,
@@ -1289,6 +1289,7 @@ private:
 
         &Add<Type::HalfFloat>,
         &Mul<Type::HalfFloat>,
+        &Fma<Type::HalfFloat>,
         &Absolute<Type::HalfFloat>,
         &HNegate,
         &HMergeF32,
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index ccdf316ac..928e3e7d5 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -110,6 +110,7 @@ enum class OperationCode {
 
     HAdd,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
     HMul,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+    HFma,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
     HAbsolute, /// (f16vec2 a) -> f16vec2
     HNegate,   /// (f16vec2 a, bool first, bool second) -> f16vec2
     HMergeF32, /// (f16vec2 src) -> float
```
