diff options
| author | Fernando Sahmkow <fsahmkow27@gmail.com> | 2019-04-19 23:10:19 -0400 | 
|---|---|---|
| committer | FernandoS27 <fsahmkow27@gmail.com> | 2019-04-20 21:11:33 -0400 | 
| commit | 623b2e4b8f8a5fdde3b2a5594ab698461e81bd66 (patch) | |
| tree | cea5c98ee2429f9dd24855590187499a617077fd /src/video_core/shader | |
| parent | 650d9b1044352139cd7718a097fc4822e47ac3b0 (diff) | |
Correct half-float operations on const buffers and implement saturation.
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/decode/arithmetic_half.cpp | 22 | ||||
| -rw-r--r-- | src/video_core/shader/decode/hfma2.cpp | 9 | 
2 files changed, 16 insertions, 15 deletions
| diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index 9467f9417..2098c1170 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -9,6 +9,7 @@  namespace VideoCommon::Shader { +using Tegra::Shader::HalfType;  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; @@ -22,7 +23,6 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {              LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());          }      } -    UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");      const bool negate_a =          opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; @@ -32,35 +32,37 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {      Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);      op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); -    Node op_b = [&]() { +    auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> {          switch (opcode->get().GetId()) {          case OpCode::Id::HADD2_C:          case OpCode::Id::HMUL2_C: -            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); +            return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};          case OpCode::Id::HADD2_R:          case OpCode::Id::HMUL2_R: -            return GetRegister(instr.gpr20); +            return {instr.alu_half.type_b, GetRegister(instr.gpr20)};          default:              UNREACHABLE(); -            return Immediate(0); +            return {HalfType::F32, Immediate(0)};          }      }(); -    op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b); -    op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); +    op_b = UnpackHalfFloat(op_b, type_b); +    // redeclaration to avoid a bug in clang with reusing local 
bindings in lambdas +    Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);      Node value = [&]() {          switch (opcode->get().GetId()) {          case OpCode::Id::HADD2_C:          case OpCode::Id::HADD2_R: -            return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); +            return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt);          case OpCode::Id::HMUL2_C:          case OpCode::Id::HMUL2_R: -            return Operation(OperationCode::HMul, PRECISE, op_a, op_b); +            return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt);          default:              UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());              return Immediate(0);          }      }(); +    value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);      value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);      SetRegister(bb, instr.gpr0, value); @@ -68,4 +70,4 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {      return pc;  } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 5c1becce5..a425f9eb7 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -34,15 +34,14 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {          case OpCode::Id::HFMA2_CR:              neg_b = instr.hfma2.negate_b;              neg_c = instr.hfma2.negate_c; -            return {instr.hfma2.saturate, instr.hfma2.type_b, +            return {instr.hfma2.saturate, HalfType::F32,                      GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),                      instr.hfma2.type_reg39, GetRegister(instr.gpr39)};          case OpCode::Id::HFMA2_RC:              neg_b = instr.hfma2.negate_b;              neg_c = instr.hfma2.negate_c;              return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), -                    instr.hfma2.type_b, -                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; +                    HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};          case OpCode::Id::HFMA2_RR:              neg_b = instr.hfma2.rr.negate_b;              neg_c = instr.hfma2.rr.negate_c; @@ -56,13 +55,13 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {              return {false, identity, Immediate(0), identity, Immediate(0)};          }      }(); -    UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");      const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);      op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);      op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);      Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); +    value = GetSaturatedHalfFloat(value, saturate);      value = HalfMerge(GetRegister(instr.gpr0), value, 
instr.hfma2.merge);      SetRegister(bb, instr.gpr0, value); @@ -70,4 +69,4 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {      return pc;  } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader | 
