diff options
| author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2021-02-21 17:50:14 -0300 | 
|---|---|---|
| committer | ameerj <52414509+ameerj@users.noreply.github.com> | 2021-07-22 21:51:22 -0400 | 
| commit | 704c6f353f68745168902c6c66c04bb730bd30e6 (patch) | |
| tree | 71ed9654de41b5828ae2613167537d39499d2f3b | |
| parent | e2bc05b17d91854cbb9c0ce3647141bf7d33143e (diff) | |
shader: Rename, implement FADD.SAT and P2R (imm)
18 files changed, 213 insertions, 127 deletions
| diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index fbd4ec6dc..802527255 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -74,9 +74,10 @@ add_library(shader_recompiler STATIC      frontend/maxwell/translate/impl/integer_short_multiply_add.cpp      frontend/maxwell/translate/impl/load_store_attribute.cpp      frontend/maxwell/translate/impl/load_store_memory.cpp -    frontend/maxwell/translate/impl/not_implemented.cpp +    frontend/maxwell/translate/impl/move_predicate_to_register.cpp      frontend/maxwell/translate/impl/move_register.cpp      frontend/maxwell/translate/impl/move_special_register.cpp +    frontend/maxwell/translate/impl/not_implemented.cpp      frontend/maxwell/translate/translate.cpp      frontend/maxwell/translate/translate.h      ir_opt/collect_shader_info_pass.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index de624a151..922e294a7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -110,7 +110,7 @@ void EmitCompositeExtractF64x3(EmitContext& ctx);  void EmitCompositeExtractF64x4(EmitContext& ctx);  void EmitSelect8(EmitContext& ctx);  void EmitSelect16(EmitContext& ctx); -void EmitSelect32(EmitContext& ctx); +Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value);  void EmitSelect64(EmitContext& ctx);  void EmitBitCastU16F16(EmitContext& ctx);  Id EmitBitCastU32F32(EmitContext& ctx, Id value); @@ -130,9 +130,9 @@ void EmitGetZeroFromOp(EmitContext& ctx);  void EmitGetSignFromOp(EmitContext& ctx);  void EmitGetCarryFromOp(EmitContext& ctx);  void EmitGetOverflowFromOp(EmitContext& ctx); -void EmitFPAbs16(EmitContext& ctx); -void EmitFPAbs32(EmitContext& ctx); -void EmitFPAbs64(EmitContext& ctx); +Id EmitFPAbs16(EmitContext& ctx, Id value); +Id EmitFPAbs32(EmitContext& ctx, Id value); +Id EmitFPAbs64(EmitContext& ctx, Id value);  Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);  Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);  Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); @@ -146,9 +146,9 @@ void EmitFPMin64(EmitContext& ctx);  Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);  Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);  Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -void EmitFPNeg16(EmitContext& ctx); -void EmitFPNeg32(EmitContext& ctx); -void EmitFPNeg64(EmitContext& ctx); +Id EmitFPNeg16(EmitContext& ctx, Id value); +Id EmitFPNeg32(EmitContext& ctx, Id value); +Id EmitFPNeg64(EmitContext& ctx, Id value);  void EmitFPRecip32(EmitContext& ctx);  void EmitFPRecip64(EmitContext& ctx);  void EmitFPRecipSqrt32(EmitContext& ctx); @@ -161,9 +161,9 @@ void EmitFPExp2NotReduced(EmitContext& ctx);  void EmitFPCos(EmitContext& ctx);  void EmitFPCosNotReduced(EmitContext& ctx);  void EmitFPLog2(EmitContext& ctx); -void EmitFPSaturate16(EmitContext& ctx); -void EmitFPSaturate32(EmitContext& ctx); -void EmitFPSaturate64(EmitContext& ctx); +Id EmitFPSaturate16(EmitContext& ctx, Id value); +Id EmitFPSaturate32(EmitContext& ctx, Id value); +Id EmitFPSaturate64(EmitContext& ctx, Id value);  Id EmitFPRoundEven16(EmitContext& ctx, Id value);  Id EmitFPRoundEven32(EmitContext& ctx, Id value);  Id EmitFPRoundEven64(EmitContext& ctx, Id value); @@ -186,21 +186,21 @@ void EmitIAbs32(EmitContext& ctx);  Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift);  void EmitShiftRightLogical32(EmitContext& ctx);  void EmitShiftRightArithmetic32(EmitContext& ctx); -void EmitBitwiseAnd32(EmitContext& ctx); -void EmitBitwiseOr32(EmitContext& ctx); -void EmitBitwiseXor32(EmitContext& ctx); +Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b); +Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b); +Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b);  void EmitBitFieldInsert(EmitContext& ctx);  void EmitBitFieldSExtract(EmitContext& ctx);  Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count);  Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); -void EmitULessThan(EmitContext& ctx); -void EmitIEqual(EmitContext& ctx); -void EmitSLessThanEqual(EmitContext& ctx); -void EmitULessThanEqual(EmitContext& ctx); +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);  Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -void EmitUGreaterThan(EmitContext& ctx); -void EmitINotEqual(EmitContext& ctx); -void EmitSGreaterThanEqual(EmitContext& ctx); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);  Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);  void EmitLogicalOr(EmitContext& ctx);  void EmitLogicalAnd(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index c9687de37..47f87054b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -12,37 +12,21 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {      if (flags.no_contraction) {          ctx.Decorate(op, spv::Decoration::NoContraction);      } -    switch (flags.rounding) { -    case IR::FpRounding::DontCare: -        break; -    case IR::FpRounding::RN: -        ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTE); -        break; -    case IR::FpRounding::RM: -        ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN); -        break; -    case IR::FpRounding::RP: -        ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTP); -        break; -    case IR::FpRounding::RZ: -        ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ); -        break; -    }      return op;  }  } // Anonymous namespace -void EmitFPAbs16(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPAbs16(EmitContext& ctx, Id value) { +    return ctx.OpFAbs(ctx.F16[1], value);  } -void EmitFPAbs32(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPAbs32(EmitContext& ctx, Id value) { +    return ctx.OpFAbs(ctx.F32[1], value);  } -void EmitFPAbs64(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPAbs64(EmitContext& ctx, Id value) { +    return ctx.OpFAbs(ctx.F64[1], value);  }  Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { @@ -97,16 +81,16 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {      return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));  } -void EmitFPNeg16(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPNeg16(EmitContext& ctx, Id value) { +    return ctx.OpFNegate(ctx.F16[1], value);  } -void EmitFPNeg32(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPNeg32(EmitContext& ctx, Id value) { +    return ctx.OpFNegate(ctx.F32[1], value);  } -void EmitFPNeg64(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPNeg64(EmitContext& ctx, Id value) { +    return ctx.OpFNegate(ctx.F64[1], value);  }  void EmitFPRecip32(EmitContext&) { @@ -157,16 +141,22 @@ void EmitFPLog2(EmitContext&) {      throw NotImplementedException("SPIR-V Instruction");  } -void EmitFPSaturate16(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSaturate16(EmitContext& ctx, Id value) { +    const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; +    const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; +    return ctx.OpFClamp(ctx.F32[1], value, zero, one);  } -void EmitFPSaturate32(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSaturate32(EmitContext& ctx, Id value) { +    const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})}; +    const Id one{ctx.Constant(ctx.F32[1], f32{1.0})}; +    return ctx.OpFClamp(ctx.F32[1], value, zero, one);  } -void EmitFPSaturate64(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSaturate64(EmitContext& ctx, Id value) { +    const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; +    const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; +    return ctx.OpFClamp(ctx.F64[1], value, zero, one);  }  Id EmitFPRoundEven16(EmitContext& ctx, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 22117a4ee..4c0b5990d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -7,10 +7,39 @@  namespace Shader::Backend::SPIRV {  Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { -    if (inst->HasAssociatedPseudoOperation()) { -        throw NotImplementedException("Pseudo-operations on IAdd32"); +    Id result{}; +    if (IR::Inst* const carry{inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { +        const Id carry_type{ctx.TypeStruct(ctx.U32[1], ctx.U32[1])}; +        const Id carry_result{ctx.OpIAddCarry(carry_type, a, b)}; +        result = ctx.OpCompositeExtract(ctx.U32[1], carry_result, 0U); + +        const Id carry_value{ctx.OpCompositeExtract(ctx.U32[1], carry_result, 1U)}; +        carry->SetDefinition(ctx.OpINotEqual(ctx.U1, carry_value, ctx.u32_zero_value)); +        carry->Invalidate(); +    } else { +        result = ctx.OpIAdd(ctx.U32[1], a, b);      } -    return ctx.OpIAdd(ctx.U32[1], a, b); +    if (IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}) { +        zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value)); +        zero->Invalidate(); +    } +    if (IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}) { +        sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value)); +        sign->Invalidate(); +    } +    if (IR::Inst * overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { +        // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c +        constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())}; +        const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)}; +        const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Constant(ctx.U32[1], s32_max), a)}; + +        const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)}; +        const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)}; +        const Id carry_flag{ctx.OpSelect(ctx.U1, is_positive, positive_test, negative_test)}; +        overflow->SetDefinition(carry_flag); +        overflow->Invalidate(); +    } +    return result;  }  void EmitIAdd64(EmitContext&) { @@ -49,16 +78,16 @@ void EmitShiftRightArithmetic32(EmitContext&) {      throw NotImplementedException("SPIR-V Instruction");  } -void EmitBitwiseAnd32(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b) { +    return ctx.OpBitwiseAnd(ctx.U32[1], a, b);  } -void EmitBitwiseOr32(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b) { +    return ctx.OpBitwiseOr(ctx.U32[1], a, b);  } -void EmitBitwiseXor32(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b) { +    return ctx.OpBitwiseXor(ctx.U32[1], a, b);  }  void EmitBitFieldInsert(EmitContext&) { @@ -77,36 +106,36 @@ Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) {      return ctx.OpSLessThan(ctx.U1, lhs, rhs);  } -void EmitULessThan(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) { +    return ctx.OpULessThan(ctx.U1, lhs, rhs);  } -void EmitIEqual(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) { +    return ctx.OpIEqual(ctx.U1, lhs, rhs);  } -void EmitSLessThanEqual(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +    return ctx.OpSLessThanEqual(ctx.U1, lhs, rhs);  } -void EmitULessThanEqual(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +    return ctx.OpULessThanEqual(ctx.U1, lhs, rhs);  }  Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {      return ctx.OpSGreaterThan(ctx.U1, lhs, rhs);  } -void EmitUGreaterThan(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { +    return ctx.OpUGreaterThan(ctx.U1, lhs, rhs);  } -void EmitINotEqual(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs) { +    return ctx.OpINotEqual(ctx.U1, lhs, rhs);  } -void EmitSGreaterThanEqual(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +    return ctx.OpSGreaterThanEqual(ctx.U1, lhs, rhs);  }  Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 8d5062724..eb1926a4d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -14,8 +14,8 @@ void EmitSelect16(EmitContext&) {      throw NotImplementedException("SPIR-V Instruction");  } -void EmitSelect32(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); +Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { +    return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value);  }  void EmitSelect64(EmitContext&) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 8f120a2f6..34c2f67fb 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -468,11 +468,11 @@ F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F  F16F32F64 IREmitter::FPAbs(const F16F32F64& value) {      switch (value.Type()) { -    case Type::U16: +    case Type::F16:          return Inst<F16>(Opcode::FPAbs16, value); -    case Type::U32: +    case Type::F32:          return Inst<F32>(Opcode::FPAbs32, value); -    case Type::U64: +    case Type::F64:          return Inst<F64>(Opcode::FPAbs64, value);      default:          ThrowInvalidType(value.Type()); @@ -481,11 +481,11 @@ F16F32F64 IREmitter::FPAbs(const F16F32F64& value) {  F16F32F64 IREmitter::FPNeg(const F16F32F64& value) {      switch (value.Type()) { -    case Type::U16: +    case Type::F16:          return Inst<F16>(Opcode::FPNeg16, value); -    case Type::U32: +    case Type::F32:          return Inst<F32>(Opcode::FPNeg32, value); -    case Type::U64: +    case Type::F64:          return Inst<F64>(Opcode::FPNeg64, value);      default:          ThrowInvalidType(value.Type()); @@ -495,10 +495,10 @@ F16F32F64 IREmitter::FPNeg(const F16F32F64& value) {  F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) {      F16F32F64 result{value};      if (abs) { -        result = FPAbs(value); +        result = FPAbs(result);      }      if (neg) { -        result = FPNeg(value); +        result = FPNeg(result);      }      return result;  } diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h index c6f2f82bf..4e7f32423 100644 --- a/src/shader_recompiler/frontend/ir/pred.h +++ b/src/shader_recompiler/frontend/ir/pred.h @@ -19,8 +19,8 @@ enum class Pred : u64 {      PT,  }; -constexpr size_t NUM_USER_PREDS = 6; -constexpr size_t NUM_PREDS = 7; +constexpr size_t NUM_USER_PREDS = 7; +constexpr size_t NUM_PREDS = 8;  [[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept {      return static_cast<size_t>(pred); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 16cdc12e2..ed5dbf41f 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,12 +56,12 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo              .post_order_blocks{},          });      } +    fmt::print(stdout, "{}\n", IR::DumpProgram(program));      Optimization::LowerFp16ToFp32(program);      for (IR::Function& function : functions) {          function.post_order_blocks = PostOrder(function.blocks);          Optimization::SsaRewritePass(function.post_order_blocks);      } -    fmt::print(stdout, "{}\n", IR::DumpProgram(program));      Optimization::GlobalMemoryToStorageBufferPass(program);      for (IR::Function& function : functions) {          Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index cb3a326cf..219ffcc6a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -9,7 +9,6 @@  namespace Shader::Maxwell {  namespace { -  void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,            const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {      union { @@ -18,9 +17,6 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin          BitField<8, 8, IR::Reg> src_a;      } const fadd{insn}; -    if (sat) { -        throw NotImplementedException("FADD SAT"); -    }      if (cc) {          throw NotImplementedException("FADD CC");      } @@ -31,7 +27,11 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin          .rounding{CastFpRounding(fp_rounding)},          .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},      }; -    v.F(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); +    IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; +    if (sat) { +        value = v.ir.FPSaturate(value); +    } +    v.F(fadd.dest_reg, value);  }  void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { @@ -53,15 +53,15 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {  } // Anonymous namespace  void TranslatorVisitor::FADD_reg(u64 insn) { -    FADD(*this, insn, GetReg20F(insn)); +    FADD(*this, insn, GetRegFloat20(insn));  } -void TranslatorVisitor::FADD_cbuf(u64) { -    throw NotImplementedException("FADD (cbuf)"); +void TranslatorVisitor::FADD_cbuf(u64 insn) { +    FADD(*this, insn, GetFloatCbuf(insn));  } -void TranslatorVisitor::FADD_imm(u64) { -    throw NotImplementedException("FADD (imm)"); +void TranslatorVisitor::FADD_imm(u64 insn) { +    FADD(*this, insn, GetFloatImm20(insn));  }  void TranslatorVisitor::FADD32I(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 4d82a0009..81175627f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -158,7 +158,7 @@ void TranslatorVisitor::F2I_cbuf(u64 insn) {          case SrcFormat::F16:              return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)};          case SrcFormat::F32: -            return GetCbufF(insn); +            return GetFloatCbuf(insn);          case SrcFormat::F64: {              return UnpackCbuf(*this, insn);          } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 1464f2807..758700d3c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s  } // Anonymous namespace  void TranslatorVisitor::FFMA_reg(u64 insn) { -    FFMA(*this, insn, GetReg20F(insn), GetReg39F(insn)); +    FFMA(*this, insn, GetRegFloat20(insn), GetRegFloat39(insn));  }  void TranslatorVisitor::FFMA_rc(u64) { @@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) {  }  void TranslatorVisitor::FFMA_cr(u64 insn) { -    FFMA(*this, insn, GetCbufF(insn), GetReg39F(insn)); +    FFMA(*this, insn, GetFloatCbuf(insn), GetRegFloat39(insn));  }  void TranslatorVisitor::FFMA_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 1b1d38be7..5c38d3fc1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -91,7 +91,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {  } // Anonymous namespace  void TranslatorVisitor::FMUL_reg(u64 insn) { -    return FMUL(*this, insn, GetReg20F(insn)); +    return FMUL(*this, insn, GetRegFloat20(insn));  }  void TranslatorVisitor::FMUL_cbuf(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 079e3497f..be17bb0d9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -48,11 +48,11 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) {      return X(reg.index);  } -IR::F32 TranslatorVisitor::GetReg20F(u64 insn) { +IR::F32 TranslatorVisitor::GetRegFloat20(u64 insn) {      return ir.BitCast<IR::F32>(GetReg20(insn));  } -IR::F32 TranslatorVisitor::GetReg39F(u64 insn) { +IR::F32 TranslatorVisitor::GetRegFloat39(u64 insn) {      return ir.BitCast<IR::F32>(GetReg39(insn));  } @@ -73,7 +73,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {      return ir.GetCbuf(binding, byte_offset);  } -IR::F32 TranslatorVisitor::GetCbufF(u64 insn) { +IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {      return ir.BitCast<IR::F32>(GetCbuf(insn));  } @@ -88,6 +88,17 @@ IR::U32 TranslatorVisitor::GetImm20(u64 insn) {      return ir.Imm32(value);  } +IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { +    union { +        u64 raw; +        BitField<20, 19, u64> value; +        BitField<56, 1, u64> is_negative; +    } const imm{insn}; +    const f32 positive_value{Common::BitCast<f32>(static_cast<u32>(imm.value) << 12)}; +    const f32 value{imm.is_negative != 0 ? -positive_value : positive_value}; +    return ir.Imm32(value); +} +  IR::U32 TranslatorVisitor::GetImm32(u64 insn) {      union {          u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 27aba2cf8..4d4cf2ebf 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -304,13 +304,14 @@ public:      [[nodiscard]] IR::U32 GetReg8(u64 insn);      [[nodiscard]] IR::U32 GetReg20(u64 insn);      [[nodiscard]] IR::U32 GetReg39(u64 insn); -    [[nodiscard]] IR::F32 GetReg20F(u64 insn); -    [[nodiscard]] IR::F32 GetReg39F(u64 insn); +    [[nodiscard]] IR::F32 GetRegFloat20(u64 insn); +    [[nodiscard]] IR::F32 GetRegFloat39(u64 insn);      [[nodiscard]] IR::U32 GetCbuf(u64 insn); -    [[nodiscard]] IR::F32 GetCbufF(u64 insn); +    [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);      [[nodiscard]] IR::U32 GetImm20(u64 insn); +    [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);      [[nodiscard]] IR::U32 GetImm32(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 623e78ff8..1493e1815 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -84,8 +84,8 @@ void TranslatorVisitor::IADD_cbuf(u64 insn) {      IADD(*this, insn, GetCbuf(insn));  } -void TranslatorVisitor::IADD_imm(u64) { -    throw NotImplementedException("IADD (imm)"); +void TranslatorVisitor::IADD_imm(u64 insn) { +    IADD(*this, insn, GetImm20(insn));  }  void TranslatorVisitor::IADD32I(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp new file mode 100644 index 000000000..4324fd443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { +    PR, +    CC, +}; +} // Anonymous namespace + +void TranslatorVisitor::P2R_reg(u64) { +    throw NotImplementedException("P2R (reg)"); +} + +void TranslatorVisitor::P2R_cbuf(u64) { +    throw NotImplementedException("P2R (cbuf)"); +} + +void TranslatorVisitor::P2R_imm(u64 insn) { +    union { +        u64 raw; +        BitField<0, 8, IR::Reg> dest_reg; +        BitField<8, 8, IR::Reg> src; +        BitField<40, 1, Mode> mode; +        BitField<41, 2, u64> byte_selector; +    } const p2r{insn}; + +    const u32 mask{GetImm20(insn).U32()}; +    const bool pr_mode{p2r.mode == Mode::PR}; +    const u32 num_items{pr_mode ? 7U : 4U}; +    const u32 offset{static_cast<u32>(p2r.byte_selector) * 8}; +    IR::U32 insert{ir.Imm32(0)}; +    for (u32 index = 0; index < num_items; ++index) { +        if (((mask >> index) & 1) == 0) { +            continue; +        } +        const IR::U1 cond{[this, index, pr_mode] { +            if (pr_mode) { +                return ir.GetPred(IR::Pred{index}); +            } +            switch (index) { +            case 0: +                return ir.GetZFlag(); +            case 1: +                return ir.GetSFlag(); +            case 2: +                return ir.GetCFlag(); +            case 3: +                return ir.GetOFlag(); +            } +            throw LogicError("Unreachable P2R index"); +        }()}; +        const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))}; +        insert = ir.BitwiseOr(insert, bit); +    } +    const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))}; +    X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 6b2a1356b..628cf1c14 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -633,18 +633,6 @@ void TranslatorVisitor::OUT_imm(u64) {      ThrowNotImplemented(Opcode::OUT_imm);  } -void TranslatorVisitor::P2R_reg(u64) { -    ThrowNotImplemented(Opcode::P2R_reg); -} - -void TranslatorVisitor::P2R_cbuf(u64) { -    ThrowNotImplemented(Opcode::P2R_cbuf); -} - -void TranslatorVisitor::P2R_imm(u64) { -    ThrowNotImplemented(Opcode::P2R_imm); -} -  void TranslatorVisitor::PBK() {      // PBK is a no-op  } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 49ff911d6..b25af6cd3 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -191,12 +191,12 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) {          .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,      };      const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; - +    /*      FILE* file = fopen("D:\\shader.spv", "wb");      fwrite(code.data(), 4, code.size(), file);      fclose(file);      std::system("spirv-dis D:\\shader.spv"); - +    */      shader_info->unique_hash = env.ComputeHash();      shader_info->size_bytes = env.ShaderSize();      return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, | 
