diff options
8 files changed, 114 insertions, 41 deletions
| diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 2ce839059..4aa3682c2 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -203,7 +203,13 @@ void Precolor(EmitContext& ctx, const IR::Program& program) {              for (size_t i = 0; i < num_args; ++i) {                  IR::Block& phi_block{*phi.PhiBlock(i)};                  auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; -                IR::IREmitter{phi_block, it}.PhiMove(phi, phi.Arg(i)); +                IR::IREmitter ir{phi_block, it}; +                const IR::Value arg{phi.Arg(i)}; +                if (arg.IsImmediate()) { +                    ir.PhiMove(phi, arg); +                } else { +                    ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())}); +                }              }              for (size_t i = 0; i < num_args; ++i) {                  IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp index 808c72105..9201ccd39 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -23,7 +23,13 @@ void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {  }  void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { -    ctx.Add("MOV.S {},{};", inst, ScalarS32{ctx.reg_alloc.Consume(value)}); +    // Fake one usage to get a real register out of the condition +    inst.DestructiveAddUsage(1); +    const Register ret{ctx.reg_alloc.Define(inst)}; +    const ScalarS32 input{ctx.reg_alloc.Consume(value)}; +    if (ret != input) { +        ctx.Add("MOV.S {},{};", ret, input); +    }  }  void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp index d829f05b3..bff0b7c1c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp @@ -52,7 +52,9 @@ void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, Objec          // The input composite is not aliased with the return value so we have to copy it before          // hand. But the insert object is not aliased with the return value, so we don't have to          // worry about that -        ctx.Add("MOV.{} {},{};MOV.{} {}.{},{};", type, ret, composite, type, ret, swizzle, object); +        ctx.Add("MOV.{} {},{};" +                "MOV.{} {}.{},{};", +                type, ret, composite, type, ret, swizzle, object);      } else {          // The return value is alised so we can just insert the object, it doesn't matter if it's          // aliased diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index a7def0897..34725b8c6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -181,7 +181,6 @@ void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) {      ctx.Add("MOV.S {},-1;"              "MOV.S {}(NONRESIDENT),0;",              sparse_ret, sparse_ret); -    sparse_inst->Invalidate();  }  std::string_view FormatStorage(ImageFormat format) { @@ -215,12 +214,20 @@ void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Regis      const Register ret{ctx.reg_alloc.Define(inst)};      ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type);  } + +IR::Inst* PrepareSparse(IR::Inst& inst) { +    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; +    if (sparse_inst) { +        sparse_inst->Invalidate(); +    } +    return sparse_inst; +}  } // Anonymous namespace  void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,                                  const IR::Value& coord, Register bias_lc, const IR::Value& offset) {      const auto info{inst.Flags<IR::TextureInstInfo>()}; -    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; +    const auto sparse_inst{PrepareSparse(inst)};      const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};      const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""};      const std::string_view type{TextureType(info)}; @@ -259,7 +266,7 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu  void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,                                  const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) {      const auto info{inst.Flags<IR::TextureInstInfo>()}; -    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; +    const auto sparse_inst{PrepareSparse(inst)};      const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};      const std::string_view type{TextureType(info)};      const std::string texture{Texture(ctx, info, index)}; @@ -288,7 +295,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::      }      const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};      const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)}; -    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; +    const auto sparse_inst{PrepareSparse(inst)};      const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};      const std::string_view type{TextureType(info)};      const std::string texture{Texture(ctx, info, index)}; @@ -393,7 +400,7 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::      }      const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};      const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)}; -    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; +    const auto sparse_inst{PrepareSparse(inst)};      const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};      const std::string_view type{TextureType(info)};      const std::string texture{Texture(ctx, info, index)}; @@ -436,7 +443,7 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,      const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};      const auto info{inst.Flags<IR::TextureInstInfo>()};      const char comp{"xyzw"[info.gather_component]}; -    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; +    const auto sparse_inst{PrepareSparse(inst)};      const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};      const std::string_view type{TextureType(info)};      const std::string texture{Texture(ctx, info, index)}; @@ -462,7 +469,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde      // Allocate offsets early so they don't overwrite any consumed register      const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};      const auto info{inst.Flags<IR::TextureInstInfo>()}; -    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; +    const auto sparse_inst{PrepareSparse(inst)};      const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};      const std::string_view type{TextureType(info)};      const std::string texture{Texture(ctx, info, index)}; @@ -500,7 +507,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde  void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,                      const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) {      const auto info{inst.Flags<IR::TextureInstInfo>()}; -    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; +    const auto sparse_inst{PrepareSparse(inst)};      const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};      const std::string_view type{TextureType(info)};      const std::string texture{Texture(ctx, info, index)}; @@ -547,7 +554,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,          dpdx = ScopedRegister{ctx.reg_alloc};          dpdy = ScopedRegister{ctx.reg_alloc};      } -    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; +    const auto sparse_inst{PrepareSparse(inst)};      const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};      const std::string_view type{TextureType(info)};      const std::string texture{Texture(ctx, info, index)}; @@ -581,7 +588,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,  void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {      const auto info{inst.Flags<IR::TextureInstInfo>()}; -    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; +    const auto sparse_inst{PrepareSparse(inst)};      const std::string_view format{FormatStorage(info.image_format)};      const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};      const std::string_view type{TextureType(info)}; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index e5aac14c8..e9d1bae05 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -9,6 +9,17 @@  namespace Shader::Backend::GLASM {  void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { +    const std::array flags{ +        inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp), +        inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp), +        inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp), +        inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp), +    }; +    for (IR::Inst* const flag_inst : flags) { +        if (flag_inst) { +            flag_inst->Invalidate(); +        } +    }      const bool cc{inst.HasAssociatedPseudoOperation()};      const std::string_view cc_mod{cc ? ".CC" : ""};      if (cc) { @@ -19,20 +30,22 @@ void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {      if (!cc) {          return;      } -    static constexpr std::array<std::string_view, 4> masks{"EQ", "SF", "CF", "OF"}; -    const std::array flags{ -        inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp), -        inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp), -        inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp), -        inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp), -    }; -    for (size_t i = 0; i < flags.size(); ++i) { -        if (flags[i]) { -            const auto flag_ret{ctx.reg_alloc.Define(*flags[i])}; -            ctx.Add("MOV.S {},0;" -                    "MOV.S {}({}.x),-1;", -                    flag_ret, flag_ret, masks[i]); -            flags[i]->Invalidate(); +    static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"}; +    for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) { +        if (!flags[flag_index]) { +            continue; +        } +        const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])}; +        if (flag_index == 0) { +            ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret); +        } else { +            // We could use conditional execution here, but it's broken on Nvidia's compiler +            ctx.Add("IF {}.x;" +                    "MOV.S {}.x,-1;" +                    "ELSE;" +                    "MOV.S {}.x,0;" +                    "ENDIF;", +                    masks[flag_index], flag_ret, flag_ret);          }      }  } @@ -136,6 +149,17 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scal  void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,                            ScalarU32 count) { +    const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); +    const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); +    if (zero) { +        zero->Invalidate(); +    } +    if (sign) { +        sign->Invalidate(); +    } +    if (zero || sign) { +        ctx.reg_alloc.InvalidateConditionCodes(); +    }      const Register ret{ctx.reg_alloc.Define(inst)};      if (count.type != Type::Register && offset.type != Type::Register) {          ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base); @@ -145,13 +169,11 @@ void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, Scal                  "BFE.U {},RC,{};",                  count, offset, ret, base);      } -    if (const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)) { +    if (zero) {          ctx.Add("SEQ.S {},{},0;", *zero, ret); -        zero->Invalidate();      } -    if (const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)) { +    if (sign) {          ctx.Add("SLT.S {},{},0;", *sign, ret); -        sign->Invalidate();      }  } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp index af0e13d43..6e30790bb 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp @@ -51,6 +51,10 @@ void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {  static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,                      const IR::Value& clamp, const IR::Value& segmentation_mask,                      std::string_view op) { +    IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; +    if (in_bounds) { +        in_bounds->Invalidate(); +    }      std::string mask;      if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) {          mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8)); @@ -61,13 +65,11 @@ static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32                  ScalarU32{ctx.reg_alloc.Consume(clamp)});      }      const Register value_ret{ctx.reg_alloc.Define(inst)}; -    IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};      if (in_bounds) {          const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)};          ctx.Add("SHF{}.U {},{},{},{};"                  "MOV.U {}.x,{}.y;",                  op, bounds_ret, value, index, mask, value_ret, bounds_ret); -        in_bounds->Invalidate();      } else {          ctx.Add("SHF{}.U {},{},{},{};"                  "MOV.U {}.x,{}.y;", diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index 707b22247..c55a833c6 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -22,11 +22,19 @@ Register RegAlloc::LongDefine(IR::Inst& inst) {  }  Value RegAlloc::Peek(const IR::Value& value) { -    return value.IsImmediate() ? MakeImm(value) : PeekInst(*value.InstRecursive()); +    if (value.IsImmediate()) { +        return MakeImm(value); +    } else { +        return PeekInst(*value.Inst()); +    }  }  Value RegAlloc::Consume(const IR::Value& value) { -    return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive()); +    if (value.IsImmediate()) { +        return MakeImm(value); +    } else { +        return ConsumeInst(*value.Inst()); +    }  }  void RegAlloc::Unref(IR::Inst& inst) { @@ -88,7 +96,14 @@ Value RegAlloc::MakeImm(const IR::Value& value) {  }  Register RegAlloc::Define(IR::Inst& inst, bool is_long) { -    inst.SetDefinition<Id>(Alloc(is_long)); +    if (inst.HasUses()) { +        inst.SetDefinition<Id>(Alloc(is_long)); +    } else { +        Id id{}; +        id.is_long.Assign(is_long ? 1 : 0); +        id.is_null.Assign(1); +        inst.SetDefinition<Id>(id); +    }      return Register{PeekInst(inst)};  } @@ -115,10 +130,12 @@ Id RegAlloc::Alloc(bool is_long) {              num_regs = std::max(num_regs, reg + 1);              use[reg] = true;              Id ret{}; -            ret.index.Assign(static_cast<u32>(reg)); +            ret.is_valid.Assign(1);              ret.is_long.Assign(is_long ? 1 : 0);              ret.is_spill.Assign(0);              ret.is_condition_code.Assign(0); +            ret.is_null.Assign(0); +            ret.index.Assign(static_cast<u32>(reg));              return ret;          }      } @@ -126,6 +143,9 @@ Id RegAlloc::Alloc(bool is_long) {  }  void RegAlloc::Free(Id id) { +    if (id.is_valid == 0) { +        throw LogicError("Freeing invalid register"); +    }      if (id.is_spill != 0) {          throw NotImplementedException("Free spill");      } diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index 41b7c92be..b97c84146 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -35,10 +35,12 @@ enum class Type : u32 {  struct Id {      union {          u32 raw; -        BitField<0, 29, u32> index; -        BitField<29, 1, u32> is_long; -        BitField<30, 1, u32> is_spill; -        BitField<31, 1, u32> is_condition_code; +        BitField<0, 1, u32> is_valid; +        BitField<1, 1, u32> is_long; +        BitField<2, 1, u32> is_spill; +        BitField<3, 1, u32> is_condition_code; +        BitField<4, 1, u32> is_null; +        BitField<5, 27, u32> index;      };      bool operator==(Id rhs) const noexcept { @@ -164,12 +166,18 @@ auto FormatTo(FormatContext& ctx, Id id) {          throw NotImplementedException("Spill emission");      }      if constexpr (scalar) { +        if (id.is_null != 0) { +            return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x"); +        }          if (id.is_long != 0) {              return fmt::format_to(ctx.out(), "D{}.x", id.index.Value());          } else {              return fmt::format_to(ctx.out(), "R{}.x", id.index.Value());          }      } else { +        if (id.is_null != 0) { +            return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC"); +        }          if (id.is_long != 0) {              return fmt::format_to(ctx.out(), "D{}", id.index.Value());          } else { | 
