diff options
| author | bunnei <bunneidev@gmail.com> | 2019-02-06 21:56:14 -0500 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-02-06 21:56:14 -0500 | 
| commit | f09d1dffd16ab857d6cf75b862aa0b01777e5673 (patch) | |
| tree | 8d99905781788745e7a058cdb844964bbc15eba4 /src/video_core | |
| parent | ca482997fed867ba1fb4db009924249c8d51fafb (diff) | |
| parent | 0d1d755086f95fb20975fe2a4fd6c9c9de43789b (diff) | |
Merge pull request #2083 from ReinUsesLisp/shader-ir-cbuf-tracking
shader/track: Add a more permissive global memory tracking
Diffstat (limited to 'src/video_core')
30 files changed, 141 insertions, 127 deletions
| diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d84caa6db..70e124dc4 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -171,7 +171,7 @@ public:              code.AddLine(fmt::format("case 0x{:x}u: {{", address));              ++code.scope; -            VisitBasicBlock(bb); +            VisitBlock(bb);              --code.scope;              code.AddLine('}'); @@ -423,7 +423,7 @@ private:              code.AddNewLine();      } -    void VisitBasicBlock(const BasicBlock& bb) { +    void VisitBlock(const NodeBlock& bb) {          for (const Node node : bb) {              if (const std::string expr = Visit(node); !expr.empty()) {                  code.AddLine(expr); @@ -575,7 +575,7 @@ private:              code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {");              ++code.scope; -            VisitBasicBlock(conditional->GetCode()); +            VisitBlock(conditional->GetCode());              --code.scope;              code.AddLine('}'); diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 812983a99..740ac3118 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -121,15 +121,15 @@ ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {      return exit_method = ExitMethod::AlwaysReturn;  } -BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) { -    BasicBlock basic_block; +NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { +    NodeBlock basic_block;      for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {          pc = DecodeInstr(basic_block, pc);      }      return basic_block;  } -u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {      // Ignore sched instructions when generating code.      if (IsSchedInstruction(pc, main_offset)) {          return pc + 1; @@ -151,39 +151,38 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {      UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,                           "NeverExecute predicate not implemented"); -    static const std::map<OpCode::Type, u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32)> -        decoders = { -            {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, -            {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, -            {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, -            {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, -            {OpCode::Type::Shift, &ShaderIR::DecodeShift}, -            {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, -            {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, -            {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, -            {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, -            {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, -            {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, -            {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, -            {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, -            {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, -            {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, -            {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, -            {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, -            {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, -            {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, -            {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, -            {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, -            {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, -            {OpCode::Type::Video, &ShaderIR::DecodeVideo}, -            {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, -        }; +    static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = { +        {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, +        {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, +        {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, +        {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, +        {OpCode::Type::Shift, &ShaderIR::DecodeShift}, +        {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, +        {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, +        {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, +        {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, +        {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, +        {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, +        {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, +        {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, +        {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, +        {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, +        {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, +        {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, +        {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, +        {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, +        {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, +        {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, +        {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, +        {OpCode::Type::Video, &ShaderIR::DecodeVideo}, +        {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, +    };      std::vector<Node> tmp_block;      if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { -        pc = (this->*decoder->second)(tmp_block, bb, pc); +        pc = (this->*decoder->second)(tmp_block, pc);      } else { -        pc = DecodeOther(tmp_block, bb, pc); +        pc = DecodeOther(tmp_block, pc);      }      // Some instructions (like SSY) don't have a predicate field, they are always unconditionally @@ -192,11 +191,14 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {      const auto pred_index = static_cast<u32>(instr.pred.pred_index);      if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) { -        bb.push_back( -            Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block))); +        const Node conditional = +            Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)); +        global_code.push_back(conditional); +        bb.push_back(conditional);      } else {          for (auto& node : tmp_block) { -            bb.push_back(std::move(node)); +            global_code.push_back(node); +            bb.push_back(node);          }      } diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 51b8d55d4..3190e2d7c 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode;  using Tegra::Shader::SubOp; -u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index 37eef2bf2..baee89107 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader {  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 7b4f7d284..c2164ba50 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader {  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp index 4fd3db54e..0d139c0d2 100644 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader {  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index cc9a76a19..38bb692d6 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -15,7 +15,7 @@ using Tegra::Shader::OpCode;  using Tegra::Shader::Pred;  using Tegra::Shader::Register; -u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); @@ -242,7 +242,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3      return pc;  } -void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, +void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,                                      Node imm_lut, bool sets_cc) {      constexpr u32 lop_iterations = 32;      const Node one = Immediate(1); diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index b26a6e473..3ed5ccc5a 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -16,7 +16,7 @@ using Tegra::Shader::Pred;  using Tegra::Shader::PredicateResultMode;  using Tegra::Shader::Register; -u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); @@ -54,9 +54,9 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock&      return pc;  } -void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op, -                                   Node op_a, Node op_b, PredicateResultMode predicate_mode, -                                   Pred predicate, bool sets_cc) { +void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, +                                   Node op_b, PredicateResultMode predicate_mode, Pred predicate, +                                   bool sets_cc) {      const Node result = [&]() {          switch (logic_op) {          case LogicOperation::And: diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index 0734141b0..6a95dc928 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader {  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index 942d6729d..601d66f1f 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader {  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 728a393a1..a992f73f8 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode;  using Tegra::Shader::Register; -u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index 52f39d3ff..0559cc8de 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader {  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index 9f9da2278..1bd6755dd 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader {  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index dd3aef6f2..9285b8d05 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode;  using Tegra::Shader::Pred; -u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index dfd7cb98f..748368555 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -14,7 +14,7 @@ namespace VideoCommon::Shader {  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index 53c44ae5a..e68512692 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode;  using Tegra::Shader::Pred; -u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 43a0a9e10..7a07c5ec6 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -16,7 +16,7 @@ using Tegra::Shader::HalfType;  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index 16eb3985f..a3bf17eba 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader {  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index daf97174b..aad836d24 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode;  using Tegra::Shader::Pred; -u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 3dd26da20..e006f8138 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -36,7 +36,7 @@ static std::size_t GetCoordCount(TextureType texture_type) {      }  } -u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); @@ -160,7 +160,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {          }();          const Node addr_register = GetRegister(instr.gpr8); -        const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size())); +        const Node base_address = +            TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));          const auto cbuf = std::get_if<CbufNode>(base_address);          ASSERT(cbuf != nullptr);          const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); @@ -464,8 +465,7 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu      return *used_samplers.emplace(entry).first;  } -void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr, -                                        const Node4& components) { +void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {      u32 dest_elem = 0;      for (u32 elem = 0; elem < 4; ++elem) {          if (!instr.tex.IsComponentEnabled(elem)) { @@ -480,7 +480,7 @@ void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr,      }  } -void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, +void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,                                           const Node4& components) {      // TEXS has two destination registers and a swizzle. The first two elements in the swizzle      // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 @@ -504,7 +504,7 @@ void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr,      }  } -void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, +void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,                                               const Node4& components) {      // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half      // float instruction). diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index c1e5f4efb..f9502e3d0 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -14,7 +14,7 @@ using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode;  using Tegra::Shader::Register; -u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp index 1717f0653..83c61680e 100644 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ b/src/video_core/shader/decode/predicate_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode;  using Tegra::Shader::Pred; -u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index 8bd15fb00..d0495995d 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader {  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; -u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index bdb4424a6..f070e8912 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader {  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 6623f8ff9..951e85f44 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader {  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index c3432356d..956c01d9b 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -15,7 +15,7 @@ using Tegra::Shader::Pred;  using Tegra::Shader::VideoType;  using Tegra::Shader::VmadShr; -u32 ShaderIR::DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 9cb864500..c34843307 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader {  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index d7747103e..ac5112d78 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -337,27 +337,27 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {      }  } -void ShaderIR::SetRegister(BasicBlock& bb, Register dest, Node src) { +void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {      bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src));  } -void ShaderIR::SetPredicate(BasicBlock& bb, u64 dest, Node src) { +void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {      bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src));  } -void ShaderIR::SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value) { +void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {      bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value));  } -void ShaderIR::SetLocalMemory(BasicBlock& bb, Node address, Node value) { +void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {      bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value));  } -void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) { +void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) {      SetRegister(bb, Register::ZeroIndex + 1 + id, value);  } -void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc) { +void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {      if (!sets_cc) {          return;      } @@ -366,7 +366,7 @@ void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_c      LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");  } -void ShaderIR::SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc) { +void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) {      if (!sets_cc) {          return;      } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ef0f3a106..1d4fbef53 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -39,7 +39,7 @@ using NodeData =                   PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>;  using Node = const NodeData*;  using Node4 = std::array<Node, 4>; -using BasicBlock = std::vector<Node>; +using NodeBlock = std::vector<Node>;  constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; @@ -539,7 +539,7 @@ public:          Decode();      } -    const std::map<u32, BasicBlock>& GetBasicBlocks() const { +    const std::map<u32, NodeBlock>& GetBasicBlocks() const {          return basic_blocks;      } @@ -590,7 +590,7 @@ private:      ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels); -    BasicBlock DecodeRange(u32 begin, u32 end); +    NodeBlock DecodeRange(u32 begin, u32 end);      /**       * Decodes a single instruction from Tegra to IR. @@ -598,33 +598,33 @@ private:       * @param pc Program counter. Offset to decode.       * @return Next address to decode.       */ -    u32 DecodeInstr(BasicBlock& bb, u32 pc); - -    u32 DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc); -    u32 DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc); +    u32 DecodeInstr(NodeBlock& bb, u32 pc); + +    u32 DecodeArithmetic(NodeBlock& bb, u32 pc); +    u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc); +    u32 DecodeBfe(NodeBlock& bb, u32 pc); +    u32 DecodeBfi(NodeBlock& bb, u32 pc); +    u32 DecodeShift(NodeBlock& bb, u32 pc); +    u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc); +    u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc); +    u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc); +    u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc); +    u32 DecodeFfma(NodeBlock& bb, u32 pc); +    u32 DecodeHfma2(NodeBlock& bb, u32 pc); +    u32 DecodeConversion(NodeBlock& bb, u32 pc); +    u32 DecodeMemory(NodeBlock& bb, u32 pc); +    u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); +    u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); +    u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); +    u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc); +    u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc); +    u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc); +    u32 DecodeFloatSet(NodeBlock& bb, u32 pc); +    u32 DecodeIntegerSet(NodeBlock& bb, u32 pc); +    u32 DecodeHalfSet(NodeBlock& bb, u32 pc); +    u32 DecodeVideo(NodeBlock& bb, u32 pc); +    u32 DecodeXmad(NodeBlock& bb, u32 pc); +    u32 DecodeOther(NodeBlock& bb, u32 pc);      /// Internalizes node's data and returns a managed pointer to a clone of that node      Node StoreNode(NodeData&& node_data); @@ -673,20 +673,20 @@ private:      Node GetTemporal(u32 id);      /// Sets a register. src value must be a number-evaluated node. -    void SetRegister(BasicBlock& bb, Tegra::Shader::Register dest, Node src); +    void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);      /// Sets a predicate. src value must be a bool-evaluated node -    void SetPredicate(BasicBlock& bb, u64 dest, Node src); +    void SetPredicate(NodeBlock& bb, u64 dest, Node src);      /// Sets an internal flag. src value must be a bool-evaluated node -    void SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value); +    void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);      /// Sets a local memory address. address and value must be a number-evaluated node -    void SetLocalMemory(BasicBlock& bb, Node address, Node value); +    void SetLocalMemory(NodeBlock& bb, Node address, Node value);      /// Sets a temporal. Internally it uses a post-RZ register -    void SetTemporal(BasicBlock& bb, u32 id, Node value); +    void SetTemporal(NodeBlock& bb, u32 id, Node value);      /// Sets internal flags from a float -    void SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc = true); +    void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);      /// Sets internal flags from an integer -    void SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc = true); +    void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true);      /// Conditionally absolute/negated float. Absolute is applied first      Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); @@ -727,12 +727,12 @@ private:      /// Extracts a sequence of bits from a node      Node BitfieldExtract(Node value, u32 offset, u32 bits); -    void WriteTexInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, +    void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,                                    const Node4& components); -    void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, +    void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,                                     const Node4& components); -    void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, +    void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,                                         const Node4& components);      Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, @@ -761,16 +761,16 @@ private:      Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,                           u64 byte_height); -    void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest, +    void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest,                               Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,                               Tegra::Shader::PredicateResultMode predicate_mode,                               Tegra::Shader::Pred predicate, bool sets_cc); -    void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, +    void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,                                Node op_c, Node imm_lut, bool sets_cc); -    Node TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor); +    Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); -    std::pair<Node, s64> TrackRegister(const GprNode* tracked, const BasicBlock& code, s64 cursor); +    std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);      template <typename... T>      Node Operation(OperationCode code, const T*... operands) { @@ -812,7 +812,8 @@ private:      u32 coverage_end{};      std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; -    std::map<u32, BasicBlock> basic_blocks; +    std::map<u32, NodeBlock> basic_blocks; +    NodeBlock global_code;      std::vector<std::unique_ptr<NodeData>> stored_nodes; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index d6d29ee9f..be4635342 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -11,7 +11,7 @@  namespace VideoCommon::Shader {  namespace { -std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor, +std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,                                     OperationCode operation_code) {      for (; cursor >= 0; --cursor) {          const Node node = code[cursor]; @@ -19,12 +19,19 @@ std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor,              if (operation->GetCode() == operation_code)                  return {node, cursor};          } +        if (const auto conditional = std::get_if<ConditionalNode>(node)) { +            const auto& code = conditional->GetCode(); +            const auto [found, internal_cursor] = +                FindOperation(code, static_cast<s64>(code.size() - 1), operation_code); +            if (found) +                return {found, cursor}; +        }      }      return {};  }  } // namespace -Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) { +Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {      if (const auto cbuf = std::get_if<CbufNode>(tracked)) {          // Cbuf found, but it has to be immediate          return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; @@ -50,10 +57,14 @@ Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) {          }          return nullptr;      } +    if (const auto conditional = std::get_if<ConditionalNode>(tracked)) { +        const auto& code = conditional->GetCode(); +        return TrackCbuf(tracked, code, static_cast<s64>(code.size())); +    }      return nullptr;  } -std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const BasicBlock& code, +std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,                                               s64 cursor) {      for (; cursor >= 0; --cursor) {          const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); | 
