diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/shader/control_flow.cpp | 47 | ||||
| -rw-r--r-- | src/video_core/shader/control_flow.h | 3 | ||||
| -rw-r--r-- | src/video_core/shader/decode.cpp | 35 | ||||
| -rw-r--r-- | src/video_core/shader/decode/other.cpp | 30 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 12 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 6 | 
7 files changed, 85 insertions, 58 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index cedfe30b1..bfc975a04 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -191,10 +191,12 @@ public:          // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems          // unlikely that shaders will use 20 nested SSYs and PBKs. -        constexpr u32 FLOW_STACK_SIZE = 20; -        for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { -            code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); -            code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); +        if (!ir.IsFlowStackDisabled()) { +            constexpr u32 FLOW_STACK_SIZE = 20; +            for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { +                code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); +                code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); +            }          }          code.AddLine("while (true) {{"); diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 3af4c6190..c99d95b57 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -1,5 +1,6 @@  #include <list> +#include <map>  #include <unordered_map>  #include <unordered_set>  #include <vector> @@ -104,28 +105,6 @@ struct BlockInfo {      }  }; -struct Stamp { -    Stamp() = default; -    Stamp(u32 address, u32 target) : address{address}, target{target} {} -    u32 address{}; -    u32 target{}; -    bool operator==(const Stamp& sb) const { -        return std::tie(address, target) == std::tie(sb.address, sb.target); -    } -    bool operator<(const Stamp& sb) const { -        return address < sb.address; -    } -    bool operator>(const Stamp& sb) const { -        return address > sb.address; -    } -    bool operator<=(const Stamp& sb) const { -        return address <= sb.address; -    } -    bool operator>=(const Stamp& sb) const { -        return address >= sb.address; -    } -}; -  struct CFGRebuildState {      explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size)          : program_code{program_code}, program_size{program_size} { @@ -144,8 +123,8 @@ struct CFGRebuildState {      std::list<Query> queries{};      std::unordered_map<u32, u32> registered{};      std::unordered_set<u32> labels{}; -    std::set<Stamp> ssy_labels; -    std::set<Stamp> pbk_labels; +    std::map<u32, u32> ssy_labels; +    std::map<u32, u32> pbk_labels;      std::unordered_map<u32, BlockStack> stacks{};      const ProgramCode& program_code;      const std::size_t program_size; @@ -393,7 +372,7 @@ bool TryInspectAddress(CFGRebuildState& state) {      }      case BlockCollision::Inside: {          // This case is the tricky one: -        // We need to Split the block in 2 sepprate blocks +        // We need to Split the block in 2 sepparate blocks          auto it = search_result.second;          block_info = CreateBlockInfo(state, address, it->end);          it->end = address - 1; @@ -428,13 +407,11 @@ bool TryInspectAddress(CFGRebuildState& state) {  }  bool TryQuery(CFGRebuildState& state) { -    auto gather_labels = ([](ControlStack& cc, std::set<Stamp> labels, BlockInfo& block) { -        Stamp start{block.start, 0}; -        Stamp end{block.end, 0}; -        auto gather_start = labels.lower_bound(start); -        auto gather_end = labels.upper_bound(end); +    auto gather_labels = ([](ControlStack& cc, std::map<u32, u32>& labels, BlockInfo& block) { +        auto gather_start = labels.lower_bound(block.start); +        auto gather_end = labels.upper_bound(block.end);          while (gather_start != gather_end) { -            cc.Push(gather_start->target); +            cc.Push(gather_start->second);              gather_start++;          }      }); @@ -444,9 +421,13 @@ bool TryQuery(CFGRebuildState& state) {      Query& q = state.queries.front();      u32 block_index = state.registered[q.address];      BlockInfo& block = state.block_info[block_index]; +    // If the block is visted, check if the stacks match, else gather the ssy/pbk +    // labels into the current stack and look if the branch at the end of the block +    // consumes a label. Schedule new queries accordingly      if (block.visited) {          BlockStack& stack = state.stacks[q.address]; -        bool all_okay = q.ssy_stack.Compare(stack.ssy_stack) && q.pbk_stack.Compare(stack.pbk_stack); +        bool all_okay = (stack.ssy_stack.Size() == 0 || q.ssy_stack.Compare(stack.ssy_stack)) && +                        (stack.pbk_stack.Size() == 0 || q.pbk_stack.Compare(stack.pbk_stack));          state.queries.pop_front();          return all_okay;      } @@ -523,8 +504,10 @@ bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_addre          result_out.blocks.push_back(new_block);      }      if (result_out.decompilable) { +        result_out.labels = std::move(state.labels);          return true;      } +    // If it's not decompilable, merge the unlabelled blocks together      auto back = result_out.blocks.begin();      auto next = std::next(back);      while (next != result_out.blocks.end()) { diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index f5d37a231..4a2cd622c 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -3,7 +3,7 @@  #include <cstring>  #include <list>  #include <optional> -#include <vector> +#include <unordered_set>  #include "video_core/engines/shader_bytecode.h"  #include "video_core/shader/shader_ir.h" @@ -48,6 +48,7 @@ struct ShaderCharacteristics {      bool decompilable{};      u32 start;      u32 end; +    std::unordered_set<u32> labels{};  };  bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address, diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 1a74b70cb..f9b1960da 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -38,32 +38,47 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {  void ShaderIR::Decode() {      std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); +    disable_flow_stack = false;      ShaderCharacteristics shader_info{};      bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info);      if (can_proceed) {          coverage_begin = shader_info.start;          coverage_end = shader_info.end;          if (shader_info.decompilable) { +            disable_flow_stack = true; +            auto insert_block = ([this](NodeBlock& nodes, u32 label) { +                if (label == exit_branch) { +                    return; +                } +                basic_blocks.insert({label, nodes}); +            });              std::list<ShaderBlock>& blocks = shader_info.blocks; +            NodeBlock current_block; +            u32 current_label = exit_branch;              for (auto& block : blocks) { -                NodeBlock nodes; +                if (shader_info.labels.count(block.start) != 0) { +                    insert_block(current_block, current_label); +                    current_block.clear(); +                    current_label = block.start; +                }                  if (!block.ignore_branch) { -                    nodes = DecodeRange(block.start, block.end); -                    InsertControlFlow(nodes, block); +                    DecodeRangeInner(current_block, block.start, block.end); +                    InsertControlFlow(current_block, block);                  } else { -                    nodes = DecodeRange(block.start, block.end + 1); +                    DecodeRangeInner(current_block, block.start, block.end + 1);                  } -                basic_blocks.insert({block.start, nodes});              } +            insert_block(current_block, current_label);              return;          } +        LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method");          // we can't decompile it, fallback to standard method          for (const auto& block : shader_info.blocks) {              basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});          }          return;      } -    LOG_WARNING(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); +    LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling");      // Now we need to deal with an undecompilable shader. We need to brute force      // a shader that captures every position. @@ -78,10 +93,14 @@ void ShaderIR::Decode() {  NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {      NodeBlock basic_block; +    DecodeRangeInner(basic_block, begin, end); +    return basic_block; +} + +void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {      for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { -        pc = DecodeInstr(basic_block, pc); +        pc = DecodeInstr(bb, pc);      } -    return basic_block;  }  void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index ed3c63781..42e3de02f 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -98,9 +98,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {          } else {              const u32 target = pc + 1;              const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); -            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, -                                               true, PRECISE, op_a, Immediate(3)); -            const Node operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); +            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, +                                                 PRECISE, op_a, Immediate(3)); +            const Node operand = +                Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));              branch = Operation(OperationCode::BranchIndirect, convert);          } @@ -119,14 +120,14 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {              const Node index = GetRegister(instr.gpr8);              const Node op_a =                  GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); -            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, -                                               true, PRECISE, op_a, Immediate(3)); +            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, +                                                 PRECISE, op_a, Immediate(3));              operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));          } else {              const s32 target = pc + instr.brx.GetBranchExtend();              const Node op_a = GetRegister(instr.gpr8); -            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, -                                               true, PRECISE, op_a, Immediate(3)); +            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, +                                                 PRECISE, op_a, Immediate(3));              operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));          }          const Node branch = Operation(OperationCode::BranchIndirect, operand); @@ -143,6 +144,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {          UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,                               "Constant buffer flow is not supported"); +        if (disable_flow_stack) { +            break; +        } +          // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.          const u32 target = pc + instr.bra.GetBranchTarget();          bb.push_back( @@ -153,6 +158,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {          UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,                               "Constant buffer PBK is not supported"); +        if (disable_flow_stack) { +            break; +        } +          // PBK pushes to a stack the address where BRK will jump to.          const u32 target = pc + instr.bra.GetBranchTarget();          bb.push_back( @@ -164,6 +173,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {          UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",                               static_cast<u32>(cc)); +        if (disable_flow_stack) { +            break; +        } +          // The SYNC opcode jumps to the address previously set by the SSY opcode          bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));          break; @@ -172,6 +185,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {          const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;          UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",                               static_cast<u32>(cc)); +        if (disable_flow_stack) { +            break; +        }          // The BRK opcode jumps to the address previously set by the PBK opcode          bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index e468758a6..7427ed896 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -148,12 +148,12 @@ enum class OperationCode {      ImageStore, /// (MetaImage, float[N] coords) -> void -    Branch,        /// (uint branch_target) -> void -    BranchIndirect,/// (uint branch_target) -> void -    PushFlowStack, /// (uint branch_target) -> void -    PopFlowStack,  /// () -> void -    Exit,          /// () -> void -    Discard,       /// () -> void +    Branch,         /// (uint branch_target) -> void +    BranchIndirect, /// (uint branch_target) -> void +    PushFlowStack,  /// (uint branch_target) -> void +    PopFlowStack,   /// () -> void +    Exit,           /// () -> void +    Discard,        /// () -> void      EmitVertex,   /// () -> void      EndPrimitive, /// () -> void diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index a6729064b..928ac7cb5 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -123,10 +123,15 @@ public:          return header;      } +    bool IsFlowStackDisabled() const { +        return disable_flow_stack; +    } +  private:      void Decode();      NodeBlock DecodeRange(u32 begin, u32 end); +    void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);      void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);      /** @@ -320,6 +325,7 @@ private:      const ProgramCode& program_code;      const u32 main_offset;      const std::size_t program_size; +    bool disable_flow_stack{};      u32 coverage_begin{};      u32 coverage_end{};  | 
