diff options
| author | Zach Hilman <DarkLordZach@users.noreply.github.com> | 2019-06-15 20:30:13 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-06-15 20:30:13 -0400 | 
| commit | c0e7b91145d944b9fcd82605cccac64298d02c4f (patch) | |
| tree | 31aa6304d6afd839a2c8a7ebafadce8da6cc58cd /src/video_core | |
| parent | c140b6ae2ca6bc318f47b74a6946ddb10d282dbe (diff) | |
| parent | fe8e6618f2907a9262d69232ef0e2d5d58cbc6e0 (diff) | |
Merge pull request #2538 from ReinUsesLisp/ssy-pbk
shader: Split SSY and PBK stack
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 31 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 49 | ||||
| -rw-r--r-- | src/video_core/shader/decode/other.cpp | 18 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 7 | 
4 files changed, 78 insertions, 27 deletions
| diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 739477cc9..7dc2e0560 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -143,6 +143,24 @@ u32 GetGenericAttributeIndex(Attribute::Index index) {      return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);  } +constexpr const char* GetFlowStackPrefix(MetaStackClass stack) { +    switch (stack) { +    case MetaStackClass::Ssy: +        return "ssy"; +    case MetaStackClass::Pbk: +        return "pbk"; +    } +    return {}; +} + +std::string FlowStackName(MetaStackClass stack) { +    return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); +} + +std::string FlowStackTopName(MetaStackClass stack) { +    return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); +} +  class GLSLDecompiler final {  public:      explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, @@ -173,8 +191,10 @@ public:          // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems          // unlikely that shaders will use 20 nested SSYs and PBKs.          constexpr u32 FLOW_STACK_SIZE = 20; -        code.AddLine("uint flow_stack[{}];", FLOW_STACK_SIZE); -        code.AddLine("uint flow_stack_top = 0u;"); +        for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { +            code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); +            code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); +        }          code.AddLine("while (true) {{");          ++code.scope; @@ -1438,15 +1458,18 @@ private:      }      std::string PushFlowStack(Operation operation) { +        const auto stack = std::get<MetaStackClass>(operation.GetMeta());          const auto target = std::get_if<ImmediateNode>(&*operation[0]);          UNIMPLEMENTED_IF(!target); -        code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()); +        code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack), +                     target->GetValue());          return {};      }      std::string PopFlowStack(Operation operation) { -        code.AddLine("jmp_to = flow_stack[--flow_stack_top];"); +        const auto stack = std::get<MetaStackClass>(operation.GetMeta()); +        code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));          code.AddLine("break;");          return {};      } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 547883425..33ad9764a 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -132,20 +132,16 @@ public:              branch_labels.push_back(label);          } -        // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely -        // that shaders will use 20 nested SSYs and PBKs. -        constexpr u32 FLOW_STACK_SIZE = 20; -        const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE));          jmp_to = Emit(OpVariable(TypePointer(spv::StorageClass::Function, t_uint),                                   spv::StorageClass::Function, Constant(t_uint, first_address))); -        flow_stack = Emit(OpVariable(TypePointer(spv::StorageClass::Function, flow_stack_type), -                                     spv::StorageClass::Function, ConstantNull(flow_stack_type))); -        flow_stack_top = -            Emit(OpVariable(t_func_uint, spv::StorageClass::Function, Constant(t_uint, 0))); +        std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack(); +        std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack();          Name(jmp_to, "jmp_to"); -        Name(flow_stack, "flow_stack"); -        Name(flow_stack_top, "flow_stack_top"); +        Name(ssy_flow_stack, "ssy_flow_stack"); +        Name(ssy_flow_stack_top, "ssy_flow_stack_top"); +        Name(pbk_flow_stack, "pbk_flow_stack"); +        Name(pbk_flow_stack_top, "pbk_flow_stack_top");          Emit(OpBranch(loop_label));          Emit(loop_label); @@ -952,6 +948,7 @@ private:          const auto target = std::get_if<ImmediateNode>(&*operation[0]);          ASSERT(target); +        const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);          const Id current = Emit(OpLoad(t_uint, flow_stack_top));          const Id next = Emit(OpIAdd(t_uint, current, Constant(t_uint, 1)));          const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, current)); @@ -962,6 +959,7 @@ private:      }      Id PopFlowStack(Operation operation) { +        const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);          const Id current = Emit(OpLoad(t_uint, flow_stack_top));          const Id previous = Emit(OpISub(t_uint, current, Constant(t_uint, 1)));          const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, previous)); @@ -1172,6 +1170,31 @@ private:          Emit(skip_label);      } +    std::tuple<Id, Id> CreateFlowStack() { +        // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely +        // that shaders will use 20 nested SSYs and PBKs. +        constexpr u32 FLOW_STACK_SIZE = 20; +        constexpr auto storage_class = spv::StorageClass::Function; + +        const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); +        const Id stack = Emit(OpVariable(TypePointer(storage_class, flow_stack_type), storage_class, +                                         ConstantNull(flow_stack_type))); +        const Id top = Emit(OpVariable(t_func_uint, storage_class, Constant(t_uint, 0))); +        return std::tie(stack, top); +    } + +    std::pair<Id, Id> GetFlowStack(Operation operation) { +        const auto stack_class = std::get<MetaStackClass>(operation.GetMeta()); +        switch (stack_class) { +        case MetaStackClass::Ssy: +            return {ssy_flow_stack, ssy_flow_stack_top}; +        case MetaStackClass::Pbk: +            return {pbk_flow_stack, pbk_flow_stack_top}; +        } +        UNREACHABLE(); +        return {}; +    } +      static constexpr OperationDecompilersArray operation_decompilers = {          &SPIRVDecompiler::Assign, @@ -1414,8 +1437,10 @@ private:      Id execute_function{};      Id jmp_to{}; -    Id flow_stack_top{}; -    Id flow_stack{}; +    Id ssy_flow_stack_top{}; +    Id pbk_flow_stack_top{}; +    Id ssy_flow_stack{}; +    Id pbk_flow_stack{};      Id continue_label{};      std::map<u32, Id> labels;  }; diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 6fc07f213..d46a8ab82 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -109,22 +109,20 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {          UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,                               "Constant buffer flow is not supported"); -        // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the -        // target of the jump that the SYNC instruction will make. The SSY opcode has a similar -        // structure to the BRA opcode. +        // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.          const u32 target = pc + instr.bra.GetBranchTarget(); -        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); +        bb.push_back( +            Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target)));          break;      }      case OpCode::Id::PBK: {          UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,                               "Constant buffer PBK is not supported"); -        // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but -        // using SYNC on a PBK address will kill the shader execution. We don't emulate this because -        // it's very unlikely a driver will emit such invalid shader. +        // PBK pushes to a stack the address where BRK will jump to.          const u32 target = pc + instr.bra.GetBranchTarget(); -        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); +        bb.push_back( +            Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target)));          break;      }      case OpCode::Id::SYNC: { @@ -133,7 +131,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {                               static_cast<u32>(cc));          // The SYNC opcode jumps to the address previously set by the SSY opcode -        bb.push_back(Operation(OperationCode::PopFlowStack)); +        bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));          break;      }      case OpCode::Id::BRK: { @@ -142,7 +140,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {                               static_cast<u32>(cc));          // The BRK opcode jumps to the address previously set by the PBK opcode -        bb.push_back(Operation(OperationCode::PopFlowStack)); +        bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));          break;      }      case OpCode::Id::IPA: { diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index c002f90f9..3cfb911bb 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -174,6 +174,11 @@ enum class InternalFlag {      Amount = 4,  }; +enum class MetaStackClass { +    Ssy, +    Pbk, +}; +  class OperationNode;  class ConditionalNode;  class GprNode; @@ -285,7 +290,7 @@ struct MetaTexture {  };  /// Parameters that modify an operation but are not part of any particular operand -using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>; +using Meta = std::variant<MetaArithmetic, MetaTexture, MetaStackClass, Tegra::Shader::HalfType>;  /// Holds any kind of operation that can be done in the IR  class OperationNode final { | 
