diff options
| author | Fernando Sahmkow <fsahmkow27@gmail.com> | 2019-11-14 10:27:27 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-11-14 10:27:27 -0400 | 
| commit | b6f673313104a2c223c40ae8b76068a86be0082d (patch) | |
| tree | 27472ef3cde941294b60962c8f83c3ef5c95a66a /src/video_core | |
| parent | 7f424d0f609b92dabe8586768505d5821dd4c02c (diff) | |
| parent | 3ab05146985f09994fcb98388b22a31f9fbea5bf (diff) | |
Merge pull request #3081 from ReinUsesLisp/fswzadd-shuffles
shader: Implement FSWZADD and reimplement SHFL
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 10 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 76 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 49 | ||||
| -rw-r--r-- | src/video_core/shader/decode/warp.cpp | 79 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 12 | 
8 files changed, 127 insertions, 125 deletions
| diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 78d6886fb..9fafed4a2 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -616,6 +616,14 @@ union Instruction {      } shfl;      union { +        BitField<44, 1, u64> ftz; +        BitField<39, 2, u64> tab5cb8_2; +        BitField<38, 1, u64> ndv; +        BitField<47, 1, u64> cc; +        BitField<28, 8, u64> swizzle; +    } fswzadd; + +    union {          BitField<8, 8, Register> gpr;          BitField<20, 24, s64> offset;      } gmem; @@ -1592,6 +1600,7 @@ public:          DEPBAR,          VOTE,          SHFL, +        FSWZADD,          BFE_C,          BFE_R,          BFE_IMM, @@ -1890,6 +1899,7 @@ private:              INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),              INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),              INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), +            INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),              INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),              INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),              INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c65b24c69..b30d5be74 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -62,6 +62,7 @@ Device::Device() {      max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);      has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&                            GLAD_GL_NV_shader_thread_shuffle; +    has_shader_ballot = GLAD_GL_ARB_shader_ballot;      has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;      has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");      has_variable_aoffi = TestVariableAoffi(); @@ -79,6 +80,7 @@ Device::Device(std::nullptr_t) {      max_vertex_attributes = 16;      max_varyings = 15;      has_warp_intrinsics = true; +    has_shader_ballot = true;      has_vertex_viewport_layer = true;      has_image_load_formatted = true;      has_variable_aoffi = true; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index bf35bd0b6..6c86fe207 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -34,6 +34,10 @@ public:          return has_warp_intrinsics;      } +    bool HasShaderBallot() const { +        return has_shader_ballot; +    } +      bool HasVertexViewportLayer() const {          return has_vertex_viewport_layer;      } @@ -68,6 +72,7 @@ private:      u32 max_vertex_attributes{};      u32 max_varyings{};      bool has_warp_intrinsics{}; +    bool has_shader_ballot{};      bool has_vertex_viewport_layer{};      bool has_image_load_formatted{};      bool has_variable_aoffi{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 35e5214a5..04a239a39 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -275,16 +275,25 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy      std::string source = fmt::format(R"(// {}  #version 430 core  #extension GL_ARB_separate_shader_objects : enable -#extension GL_ARB_shader_viewport_layer_array : enable -#extension GL_EXT_shader_image_load_formatted : enable -#extension GL_NV_gpu_shader5 : enable -#extension GL_NV_shader_thread_group : enable -#extension GL_NV_shader_thread_shuffle : enable  )",                                       GetShaderId(unique_identifier, program_type));      if (is_compute) {          source += "#extension GL_ARB_compute_variable_group_size : require\n";      } +    if (device.HasShaderBallot()) { +        source += "#extension GL_ARB_shader_ballot : require\n"; +    } +    if (device.HasVertexViewportLayer()) { +        source += "#extension GL_ARB_shader_viewport_layer_array : require\n"; +    } +    if (device.HasImageLoadFormatted()) { +        source += "#extension GL_EXT_shader_image_load_formatted : require\n"; +    } +    if (device.HasWarpIntrinsics()) { +        source += "#extension GL_NV_gpu_shader5 : require\n" +                  "#extension GL_NV_shader_thread_group : require\n" +                  "#extension GL_NV_shader_thread_shuffle : require\n"; +    }      source += '\n';      if (!is_compute) { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0ce59a852..e56ed51de 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1379,6 +1379,26 @@ private:          return GenerateUnary(operation, "float", Type::Float, type);      } +    Expression FSwizzleAdd(Operation operation) { +        const std::string op_a = VisitOperand(operation, 0).AsFloat(); +        const std::string op_b = VisitOperand(operation, 1).AsFloat(); + +        if (!device.HasShaderBallot()) { +            LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); +            return {fmt::format("{} + {}", op_a, op_b), Type::Float}; +        } + +        const std::string instr_mask = VisitOperand(operation, 2).AsUint(); +        const std::string mask = code.GenerateTemporary(); +        code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask, +                     instr_mask); + +        const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); +        const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); +        return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), +                Type::Float}; +    } +      Expression ICastFloat(Operation operation) {          return GenerateUnary(operation, "int", Type::Int, Type::Float);      } @@ -1942,34 +1962,24 @@ private:          return Vote(operation, "allThreadsEqualNV");      } -    template <const std::string_view& func> -    Expression Shuffle(Operation operation) { -        const std::string value = VisitOperand(operation, 0).AsFloat(); -        if (!device.HasWarpIntrinsics()) { -            LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader"); -            // On a "single-thread" device we are either on the same thread or out of bounds. Both -            // cases return the passed value. -            return {value, Type::Float}; +    Expression ThreadId(Operation operation) { +        if (!device.HasShaderBallot()) { +            LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); +            return {"0U", Type::Uint};          } - -        const std::string index = VisitOperand(operation, 1).AsUint(); -        const std::string width = VisitOperand(operation, 2).AsUint(); -        return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float}; +        return {"gl_SubGroupInvocationARB", Type::Uint};      } -    template <const std::string_view& func> -    Expression InRangeShuffle(Operation operation) { -        const std::string index = VisitOperand(operation, 0).AsUint(); -        const std::string width = VisitOperand(operation, 1).AsUint(); -        if (!device.HasWarpIntrinsics()) { -            // On a "single-thread" device we are only in bounds when the requested index is 0. -            return {fmt::format("({} == 0U)", index), Type::Bool}; +    Expression ShuffleIndexed(Operation operation) { +        std::string value = VisitOperand(operation, 0).AsFloat(); + +        if (!device.HasShaderBallot()) { +            LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); +            return {std::move(value), Type::Float};          } -        const std::string in_range = code.GenerateTemporary(); -        code.AddLine("bool {};", in_range); -        code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range); -        return {in_range, Type::Bool}; +        const std::string index = VisitOperand(operation, 1).AsUint(); +        return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};      }      struct Func final { @@ -1981,11 +1991,6 @@ private:          static constexpr std::string_view Or = "Or";          static constexpr std::string_view Xor = "Xor";          static constexpr std::string_view Exchange = "Exchange"; - -        static constexpr std::string_view ShuffleIndexed = "shuffleNV"; -        static constexpr std::string_view ShuffleUp = "shuffleUpNV"; -        static constexpr std::string_view ShuffleDown = "shuffleDownNV"; -        static constexpr std::string_view ShuffleButterfly = "shuffleXorNV";      };      static constexpr std::array operation_decompilers = { @@ -2016,6 +2021,7 @@ private:          &GLSLDecompiler::FTrunc,          &GLSLDecompiler::FCastInteger<Type::Int>,          &GLSLDecompiler::FCastInteger<Type::Uint>, +        &GLSLDecompiler::FSwizzleAdd,          &GLSLDecompiler::Add<Type::Int>,          &GLSLDecompiler::Mul<Type::Int>, @@ -2151,15 +2157,8 @@ private:          &GLSLDecompiler::VoteAny,          &GLSLDecompiler::VoteEqual, -        &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>, -        &GLSLDecompiler::Shuffle<Func::ShuffleUp>, -        &GLSLDecompiler::Shuffle<Func::ShuffleDown>, -        &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>, - -        &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>, -        &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>, -        &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>, -        &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>, +        &GLSLDecompiler::ThreadId, +        &GLSLDecompiler::ShuffleIndexed,      };      static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); @@ -2492,6 +2491,9 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {      bvec2 is_nan2 = isnan(pair2);      return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);  } + +const float fswzadd_modifiers_a[] = float[4](-1.0f,  1.0f, -1.0f,  0.0f ); +const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f,  1.0f, -1.0f );  )";  } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 42cf068b6..2850d5b59 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -783,6 +783,11 @@ private:          return {};      } +    Id FSwizzleAdd(Operation operation) { +        UNIMPLEMENTED(); +        return {}; +    } +      Id HNegate(Operation operation) {          UNIMPLEMENTED();          return {}; @@ -1195,42 +1200,12 @@ private:          return {};      } -    Id ShuffleIndexed(Operation) { -        UNIMPLEMENTED(); -        return {}; -    } - -    Id ShuffleUp(Operation) { -        UNIMPLEMENTED(); -        return {}; -    } - -    Id ShuffleDown(Operation) { -        UNIMPLEMENTED(); -        return {}; -    } - -    Id ShuffleButterfly(Operation) { -        UNIMPLEMENTED(); -        return {}; -    } - -    Id InRangeShuffleIndexed(Operation) { +    Id ThreadId(Operation) {          UNIMPLEMENTED();          return {};      } -    Id InRangeShuffleUp(Operation) { -        UNIMPLEMENTED(); -        return {}; -    } - -    Id InRangeShuffleDown(Operation) { -        UNIMPLEMENTED(); -        return {}; -    } - -    Id InRangeShuffleButterfly(Operation) { +    Id ShuffleIndexed(Operation) {          UNIMPLEMENTED();          return {};      } @@ -1393,6 +1368,7 @@ private:          &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>,          &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>,          &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, +        &SPIRVDecompiler::FSwizzleAdd,          &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>,          &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, @@ -1528,15 +1504,8 @@ private:          &SPIRVDecompiler::VoteAny,          &SPIRVDecompiler::VoteEqual, +        &SPIRVDecompiler::ThreadId,          &SPIRVDecompiler::ShuffleIndexed, -        &SPIRVDecompiler::ShuffleUp, -        &SPIRVDecompiler::ShuffleDown, -        &SPIRVDecompiler::ShuffleButterfly, - -        &SPIRVDecompiler::InRangeShuffleIndexed, -        &SPIRVDecompiler::InRangeShuffleUp, -        &SPIRVDecompiler::InRangeShuffleDown, -        &SPIRVDecompiler::InRangeShuffleButterfly,      };      static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index fa8a250cc..d98d0e1dd 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp @@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation;  using Tegra::Shader::VoteOperation;  namespace { +  OperationCode GetOperationCode(VoteOperation vote_op) {      switch (vote_op) {      case VoteOperation::All: @@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) {          return OperationCode::VoteAll;      }  } +  } // Anonymous namespace  u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { @@ -46,50 +48,59 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {          break;      }      case OpCode::Id::SHFL: { -        Node width = [this, instr] { -            Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) -                                               : GetRegister(instr.gpr39); - -            // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has -            // been done reversing Nvidia's math. It won't work on all cases due to SHFL having -            // different parameters that don't properly map to GLSL's interface, but it should work -            // for cases emitted by Nvidia's compiler. -            if (instr.shfl.operation == ShuffleOperation::Up) { -                return Operation( -                    OperationCode::ILogicalShiftRight, -                    Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), -                    Immediate(8)); -            } else { -                return Operation(OperationCode::ILogicalShiftRight, -                                 Operation(OperationCode::IAdd, Immediate(0x201F), -                                           Operation(OperationCode::INegate, std::move(mask))), -                                 Immediate(8)); -            } -        }(); +        Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) +                                           : GetRegister(instr.gpr39); +        Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) +                                             : GetRegister(instr.gpr20); + +        Node thread_id = Operation(OperationCode::ThreadId); +        Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); +        Node seg_mask = BitfieldExtract(mask, 8, 16); -        const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { +        Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); +        Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); +        Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, +                                       Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); + +        Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {              switch (instr.shfl.operation) {              case ShuffleOperation::Idx: -                return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; -            case ShuffleOperation::Up: -                return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; +                return Operation(OperationCode::IBitwiseOr, +                                 Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), +                                 min_thread_id);              case ShuffleOperation::Down: -                return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; +                return Operation(OperationCode::IAdd, thread_id, index); +            case ShuffleOperation::Up: +                return Operation(OperationCode::IAdd, thread_id, +                                 Operation(OperationCode::INegate, index));              case ShuffleOperation::Bfly: -                return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; +                return Operation(OperationCode::IBitwiseXor, thread_id, index);              } -            UNREACHABLE_MSG("Invalid SHFL operation: {}", -                            static_cast<u64>(instr.shfl.operation.Value())); -            return {}; +            UNREACHABLE(); +            return Immediate(0U);          }(); -        // Setting the predicate before the register is intentional to avoid overwriting. -        Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) -                                             : GetRegister(instr.gpr20); -        SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); +        Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { +            if (instr.shfl.operation == ShuffleOperation::Up) { +                return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); +            } else { +                return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); +            } +        }(); + +        SetPredicate(bb, instr.shfl.pred48, in_bounds);          SetRegister(              bb, instr.gpr0, -            Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); +            Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); +        break; +    } +    case OpCode::Id::FSWZADD: { +        UNIMPLEMENTED_IF(instr.fswzadd.ndv); + +        Node op_a = GetRegister(instr.gpr8); +        Node op_b = GetRegister(instr.gpr20); +        Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); +        SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));          break;      }      default: diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4300d9ff4..54217e6a4 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -47,6 +47,7 @@ enum class OperationCode {      FTrunc,        /// (MetaArithmetic, float a) -> float      FCastInteger,  /// (MetaArithmetic, int a) -> float      FCastUInteger, /// (MetaArithmetic, uint a) -> float +    FSwizzleAdd,   /// (float a, float b, uint mask) -> float      IAdd,                  /// (MetaArithmetic, int a, int b) -> int      IMul,                  /// (MetaArithmetic, int a, int b) -> int @@ -181,15 +182,8 @@ enum class OperationCode {      VoteAny,      /// (bool) -> bool      VoteEqual,    /// (bool) -> bool -    ShuffleIndexed,   /// (uint value, uint index, uint width) -> uint -    ShuffleUp,        /// (uint value, uint index, uint width) -> uint -    ShuffleDown,      /// (uint value, uint index, uint width) -> uint -    ShuffleButterfly, /// (uint value, uint index, uint width) -> uint - -    InRangeShuffleIndexed,   /// (uint index, uint width) -> bool -    InRangeShuffleUp,        /// (uint index, uint width) -> bool -    InRangeShuffleDown,      /// (uint index, uint width) -> bool -    InRangeShuffleButterfly, /// (uint index, uint width) -> bool +    ThreadId,       /// () -> uint +    ShuffleIndexed, /// (uint value, uint index) -> uint      Amount,  }; | 
