diff options
| author | bunnei <bunneidev@gmail.com> | 2020-01-18 00:54:07 -0500 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-01-18 00:54:07 -0500 | 
| commit | 15163edaaa6a452e970de6c2577f9d51aa76bea1 (patch) | |
| tree | 80b21abaee7d30f3bfd33a2eacb0a090d3bec8d6 | |
| parent | 3cce5056ff6ba480c460ff97d897118ee644b4e5 (diff) | |
| parent | 63ba41a26d6bfdd30a4d7cd0879003fb4000332b (diff) | |
Merge pull request #3312 from ReinUsesLisp/atoms-u32
shader/memory: Implement ATOMS.ADD.U32
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 37 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 2 | 
5 files changed, 74 insertions, 3 deletions
| diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 57b57c647..6f98bd827 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {      Trunc = 11,  }; +enum class AtomicOp : u64 { +    Add = 0, +    Min = 1, +    Max = 2, +    Inc = 3, +    Dec = 4, +    And = 5, +    Or = 6, +    Xor = 7, +    Exch = 8, +}; +  enum class UniformType : u64 {      UnsignedByte = 0,      SignedByte = 1, @@ -236,6 +248,13 @@ enum class StoreType : u64 {      Bits128 = 6,  }; +enum class AtomicType : u64 { +    U32 = 0, +    S32 = 1, +    U64 = 2, +    S64 = 3, +}; +  enum class IMinMaxExchange : u64 {      None = 0,      XLo = 1, @@ -939,6 +958,16 @@ union Instruction {      } stg;      union { +        BitField<52, 4, AtomicOp> operation; +        BitField<28, 2, AtomicType> type; +        BitField<30, 22, s64> offset; + +        s32 GetImmediateOffset() const { +            return static_cast<s32>(offset << 2); +        } +    } atoms; + +    union {          BitField<32, 1, PhysicalAttributeDirection> direction;          BitField<47, 3, AttributeSize> size;          BitField<20, 11, u64> address; @@ -1659,9 +1688,10 @@ public:          ST_A,          ST_L,          ST_S, -        ST,   // Store in generic memory -        STG,  // Store in global memory -        AL2P, // Transforms attribute memory into physical memory +        ST,    // Store in generic memory +        STG,   // Store in global memory +        ATOMS, // Atomic operation on shared memory +        AL2P,  // Transforms attribute memory into physical memory          TEX,          TEX_B,  // Texture Load Bindless          TXQ,    // Texture Query @@ -1964,6 +1994,7 @@ private:              INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),              INST("101-------------", Id::ST, Type::Memory, "ST"),              INST("1110111011011---", Id::STG, Type::Memory, "STG"), +            INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),              INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),              INST("110000----111---", Id::TEX, Type::Texture, "TEX"),              INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index f9f7a97b5..19751939a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1856,6 +1856,16 @@ private:                  Type::Uint};      } +    template <const std::string_view& opname, Type type> +    Expression Atomic(Operation operation) { +        ASSERT(stage == ShaderType::Compute); +        auto& smem = std::get<SmemNode>(*operation[0]); + +        return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(), +                            Visit(operation[1]).As(type)), +                type}; +    } +      Expression Branch(Operation operation) {          const auto target = std::get_if<ImmediateNode>(&*operation[0]);          UNIMPLEMENTED_IF(!target); @@ -2194,6 +2204,8 @@ private:          &GLSLDecompiler::AtomicImage<Func::Xor>,          &GLSLDecompiler::AtomicImage<Func::Exchange>, +        &GLSLDecompiler::Atomic<Func::Add, Type::Uint>, +          &GLSLDecompiler::Branch,          &GLSLDecompiler::BranchIndirect,          &GLSLDecompiler::PushFlowStack, diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 8fe852ce8..0cf97cafa 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1796,6 +1796,11 @@ private:          return {};      } +    Expression UAtomicAdd(Operation) { +        UNIMPLEMENTED(); +        return {}; +    } +      Expression Branch(Operation operation) {          const auto& target = std::get<ImmediateNode>(*operation[0]);          OpStore(jmp_to, Constant(t_uint, target.GetValue())); @@ -2373,6 +2378,8 @@ private:          &SPIRVDecompiler::AtomicImageXor,          &SPIRVDecompiler::AtomicImageExchange, +        &SPIRVDecompiler::UAtomicAdd, +          &SPIRVDecompiler::Branch,          &SPIRVDecompiler::BranchIndirect,          &SPIRVDecompiler::PushFlowStack, diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 8cc84e935..7591a715f 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -16,6 +16,8 @@  namespace VideoCommon::Shader { +using Tegra::Shader::AtomicOp; +using Tegra::Shader::AtomicType;  using Tegra::Shader::Attribute;  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; @@ -333,6 +335,23 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {          }          break;      } +    case OpCode::Id::ATOMS: { +        UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", +                             static_cast<int>(instr.atoms.operation.Value())); +        UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}", +                             static_cast<int>(instr.atoms.type.Value())); + +        const s32 offset = instr.atoms.GetImmediateOffset(); +        Node address = GetRegister(instr.gpr8); +        address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); + +        Node memory = GetSharedMemory(std::move(address)); +        Node data = GetRegister(instr.gpr20); + +        Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); +        SetRegister(bb, instr.gpr0, std::move(value)); +        break; +    }      case OpCode::Id::AL2P: {          // Ignore al2p.direction since we don't care about it. diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4e155542a..075c7d07c 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -162,6 +162,8 @@ enum class OperationCode {      AtomicImageXor,      /// (MetaImage, int[N] coords) -> void      AtomicImageExchange, /// (MetaImage, int[N] coords) -> void +    UAtomicAdd, /// (smem, uint) -> uint +      Branch,         /// (uint branch_target) -> void      BranchIndirect, /// (uint branch_target) -> void      PushFlowStack,  /// (uint branch_target) -> void | 
