diff options
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 30 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 66 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 22 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 2 | 
5 files changed, 86 insertions, 39 deletions
| diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 6f98bd827..f443ec0fe 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -227,6 +227,28 @@ enum class AtomicOp : u64 {      Exch = 8,  }; +enum class GlobalAtomicOp : u64 { +    Add = 0, +    Min = 1, +    Max = 2, +    Inc = 3, +    Dec = 4, +    And = 5, +    Or = 6, +    Xor = 7, +    Exch = 8, +    SafeAdd = 10, +}; + +enum class GlobalAtomicType : u64 { +    U32 = 0, +    S32 = 1, +    U64 = 2, +    F32_FTZ_RN = 3, +    F16x2_FTZ_RN = 4, +    S64 = 5, +}; +  enum class UniformType : u64 {      UnsignedByte = 0,      SignedByte = 1, @@ -958,6 +980,12 @@ union Instruction {      } stg;      union { +        BitField<52, 4, GlobalAtomicOp> operation; +        BitField<49, 3, GlobalAtomicType> type; +        BitField<28, 20, s64> offset; +    } atom; + +    union {          BitField<52, 4, AtomicOp> operation;          BitField<28, 2, AtomicType> type;          BitField<30, 22, s64> offset; @@ -1690,6 +1718,7 @@ public:          ST_S,          ST,    // Store in generic memory          STG,   // Store in global memory +        ATOM,  // Atomic operation on global memory          ATOMS, // Atomic operation on shared memory          AL2P,  // Transforms attribute memory into physical memory          TEX, @@ -1994,6 +2023,7 @@ private:              INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),              INST("101-------------", Id::ST, Type::Memory, "ST"),              INST("1110111011011---", Id::STG, Type::Memory, "STG"), +            INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),              INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),              INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),              INST("110000----111---", Id::TEX, Type::Texture, "TEX"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6531dfe9b..a1ac3d7a9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1857,10 +1857,7 @@ private:      template <const std::string_view& opname, Type type>      Expression Atomic(Operation operation) { -        ASSERT(stage == ShaderType::Compute); -        auto& smem = std::get<SmemNode>(*operation[0]); - -        return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(), +        return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),                              Visit(operation[1]).As(type)),                  type};      } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index b53078721..1ab22251e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1123,15 +1123,7 @@ private:          }          if (const auto gmem = std::get_if<GmemNode>(&*node)) { -            const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); -            const Id real = AsUint(Visit(gmem->GetRealAddress())); -            const Id base = AsUint(Visit(gmem->GetBaseAddress())); - -            Id offset = OpISub(t_uint, real, base); -            offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U)); -            return {OpLoad(t_float, -                           OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)), -                    Type::Float}; +            return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint};          }          if (const auto lmem = std::get_if<LmemNode>(&*node)) { @@ -1142,10 +1134,7 @@ private:          }          if (const auto smem = std::get_if<SmemNode>(&*node)) { -            Id address = AsUint(Visit(smem->GetAddress())); -            address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); -            const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); -            return {OpLoad(t_uint, pointer), Type::Uint}; +            return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint};          }          if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { @@ -1339,20 +1328,10 @@ private:              target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};          } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { -            ASSERT(stage == ShaderType::Compute); -            Id address = AsUint(Visit(smem->GetAddress())); -            address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); -            target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint}; +            target = {GetSharedMemoryPointer(*smem), Type::Uint};          } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { -            const Id real = AsUint(Visit(gmem->GetRealAddress())); -            const Id base = AsUint(Visit(gmem->GetBaseAddress())); -            const Id diff = OpISub(t_uint, real, base); -            const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); - -            const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); -            target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset), -                      Type::Float}; +            target = {GetGlobalMemoryPointer(*gmem), Type::Uint};          } else {              UNIMPLEMENTED(); @@ -1804,11 +1783,16 @@ private:          return {};      } -    Expression UAtomicAdd(Operation operation) { -        const auto& smem = std::get<SmemNode>(*operation[0]); -        Id address = AsUint(Visit(smem.GetAddress())); -        address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); -        const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); +    Expression AtomicAdd(Operation operation) { +        Id pointer; +        if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { +            pointer = GetSharedMemoryPointer(*smem); +        } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { +            pointer = GetGlobalMemoryPointer(*gmem); +        } else { +            UNREACHABLE(); +            return {Constant(t_uint, 0), Type::Uint}; +        }          const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));          const Id semantics = Constant(t_uint, 0U); @@ -2243,6 +2227,22 @@ private:          return {};      } +    Id GetGlobalMemoryPointer(const GmemNode& gmem) { +        const Id real = AsUint(Visit(gmem.GetRealAddress())); +        const Id base = AsUint(Visit(gmem.GetBaseAddress())); +        const Id diff = OpISub(t_uint, real, base); +        const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); +        const Id buffer = global_buffers.at(gmem.GetDescriptor()); +        return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); +    } + +    Id GetSharedMemoryPointer(const SmemNode& smem) { +        ASSERT(stage == ShaderType::Compute); +        Id address = AsUint(Visit(smem.GetAddress())); +        address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); +        return OpAccessChain(t_smem_uint, shared_memory, address); +    } +      static constexpr std::array operation_decompilers = {          &SPIRVDecompiler::Assign, @@ -2389,7 +2389,7 @@ private:          &SPIRVDecompiler::AtomicImageXor,          &SPIRVDecompiler::AtomicImageExchange, -        &SPIRVDecompiler::UAtomicAdd, +        &SPIRVDecompiler::AtomicAdd,          &SPIRVDecompiler::Branch,          &SPIRVDecompiler::BranchIndirect, @@ -2485,9 +2485,9 @@ private:      Id t_smem_uint{}; -    const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); +    const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint);      const Id t_gmem_array = -        Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray"); +        Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray");      const Id t_gmem_struct = MemberDecorate(          Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);      const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 7591a715f..3da833e81 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -19,6 +19,8 @@ namespace VideoCommon::Shader {  using Tegra::Shader::AtomicOp;  using Tegra::Shader::AtomicType;  using Tegra::Shader::Attribute; +using Tegra::Shader::GlobalAtomicOp; +using Tegra::Shader::GlobalAtomicType;  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode;  using Tegra::Shader::Register; @@ -335,6 +337,24 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {          }          break;      } +    case OpCode::Id::ATOM: { +        UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}", +                             static_cast<int>(instr.atom.operation.Value())); +        UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}", +                             static_cast<int>(instr.atom.type.Value())); + +        const auto [real_address, base_address, descriptor] = +            TrackGlobalMemory(bb, instr, true, true); +        if (!real_address || !base_address) { +            // Tracking failed, skip atomic. +            break; +        } + +        Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); +        Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20)); +        SetRegister(bb, instr.gpr0, std::move(value)); +        break; +    }      case OpCode::Id::ATOMS: {          UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",                               static_cast<int>(instr.atoms.operation.Value())); @@ -348,7 +368,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {          Node memory = GetSharedMemory(std::move(address));          Node data = GetRegister(instr.gpr20); -        Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); +        Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data));          SetRegister(bb, instr.gpr0, std::move(value));          break;      } diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 075c7d07c..9af1f0228 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -162,7 +162,7 @@ enum class OperationCode {      AtomicImageXor,      /// (MetaImage, int[N] coords) -> void      AtomicImageExchange, /// (MetaImage, int[N] coords) -> void -    UAtomicAdd, /// (smem, uint) -> uint +    AtomicAdd, /// (memory, {u}int) -> {u}int      Branch,         /// (uint branch_target) -> void      BranchIndirect, /// (uint branch_target) -> void | 
