diff options
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 85 | 
1 files changed, 55 insertions, 30 deletions
| diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 3da833e81..b5fbc4d58 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -24,6 +24,7 @@ using Tegra::Shader::GlobalAtomicType;  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode;  using Tegra::Shader::Register; +using Tegra::Shader::StoreType;  namespace { @@ -63,6 +64,27 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {      }  } +Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { +    Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); +    offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); +    return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset), +                     Immediate(size)); +} + +Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { +    Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask)); +    offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); +    return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value), +                     std::move(offset), Immediate(size)); +} + +Node Sign16Extend(Node value) { +    Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); +    Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); +    Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); +    return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend)); +} +  } // Anonymous namespace  u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { @@ -138,26 +160,31 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {          LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown));          [[fallthrough]];      case OpCode::Id::LD_S: { -        const auto GetMemory = [&](s32 offset) { +        const auto GetAddress = [&](s32 offset) {              ASSERT(offset % 4 == 0);              const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); -            const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), -                                           immediate_offset); -            return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address) -                                                             : GetLocalMemory(address); +            return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); +        }; +        const auto GetMemory = [&](s32 offset) { +            return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) +                                                             : GetLocalMemory(GetAddress(offset));          };          switch (instr.ldst_sl.type.Value()) { -        case Tegra::Shader::StoreType::Bits32: -        case Tegra::Shader::StoreType::Bits64: -        case Tegra::Shader::StoreType::Bits128: { -            const u32 count = [&]() { +        case StoreType::Signed16: +            SetRegister(bb, instr.gpr0, +                        Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); +            break; +        case StoreType::Bits32: +        case StoreType::Bits64: +        case StoreType::Bits128: { +            const u32 count = [&] {                  switch (instr.ldst_sl.type.Value()) { -                case Tegra::Shader::StoreType::Bits32: +                case StoreType::Bits32:                      return 1; -                case Tegra::Shader::StoreType::Bits64: +                case StoreType::Bits64:                      return 2; -                case Tegra::Shader::StoreType::Bits128: +                case StoreType::Bits128:                      return 4;                  default:                      UNREACHABLE(); @@ -214,12 +241,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {              // To handle unaligned loads get the bytes used to dereference global memory and extract              // those bytes from the loaded u32.              if (IsUnaligned(type)) { -                Node mask = Immediate(GetUnalignedMask(type)); -                Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); -                offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); - -                gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), -                                 std::move(offset), Immediate(size)); +                gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);              }              SetTemporary(bb, i, gmem); @@ -271,21 +293,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {              return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);          }; -        const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L -                                    ? &ShaderIR::SetLocalMemory -                                    : &ShaderIR::SetSharedMemory; +        const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; +        const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; +        const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;          switch (instr.ldst_sl.type.Value()) { -        case Tegra::Shader::StoreType::Bits128: +        case StoreType::Bits128:              (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));              (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));              [[fallthrough]]; -        case Tegra::Shader::StoreType::Bits64: +        case StoreType::Bits64:              (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));              [[fallthrough]]; -        case Tegra::Shader::StoreType::Bits32: +        case StoreType::Bits32:              (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));              break; +        case StoreType::Signed16: { +            Node address = GetAddress(0); +            Node memory = (this->*get_memory)(address); +            (this->*set_memory)( +                bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); +            break; +        }          default:              UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),                                static_cast<u32>(instr.ldst_sl.type.Value())); @@ -325,12 +354,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {              Node value = GetRegister(instr.gpr0.Value() + i);              if (IsUnaligned(type)) { -                Node mask = Immediate(GetUnalignedMask(type)); -                Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); -                offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); - -                value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset, -                                  Immediate(size)); +                const u32 mask = GetUnalignedMask(type); +                value = InsertUnaligned(gmem, std::move(value), real_address, mask, size);              }              bb.push_back(Operation(OperationCode::Assign, gmem, value)); | 
