diff options
| author | bunnei <bunneidev@gmail.com> | 2018-10-26 00:16:25 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-10-26 00:16:25 -0400 | 
| commit | d278f25bda79a2d2ed560a68f3983881c1a1e501 (patch) | |
| tree | 1a24fe68cd4d72364eb6f0b6b6f5b420597a0f55 | |
| parent | 72e6b31a070bcefc9d03bf75d14bd0808ea403f5 (diff) | |
| parent | ca142f35c0f15e0d7e68e592b916660d9ee7a743 (diff) | |
Merge pull request #1533 from FernandoS27/lmem
Implemented Shader Local Memory
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 31 | ||||
| -rw-r--r-- | src/video_core/engines/shader_header.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 103 | 
3 files changed, 138 insertions, 1 deletions
| diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index af7756266..141b9159b 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -208,6 +208,16 @@ enum class UniformType : u64 {      Double = 5,  }; +enum class StoreType : u64 { +    Unsigned8 = 0, +    Signed8 = 1, +    Unsigned16 = 2, +    Signed16 = 3, +    Bytes32 = 4, +    Bytes64 = 5, +    Bytes128 = 6, +}; +  enum class IMinMaxExchange : u64 {      None = 0,      XLo = 1, @@ -748,6 +758,18 @@ union Instruction {      } ld_c;      union { +        BitField<48, 3, StoreType> type; +    } ldst_sl; + +    union { +        BitField<44, 2, u64> unknown; +    } ld_l; + +    union { +        BitField<44, 2, u64> unknown; +    } st_l; + +    union {          BitField<0, 3, u64> pred0;          BitField<3, 3, u64> pred3;          BitField<7, 1, u64> abs_a; @@ -1209,6 +1231,7 @@ union Instruction {      BitField<61, 1, u64> is_b_imm;      BitField<60, 1, u64> is_b_gpr;      BitField<59, 1, u64> is_c_gpr; +    BitField<20, 24, s64> smem_imm;      Attribute attribute;      Sampler sampler; @@ -1232,8 +1255,12 @@ public:          BRA,          PBK,          LD_A, +        LD_L, +        LD_S,          LD_C,          ST_A, +        ST_L, +        ST_S,          LDG, // Load from global memory          STG, // Store in global memory          TEX, @@ -1490,8 +1517,12 @@ private:              INST("111000110100---", Id::BRK, Type::Flow, "BRK"),              INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),              INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), +            INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), +            INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),              INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),              INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), +            INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), +            INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),              INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),              INST("1110111011011---", Id::STG, Type::Memory, "STG"),              INST("110000----111---", Id::TEX, Type::Memory, "TEX"), diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index a885ee3cf..a0e015c4b 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h @@ -96,6 +96,11 @@ struct Header {              }          } ps;      }; + +    u64 GetLocalMemorySize() { +        return (common1.shader_local_memory_low_size | +                (common2.shader_local_memory_high_size << 24)); +    }  };  static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 81ffb24e4..dec291a7d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -278,7 +278,7 @@ public:                          const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix,                          const Tegra::Shader::Header& header)          : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header}, -          fixed_pipeline_output_attributes_used{} { +          fixed_pipeline_output_attributes_used{}, local_memory_size{0} {          BuildRegisterList();          BuildInputList();      } @@ -436,6 +436,25 @@ public:          shader.AddLine(dest + " = " + src + ';');      } +    std::string GetLocalMemoryAsFloat(const std::string& index) { +        return "lmem[" + index + ']'; +    } + +    std::string GetLocalMemoryAsInteger(const std::string& index, bool is_signed = false) { +        const std::string func{is_signed ? "floatToIntBits" : "floatBitsToUint"}; +        return func + "(lmem[" + index + "])"; +    } + +    void SetLocalMemoryAsFloat(const std::string& index, const std::string& value) { +        shader.AddLine("lmem[" + index + "] = " + value + ';'); +    } + +    void SetLocalMemoryAsInteger(const std::string& index, const std::string& value, +                                 bool is_signed = false) { +        const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; +        shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");"); +    } +      std::string GetControlCode(const Tegra::Shader::ControlCode cc) const {          switch (cc) {          case Tegra::Shader::ControlCode::NEU: @@ -533,6 +552,7 @@ public:      void GenerateDeclarations(const std::string& suffix) {          GenerateVertex();          GenerateRegisters(suffix); +        GenerateLocalMemory();          GenerateInternalFlags();          GenerateInputAttrs();          GenerateOutputAttrs(); @@ -578,6 +598,10 @@ public:          return entry.GetName();      } +    void SetLocalMemory(u64 lmem) { +        local_memory_size = lmem; +    } +  private:      /// Generates declarations for registers.      void GenerateRegisters(const std::string& suffix) { @@ -588,6 +612,15 @@ private:          declarations.AddNewLine();      } +    /// Generates declarations for local memory. +    void GenerateLocalMemory() { +        if (local_memory_size > 0) { +            declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) + +                                 "];"); +            declarations.AddNewLine(); +        } +    } +      /// Generates declarations for internal flags.      void GenerateInternalFlags() {          for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) { @@ -895,6 +928,7 @@ private:      const std::string& suffix;      const Tegra::Shader::Header& header;      std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used; +    u64 local_memory_size;  };  class GLSLGenerator { @@ -904,6 +938,8 @@ public:          : subroutines(subroutines), program_code(program_code), main_offset(main_offset),            stage(stage), suffix(suffix) {          std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); +        local_memory_size = header.GetLocalMemorySize(); +        regs.SetLocalMemory(local_memory_size);          Generate(suffix);      } @@ -2324,6 +2360,39 @@ private:                  shader.AddLine("}");                  break;              } +            case OpCode::Id::LD_L: { +                // Add an extra scope and declare the index register inside to prevent +                // overwriting it in case it is used as an output of the LD instruction. +                shader.AddLine('{'); +                ++shader.scope; + +                std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + +                                 std::to_string(instr.smem_imm.Value()) + ')'; + +                shader.AddLine("uint index = (" + op + " / 4);"); + +                const std::string op_a = regs.GetLocalMemoryAsFloat("index"); + +                if (instr.ld_l.unknown != 1) { +                    LOG_CRITICAL(HW_GPU, "LD_L Unhandled mode: {}", +                                 static_cast<unsigned>(instr.ld_l.unknown.Value())); +                    UNREACHABLE(); +                } + +                switch (instr.ldst_sl.type.Value()) { +                case Tegra::Shader::StoreType::Bytes32: +                    regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); +                    break; +                default: +                    LOG_CRITICAL(HW_GPU, "LD_L Unhandled type: {}", +                                 static_cast<unsigned>(instr.ldst_sl.type.Value())); +                    UNREACHABLE(); +                } + +                --shader.scope; +                shader.AddLine('}'); +                break; +            }              case OpCode::Id::ST_A: {                  ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,                             "Indirect attribute loads are not supported"); @@ -2352,6 +2421,37 @@ private:                  break;              } +            case OpCode::Id::ST_L: { +                // Add an extra scope and declare the index register inside to prevent +                // overwriting it in case it is used as an output of the LD instruction. +                shader.AddLine('{'); +                ++shader.scope; + +                std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + +                                 std::to_string(instr.smem_imm.Value()) + ')'; + +                shader.AddLine("uint index = (" + op + " / 4);"); + +                if (instr.st_l.unknown != 0) { +                    LOG_CRITICAL(HW_GPU, "ST_L Unhandled mode: {}", +                                 static_cast<unsigned>(instr.st_l.unknown.Value())); +                    UNREACHABLE(); +                } + +                switch (instr.ldst_sl.type.Value()) { +                case Tegra::Shader::StoreType::Bytes32: +                    regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0)); +                    break; +                default: +                    LOG_CRITICAL(HW_GPU, "ST_L Unhandled type: {}", +                                 static_cast<unsigned>(instr.ldst_sl.type.Value())); +                    UNREACHABLE(); +                } + +                --shader.scope; +                shader.AddLine('}'); +                break; +            }              case OpCode::Id::TEX: {                  Tegra::Shader::TextureType texture_type{instr.tex.texture_type};                  std::string coord; @@ -3575,6 +3675,7 @@ private:      const u32 main_offset;      Maxwell3D::Regs::ShaderStage stage;      const std::string& suffix; +    u64 local_memory_size;      ShaderWriter shader;      ShaderWriter declarations; | 
