diff options
| author | bunnei <bunneidev@gmail.com> | 2019-12-26 21:43:44 -0500 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-12-26 21:43:44 -0500 | 
| commit | 8a76f816a4586444903180f12a0408a1ae82a82d (patch) | |
| tree | a92b35863fe491f3e9b8412df7b33c6dc978a07c | |
| parent | 5619d243772a2bb762c7733bd3122e128e370448 (diff) | |
| parent | e09c1fbc1f5868b1bff54a69a58fd6d788c54251 (diff) | |
Merge pull request #3228 from ReinUsesLisp/ptp
 shader/texture: Implement AOFFI and PTP for TLD4 and TLD4S
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 12 |
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 90 |
| -rw-r--r-- | src/video_core/shader/decode/texture.cpp | 108 |
| -rw-r--r-- | src/video_core/shader/node.h | 1 |
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 5 |

5 files changed, 142 insertions, 74 deletions
| diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index dfb12cd2d..412ca5551 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1239,7 +1239,7 @@ union Instruction {          BitField<35, 1, u64> ndv_flag;          BitField<49, 1, u64> nodep_flag;          BitField<50, 1, u64> dc_flag; -        BitField<54, 2, u64> info; +        BitField<54, 2, u64> offset_mode;          BitField<56, 2, u64> component;          bool UsesMiscMode(TextureMiscMode mode) const { @@ -1251,9 +1251,9 @@ union Instruction {              case TextureMiscMode::DC:                  return dc_flag != 0;              case TextureMiscMode::AOFFI: -                return info == 1; +                return offset_mode == 1;              case TextureMiscMode::PTP: -                return info == 2; +                return offset_mode == 2;              default:                  break;              } @@ -1265,7 +1265,7 @@ union Instruction {          BitField<35, 1, u64> ndv_flag;          BitField<49, 1, u64> nodep_flag;          BitField<50, 1, u64> dc_flag; -        BitField<33, 2, u64> info; +        BitField<33, 2, u64> offset_mode;          BitField<37, 2, u64> component;          bool UsesMiscMode(TextureMiscMode mode) const { @@ -1277,9 +1277,9 @@ union Instruction {              case TextureMiscMode::DC:                  return dc_flag != 0;              case TextureMiscMode::AOFFI: -                return info == 1; +                return offset_mode == 1;              case TextureMiscMode::PTP: -                return info == 2; +                return offset_mode == 2;              default:                  break;              } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0389c2143..a311dbcfe 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ 
b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -48,10 +48,10 @@ class ExprDecompiler;  enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; -struct TextureAoffi {}; +struct TextureOffset {};  struct TextureDerivates {};  using TextureArgument = std::pair<Type, Node>; -using TextureIR = std::variant<TextureAoffi, TextureDerivates, TextureArgument>; +using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;  constexpr u32 MAX_CONSTBUFFER_ELEMENTS =      static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); @@ -1077,7 +1077,7 @@ private:      }      std::string GenerateTexture(Operation operation, const std::string& function_suffix, -                                const std::vector<TextureIR>& extras, bool sepparate_dc = false) { +                                const std::vector<TextureIR>& extras, bool separate_dc = false) {          constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};          const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); @@ -1090,10 +1090,12 @@ private:          std::string expr = "texture" + function_suffix;          if (!meta->aoffi.empty()) {              expr += "Offset"; +        } else if (!meta->ptp.empty()) { +            expr += "Offsets";          }          expr += '(' + GetSampler(meta->sampler) + ", ";          expr += coord_constructors.at(count + (has_array ? 1 : 0) + -                                      (has_shadow && !sepparate_dc ? 1 : 0) - 1); +                                      (has_shadow && !separate_dc ? 
1 : 0) - 1);          expr += '(';          for (std::size_t i = 0; i < count; ++i) {              expr += Visit(operation[i]).AsFloat(); @@ -1106,7 +1108,7 @@ private:              expr += ", float(" + Visit(meta->array).AsInt() + ')';          }          if (has_shadow) { -            if (sepparate_dc) { +            if (separate_dc) {                  expr += "), " + Visit(meta->depth_compare).AsFloat();              } else {                  expr += ", " + Visit(meta->depth_compare).AsFloat() + ')'; @@ -1118,8 +1120,12 @@ private:          for (const auto& variant : extras) {              if (const auto argument = std::get_if<TextureArgument>(&variant)) {                  expr += GenerateTextureArgument(*argument); -            } else if (std::holds_alternative<TextureAoffi>(variant)) { -                expr += GenerateTextureAoffi(meta->aoffi); +            } else if (std::holds_alternative<TextureOffset>(variant)) { +                if (!meta->aoffi.empty()) { +                    expr += GenerateTextureAoffi(meta->aoffi); +                } else if (!meta->ptp.empty()) { +                    expr += GenerateTexturePtp(meta->ptp); +                }              } else if (std::holds_alternative<TextureDerivates>(variant)) {                  expr += GenerateTextureDerivates(meta->derivates);              } else { @@ -1160,6 +1166,20 @@ private:          return expr;      } +    std::string ReadTextureOffset(const Node& value) { +        if (const auto immediate = std::get_if<ImmediateNode>(&*value)) { +            // Inline the string as an immediate integer in GLSL (AOFFI arguments are required +            // to be constant by the standard). +            return std::to_string(static_cast<s32>(immediate->GetValue())); +        } else if (device.HasVariableAoffi()) { +            // Avoid using variable AOFFI on unsupported devices. +            return Visit(value).AsInt(); +        } else { +            // Insert 0 on devices not supporting variable AOFFI. 
+            return "0"; +        } +    } +      std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {          if (aoffi.empty()) {              return {}; @@ -1170,18 +1190,7 @@ private:          expr += '(';          for (std::size_t index = 0; index < aoffi.size(); ++index) { -            const auto operand{aoffi.at(index)}; -            if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) { -                // Inline the string as an immediate integer in GLSL (AOFFI arguments are required -                // to be constant by the standard). -                expr += std::to_string(static_cast<s32>(immediate->GetValue())); -            } else if (device.HasVariableAoffi()) { -                // Avoid using variable AOFFI on unsupported devices. -                expr += Visit(operand).AsInt(); -            } else { -                // Insert 0 on devices not supporting variable AOFFI. -                expr += '0'; -            } +            expr += ReadTextureOffset(aoffi.at(index));              if (index + 1 < aoffi.size()) {                  expr += ", ";              } @@ -1191,6 +1200,20 @@ private:          return expr;      } +    std::string GenerateTexturePtp(const std::vector<Node>& ptp) { +        static constexpr std::size_t num_vectors = 4; +        ASSERT(ptp.size() == num_vectors * 2); + +        std::string expr = ", ivec2[]("; +        for (std::size_t vector = 0; vector < num_vectors; ++vector) { +            const bool has_next = vector + 1 < num_vectors; +            expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)), +                                ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? 
", " : ""); +        } +        expr += ')'; +        return expr; +    } +      std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {          if (derivates.empty()) {              return {}; @@ -1689,7 +1712,7 @@ private:          ASSERT(meta);          std::string expr = GenerateTexture( -            operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}}); +            operation, "", {TextureOffset{}, TextureArgument{Type::Float, meta->bias}});          if (meta->sampler.IsShadow()) {              expr = "vec4(" + expr + ')';          } @@ -1701,7 +1724,7 @@ private:          ASSERT(meta);          std::string expr = GenerateTexture( -            operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}}); +            operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});          if (meta->sampler.IsShadow()) {              expr = "vec4(" + expr + ')';          } @@ -1709,21 +1732,19 @@ private:      }      Expression TextureGather(Operation operation) { -        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); -        ASSERT(meta); +        const auto& meta = std::get<MetaTexture>(operation.GetMeta()); -        const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; -        if (meta->sampler.IsShadow()) { -            return {GenerateTexture(operation, "Gather", {TextureAoffi{}}, true) + -                        GetSwizzle(meta->element), -                    Type::Float}; +        const auto type = meta.sampler.IsShadow() ? 
Type::Float : Type::Int; +        const bool separate_dc = meta.sampler.IsShadow(); + +        std::vector<TextureIR> ir; +        if (meta.sampler.IsShadow()) { +            ir = {TextureOffset{}};          } else { -            return {GenerateTexture(operation, "Gather", -                                    {TextureAoffi{}, TextureArgument{type, meta->component}}, -                                    false) + -                        GetSwizzle(meta->element), -                    Type::Float}; +            ir = {TextureOffset{}, TextureArgument{type, meta.component}};          } +        return {GenerateTexture(operation, "Gather", ir, separate_dc) + GetSwizzle(meta.element), +                Type::Float};      }      Expression TextureQueryDimensions(Operation operation) { @@ -1794,7 +1815,8 @@ private:          const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());          ASSERT(meta); -        std::string expr = GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureAoffi{}}); +        std::string expr = +            GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});          return {std::move(expr) + GetSwizzle(meta->element), Type::Float};      } diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index dff01a541..4b14cdf58 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -89,59 +89,62 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {          [[fallthrough]];      }      case OpCode::Id::TLD4: { -        ASSERT(instr.tld4.array == 0);          UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),                               "NDV is not implemented"); -        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), -                             "PTP is not implemented"); -          const auto texture_type = instr.tld4.texture_type.Value();          const bool depth_compare = 
is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)                                                 : instr.tld4.UsesMiscMode(TextureMiscMode::DC);          const bool is_array = instr.tld4.array != 0;          const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)                                            : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); -        WriteTexInstructionFloat( -            bb, instr, -            GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless)); +        const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP) +                                        : instr.tld4.UsesMiscMode(TextureMiscMode::PTP); +        WriteTexInstructionFloat(bb, instr, +                                 GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, +                                             is_ptp, is_bindless));          break;      }      case OpCode::Id::TLD4S: { -        const bool uses_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI); -        UNIMPLEMENTED_IF_MSG(uses_aoffi, "AOFFI is not implemented"); - -        const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); +        constexpr std::size_t num_coords = 2; +        const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI); +        const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);          const Node op_a = GetRegister(instr.gpr8);          const Node op_b = GetRegister(instr.gpr20);          // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.          
std::vector<Node> coords; -        Node dc_reg; -        if (depth_compare) { +        std::vector<Node> aoffi; +        Node depth_compare; +        if (is_depth_compare) {              // Note: TLD4S coordinate encoding works just like TEXS's              const Node op_y = GetRegister(instr.gpr8.Value() + 1);              coords.push_back(op_a);              coords.push_back(op_y); -            dc_reg = uses_aoffi ? GetRegister(instr.gpr20.Value() + 1) : op_b; +            if (is_aoffi) { +                aoffi = GetAoffiCoordinates(op_b, num_coords, true); +                depth_compare = GetRegister(instr.gpr20.Value() + 1); +            } else { +                depth_compare = op_b; +            }          } else { +            // There's no depth compare              coords.push_back(op_a); -            if (uses_aoffi) { -                const Node op_y = GetRegister(instr.gpr8.Value() + 1); -                coords.push_back(op_y); +            if (is_aoffi) { +                coords.push_back(GetRegister(instr.gpr8.Value() + 1)); +                aoffi = GetAoffiCoordinates(op_b, num_coords, true);              } else {                  coords.push_back(op_b);              } -            dc_reg = {};          }          const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); -        const SamplerInfo info{TextureType::Texture2D, false, depth_compare}; +        const SamplerInfo info{TextureType::Texture2D, false, is_depth_compare};          const Sampler& sampler = *GetSampler(instr.sampler, info);          Node4 values;          for (u32 element = 0; element < values.size(); ++element) {              auto coords_copy = coords; -            MetaTexture meta{sampler, {}, dc_reg, {}, {}, {}, {}, component, element}; +            MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element};              values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));          } @@ -190,7 
+193,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {          }          for (u32 element = 0; element < values.size(); ++element) { -            MetaTexture meta{*sampler, {}, {}, {}, derivates, {}, {}, {}, element}; +            MetaTexture meta{*sampler, {}, {}, {}, {}, derivates, {}, {}, {}, element};              values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);          } @@ -230,7 +233,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {                  if (!instr.txq.IsComponentEnabled(element)) {                      continue;                  } -                MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element}; +                MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};                  const Node value =                      Operation(OperationCode::TextureQueryDimensions, meta,                                GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); @@ -299,7 +302,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {                  continue;              }              auto params = coords; -            MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element}; +            MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};              const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));              SetTemporary(bb, indexer++, value);          } @@ -367,7 +370,7 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,      if (it != used_samplers.end()) {          ASSERT(!it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&                 it->IsShadow() == info.is_shadow && it->IsBuffer() == info.is_buffer); -        return &(*it); +        return &*it;      }      // Otherwise create a new mapping for this sampler @@ -397,7 +400,7 @@ const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,      if (it != 
used_samplers.end()) {          ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&                 it->IsShadow() == info.is_shadow); -        return &(*it); +        return &*it;      }      // Otherwise create a new mapping for this sampler @@ -538,7 +541,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,      for (u32 element = 0; element < values.size(); ++element) {          auto copy_coords = coords; -        MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element}; +        MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element};          values[element] = Operation(read_method, meta, std::move(copy_coords));      } @@ -635,7 +638,9 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,  }  Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, -                            bool is_array, bool is_aoffi, bool is_bindless) { +                            bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) { +    ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time"); +      const std::size_t coord_count = GetCoordCount(texture_type);      // If enabled arrays index is always stored in the gpr8 field @@ -661,12 +666,15 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de          return values;      } -    std::vector<Node> aoffi; +    std::vector<Node> aoffi, ptp;      if (is_aoffi) {          aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); +    } else if (is_ptp) { +        ptp = GetPtpCoordinates( +            {GetRegister(parameter_register++), GetRegister(parameter_register++)});      } -    Node dc{}; +    Node dc;      if (depth_compare) {          dc = GetRegister(parameter_register++);      } @@ -676,8 +684,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, 
TextureType texture_type, bool de      for (u32 element = 0; element < values.size(); ++element) {          auto coords_copy = coords; -        MetaTexture meta{*sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, component, -                         element}; +        MetaTexture meta{ +            *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element};          values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));      } @@ -710,7 +718,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {      Node4 values;      for (u32 element = 0; element < values.size(); ++element) {          auto coords_copy = coords; -        MetaTexture meta{sampler, array_register, {}, {}, {}, {}, lod, {}, element}; +        MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element};          values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));      } @@ -760,7 +768,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is      Node4 values;      for (u32 element = 0; element < values.size(); ++element) {          auto coords_copy = coords; -        MetaTexture meta{sampler, array, {}, {}, {}, {}, lod, {}, element}; +        MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element};          values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));      }      return values; @@ -825,4 +833,38 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor      return aoffi;  } +std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) { +    static constexpr u32 num_entries = 8; + +    std::vector<Node> ptp; +    ptp.reserve(num_entries); + +    const auto global_size = static_cast<s64>(global_code.size()); +    const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size); +    const std::optional high = 
TrackImmediate(ptp_regs[1], global_code, global_size); +    if (!low || !high) { +        for (u32 entry = 0; entry < num_entries; ++entry) { +            const u32 reg = entry / 4; +            const u32 offset = entry % 4; +            const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6); +            const Node condition = +                Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32)); +            const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64)); +            ptp.push_back(Operation(OperationCode::Select, condition, negative, value)); +        } +        return ptp; +    } + +    const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low); +    for (u32 entry = 0; entry < num_entries; ++entry) { +        s32 value = (immediate >> (entry * 8)) & 0b111111; +        if (value >= 32) { +            value -= 64; +        } +        ptp.push_back(Immediate(value)); +    } + +    return ptp; +} +  } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index abd40f582..4d2f4d6a8 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -374,6 +374,7 @@ struct MetaTexture {      Node array;      Node depth_compare;      std::vector<Node> aoffi; +    std::vector<Node> ptp;      std::vector<Node> derivates;      Node bias;      Node lod; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 04ae5f822..baed06ccd 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -350,7 +350,8 @@ private:                        bool is_array);      Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, -                      bool depth_compare, bool is_array, bool is_aoffi, bool is_bindless); +                      bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp, +                      bool is_bindless);      
Node4 GetTldCode(Tegra::Shader::Instruction instr); @@ -363,6 +364,8 @@ private:      std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); +    std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs); +      Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,                           Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,                           Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, | 
