diff options
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/const_buffer_locker.cpp | 17 | ||||
| -rw-r--r-- | src/video_core/shader/const_buffer_locker.h | 21 | ||||
| -rw-r--r-- | src/video_core/shader/decode.cpp | 68 | ||||
| -rw-r--r-- | src/video_core/shader/decode/texture.cpp | 110 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 87 | ||||
| -rw-r--r-- | src/video_core/shader/node_helper.h | 6 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 16 | ||||
| -rw-r--r-- | src/video_core/shader/track.cpp | 106 | 
9 files changed, 397 insertions, 43 deletions
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index a4a0319eb..0638be8cb 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -66,6 +66,18 @@ std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindle      return value;  } +std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() { +    if (bound_buffer_saved) { +        return bound_buffer; +    } +    if (!engine) { +        return std::nullopt; +    } +    bound_buffer_saved = true; +    bound_buffer = engine->GetBoundBuffer(); +    return bound_buffer; +} +  void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {      keys.insert_or_assign({buffer, offset}, value);  } @@ -78,6 +90,11 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDes      bindless_samplers.insert_or_assign({buffer, offset}, sampler);  } +void ConstBufferLocker::SetBoundBuffer(u32 buffer) { +    bound_buffer_saved = true; +    bound_buffer = buffer; +} +  bool ConstBufferLocker::IsConsistent() const {      if (!engine) {          return false; diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index d32e2d657..d3ea11087 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -10,6 +10,7 @@  #include "common/hash.h"  #include "video_core/engines/const_buffer_engine_interface.h"  #include "video_core/engines/shader_type.h" +#include "video_core/guest_driver.h"  namespace VideoCommon::Shader { @@ -40,6 +41,8 @@ public:      std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); +    std::optional<u32> ObtainBoundBuffer(); +      /// Inserts a key.      void InsertKey(u32 buffer, u32 offset, u32 value); @@ -49,6 +52,9 @@ public:      /// Inserts a bindless sampler key.      void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); +    /// Set the bound buffer for this locker. +    void SetBoundBuffer(u32 buffer); +      /// Checks keys and samplers against engine's current const buffers. Returns true if they are      /// the same value, false otherwise;      bool IsConsistent() const; @@ -71,12 +77,27 @@ public:          return bindless_samplers;      } +    /// Gets bound buffer used on this shader +    u32 GetBoundBuffer() const { +        return bound_buffer; +    } + +    /// Obtains access to the guest driver's profile. +    VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { +        if (engine) { +            return &engine->AccessGuestDriverProfile(); +        } +        return nullptr; +    } +  private:      const Tegra::Engines::ShaderType stage;      Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;      KeyMap keys;      BoundSamplerMap bound_samplers;      BindlessSamplerMap bindless_samplers; +    bool bound_buffer_saved{}; +    u32 bound_buffer{};  };  } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 22c3e5120..6b697ed5d 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -3,6 +3,7 @@  // Refer to the license.txt file included.  #include <cstring> +#include <limits>  #include <set>  #include <fmt/format.h> @@ -33,6 +34,52 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {      return (absolute_offset % SchedPeriod) == 0;  } +void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, +                              const std::list<Sampler>& used_samplers) { +    if (gpu_driver == nullptr) { +        LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); +        return; +    } +    if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { +        return; +    } +    u32 count{}; +    std::vector<u32> bound_offsets; +    for (const auto& sampler : used_samplers) { +        if (sampler.IsBindless()) { +            continue; +        } +        ++count; +        bound_offsets.emplace_back(sampler.GetOffset()); +    } +    if (count > 1) { +        gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); +    } +} + +std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, +                                        VideoCore::GuestDriverProfile* gpu_driver, +                                        const std::list<Sampler>& used_samplers) { +    if (gpu_driver == nullptr) { +        LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); +        return std::nullopt; +    } +    const u32 base_offset = sampler_to_deduce.GetOffset(); +    u32 max_offset{std::numeric_limits<u32>::max()}; +    for (const auto& sampler : used_samplers) { +        if (sampler.IsBindless()) { +            continue; +        } +        if (sampler.GetOffset() > base_offset) { +            max_offset = std::min(sampler.GetOffset(), max_offset); +        } +    } +    if (max_offset == std::numeric_limits<u32>::max()) { +        return std::nullopt; +    } +    return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); +} +  } // Anonymous namespace  class ASTDecoder { @@ -315,4 +362,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {      return pc + 1;  } +void ShaderIR::PostDecode() { +    // Deduce texture handler size if needed +    auto gpu_driver = locker.AccessGuestDriverProfile(); +    DeduceTextureHandlerSize(gpu_driver, used_samplers); +    // Deduce Indexed Samplers +    if (!uses_indexed_samplers) { +        return; +    } +    for (auto& sampler : used_samplers) { +        if (!sampler.IsIndexed()) { +            continue; +        } +        if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { +            sampler.SetSize(*size); +        } else { +            LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); +            sampler.SetSize(1); +        } +    } +} +  } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 0b567e39d..d980535b1 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -144,7 +144,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {          Node4 values;          for (u32 element = 0; element < values.size(); ++element) {              auto coords_copy = coords; -            MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element}; +            MetaTexture meta{sampler, {}, depth_compare, aoffi,   {}, {}, +                             {},      {}, component,     element, {}};              values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));          } @@ -167,9 +168,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {          const auto derivate_reg = instr.gpr20.Value();          const auto texture_type = instr.txd.texture_type.Value();          const auto coord_count = GetCoordCount(texture_type); - +        Node index_var{};          const Sampler* sampler = -            is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}}) +            is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}})                          : GetSampler(instr.sampler, {{texture_type, is_array, false}});          Node4 values;          if (sampler == nullptr) { @@ -200,7 +201,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {          }          for (u32 element = 0; element < values.size(); ++element) { -            MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element}; +            MetaTexture meta{*sampler, array_node, {}, {},      {},       derivates, +                             {},       {},         {}, element, index_var};              values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);          } @@ -215,8 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {          // TODO: The new commits on the texture refactor, change the way samplers work.          // Sadly, not all texture instructions specify the type of texture their sampler          // uses. This must be fixed at a later instance. +        Node index_var{};          const Sampler* sampler = -            is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler); +            is_bindless ? GetBindlessSampler(instr.gpr8, index_var) : GetSampler(instr.sampler);          if (sampler == nullptr) {              u32 indexer = 0; @@ -240,7 +243,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {                  if (!instr.txq.IsComponentEnabled(element)) {                      continue;                  } -                MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; +                MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};                  const Node value =                      Operation(OperationCode::TextureQueryDimensions, meta,                                GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); @@ -266,8 +269,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {          auto texture_type = instr.tmml.texture_type.Value();          const bool is_array = instr.tmml.array != 0; +        Node index_var{};          const Sampler* sampler = -            is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler); +            is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler);          if (sampler == nullptr) {              u32 indexer = 0; @@ -309,7 +313,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {                  continue;              }              auto params = coords; -            MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; +            MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};              const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));              SetTemporary(bb, indexer++, value);          } @@ -383,37 +387,65 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,      // Otherwise create a new mapping for this sampler      const auto next_index = static_cast<u32>(used_samplers.size());      return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, -                                       info.is_buffer); +                                       info.is_buffer, false);  } -const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, +const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var,                                              std::optional<SamplerInfo> sampler_info) {      const Node sampler_register = GetRegister(reg); -    const auto [base_sampler, buffer, offset] = -        TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); -    ASSERT(base_sampler != nullptr); -    if (base_sampler == nullptr) { +    const auto [base_node, tracked_sampler_info] = +        TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); +    ASSERT(base_node != nullptr); +    if (base_node == nullptr) {          return nullptr;      } -    const auto info = GetSamplerInfo(sampler_info, offset, buffer); +    if (const auto bindless_sampler_info = +            std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { +        const u32 buffer = bindless_sampler_info->GetIndex(); +        const u32 offset = bindless_sampler_info->GetOffset(); +        const auto info = GetSamplerInfo(sampler_info, offset, buffer); + +        // If this sampler has already been used, return the existing mapping. +        const auto it = +            std::find_if(used_samplers.begin(), used_samplers.end(), +                         [buffer = buffer, offset = offset](const Sampler& entry) { +                             return entry.GetBuffer() == buffer && entry.GetOffset() == offset; +                         }); +        if (it != used_samplers.end()) { +            ASSERT(it->IsBindless() && it->GetType() == info.type && +                   it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow); +            return &*it; +        } -    // If this sampler has already been used, return the existing mapping. -    const auto it = -        std::find_if(used_samplers.begin(), used_samplers.end(), -                     [buffer = buffer, offset = offset](const Sampler& entry) { -                         return entry.GetBuffer() == buffer && entry.GetOffset() == offset; -                     }); -    if (it != used_samplers.end()) { -        ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && -               it->IsShadow() == info.is_shadow); -        return &*it; -    } +        // Otherwise create a new mapping for this sampler +        const auto next_index = static_cast<u32>(used_samplers.size()); +        return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, +                                           info.is_shadow, info.is_buffer, false); +    } else if (const auto array_sampler_info = +                   std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { +        const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; +        index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); +        const auto info = GetSamplerInfo(sampler_info, base_offset); + +        // If this sampler has already been used, return the existing mapping. +        const auto it = std::find_if( +            used_samplers.begin(), used_samplers.end(), +            [base_offset](const Sampler& entry) { return entry.GetOffset() == base_offset; }); +        if (it != used_samplers.end()) { +            ASSERT(!it->IsBindless() && it->GetType() == info.type && +                   it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow && +                   it->IsBuffer() == info.is_buffer && it->IsIndexed()); +            return &*it; +        } -    // Otherwise create a new mapping for this sampler -    const auto next_index = static_cast<u32>(used_samplers.size()); -    return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, -                                       info.is_shadow, info.is_buffer); +        uses_indexed_samplers = true; +        // Otherwise create a new mapping for this sampler +        const auto next_index = static_cast<u32>(used_samplers.size()); +        return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array, +                                           info.is_shadow, info.is_buffer, true); +    } +    return nullptr;  }  void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { @@ -499,8 +531,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,                           "This method is not supported.");      const SamplerInfo info{texture_type, is_array, is_shadow, false}; -    const Sampler* sampler = -        is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info); +    Node index_var{}; +    const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info) +                                         : GetSampler(instr.sampler, info);      Node4 values;      if (sampler == nullptr) {          for (u32 element = 0; element < values.size(); ++element) { @@ -548,7 +581,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,      for (u32 element = 0; element < values.size(); ++element) {          auto copy_coords = coords; -        MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element}; +        MetaTexture meta{*sampler, array, depth_compare, aoffi,    {}, {}, bias, +                         lod,      {},    element,       index_var};          values[element] = Operation(read_method, meta, std::move(copy_coords));      } @@ -663,7 +697,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de      u64 parameter_register = instr.gpr20.Value();      const SamplerInfo info{texture_type, is_array, depth_compare, false}; -    const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info) +    Node index_var{}; +    const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, index_var, info)                                           : GetSampler(instr.sampler, info);      Node4 values;      if (sampler == nullptr) { @@ -692,7 +727,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de      for (u32 element = 0; element < values.size(); ++element) {          auto coords_copy = coords;          MetaTexture meta{ -            *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element}; +            *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, +            index_var};          values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));      } @@ -725,7 +761,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {      Node4 values;      for (u32 element = 0; element < values.size(); ++element) {          auto coords_copy = coords; -        MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element}; +        MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};          values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));      } @@ -775,7 +811,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is      Node4 values;      for (u32 element = 0; element < values.size(); ++element) {          auto coords_copy = coords; -        MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element}; +        MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}};          values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));      }      return values; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 9af1f0228..5f83403db 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -212,6 +212,7 @@ enum class MetaStackClass {  class OperationNode;  class ConditionalNode;  class GprNode; +class CustomVarNode;  class ImmediateNode;  class InternalFlagNode;  class PredicateNode; @@ -223,26 +224,32 @@ class SmemNode;  class GmemNode;  class CommentNode; -using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, +using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,                                InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,                                LmemNode, SmemNode, GmemNode, CommentNode>;  using Node = std::shared_ptr<NodeData>;  using Node4 = std::array<Node, 4>;  using NodeBlock = std::vector<Node>; +class BindlessSamplerNode; +class ArraySamplerNode; + +using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>; +using TrackSampler = std::shared_ptr<TrackSamplerData>; +  class Sampler {  public:      /// This constructor is for bound samplers      constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, -                               bool is_array, bool is_shadow, bool is_buffer) +                               bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)          : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, -          is_buffer{is_buffer} {} +          is_buffer{is_buffer}, is_indexed{is_indexed} {}      /// This constructor is for bindless samplers      constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, -                               bool is_array, bool is_shadow, bool is_buffer) +                               bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)          : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, -          is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {} +          is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}      constexpr u32 GetIndex() const {          return index; @@ -276,16 +283,72 @@ public:          return is_bindless;      } +    constexpr bool IsIndexed() const { +        return is_indexed; +    } + +    constexpr u32 Size() const { +        return size; +    } + +    constexpr void SetSize(u32 new_size) { +        size = new_size; +    } +  private:      u32 index{};  ///< Emulated index given for the this sampler.      u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.      u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). +    u32 size{};   ///< Size of the sampler if indexed.      Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)      bool is_array{};    ///< Whether the texture is being sampled as an array texture or not.      bool is_shadow{};   ///< Whether the texture is being sampled as a depth texture or not.      bool is_buffer{};   ///< Whether the texture is a texture buffer without sampler.      bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. +    bool is_indexed{};  ///< Whether this sampler is an indexed array of textures. +}; + +/// Represents a tracked bindless sampler into a direct const buffer +class ArraySamplerNode final { +public: +    explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var) +        : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {} + +    constexpr u32 GetIndex() const { +        return index; +    } + +    constexpr u32 GetBaseOffset() const { +        return base_offset; +    } + +    constexpr u32 GetIndexVar() const { +        return bindless_var; +    } + +private: +    u32 index; +    u32 base_offset; +    u32 bindless_var; +}; + +/// Represents a tracked bindless sampler into a direct const buffer +class BindlessSamplerNode final { +public: +    explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} + +    constexpr u32 GetIndex() const { +        return index; +    } + +    constexpr u32 GetOffset() const { +        return offset; +    } + +private: +    u32 index; +    u32 offset;  };  class Image final { @@ -382,6 +445,7 @@ struct MetaTexture {      Node lod;      Node component{};      u32 element{}; +    Node index{};  };  struct MetaImage { @@ -488,6 +552,19 @@ private:      Tegra::Shader::Register index{};  }; +/// A custom variable +class CustomVarNode final { +public: +    explicit constexpr CustomVarNode(u32 index) : index{index} {} + +    constexpr u32 GetIndex() const { +        return index; +    } + +private: +    u32 index{}; +}; +  /// A 32-bits value that represents an immediate value  class ImmediateNode final {  public: diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h index 0c2aa749b..11231bbea 100644 --- a/src/video_core/shader/node_helper.h +++ b/src/video_core/shader/node_helper.h @@ -45,6 +45,12 @@ Node MakeNode(Args&&... args) {      return std::make_shared<NodeData>(T(std::forward<Args>(args)...));  } +template <typename T, typename... Args> +TrackSampler MakeTrackSampler(Args&&... args) { +    static_assert(std::is_convertible_v<T, TrackSamplerData>); +    return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...)); +} +  template <typename... Args>  Node Operation(OperationCode code, Args&&... args) {      if constexpr (sizeof...(args) == 0) { diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 31eecb3f4..3a5d280a9 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet                     ConstBufferLocker& locker)      : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {      Decode(); +    PostDecode();  }  ShaderIR::~ShaderIR() = default; @@ -38,6 +39,10 @@ Node ShaderIR::GetRegister(Register reg) {      return MakeNode<GprNode>(reg);  } +Node ShaderIR::GetCustomVariable(u32 id) { +    return MakeNode<CustomVarNode>(id); +} +  Node ShaderIR::GetImmediate19(Instruction instr) {      return Immediate(instr.alu.GetImm20_19());  } @@ -452,4 +457,8 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) {      return id;  } +u32 ShaderIR::NewCustomVariable() { +    return num_custom_variables++; +} +  } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ba1db4c11..b0851c3be 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -180,6 +180,10 @@ public:          return amend_code[index];      } +    u32 GetNumCustomVariables() const { +        return num_custom_variables; +    } +  private:      friend class ASTDecoder; @@ -191,6 +195,7 @@ private:      };      void Decode(); +    void PostDecode();      NodeBlock DecodeRange(u32 begin, u32 end);      void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); @@ -235,6 +240,8 @@ private:      /// Generates a node for a passed register.      Node GetRegister(Tegra::Shader::Register reg); +    /// Generates a node for a custom variable +    Node GetCustomVariable(u32 id);      /// Generates a node representing a 19-bit immediate value      Node GetImmediate19(Tegra::Shader::Instruction instr);      /// Generates a node representing a 32-bit immediate value @@ -321,7 +328,7 @@ private:                                std::optional<SamplerInfo> sampler_info = std::nullopt);      /// Accesses a texture sampler for a bindless texture. -    const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, +    const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var,                                        std::optional<SamplerInfo> sampler_info = std::nullopt);      /// Accesses an image. @@ -387,6 +394,9 @@ private:      std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; +    std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, +                                                        s64 cursor); +      std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;      std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, @@ -399,6 +409,8 @@ private:      /// Register new amending code and obtain the reference id.      std::size_t DeclareAmend(Node new_amend); +    u32 NewCustomVariable(); +      const ProgramCode& program_code;      const u32 main_offset;      const CompilerSettings settings; @@ -414,6 +426,7 @@ private:      NodeBlock global_code;      ASTManager program_manager{true, true};      std::vector<Node> amend_code; +    u32 num_custom_variables{};      std::set<u32> used_registers;      std::set<Tegra::Shader::Pred> used_predicates; @@ -431,6 +444,7 @@ private:      bool uses_instance_id{};      bool uses_vertex_id{};      bool uses_warps{}; +    bool uses_indexed_samplers{};      Tegra::Shader::Header header;  }; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 165c79330..ea39bca54 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -8,6 +8,7 @@  #include "common/common_types.h"  #include "video_core/shader/node.h" +#include "video_core/shader/node_helper.h"  #include "video_core/shader/shader_ir.h"  namespace VideoCommon::Shader { @@ -35,8 +36,113 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,      }      return {};  } + +std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) { +    if (operation.GetCode() != OperationCode::UAdd) { +        return std::nullopt; +    } +    Node gpr{}; +    Node offset{}; +    ASSERT(operation.GetOperandsCount() == 2); +    for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { +        Node operand = operation[i]; +        if (std::holds_alternative<ImmediateNode>(*operand)) { +            offset = operation[i]; +        } else if (std::holds_alternative<GprNode>(*operand)) { +            gpr = operation[i]; +        } +    } +    if (offset && gpr) { +        return std::make_pair(gpr, offset); +    } +    return std::nullopt; +} + +bool AmendNodeCv(std::size_t amend_index, Node node) { +    if (const auto operation = std::get_if<OperationNode>(&*node)) { +        operation->SetAmendIndex(amend_index); +        return true; +    } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { +        conditional->SetAmendIndex(amend_index); +        return true; +    } +    return false; +} +  } // Anonymous namespace +std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, +                                                              s64 cursor) { +    if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { +        // Constant buffer found, test if it's an immediate +        const auto offset = cbuf->GetOffset(); +        if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { +            auto track = +                MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); +            return {tracked, track}; +        } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { +            auto bound_buffer = locker.ObtainBoundBuffer(); +            if (!bound_buffer) { +                return {}; +            } +            if (*bound_buffer != cbuf->GetIndex()) { +                return {}; +            } +            auto pair = DecoupleIndirectRead(*operation); +            if (!pair) { +                return {}; +            } +            auto [gpr, base_offset] = *pair; +            const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); +            auto gpu_driver = locker.AccessGuestDriverProfile(); +            if (gpu_driver == nullptr) { +                return {}; +            } +            const u32 bindless_cv = NewCustomVariable(); +            const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, +                                      Immediate(gpu_driver->GetTextureHandlerSize())); + +            const Node cv_node = GetCustomVariable(bindless_cv); +            Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); +            const std::size_t amend_index = DeclareAmend(amend_op); +            AmendNodeCv(amend_index, code[cursor]); +            // TODO Implement Bindless Index custom variable +            auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), +                                                            offset_inm->GetValue(), bindless_cv); +            return {tracked, track}; +        } +        return {}; +    } +    if (const auto gpr = std::get_if<GprNode>(&*tracked)) { +        if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { +            return {}; +        } +        // Reduce the cursor in one to avoid infinite loops when the instruction sets the same +        // register that it uses as operand +        const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); +        if (!source) { +            return {}; +        } +        return TrackBindlessSampler(source, code, new_cursor); +    } +    if (const auto operation = std::get_if<OperationNode>(&*tracked)) { +        for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { +            if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); +                std::get<0>(found)) { +                // Cbuf found in operand. +                return found; +            } +        } +        return {}; +    } +    if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { +        const auto& conditional_code = conditional->GetCode(); +        return TrackBindlessSampler(tracked, conditional_code, +                                    static_cast<s64>(conditional_code.size())); +    } +    return {}; +} +  std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,                                                 s64 cursor) const {      if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {  | 
