diff options
22 files changed, 265 insertions, 70 deletions
| diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 73b67f0af..e4802bf9e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -339,6 +339,10 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {              const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};              return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));          } +    case IR::Attribute::BaseInstance: +        return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance)); +    case IR::Attribute::BaseVertex: +        return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_vertex));      case IR::Attribute::FrontFace:          return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face),                              ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())), @@ -380,6 +384,10 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {              const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};              return ctx.OpISub(ctx.U32[1], index, base);          } +    case IR::Attribute::BaseInstance: +        return ctx.OpLoad(ctx.U32[1], ctx.base_instance); +    case IR::Attribute::BaseVertex: +        return ctx.OpLoad(ctx.U32[1], ctx.base_vertex);      default:          throw NotImplementedException("Read U32 attribute {}", attr);      } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 41dc6d031..563a5fc49 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -1379,18 +1379,28 @@ void EmitContext::DefineInputs(const IR::Program& program) {      if (loads[IR::Attribute::InstanceId]) {          if (profile.support_vertex_instance_id) {              instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); +            if (loads[IR::Attribute::BaseInstance]) { +                base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); +            }          } else {              instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);              base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);          } +    } else if (loads[IR::Attribute::BaseInstance]) { +        base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);      }      if (loads[IR::Attribute::VertexId]) {          if (profile.support_vertex_instance_id) {              vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); +            if (loads[IR::Attribute::BaseVertex]) { +                base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); +            }          } else {              vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex);              base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);          } +    } else if (loads[IR::Attribute::BaseVertex]) { +        base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);      }      if (loads[IR::Attribute::FrontFace]) {          front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 402f2664f..b9b4455f6 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -34,6 +34,11 @@ public:      [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; +    [[nodiscard]] virtual bool HasHLEMacroState() const = 0; + +    [[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer( +        u32 bank, u32 offset) = 0; +      virtual void Dump(u64 hash) = 0;      [[nodiscard]] const ProgramHeader& SPH() const noexcept { diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 7d3d882e4..73e189a89 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -446,6 +446,10 @@ std::string NameOf(Attribute attribute) {          return "ViewportMask";      case Attribute::FrontFace:          return "FrontFace"; +    case Attribute::BaseInstance: +        return "BaseInstance"; +    case Attribute::BaseVertex: +        return "BaseVertex";      }      return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));  } diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 6ee3947b1..364d8a912 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -219,6 +219,10 @@ enum class Attribute : u64 {      FixedFncTexture9Q = 231,      ViewportMask = 232,      FrontFace = 255, + +    // Implementation attributes +    BaseInstance = 256, +    BaseVertex = 257,  };  constexpr size_t NUM_GENERICS = 32; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 0cdac0eff..eb2e49a68 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -294,6 +294,14 @@ F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) {      return Inst<F32>(Opcode::GetAttribute, attribute, vertex);  } +U32 IREmitter::GetAttributeU32(IR::Attribute attribute) { +    return GetAttributeU32(attribute, Imm32(0)); +} + +U32 IREmitter::GetAttributeU32(IR::Attribute attribute, const U32& vertex) { +    return Inst<U32>(Opcode::GetAttributeU32, attribute, vertex); +} +  void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) {      Inst(Opcode::SetAttribute, attribute, value, vertex);  } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 2df992feb..7aaaa4ab0 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -74,6 +74,8 @@ public:      [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);      [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); +    [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute); +    [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, const U32& vertex);      void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);      [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 3adbd2b16..ac159d24b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -219,7 +219,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo      }      Optimization::SsaRewritePass(program); -    Optimization::ConstantPropagationPass(program); +    Optimization::ConstantPropagationPass(env, program);      Optimization::PositionPass(env, program); diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 826f9a54a..ac10405f3 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -7,6 +7,7 @@  #include <type_traits>  #include "common/bit_cast.h" +#include "shader_recompiler/environment.h"  #include "shader_recompiler/exception.h"  #include "shader_recompiler/frontend/ir/ir_emitter.h"  #include "shader_recompiler/frontend/ir/value.h" @@ -515,6 +516,8 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {              case IR::Attribute::PrimitiveId:              case IR::Attribute::InstanceId:              case IR::Attribute::VertexId: +            case IR::Attribute::BaseVertex: +            case IR::Attribute::BaseInstance:                  break;              default:                  return; @@ -644,7 +647,37 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {      }  } -void ConstantPropagation(IR::Block& block, IR::Inst& inst) { +void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { +    const IR::Value bank{inst.Arg(0)}; +    const IR::Value offset{inst.Arg(1)}; +    if (!bank.IsImmediate() || !offset.IsImmediate()) { +        return; +    } +    const auto bank_value = bank.U32(); +    const auto offset_value = offset.U32(); +    auto replacement = env.GetReplaceConstBuffer(bank_value, offset_value); +    if (!replacement) { +        return; +    } +    const auto new_attribute = [replacement]() { +        switch (*replacement) { +        case ReplaceConstant::BaseInstance: +            return IR::Attribute::BaseInstance; +        case ReplaceConstant::BaseVertex: +            return IR::Attribute::BaseVertex; +        default: +            throw NotImplementedException("Not implemented replacement variable {}", *replacement); +        } +    }(); +    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; +    if (inst.GetOpcode() == IR::Opcode::GetCbufU32) { +        inst.ReplaceUsesWith(ir.GetAttributeU32(new_attribute)); +    } else { +        inst.ReplaceUsesWith(ir.GetAttribute(new_attribute)); +    } +} + +void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {      switch (inst.GetOpcode()) {      case IR::Opcode::GetRegister:          return FoldGetRegister(inst); @@ -789,18 +822,24 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {                                      IR::Opcode::CompositeInsertF16x4);      case IR::Opcode::FSwizzleAdd:          return FoldFSwizzleAdd(block, inst); +    case IR::Opcode::GetCbufF32: +    case IR::Opcode::GetCbufU32: +        if (env.HasHLEMacroState()) { +            return FoldConstBuffer(env, block, inst); +        } +        break;      default:          break;      }  }  } // Anonymous namespace -void ConstantPropagationPass(IR::Program& program) { +void ConstantPropagationPass(Environment& env, IR::Program& program) {      const auto end{program.post_order_blocks.rend()};      for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {          IR::Block* const block{*it};          for (IR::Inst& inst : block->Instructions()) { -            ConstantPropagation(*block, inst); +            ConstantPropagation(env, *block, inst);          }      }  } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 11bfe801a..1f8f2ba95 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -13,7 +13,7 @@ struct HostTranslateInfo;  namespace Shader::Optimization {  void CollectShaderInfoPass(Environment& env, IR::Program& program); -void ConstantPropagationPass(IR::Program& program); +void ConstantPropagationPass(Environment& env, IR::Program& program);  void DeadCodeEliminationPass(IR::Program& program);  void GlobalMemoryToStorageBufferPass(IR::Program& program);  void IdentityRemovalPass(IR::Program& program); diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d9c6e92db..ea0f48344 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -16,6 +16,11 @@  namespace Shader { +enum class ReplaceConstant : u32 { +    BaseInstance, +    BaseVertex, +}; +  enum class TextureType : u32 {      Color1D,      ColorArray1D, diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h index 7b28a285f..18a9aaf50 100644 --- a/src/shader_recompiler/varying_state.h +++ b/src/shader_recompiler/varying_state.h @@ -11,7 +11,7 @@  namespace Shader {  struct VaryingState { -    std::bitset<256> mask{}; +    std::bitset<512> mask{};      void Set(IR::Attribute attribute, bool state = true) {          mask[static_cast<size_t>(attribute)] = state; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index a0dd7400d..50d8a94b1 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -182,8 +182,14 @@ u32 Maxwell3D::GetMaxCurrentVertices() {  size_t Maxwell3D::EstimateIndexBufferSize() {      GPUVAddr start_address = regs.index_buffer.StartAddress();      GPUVAddr end_address = regs.index_buffer.EndAddress(); -    return std::min<size_t>(memory_manager.GetMemoryLayoutSize(start_address), -                            static_cast<size_t>(end_address - start_address)); +    constexpr std::array<size_t, 4> max_sizes = { +        std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(), +        std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; +    const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); +    return std::min<size_t>( +        memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) / +            byte_size, +        static_cast<size_t>(end_address - start_address));  }  u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { @@ -572,4 +578,9 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {      return regs.reg_array[method];  } +void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) { +    const u64 key = (static_cast<u64>(bank) << 32) | offset; +    replace_table.emplace(key, name); +} +  } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cfe1e4883..397e88f67 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3020,6 +3020,23 @@ public:      /// Store temporary hw register values, used by some calls to restore state after a operation      Regs shadow_state; +    // None Engine +    enum class EngineHint : u32 { +        None = 0x0, +        OnHLEMacro = 0x1, +    }; + +    EngineHint engine_state{EngineHint::None}; + +    enum class HLEReplaceName : u32 { +        BaseVertex = 0x0, +        BaseInstance = 0x1, +    }; + +    void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name); + +    std::unordered_map<u64, HLEReplaceName> replace_table; +      static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");      static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 93b6d42a4..638247e55 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -14,26 +14,29 @@  #include "video_core/rasterizer_interface.h"  namespace Tegra { + +using Maxwell = Engines::Maxwell3D; +  namespace { -bool IsTopologySafe(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { +bool IsTopologySafe(Maxwell::Regs::PrimitiveTopology topology) {      switch (topology) { -    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: -    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: -    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineLoop: -    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: -    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: -    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: -    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: -    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: -    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: -    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: -    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: -    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Patches: +    case Maxwell::Regs::PrimitiveTopology::Points: +    case Maxwell::Regs::PrimitiveTopology::Lines: +    case Maxwell::Regs::PrimitiveTopology::LineLoop: +    case Maxwell::Regs::PrimitiveTopology::LineStrip: +    case Maxwell::Regs::PrimitiveTopology::Triangles: +    case Maxwell::Regs::PrimitiveTopology::TriangleStrip: +    case Maxwell::Regs::PrimitiveTopology::TriangleFan: +    case Maxwell::Regs::PrimitiveTopology::LinesAdjacency: +    case Maxwell::Regs::PrimitiveTopology::LineStripAdjacency: +    case Maxwell::Regs::PrimitiveTopology::TrianglesAdjacency: +    case Maxwell::Regs::PrimitiveTopology::TriangleStripAdjacency: +    case Maxwell::Regs::PrimitiveTopology::Patches:          return true; -    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Quads: -    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::QuadStrip: -    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Polygon: +    case Maxwell::Regs::PrimitiveTopology::Quads: +    case Maxwell::Regs::PrimitiveTopology::QuadStrip: +    case Maxwell::Regs::PrimitiveTopology::Polygon:      default:          return false;      } @@ -82,8 +85,7 @@ public:          : HLEMacroImpl(maxwell3d_), extended(extended_) {}      void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { -        auto topology = -            static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); +        auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);          if (!IsTopologySafe(topology)) {              Fallback(parameters);              return; @@ -99,18 +101,16 @@ public:          params.stride = 0;          if (extended) { -            maxwell3d.CallMethod(0x8e3, 0x640, true); -            maxwell3d.CallMethod(0x8e4, parameters[4], true); +            maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; +            maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);          }          maxwell3d.draw_manager->DrawArrayIndirect(topology);          if (extended) { -            maxwell3d.CallMethod(0x8e3, 0x640, true); -            maxwell3d.CallMethod(0x8e4, 0, true); +            maxwell3d.engine_state = Maxwell::EngineHint::None; +            maxwell3d.replace_table.clear();          } -        maxwell3d.regs.vertex_buffer.first = 0; -        maxwell3d.regs.vertex_buffer.count = 0;      }  private: @@ -134,13 +134,18 @@ private:          const u32 base_instance = parameters[4];          if (extended) { -            maxwell3d.CallMethod(0x8e3, 0x640, true); -            maxwell3d.CallMethod(0x8e4, base_instance, true); +            maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; +            maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);          }          maxwell3d.draw_manager->DrawArray(              static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),              vertex_first, vertex_count, base_instance, instance_count); + +        if (extended) { +            maxwell3d.engine_state = Maxwell::EngineHint::None; +            maxwell3d.replace_table.clear(); +        }      }      bool extended; @@ -151,8 +156,7 @@ public:      explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}      void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { -        auto topology = -            static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); +        auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);          if (!IsTopologySafe(topology)) {              Fallback(parameters);              return; @@ -164,16 +168,12 @@ public:              minimum_limit = std::max(parameters[3], minimum_limit);          }          const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); -        const u32 base_size = std::max(minimum_limit, estimate); -        const u32 element_base = parameters[4]; -        const u32 base_instance = parameters[5]; -        maxwell3d.regs.index_buffer.first = 0; -        maxwell3d.regs.index_buffer.count = base_size; // Use a fixed size, just for mapping +        const u32 base_size = std::max<u32>(minimum_limit, estimate);          maxwell3d.regs.draw.topology.Assign(topology);          maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; -        maxwell3d.CallMethod(0x8e3, 0x640, true); -        maxwell3d.CallMethod(0x8e4, element_base, true); -        maxwell3d.CallMethod(0x8e5, base_instance, true); +        maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; +        maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); +        maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);          auto& params = maxwell3d.draw_manager->GetIndirectParams();          params.is_indexed = true;          params.include_count = false; @@ -184,9 +184,8 @@ public:          params.stride = 0;          maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;          maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); -        maxwell3d.CallMethod(0x8e3, 0x640, true); -        maxwell3d.CallMethod(0x8e4, 0x0, true); -        maxwell3d.CallMethod(0x8e5, 0x0, true); +        maxwell3d.engine_state = Maxwell::EngineHint::None; +        maxwell3d.replace_table.clear();      }  private: @@ -197,18 +196,17 @@ private:          const u32 base_instance = parameters[5];          maxwell3d.regs.vertex_id_base = element_base;          maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; -        maxwell3d.CallMethod(0x8e3, 0x640, true); -        maxwell3d.CallMethod(0x8e4, element_base, true); -        maxwell3d.CallMethod(0x8e5, base_instance, true); +        maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; +        maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); +        maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);          maxwell3d.draw_manager->DrawIndex(              static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),              parameters[3], parameters[1], element_base, base_instance, instance_count);          maxwell3d.regs.vertex_id_base = 0x0; -        maxwell3d.CallMethod(0x8e3, 0x640, true); -        maxwell3d.CallMethod(0x8e4, 0x0, true); -        maxwell3d.CallMethod(0x8e5, 0x0, true); +        maxwell3d.engine_state = Maxwell::EngineHint::None; +        maxwell3d.replace_table.clear();      }      u32 minimum_limit{1 << 18}; @@ -238,8 +236,7 @@ public:          : HLEMacroImpl(maxwell3d_) {}      void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { -        const auto topology = -            static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); +        const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]);          if (!IsTopologySafe(topology)) {              Fallback(parameters);              return; @@ -277,9 +274,6 @@ public:          }          const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());          const u32 base_size = std::max(minimum_limit, estimate); - -        maxwell3d.regs.index_buffer.first = 0; -        maxwell3d.regs.index_buffer.count = std::max(highest_limit, base_size);          maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;          auto& params = maxwell3d.draw_manager->GetIndirectParams();          params.is_indexed = true; @@ -290,7 +284,12 @@ public:          params.max_draw_counts = draw_count;          params.stride = stride;          maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; -        maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit); +        maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; +        maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); +        maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); +        maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); +        maxwell3d.engine_state = Maxwell::EngineHint::None; +        maxwell3d.replace_table.clear();      }  private: @@ -299,9 +298,8 @@ private:              // Clean everything.              // Clean everything.              maxwell3d.regs.vertex_id_base = 0x0; -            maxwell3d.CallMethod(0x8e3, 0x640, true); -            maxwell3d.CallMethod(0x8e4, 0x0, true); -            maxwell3d.CallMethod(0x8e5, 0x0, true); +            maxwell3d.engine_state = Maxwell::EngineHint::None; +            maxwell3d.replace_table.clear();          });          maxwell3d.RefreshParameters();          const u32 start_indirect = parameters[0]; @@ -310,8 +308,7 @@ private:              // Nothing to do.              return;          } -        const auto topology = -            static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); +        const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]);          maxwell3d.regs.draw.topology.Assign(topology);          const u32 padding = parameters[3];          const std::size_t max_draws = parameters[4]; @@ -326,9 +323,9 @@ private:              const u32 base_vertex = parameters[base + 3];              const u32 base_instance = parameters[base + 4];              maxwell3d.regs.vertex_id_base = base_vertex; -            maxwell3d.CallMethod(0x8e3, 0x640, true); -            maxwell3d.CallMethod(0x8e4, base_vertex, true); -            maxwell3d.CallMethod(0x8e5, base_instance, true); +            maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; +            maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); +            maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);              maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;              maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base],                                                base_vertex, base_instance, parameters[base + 1]); diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 8f6c51045..11e7d225e 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -577,7 +577,7 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const {      return range_so_far;  } -size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { +size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const {      PTEKind base_kind = GetPageKind(gpu_addr);      if (base_kind == PTEKind::INVALID) {          return 0; @@ -596,6 +596,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const {              return true;          }          range_so_far += copy_amount; +        if (range_so_far >= max_size) { +            result = true; +            return true; +        }          return false;      };      auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { @@ -605,6 +609,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const {              return true;          }          range_so_far += copy_amount; +        if (range_so_far >= max_size) { +            result = true; +            return true; +        }          return false;      };      auto check_short_pages = [&](std::size_t page_index, std::size_t offset, diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 65f6e8134..ca22520d7 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -118,7 +118,8 @@ public:      PTEKind GetPageKind(GPUVAddr gpu_addr) const; -    size_t GetMemoryLayoutSize(GPUVAddr gpu_addr) const; +    size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, +                               size_t max_size = std::numeric_limits<size_t>::max()) const;  private:      template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index e62b36822..df229f41b 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -97,6 +97,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,      smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0);      alpha_to_coverage_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_coverage != 0 ? 1 : 0);      alpha_to_one_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_one != 0 ? 1 : 0); +    app_stage.Assign(maxwell3d.engine_state);      for (size_t i = 0; i < regs.rt.size(); ++i) {          color_formats[i] = static_cast<u8>(regs.rt[i].format); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index ab79fb8f3..03bf64b57 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -197,6 +197,7 @@ struct FixedPipelineState {          BitField<14, 1, u32> smooth_lines;          BitField<15, 1, u32> alpha_to_coverage_enabled;          BitField<16, 1, u32> alpha_to_one_enabled; +        BitField<17, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage;      };      std::array<u8, Maxwell::NumRenderTargets> color_formats; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e7262420c..58b955821 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment;  using VideoCommon::GenericEnvironment;  using VideoCommon::GraphicsEnvironment; -constexpr u32 CACHE_VERSION = 8; +constexpr u32 CACHE_VERSION = 9;  template <typename Container>  auto MakeSpan(Container& container) { diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 958810747..99d85bfb3 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -202,12 +202,15 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {      const u64 num_texture_types{static_cast<u64>(texture_types.size())};      const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())};      const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; +    const u64 num_cbuf_replacement_values{static_cast<u64>(cbuf_replacements.size())};      file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))          .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))          .write(reinterpret_cast<const char*>(&num_texture_pixel_formats),                 sizeof(num_texture_pixel_formats))          .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) +        .write(reinterpret_cast<const char*>(&num_cbuf_replacement_values), +               sizeof(num_cbuf_replacement_values))          .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))          .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))          .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) @@ -229,6 +232,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {          file.write(reinterpret_cast<const char*>(&key), sizeof(key))              .write(reinterpret_cast<const char*>(&type), sizeof(type));      } +    for (const auto& [key, type] : cbuf_replacements) { +        file.write(reinterpret_cast<const char*>(&key), sizeof(key)) +            .write(reinterpret_cast<const char*>(&type), sizeof(type)); +    }      if (stage == Shader::Stage::Compute) {          file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))              .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size)); @@ -318,6 +325,8 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,      ASSERT(local_size <= std::numeric_limits<u32>::max());      local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size;      texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot; +    has_hle_engine_state = +        maxwell3d->engine_state == Tegra::Engines::Maxwell3D::EngineHint::OnHLEMacro;  }  u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { @@ -331,6 +340,30 @@ u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {      return value;  } +std::optional<Shader::ReplaceConstant> GraphicsEnvironment::GetReplaceConstBuffer(u32 bank, +                                                                                  u32 offset) { +    if (!has_hle_engine_state) { +        return std::nullopt; +    } +    const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset); +    auto it = maxwell3d->replace_table.find(key); +    if (it == maxwell3d->replace_table.end()) { +        return std::nullopt; +    } +    const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplaceName name) { +        switch (name) { +        case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseVertex: +            return Shader::ReplaceConstant::BaseVertex; +        case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseInstance: +            return Shader::ReplaceConstant::BaseInstance; +        default: +            UNREACHABLE(); +        } +    }(it->second); +    cbuf_replacements.emplace(key, converted_value); +    return converted_value; +} +  Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {      const auto& regs{maxwell3d->regs};      const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; @@ -409,11 +442,14 @@ void FileEnvironment::Deserialize(std::ifstream& file) {      u64 num_texture_types{};      u64 num_texture_pixel_formats{};      u64 num_cbuf_values{}; +    u64 num_cbuf_replacement_values{};      file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))          .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))          .read(reinterpret_cast<char*>(&num_texture_pixel_formats),                sizeof(num_texture_pixel_formats))          .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) +        .read(reinterpret_cast<char*>(&num_cbuf_replacement_values), +              sizeof(num_cbuf_replacement_values))          .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))          .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))          .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) @@ -444,6 +480,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) {              .read(reinterpret_cast<char*>(&value), sizeof(value));          cbuf_values.emplace(key, value);      } +    for (size_t i = 0; i < num_cbuf_replacement_values; ++i) { +        u64 key; +        Shader::ReplaceConstant value; +        file.read(reinterpret_cast<char*>(&key), sizeof(key)) +            .read(reinterpret_cast<char*>(&value), sizeof(value)); +        cbuf_replacements.emplace(key, value); +    }      if (stage == Shader::Stage::Compute) {          file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))              .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); @@ -512,6 +555,16 @@ std::array<u32, 3> FileEnvironment::WorkgroupSize() const {      return workgroup_size;  } +std::optional<Shader::ReplaceConstant> FileEnvironment::GetReplaceConstBuffer(u32 bank, +                                                                              u32 offset) { +    const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset); +    auto it = cbuf_replacements.find(key); +    if (it == cbuf_replacements.end()) { +        return std::nullopt; +    } +    return it->second; +} +  void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,                         const std::filesystem::path& filename, u32 cache_version) try {      std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 1342fab1e..d75987a52 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -60,6 +60,10 @@ public:      void Serialize(std::ofstream& file) const; +    bool HasHLEMacroState() const override { +        return has_hle_engine_state; +    } +  protected:      std::optional<u64> TryFindSize(); @@ -73,6 +77,7 @@ protected:      std::unordered_map<u32, Shader::TextureType> texture_types;      std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;      std::unordered_map<u64, u32> cbuf_values; +    std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;      u32 local_memory_size{};      u32 texture_bound{}; @@ -89,6 +94,7 @@ protected:      u32 viewport_transform_state = 1;      bool has_unbound_instructions = false; +    bool has_hle_engine_state = false;  };  class GraphicsEnvironment final : public GenericEnvironment { @@ -109,6 +115,8 @@ public:      u32 ReadViewportTransformState() override; +    std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank, u32 offset) override; +  private:      Tegra::Engines::Maxwell3D* maxwell3d{};      size_t stage_index{}; @@ -131,6 +139,11 @@ public:      u32 ReadViewportTransformState() override; +    std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer( +        [[maybe_unused]] u32 bank, [[maybe_unused]] u32 offset) override { +        return std::nullopt; +    } +  private:      Tegra::Engines::KeplerCompute* kepler_compute{};  }; @@ -166,6 +179,13 @@ public:      [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override; +    [[nodiscard]] std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank, +                                                                               u32 offset) override; + +    [[nodiscard]] bool HasHLEMacroState() const override { +        return cbuf_replacements.size() != 0; +    } +      void Dump(u64 hash) override;  private: @@ -173,6 +193,7 @@ private:      std::unordered_map<u32, Shader::TextureType> texture_types;      std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;      std::unordered_map<u64, u32> cbuf_values; +    std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;      std::array<u32, 3> workgroup_size{};      u32 local_memory_size{};      u32 shared_memory_size{}; | 
