diff options
| author | Fernando S <fsahmkow27@gmail.com> | 2022-12-08 12:41:39 +0100 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-12-08 12:41:39 +0100 | 
| commit | 41461514d6f3ba59dc027dbc4a88c0ffb570ea49 (patch) | |
| tree | c3cffc17ce63e66f5fa2921e4804c396140a9791 | |
| parent | bfdd512787fa57e192de290c8e5b0427da106c2e (diff) | |
| parent | bf0b957c05013f33855e67c31a48e61b1e86d356 (diff) | |
Merge pull request #9401 from vonchenplus/draw_manager
video_core: Implement maxwell3d draw manager and split draw logic
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 32 | ||||
| -rw-r--r-- | src/video_core/engines/draw_manager.cpp | 191 | ||||
| -rw-r--r-- | src/video_core/engines/draw_manager.h | 69 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 171 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 25 | ||||
| -rw-r--r-- | src/video_core/macro/macro_hle.cpp | 69 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 18 | 
12 files changed, 341 insertions, 267 deletions
| diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 5096d935e..06e44d5b5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -33,6 +33,8 @@ add_library(video_core STATIC      engines/sw_blitter/converter.cpp      engines/sw_blitter/converter.h      engines/const_buffer_info.h +    engines/draw_manager.cpp +    engines/draw_manager.h      engines/engine_interface.h      engines/engine_upload.cpp      engines/engine_upload.h diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6881b34c4..502b4d90a 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -26,6 +26,7 @@  #include "video_core/control/channel_state_cache.h"  #include "video_core/delayed_destruction_ring.h"  #include "video_core/dirty_flags.h" +#include "video_core/engines/draw_manager.h"  #include "video_core/engines/kepler_compute.h"  #include "video_core/engines/maxwell_3d.h"  #include "video_core/memory_manager.h" @@ -664,9 +665,10 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {      if (is_indexed) {          BindHostIndexBuffer();      } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { -        const auto& regs = maxwell3d->regs; -        if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { -            runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count); +        const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); +        if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) { +            runtime.BindQuadArrayIndexBuffer(draw_state.vertex_buffer.first, +                                             draw_state.vertex_buffer.count);          }      }      BindHostVertexBuffers(); @@ -993,28 +995,29 @@ void BufferCache<P>::BindHostIndexBuffer() {      TouchBuffer(buffer, index_buffer.buffer_id);      const u32 offset = buffer.Offset(index_buffer.cpu_addr);      const u32 size = index_buffer.size; -    if (maxwell3d->inline_index_draw_indexes.size()) { +    const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); +    if (!draw_state.inline_index_draw_indexes.empty()) {          if constexpr (USE_MEMORY_MAPS) {              auto upload_staging = runtime.UploadStagingBuffer(size);              std::array<BufferCopy, 1> copies{                  {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};              std::memcpy(upload_staging.mapped_span.data(), -                        maxwell3d->inline_index_draw_indexes.data(), size); +                        draw_state.inline_index_draw_indexes.data(), size);              runtime.CopyBuffer(buffer, upload_staging.buffer, copies);          } else { -            buffer.ImmediateUpload(0, maxwell3d->inline_index_draw_indexes); +            buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);          }      } else {          SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);      }      if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { -        const u32 new_offset = offset + maxwell3d->regs.index_buffer.first * -                                            maxwell3d->regs.index_buffer.FormatSizeInBytes(); +        const u32 new_offset = +            offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes();          runtime.BindIndexBuffer(buffer, new_offset, size);      } else { -        runtime.BindIndexBuffer(maxwell3d->regs.draw.topology, maxwell3d->regs.index_buffer.format, -                                maxwell3d->regs.index_buffer.first, -                                maxwell3d->regs.index_buffer.count, buffer, offset, size); +        runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format, +                                draw_state.index_buffer.first, draw_state.index_buffer.count, +                                buffer, offset, size);      }  } @@ -1282,15 +1285,16 @@ template <class P>  void BufferCache<P>::UpdateIndexBuffer() {      // We have to check for the dirty flags and index count      // The index count is currently changed without updating the dirty flags -    const auto& index_array = maxwell3d->regs.index_buffer; +    const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); +    const auto& index_array = draw_state.index_buffer;      auto& flags = maxwell3d->dirty.flags;      if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {          return;      }      flags[Dirty::IndexBuffer] = false;      last_index_count = index_array.count; -    if (maxwell3d->inline_index_draw_indexes.size()) { -        auto inline_index_size = static_cast<u32>(maxwell3d->inline_index_draw_indexes.size()); +    if (!draw_state.inline_index_draw_indexes.empty()) { +        auto inline_index_size = static_cast<u32>(draw_state.inline_index_draw_indexes.size());          index_buffer = Binding{              .cpu_addr = 0,              .size = inline_index_size, diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp new file mode 100644 index 000000000..c59524e58 --- /dev/null +++ b/src/video_core/engines/draw_manager.cpp @@ -0,0 +1,191 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/dirty_flags.h" +#include "video_core/engines/draw_manager.h" +#include "video_core/rasterizer_interface.h" + +namespace Tegra::Engines { +DrawManager::DrawManager(Maxwell3D* maxwell3d_) : maxwell3d(maxwell3d_) {} + +void DrawManager::ProcessMethodCall(u32 method, u32 argument) { +    const auto& regs{maxwell3d->regs}; +    switch (method) { +    case MAXWELL3D_REG_INDEX(clear_surface): +        return Clear(1); +    case MAXWELL3D_REG_INDEX(draw.begin): +        return DrawBegin(); +    case MAXWELL3D_REG_INDEX(draw.end): +        return DrawEnd(); +    case MAXWELL3D_REG_INDEX(vertex_buffer.first): +    case MAXWELL3D_REG_INDEX(vertex_buffer.count): +    case MAXWELL3D_REG_INDEX(index_buffer.first): +        break; +    case MAXWELL3D_REG_INDEX(index_buffer.count): +        draw_state.draw_indexed = true; +        break; +    case MAXWELL3D_REG_INDEX(index_buffer32_subsequent): +    case MAXWELL3D_REG_INDEX(index_buffer16_subsequent): +    case MAXWELL3D_REG_INDEX(index_buffer8_subsequent): +        draw_state.instance_count++; +        [[fallthrough]]; +    case MAXWELL3D_REG_INDEX(index_buffer32_first): +    case MAXWELL3D_REG_INDEX(index_buffer16_first): +    case MAXWELL3D_REG_INDEX(index_buffer8_first): +        return DrawIndexSmall(argument); +    case MAXWELL3D_REG_INDEX(draw_inline_index): +        SetInlineIndexBuffer(argument); +        break; +    case MAXWELL3D_REG_INDEX(inline_index_2x16.even): +        SetInlineIndexBuffer(regs.inline_index_2x16.even); +        SetInlineIndexBuffer(regs.inline_index_2x16.odd); +        break; +    case MAXWELL3D_REG_INDEX(inline_index_4x8.index0): +        SetInlineIndexBuffer(regs.inline_index_4x8.index0); +        SetInlineIndexBuffer(regs.inline_index_4x8.index1); +        SetInlineIndexBuffer(regs.inline_index_4x8.index2); +        SetInlineIndexBuffer(regs.inline_index_4x8.index3); +        break; +    case MAXWELL3D_REG_INDEX(topology_override): +        use_topology_override = true; +        break; +    default: +        break; +    } +} + +void DrawManager::Clear(u32 layer_count) { +    maxwell3d->rasterizer->Clear(layer_count); +} + +void DrawManager::DrawDeferred() { +    if (draw_state.draw_mode != DrawMode::Instance || draw_state.instance_count == 0) +        return; +    DrawEnd(draw_state.instance_count + 1, true); +    draw_state.instance_count = 0; +} + +void DrawManager::DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count, +                            u32 base_instance, u32 num_instances) { +    draw_state.topology = topology; +    draw_state.vertex_buffer.first = vertex_first; +    draw_state.vertex_buffer.count = vertex_count; +    draw_state.base_instance = base_instance; +    ProcessDraw(false, num_instances); +} + +void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, +                            u32 base_index, u32 base_instance, u32 num_instances) { +    const auto& regs{maxwell3d->regs}; +    draw_state.topology = topology; +    draw_state.index_buffer = regs.index_buffer; +    draw_state.index_buffer.first = index_first; +    draw_state.index_buffer.count = index_count; +    draw_state.base_index = base_index; +    draw_state.base_instance = base_instance; +    ProcessDraw(true, num_instances); +} + +void DrawManager::SetInlineIndexBuffer(u32 index) { +    draw_state.inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff)); +    draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8)); +    draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x00ff0000) >> 16)); +    draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0xff000000) >> 24)); +    draw_state.draw_mode = DrawMode::InlineIndex; +} + +void DrawManager::DrawBegin() { +    const auto& regs{maxwell3d->regs}; +    auto reset_instance_count = regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First; +    auto increment_instance_count = +        regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent; +    if (reset_instance_count) { +        DrawDeferred(); +        draw_state.instance_count = 0; +        draw_state.draw_mode = DrawMode::General; +    } else if (increment_instance_count) { +        draw_state.instance_count++; +        draw_state.draw_mode = DrawMode::Instance; +    } + +    draw_state.topology = regs.draw.topology; +} + +void DrawManager::DrawEnd(u32 instance_count, bool force_draw) { +    const auto& regs{maxwell3d->regs}; +    switch (draw_state.draw_mode) { +    case DrawMode::Instance: +        if (!force_draw) +            break; +        [[fallthrough]]; +    case DrawMode::General: +        draw_state.base_instance = regs.global_base_instance_index; +        draw_state.base_index = regs.global_base_vertex_index; +        if (draw_state.draw_indexed) { +            draw_state.index_buffer = regs.index_buffer; +            ProcessDraw(true, instance_count); +        } else { +            draw_state.vertex_buffer = regs.vertex_buffer; +            ProcessDraw(false, instance_count); +        } +        draw_state.draw_indexed = false; +        break; +    case DrawMode::InlineIndex: +        draw_state.base_instance = regs.global_base_instance_index; +        draw_state.base_index = regs.global_base_vertex_index; +        draw_state.index_buffer = regs.index_buffer; +        draw_state.index_buffer.count = +            static_cast<u32>(draw_state.inline_index_draw_indexes.size() / 4); +        draw_state.index_buffer.format = Maxwell3D::Regs::IndexFormat::UnsignedInt; +        ProcessDraw(true, instance_count); +        draw_state.inline_index_draw_indexes.clear(); +        break; +    } +} + +void DrawManager::DrawIndexSmall(u32 argument) { +    const auto& regs{maxwell3d->regs}; +    IndexBufferSmall index_small_params{argument}; +    draw_state.base_instance = regs.global_base_instance_index; +    draw_state.base_index = regs.global_base_vertex_index; +    draw_state.index_buffer = regs.index_buffer; +    draw_state.index_buffer.first = index_small_params.first; +    draw_state.index_buffer.count = index_small_params.count; +    draw_state.topology = index_small_params.topology; +    maxwell3d->dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; +    ProcessDraw(true, 1); +} + +void DrawManager::ProcessTopologyOverride() { +    if (!use_topology_override) +        return; + +    const auto& regs{maxwell3d->regs}; +    switch (regs.topology_override) { +    case PrimitiveTopologyOverride::None: +        break; +    case PrimitiveTopologyOverride::Points: +        draw_state.topology = PrimitiveTopology::Points; +        break; +    case PrimitiveTopologyOverride::Lines: +        draw_state.topology = PrimitiveTopology::Lines; +        break; +    case PrimitiveTopologyOverride::LineStrip: +        draw_state.topology = PrimitiveTopology::LineStrip; +        break; +    default: +        draw_state.topology = static_cast<PrimitiveTopology>(regs.topology_override); +        break; +    } +} + +void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) { +    LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology.Value(), +              draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count); + +    ProcessTopologyOverride(); + +    if (maxwell3d->ShouldExecute()) +        maxwell3d->rasterizer->Draw(draw_indexed, instance_count); +} +} // namespace Tegra::Engines diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h new file mode 100644 index 000000000..4f67027ca --- /dev/null +++ b/src/video_core/engines/draw_manager.h @@ -0,0 +1,69 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" + +namespace VideoCore { +class RasterizerInterface; +} + +namespace Tegra::Engines { +using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology; +using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride; +using IndexBuffer = Maxwell3D::Regs::IndexBuffer; +using VertexBuffer = Maxwell3D::Regs::VertexBuffer; +using IndexBufferSmall = Maxwell3D::Regs::IndexBufferSmall; + +class DrawManager { +public: +    enum class DrawMode : u32 { General = 0, Instance, InlineIndex }; +    struct State { +        PrimitiveTopology topology{}; +        DrawMode draw_mode{}; +        bool draw_indexed{}; +        u32 base_index{}; +        VertexBuffer vertex_buffer; +        IndexBuffer index_buffer; +        u32 base_instance{}; +        u32 instance_count{}; +        std::vector<u8> inline_index_draw_indexes; +    }; + +    explicit DrawManager(Maxwell3D* maxwell_3d); + +    void ProcessMethodCall(u32 method, u32 argument); + +    void Clear(u32 layer_count); + +    void DrawDeferred(); + +    void DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count, +                   u32 base_instance, u32 num_instances); + +    void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index, +                   u32 base_instance, u32 num_instances); + +    const State& GetDrawState() const { +        return draw_state; +    } + +private: +    void SetInlineIndexBuffer(u32 index); + +    void DrawBegin(); + +    void DrawEnd(u32 instance_count = 1, bool force_draw = false); + +    void DrawIndexSmall(u32 argument); + +    void ProcessTopologyOverride(); + +    void ProcessDraw(bool draw_indexed, u32 instance_count); + +    Maxwell3D* maxwell3d{}; +    State draw_state{}; +    bool use_topology_override{}; +}; +} // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index fb9b9b94e..9b182b653 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -7,6 +7,7 @@  #include "core/core.h"  #include "core/core_timing.h"  #include "video_core/dirty_flags.h" +#include "video_core/engines/draw_manager.h"  #include "video_core/engines/maxwell_3d.h"  #include "video_core/gpu.h"  #include "video_core/memory_manager.h" @@ -21,8 +22,10 @@ using VideoCore::QueryType;  constexpr u32 MacroRegistersStart = 0xE00;  Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) -    : system{system_}, memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, -      upload_state{memory_manager, regs.upload} { +    : draw_manager{std::make_unique<DrawManager>(this)}, system{system_}, +      memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, upload_state{ +                                                                                memory_manager, +                                                                                regs.upload} {      dirty.flags.flip();      InitializeRegisterDefaults();  } @@ -116,16 +119,6 @@ void Maxwell3D::InitializeRegisterDefaults() {      regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill;      shadow_state = regs; - -    draw_command[MAXWELL3D_REG_INDEX(draw.end)] = true; -    draw_command[MAXWELL3D_REG_INDEX(draw.begin)] = true; -    draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.first)] = true; -    draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true; -    draw_command[MAXWELL3D_REG_INDEX(index_buffer.first)] = true; -    draw_command[MAXWELL3D_REG_INDEX(index_buffer.count)] = true; -    draw_command[MAXWELL3D_REG_INDEX(draw_inline_index)] = true; -    draw_command[MAXWELL3D_REG_INDEX(inline_index_2x16.even)] = true; -    draw_command[MAXWELL3D_REG_INDEX(inline_index_4x8.index0)] = true;  }  void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) { @@ -213,29 +206,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume          return ProcessCBBind(3);      case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config):          return ProcessCBBind(4); -    case MAXWELL3D_REG_INDEX(index_buffer32_first): -        regs.index_buffer.count = regs.index_buffer32_first.count; -        regs.index_buffer.first = regs.index_buffer32_first.first; -        dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; -        draw_indexed = true; -        return ProcessDraw(); -    case MAXWELL3D_REG_INDEX(index_buffer16_first): -        regs.index_buffer.count = regs.index_buffer16_first.count; -        regs.index_buffer.first = regs.index_buffer16_first.first; -        dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; -        draw_indexed = true; -        return ProcessDraw(); -    case MAXWELL3D_REG_INDEX(index_buffer8_first): -        regs.index_buffer.count = regs.index_buffer8_first.count; -        regs.index_buffer.first = regs.index_buffer8_first.first; -        dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; -        draw_indexed = true; -        return ProcessDraw(); -    case MAXWELL3D_REG_INDEX(topology_override): -        use_topology_override = true; -        return; -    case MAXWELL3D_REG_INDEX(clear_surface): -        return ProcessClearBuffers(1);      case MAXWELL3D_REG_INDEX(report_semaphore.query):          return ProcessQueryGet();      case MAXWELL3D_REG_INDEX(render_enable.mode): @@ -254,6 +224,9 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume          return rasterizer->FragmentBarrier();      case MAXWELL3D_REG_INDEX(tiled_cache_barrier):          return rasterizer->TiledCacheBarrier(); +    default: +        draw_manager->ProcessMethodCall(method, argument); +        break;      }  } @@ -268,7 +241,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)      // Execute the current macro.      macro_engine->Execute(macro_positions[entry], parameters); -    ProcessDeferredDraw(); +    draw_manager->DrawDeferred();  }  void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { @@ -291,62 +264,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {      const u32 argument = ProcessShadowRam(method, method_argument);      ProcessDirtyRegisters(method, argument); -    if (draw_command[method]) { -        regs.reg_array[method] = method_argument; -        deferred_draw_method.push_back(method); -        auto update_inline_index = [&](const u32 index) { -            inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff)); -            inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8)); -            inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x00ff0000) >> 16)); -            inline_index_draw_indexes.push_back(static_cast<u8>((index & 0xff000000) >> 24)); -            draw_mode = DrawMode::InlineIndex; -        }; -        switch (method) { -        case MAXWELL3D_REG_INDEX(draw.begin): { -            draw_mode = -                (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) || -                        (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged) -                    ? DrawMode::Instance -                    : DrawMode::General; -            break; -        } -        case MAXWELL3D_REG_INDEX(draw.end): -            switch (draw_mode) { -            case DrawMode::General: -                ProcessDraw(); -                break; -            case DrawMode::InlineIndex: -                regs.index_buffer.count = static_cast<u32>(inline_index_draw_indexes.size() / 4); -                regs.index_buffer.format = Regs::IndexFormat::UnsignedInt; -                draw_indexed = true; -                ProcessDraw(); -                inline_index_draw_indexes.clear(); -                break; -            case DrawMode::Instance: -                break; -            } -            break; -        case MAXWELL3D_REG_INDEX(index_buffer.count): -            draw_indexed = true; -            break; -        case MAXWELL3D_REG_INDEX(draw_inline_index): -            update_inline_index(method_argument); -            break; -        case MAXWELL3D_REG_INDEX(inline_index_2x16.even): -            update_inline_index(regs.inline_index_2x16.even); -            update_inline_index(regs.inline_index_2x16.odd); -            break; -        case MAXWELL3D_REG_INDEX(inline_index_4x8.index0): -            update_inline_index(regs.inline_index_4x8.index0); -            update_inline_index(regs.inline_index_4x8.index1); -            update_inline_index(regs.inline_index_4x8.index2); -            update_inline_index(regs.inline_index_4x8.index3); -            break; -        } -    } else { -        ProcessDeferredDraw(); -        ProcessMethodCall(method, argument, method_argument, is_last_call); -    } +    ProcessMethodCall(method, argument, method_argument, is_last_call);  }  void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, @@ -387,35 +305,6 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,      }  } -void Maxwell3D::ProcessTopologyOverride() { -    using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology; -    using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride; - -    PrimitiveTopology topology{}; - -    switch (regs.topology_override) { -    case PrimitiveTopologyOverride::None: -        topology = regs.draw.topology; -        break; -    case PrimitiveTopologyOverride::Points: -        topology = PrimitiveTopology::Points; -        break; -    case PrimitiveTopologyOverride::Lines: -        topology = PrimitiveTopology::Lines; -        break; -    case PrimitiveTopologyOverride::LineStrip: -        topology = PrimitiveTopology::LineStrip; -        break; -    default: -        topology = static_cast<PrimitiveTopology>(regs.topology_override); -        break; -    } - -    if (use_topology_override) { -        regs.draw.topology.Assign(topology); -    } -} -  void Maxwell3D::ProcessMacroUpload(u32 data) {      macro_engine->AddCode(regs.load_mme.instruction_ptr++, data);  } @@ -635,44 +524,4 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {      return regs.reg_array[method];  } -void Maxwell3D::ProcessClearBuffers(u32 layer_count) { -    rasterizer->Clear(layer_count); -} - -void Maxwell3D::ProcessDraw(u32 instance_count) { -    LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), -              draw_indexed ? regs.index_buffer.count : regs.vertex_buffer.count); - -    ProcessTopologyOverride(); - -    if (ShouldExecute()) { -        rasterizer->Draw(draw_indexed, instance_count); -    } - -    draw_indexed = false; -    deferred_draw_method.clear(); -} - -void Maxwell3D::ProcessDeferredDraw() { -    if (draw_mode != DrawMode::Instance || deferred_draw_method.empty()) { -        return; -    } - -    const auto method_count = deferred_draw_method.size(); -    u32 instance_count = 1; -    u32 vertex_buffer_count = 0; -    u32 index_buffer_count = 0; -    for (size_t index = 0; index < method_count; ++index) { -        const u32 method = deferred_draw_method[index]; -        if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count)) { -            instance_count = ++vertex_buffer_count; -        } else if (method == MAXWELL3D_REG_INDEX(index_buffer.count)) { -            instance_count = ++index_buffer_count; -        } -    } -    ASSERT_MSG(!(vertex_buffer_count && index_buffer_count), "Instance both indexed and direct?"); - -    ProcessDraw(instance_count); -} -  } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index a541cd95f..22b904319 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -37,6 +37,8 @@ class RasterizerInterface;  namespace Tegra::Engines { +class DrawManager; +  /**   * This Engine is known as GF100_3D. Documentation can be found in:   * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/3d/clb197.h @@ -2223,6 +2225,7 @@ public:          struct IndexBufferSmall {              union { +                u32 raw;                  BitField<0, 16, u32> first;                  BitField<16, 12, u32> count;                  BitField<28, 4, PrimitiveTopology> topology; @@ -3061,10 +3064,8 @@ public:          Tables tables{};      } dirty; -    std::vector<u8> inline_index_draw_indexes; - -    /// Handles a write to the CLEAR_BUFFERS register. -    void ProcessClearBuffers(u32 layer_count); +    std::unique_ptr<DrawManager> draw_manager; +    friend class DrawManager;  private:      void InitializeRegisterDefaults(); @@ -3122,15 +3123,6 @@ private:      /// Handles a write to the CB_BIND register.      void ProcessCBBind(size_t stage_index); -    /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro) -    void ProcessTopologyOverride(); - -    /// Handles deferred draw(e.g., instance draw). -    void ProcessDeferredDraw(); - -    /// Handles a draw. -    void ProcessDraw(u32 instance_count = 1); -      /// Returns a query's value or an empty object if the value will be deferred through a cache.      std::optional<u64> GetQueryResult(); @@ -3153,13 +3145,6 @@ private:      Upload::State upload_state;      bool execute_on{true}; -    bool use_topology_override{false}; - -    std::array<bool, Regs::NUM_REGS> draw_command{}; -    std::vector<u32> deferred_draw_method; -    enum class DrawMode : u32 { General = 0, Instance, InlineIndex }; -    DrawMode draw_mode{DrawMode::General}; -    bool draw_indexed{};  };  #define ASSERT_REG_POSITION(field_name, position)                                                  \ diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 0f3262edb..8549db2e4 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -5,6 +5,7 @@  #include <vector>  #include "common/scope_exit.h"  #include "video_core/dirty_flags.h" +#include "video_core/engines/draw_manager.h"  #include "video_core/engines/maxwell_3d.h"  #include "video_core/macro/macro.h"  #include "video_core/macro/macro_hle.h" @@ -18,57 +19,33 @@ using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u3  // HLE'd functions  void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {      const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); - -    maxwell3d.regs.draw.topology.Assign( -        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0x3ffffff)); -    maxwell3d.regs.global_base_instance_index = parameters[5]; -    maxwell3d.regs.global_base_vertex_index = parameters[3]; -    maxwell3d.regs.index_buffer.count = parameters[1]; -    maxwell3d.regs.index_buffer.first = parameters[4]; - -    if (maxwell3d.ShouldExecute()) { -        maxwell3d.Rasterizer().Draw(true, instance_count); -    } -    maxwell3d.regs.index_buffer.count = 0; +    maxwell3d.draw_manager->DrawIndex( +        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0x3ffffff), +        parameters[4], parameters[1], parameters[3], parameters[5], instance_count);  }  void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {      const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); - -    maxwell3d.regs.vertex_buffer.first = parameters[3]; -    maxwell3d.regs.vertex_buffer.count = parameters[1]; -    maxwell3d.regs.global_base_instance_index = parameters[4]; -    maxwell3d.regs.draw.topology.Assign( -        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); - -    if (maxwell3d.ShouldExecute()) { -        maxwell3d.Rasterizer().Draw(false, instance_count); -    } -    maxwell3d.regs.vertex_buffer.count = 0; +    maxwell3d.draw_manager->DrawArray( +        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), +        parameters[3], parameters[1], parameters[4], instance_count);  }  void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {      const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);      const u32 element_base = parameters[4];      const u32 base_instance = parameters[5]; -    maxwell3d.regs.index_buffer.first = parameters[3];      maxwell3d.regs.vertex_id_base = element_base; -    maxwell3d.regs.index_buffer.count = parameters[1];      maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; -    maxwell3d.regs.global_base_vertex_index = element_base; -    maxwell3d.regs.global_base_instance_index = base_instance;      maxwell3d.CallMethod(0x8e3, 0x640, true);      maxwell3d.CallMethod(0x8e4, element_base, true);      maxwell3d.CallMethod(0x8e5, base_instance, true); -    maxwell3d.regs.draw.topology.Assign( -        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); -    if (maxwell3d.ShouldExecute()) { -        maxwell3d.Rasterizer().Draw(true, instance_count); -    } + +    maxwell3d.draw_manager->DrawIndex( +        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), +        parameters[3], parameters[1], element_base, base_instance, instance_count); +      maxwell3d.regs.vertex_id_base = 0x0; -    maxwell3d.regs.index_buffer.count = 0; -    maxwell3d.regs.global_base_vertex_index = 0x0; -    maxwell3d.regs.global_base_instance_index = 0x0;      maxwell3d.CallMethod(0x8e3, 0x640, true);      maxwell3d.CallMethod(0x8e4, 0x0, true);      maxwell3d.CallMethod(0x8e5, 0x0, true); @@ -79,9 +56,6 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&      SCOPE_EXIT({          // Clean everything.          maxwell3d.regs.vertex_id_base = 0x0; -        maxwell3d.regs.index_buffer.count = 0; -        maxwell3d.regs.global_base_vertex_index = 0x0; -        maxwell3d.regs.global_base_instance_index = 0x0;          maxwell3d.CallMethod(0x8e3, 0x640, true);          maxwell3d.CallMethod(0x8e4, 0x0, true);          maxwell3d.CallMethod(0x8e5, 0x0, true); @@ -93,9 +67,6 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&          // Nothing to do.          return;      } -    const auto topology = -        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); -    maxwell3d.regs.draw.topology.Assign(topology);      const u32 padding = parameters[3];      const std::size_t max_draws = parameters[4]; @@ -106,23 +77,17 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&      for (std::size_t index = first_draw; index < last_draw; index++) {          const std::size_t base = index * indirect_words + 5; -        const u32 num_vertices = parameters[base]; -        const u32 instance_count = parameters[base + 1]; -        const u32 first_index = parameters[base + 2];          const u32 base_vertex = parameters[base + 3];          const u32 base_instance = parameters[base + 4]; -        maxwell3d.regs.index_buffer.first = first_index;          maxwell3d.regs.vertex_id_base = base_vertex; -        maxwell3d.regs.index_buffer.count = num_vertices; -        maxwell3d.regs.global_base_vertex_index = base_vertex; -        maxwell3d.regs.global_base_instance_index = base_instance;          maxwell3d.CallMethod(0x8e3, 0x640, true);          maxwell3d.CallMethod(0x8e4, base_vertex, true);          maxwell3d.CallMethod(0x8e5, base_instance, true);          maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; -        if (maxwell3d.ShouldExecute()) { -            maxwell3d.Rasterizer().Draw(true, instance_count); -        } +        maxwell3d.draw_manager->DrawIndex( +            static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]), +            parameters[base + 2], parameters[base], base_vertex, base_instance, +            parameters[base + 1]);      }  } @@ -136,7 +101,7 @@ void HLE_EAD26C3E2109B06B(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&      ASSERT(clear_params.layer == 0);      maxwell3d.regs.clear_surface.raw = clear_params.raw; -    maxwell3d.ProcessClearBuffers(num_layers); +    maxwell3d.draw_manager->Clear(num_layers);  }  constexpr std::array<std::pair<u64, HLEFunction>, 5> hle_funcs{{ diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f71a316b6..64ed6f628 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -224,16 +224,18 @@ void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {      SyncState(); -    const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d->regs.draw.topology); +    const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); + +    const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(draw_state.topology);      BeginTransformFeedback(pipeline, primitive_mode); -    const GLuint base_instance = static_cast<GLuint>(maxwell3d->regs.global_base_instance_index); +    const GLuint base_instance = static_cast<GLuint>(draw_state.base_instance);      const GLsizei num_instances = static_cast<GLsizei>(instance_count);      if (is_indexed) { -        const GLint base_vertex = static_cast<GLint>(maxwell3d->regs.global_base_vertex_index); -        const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d->regs.index_buffer.count); +        const GLint base_vertex = static_cast<GLint>(draw_state.base_index); +        const GLsizei num_vertices = static_cast<GLsizei>(draw_state.index_buffer.count);          const GLvoid* const offset = buffer_cache_runtime.IndexOffset(); -        const GLenum format = MaxwellToGL::IndexFormat(maxwell3d->regs.index_buffer.format); +        const GLenum format = MaxwellToGL::IndexFormat(draw_state.index_buffer.format);          if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {              glDrawElements(primitive_mode, num_vertices, format, offset);          } else if (num_instances == 1 && base_instance == 0) { @@ -252,8 +254,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {                                                            base_instance);          }      } else { -        const GLint base_vertex = static_cast<GLint>(maxwell3d->regs.vertex_buffer.first); -        const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d->regs.vertex_buffer.count); +        const GLint base_vertex = static_cast<GLint>(draw_state.vertex_buffer.first); +        const GLsizei num_vertices = static_cast<GLsizei>(draw_state.vertex_buffer.count);          if (num_instances == 1 && base_instance == 0) {              glDrawArrays(primitive_mode, base_vertex, num_vertices);          } else if (base_instance == 0) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index a38060100..a59d0d24e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -22,6 +22,7 @@  #include "shader_recompiler/frontend/maxwell/control_flow.h"  #include "shader_recompiler/frontend/maxwell/translate_program.h"  #include "shader_recompiler/profile.h" +#include "video_core/engines/draw_manager.h"  #include "video_core/engines/kepler_compute.h"  #include "video_core/engines/maxwell_3d.h"  #include "video_core/memory_manager.h" @@ -327,7 +328,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {      const auto& regs{maxwell3d->regs};      graphics_key.raw = 0;      graphics_key.early_z.Assign(regs.mandated_early_z != 0 ? 1 : 0); -    graphics_key.gs_input_topology.Assign(regs.draw.topology.Value()); +    graphics_key.gs_input_topology.Assign(maxwell3d->draw_manager->GetDrawState().topology);      graphics_key.tessellation_primitive.Assign(regs.tessellation.params.domain_type.Value());      graphics_key.tessellation_spacing.Assign(regs.tessellation.params.spacing.Value());      graphics_key.tessellation_clockwise.Assign( @@ -371,7 +372,8 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n      // If games are using a small index count, we can assume these are full screen quads.      // Usually these shaders are only used once for building textures so we can assume they      // can't be built async -    if (maxwell3d->regs.index_buffer.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) { +    const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); +    if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) {          return pipeline;      }      return nullptr; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 5864e772b..e62b36822 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -8,6 +8,7 @@  #include "common/cityhash.h"  #include "common/common_types.h"  #include "common/polyfill_ranges.h" +#include "video_core/engines/draw_manager.h"  #include "video_core/renderer_vulkan/fixed_pipeline_state.h"  #include "video_core/renderer_vulkan/vk_state_tracker.h" @@ -50,12 +51,13 @@ void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell&  void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,                                   bool has_extended_dynamic_state, bool has_dynamic_vertex_input) {      const Maxwell& regs = maxwell3d.regs; +    const auto topology_ = maxwell3d.draw_manager->GetDrawState().topology;      const std::array enabled_lut{          regs.polygon_offset_point_enable,          regs.polygon_offset_line_enable,          regs.polygon_offset_fill_enable,      }; -    const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); +    const u32 topology_index = static_cast<u32>(topology_);      raw1 = 0;      extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0); @@ -78,7 +80,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,                                    Maxwell::Tessellation::OutputPrimitives::Triangles_CW);      logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);      logic_op.Assign(PackLogicOp(regs.logic_op.op)); -    topology.Assign(regs.draw.topology); +    topology.Assign(topology_);      msaa_mode.Assign(regs.anti_alias_samples_mode);      raw2 = 0; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 38a6b7488..81f5f3e11 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -507,7 +507,8 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const      // If games are using a small index count, we can assume these are full screen quads.      // Usually these shaders are only used once for building textures so we can assume they      // can't be built async -    if (maxwell3d->regs.index_buffer.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) { +    const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); +    if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) {          return pipeline;      }      return nullptr; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d8ad8815c..8d7a5e400 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -12,6 +12,7 @@  #include "common/scope_exit.h"  #include "common/settings.h"  #include "video_core/control/channel_state.h" +#include "video_core/engines/draw_manager.h"  #include "video_core/engines/kepler_compute.h"  #include "video_core/engines/maxwell_3d.h"  #include "video_core/renderer_vulkan/blit_image.h" @@ -36,6 +37,7 @@  namespace Vulkan {  using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using MaxwellDrawState = Tegra::Engines::DrawManager::State;  using VideoCommon::ImageViewId;  using VideoCommon::ImageViewType; @@ -127,16 +129,16 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u3      return scissor;  } -DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_indexed) { +DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, bool is_indexed) {      DrawParams params{ -        .base_instance = regs.global_base_instance_index, +        .base_instance = draw_state.base_instance,          .num_instances = num_instances, -        .base_vertex = is_indexed ? regs.global_base_vertex_index : regs.vertex_buffer.first, -        .num_vertices = is_indexed ? regs.index_buffer.count : regs.vertex_buffer.count, -        .first_index = is_indexed ? regs.index_buffer.first : 0, +        .base_vertex = is_indexed ? draw_state.base_index : draw_state.vertex_buffer.first, +        .num_vertices = is_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count, +        .first_index = is_indexed ? draw_state.index_buffer.first : 0,          .is_indexed = is_indexed,      }; -    if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { +    if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) {          // 6 triangle vertices per quad, base vertex is part of the index          // See BindQuadArrayIndexBuffer for more details          params.num_vertices = (params.num_vertices / 4) * 6; @@ -195,9 +197,9 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {      UpdateDynamicStates(); -    const auto& regs{maxwell3d->regs}; +    const auto& draw_state = maxwell3d->draw_manager->GetDrawState();      const u32 num_instances{instance_count}; -    const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_indexed)}; +    const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)};      scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {          if (draw_params.is_indexed) {              cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, | 
