diff options
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 7 | ||||
| -rw-r--r-- | src/video_core/host_shaders/vulkan_quad_indexed.comp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 23 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 299 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_buffer_cache.h | 13 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 8 | 
8 files changed, 245 insertions, 125 deletions
| diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6c8d98946..f1c60d1f3 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -666,9 +666,10 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {          BindHostIndexBuffer();      } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {          const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); -        if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) { -            runtime.BindQuadArrayIndexBuffer(draw_state.vertex_buffer.first, -                                             draw_state.vertex_buffer.count); +        if (draw_state.topology == Maxwell::PrimitiveTopology::Quads || +            draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) { +            runtime.BindQuadIndexBuffer(draw_state.topology, draw_state.vertex_buffer.first, +                                        draw_state.vertex_buffer.count);          }      }      BindHostVertexBuffers(); diff --git a/src/video_core/host_shaders/vulkan_quad_indexed.comp b/src/video_core/host_shaders/vulkan_quad_indexed.comp index a412f30ff..066fe4a9c 100644 --- a/src/video_core/host_shaders/vulkan_quad_indexed.comp +++ b/src/video_core/host_shaders/vulkan_quad_indexed.comp @@ -16,6 +16,7 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {  layout (push_constant) uniform PushConstants {      uint base_vertex;      int index_shift; // 0: uint8, 1: uint16, 2: uint32 +    int is_strip; // 0: quads 1: quadstrip  };  void main() { @@ -28,9 +29,10 @@ void main() {      int flipped_shift = 2 - index_shift;      int mask = (1 << flipped_shift) - 1; -    const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3); +    const int quads_swizzle[6] = int[](0, 1, 2, 0, 2, 3); +    const int quad_strip_swizzle[6] = int[](0, 3, 1, 0, 2, 3);      for (uint vertex = 0; vertex < 6; ++vertex) { -        int offset = primitive * 4 + quad_swizzle[vertex]; +        int offset = (is_strip == 0 ? primitive * 4 + quads_swizzle[vertex] : primitive * 2 + quad_strip_swizzle[vertex]);          int int_offset = offset >> flipped_shift;          int bit_offset = (offset & mask) * index_size;          uint packed_input = input_indexes[int_offset]; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 3e03c5cd6..ca52e2389 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -301,6 +301,8 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,          return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;      case Maxwell::PrimitiveTopology::Lines:          return VK_PRIMITIVE_TOPOLOGY_LINE_LIST; +    case Maxwell::PrimitiveTopology::LineLoop: +        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;      case Maxwell::PrimitiveTopology::LineStrip:          return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;      case Maxwell::PrimitiveTopology::Triangles: @@ -309,15 +311,28 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,          return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;      case Maxwell::PrimitiveTopology::TriangleFan:          return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; +    case Maxwell::PrimitiveTopology::LinesAdjacency: +        return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; +    case Maxwell::PrimitiveTopology::LineStripAdjacency: +        return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY; +    case Maxwell::PrimitiveTopology::TrianglesAdjacency: +        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY; +    case Maxwell::PrimitiveTopology::TriangleStripAdjacency: +        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY;      case Maxwell::PrimitiveTopology::Quads: -        // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases +    case Maxwell::PrimitiveTopology::QuadStrip: +        // TODO: Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT/VK_PRIMITIVE_TOPOLOGY_QUAD_STRIP_EXT +        // whenever it releases          return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;      case Maxwell::PrimitiveTopology::Patches:          return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; -    default: -        UNIMPLEMENTED_MSG("Unimplemented topology={}", topology); -        return {}; +    case Maxwell::PrimitiveTopology::Polygon: +        LOG_WARNING(Render_Vulkan, "Draw mode is Polygon with a polygon mode of lines should be a " +                                   "single body and not a bunch of triangles."); +        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;      } +    UNIMPLEMENTED_MSG("Unimplemented topology={}", topology); +    return {};  }  VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 84d36fea6..6b54d7111 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -51,15 +51,6 @@ size_t BytesPerIndex(VkIndexType index_type) {      }  } -template <typename T> -std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) { -    std::array<T, 6> indices{0, 1, 2, 0, 2, 3}; -    for (T& index : indices) { -        index = static_cast<T>(first + index + quad * 4); -    } -    return indices; -} -  vk::Buffer CreateBuffer(const Device& device, u64 size) {      VkBufferUsageFlags flags =          VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | @@ -123,6 +114,187 @@ VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat      return *views.back().handle;  } +class QuadIndexBuffer { +public: +    QuadIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_, +                    Scheduler& scheduler_, StagingBufferPool& staging_pool_) +        : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, +          staging_pool{staging_pool_} {} + +    virtual ~QuadIndexBuffer() = default; + +    void UpdateBuffer(u32 num_indices_) { +        if (num_indices_ <= num_indices) { +            return; +        } + +        scheduler.Finish(); + +        num_indices = num_indices_; +        index_type = IndexTypeFromNumElements(device, num_indices); + +        const u32 num_quads = GetQuadsNum(num_indices); +        const u32 num_triangle_indices = num_quads * 6; +        const u32 num_first_offset_copies = 4; +        const size_t bytes_per_index = BytesPerIndex(index_type); +        const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies; +        buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ +            .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, +            .pNext = nullptr, +            .flags = 0, +            .size = size_bytes, +            .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, +            .sharingMode = VK_SHARING_MODE_EXCLUSIVE, +            .queueFamilyIndexCount = 0, +            .pQueueFamilyIndices = nullptr, +        }); +        if (device.HasDebuggingToolAttached()) { +            buffer.SetObjectNameEXT("Quad LUT"); +        } +        memory_commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); + +        const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload); +        u8* staging_data = staging.mapped_span.data(); +        const size_t quad_size = bytes_per_index * 6; + +        for (u32 first = 0; first < num_first_offset_copies; ++first) { +            for (u32 quad = 0; quad < num_quads; ++quad) { +                MakeAndUpdateIndices(staging_data, quad_size, quad, first); +                staging_data += quad_size; +            } +        } + +        scheduler.RequestOutsideRenderPassOperationContext(); +        scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, +                          dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) { +            const VkBufferCopy copy{ +                .srcOffset = src_offset, +                .dstOffset = 0, +                .size = size_bytes, +            }; +            const VkBufferMemoryBarrier write_barrier{ +                .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, +                .pNext = nullptr, +                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, +                .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .buffer = dst_buffer, +                .offset = 0, +                .size = size_bytes, +            }; +            cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); +            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, +                                   VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier); +        }); +    } + +    void BindBuffer(u32 first) { +        const VkIndexType index_type_ = index_type; +        const size_t sub_first_offset = static_cast<size_t>(first % 4) * GetQuadsNum(num_indices); +        const size_t offset = +            (sub_first_offset + GetQuadsNum(first)) * 6ULL * BytesPerIndex(index_type); +        scheduler.Record([buffer = *buffer, index_type_, offset](vk::CommandBuffer cmdbuf) { +            cmdbuf.BindIndexBuffer(buffer, offset, index_type_); +        }); +    } + +protected: +    virtual u32 GetQuadsNum(u32 num_indices) const = 0; + +    virtual void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) = 0; + +    const Device& device; +    MemoryAllocator& memory_allocator; +    Scheduler& scheduler; +    StagingBufferPool& staging_pool; + +    vk::Buffer buffer{}; +    MemoryCommit memory_commit{}; +    VkIndexType index_type{}; +    u32 num_indices = 0; +}; + +class QuadArrayIndexBuffer : public QuadIndexBuffer { +public: +    QuadArrayIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_, +                         Scheduler& scheduler_, StagingBufferPool& staging_pool_) +        : QuadIndexBuffer(device_, memory_allocator_, scheduler_, staging_pool_) {} + +    ~QuadArrayIndexBuffer() = default; + +private: +    u32 GetQuadsNum(u32 num_indices_) const override { +        return num_indices_ / 4; +    } + +    template <typename T> +    static std::array<T, 6> MakeIndices(u32 quad, u32 first) { +        std::array<T, 6> indices{0, 1, 2, 0, 2, 3}; +        for (T& index : indices) { +            index = static_cast<T>(first + index + quad * 4); +        } +        return indices; +    } + +    void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) { +        switch (index_type) { +        case VK_INDEX_TYPE_UINT8_EXT: +            std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size); +            break; +        case VK_INDEX_TYPE_UINT16: +            std::memcpy(staging_data, MakeIndices<u16>(quad, first).data(), quad_size); +            break; +        case VK_INDEX_TYPE_UINT32: +            std::memcpy(staging_data, MakeIndices<u32>(quad, first).data(), quad_size); +            break; +        default: +            ASSERT(false); +            break; +        } +    } +}; + +class QuadStripIndexBuffer : public QuadIndexBuffer { +public: +    QuadStripIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_, +                         Scheduler& scheduler_, StagingBufferPool& staging_pool_) +        : QuadIndexBuffer(device_, memory_allocator_, scheduler_, staging_pool_) {} + +    ~QuadStripIndexBuffer() = default; + +private: +    u32 GetQuadsNum(u32 num_indices_) const override { +        return num_indices_ >= 4 ? (num_indices_ - 2) / 2 : 0; +    } + +    template <typename T> +    static std::array<T, 6> MakeIndices(u32 quad, u32 first) { +        std::array<T, 6> indices{0, 3, 1, 0, 2, 3}; +        for (T& index : indices) { +            index = static_cast<T>(first + index + quad * 2); +        } +        return indices; +    } + +    void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) { +        switch (index_type) { +        case VK_INDEX_TYPE_UINT8_EXT: +            std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size); +            break; +        case VK_INDEX_TYPE_UINT16: +            std::memcpy(staging_data, MakeIndices<u16>(quad, first).data(), quad_size); +            break; +        case VK_INDEX_TYPE_UINT32: +            std::memcpy(staging_data, MakeIndices<u32>(quad, first).data(), quad_size); +            break; +        default: +            ASSERT(false); +            break; +        } +    } +}; +  BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,                                         Scheduler& scheduler_, StagingBufferPool& staging_pool_,                                         UpdateDescriptorQueue& update_descriptor_queue_, @@ -130,7 +302,12 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m      : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},        staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},        uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), -      quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {} +      quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) { +    quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_, +                                                                     scheduler_, staging_pool_); +    quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_, +                                                                     scheduler_, staging_pool_); +}  StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {      return staging_pool.Request(size, MemoryUsage::Upload); @@ -245,10 +422,11 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat      VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);      VkDeviceSize vk_offset = offset;      VkBuffer vk_buffer = buffer; -    if (topology == PrimitiveTopology::Quads) { +    if (topology == PrimitiveTopology::Quads || topology == PrimitiveTopology::QuadStrip) {          vk_index_type = VK_INDEX_TYPE_UINT32;          std::tie(vk_buffer, vk_offset) = -            quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset); +            quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset, +                                     topology == PrimitiveTopology::QuadStrip);      } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {          vk_index_type = VK_INDEX_TYPE_UINT16;          std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset); @@ -263,7 +441,7 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat      });  } -void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) { +void BufferCacheRuntime::BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count) {      if (count == 0) {          ReserveNullBuffer();          scheduler.Record([this](vk::CommandBuffer cmdbuf) { @@ -271,16 +449,14 @@ void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {          });          return;      } -    ReserveQuadArrayLUT(first + count, true); - -    // The LUT has the indices 0, 1, 2, and 3 copied as an array -    // To apply these 'first' offsets we can apply an offset based on the modulus. -    const VkIndexType index_type = quad_array_lut_index_type; -    const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4); -    const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type); -    scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) { -        cmdbuf.BindIndexBuffer(buffer, offset, index_type); -    }); + +    if (topology == PrimitiveTopology::Quads) { +        quad_array_index_buffer->UpdateBuffer(first + count); +        quad_array_index_buffer->BindBuffer(first); +    } else if (topology == PrimitiveTopology::QuadStrip) { +        quad_strip_index_buffer->UpdateBuffer(first + count); +        quad_strip_index_buffer->BindBuffer(first); +    }  }  void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, @@ -323,83 +499,6 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer,      });  } -void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) { -    if (num_indices <= current_num_indices) { -        return; -    } -    if (wait_for_idle) { -        scheduler.Finish(); -    } -    current_num_indices = num_indices; -    quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices); - -    const u32 num_quads = num_indices / 4; -    const u32 num_triangle_indices = num_quads * 6; -    const u32 num_first_offset_copies = 4; -    const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type); -    const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies; -    quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ -        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, -        .pNext = nullptr, -        .flags = 0, -        .size = size_bytes, -        .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, -        .sharingMode = VK_SHARING_MODE_EXCLUSIVE, -        .queueFamilyIndexCount = 0, -        .pQueueFamilyIndices = nullptr, -    }); -    if (device.HasDebuggingToolAttached()) { -        quad_array_lut.SetObjectNameEXT("Quad LUT"); -    } -    quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal); - -    const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload); -    u8* staging_data = staging.mapped_span.data(); -    const size_t quad_size = bytes_per_index * 6; -    for (u32 first = 0; first < num_first_offset_copies; ++first) { -        for (u32 quad = 0; quad < num_quads; ++quad) { -            switch (quad_array_lut_index_type) { -            case VK_INDEX_TYPE_UINT8_EXT: -                std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size); -                break; -            case VK_INDEX_TYPE_UINT16: -                std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size); -                break; -            case VK_INDEX_TYPE_UINT32: -                std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size); -                break; -            default: -                ASSERT(false); -                break; -            } -            staging_data += quad_size; -        } -    } -    scheduler.RequestOutsideRenderPassOperationContext(); -    scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, -                      dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) { -        const VkBufferCopy copy{ -            .srcOffset = src_offset, -            .dstOffset = 0, -            .size = size_bytes, -        }; -        const VkBufferMemoryBarrier write_barrier{ -            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, -            .pNext = nullptr, -            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, -            .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, -            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, -            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, -            .buffer = dst_buffer, -            .offset = 0, -            .size = size_bytes, -        }; -        cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); -        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, -                               0, write_barrier); -    }); -} -  void BufferCacheRuntime::ReserveNullBuffer() {      if (null_buffer) {          return; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index a15c8b39b..183b33632 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -50,6 +50,9 @@ private:      std::vector<BufferView> views;  }; +class QuadArrayIndexBuffer; +class QuadStripIndexBuffer; +  class BufferCacheRuntime {      friend Buffer; @@ -86,7 +89,7 @@ public:      void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,                           u32 base_vertex, VkBuffer buffer, u32 offset, u32 size); -    void BindQuadArrayIndexBuffer(u32 first, u32 count); +    void BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count);      void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride); @@ -118,8 +121,6 @@ private:          update_descriptor_queue.AddBuffer(buffer, offset, size);      } -    void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle); -      void ReserveNullBuffer();      const Device& device; @@ -128,10 +129,8 @@ private:      StagingBufferPool& staging_pool;      UpdateDescriptorQueue& update_descriptor_queue; -    vk::Buffer quad_array_lut; -    MemoryCommit quad_array_lut_commit; -    VkIndexType quad_array_lut_index_type{}; -    u32 current_num_indices = 0; +    std::shared_ptr<QuadArrayIndexBuffer> quad_array_index_buffer; +    std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer;      vk::Buffer null_buffer;      MemoryCommit null_buffer_commit; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 2c00979d7..1a316b6eb 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -245,7 +245,7 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, Scheduler& scheduler_,                                   UpdateDescriptorQueue& update_descriptor_queue_)      : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,                    INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, -                  COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV), +                  COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 3>, VULKAN_QUAD_INDEXED_COMP_SPV),        scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},        update_descriptor_queue{update_descriptor_queue_} {} @@ -253,7 +253,7 @@ QuadIndexedPass::~QuadIndexedPass() = default;  std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(      Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, -    VkBuffer src_buffer, u32 src_offset) { +    VkBuffer src_buffer, u32 src_offset, bool is_strip) {      const u32 index_shift = [index_format] {          switch (index_format) {          case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: @@ -267,7 +267,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(          return 2;      }();      const u32 input_size = num_vertices << index_shift; -    const u32 num_tri_vertices = (num_vertices / 4) * 6; +    const u32 num_tri_vertices = (is_strip ? (num_vertices - 2) / 2 : num_vertices / 4) * 6;      const std::size_t staging_size = num_tri_vertices * sizeof(u32);      const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); @@ -278,8 +278,8 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(      const void* const descriptor_data{update_descriptor_queue.UpdateData()};      scheduler.RequestOutsideRenderPassOperationContext(); -    scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex, -                      index_shift](vk::CommandBuffer cmdbuf) { +    scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex, index_shift, +                      is_strip](vk::CommandBuffer cmdbuf) {          static constexpr u32 DISPATCH_SIZE = 1024;          static constexpr VkMemoryBarrier WRITE_BARRIER{              .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -287,7 +287,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(              .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,              .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,          }; -        const std::array<u32, 2> push_constants{base_vertex, index_shift}; +        const std::array<u32, 3> push_constants{base_vertex, index_shift, is_strip ? 1u : 0u};          const VkDescriptorSet set = descriptor_allocator.Commit();          device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);          cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 5d32e3caf..c4c8fa081 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -74,7 +74,7 @@ public:      std::pair<VkBuffer, VkDeviceSize> Assemble(          Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, -        u32 base_vertex, VkBuffer src_buffer, u32 src_offset); +        u32 base_vertex, VkBuffer src_buffer, u32 src_offset, bool is_strip);  private:      Scheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 4b7126c30..ac1eb9895 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -138,12 +138,16 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances,          .first_index = is_indexed ? draw_state.index_buffer.first : 0,          .is_indexed = is_indexed,      }; +    // 6 triangle vertices per quad, base vertex is part of the index +    // See BindQuadIndexBuffer for more details      if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) { -        // 6 triangle vertices per quad, base vertex is part of the index -        // See BindQuadArrayIndexBuffer for more details          params.num_vertices = (params.num_vertices / 4) * 6;          params.base_vertex = 0;          params.is_indexed = true; +    } else if (draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) { +        params.num_vertices = (params.num_vertices - 2) / 2 * 6; +        params.base_vertex = 0; +        params.is_indexed = true;      }      return params;  } | 
