26 files changed, 1004 insertions, 338 deletions
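Note on the page-table change below (externals/dynarmic, src/core/arm/dynarmic/arm_dynarmic.cpp and src/core/memory.cpp): the host page table switches to "absolute offset" entries, i.e. each slot stores the host pointer minus the page's base virtual address, so the read/write fast path can add the full virtual address without masking off the page offset first (matching Dynarmic's absolute_offset_page_table = true). What follows is a minimal illustrative sketch of that layout under stated assumptions; PageTable, MapPage and GetPointer are simplified stand-in names, not yuzu's actual classes, and the 4 KiB page size is assumed.

#include <cstddef>
#include <cstdint>
#include <vector>

// Sketch only: each table entry holds `host_pointer - page_base_vaddr`,
// so a lookup adds the absolute vaddr instead of (vaddr & PAGE_MASK).
constexpr std::size_t PAGE_BITS = 12;                       // assumed 4 KiB pages
constexpr std::size_t PAGE_SIZE = std::size_t{1} << PAGE_BITS;

struct PageTable {
    std::vector<std::uint8_t*> pointers; // indexed by vaddr >> PAGE_BITS
};

inline void MapPage(PageTable& table, std::uint64_t vaddr, std::uint8_t* host_memory) {
    const std::uint64_t page = vaddr >> PAGE_BITS;
    // Store the biased pointer; adding the full vaddr later yields host_memory + page offset.
    table.pointers[page] = host_memory - (page << PAGE_BITS);
}

inline std::uint8_t* GetPointer(const PageTable& table, std::uint64_t vaddr) {
    std::uint8_t* const entry = table.pointers[vaddr >> PAGE_BITS];
    return entry ? entry + vaddr : nullptr; // fast path needs no PAGE_MASK
}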
diff --git a/externals/dynarmic b/externals/dynarmic -Subproject 087a74417abfb0a8ae3bc1463d0d476a9bf94e5 +Subproject f6ae9e1c3311b747b7b91fd903c62bf40b3b9c8 diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index f8c7f0efd..e825c0526 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -141,6 +141,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag config.page_table = reinterpret_cast<void**>(page_table.pointers.data()); config.page_table_address_space_bits = address_space_bits; config.silently_mirror_page_table = false; + config.absolute_offset_page_table = true; // Multi-process state config.processor_id = core_index; diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp index 2e53b3221..767158444 100644 --- a/src/core/hle/service/nifm/nifm.cpp +++ b/src/core/hle/service/nifm/nifm.cpp @@ -9,6 +9,7 @@ #include "core/hle/kernel/writable_event.h" #include "core/hle/service/nifm/nifm.h" #include "core/hle/service/service.h" +#include "core/settings.h" namespace Service::NIFM { @@ -86,7 +87,12 @@ private: IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); - rb.PushEnum(RequestState::Connected); + + if (Settings::values.bcat_backend == "none") { + rb.PushEnum(RequestState::NotSubmitted); + } else { + rb.PushEnum(RequestState::Connected); + } } void GetResult(Kernel::HLERequestContext& ctx) { @@ -194,14 +200,22 @@ private: IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); - rb.Push<u8>(1); + if (Settings::values.bcat_backend == "none") { + rb.Push<u8>(0); + } else { + rb.Push<u8>(1); + } } void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_NIFM, "(STUBBED) called"); IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); - rb.Push<u8>(1); + if (Settings::values.bcat_backend == "none") { + rb.Push<u8>(0); + } else { + rb.Push<u8>(1); + } } Core::System& system; }; diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index a58ea9c59..62752e419 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -198,7 +198,7 @@ void NVFlinger::Compose() { const auto& igbp_buffer = buffer->get().igbp_buffer; - const auto& gpu = system.GPU(); + auto& gpu = system.GPU(); const auto& multi_fence = buffer->get().multi_fence; for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { const auto& fence = multi_fence.fences[fence_id]; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 91bf07a92..3c2a29d9b 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -146,7 +146,7 @@ struct Memory::Impl { u8* GetPointer(const VAddr vaddr) { u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; if (page_pointer != nullptr) { - return page_pointer + (vaddr & PAGE_MASK); + return page_pointer + vaddr; } if (current_page_table->attributes[vaddr >> PAGE_BITS] == @@ -229,7 +229,8 @@ struct Memory::Impl { case Common::PageType::Memory: { DEBUG_ASSERT(page_table.pointers[page_index]); - const u8* const src_ptr = page_table.pointers[page_index] + page_offset; + const u8* const src_ptr = + page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); std::memcpy(dest_buffer, src_ptr, copy_amount); break; } @@ -276,7 +277,8 @@ struct Memory::Impl { case Common::PageType::Memory: { DEBUG_ASSERT(page_table.pointers[page_index]); - u8* const dest_ptr = 
page_table.pointers[page_index] + page_offset; + u8* const dest_ptr = + page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); std::memcpy(dest_ptr, src_buffer, copy_amount); break; } @@ -322,7 +324,8 @@ struct Memory::Impl { case Common::PageType::Memory: { DEBUG_ASSERT(page_table.pointers[page_index]); - u8* dest_ptr = page_table.pointers[page_index] + page_offset; + u8* dest_ptr = + page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); std::memset(dest_ptr, 0, copy_amount); break; } @@ -368,7 +371,8 @@ struct Memory::Impl { } case Common::PageType::Memory: { DEBUG_ASSERT(page_table.pointers[page_index]); - const u8* src_ptr = page_table.pointers[page_index] + page_offset; + const u8* src_ptr = + page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); WriteBlock(process, dest_addr, src_ptr, copy_amount); break; } @@ -446,7 +450,8 @@ struct Memory::Impl { page_type = Common::PageType::Unmapped; } else { page_type = Common::PageType::Memory; - current_page_table->pointers[vaddr >> PAGE_BITS] = pointer; + current_page_table->pointers[vaddr >> PAGE_BITS] = + pointer - (vaddr & ~PAGE_MASK); } break; } @@ -493,7 +498,9 @@ struct Memory::Impl { memory); } else { while (base != end) { - page_table.pointers[base] = memory; + page_table.pointers[base] = memory - (base << PAGE_BITS); + ASSERT_MSG(page_table.pointers[base], + "memory mapping base yield a nullptr within the table"); base += 1; memory += PAGE_SIZE; @@ -518,7 +525,7 @@ struct Memory::Impl { if (page_pointer != nullptr) { // NOTE: Avoid adding any extra logic to this fast-path block T value; - std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T)); + std::memcpy(&value, &page_pointer[vaddr], sizeof(T)); return value; } @@ -559,7 +566,7 @@ struct Memory::Impl { u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; if (page_pointer != nullptr) { // NOTE: Avoid adding any extra logic to this fast-path block - std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T)); + std::memcpy(&page_pointer[vaddr], &data, sizeof(T)); return; } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 65d7b9f93..c80171fe6 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -155,12 +155,16 @@ if (ENABLE_VULKAN) renderer_vulkan/maxwell_to_vk.h renderer_vulkan/vk_buffer_cache.cpp renderer_vulkan/vk_buffer_cache.h + renderer_vulkan/vk_descriptor_pool.cpp + renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_device.cpp renderer_vulkan/vk_device.h renderer_vulkan/vk_image.cpp renderer_vulkan/vk_image.h renderer_vulkan/vk_memory_manager.cpp renderer_vulkan/vk_memory_manager.h + renderer_vulkan/vk_renderpass_cache.cpp + renderer_vulkan/vk_renderpass_cache.h renderer_vulkan/vk_resource_manager.cpp renderer_vulkan/vk_resource_manager.h renderer_vulkan/vk_sampler_cache.cpp @@ -174,7 +178,9 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_stream_buffer.cpp renderer_vulkan/vk_stream_buffer.h renderer_vulkan/vk_swapchain.cpp - renderer_vulkan/vk_swapchain.h) + renderer_vulkan/vk_swapchain.h + renderer_vulkan/vk_update_descriptor.cpp + renderer_vulkan/vk_update_descriptor.h) target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) target_compile_definitions(video_core PRIVATE HAS_VULKAN) diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 095660115..b9c5c41a2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -66,19 +66,20 @@ const DmaPusher& GPU::DmaPusher() 
const { return *dma_pusher; } -void GPU::WaitFence(u32 syncpoint_id, u32 value) const { +void GPU::WaitFence(u32 syncpoint_id, u32 value) { // Synced GPU, is always in sync if (!is_async) { return; } MICROPROFILE_SCOPE(GPU_wait); - while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { - } + std::unique_lock lock{sync_mutex}; + sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; }); } void GPU::IncrementSyncPoint(const u32 syncpoint_id) { syncpoints[syncpoint_id]++; std::lock_guard lock{sync_mutex}; + sync_cv.notify_all(); if (!syncpt_interrupts[syncpoint_id].empty()) { u32 value = syncpoints[syncpoint_id].load(); auto it = syncpt_interrupts[syncpoint_id].begin(); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ecc338ae9..b648317bb 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -6,6 +6,7 @@ #include <array> #include <atomic> +#include <condition_variable> #include <list> #include <memory> #include <mutex> @@ -181,7 +182,7 @@ public: virtual void WaitIdle() const = 0; /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. - void WaitFence(u32 syncpoint_id, u32 value) const; + void WaitFence(u32 syncpoint_id, u32 value); void IncrementSyncPoint(u32 syncpoint_id); @@ -312,6 +313,8 @@ private: std::mutex sync_mutex; + std::condition_variable sync_cv; + const bool is_async; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index a311dbcfe..f9f7a97b5 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -751,6 +751,9 @@ private: Expression Visit(const Node& node) { if (const auto operation = std::get_if<OperationNode>(&*node)) { + if (const auto amend_index = operation->GetAmendIndex()) { + Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); + } const auto operation_index = static_cast<std::size_t>(operation->GetCode()); if (operation_index >= operation_decompilers.size()) { UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); @@ -872,6 +875,9 @@ private: } if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { + if (const auto amend_index = conditional->GetAmendIndex()) { + Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); + } // It's invalid to call conditional on nested nodes, use an operation instead code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); ++code.scope; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 46da81aaa..1ba544943 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -2,124 +2,145 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include <algorithm> #include <cstring> #include <memory> #include <optional> #include <tuple> -#include "common/alignment.h" #include "common/assert.h" -#include "core/memory.h" -#include "video_core/memory_manager.h" +#include "common/bit_util.h" +#include "core/core.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" namespace Vulkan { -CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, - std::size_t alignment, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, - alignment{alignment} {} - -VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, - Memory::Memory& cpu_memory_, - VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, - VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size) - : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager}, cpu_memory{ - cpu_memory_} { - const auto usage = vk::BufferUsageFlagBits::eVertexBuffer | - vk::BufferUsageFlagBits::eIndexBuffer | - vk::BufferUsageFlagBits::eUniformBuffer; - const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead | - vk::AccessFlagBits::eUniformRead; - stream_buffer = - std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access, - vk::PipelineStageFlagBits::eAllCommands); - buffer_handle = stream_buffer->GetBuffer(); -} +namespace { -VKBufferCache::~VKBufferCache() = default; +const auto BufferUsage = + vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | + vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer; + +const auto UploadPipelineStage = + vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput | + vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | + vk::PipelineStageFlagBits::eComputeShader; -u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) { - const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)}; - ASSERT_MSG(cpu_addr, "Invalid GPU address"); - - // Cache management is a big overhead, so only cache entries with a given size. - // TODO: Figure out which size is the best for given games. 
- cache &= size >= 2048; - - u8* const host_ptr{cpu_memory.GetPointer(*cpu_addr)}; - if (cache) { - const auto entry = TryGet(host_ptr); - if (entry) { - if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { - return entry->GetOffset(); - } - Unregister(entry); - } - } - - AlignBuffer(alignment); - const u64 uploaded_offset = buffer_offset; - - if (host_ptr == nullptr) { - return uploaded_offset; - } - - std::memcpy(buffer_ptr, host_ptr, size); - buffer_ptr += size; - buffer_offset += size; - - if (cache) { - auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset, - alignment, host_ptr); - Register(entry); - } - - return uploaded_offset; +const auto UploadAccessBarriers = + vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead | + vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead | + vk::AccessFlagBits::eIndexRead; + +auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { + return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage); } -u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) { - AlignBuffer(alignment); - std::memcpy(buffer_ptr, raw_pointer, size); - const u64 uploaded_offset = buffer_offset; +} // Anonymous namespace + +CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, + CacheAddr cache_addr, std::size_t size) + : VideoCommon::BufferBlock{cache_addr, size} { + const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), + BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | + vk::BufferUsageFlagBits::eTransferDst, + vk::SharingMode::eExclusive, 0, nullptr); - buffer_ptr += size; - buffer_offset += size; - return uploaded_offset; + const auto& dld{device.GetDispatchLoader()}; + const auto dev{device.GetLogical()}; + buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld); + buffer.commit = memory_manager.Commit(*buffer.handle, false); } -std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) { - AlignBuffer(alignment); - u8* const uploaded_ptr = buffer_ptr; - const u64 uploaded_offset = buffer_offset; +CachedBufferBlock::~CachedBufferBlock() = default; + +VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStagingBufferPool& staging_pool) + : VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system, + CreateStreamBuffer(device, + scheduler)}, + device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ + staging_pool} {} - buffer_ptr += size; - buffer_offset += size; - return {uploaded_ptr, uploaded_offset}; +VKBufferCache::~VKBufferCache() = default; + +Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { + return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size); } -void VKBufferCache::Reserve(std::size_t max_size) { - bool invalidate; - std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size); - buffer_offset = buffer_offset_base; +const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { + return buffer->GetHandle(); +} + +const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) { + size = std::max(size, std::size_t(4)); + const auto& empty = staging_pool.GetUnusedBuffer(size, false); + scheduler.RequestOutsideRenderPassOperationContext(); + 
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) { + cmdbuf.fillBuffer(buffer, 0, size, 0, dld); + }); + return &*empty.handle; +} - if (invalidate) { - InvalidateAll(); - } +void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + const u8* data) { + const auto& staging = staging_pool.GetUnusedBuffer(size, true); + std::memcpy(staging.commit->Map(size), data, size); + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, + size](auto cmdbuf, auto& dld) { + cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld); + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, + {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, + offset, size)}, + {}, dld); + }); } -void VKBufferCache::Send() { - stream_buffer->Send(buffer_offset - buffer_offset_base); +void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + u8* data) { + const auto& staging = staging_pool.GetUnusedBuffer(size, true); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, + size](auto cmdbuf, auto& dld) { + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | + vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eTransfer, {}, {}, + {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite, + vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)}, + {}, dld); + cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld); + }); + scheduler.Finish(); + + std::memcpy(data, staging.commit->Map(size), size); } -void VKBufferCache::AlignBuffer(std::size_t alignment) { - // Align the offset, not the mapped pointer - const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment); - buffer_ptr += offset_aligned - buffer_offset; - buffer_offset = offset_aligned; +void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, + std::size_t dst_offset, std::size_t size) { + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset, + dst_offset, size](auto cmdbuf, auto& dld) { + cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld); + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, + {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead, + vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size), + vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer, + dst_offset, size)}, + {}, dld); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index daa8ccf66..3f38eed0c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -5,105 +5,74 @@ #pragma once #include <memory> -#include <tuple> +#include <unordered_map> +#include <vector> #include "common/common_types.h" -#include "video_core/gpu.h" 
+#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/rasterizer_cache.h" #include "video_core/renderer_vulkan/declarations.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" -namespace Memory { -class Memory; -} - -namespace Tegra { -class MemoryManager; +namespace Core { +class System; } namespace Vulkan { class VKDevice; -class VKFence; class VKMemoryManager; -class VKStreamBuffer; +class VKScheduler; -class CachedBufferEntry final : public RasterizerCacheObject { +class CachedBufferBlock final : public VideoCommon::BufferBlock { public: - explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment, - u8* host_ptr); + explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, + CacheAddr cache_addr, std::size_t size); + ~CachedBufferBlock(); - VAddr GetCpuAddr() const override { - return cpu_addr; - } - - std::size_t GetSizeInBytes() const override { - return size; - } - - std::size_t GetSize() const { - return size; - } - - u64 GetOffset() const { - return offset; - } - - std::size_t GetAlignment() const { - return alignment; + const vk::Buffer* GetHandle() const { + return &*buffer.handle; } private: - VAddr cpu_addr{}; - std::size_t size{}; - u64 offset{}; - std::size_t alignment{}; + VKBuffer buffer; }; -class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { +using Buffer = std::shared_ptr<CachedBufferBlock>; + +class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> { public: - explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, Memory::Memory& cpu_memory_, - VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, - VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size); + explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStagingBufferPool& staging_pool); ~VKBufferCache(); - /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been - /// allocated. - u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true); + const vk::Buffer* GetEmptyBuffer(std::size_t size) override; - /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. - u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4); +protected: + void WriteBarrier() override {} - /// Reserves memory to be used by host's CPU. Returns mapped address and offset. - std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4); + Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; - /// Reserves a region of memory to be used in subsequent upload/reserve operations. - void Reserve(std::size_t max_size); + const vk::Buffer* ToHandle(const Buffer& buffer) override; - /// Ensures that the set data is sent to the device. - void Send(); + void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + const u8* data) override; - /// Returns the buffer cache handle. 
- vk::Buffer GetBuffer() const { - return buffer_handle; - } + void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + u8* data) override; -protected: - // We do not have to flush this cache as things in it are never modified by us. - void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} + void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, + std::size_t dst_offset, std::size_t size) override; private: - void AlignBuffer(std::size_t alignment); - - Tegra::MemoryManager& tegra_memory_manager; - Memory::Memory& cpu_memory; - - std::unique_ptr<VKStreamBuffer> stream_buffer; - vk::Buffer buffer_handle; - - u8* buffer_ptr = nullptr; - u64 buffer_offset = 0; - u64 buffer_offset_base = 0; + const VKDevice& device; + VKMemoryManager& memory_manager; + VKScheduler& scheduler; + VKStagingBufferPool& staging_pool; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp new file mode 100644 index 000000000..cc7c281a0 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -0,0 +1,89 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <memory> +#include <vector> + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" + +namespace Vulkan { + +// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. +constexpr std::size_t SETS_GROW_RATE = 0x20; + +DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool, + vk::DescriptorSetLayout layout) + : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {} + +DescriptorAllocator::~DescriptorAllocator() = default; + +vk::DescriptorSet DescriptorAllocator::Commit(VKFence& fence) { + return *descriptors[CommitResource(fence)]; +} + +void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { + auto new_sets = descriptor_pool.AllocateDescriptors(layout, end - begin); + descriptors.insert(descriptors.end(), std::make_move_iterator(new_sets.begin()), + std::make_move_iterator(new_sets.end())); +} + +VKDescriptorPool::VKDescriptorPool(const VKDevice& device) + : device{device}, active_pool{AllocateNewPool()} {} + +VKDescriptorPool::~VKDescriptorPool() = default; + +vk::DescriptorPool VKDescriptorPool::AllocateNewPool() { + static constexpr u32 num_sets = 0x20000; + static constexpr vk::DescriptorPoolSize pool_sizes[] = { + {vk::DescriptorType::eUniformBuffer, num_sets * 90}, + {vk::DescriptorType::eStorageBuffer, num_sets * 60}, + {vk::DescriptorType::eUniformTexelBuffer, num_sets * 64}, + {vk::DescriptorType::eCombinedImageSampler, num_sets * 64}, + {vk::DescriptorType::eStorageImage, num_sets * 40}}; + + const vk::DescriptorPoolCreateInfo create_info( + vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, num_sets, + static_cast<u32>(std::size(pool_sizes)), std::data(pool_sizes)); + const auto dev = device.GetLogical(); + return *pools.emplace_back( + dev.createDescriptorPoolUnique(create_info, nullptr, device.GetDispatchLoader())); +} + +std::vector<UniqueDescriptorSet> VKDescriptorPool::AllocateDescriptors( + vk::DescriptorSetLayout layout, std::size_t count) { + std::vector 
layout_copies(count, layout); + vk::DescriptorSetAllocateInfo allocate_info(active_pool, static_cast<u32>(count), + layout_copies.data()); + + std::vector<vk::DescriptorSet> sets(count); + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + switch (const auto result = dev.allocateDescriptorSets(&allocate_info, sets.data(), dld)) { + case vk::Result::eSuccess: + break; + case vk::Result::eErrorOutOfPoolMemory: + active_pool = AllocateNewPool(); + allocate_info.descriptorPool = active_pool; + if (dev.allocateDescriptorSets(&allocate_info, sets.data(), dld) == vk::Result::eSuccess) { + break; + } + [[fallthrough]]; + default: + vk::throwResultException(result, "vk::Device::allocateDescriptorSetsUnique"); + } + + vk::PoolFree deleter(dev, active_pool, dld); + std::vector<UniqueDescriptorSet> unique_sets; + unique_sets.reserve(count); + for (const auto set : sets) { + unique_sets.push_back(UniqueDescriptorSet{set, deleter}); + } + return unique_sets; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h new file mode 100644 index 000000000..a441dbc0f --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -0,0 +1,56 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <vector> + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" + +namespace Vulkan { + +class VKDescriptorPool; + +class DescriptorAllocator final : public VKFencedPool { +public: + explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, vk::DescriptorSetLayout layout); + ~DescriptorAllocator() override; + + DescriptorAllocator(const DescriptorAllocator&) = delete; + + vk::DescriptorSet Commit(VKFence& fence); + +protected: + void Allocate(std::size_t begin, std::size_t end) override; + +private: + VKDescriptorPool& descriptor_pool; + const vk::DescriptorSetLayout layout; + + std::vector<UniqueDescriptorSet> descriptors; +}; + +class VKDescriptorPool final { + friend DescriptorAllocator; + +public: + explicit VKDescriptorPool(const VKDevice& device); + ~VKDescriptorPool(); + +private: + vk::DescriptorPool AllocateNewPool(); + + std::vector<UniqueDescriptorSet> AllocateDescriptors(vk::DescriptorSetLayout layout, + std::size_t count); + + const VKDevice& device; + + std::vector<UniqueDescriptorPool> pools; + vk::DescriptorPool active_pool; +}; + +} // namespace Vulkan
\ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 0451babbf..9cc9979d0 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -6,6 +6,7 @@ #include <optional> #include <tuple> #include <vector> + #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" @@ -16,34 +17,32 @@ namespace Vulkan { -// TODO(Rodrigo): Fine tune this number -constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024; +namespace { + +u64 GetAllocationChunkSize(u64 required_size) { + static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20}; + auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size); + return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20); +} + +} // Anonymous namespace class VKMemoryAllocation final { public: explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, - vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type) - : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size}, - shifted_type{ShiftType(type)}, is_mappable{properties & - vk::MemoryPropertyFlagBits::eHostVisible} { - if (is_mappable) { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld)); - } - } + vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type) + : device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size}, + shifted_type{ShiftType(type)} {} ~VKMemoryAllocation() { const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); - if (is_mappable) - dev.unmapMemory(memory, dld); dev.free(memory, nullptr, dld); } VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) { - auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size), - static_cast<u64>(alignment)); + auto found = TryFindFreeSection(free_iterator, allocation_size, + static_cast<u64>(commit_size), static_cast<u64>(alignment)); if (!found) { found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size), static_cast<u64>(alignment)); @@ -52,8 +51,7 @@ public: return nullptr; } } - u8* address = is_mappable ? base_address + *found : nullptr; - auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found, + auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found, *found + commit_size); commits.push_back(commit.get()); @@ -65,12 +63,10 @@ public: void Free(const VKMemoryCommitImpl* commit) { ASSERT(commit); - const auto it = - std::find_if(commits.begin(), commits.end(), - [&](const auto& stored_commit) { return stored_commit == commit; }); + + const auto it = std::find(std::begin(commits), std::end(commits), commit); if (it == commits.end()) { - LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!"); - UNREACHABLE(); + UNREACHABLE_MSG("Freeing unallocated commit!"); return; } commits.erase(it); @@ -88,11 +84,11 @@ private: } /// A memory allocator, it may return a free region between "start" and "end" with the solicited - /// requeriments. + /// requirements. 
std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const { - u64 iterator = start; - while (iterator + size < end) { - const u64 try_left = Common::AlignUp(iterator, alignment); + u64 iterator = Common::AlignUp(start, alignment); + while (iterator + size <= end) { + const u64 try_left = iterator; const u64 try_right = try_left + size; bool overlap = false; @@ -100,7 +96,7 @@ private: const auto [commit_left, commit_right] = commit->interval; if (try_left < commit_right && commit_left < try_right) { // There's an overlap, continue the search where the overlapping commit ends. - iterator = commit_right; + iterator = Common::AlignUp(commit_right, alignment); overlap = true; break; } @@ -110,6 +106,7 @@ private: return try_left; } } + // No free regions where found, return an empty optional. return std::nullopt; } @@ -117,12 +114,8 @@ private: const VKDevice& device; ///< Vulkan device. const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. const vk::MemoryPropertyFlags properties; ///< Vulkan properties. - const u64 alloc_size; ///< Size of this allocation. + const u64 allocation_size; ///< Size of this allocation. const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted. - const bool is_mappable; ///< Whether the allocation is mappable. - - /// Base address of the mapped pointer. - u8* base_address{}; /// Hints where the next free region is likely going to be. u64 free_iterator{}; @@ -132,13 +125,15 @@ private: }; VKMemoryManager::VKMemoryManager(const VKDevice& device) - : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())}, - is_memory_unified{GetMemoryUnified(props)} {} + : device{device}, properties{device.GetPhysical().getMemoryProperties( + device.GetDispatchLoader())}, + is_memory_unified{GetMemoryUnified(properties)} {} VKMemoryManager::~VKMemoryManager() = default; -VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) { - ASSERT(reqs.size < ALLOC_CHUNK_SIZE); +VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements, + bool host_visible) { + const u64 chunk_size = GetAllocationChunkSize(requirements.size); // When a host visible commit is asked, search for host visible and coherent, otherwise search // for a fast device local type. @@ -147,32 +142,21 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent : vk::MemoryPropertyFlagBits::eDeviceLocal; - const auto TryCommit = [&]() -> VKMemoryCommit { - for (auto& alloc : allocs) { - if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits)) - continue; - - if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) { - return commit; - } - } - return {}; - }; - - if (auto commit = TryCommit(); commit) { + if (auto commit = TryAllocCommit(requirements, wanted_properties)) { return commit; } // Commit has failed, allocate more memory. - if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) { - // TODO(Rodrigo): Try to use host memory. - LOG_CRITICAL(Render_Vulkan, "Ran out of memory!"); - UNREACHABLE(); + if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) { + // TODO(Rodrigo): Handle these situations in some way like flushing to guest memory. + // Allocation has failed, panic. 
+ UNREACHABLE_MSG("Ran out of VRAM!"); + return {}; } // Commit again, this time it won't fail since there's a fresh allocation above. If it does, // there's a bug. - auto commit = TryCommit(); + auto commit = TryAllocCommit(requirements, wanted_properties); ASSERT(commit); return commit; } @@ -180,8 +164,7 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); - const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld); - auto commit = Commit(requeriments, host_visible); + auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible); dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld); return commit; } @@ -189,25 +172,23 @@ VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) { const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); - const auto requeriments = dev.getImageMemoryRequirements(image, dld); - auto commit = Commit(requeriments, host_visible); + auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible); dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld); return commit; } bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size) { - const u32 type = [&]() { - for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { - const auto flags = props.memoryTypes[type_index].propertyFlags; + const u32 type = [&] { + for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) { + const auto flags = properties.memoryTypes[type_index].propertyFlags; if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) { // The type matches in type and in the wanted properties. return type_index; } } - LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!"); - UNREACHABLE(); - return 0u; + UNREACHABLE_MSG("Couldn't find a compatible memory type!"); + return 0U; }(); const auto dev = device.GetLogical(); @@ -216,19 +197,33 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 // Try to allocate found type. 
const vk::MemoryAllocateInfo memory_ai(size, type); vk::DeviceMemory memory; - if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); + if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); res != vk::Result::eSuccess) { LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res)); return false; } - allocs.push_back( + allocations.push_back( std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type)); return true; } -/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) { - for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) { - if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { +VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements, + vk::MemoryPropertyFlags wanted_properties) { + for (auto& allocation : allocations) { + if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) { + continue; + } + if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) { + return commit; + } + } + return {}; +} + +/*static*/ bool VKMemoryManager::GetMemoryUnified( + const vk::PhysicalDeviceMemoryProperties& properties) { + for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) { + if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { // Memory is considered unified when heaps are device local only. return false; } @@ -236,17 +231,28 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 return true; } -VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, - u8* data, u64 begin, u64 end) - : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {} +VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, + vk::DeviceMemory memory, u64 begin, u64 end) + : device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {} VKMemoryCommitImpl::~VKMemoryCommitImpl() { allocation->Free(this); } -u8* VKMemoryCommitImpl::GetData() const { - ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit."); - return data; +MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { + const auto dev = device.GetLogical(); + const auto address = reinterpret_cast<u8*>( + dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader())); + return MemoryMap{this, address}; +} + +void VKMemoryCommitImpl::Unmap() const { + const auto dev = device.GetLogical(); + dev.unmapMemory(memory, device.GetDispatchLoader()); +} + +MemoryMap VKMemoryCommitImpl::Map() const { + return Map(interval.second - interval.first); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index 073597b35..cd00bb91b 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -12,6 +12,7 @@ namespace Vulkan { +class MemoryMap; class VKDevice; class VKMemoryAllocation; class VKMemoryCommitImpl; @@ -21,13 +22,14 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>; class VKMemoryManager final { public: explicit VKMemoryManager(const VKDevice& device); + VKMemoryManager(const VKMemoryManager&) = delete; ~VKMemoryManager(); /** * Commits a memory with the specified 
requeriments. - * @param reqs Requeriments returned from a Vulkan call. + * @param requirements Requirements returned from a Vulkan call. * @param host_visible Signals the allocator that it *must* use host visible and coherent - * memory. When passing false, it will try to allocate device local memory. + * memory. When passing false, it will try to allocate device local memory. * @returns A memory commit. */ VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible); @@ -47,25 +49,35 @@ private: /// Allocates a chunk of memory. bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); + /// Tries to allocate a memory commit. + VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements, + vk::MemoryPropertyFlags wanted_properties); + /// Returns true if the device uses an unified memory model. - static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props); + static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties); - const VKDevice& device; ///< Device handler. - const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties. - const bool is_memory_unified; ///< True if memory model is unified. - std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations. + const VKDevice& device; ///< Device handler. + const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties. + const bool is_memory_unified; ///< True if memory model is unified. + std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations. }; class VKMemoryCommitImpl final { friend VKMemoryAllocation; + friend MemoryMap; public: - explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data, - u64 begin, u64 end); + explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, + vk::DeviceMemory memory, u64 begin, u64 end); ~VKMemoryCommitImpl(); - /// Returns the writeable memory map. The commit has to be mappable. - u8* GetData() const; + /// Maps a memory region and returns a pointer to it. + /// It's illegal to have more than one memory map at the same time. + MemoryMap Map(u64 size, u64 offset = 0) const; + + /// Maps the whole commit and returns a pointer to it. + /// It's illegal to have more than one memory map at the same time. + MemoryMap Map() const; /// Returns the Vulkan memory handler. vk::DeviceMemory GetMemory() const { @@ -78,10 +90,46 @@ public: } private: + /// Unmaps memory. + void Unmap() const; + + const VKDevice& device; ///< Vulkan device. std::pair<u64, u64> interval{}; ///< Interval where the commit exists. vk::DeviceMemory memory; ///< Vulkan device memory handler. VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. - u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included. +}; + +/// Holds ownership of a memory map. +class MemoryMap final { +public: + explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address) + : commit{commit}, address{address} {} + + ~MemoryMap() { + if (commit) { + commit->Unmap(); + } + } + + /// Prematurely releases the memory map. + void Release() { + commit->Unmap(); + commit = nullptr; + } + + /// Returns the address of the memory map. + u8* GetAddress() const { + return address; + } + + /// Returns the address of the memory map; + operator u8*() const { + return address; + } + +private: + const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. 
+ u8* address{}; ///< Address to the mapped memory. }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp new file mode 100644 index 000000000..93f5d7ba0 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -0,0 +1,100 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <memory> +#include <vector> + +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" + +namespace Vulkan { + +VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {} + +VKRenderPassCache::~VKRenderPassCache() = default; + +vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { + const auto [pair, is_cache_miss] = cache.try_emplace(params); + auto& entry = pair->second; + if (is_cache_miss) { + entry = CreateRenderPass(params); + } + return *entry; +} + +UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { + std::vector<vk::AttachmentDescription> descriptors; + std::vector<vk::AttachmentReference> color_references; + + for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) { + const auto attachment = params.color_attachments[rt]; + const auto format = + MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, attachment.pixel_format); + ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", + static_cast<u32>(attachment.pixel_format)); + + // TODO(Rodrigo): Add eMayAlias when it's needed. + const auto color_layout = attachment.is_texception + ? vk::ImageLayout::eGeneral + : vk::ImageLayout::eColorAttachmentOptimal; + descriptors.emplace_back(vk::AttachmentDescriptionFlagBits::eMayAlias, format.format, + vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, + vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare, + vk::AttachmentStoreOp::eDontCare, color_layout, color_layout); + color_references.emplace_back(static_cast<u32>(rt), color_layout); + } + + vk::AttachmentReference zeta_attachment_ref; + if (params.has_zeta) { + const auto format = + MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format); + ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", + static_cast<u32>(params.zeta_pixel_format)); + + const auto zeta_layout = params.zeta_texception + ? vk::ImageLayout::eGeneral + : vk::ImageLayout::eDepthStencilAttachmentOptimal; + descriptors.emplace_back(vk::AttachmentDescriptionFlags{}, format.format, + vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, + vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eLoad, + vk::AttachmentStoreOp::eStore, zeta_layout, zeta_layout); + zeta_attachment_ref = + vk::AttachmentReference(static_cast<u32>(params.color_attachments.size()), zeta_layout); + } + + const vk::SubpassDescription subpass_description( + {}, vk::PipelineBindPoint::eGraphics, 0, nullptr, static_cast<u32>(color_references.size()), + color_references.data(), nullptr, params.has_zeta ? 
&zeta_attachment_ref : nullptr, 0, + nullptr); + + vk::AccessFlags access; + vk::PipelineStageFlags stage; + if (!color_references.empty()) { + access |= + vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite; + stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput; + } + + if (params.has_zeta) { + access |= vk::AccessFlagBits::eDepthStencilAttachmentRead | + vk::AccessFlagBits::eDepthStencilAttachmentWrite; + stage |= vk::PipelineStageFlagBits::eLateFragmentTests; + } + + const vk::SubpassDependency subpass_dependency(VK_SUBPASS_EXTERNAL, 0, stage, stage, {}, access, + {}); + + const vk::RenderPassCreateInfo create_info({}, static_cast<u32>(descriptors.size()), + descriptors.data(), 1, &subpass_description, 1, + &subpass_dependency); + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + return dev.createRenderPassUnique(create_info, nullptr, dld); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h new file mode 100644 index 000000000..b49b2db48 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h @@ -0,0 +1,97 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <tuple> +#include <unordered_map> + +#include <boost/container/static_vector.hpp> +#include <boost/functional/hash.hpp> + +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/surface.h" + +namespace Vulkan { + +class VKDevice; + +// TODO(Rodrigo): Optimize this structure for faster hashing + +struct RenderPassParams { + struct ColorAttachment { + u32 index = 0; + VideoCore::Surface::PixelFormat pixel_format = VideoCore::Surface::PixelFormat::Invalid; + bool is_texception = false; + + std::size_t Hash() const noexcept { + return static_cast<std::size_t>(pixel_format) | + static_cast<std::size_t>(is_texception) << 6 | + static_cast<std::size_t>(index) << 7; + } + + bool operator==(const ColorAttachment& rhs) const noexcept { + return std::tie(index, pixel_format, is_texception) == + std::tie(rhs.index, rhs.pixel_format, rhs.is_texception); + } + }; + + boost::container::static_vector<ColorAttachment, + Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> + color_attachments{}; + // TODO(Rodrigo): Unify has_zeta into zeta_pixel_format and zeta_component_type. 
+ VideoCore::Surface::PixelFormat zeta_pixel_format = VideoCore::Surface::PixelFormat::Invalid; + bool has_zeta = false; + bool zeta_texception = false; + + std::size_t Hash() const noexcept { + std::size_t hash = 0; + for (const auto& rt : color_attachments) { + boost::hash_combine(hash, rt.Hash()); + } + boost::hash_combine(hash, zeta_pixel_format); + boost::hash_combine(hash, has_zeta); + boost::hash_combine(hash, zeta_texception); + return hash; + } + + bool operator==(const RenderPassParams& rhs) const { + return std::tie(color_attachments, zeta_pixel_format, has_zeta, zeta_texception) == + std::tie(rhs.color_attachments, rhs.zeta_pixel_format, rhs.has_zeta, + rhs.zeta_texception); + } +}; + +} // namespace Vulkan + +namespace std { + +template <> +struct hash<Vulkan::RenderPassParams> { + std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std + +namespace Vulkan { + +class VKRenderPassCache final { +public: + explicit VKRenderPassCache(const VKDevice& device); + ~VKRenderPassCache(); + + vk::RenderPass GetRenderPass(const RenderPassParams& params); + +private: + UniqueRenderPass CreateRenderPass(const RenderPassParams& params) const; + + const VKDevice& device; + std::unordered_map<RenderPassParams, UniqueRenderPass> cache; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a8baf91de..8fe852ce8 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -954,6 +954,10 @@ private: Expression Visit(const Node& node) { if (const auto operation = std::get_if<OperationNode>(&*node)) { + if (const auto amend_index = operation->GetAmendIndex()) { + [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; + ASSERT(type == Type::Void); + } const auto operation_index = static_cast<std::size_t>(operation->GetCode()); const auto decompiler = operation_decompilers[operation_index]; if (decompiler == nullptr) { @@ -1142,6 +1146,10 @@ private: } if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { + if (const auto amend_index = conditional->GetAmendIndex()) { + [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; + ASSERT(type == Type::Void); + } // It's invalid to call conditional on nested nodes, use an operation instead const Id true_label = OpLabel(); const Id skip_label = OpLabel(); diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 62f1427f5..d48d3b44c 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -3,86 +3,144 @@ // Refer to the license.txt file included. 
#include <algorithm> -#include <memory> #include <optional> +#include <tuple> #include <vector> +#include "common/alignment.h" #include "common/assert.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" namespace Vulkan { +namespace { + constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; -VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, - vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage) - : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{ - pipeline_stage} { - CreateBuffers(memory_manager, usage); - ReserveWatches(WATCHES_INITIAL_RESERVE); +constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024; + +std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter, + vk::MemoryPropertyFlags wanted) { + const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader()); + for (u32 i = 0; i < properties.memoryTypeCount; i++) { + if (!(filter & (1 << i))) { + continue; + } + if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) { + return i; + } + } + return {}; +} + +} // Anonymous namespace + +VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, + vk::BufferUsageFlags usage) + : device{device}, scheduler{scheduler} { + CreateBuffers(usage); + ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); + ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); } VKStreamBuffer::~VKStreamBuffer() = default; -std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) { - ASSERT(size <= buffer_size); +std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { + ASSERT(size <= STREAM_BUFFER_SIZE); mapped_size = size; - if (offset + size > buffer_size) { - // The buffer would overflow, save the amount of used buffers, signal an invalidation and - // reset the state. - invalidation_mark = used_watches; - used_watches = 0; + if (alignment > 0) { + offset = Common::AlignUp(offset, alignment); + } + + WaitPendingOperations(offset); + + bool invalidated = false; + if (offset + size > STREAM_BUFFER_SIZE) { + // The buffer would overflow, save the amount of used watches and reset the state. + invalidation_mark = current_watch_cursor; + current_watch_cursor = 0; offset = 0; + + // Swap watches and reset waiting cursors. + std::swap(previous_watches, current_watches); + wait_cursor = 0; + wait_bound = 0; + + // Ensure that we don't wait for uncommitted fences. + scheduler.Flush(); + + invalidated = true; } - return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld)); + return {pointer, offset, invalidated}; } -void VKStreamBuffer::Send(u64 size) { +void VKStreamBuffer::Unmap(u64 size) { ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); - if (invalidation_mark) { - // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. 
- scheduler.Flush(); - std::for_each(watches.begin(), watches.begin() + *invalidation_mark, - [&](auto& resource) { resource->Wait(); }); - invalidation_mark = std::nullopt; - } + const auto dev = device.GetLogical(); + dev.unmapMemory(*memory, device.GetDispatchLoader()); + + offset += size; - if (used_watches + 1 >= watches.size()) { + if (current_watch_cursor + 1 >= current_watches.size()) { // Ensure that there are enough watches. - ReserveWatches(WATCHES_RESERVE_CHUNK); + ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK); } - // Add a watch for this allocation. - watches[used_watches++]->Watch(scheduler.GetFence()); - - offset += size; + auto& watch = current_watches[current_watch_cursor++]; + watch.upper_bound = offset; + watch.fence.Watch(scheduler.GetFence()); } -void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { - const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0, - nullptr); - +void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) { + const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive, + 0, nullptr); const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); - commit = memory_manager.Commit(*buffer, true); - mapped_pointer = commit->GetData(); + + const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld); + // Prefer device local host visible allocations (this should hit AMD's pinned memory). + auto type = FindMemoryType(device, requirements.memoryTypeBits, + vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent | + vk::MemoryPropertyFlagBits::eDeviceLocal); + if (!type) { + // Otherwise search for a host visible allocation. 
+ type = FindMemoryType(device, requirements.memoryTypeBits, + vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent); + ASSERT_MSG(type, "No host visible and coherent memory type found"); + } + const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type); + memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld); + + dev.bindBufferMemory(*buffer, *memory, 0, dld); } -void VKStreamBuffer::ReserveWatches(std::size_t grow_size) { - const std::size_t previous_size = watches.size(); - watches.resize(previous_size + grow_size); - std::generate(watches.begin() + previous_size, watches.end(), - []() { return std::make_unique<VKFenceWatch>(); }); +void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) { + watches.resize(watches.size() + grow_size); +} + +void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { + if (!invalidation_mark) { + return; + } + while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) { + auto& watch = previous_watches[wait_cursor]; + wait_bound = watch.upper_bound; + watch.fence.Wait(); + ++wait_cursor; + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 842e54162..187c0c612 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -4,28 +4,24 @@ #pragma once -#include <memory> #include <optional> #include <tuple> #include <vector> #include "common/common_types.h" #include "video_core/renderer_vulkan/declarations.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" namespace Vulkan { class VKDevice; class VKFence; class VKFenceWatch; -class VKResourceManager; class VKScheduler; -class VKStreamBuffer { +class VKStreamBuffer final { public: - explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, - vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage); + explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, + vk::BufferUsageFlags usage); ~VKStreamBuffer(); /** @@ -34,39 +30,47 @@ public: * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer * offset and a boolean that's true when buffer has been invalidated. */ - std::tuple<u8*, u64, bool> Reserve(u64 size); + std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment); /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. - void Send(u64 size); + void Unmap(u64 size); - vk::Buffer GetBuffer() const { + vk::Buffer GetHandle() const { return *buffer; } private: + struct Watch final { + VKFenceWatch fence; + u64 upper_bound{}; + }; + /// Creates Vulkan buffer handles committing the required the required memory. - void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage); + void CreateBuffers(vk::BufferUsageFlags usage); /// Increases the amount of watches available. - void ReserveWatches(std::size_t grow_size); + void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); + + void WaitPendingOperations(u64 requested_upper_bound); const VKDevice& device; ///< Vulkan device manager. VKScheduler& scheduler; ///< Command scheduler. - const u64 buffer_size; ///< Total size of the stream buffer. const vk::AccessFlags access; ///< Access usage of this stream buffer. 
const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer. - UniqueBuffer buffer; ///< Mapped buffer. - VKMemoryCommit commit; ///< Memory commit. - u8* mapped_pointer{}; ///< Pointer to the host visible commit + UniqueBuffer buffer; ///< Mapped buffer. + UniqueDeviceMemory memory; ///< Memory allocation. u64 offset{}; ///< Buffer iterator. u64 mapped_size{}; ///< Size reserved for the current copy. - std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches - std::size_t used_watches{}; ///< Count of watches, reset on invalidation. - std::optional<std::size_t> - invalidation_mark{}; ///< Number of watches used in the current invalidation. + std::vector<Watch> current_watches; ///< Watches recorded in the current iteration. + std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation. + std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle. + + std::vector<Watch> previous_watches; ///< Watches used in the previous iteration. + std::size_t wait_cursor{}; ///< Last watch being waited for completion. + u64 wait_bound{}; ///< Highest offset being watched for completion. }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp new file mode 100644 index 000000000..0e577b9ff --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -0,0 +1,57 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <variant> +#include <boost/container/static_vector.hpp> + +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" + +namespace Vulkan { + +VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler) + : device{device}, scheduler{scheduler} {} + +VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; + +void VKUpdateDescriptorQueue::TickFrame() { + payload.clear(); +} + +void VKUpdateDescriptorQueue::Acquire() { + entries.clear(); +} + +void VKUpdateDescriptorQueue::Send(vk::DescriptorUpdateTemplate update_template, + vk::DescriptorSet set) { + if (payload.size() + entries.size() >= payload.max_size()) { + LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); + scheduler.WaitWorker(); + payload.clear(); + } + + const auto payload_start = payload.data() + payload.size(); + for (const auto& entry : entries) { + if (const auto image = std::get_if<vk::DescriptorImageInfo>(&entry)) { + payload.push_back(*image); + } else if (const auto buffer = std::get_if<Buffer>(&entry)) { + payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size); + } else if (const auto texel = std::get_if<vk::BufferView>(&entry)) { + payload.push_back(*texel); + } else { + UNREACHABLE(); + } + } + + scheduler.Record([dev = device.GetLogical(), payload_start, set, + update_template]([[maybe_unused]] auto cmdbuf, auto& dld) { + dev.updateDescriptorSetWithTemplate(set, update_template, payload_start, dld); + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h new file mode 100644 index 000000000..8c825aa29 --- /dev/null +++ 
b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -0,0 +1,86 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <type_traits> +#include <variant> +#include <boost/container/static_vector.hpp> + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" + +namespace Vulkan { + +class VKDevice; +class VKScheduler; + +class DescriptorUpdateEntry { +public: + explicit DescriptorUpdateEntry() : image{} {} + + DescriptorUpdateEntry(vk::DescriptorImageInfo image) : image{image} {} + + DescriptorUpdateEntry(vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size) + : buffer{buffer, offset, size} {} + + DescriptorUpdateEntry(vk::BufferView texel_buffer) : texel_buffer{texel_buffer} {} + +private: + union { + vk::DescriptorImageInfo image; + vk::DescriptorBufferInfo buffer; + vk::BufferView texel_buffer; + }; +}; + +class VKUpdateDescriptorQueue final { +public: + explicit VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler); + ~VKUpdateDescriptorQueue(); + + void TickFrame(); + + void Acquire(); + + void Send(vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set); + + void AddSampledImage(vk::Sampler sampler, vk::ImageView image_view) { + entries.emplace_back(vk::DescriptorImageInfo{sampler, image_view, {}}); + } + + void AddImage(vk::ImageView image_view) { + entries.emplace_back(vk::DescriptorImageInfo{{}, image_view, {}}); + } + + void AddBuffer(const vk::Buffer* buffer, u64 offset, std::size_t size) { + entries.push_back(Buffer{buffer, offset, size}); + } + + void AddTexelBuffer(vk::BufferView texel_buffer) { + entries.emplace_back(texel_buffer); + } + + vk::ImageLayout* GetLastImageLayout() { + return &std::get<vk::DescriptorImageInfo>(entries.back()).imageLayout; + } + +private: + struct Buffer { + const vk::Buffer* buffer{}; + u64 offset{}; + std::size_t size{}; + }; + using Variant = std::variant<vk::DescriptorImageInfo, Buffer, vk::BufferView>; + // Old gcc versions don't consider this trivially copyable. 
+ // static_assert(std::is_trivially_copyable_v<Variant>); + + const VKDevice& device; + VKScheduler& scheduler; + + boost::container::static_vector<Variant, 0x400> entries; + boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload; +}; + +} // namespace Vulkan diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4d2f4d6a8..4e155542a 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -392,8 +392,30 @@ struct MetaImage { using Meta = std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; +class AmendNode { +public: + std::optional<std::size_t> GetAmendIndex() const { + if (amend_index == amend_null_index) { + return std::nullopt; + } + return {amend_index}; + } + + void SetAmendIndex(std::size_t index) { + amend_index = index; + } + + void ClearAmend() { + amend_index = amend_null_index; + } + +private: + static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL; + std::size_t amend_index{amend_null_index}; +}; + /// Holds any kind of operation that can be done in the IR -class OperationNode final { +class OperationNode final : public AmendNode { public: explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} @@ -433,7 +455,7 @@ private: }; /// Encloses inside any kind of node that returns a boolean conditionally-executed code -class ConditionalNode final { +class ConditionalNode final : public AmendNode { public: explicit ConditionalNode(Node condition, std::vector<Node>&& code) : condition{std::move(condition)}, code{std::move(code)} {} diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 1d9825c76..31eecb3f4 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -446,4 +446,10 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { Immediate(bits)); } +std::size_t ShaderIR::DeclareAmend(Node new_amend) { + const std::size_t id = amend_code.size(); + amend_code.push_back(new_amend); + return id; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index baed06ccd..aacd0a0da 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -176,6 +176,10 @@ public: /// Returns a condition code evaluated from internal flags Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; + const Node& GetAmendNode(std::size_t index) const { + return amend_code[index]; + } + private: friend class ASTDecoder; @@ -392,6 +396,9 @@ private: Tegra::Shader::Instruction instr, bool is_write); + /// Register new amending code and obtain the reference id. + std::size_t DeclareAmend(Node new_amend); + const ProgramCode& program_code; const u32 main_offset; const CompilerSettings settings; @@ -406,6 +413,7 @@ private: std::map<u32, NodeBlock> basic_blocks; NodeBlock global_code; ASTManager program_manager{true, true}; + std::vector<Node> amend_code; std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 07a720494..7490fb718 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -215,18 +215,11 @@ void GRenderWindow::moveContext() { } void GRenderWindow::SwapBuffers() { - // In our multi-threaded QWidget use case we shouldn't need to call `makeCurrent`, - // since we never call `doneCurrent` in this thread. 
- // However: - // - The Qt debug runtime prints a bogus warning on the console if `makeCurrent` wasn't called - // since the last time `swapBuffers` was executed; - // - On macOS, if `makeCurrent` isn't called explicitly, resizing the buffer breaks. - context->makeCurrent(child); - context->swapBuffers(child); + if (!first_frame) { - emit FirstFrameDisplayed(); first_frame = true; + emit FirstFrameDisplayed(); } }
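
For reference, the render pass cache added in vk_renderpass_cache.h follows a familiar pattern: a parameters struct exposing Hash() and operator==, a std::hash specialization that forwards to Hash(), and an unordered_map keyed on the struct so each unique combination is built once. The sketch below shows that pattern in isolation; Params, Object and Cache are hypothetical stand-ins, not types from this change.

// Minimal sketch of the hashed-parameters cache pattern, assuming boost is available
// for hash_combine (as it is in the diff). Params/Object/Cache are illustrative only.
#include <cstddef>
#include <memory>
#include <tuple>
#include <unordered_map>
#include <boost/functional/hash.hpp>

struct Params {
    int color_format{};
    int zeta_format{};
    bool has_zeta{};

    std::size_t Hash() const noexcept {
        std::size_t hash = 0;
        boost::hash_combine(hash, color_format);
        boost::hash_combine(hash, zeta_format);
        boost::hash_combine(hash, has_zeta);
        return hash;
    }

    bool operator==(const Params& rhs) const {
        return std::tie(color_format, zeta_format, has_zeta) ==
               std::tie(rhs.color_format, rhs.zeta_format, rhs.has_zeta);
    }
};

namespace std {
template <>
struct hash<Params> {
    std::size_t operator()(const Params& k) const noexcept {
        return k.Hash();
    }
};
} // namespace std

struct Object {}; // stands in for the created vk::RenderPass

class Cache {
public:
    const Object& Get(const Params& params) {
        // try_emplace only constructs a new entry on a cache miss.
        const auto [it, is_new] = cache.try_emplace(params);
        if (is_new) {
            it->second = std::make_unique<Object>();
        }
        return *it->second;
    }

private:
    std::unordered_map<Params, std::unique_ptr<Object>> cache;
};

GetRenderPass in the diff is expected to follow the same lookup-or-create shape, with CreateRenderPass building the Vulkan object on a miss.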
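
The reworked VKStreamBuffer treats the allocation as a ring: Map aligns and advances an offset, Unmap records a watch (a fence plus the upper bound it protects), and when the offset would overflow the watch lists are swapped so the next cycle only waits for the regions it is about to reuse. The host-only sketch below illustrates that bookkeeping; Fence is a hypothetical stand-in for VKFenceWatch, and the ordering and wait condition are simplified relative to the diff.

// Simplified sketch of the wrap-and-wait bookkeeping, assuming a hypothetical Fence type.
#include <cstddef>
#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

struct Fence {
    void Wait() const {} // would block until the GPU has passed this point
};

class StreamBuffer {
public:
    explicit StreamBuffer(std::uint64_t size) : buffer_size{size} {}

    // Returns the offset at which the caller may write `size` bytes.
    std::uint64_t Map(std::uint64_t size, std::uint64_t alignment) {
        if (alignment > 0) {
            offset = (offset + alignment - 1) / alignment * alignment; // align up
        }
        if (offset + size > buffer_size) {
            // Wrap around: everything recorded so far belongs to the now-finished cycle.
            std::swap(previous_watches, current_watches);
            current_watches.clear();
            invalidation_mark = previous_watches.size();
            offset = 0;
            wait_cursor = 0;
            wait_bound = 0;
        }
        // Block until previous-cycle work that used [0, offset + size) has finished.
        WaitPendingOperations(offset + size);
        return offset;
    }

    // `fence` is whatever signals that the GPU finished consuming this range.
    void Unmap(std::uint64_t written, Fence fence) {
        offset += written;
        current_watches.push_back({fence, offset});
    }

private:
    struct Watch {
        Fence fence;
        std::uint64_t upper_bound{};
    };

    void WaitPendingOperations(std::uint64_t requested_upper_bound) {
        if (!invalidation_mark) {
            return;
        }
        // Wait only on previous-cycle watches that still cover the region being reused.
        while (wait_bound < requested_upper_bound && wait_cursor < *invalidation_mark) {
            auto& watch = previous_watches[wait_cursor];
            wait_bound = watch.upper_bound;
            watch.fence.Wait();
            ++wait_cursor;
        }
    }

    std::uint64_t buffer_size;
    std::uint64_t offset{};
    std::vector<Watch> current_watches;
    std::vector<Watch> previous_watches;
    std::size_t wait_cursor{};
    std::uint64_t wait_bound{};
    std::optional<std::size_t> invalidation_mark;
};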
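
CreateBuffers now selects the memory type in two passes: prefer DEVICE_LOCAL | HOST_VISIBLE | HOST_COHERENT (the small pinned heap on AMD), and otherwise fall back to any HOST_VISIBLE | HOST_COHERENT type. Below is a sketch of the same selection using the plain Vulkan C API rather than the vulkan.hpp wrappers from the diff; it assumes a valid VkPhysicalDevice and a `filter` taken from the buffer's memoryTypeBits.

// Two-step memory type selection, mirroring the fallback logic in CreateBuffers.
#include <cstdint>
#include <optional>
#include <vulkan/vulkan.h>

std::optional<std::uint32_t> FindMemoryType(VkPhysicalDevice physical, std::uint32_t filter,
                                            VkMemoryPropertyFlags wanted) {
    VkPhysicalDeviceMemoryProperties properties;
    vkGetPhysicalDeviceMemoryProperties(physical, &properties);
    for (std::uint32_t i = 0; i < properties.memoryTypeCount; ++i) {
        if (!(filter & (1U << i))) {
            continue; // the resource cannot live in this memory type
        }
        if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
            return i;
        }
    }
    return std::nullopt;
}

std::uint32_t PickStreamBufferMemoryType(VkPhysicalDevice physical, std::uint32_t filter) {
    // Preferred: device local *and* host visible (pinned memory on some GPUs).
    if (const auto type = FindMemoryType(physical, filter,
                                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
        return *type;
    }
    // Fallback: any host visible, host coherent type.
    const auto type = FindMemoryType(physical, filter,
                                     VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
    return type.value(); // the Vulkan spec guarantees at least one such type exists
}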
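
VKUpdateDescriptorQueue batches per-draw descriptor writes into a flat payload and records a single updateDescriptorSetWithTemplate call on the scheduler. The hypothetical helper below shows the intended Acquire -> Add* -> Send flow per draw; the sampler, image view, buffer, template and set handles are assumed to exist already and to match the template layout.

// Hypothetical per-draw usage of the queue added in vk_update_descriptor.h.
#include <cstddef>

#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"

namespace Vulkan {

void PushDrawDescriptors(VKUpdateDescriptorQueue& queue, vk::Sampler sampler,
                         vk::ImageView image_view, const vk::Buffer& uniform_buffer,
                         u64 uniform_offset, std::size_t uniform_size,
                         vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set) {
    queue.Acquire();                            // start a fresh entry list for this draw
    queue.AddSampledImage(sampler, image_view); // combined image/sampler slot
    *queue.GetLastImageLayout() = vk::ImageLayout::eShaderReadOnlyOptimal;
    queue.AddBuffer(&uniform_buffer, uniform_offset, uniform_size); // uniform buffer slot
    queue.Send(update_template, set);           // flatten entries and defer the update
}

} // namespace Vulkan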