diff options
4 files changed, 59 insertions, 14 deletions
| diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index ce92f66ab..b278614e6 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -24,25 +24,38 @@ using namespace Common::Literals;  // Maximum potential alignment of a Vulkan buffer  constexpr VkDeviceSize MAX_ALIGNMENT = 256; -// Maximum size to put elements in the stream buffer -constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;  // Stream buffer size in bytes -constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; -constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; +constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB; -size_t Region(size_t iterator) noexcept { -    return iterator / REGION_SIZE; +size_t GetStreamBufferSize(const Device& device) { +    VkDeviceSize size{0}; +    if (device.HasDebuggingToolAttached()) { +        ForEachDeviceLocalHostVisibleHeap(device, [&size](size_t index, VkMemoryHeap& heap) { +            size = std::max(size, heap.size); +        }); +        // If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be +        // loaded at the same time in RenderDoc. If rebar is supported, this shouldn't be an issue +        // as the heap will be much larger. +        if (size <= 256_MiB) { +            size = size * 40 / 100; +        } +    } else { +        size = MAX_STREAM_BUFFER_SIZE; +    } +    return std::min(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE);  }  } // Anonymous namespace  StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,                                       Scheduler& scheduler_) -    : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { +    : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, +      stream_buffer_size{GetStreamBufferSize(device)}, region_size{stream_buffer_size / +                                                                   StagingBufferPool::NUM_SYNCS} {      VkBufferCreateInfo stream_ci = {          .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,          .pNext = nullptr,          .flags = 0, -        .size = STREAM_BUFFER_SIZE, +        .size = stream_buffer_size,          .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |                   VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,          .sharingMode = VK_SHARING_MODE_EXCLUSIVE, @@ -63,7 +76,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem  StagingBufferPool::~StagingBufferPool() = default;  StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) { -    if (!deferred && usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { +    if (!deferred && usage == MemoryUsage::Upload && size <= region_size) {          return GetStreamBuffer(size);      }      return GetStagingBuffer(size, usage, deferred); @@ -101,7 +114,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {      used_iterator = iterator;      free_iterator = std::max(free_iterator, iterator + size); -    if (iterator + size >= STREAM_BUFFER_SIZE) { +    if (iterator + size >= stream_buffer_size) {          std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,                    current_tick);          used_iterator = 0; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 5f69f08b1..d3deb9072 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -90,6 +90,9 @@ private:      void ReleaseCache(MemoryUsage usage);      void ReleaseLevel(StagingBuffersCache& cache, size_t log2); +    size_t Region(size_t iter) const noexcept { +        return iter / region_size; +    }      const Device& device;      MemoryAllocator& memory_allocator; @@ -97,6 +100,8 @@ private:      vk::Buffer stream_buffer;      std::span<u8> stream_pointer; +    VkDeviceSize stream_buffer_size; +    VkDeviceSize region_size;      size_t iterator = 0;      size_t used_iterator = 0; diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 3ef381a38..82767fdf0 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -9,6 +9,7 @@  #include "common/alignment.h"  #include "common/assert.h"  #include "common/common_types.h" +#include "common/literals.h"  #include "common/logging/log.h"  #include "common/polyfill_ranges.h"  #include "video_core/vulkan_common/vma.h" @@ -69,8 +70,7 @@ struct Range {      case MemoryUsage::Download:          return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;      case MemoryUsage::DeviceLocal: -        return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | -               VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT; +        return {};      }      return {};  } @@ -212,7 +212,20 @@ MemoryAllocator::MemoryAllocator(const Device& device_)      : device{device_}, allocator{device.GetAllocator()},        properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},        buffer_image_granularity{ -          device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} +          device_.GetPhysical().GetProperties().limits.bufferImageGranularity} { +    // GPUs not supporting rebar may only have a region with less than 256MB host visible/device +    // local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to +    // the heap running out of memory. With RenderDoc attached and only a small host/device region, +    // only allow the stream buffer in this memory heap. +    if (device.HasDebuggingToolAttached()) { +        using namespace Common::Literals; +        ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) { +            if (heap.size <= 256_MiB) { +                valid_memory_types &= ~(1u << index); +            } +        }); +    } +}  MemoryAllocator::~MemoryAllocator() = default; @@ -244,7 +257,7 @@ vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsa          .usage = MemoryUsageVma(usage),          .requiredFlags = 0,          .preferredFlags = MemoryUsagePreferedVmaFlags(usage), -        .memoryTypeBits = 0, +        .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,          .pool = VK_NULL_HANDLE,          .pUserData = nullptr,          .priority = 0.f, diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index f449bc8d0..38a182bcb 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h @@ -7,6 +7,7 @@  #include <span>  #include <vector>  #include "common/common_types.h" +#include "video_core/vulkan_common/vulkan_device.h"  #include "video_core/vulkan_common/vulkan_wrapper.h"  VK_DEFINE_HANDLE(VmaAllocator) @@ -26,6 +27,18 @@ enum class MemoryUsage {      Stream,      ///< Requests device local host visible buffer, falling back host memory.  }; +template <typename F> +void ForEachDeviceLocalHostVisibleHeap(const Device& device, F&& f) { +    auto memory_props = device.GetPhysical().GetMemoryProperties().memoryProperties; +    for (size_t i = 0; i < memory_props.memoryTypeCount; i++) { +        auto& memory_type = memory_props.memoryTypes[i]; +        if ((memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && +            (memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) { +            f(memory_type.heapIndex, memory_props.memoryHeaps[memory_type.heapIndex]); +        } +    } +} +  /// Ownership handle of a memory commitment.  /// Points to a subregion of a memory allocation.  class MemoryCommit { @@ -124,6 +137,7 @@ private:      std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.      VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers                                             // and optimal images +    u32 valid_memory_types{~0u};  };  } // namespace Vulkan | 
