summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorReinUsesLisp <reinuseslisp@airmail.cc>2021-01-17 03:16:15 -0300
committerReinUsesLisp <reinuseslisp@airmail.cc>2021-02-13 02:17:24 -0300
commita02b4e1df662dcc2d2edd7712539eabf2eef5d89 (patch)
tree6311228f312d4d22a9776587a0bd8af5bbeff328 /src
parent35df1d1864ba721ea7b1cebf9a106dd771cde4f5 (diff)
buffer_cache: Skip cache on small uploads on Vulkan
Ports from OpenGL the optimization to skip small 3D uniform buffer uploads. This will take advantage of the previously introduced stream buffer. Fixes instances where the staging buffer offset was being ignored.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h17
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h7
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp3
3 files changed, 18 insertions, 9 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index d6399bf24..bd8507610 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -660,24 +660,25 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
const VAddr cpu_addr = binding.cpu_addr;
const u32 size = binding.size;
Buffer& buffer = slot_buffers[binding.buffer_id];
- if constexpr (IS_OPENGL) {
- if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) {
+ if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) {
+ if constexpr (IS_OPENGL) {
if (runtime.HasFastBufferSubData()) {
// Fast path for Nvidia
if (!HasFastUniformBufferBound(stage, binding_index)) {
// We only have to bind when the currently bound buffer is not the fast version
- fast_bound_uniform_buffers[stage] |= 1U << binding_index;
runtime.BindFastUniformBuffer(stage, binding_index, size);
}
const auto span = ImmediateBufferWithData(cpu_addr, size);
runtime.PushFastUniformBuffer(stage, binding_index, span);
- } else {
- // Stream buffer path to avoid stalling on non-Nvidia drivers
- const auto span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
- cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
+ return;
}
- return;
}
+ fast_bound_uniform_buffers[stage] |= 1U << binding_index;
+
+ // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
+ const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
+ cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
+ return;
}
// Classic cached path
SynchronizeBuffer(buffer, cpu_addr, size);
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index d232e1f2d..7ff7e0d55 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -69,6 +69,13 @@ public:
void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
+ std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
+ [[maybe_unused]] u32 binding_index, u32 size) {
+ const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
+ BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
+ return ref.mapped_span;
+ }
+
void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
BindBuffer(buffer, offset, size);
}
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 275d740b8..bc71202e2 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -86,7 +86,8 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.pNext = nullptr,
.flags = 0,
.size = STREAM_BUFFER_SIZE,
- .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,