summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorameerj <52414509+ameerj@users.noreply.github.com>2023-05-27 21:46:15 -0400
committerameerj <52414509+ameerj@users.noreply.github.com>2023-05-28 00:38:46 -0400
commit642c14f0c7ee71f1f4daa50cee84ec9143697af6 (patch)
tree864fae9c9389790e87f8f63341c89e09f6fed5bc /src
parentd6db422098ad12d95396a23f60f005893eb9b720 (diff)
OpenGL: Make use of persistent buffer maps in buffer cache downloads
Persistent buffer maps were already used by the texture cache, this extends their usage for the buffer cache. In my testing, using the memory maps for uploads was slower than the existing "ImmediateUpload" path, so the memory map usage is limited to downloads for the time being.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/CMakeLists.txt4
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h10
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h1
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp58
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h29
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h1
-rw-r--r--src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp134
-rw-r--r--src/video_core/renderer_opengl/gl_staging_buffer_pool.h (renamed from src/video_core/renderer_opengl/gl_stream_buffer.h)42
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp63
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp87
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h47
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp9
-rw-r--r--src/video_core/renderer_opengl/util_shaders.h10
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h1
15 files changed, 298 insertions, 204 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 308d013d6..3e8bd0060 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -133,8 +133,8 @@ add_library(video_core STATIC
renderer_opengl/gl_shader_util.h
renderer_opengl/gl_state_tracker.cpp
renderer_opengl/gl_state_tracker.h
- renderer_opengl/gl_stream_buffer.cpp
- renderer_opengl/gl_stream_buffer.h
+ renderer_opengl/gl_staging_buffer_pool.cpp
+ renderer_opengl/gl_staging_buffer_pool.h
renderer_opengl/gl_texture_cache.cpp
renderer_opengl/gl_texture_cache.h
renderer_opengl/gl_texture_cache_base.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 65494097b..08bc66aaa 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -465,7 +465,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
if (committed_ranges.empty()) {
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-
async_buffers.emplace_back(std::optional<Async_Buffer>{});
}
return;
@@ -526,7 +525,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
committed_ranges.clear();
if (downloads.empty()) {
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-
async_buffers.emplace_back(std::optional<Async_Buffer>{});
}
return;
@@ -678,7 +676,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
const u32 size = index_buffer.size;
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
- if constexpr (USE_MEMORY_MAPS) {
+ if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
auto upload_staging = runtime.UploadStagingBuffer(size);
std::array<BufferCopy, 1> copies{
{BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
@@ -1446,7 +1444,7 @@ bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr,
template <class P>
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
std::span<BufferCopy> copies) {
- if constexpr (USE_MEMORY_MAPS) {
+ if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
MappedUploadMemory(buffer, total_size_bytes, copies);
} else {
ImmediateUploadMemory(buffer, largest_copy, copies);
@@ -1457,7 +1455,7 @@ template <class P>
void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
[[maybe_unused]] u64 largest_copy,
[[maybe_unused]] std::span<const BufferCopy> copies) {
- if constexpr (!USE_MEMORY_MAPS) {
+ if constexpr (!USE_MEMORY_MAPS_FOR_UPLOADS) {
std::span<u8> immediate_buffer;
for (const BufferCopy& copy : copies) {
std::span<const u8> upload_span;
@@ -1516,7 +1514,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
auto& buffer = slot_buffers[buffer_id];
SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
- if constexpr (USE_MEMORY_MAPS) {
+ if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
auto upload_staging = runtime.UploadStagingBuffer(copy_size);
std::array copies{BufferCopy{
.src_offset = upload_staging.offset,
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index ac00d4d9d..7c6ef49d5 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -103,6 +103,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI
static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
+ static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS;
static constexpr BufferId NULL_BUFFER_ID{0};
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 6af4ae793..724e53edb 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -106,8 +106,10 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
return views.back().texture.handle;
}
-BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
- : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
+BufferCacheRuntime::BufferCacheRuntime(const Device& device_,
+ StagingBufferPool& staging_buffer_pool_)
+ : device{device_}, staging_buffer_pool{staging_buffer_pool_},
+ has_fast_buffer_sub_data{device.HasFastBufferSubData()},
use_assembly_shaders{device.UseAssemblyShaders()},
has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
@@ -140,6 +142,14 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
}();
}
+StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) {
+ return staging_buffer_pool.RequestUploadBuffer(size);
+}
+
+StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
+ return staging_buffer_pool.RequestDownloadBuffer(size);
+}
+
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
if (device.CanReportMemoryUsage()) {
return device_access_memory - device.GetCurrentDedicatedVideoMemory();
@@ -147,15 +157,49 @@ u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
return 2_GiB;
}
-void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
- std::span<const VideoCommon::BufferCopy> copies) {
+void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+ if (barrier) {
+ PreCopyBarrier();
+ }
for (const VideoCommon::BufferCopy& copy : copies) {
- glCopyNamedBufferSubData(
- src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
- static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
+ glCopyNamedBufferSubData(src_buffer, dst_buffer, static_cast<GLintptr>(copy.src_offset),
+ static_cast<GLintptr>(copy.dst_offset),
+ static_cast<GLsizeiptr>(copy.size));
+ }
+ if (barrier) {
+ PostCopyBarrier();
}
}
+void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+ CopyBuffer(dst_buffer, src_buffer.Handle(), copies, barrier);
+}
+
+void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+ CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
+}
+
+void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies) {
+ CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies);
+}
+
+void BufferCacheRuntime::PreCopyBarrier() {
+ // TODO: finer grained barrier?
+ glMemoryBarrier(GL_ALL_BARRIER_BITS);
+}
+
+void BufferCacheRuntime::PostCopyBarrier() {
+ glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
+}
+
+void BufferCacheRuntime::Finish() {
+ glFinish();
+}
+
void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) {
glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(size), GL_RED, GL_UNSIGNED_INT, &value);
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 18d3c3ac0..a24991585 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -12,7 +12,7 @@
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
namespace OpenGL {
@@ -60,11 +60,28 @@ class BufferCacheRuntime {
public:
static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
- explicit BufferCacheRuntime(const Device& device_);
+ explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_);
+
+ [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size);
+
+ [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
+
+ void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+
+ void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+
+ void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies);
+ void PreCopyBarrier();
+ void PostCopyBarrier();
+ void Finish();
+
void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
@@ -169,6 +186,7 @@ private:
};
const Device& device;
+ StagingBufferPool& staging_buffer_pool;
bool has_fast_buffer_sub_data = false;
bool use_assembly_shaders = false;
@@ -201,7 +219,7 @@ private:
struct BufferCacheParams {
using Runtime = OpenGL::BufferCacheRuntime;
using Buffer = OpenGL::Buffer;
- using Async_Buffer = u32;
+ using Async_Buffer = OpenGL::StagingBufferMap;
using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
static constexpr bool IS_OPENGL = true;
@@ -209,9 +227,12 @@ struct BufferCacheParams {
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
- static constexpr bool USE_MEMORY_MAPS = false;
+ static constexpr bool USE_MEMORY_MAPS = true;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
+
+ // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
+ static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f5baa0f3c..fc711c44a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -24,6 +24,7 @@
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
@@ -58,8 +59,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
StateTracker& state_tracker_)
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_),
program_manager(program_manager_), state_tracker(state_tracker_),
- texture_cache_runtime(device, program_manager, state_tracker),
- texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device),
+ texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool),
+ texture_cache(texture_cache_runtime, *this),
+ buffer_cache_runtime(device, staging_buffer_pool),
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
state_tracker, gpu.ShaderNotify()),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 410d8ffc5..a73ad15c1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -230,6 +230,7 @@ private:
ProgramManager& program_manager;
StateTracker& state_tracker;
+ StagingBufferPool staging_buffer_pool;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
new file mode 100644
index 000000000..72b1dbb32
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
@@ -0,0 +1,134 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <array>
+#include <memory>
+#include <span>
+
+#include <glad/glad.h>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
+
+namespace OpenGL {
+
+StagingBufferMap::~StagingBufferMap() {
+ if (sync) {
+ sync->Create();
+ }
+}
+
+StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
+ : storage_flags{storage_flags_}, map_flags{map_flags_} {}
+
+StagingBuffers::~StagingBuffers() = default;
+
+StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) {
+ const size_t index = RequestBuffer(requested_size);
+ OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
+ return StagingBufferMap{
+ .mapped_span = std::span(maps[index], requested_size),
+ .sync = sync,
+ .buffer = buffers[index].handle,
+ };
+}
+
+size_t StagingBuffers::RequestBuffer(size_t requested_size) {
+ if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
+ return *index;
+ }
+
+ OGLBuffer& buffer = buffers.emplace_back();
+ buffer.Create();
+ glNamedBufferStorage(buffer.handle, requested_size, nullptr,
+ storage_flags | GL_MAP_PERSISTENT_BIT);
+ maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
+ map_flags | GL_MAP_PERSISTENT_BIT)));
+
+ syncs.emplace_back();
+ sizes.push_back(requested_size);
+
+ ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
+ maps.size() == sizes.size());
+
+ return buffers.size() - 1;
+}
+
+std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) {
+ size_t smallest_buffer = std::numeric_limits<size_t>::max();
+ std::optional<size_t> found;
+ const size_t num_buffers = sizes.size();
+ for (size_t index = 0; index < num_buffers; ++index) {
+ const size_t buffer_size = sizes[index];
+ if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
+ continue;
+ }
+ if (syncs[index].handle != 0) {
+ if (!syncs[index].IsSignaled()) {
+ continue;
+ }
+ syncs[index].Release();
+ }
+ smallest_buffer = buffer_size;
+ found = index;
+ }
+ return found;
+}
+
+StreamBuffer::StreamBuffer() {
+ static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
+ buffer.Create();
+ glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
+ glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
+ mapped_pointer =
+ static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
+ for (OGLSync& sync : fences) {
+ sync.Create();
+ }
+}
+
+std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
+ ASSERT(size < REGION_SIZE);
+ for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
+ ++region) {
+ fences[region].Create();
+ }
+ used_iterator = iterator;
+
+ for (size_t region = Region(free_iterator) + 1,
+ region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
+ region < region_end; ++region) {
+ glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+ fences[region].Release();
+ }
+ if (iterator + size >= free_iterator) {
+ free_iterator = iterator + size;
+ }
+ if (iterator + size > STREAM_BUFFER_SIZE) {
+ for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
+ fences[region].Create();
+ }
+ used_iterator = 0;
+ iterator = 0;
+ free_iterator = size;
+
+ for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
+ glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+ fences[region].Release();
+ }
+ }
+ const size_t offset = iterator;
+ iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+ return {std::span(mapped_pointer + offset, size), offset};
+}
+
+StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) {
+ return upload_buffers.RequestMap(size, true);
+}
+
+StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) {
+ return download_buffers.RequestMap(size, false);
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
index 8fe927aaf..2c467be3d 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
@@ -4,8 +4,10 @@
#pragma once
#include <array>
+#include <optional>
#include <span>
#include <utility>
+#include <vector>
#include <glad/glad.h>
@@ -17,6 +19,33 @@ namespace OpenGL {
using namespace Common::Literals;
+struct StagingBufferMap {
+ ~StagingBufferMap();
+
+ std::span<u8> mapped_span;
+ size_t offset = 0;
+ OGLSync* sync;
+ GLuint buffer;
+};
+
+struct StagingBuffers {
+ explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
+ ~StagingBuffers();
+
+ StagingBufferMap RequestMap(size_t requested_size, bool insert_fence);
+
+ size_t RequestBuffer(size_t requested_size);
+
+ std::optional<size_t> FindBuffer(size_t requested_size);
+
+ std::vector<OGLSync> syncs;
+ std::vector<OGLBuffer> buffers;
+ std::vector<u8*> maps;
+ std::vector<size_t> sizes;
+ GLenum storage_flags;
+ GLenum map_flags;
+};
+
class StreamBuffer {
static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB;
static constexpr size_t NUM_SYNCS = 16;
@@ -48,4 +77,17 @@ private:
std::array<OGLSync, NUM_SYNCS> fences;
};
+class StagingBufferPool {
+public:
+ StagingBufferPool() = default;
+ ~StagingBufferPool() = default;
+
+ StagingBufferMap RequestUploadBuffer(size_t size);
+ StagingBufferMap RequestDownloadBuffer(size_t size);
+
+private:
+ StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
+ StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
+};
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
deleted file mode 100644
index 2005c8993..000000000
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include <array>
-#include <memory>
-#include <span>
-
-#include <glad/glad.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
-
-namespace OpenGL {
-
-StreamBuffer::StreamBuffer() {
- static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
- buffer.Create();
- glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
- glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
- mapped_pointer =
- static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
- for (OGLSync& sync : fences) {
- sync.Create();
- }
-}
-
-std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
- ASSERT(size < REGION_SIZE);
- for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
- ++region) {
- fences[region].Create();
- }
- used_iterator = iterator;
-
- for (size_t region = Region(free_iterator) + 1,
- region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
- region < region_end; ++region) {
- glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
- fences[region].Release();
- }
- if (iterator + size >= free_iterator) {
- free_iterator = iterator + size;
- }
- if (iterator + size > STREAM_BUFFER_SIZE) {
- for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
- fences[region].Create();
- }
- used_iterator = 0;
- iterator = 0;
- free_iterator = size;
-
- for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
- glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
- fences[region].Release();
- }
- }
- const size_t offset = iterator;
- iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
- return {std::span(mapped_pointer + offset, size), offset};
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 1e0823836..ad87f3e80 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -451,19 +451,14 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
}
}
-
} // Anonymous namespace
-ImageBufferMap::~ImageBufferMap() {
- if (sync) {
- sync->Create();
- }
-}
-
TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
- StateTracker& state_tracker_)
- : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager),
- format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} {
+ StateTracker& state_tracker_,
+ StagingBufferPool& staging_buffer_pool_)
+ : device{device_}, state_tracker{state_tracker_}, staging_buffer_pool{staging_buffer_pool_},
+ util_shaders(program_manager), format_conversion_pass{util_shaders},
+ resolution{Settings::values.resolution_info} {
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i];
@@ -553,12 +548,12 @@ void TextureCacheRuntime::Finish() {
glFinish();
}
-ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
- return upload_buffers.RequestMap(size, true);
+StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
+ return staging_buffer_pool.RequestUploadBuffer(size);
}
-ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
- return download_buffers.RequestMap(size, false);
+StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
+ return staging_buffer_pool.RequestDownloadBuffer(size);
}
u64 TextureCacheRuntime::GetDeviceMemoryUsage() const {
@@ -643,7 +638,7 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
is_linear ? GL_LINEAR : GL_NEAREST);
}
-void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
+void TextureCacheRuntime::AccelerateImageUpload(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
switch (image.info.type) {
case ImageType::e2D:
@@ -685,64 +680,6 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept {
return device.HasASTC();
}
-TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
- : storage_flags{storage_flags_}, map_flags{map_flags_} {}
-
-TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default;
-
-ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size,
- bool insert_fence) {
- const size_t index = RequestBuffer(requested_size);
- OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
- return ImageBufferMap{
- .mapped_span = std::span(maps[index], requested_size),
- .sync = sync,
- .buffer = buffers[index].handle,
- };
-}
-
-size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
- if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
- return *index;
- }
-
- OGLBuffer& buffer = buffers.emplace_back();
- buffer.Create();
- glNamedBufferStorage(buffer.handle, requested_size, nullptr,
- storage_flags | GL_MAP_PERSISTENT_BIT);
- maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
- map_flags | GL_MAP_PERSISTENT_BIT)));
-
- syncs.emplace_back();
- sizes.push_back(requested_size);
-
- ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
- maps.size() == sizes.size());
-
- return buffers.size() - 1;
-}
-
-std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) {
- size_t smallest_buffer = std::numeric_limits<size_t>::max();
- std::optional<size_t> found;
- const size_t num_buffers = sizes.size();
- for (size_t index = 0; index < num_buffers; ++index) {
- const size_t buffer_size = sizes[index];
- if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
- continue;
- }
- if (syncs[index].handle != 0) {
- if (!syncs[index].IsSignaled()) {
- continue;
- }
- syncs[index].Release();
- }
- smallest_buffer = buffer_size;
- found = index;
- }
- return found;
-}
-
Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
@@ -818,7 +755,7 @@ void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset,
}
}
-void Image::UploadMemory(const ImageBufferMap& map,
+void Image::UploadMemory(const StagingBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
UploadMemory(map.buffer, map.offset, copies);
}
@@ -865,7 +802,7 @@ void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> b
}
}
-void Image::DownloadMemory(ImageBufferMap& map,
+void Image::DownloadMemory(StagingBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
DownloadMemory(map.buffer, map.offset, copies);
}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 3e9b3302b..1148b73d7 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -11,6 +11,7 @@
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/texture_cache_base.h"
@@ -37,15 +38,6 @@ using VideoCommon::Region2D;
using VideoCommon::RenderTargets;
using VideoCommon::SlotVector;
-struct ImageBufferMap {
- ~ImageBufferMap();
-
- std::span<u8> mapped_span;
- size_t offset = 0;
- OGLSync* sync;
- GLuint buffer;
-};
-
struct FormatProperties {
GLenum compatibility_class;
bool compatibility_by_size;
@@ -74,14 +66,15 @@ class TextureCacheRuntime {
public:
explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
- StateTracker& state_tracker);
+ StateTracker& state_tracker,
+ StagingBufferPool& staging_buffer_pool);
~TextureCacheRuntime();
void Finish();
- ImageBufferMap UploadStagingBuffer(size_t size);
+ StagingBufferMap UploadStagingBuffer(size_t size);
- ImageBufferMap DownloadStagingBuffer(size_t size);
+ StagingBufferMap DownloadStagingBuffer(size_t size);
u64 GetDeviceLocalMemory() const {
return device_access_memory;
@@ -120,7 +113,7 @@ public:
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
- void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
+ void AccelerateImageUpload(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void InsertUploadMemoryBarrier();
@@ -149,35 +142,16 @@ public:
}
private:
- struct StagingBuffers {
- explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
- ~StagingBuffers();
-
- ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
-
- size_t RequestBuffer(size_t requested_size);
-
- std::optional<size_t> FindBuffer(size_t requested_size);
-
- std::vector<OGLSync> syncs;
- std::vector<OGLBuffer> buffers;
- std::vector<u8*> maps;
- std::vector<size_t> sizes;
- GLenum storage_flags;
- GLenum map_flags;
- };
-
const Device& device;
StateTracker& state_tracker;
+ StagingBufferPool& staging_buffer_pool;
+
UtilShaders util_shaders;
FormatConversionPass format_conversion_pass;
std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
bool has_broken_texture_view_formats = false;
- StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
- StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
-
OGLTexture null_image_1d_array;
OGLTexture null_image_cube_array;
OGLTexture null_image_3d;
@@ -213,7 +187,7 @@ public:
void UploadMemory(GLuint buffer_handle, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
- void UploadMemory(const ImageBufferMap& map,
+ void UploadMemory(const StagingBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
@@ -222,7 +196,8 @@ public:
void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
- void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
+ void DownloadMemory(StagingBufferMap& map,
+ std::span<const VideoCommon::BufferImageCopy> copies);
GLuint StorageHandle() noexcept;
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 2c7ac210b..544982d18 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -19,6 +19,7 @@
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
@@ -63,7 +64,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
UtilShaders::~UtilShaders() = default;
-void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
+void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) {
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
@@ -111,7 +112,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
program_manager.RestoreGuestCompute();
}
-void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
+void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
@@ -148,7 +149,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
program_manager.RestoreGuestCompute();
}
-void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
+void UtilShaders::BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
@@ -189,7 +190,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
program_manager.RestoreGuestCompute();
}
-void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
+void UtilShaders::PitchUpload(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 9013808e7..feecd404c 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -16,23 +16,23 @@ namespace OpenGL {
class Image;
class ProgramManager;
-struct ImageBufferMap;
+struct StagingBufferMap;
class UtilShaders {
public:
explicit UtilShaders(ProgramManager& program_manager);
~UtilShaders();
- void ASTCDecode(Image& image, const ImageBufferMap& map,
+ void ASTCDecode(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
- void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
+ void BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
- void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
+ void BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
- void PitchUpload(Image& image, const ImageBufferMap& map,
+ void PitchUpload(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void CopyBC4(Image& dst_image, Image& src_image,
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 5e9602905..b1f3c071f 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -157,6 +157,7 @@ struct BufferCacheParams {
static constexpr bool USE_MEMORY_MAPS = true;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
+ static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;