26 files changed, 1004 insertions, 338 deletions
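Note on the page-table change below (externals/dynarmic, src/core/arm/dynarmic/arm_dynarmic.cpp and src/core/memory.cpp): the host page table switches to "absolute offset" entries, i.e. each slot stores the host pointer minus the page's base virtual address, so the read/write fast path can add the full virtual address without masking off the page offset first (matching Dynarmic's absolute_offset_page_table = true). What follows is a minimal illustrative sketch of that layout under stated assumptions; PageTable, MapPage and GetPointer are simplified stand-in names, not yuzu's actual classes, and the 4 KiB page size is assumed.

#include <cstddef>
#include <cstdint>
#include <vector>

// Sketch only: each table entry holds `host_pointer - page_base_vaddr`,
// so a lookup adds the absolute vaddr instead of (vaddr & PAGE_MASK).
constexpr std::size_t PAGE_BITS = 12;                       // assumed 4 KiB pages
constexpr std::size_t PAGE_SIZE = std::size_t{1} << PAGE_BITS;

struct PageTable {
    std::vector<std::uint8_t*> pointers; // indexed by vaddr >> PAGE_BITS
};

inline void MapPage(PageTable& table, std::uint64_t vaddr, std::uint8_t* host_memory) {
    const std::uint64_t page = vaddr >> PAGE_BITS;
    // Store the biased pointer; adding the full vaddr later yields host_memory + page offset.
    table.pointers[page] = host_memory - (page << PAGE_BITS);
}

inline std::uint8_t* GetPointer(const PageTable& table, std::uint64_t vaddr) {
    std::uint8_t* const entry = table.pointers[vaddr >> PAGE_BITS];
    return entry ? entry + vaddr : nullptr; // fast path needs no PAGE_MASK
}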
diff --git a/externals/dynarmic b/externals/dynarmic -Subproject 087a74417abfb0a8ae3bc1463d0d476a9bf94e5 +Subproject f6ae9e1c3311b747b7b91fd903c62bf40b3b9c8 diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index f8c7f0efd..e825c0526 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -141,6 +141,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag config.page_table = reinterpret_cast<void**>(page_table.pointers.data()); config.page_table_address_space_bits = address_space_bits; config.silently_mirror_page_table = false; + config.absolute_offset_page_table = true; // Multi-process state config.processor_id = core_index; diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp index 2e53b3221..767158444 100644 --- a/src/core/hle/service/nifm/nifm.cpp +++ b/src/core/hle/service/nifm/nifm.cpp @@ -9,6 +9,7 @@ #include "core/hle/kernel/writable_event.h" #include "core/hle/service/nifm/nifm.h" #include "core/hle/service/service.h" +#include "core/settings.h" namespace Service::NIFM { @@ -86,7 +87,12 @@ private: IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); - rb.PushEnum(RequestState::Connected); + + if (Settings::values.bcat_backend == "none") { + rb.PushEnum(RequestState::NotSubmitted); + } else { + rb.PushEnum(RequestState::Connected); + } } void GetResult(Kernel::HLERequestContext& ctx) { @@ -194,14 +200,22 @@ private: IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); - rb.Push<u8>(1); + if (Settings::values.bcat_backend == "none") { + rb.Push<u8>(0); + } else { + rb.Push<u8>(1); + } } void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_NIFM, "(STUBBED) called"); IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); - rb.Push<u8>(1); + if (Settings::values.bcat_backend == "none") { + rb.Push<u8>(0); + } else { + rb.Push<u8>(1); + } } Core::System& system; }; diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index a58ea9c59..62752e419 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -198,7 +198,7 @@ void NVFlinger::Compose() { const auto& igbp_buffer = buffer->get().igbp_buffer; - const auto& gpu = system.GPU(); + auto& gpu = system.GPU(); const auto& multi_fence = buffer->get().multi_fence; for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { const auto& fence = multi_fence.fences[fence_id]; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 91bf07a92..3c2a29d9b 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -146,7 +146,7 @@ struct Memory::Impl { u8* GetPointer(const VAddr vaddr) { u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; if (page_pointer != nullptr) { - return page_pointer + (vaddr & PAGE_MASK); + return page_pointer + vaddr; } if (current_page_table->attributes[vaddr >> PAGE_BITS] == @@ -229,7 +229,8 @@ struct Memory::Impl { case Common::PageType::Memory: { DEBUG_ASSERT(page_table.pointers[page_index]); - const u8* const src_ptr = page_table.pointers[page_index] + page_offset; + const u8* const src_ptr = + page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); std::memcpy(dest_buffer, src_ptr, copy_amount); break; } @@ -276,7 +277,8 @@ struct Memory::Impl { case Common::PageType::Memory: { DEBUG_ASSERT(page_table.pointers[page_index]); - u8* const dest_ptr = 
page_table.pointers[page_index] + page_offset; + u8* const dest_ptr = + page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); std::memcpy(dest_ptr, src_buffer, copy_amount); break; } @@ -322,7 +324,8 @@ struct Memory::Impl { case Common::PageType::Memory: { DEBUG_ASSERT(page_table.pointers[page_index]); - u8* dest_ptr = page_table.pointers[page_index] + page_offset; + u8* dest_ptr = + page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); std::memset(dest_ptr, 0, copy_amount); break; } @@ -368,7 +371,8 @@ struct Memory::Impl { } case Common::PageType::Memory: { DEBUG_ASSERT(page_table.pointers[page_index]); - const u8* src_ptr = page_table.pointers[page_index] + page_offset; + const u8* src_ptr = + page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); WriteBlock(process, dest_addr, src_ptr, copy_amount); break; } @@ -446,7 +450,8 @@ struct Memory::Impl { page_type = Common::PageType::Unmapped; } else { page_type = Common::PageType::Memory; - current_page_table->pointers[vaddr >> PAGE_BITS] = pointer; + current_page_table->pointers[vaddr >> PAGE_BITS] = + pointer - (vaddr & ~PAGE_MASK); } break; } @@ -493,7 +498,9 @@ struct Memory::Impl { memory); } else { while (base != end) { - page_table.pointers[base] = memory; + page_table.pointers[base] = memory - (base << PAGE_BITS); + ASSERT_MSG(page_table.pointers[base], + "memory mapping base yield a nullptr within the table"); base += 1; memory += PAGE_SIZE; @@ -518,7 +525,7 @@ struct Memory::Impl { if (page_pointer != nullptr) { // NOTE: Avoid adding any extra logic to this fast-path block T value; - std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T)); + std::memcpy(&value, &page_pointer[vaddr], sizeof(T)); return value; } @@ -559,7 +566,7 @@ struct Memory::Impl { u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; if (page_pointer != nullptr) { // NOTE: Avoid adding any extra logic to this fast-path block - std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T)); + std::memcpy(&page_pointer[vaddr], &data, sizeof(T)); return; } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 65d7b9f93..c80171fe6 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -155,12 +155,16 @@ if (ENABLE_VULKAN) renderer_vulkan/maxwell_to_vk.h renderer_vulkan/vk_buffer_cache.cpp renderer_vulkan/vk_buffer_cache.h + renderer_vulkan/vk_descriptor_pool.cpp + renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_device.cpp renderer_vulkan/vk_device.h renderer_vulkan/vk_image.cpp renderer_vulkan/vk_image.h renderer_vulkan/vk_memory_manager.cpp renderer_vulkan/vk_memory_manager.h + renderer_vulkan/vk_renderpass_cache.cpp + renderer_vulkan/vk_renderpass_cache.h renderer_vulkan/vk_resource_manager.cpp renderer_vulkan/vk_resource_manager.h renderer_vulkan/vk_sampler_cache.cpp @@ -174,7 +178,9 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_stream_buffer.cpp renderer_vulkan/vk_stream_buffer.h renderer_vulkan/vk_swapchain.cpp - renderer_vulkan/vk_swapchain.h) + renderer_vulkan/vk_swapchain.h + renderer_vulkan/vk_update_descriptor.cpp + renderer_vulkan/vk_update_descriptor.h) target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) target_compile_definitions(video_core PRIVATE HAS_VULKAN) diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 095660115..b9c5c41a2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -66,19 +66,20 @@ const DmaPusher& GPU::DmaPusher() 
const { return *dma_pusher; } -void GPU::WaitFence(u32 syncpoint_id, u32 value) const { +void GPU::WaitFence(u32 syncpoint_id, u32 value) { // Synced GPU, is always in sync if (!is_async) { return; } MICROPROFILE_SCOPE(GPU_wait); - while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { - } + std::unique_lock lock{sync_mutex}; + sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; }); } void GPU::IncrementSyncPoint(const u32 syncpoint_id) { syncpoints[syncpoint_id]++; std::lock_guard lock{sync_mutex}; + sync_cv.notify_all(); if (!syncpt_interrupts[syncpoint_id].empty()) { u32 value = syncpoints[syncpoint_id].load(); auto it = syncpt_interrupts[syncpoint_id].begin(); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ecc338ae9..b648317bb 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -6,6 +6,7 @@ #include <array> #include <atomic> +#include <condition_variable> #include <list> #include <memory> #include <mutex> @@ -181,7 +182,7 @@ public: virtual void WaitIdle() const = 0; /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. - void WaitFence(u32 syncpoint_id, u32 value) const; + void WaitFence(u32 syncpoint_id, u32 value); void IncrementSyncPoint(u32 syncpoint_id); @@ -312,6 +313,8 @@ private: std::mutex sync_mutex; + std::condition_variable sync_cv; + const bool is_async; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index a311dbcfe..f9f7a97b5 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -751,6 +751,9 @@ private: Expression Visit(const Node& node) { if (const auto operation = std::get_if<OperationNode>(&*node)) { + if (const auto amend_index = operation->GetAmendIndex()) { + Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); + } const auto operation_index = static_cast<std::size_t>(operation->GetCode()); if (operation_index >= operation_decompilers.size()) { UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); @@ -872,6 +875,9 @@ private: } if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { + if (const auto amend_index = conditional->GetAmendIndex()) { + Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); + } // It's invalid to call conditional on nested nodes, use an operation instead code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); ++code.scope; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 46da81aaa..1ba544943 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -2,124 +2,145 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include <algorithm> #include <cstring> #include <memory> #include <optional> #include <tuple> -#include "common/alignment.h" #include "common/assert.h" -#include "core/memory.h" -#include "video_core/memory_manager.h" +#include "common/bit_util.h" +#include "core/core.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" namespace Vulkan { -CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, - std::size_t alignment, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, - alignment{alignment} {} - -VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, - Memory::Memory& cpu_memory_, - VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, - VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size) - : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager}, cpu_memory{ - cpu_memory_} { - const auto usage = vk::BufferUsageFlagBits::eVertexBuffer | - vk::BufferUsageFlagBits::eIndexBuffer | - vk::BufferUsageFlagBits::eUniformBuffer; - const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead | - vk::AccessFlagBits::eUniformRead; - stream_buffer = - std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access, - vk::PipelineStageFlagBits::eAllCommands); - buffer_handle = stream_buffer->GetBuffer(); -} +namespace { -VKBufferCache::~VKBufferCache() = default; +const auto BufferUsage = + vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | + vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer; + +const auto UploadPipelineStage = + vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput | + vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | + vk::PipelineStageFlagBits::eComputeShader; -u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) { - const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)}; - ASSERT_MSG(cpu_addr, "Invalid GPU address"); - - // Cache management is a big overhead, so only cache entries with a given size. - // TODO: Figure out which size is the best for given games. 
- cache &= size >= 2048; - - u8* const host_ptr{cpu_memory.GetPointer(*cpu_addr)}; - if (cache) { - const auto entry = TryGet(host_ptr); - if (entry) { - if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { - return entry->GetOffset(); - } - Unregister(entry); - } - } - - AlignBuffer(alignment); - const u64 uploaded_offset = buffer_offset; - - if (host_ptr == nullptr) { - return uploaded_offset; - } - - std::memcpy(buffer_ptr, host_ptr, size); - buffer_ptr += size; - buffer_offset += size; - - if (cache) { - auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset, - alignment, host_ptr); - Register(entry); - } - - return uploaded_offset; +const auto UploadAccessBarriers = + vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead | + vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead | + vk::AccessFlagBits::eIndexRead; + +auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { + return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage); } -u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) { - AlignBuffer(alignment); - std::memcpy(buffer_ptr, raw_pointer, size); - const u64 uploaded_offset = buffer_offset; +} // Anonymous namespace + +CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, + CacheAddr cache_addr, std::size_t size) + : VideoCommon::BufferBlock{cache_addr, size} { + const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), + BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | + vk::BufferUsageFlagBits::eTransferDst, + vk::SharingMode::eExclusive, 0, nullptr); - buffer_ptr += size; - buffer_offset += size; - return uploaded_offset; + const auto& dld{device.GetDispatchLoader()}; + const auto dev{device.GetLogical()}; + buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld); + buffer.commit = memory_manager.Commit(*buffer.handle, false); } -std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) { - AlignBuffer(alignment); - u8* const uploaded_ptr = buffer_ptr; - const u64 uploaded_offset = buffer_offset; +CachedBufferBlock::~CachedBufferBlock() = default; + +VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStagingBufferPool& staging_pool) + : VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system, + CreateStreamBuffer(device, + scheduler)}, + device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ + staging_pool} {} - buffer_ptr += size; - buffer_offset += size; - return {uploaded_ptr, uploaded_offset}; +VKBufferCache::~VKBufferCache() = default; + +Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { + return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size); } -void VKBufferCache::Reserve(std::size_t max_size) { - bool invalidate; - std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size); - buffer_offset = buffer_offset_base; +const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { + return buffer->GetHandle(); +} + +const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) { + size = std::max(size, std::size_t(4)); + const auto& empty = staging_pool.GetUnusedBuffer(size, false); + scheduler.RequestOutsideRenderPassOperationContext(); + 
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) { + cmdbuf.fillBuffer(buffer, 0, size, 0, dld); + }); + return &*empty.handle; +} - if (invalidate) { - InvalidateAll(); - } +void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + const u8* data) { + const auto& staging = staging_pool.GetUnusedBuffer(size, true); + std::memcpy(staging.commit->Map(size), data, size); + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, + size](auto cmdbuf, auto& dld) { + cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld); + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, + {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, + offset, size)}, + {}, dld); + }); } -void VKBufferCache::Send() { - stream_buffer->Send(buffer_offset - buffer_offset_base); +void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + u8* data) { + const auto& staging = staging_pool.GetUnusedBuffer(size, true); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, + size](auto cmdbuf, auto& dld) { + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | + vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eTransfer, {}, {}, + {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite, + vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)}, + {}, dld); + cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld); + }); + scheduler.Finish(); + + std::memcpy(data, staging.commit->Map(size), size); } -void VKBufferCache::AlignBuffer(std::size_t alignment) { - // Align the offset, not the mapped pointer - const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment); - buffer_ptr += offset_aligned - buffer_offset; - buffer_offset = offset_aligned; +void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, + std::size_t dst_offset, std::size_t size) { + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset, + dst_offset, size](auto cmdbuf, auto& dld) { + cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld); + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, + {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead, + vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size), + vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer, + dst_offset, size)}, + {}, dld); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index daa8ccf66..3f38eed0c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -5,105 +5,74 @@ #pragma once #include <memory> -#include <tuple> +#include <unordered_map> +#include <vector> #include "common/common_types.h" -#include "video_core/gpu.h" 
+#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/rasterizer_cache.h" #include "video_core/renderer_vulkan/declarations.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" -namespace Memory { -class Memory; -} - -namespace Tegra { -class MemoryManager; +namespace Core { +class System; } namespace Vulkan { class VKDevice; -class VKFence; class VKMemoryManager; -class VKStreamBuffer; +class VKScheduler; -class CachedBufferEntry final : public RasterizerCacheObject { +class CachedBufferBlock final : public VideoCommon::BufferBlock { public: - explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment, - u8* host_ptr); + explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, + CacheAddr cache_addr, std::size_t size); + ~CachedBufferBlock(); - VAddr GetCpuAddr() const override { - return cpu_addr; - } - - std::size_t GetSizeInBytes() const override { - return size; - } - - std::size_t GetSize() const { - return size; - } - - u64 GetOffset() const { - return offset; - } - - std::size_t GetAlignment() const { - return alignment; + const vk::Buffer* GetHandle() const { + return &*buffer.handle; } private: - VAddr cpu_addr{}; - std::size_t size{}; - u64 offset{}; - std::size_t alignment{}; + VKBuffer buffer; }; -class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { +using Buffer = std::shared_ptr<CachedBufferBlock>; + +class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> { public: - explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, Memory::Memory& cpu_memory_, - VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, - VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size); + explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStagingBufferPool& staging_pool); ~VKBufferCache(); - /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been - /// allocated. - u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true); + const vk::Buffer* GetEmptyBuffer(std::size_t size) override; - /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. - u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4); +protected: + void WriteBarrier() override {} - /// Reserves memory to be used by host's CPU. Returns mapped address and offset. - std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4); + Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; - /// Reserves a region of memory to be used in subsequent upload/reserve operations. - void Reserve(std::size_t max_size); + const vk::Buffer* ToHandle(const Buffer& buffer) override; - /// Ensures that the set data is sent to the device. - void Send(); + void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + const u8* data) override; - /// Returns the buffer cache handle. 
- vk::Buffer GetBuffer() const { - return buffer_handle; - } + void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + u8* data) override; -protected: - // We do not have to flush this cache as things in it are never modified by us. - void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} + void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, + std::size_t dst_offset, std::size_t size) override; private: - void AlignBuffer(std::size_t alignment); - - Tegra::MemoryManager& tegra_memory_manager; - Memory::Memory& cpu_memory; - - std::unique_ptr<VKStreamBuffer> stream_buffer; - vk::Buffer buffer_handle; - - u8* buffer_ptr = nullptr; - u64 buffer_offset = 0; - u64 buffer_offset_base = 0; + const VKDevice& device; + VKMemoryManager& memory_manager; + VKScheduler& scheduler; + VKStagingBufferPool& staging_pool; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp new file mode 100644 index 000000000..cc7c281a0 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -0,0 +1,89 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <memory> +#include <vector> + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" + +namespace Vulkan { + +// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. +constexpr std::size_t SETS_GROW_RATE = 0x20; + +DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool, + vk::DescriptorSetLayout layout) + : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {} + +DescriptorAllocator::~DescriptorAllocator() = default; + +vk::DescriptorSet DescriptorAllocator::Commit(VKFence& fence) { + return *descriptors[CommitResource(fence)]; +} + +void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { + auto new_sets = descriptor_pool.AllocateDescriptors(layout, end - begin); + descriptors.insert(descriptors.end(), std::make_move_iterator(new_sets.begin()), + std::make_move_iterator(new_sets.end())); +} + +VKDescriptorPool::VKDescriptorPool(const VKDevice& device) + : device{device}, active_pool{AllocateNewPool()} {} + +VKDescriptorPool::~VKDescriptorPool() = default; + +vk::DescriptorPool VKDescriptorPool::AllocateNewPool() { + static constexpr u32 num_sets = 0x20000; + static constexpr vk::DescriptorPoolSize pool_sizes[] = { + {vk::DescriptorType::eUniformBuffer, num_sets * 90}, + {vk::DescriptorType::eStorageBuffer, num_sets * 60}, + {vk::DescriptorType::eUniformTexelBuffer, num_sets * 64}, + {vk::DescriptorType::eCombinedImageSampler, num_sets * 64}, + {vk::DescriptorType::eStorageImage, num_sets * 40}}; + + const vk::DescriptorPoolCreateInfo create_info( + vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, num_sets, + static_cast<u32>(std::size(pool_sizes)), std::data(pool_sizes)); + const auto dev = device.GetLogical(); + return *pools.emplace_back( + dev.createDescriptorPoolUnique(create_info, nullptr, device.GetDispatchLoader())); +} + +std::vector<UniqueDescriptorSet> VKDescriptorPool::AllocateDescriptors( + vk::DescriptorSetLayout layout, std::size_t count) { + std::vector 
layout_copies(count, layout); + vk::DescriptorSetAllocateInfo allocate_info(active_pool, static_cast<u32>(count), + layout_copies.data()); + + std::vector<vk::DescriptorSet> sets(count); + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + switch (const auto result = dev.allocateDescriptorSets(&allocate_info, sets.data(), dld)) { + case vk::Result::eSuccess: + break; + case vk::Result::eErrorOutOfPoolMemory: + active_pool = AllocateNewPool(); + allocate_info.descriptorPool = active_pool; + if (dev.allocateDescriptorSets(&allocate_info, sets.data(), dld) == vk::Result::eSuccess) { + break; + } + [[fallthrough]]; + default: + vk::throwResultException(result, "vk::Device::allocateDescriptorSetsUnique"); + } + + vk::PoolFree deleter(dev, active_pool, dld); + std::vector<UniqueDescriptorSet> unique_sets; + unique_sets.reserve(count); + for (const auto set : sets) { + unique_sets.push_back(UniqueDescriptorSet{set, deleter}); + } + return unique_sets; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h new file mode 100644 index 000000000..a441dbc0f --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -0,0 +1,56 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <vector> + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" + +namespace Vulkan { + +class VKDescriptorPool; + +class DescriptorAllocator final : public VKFencedPool { +public: + explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, vk::DescriptorSetLayout layout); + ~DescriptorAllocator() override; + + DescriptorAllocator(const DescriptorAllocator&) = delete; + + vk::DescriptorSet Commit(VKFence& fence); + +protected: + void Allocate(std::size_t begin, std::size_t end) override; + +private: + VKDescriptorPool& descriptor_pool; + const vk::DescriptorSetLayout layout; + + std::vector<UniqueDescriptorSet> descriptors; +}; + +class VKDescriptorPool final { + friend DescriptorAllocator; + +public: + explicit VKDescriptorPool(const VKDevice& device); + ~VKDescriptorPool(); + +private: + vk::DescriptorPool AllocateNewPool(); + + std::vector<UniqueDescriptorSet> AllocateDescriptors(vk::DescriptorSetLayout layout, + std::size_t count); + + const VKDevice& device; + + std::vector<UniqueDescriptorPool> pools; + vk::DescriptorPool active_pool; +}; + +} // namespace Vulkan
\ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 0451babbf..9cc9979d0 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -6,6 +6,7 @@ #include <optional> #include <tuple> #include <vector> + #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" @@ -16,34 +17,32 @@ namespace Vulkan { -// TODO(Rodrigo): Fine tune this number -constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024; +namespace { + +u64 GetAllocationChunkSize(u64 required_size) { + static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20}; + auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size); + return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20); +} + +} // Anonymous namespace class VKMemoryAllocation final { public: explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, - vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type) - : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size}, - shifted_type{ShiftType(type)}, is_mappable{properties & - vk::MemoryPropertyFlagBits::eHostVisible} { - if (is_mappable) { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld)); - } - } + vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type) + : device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size}, + shifted_type{ShiftType(type)} {} ~VKMemoryAllocation() { const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); - if (is_mappable) - dev.unmapMemory(memory, dld); dev.free(memory, nullptr, dld); } VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) { - auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size), - static_cast<u64>(alignment)); + auto found = TryFindFreeSection(free_iterator, allocation_size, + static_cast<u64>(commit_size), static_cast<u64>(alignment)); if (!found) { found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size), static_cast<u64>(alignment)); @@ -52,8 +51,7 @@ public: return nullptr; } } - u8* address = is_mappable ? base_address + *found : nullptr; - auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found, + auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found, *found + commit_size); commits.push_back(commit.get()); @@ -65,12 +63,10 @@ public: void Free(const VKMemoryCommitImpl* commit) { ASSERT(commit); - const auto it = - std::find_if(commits.begin(), commits.end(), - [&](const auto& stored_commit) { return stored_commit == commit; }); + + const auto it = std::find(std::begin(commits), std::end(commits), commit); if (it == commits.end()) { - LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!"); - UNREACHABLE(); + UNREACHABLE_MSG("Freeing unallocated commit!"); return; } commits.erase(it); @@ -88,11 +84,11 @@ private: } /// A memory allocator, it may return a free region between "start" and "end" with the solicited - /// requeriments. + /// requirements. 
std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const { - u64 iterator = start; - while (iterator + size < end) { - const u64 try_left = Common::AlignUp(iterator, alignment); + u64 iterator = Common::AlignUp(start, alignment); + while (iterator + size <= end) { + const u64 try_left = iterator; const u64 try_right = try_left + size; bool overlap = false; @@ -100,7 +96,7 @@ private: const auto [commit_left, commit_right] = commit->interval; if (try_left < commit_right && commit_left < try_right) { // There's an overlap, continue the search where the overlapping commit ends. - iterator = commit_right; + iterator = Common::AlignUp(commit_right, alignment); overlap = true; break; } @@ -110,6 +106,7 @@ private: return try_left; } } + // No free regions where found, return an empty optional. return std::nullopt; } @@ -117,12 +114,8 @@ private: const VKDevice& device; ///< Vulkan device. const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. const vk::MemoryPropertyFlags properties; ///< Vulkan properties. - const u64 alloc_size; ///< Size of this allocation. + const u64 allocation_size; ///< Size of this allocation. const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted. - const bool is_mappable; ///< Whether the allocation is mappable. - - /// Base address of the mapped pointer. - u8* base_address{}; /// Hints where the next free region is likely going to be. u64 free_iterator{}; @@ -132,13 +125,15 @@ private: }; VKMemoryManager::VKMemoryManager(const VKDevice& device) - : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())}, - is_memory_unified{GetMemoryUnified(props)} {} + : device{device}, properties{device.GetPhysical().getMemoryProperties( + device.GetDispatchLoader())}, + is_memory_unified{GetMemoryUnified(properties)} {} VKMemoryManager::~VKMemoryManager() = default; -VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) { - ASSERT(reqs.size < ALLOC_CHUNK_SIZE); +VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements, + bool host_visible) { + const u64 chunk_size = GetAllocationChunkSize(requirements.size); // When a host visible commit is asked, search for host visible and coherent, otherwise search // for a fast device local type. @@ -147,32 +142,21 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent : vk::MemoryPropertyFlagBits::eDeviceLocal; - const auto TryCommit = [&]() -> VKMemoryCommit { - for (auto& alloc : allocs) { - if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits)) - continue; - - if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) { - return commit; - } - } - return {}; - }; - - if (auto commit = TryCommit(); commit) { + if (auto commit = TryAllocCommit(requirements, wanted_properties)) { return commit; } // Commit has failed, allocate more memory. - if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) { - // TODO(Rodrigo): Try to use host memory. - LOG_CRITICAL(Render_Vulkan, "Ran out of memory!"); - UNREACHABLE(); + if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) { + // TODO(Rodrigo): Handle these situations in some way like flushing to guest memory. + // Allocation has failed, panic. 
+ UNREACHABLE_MSG("Ran out of VRAM!"); + return {}; } // Commit again, this time it won't fail since there's a fresh allocation above. If it does, // there's a bug. - auto commit = TryCommit(); + auto commit = TryAllocCommit(requirements, wanted_properties); ASSERT(commit); return commit; } @@ -180,8 +164,7 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); - const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld); - auto commit = Commit(requeriments, host_visible); + auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible); dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld); return commit; } @@ -189,25 +172,23 @@ VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) { const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); - const auto requeriments = dev.getImageMemoryRequirements(image, dld); - auto commit = Commit(requeriments, host_visible); + auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible); dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld); return commit; } bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size) { - const u32 type = [&]() { - for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { - const auto flags = props.memoryTypes[type_index].propertyFlags; + const u32 type = [&] { + for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) { + const auto flags = properties.memoryTypes[type_index].propertyFlags; if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) { // The type matches in type and in the wanted properties. return type_index; } } - LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!"); - UNREACHABLE(); - return 0u; + UNREACHABLE_MSG("Couldn't find a compatible memory type!"); + return 0U; }(); const auto dev = device.GetLogical(); @@ -216,19 +197,33 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 // Try to allocate found type. 
const vk::MemoryAllocateInfo memory_ai(size, type); vk::DeviceMemory memory; - if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); + if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); res != vk::Result::eSuccess) { LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res)); return false; } - allocs.push_back( + allocations.push_back( std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type)); return true; } -/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) { - for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) { - if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { +VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements, + vk::MemoryPropertyFlags wanted_properties) { + for (auto& allocation : allocations) { + if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) { + continue; + } + if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) { + return commit; + } + } + return {}; +} + +/*static*/ bool VKMemoryManager::GetMemoryUnified( + const vk::PhysicalDeviceMemoryProperties& properties) { + for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) { + if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { // Memory is considered unified when heaps are device local only. return false; } @@ -236,17 +231,28 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 return true; } -VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, - u8* data, u64 begin, u64 end) - : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {} +VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, + vk::DeviceMemory memory, u64 begin, u64 end) + : device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {} VKMemoryCommitImpl::~VKMemoryCommitImpl() { allocation->Free(this); } -u8* VKMemoryCommitImpl::GetData() const { - ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit."); - return data; +MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { + const auto dev = device.GetLogical(); + const auto address = reinterpret_cast<u8*>( + dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader())); + return MemoryMap{this, address}; +} + +void VKMemoryCommitImpl::Unmap() const { + const auto dev = device.GetLogical(); + dev.unmapMemory(memory, device.GetDispatchLoader()); +} + +MemoryMap VKMemoryCommitImpl::Map() const { + return Map(interval.second - interval.first); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index 073597b35..cd00bb91b 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -12,6 +12,7 @@ namespace Vulkan { +class MemoryMap; class VKDevice; class VKMemoryAllocation; class VKMemoryCommitImpl; @@ -21,13 +22,14 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>; class VKMemoryManager final { public: explicit VKMemoryManager(const VKDevice& device); + VKMemoryManager(const VKMemoryManager&) = delete; ~VKMemoryManager(); /** * Commits a memory with the specified 
requeriments. - * @param reqs Requeriments returned from a Vulkan call. + * @param requirements Requirements returned from a Vulkan call. * @param host_visible Signals the allocator that it *must* use host visible and coherent - * memory. When passing false, it will try to allocate device local memory. + * memory. When passing false, it will try to allocate device local memory. * @returns A memory commit. */ VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible); @@ -47,25 +49,35 @@ private: /// Allocates a chunk of memory. bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); + /// Tries to allocate a memory commit. + VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements, + vk::MemoryPropertyFlags wanted_properties); + /// Returns true if the device uses an unified memory model. - static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props); + static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties); - const VKDevice& device; ///< Device handler. - const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties. - const bool is_memory_unified; ///< True if memory model is unified. - std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations. + const VKDevice& device; ///< Device handler. + const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties. + const bool is_memory_unified; ///< True if memory model is unified. + std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations. }; class VKMemoryCommitImpl final { friend VKMemoryAllocation; + friend MemoryMap; public: - explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data, - u64 begin, u64 end); + explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, + vk::DeviceMemory memory, u64 begin, u64 end); ~VKMemoryCommitImpl(); - /// Returns the writeable memory map. The commit has to be mappable. - u8* GetData() const; + /// Maps a memory region and returns a pointer to it. + /// It's illegal to have more than one memory map at the same time. + MemoryMap Map(u64 size, u64 offset = 0) const; + + /// Maps the whole commit and returns a pointer to it. + /// It's illegal to have more than one memory map at the same time. + MemoryMap Map() const; /// Returns the Vulkan memory handler. vk::DeviceMemory GetMemory() const { @@ -78,10 +90,46 @@ public: } private: + /// Unmaps memory. + void Unmap() const; + + const VKDevice& device; ///< Vulkan device. std::pair<u64, u64> interval{}; ///< Interval where the commit exists. vk::DeviceMemory memory; ///< Vulkan device memory handler. VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. - u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included. +}; + +/// Holds ownership of a memory map. +class MemoryMap final { +public: + explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address) + : commit{commit}, address{address} {} + + ~MemoryMap() { + if (commit) { + commit->Unmap(); + } + } + + /// Prematurely releases the memory map. + void Release() { + commit->Unmap(); + commit = nullptr; + } + + /// Returns the address of the memory map. + u8* GetAddress() const { + return address; + } + + /// Returns the address of the memory map; + operator u8*() const { + return address; + } + +private: + const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. 
+ u8* address{}; ///< Address to the mapped memory. }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp new file mode 100644 index 000000000..93f5d7ba0 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -0,0 +1,100 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <memory> +#include <vector> + +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" + +namespace Vulkan { + +VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {} + +VKRenderPassCache::~VKRenderPassCache() = default; + +vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { + const auto [pair, is_cache_miss] = cache.try_emplace(params); + auto& entry = pair->second; + if (is_cache_miss) { + entry = CreateRenderPass(params); + } + return *entry; +} + +UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { + std::vector<vk::AttachmentDescription> descriptors; + std::vector<vk::AttachmentReference> color_references; + + for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) { + const auto attachment = params.color_attachments[rt]; + const auto format = + MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, attachment.pixel_format); + ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", + static_cast<u32>(attachment.pixel_format)); + + // TODO(Rodrigo): Add eMayAlias when it's needed. + const auto color_layout = attachment.is_texception + ? vk::ImageLayout::eGeneral + : vk::ImageLayout::eColorAttachmentOptimal; + descriptors.emplace_back(vk::AttachmentDescriptionFlagBits::eMayAlias, format.format, + vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, + vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare, + vk::AttachmentStoreOp::eDontCare, color_layout, color_layout); + color_references.emplace_back(static_cast<u32>(rt), color_layout); + } + + vk::AttachmentReference zeta_attachment_ref; + if (params.has_zeta) { + const auto format = + MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format); + ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", + static_cast<u32>(params.zeta_pixel_format)); + + const auto zeta_layout = params.zeta_texception + ? vk::ImageLayout::eGeneral + : vk::ImageLayout::eDepthStencilAttachmentOptimal; + descriptors.emplace_back(vk::AttachmentDescriptionFlags{}, format.format, + vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, + vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eLoad, + vk::AttachmentStoreOp::eStore, zeta_layout, zeta_layout); + zeta_attachment_ref = + vk::AttachmentReference(static_cast<u32>(params.color_attachments.size()), zeta_layout); + } + + const vk::SubpassDescription subpass_description( + {}, vk::PipelineBindPoint::eGraphics, 0, nullptr, static_cast<u32>(color_references.size()), + color_references.data(), nullptr, params.has_zeta ? 
&zeta_attachment_ref : nullptr, 0, + nullptr); + + vk::AccessFlags access; + vk::PipelineStageFlags stage; + if (!color_references.empty()) { + access |= + vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite; + stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput; + } + + if (params.has_zeta) { + access |= vk::AccessFlagBits::eDepthStencilAttachmentRead | + vk::AccessFlagBits::eDepthStencilAttachmentWrite; + stage |= vk::PipelineStageFlagBits::eLateFragmentTests; + } + + const vk::SubpassDependency subpass_dependency(VK_SUBPASS_EXTERNAL, 0, stage, stage, {}, access, + {}); + + const vk::RenderPassCreateInfo create_info({}, static_cast<u32>(descriptors.size()), + descriptors.data(), 1, &subpass_description, 1, + &subpass_dependency); + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + return dev.createRenderPassUnique(create_info, nullptr, dld); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h new file mode 100644 index 000000000..b49b2db48 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h @@ -0,0 +1,97 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <tuple> +#include <unordered_map> + +#include <boost/container/static_vector.hpp> +#include <boost/functional/hash.hpp> + +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/surface.h" + +namespace Vulkan { + +class VKDevice; + +// TODO(Rodrigo): Optimize this structure for faster hashing + +struct RenderPassParams { + struct ColorAttachment { + u32 index = 0; + VideoCore::Surface::PixelFormat pixel_format = VideoCore::Surface::PixelFormat::Invalid; + bool is_texception = false; + + std::size_t Hash() const noexcept { + return static_cast<std::size_t>(pixel_format) | + static_cast<std::size_t>(is_texception) << 6 | + static_cast<std::size_t>(index) << 7; + } + + bool operator==(const ColorAttachment& rhs) const noexcept { + return std::tie(index, pixel_format, is_texception) == + std::tie(rhs.index, rhs.pixel_format, rhs.is_texception); + } + }; + + boost::container::static_vector<ColorAttachment, + Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> + color_attachments{}; + // TODO(Rodrigo): Unify has_zeta into zeta_pixel_format and zeta_component_type. 
+ VideoCore::Surface::PixelFormat zeta_pixel_format = VideoCore::Surface::PixelFormat::Invalid; + bool has_zeta = false; + bool zeta_texception = false; + + std::size_t Hash() const noexcept { + std::size_t hash = 0; + for (const auto& rt : color_attachments) { + boost::hash_combine(hash, rt.Hash()); + } + boost::hash_combine(hash, zeta_pixel_format); + boost::hash_combine(hash, has_zeta); + boost::hash_combine(hash, zeta_texception); + return hash; + } + + bool operator==(const RenderPassParams& rhs) const { + return std::tie(color_attachments, zeta_pixel_format, has_zeta, zeta_texception) == + std::tie(rhs.color_attachments, rhs.zeta_pixel_format, rhs.has_zeta, + rhs.zeta_texception); + } +}; + +} // namespace Vulkan + +namespace std { + +template <> +struct hash<Vulkan::RenderPassParams> { + std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std + +namespace Vulkan { + +class VKRenderPassCache final { +public: + explicit VKRenderPassCache(const VKDevice& device); + ~VKRenderPassCache(); + + vk::RenderPass GetRenderPass(const RenderPassParams& params); + +private: + UniqueRenderPass CreateRenderPass(const RenderPassParams& params) const; + + const VKDevice& device; + std::unordered_map<RenderPassParams, UniqueRenderPass> cache; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a8baf91de..8fe852ce8 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -954,6 +954,10 @@ private: Expression Visit(const Node& node) { if (const auto operation = std::get_if<OperationNode>(&*node)) { + if (const auto amend_index = operation->GetAmendIndex()) { + [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; + ASSERT(type == Type::Void); + } const auto operation_index = static_cast<std::size_t>(operation->GetCode()); const auto decompiler = operation_decompilers[operation_index]; if (decompiler == nullptr) { @@ -1142,6 +1146,10 @@ private: } if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { + if (const auto amend_index = conditional->GetAmendIndex()) { + [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; + ASSERT(type == Type::Void); + } // It's invalid to call conditional on nested nodes, use an operation instead const Id true_label = OpLabel(); const Id skip_label = OpLabel(); diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 62f1427f5..d48d3b44c 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -3,86 +3,144 @@ // Refer to the license.txt file included. 
#include <algorithm> -#include <memory> #include <optional> +#include <tuple> #include <vector> +#include "common/alignment.h" #include "common/assert.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" namespace Vulkan { +namespace { + constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; -VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, - vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage) - : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{ - pipeline_stage} { - CreateBuffers(memory_manager, usage); - ReserveWatches(WATCHES_INITIAL_RESERVE); +constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024; + +std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter, + vk::MemoryPropertyFlags wanted) { + const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader()); + for (u32 i = 0; i < properties.memoryTypeCount; i++) { + if (!(filter & (1 << i))) { + continue; + } + if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) { + return i; + } + } + return {}; +} + +} // Anonymous namespace + +VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, + vk::BufferUsageFlags usage) + : device{device}, scheduler{scheduler} { + CreateBuffers(usage); + ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); + ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); } VKStreamBuffer::~VKStreamBuffer() = default; -std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) { - ASSERT(size <= buffer_size); +std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { + ASSERT(size <= STREAM_BUFFER_SIZE); mapped_size = size; - if (offset + size > buffer_size) { - // The buffer would overflow, save the amount of used buffers, signal an invalidation and - // reset the state. - invalidation_mark = used_watches; - used_watches = 0; + if (alignment > 0) { + offset = Common::AlignUp(offset, alignment); + } + + WaitPendingOperations(offset); + + bool invalidated = false; + if (offset + size > STREAM_BUFFER_SIZE) { + // The buffer would overflow, save the amount of used watches and reset the state. + invalidation_mark = current_watch_cursor; + current_watch_cursor = 0; offset = 0; + + // Swap watches and reset waiting cursors. + std::swap(previous_watches, current_watches); + wait_cursor = 0; + wait_bound = 0; + + // Ensure that we don't wait for uncommitted fences. + scheduler.Flush(); + + invalidated = true; } - return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld)); + return {pointer, offset, invalidated}; } -void VKStreamBuffer::Send(u64 size) { +void VKStreamBuffer::Unmap(u64 size) { ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); - if (invalidation_mark) { - // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. 
- scheduler.Flush(); - std::for_each(watches.begin(), watches.begin() + *invalidation_mark, - [&](auto& resource) { resource->Wait(); }); - invalidation_mark = std::nullopt; - } + const auto dev = device.GetLogical(); + dev.unmapMemory(*memory, device.GetDispatchLoader()); + + offset += size; - if (used_watches + 1 >= watches.size()) { + if (current_watch_cursor + 1 >= current_watches.size()) { // Ensure that there are enough watches. - ReserveWatches(WATCHES_RESERVE_CHUNK); + ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK); } - // Add a watch for this allocation. - watches[used_watches++]->Watch(scheduler.GetFence()); - - offset += size; + auto& watch = current_watches[current_watch_cursor++]; + watch.upper_bound = offset; + watch.fence.Watch(scheduler.GetFence()); } -void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { - const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0, - nullptr); - +void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) { + const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive, + 0, nullptr); const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); - commit = memory_manager.Commit(*buffer, true); - mapped_pointer = commit->GetData(); + + const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld); + // Prefer device local host visible allocations (this should hit AMD's pinned memory). + auto type = FindMemoryType(device, requirements.memoryTypeBits, + vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent | + vk::MemoryPropertyFlagBits::eDeviceLocal); + if (!type) { + // Otherwise search for a host visible allocation. 
+ type = FindMemoryType(device, requirements.memoryTypeBits, + vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent); + ASSERT_MSG(type, "No host visible and coherent memory type found"); + } + const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type); + memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld); + + dev.bindBufferMemory(*buffer, *memory, 0, dld); } -void VKStreamBuffer::ReserveWatches(std::size_t grow_size) { - const std::size_t previous_size = watches.size(); - watches.resize(previous_size + grow_size); - std::generate(watches.begin() + previous_size, watches.end(), - []() { return std::make_unique<VKFenceWatch>(); }); +void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) { + watches.resize(watches.size() + grow_size); +} + +void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { + if (!invalidation_mark) { + return; + } + while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) { + auto& watch = previous_watches[wait_cursor]; + wait_bound = watch.upper_bound; + watch.fence.Wait(); + ++wait_cursor; + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 842e54162..187c0c612 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -4,28 +4,24 @@ #pragma once -#include <memory> #include <optional> #include <tuple> #include <vector> #include "common/common_types.h" #include "video_core/renderer_vulkan/declarations.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" namespace Vulkan { class VKDevice; class VKFence; class VKFenceWatch; -class VKResourceManager; class VKScheduler; -class VKStreamBuffer { +class VKStreamBuffer final { public: - explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, - vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage); + explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, + vk::BufferUsageFlags usage); ~VKStreamBuffer(); /** @@ -34,39 +30,47 @@ public: * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer * offset and a boolean that's true when buffer has been invalidated. */ - std::tuple<u8*, u64, bool> Reserve(u64 size); + std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment); /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. - void Send(u64 size); + void Unmap(u64 size); - vk::Buffer GetBuffer() const { + vk::Buffer GetHandle() const { return *buffer; } private: + struct Watch final { + VKFenceWatch fence; + u64 upper_bound{}; + }; + /// Creates Vulkan buffer handles committing the required the required memory. - void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage); + void CreateBuffers(vk::BufferUsageFlags usage); /// Increases the amount of watches available. - void ReserveWatches(std::size_t grow_size); + void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); + + void WaitPendingOperations(u64 requested_upper_bound); const VKDevice& device; ///< Vulkan device manager. VKScheduler& scheduler; ///< Command scheduler. - const u64 buffer_size; ///< Total size of the stream buffer. const vk::AccessFlags access; ///< Access usage of this stream buffer. 
const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer. - UniqueBuffer buffer; ///< Mapped buffer. - VKMemoryCommit commit; ///< Memory commit. - u8* mapped_pointer{}; ///< Pointer to the host visible commit + UniqueBuffer buffer; ///< Mapped buffer. + UniqueDeviceMemory memory; ///< Memory allocation. u64 offset{}; ///< Buffer iterator. u64 mapped_size{}; ///< Size reserved for the current copy. - std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches - std::size_t used_watches{}; ///< Count of watches, reset on invalidation. - std::optional<std::size_t> - invalidation_mark{}; ///< Number of watches used in the current invalidation. + std::vector<Watch> current_watches; ///< Watches recorded in the current iteration. + std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation. + std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle. + + std::vector<Watch> previous_watches; ///< Watches used in the previous iteration. + std::size_t wait_cursor{}; ///< Last watch being waited for completion. + u64 wait_bound{}; ///< Highest offset being watched for completion. }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp new file mode 100644 index 000000000..0e577b9ff --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -0,0 +1,57 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <variant> +#include <boost/container/static_vector.hpp> + +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" + +namespace Vulkan { + +VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler) + : device{device}, scheduler{scheduler} {} + +VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; + +void VKUpdateDescriptorQueue::TickFrame() { + payload.clear(); +} + +void VKUpdateDescriptorQueue::Acquire() { + entries.clear(); +} + +void VKUpdateDescriptorQueue::Send(vk::DescriptorUpdateTemplate update_template, + vk::DescriptorSet set) { + if (payload.size() + entries.size() >= payload.max_size()) { + LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); + scheduler.WaitWorker(); + payload.clear(); + } + + const auto payload_start = payload.data() + payload.size(); + for (const auto& entry : entries) { + if (const auto image = std::get_if<vk::DescriptorImageInfo>(&entry)) { + payload.push_back(*image); + } else if (const auto buffer = std::get_if<Buffer>(&entry)) { + payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size); + } else if (const auto texel = std::get_if<vk::BufferView>(&entry)) { + payload.push_back(*texel); + } else { + UNREACHABLE(); + } + } + + scheduler.Record([dev = device.GetLogical(), payload_start, set, + update_template]([[maybe_unused]] auto cmdbuf, auto& dld) { + dev.updateDescriptorSetWithTemplate(set, update_template, payload_start, dld); + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h new file mode 100644 index 000000000..8c825aa29 --- /dev/null +++ 
b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -0,0 +1,86 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <type_traits> +#include <variant> +#include <boost/container/static_vector.hpp> + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" + +namespace Vulkan { + +class VKDevice; +class VKScheduler; + +class DescriptorUpdateEntry { +public: + explicit DescriptorUpdateEntry() : image{} {} + + DescriptorUpdateEntry(vk::DescriptorImageInfo image) : image{image} {} + + DescriptorUpdateEntry(vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size) + : buffer{buffer, offset, size} {} + + DescriptorUpdateEntry(vk::BufferView texel_buffer) : texel_buffer{texel_buffer} {} + +private: + union { + vk::DescriptorImageInfo image; + vk::DescriptorBufferInfo buffer; + vk::BufferView texel_buffer; + }; +}; + +class VKUpdateDescriptorQueue final { +public: + explicit VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler); + ~VKUpdateDescriptorQueue(); + + void TickFrame(); + + void Acquire(); + + void Send(vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set); + + void AddSampledImage(vk::Sampler sampler, vk::ImageView image_view) { + entries.emplace_back(vk::DescriptorImageInfo{sampler, image_view, {}}); + } + + void AddImage(vk::ImageView image_view) { + entries.emplace_back(vk::DescriptorImageInfo{{}, image_view, {}}); + } + + void AddBuffer(const vk::Buffer* buffer, u64 offset, std::size_t size) { + entries.push_back(Buffer{buffer, offset, size}); + } + + void AddTexelBuffer(vk::BufferView texel_buffer) { + entries.emplace_back(texel_buffer); + } + + vk::ImageLayout* GetLastImageLayout() { + return &std::get<vk::DescriptorImageInfo>(entries.back()).imageLayout; + } + +private: + struct Buffer { + const vk::Buffer* buffer{}; + u64 offset{}; + std::size_t size{}; + }; + using Variant = std::variant<vk::DescriptorImageInfo, Buffer, vk::BufferView>; + // Old gcc versions don't consider this trivially copyable. 
+ // static_assert(std::is_trivially_copyable_v<Variant>); + + const VKDevice& device; + VKScheduler& scheduler; + + boost::container::static_vector<Variant, 0x400> entries; + boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload; +}; + +} // namespace Vulkan diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4d2f4d6a8..4e155542a 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -392,8 +392,30 @@ struct MetaImage { using Meta = std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; +class AmendNode { +public: + std::optional<std::size_t> GetAmendIndex() const { + if (amend_index == amend_null_index) { + return std::nullopt; + } + return {amend_index}; + } + + void SetAmendIndex(std::size_t index) { + amend_index = index; + } + + void ClearAmend() { + amend_index = amend_null_index; + } + +private: + static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL; + std::size_t amend_index{amend_null_index}; +}; + /// Holds any kind of operation that can be done in the IR -class OperationNode final { +class OperationNode final : public AmendNode { public: explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} @@ -433,7 +455,7 @@ private: }; /// Encloses inside any kind of node that returns a boolean conditionally-executed code -class ConditionalNode final { +class ConditionalNode final : public AmendNode { public: explicit ConditionalNode(Node condition, std::vector<Node>&& code) : condition{std::move(condition)}, code{std::move(code)} {} diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 1d9825c76..31eecb3f4 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -446,4 +446,10 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { Immediate(bits)); } +std::size_t ShaderIR::DeclareAmend(Node new_amend) { + const std::size_t id = amend_code.size(); + amend_code.push_back(new_amend); + return id; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index baed06ccd..aacd0a0da 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -176,6 +176,10 @@ public: /// Returns a condition code evaluated from internal flags Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; + const Node& GetAmendNode(std::size_t index) const { + return amend_code[index]; + } + private: friend class ASTDecoder; @@ -392,6 +396,9 @@ private: Tegra::Shader::Instruction instr, bool is_write); + /// Register new amending code and obtain the reference id. + std::size_t DeclareAmend(Node new_amend); + const ProgramCode& program_code; const u32 main_offset; const CompilerSettings settings; @@ -406,6 +413,7 @@ private: std::map<u32, NodeBlock> basic_blocks; NodeBlock global_code; ASTManager program_manager{true, true}; + std::vector<Node> amend_code; std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 07a720494..7490fb718 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -215,18 +215,11 @@ void GRenderWindow::moveContext() { } void GRenderWindow::SwapBuffers() { - // In our multi-threaded QWidget use case we shouldn't need to call `makeCurrent`, - // since we never call `doneCurrent` in this thread. 
- // However: - // - The Qt debug runtime prints a bogus warning on the console if `makeCurrent` wasn't called - // since the last time `swapBuffers` was executed; - // - On macOS, if `makeCurrent` isn't called explicitly, resizing the buffer breaks. - context->makeCurrent(child); - context->swapBuffers(child); + if (!first_frame) { - emit FirstFrameDisplayed(); first_frame = true; + emit FirstFrameDisplayed(); } }
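
For reference, the render pass cache added in vk_renderpass_cache.h follows a familiar pattern: a parameters struct exposing Hash() and operator==, a std::hash specialization that forwards to Hash(), and an unordered_map keyed on the struct so each unique combination is built once. The sketch below shows that pattern in isolation; Params, Object and Cache are hypothetical stand-ins, not types from this change.

// Minimal sketch of the hashed-parameters cache pattern, assuming boost is available
// for hash_combine (as it is in the diff). Params/Object/Cache are illustrative only.
#include <cstddef>
#include <memory>
#include <tuple>
#include <unordered_map>
#include <boost/functional/hash.hpp>

struct Params {
    int color_format{};
    int zeta_format{};
    bool has_zeta{};

    std::size_t Hash() const noexcept {
        std::size_t hash = 0;
        boost::hash_combine(hash, color_format);
        boost::hash_combine(hash, zeta_format);
        boost::hash_combine(hash, has_zeta);
        return hash;
    }

    bool operator==(const Params& rhs) const {
        return std::tie(color_format, zeta_format, has_zeta) ==
               std::tie(rhs.color_format, rhs.zeta_format, rhs.has_zeta);
    }
};

namespace std {
template <>
struct hash<Params> {
    std::size_t operator()(const Params& k) const noexcept {
        return k.Hash();
    }
};
} // namespace std

struct Object {}; // stands in for the created vk::RenderPass

class Cache {
public:
    const Object& Get(const Params& params) {
        // try_emplace only constructs a new entry on a cache miss.
        const auto [it, is_new] = cache.try_emplace(params);
        if (is_new) {
            it->second = std::make_unique<Object>();
        }
        return *it->second;
    }

private:
    std::unordered_map<Params, std::unique_ptr<Object>> cache;
};

GetRenderPass in the diff is expected to follow the same lookup-or-create shape, with CreateRenderPass building the Vulkan object on a miss.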
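
The reworked VKStreamBuffer treats the allocation as a ring: Map aligns and advances an offset, Unmap records a watch (a fence plus the upper bound it protects), and when the offset would overflow the watch lists are swapped so the next cycle only waits for the regions it is about to reuse. The host-only sketch below illustrates that bookkeeping; Fence is a hypothetical stand-in for VKFenceWatch, and the ordering and wait condition are simplified relative to the diff.

// Simplified sketch of the wrap-and-wait bookkeeping, assuming a hypothetical Fence type.
#include <cstddef>
#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

struct Fence {
    void Wait() const {} // would block until the GPU has passed this point
};

class StreamBuffer {
public:
    explicit StreamBuffer(std::uint64_t size) : buffer_size{size} {}

    // Returns the offset at which the caller may write `size` bytes.
    std::uint64_t Map(std::uint64_t size, std::uint64_t alignment) {
        if (alignment > 0) {
            offset = (offset + alignment - 1) / alignment * alignment; // align up
        }
        if (offset + size > buffer_size) {
            // Wrap around: everything recorded so far belongs to the now-finished cycle.
            std::swap(previous_watches, current_watches);
            current_watches.clear();
            invalidation_mark = previous_watches.size();
            offset = 0;
            wait_cursor = 0;
            wait_bound = 0;
        }
        // Block until previous-cycle work that used [0, offset + size) has finished.
        WaitPendingOperations(offset + size);
        return offset;
    }

    // `fence` is whatever signals that the GPU finished consuming this range.
    void Unmap(std::uint64_t written, Fence fence) {
        offset += written;
        current_watches.push_back({fence, offset});
    }

private:
    struct Watch {
        Fence fence;
        std::uint64_t upper_bound{};
    };

    void WaitPendingOperations(std::uint64_t requested_upper_bound) {
        if (!invalidation_mark) {
            return;
        }
        // Wait only on previous-cycle watches that still cover the region being reused.
        while (wait_bound < requested_upper_bound && wait_cursor < *invalidation_mark) {
            auto& watch = previous_watches[wait_cursor];
            wait_bound = watch.upper_bound;
            watch.fence.Wait();
            ++wait_cursor;
        }
    }

    std::uint64_t buffer_size;
    std::uint64_t offset{};
    std::vector<Watch> current_watches;
    std::vector<Watch> previous_watches;
    std::size_t wait_cursor{};
    std::uint64_t wait_bound{};
    std::optional<std::size_t> invalidation_mark;
};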
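
CreateBuffers now selects the memory type in two passes: prefer DEVICE_LOCAL | HOST_VISIBLE | HOST_COHERENT (the small pinned heap on AMD), and otherwise fall back to any HOST_VISIBLE | HOST_COHERENT type. Below is a sketch of the same selection using the plain Vulkan C API rather than the vulkan.hpp wrappers from the diff; it assumes a valid VkPhysicalDevice and a `filter` taken from the buffer's memoryTypeBits.

// Two-step memory type selection, mirroring the fallback logic in CreateBuffers.
#include <cstdint>
#include <optional>
#include <vulkan/vulkan.h>

std::optional<std::uint32_t> FindMemoryType(VkPhysicalDevice physical, std::uint32_t filter,
                                            VkMemoryPropertyFlags wanted) {
    VkPhysicalDeviceMemoryProperties properties;
    vkGetPhysicalDeviceMemoryProperties(physical, &properties);
    for (std::uint32_t i = 0; i < properties.memoryTypeCount; ++i) {
        if (!(filter & (1U << i))) {
            continue; // the resource cannot live in this memory type
        }
        if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
            return i;
        }
    }
    return std::nullopt;
}

std::uint32_t PickStreamBufferMemoryType(VkPhysicalDevice physical, std::uint32_t filter) {
    // Preferred: device local *and* host visible (pinned memory on some GPUs).
    if (const auto type = FindMemoryType(physical, filter,
                                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
        return *type;
    }
    // Fallback: any host visible, host coherent type.
    const auto type = FindMemoryType(physical, filter,
                                     VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
    return type.value(); // the Vulkan spec guarantees at least one such type exists
}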
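
VKUpdateDescriptorQueue batches per-draw descriptor writes into a flat payload and records a single updateDescriptorSetWithTemplate call on the scheduler. The hypothetical helper below shows the intended Acquire -> Add* -> Send flow per draw; the sampler, image view, buffer, template and set handles are assumed to exist already and to match the template layout.

// Hypothetical per-draw usage of the queue added in vk_update_descriptor.h.
#include <cstddef>

#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"

namespace Vulkan {

void PushDrawDescriptors(VKUpdateDescriptorQueue& queue, vk::Sampler sampler,
                         vk::ImageView image_view, const vk::Buffer& uniform_buffer,
                         u64 uniform_offset, std::size_t uniform_size,
                         vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set) {
    queue.Acquire();                            // start a fresh entry list for this draw
    queue.AddSampledImage(sampler, image_view); // combined image/sampler slot
    *queue.GetLastImageLayout() = vk::ImageLayout::eShaderReadOnlyOptimal;
    queue.AddBuffer(&uniform_buffer, uniform_offset, uniform_size); // uniform buffer slot
    queue.Send(update_template, set);           // flatten entries and defer the update
}

} // namespace Vulkan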