53 files changed, 637 insertions, 924 deletions
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index 090dd19b1..e553b8203 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -36,8 +36,6 @@ add_library(audio_core STATIC
     splitter_context.h
     stream.cpp
     stream.h
-    time_stretch.cpp
-    time_stretch.h
     voice_context.cpp
     voice_context.h
 
@@ -63,7 +61,6 @@ if (NOT MSVC)
 endif()
 
 target_link_libraries(audio_core PUBLIC common core)
-target_link_libraries(audio_core PRIVATE SoundTouch)
 
 if(ENABLE_CUBEB)
     target_link_libraries(audio_core PRIVATE cubeb)
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 93c35e785..13de3087c 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -7,7 +7,6 @@
 #include <cstring>
 #include "audio_core/cubeb_sink.h"
 #include "audio_core/stream.h"
-#include "audio_core/time_stretch.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/ring_buffer.h"
@@ -23,8 +22,7 @@ class CubebSinkStream final : public SinkStream {
 public:
     CubebSinkStream(cubeb* ctx_, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
                     const std::string& name)
-        : ctx{ctx_}, num_channels{std::min(num_channels_, 6u)}, time_stretch{sample_rate,
-                                                                             num_channels} {
+        : ctx{ctx_}, num_channels{std::min(num_channels_, 6u)} {
 
         cubeb_stream_params params{};
         params.rate = sample_rate;
@@ -131,7 +129,6 @@ private:
     Common::RingBuffer<s16, 0x10000> queue;
     std::array<s16, 2> last_frame{};
     std::atomic<bool> should_flush{};
-    TimeStretcher time_stretch;
 
     static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
                              void* output_buffer, long num_frames);
@@ -205,25 +202,7 @@ long CubebSinkStream::DataCallback([[maybe_unused]] cubeb_stream* stream, void*
 
     const std::size_t num_channels = impl->GetNumChannels();
     const std::size_t samples_to_write = num_channels * num_frames;
-    std::size_t samples_written;
-
-    /*
-    if (Settings::values.enable_audio_stretching.GetValue()) {
-        const std::vector<s16> in{impl->queue.Pop()};
-        const std::size_t num_in{in.size() / num_channels};
-        s16* const out{reinterpret_cast<s16*>(buffer)};
-        const std::size_t out_frames =
-            impl->time_stretch.Process(in.data(), num_in, out, num_frames);
-        samples_written = out_frames * num_channels;
-
-        if (impl->should_flush) {
-            impl->time_stretch.Flush();
-            impl->should_flush = false;
-        }
-    } else {
-        samples_written = impl->queue.Pop(buffer, samples_to_write);
-    }*/
-    samples_written = impl->queue.Pop(buffer, samples_to_write);
+    const std::size_t samples_written = impl->queue.Pop(buffer, samples_to_write);
 
     if (samples_written >= num_channels) {
         std::memcpy(&impl->last_frame[0], buffer + (samples_written - num_channels) * sizeof(s16),
diff --git a/src/audio_core/sdl2_sink.cpp b/src/audio_core/sdl2_sink.cpp
index 62d3716a6..2d14ce2cb 100644
--- a/src/audio_core/sdl2_sink.cpp
+++ b/src/audio_core/sdl2_sink.cpp
@@ -7,7 +7,6 @@
 #include <cstring>
 #include "audio_core/sdl2_sink.h"
 #include "audio_core/stream.h"
-#include "audio_core/time_stretch.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
 //#include "common/settings.h"
@@ -27,7 +26,7 @@ namespace AudioCore {
 class SDLSinkStream final : public SinkStream {
 public:
     SDLSinkStream(u32 sample_rate, u32 num_channels_, const std::string& output_device)
-        : num_channels{std::min(num_channels_, 6u)}, time_stretch{sample_rate, num_channels} {
+        : num_channels{std::min(num_channels_, 6u)} {
 
         SDL_AudioSpec spec;
         spec.freq = sample_rate;
@@ -116,7 +115,6 @@ private:
     SDL_AudioDeviceID dev = 0;
     u32 num_channels{};
     std::atomic<bool> should_flush{};
-    TimeStretcher time_stretch;
 };
 
 SDLSink::SDLSink(std::string_view target_device_name) {
diff --git a/src/audio_core/time_stretch.cpp b/src/audio_core/time_stretch.cpp
deleted file mode 100644
index 726591fce..000000000
--- a/src/audio_core/time_stretch.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <cmath>
-#include <cstddef>
-#include "audio_core/time_stretch.h"
-#include "common/logging/log.h"
-
-namespace AudioCore {
-
-TimeStretcher::TimeStretcher(u32 sample_rate, u32 channel_count) : m_sample_rate{sample_rate} {
-    m_sound_touch.setChannels(channel_count);
-    m_sound_touch.setSampleRate(sample_rate);
-    m_sound_touch.setPitch(1.0);
-    m_sound_touch.setTempo(1.0);
-}
-
-void TimeStretcher::Clear() {
-    m_sound_touch.clear();
-}
-
-void TimeStretcher::Flush() {
-    m_sound_touch.flush();
-}
-
-std::size_t TimeStretcher::Process(const s16* in, std::size_t num_in, s16* out,
-                                   std::size_t num_out) {
-    const double time_delta = static_cast<double>(num_out) / m_sample_rate; // seconds
-
-    // We were given actual_samples number of samples, and num_samples were requested from us.
-    double current_ratio = static_cast<double>(num_in) / static_cast<double>(num_out);
-
-    const double max_latency = 0.25; // seconds
-    const double max_backlog = m_sample_rate * max_latency;
-    const double backlog_fullness = m_sound_touch.numSamples() / max_backlog;
-    if (backlog_fullness > 4.0) {
-        // Too many samples in backlog: Don't push anymore on
-        num_in = 0;
-    }
-
-    // We ideally want the backlog to be about 50% full.
-    // This gives some headroom both ways to prevent underflow and overflow.
-    // We tweak current_ratio to encourage this.
-    constexpr double tweak_time_scale = 0.05; // seconds
-    const double tweak_correction = (backlog_fullness - 0.5) * (time_delta / tweak_time_scale);
-    current_ratio *= std::pow(1.0 + 2.0 * tweak_correction, tweak_correction < 0 ? 3.0 : 1.0);
-
-    // This low-pass filter smoothes out variance in the calculated stretch ratio.
-    // The time-scale determines how responsive this filter is.
-    constexpr double lpf_time_scale = 0.712; // seconds
-    const double lpf_gain = 1.0 - std::exp(-time_delta / lpf_time_scale);
-    m_stretch_ratio += lpf_gain * (current_ratio - m_stretch_ratio);
-
-    // Place a lower limit of 5% speed. When a game boots up, there will be
-    // many silence samples. These do not need to be timestretched.
-    m_stretch_ratio = std::max(m_stretch_ratio, 0.05);
-    m_sound_touch.setTempo(m_stretch_ratio);
-
-    LOG_TRACE(Audio, "{:5}/{:5} ratio:{:0.6f} backlog:{:0.6f}", num_in, num_out, m_stretch_ratio,
-              backlog_fullness);
-
-    m_sound_touch.putSamples(in, static_cast<u32>(num_in));
-    return m_sound_touch.receiveSamples(out, static_cast<u32>(num_out));
-}
-
-} // namespace AudioCore
diff --git a/src/audio_core/time_stretch.h b/src/audio_core/time_stretch.h
deleted file mode 100644
index bb2270b96..000000000
--- a/src/audio_core/time_stretch.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <cstddef>
-#include <SoundTouch.h>
-#include "common/common_types.h"
-
-namespace AudioCore {
-
-class TimeStretcher {
-public:
-    TimeStretcher(u32 sample_rate, u32 channel_count);
-
-    /// @param in       Input sample buffer
-    /// @param num_in   Number of input frames in `in`
-    /// @param out      Output sample buffer
-    /// @param num_out  Desired number of output frames in `out`
-    /// @returns Actual number of frames written to `out`
-    std::size_t Process(const s16* in, std::size_t num_in, s16* out, std::size_t num_out);
-
-    void Clear();
-
-    void Flush();
-
-private:
-    u32 m_sample_rate;
-    soundtouch::SoundTouch m_sound_touch;
-    double m_stretch_ratio = 1.0;
-};
-
-} // namespace AudioCore
diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h
index b94d73c7a..69fde8421 100644
--- a/src/common/atomic_ops.h
+++ b/src/common/atomic_ops.h
@@ -46,6 +46,50 @@ namespace Common {
                                           reinterpret_cast<__int64*>(expected.data())) != 0;
 }
 
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
+                                               u8& actual) {
+    actual =
+        _InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
+                                               u16& actual) {
+    actual =
+        _InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
+                                               u32& actual) {
+    actual =
+        _InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
+                                               u64& actual) {
+    actual = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer), value,
+                                           expected);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
+                                               u128& actual) {
+    const bool result =
+        _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
+                                       value[0], reinterpret_cast<__int64*>(expected.data())) != 0;
+    actual = expected;
+    return result;
+}
+
+[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
+    u128 result{};
+    _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), result[1],
+                                   result[0], reinterpret_cast<__int64*>(result.data()));
+    return result;
+}
+
 #else
 
 [[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
@@ -72,6 +116,52 @@ namespace Common {
     return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
 }
 
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
+                                               u8& actual) {
+    actual = __sync_val_compare_and_swap(pointer, expected, value);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
+                                               u16& actual) {
+    actual = __sync_val_compare_and_swap(pointer, expected, value);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
+                                               u32& actual) {
+    actual = __sync_val_compare_and_swap(pointer, expected, value);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
+                                               u64& actual) {
+    actual = __sync_val_compare_and_swap(pointer, expected, value);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
+                                               u128& actual) {
+    unsigned __int128 value_a;
+    unsigned __int128 expected_a;
+    unsigned __int128 actual_a;
+    std::memcpy(&value_a, value.data(), sizeof(u128));
+    std::memcpy(&expected_a, expected.data(), sizeof(u128));
+    actual_a = __sync_val_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
+    std::memcpy(actual.data(), &actual_a, sizeof(u128));
+    return actual_a == expected_a;
+}
+
+[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
+    unsigned __int128 zeros_a = 0;
+    unsigned __int128 result_a =
+        __sync_val_compare_and_swap((unsigned __int128*)pointer, zeros_a, zeros_a);
+
+    u128 result;
+    std::memcpy(result.data(), &result_a, sizeof(u128));
+    return result;
+}
+
 #endif
 
 } // namespace Common
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index 347e41efc..7a3f21dcf 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -55,8 +55,9 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
 u64 NativeClock::GetRTSC() {
     TimePoint new_time_point{};
     TimePoint current_time_point{};
+
+    current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
     do {
-        current_time_point.pack = time_point.pack;
         _mm_mfence();
         const u64 current_measure = __rdtsc();
         u64 diff = current_measure - current_time_point.inner.last_measure;
@@ -66,7 +67,7 @@ u64 NativeClock::GetRTSC() {
                                                 : current_time_point.inner.last_measure;
         new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;
     } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
-                                           current_time_point.pack));
+                                           current_time_point.pack, current_time_point.pack));
     /// The clock cannot be more precise than the guest timer, remove the lower bits
     return new_time_point.inner.accumulated_ticks & inaccuracy_mask;
 }
@@ -75,13 +76,14 @@ void NativeClock::Pause(bool is_paused) {
     if (!is_paused) {
         TimePoint current_time_point{};
         TimePoint new_time_point{};
+
+        current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
         do {
-            current_time_point.pack = time_point.pack;
             new_time_point.pack = current_time_point.pack;
             _mm_mfence();
             new_time_point.inner.last_measure = __rdtsc();
         } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
-                                               current_time_point.pack));
+                                               current_time_point.pack, current_time_point.pack));
     }
 }
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index a2d893450..6d5a1ecfd 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -93,17 +93,19 @@ public:
             static constexpr u64 ICACHE_LINE_SIZE = 64;
 
             const u64 cache_line_start = value & ~(ICACHE_LINE_SIZE - 1);
-            parent.InvalidateCacheRange(cache_line_start, ICACHE_LINE_SIZE);
+            parent.system.InvalidateCpuInstructionCacheRange(cache_line_start, ICACHE_LINE_SIZE);
             break;
         }
         case Dynarmic::A64::InstructionCacheOperation::InvalidateAllToPoU:
-            parent.ClearInstructionCache();
+            parent.system.InvalidateCpuInstructionCaches();
             break;
         case Dynarmic::A64::InstructionCacheOperation::InvalidateAllToPoUInnerSharable:
         default:
             LOG_DEBUG(Core_ARM, "Unprocesseed instruction cache operation: {}", op);
             break;
         }
+
+        parent.jit->HaltExecution();
     }
 
     void ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) override {
diff --git a/src/core/hle/kernel/k_code_memory.cpp b/src/core/hle/kernel/k_code_memory.cpp
index b365ce7b7..63bbe02e9 100644
--- a/src/core/hle/kernel/k_code_memory.cpp
+++ b/src/core/hle/kernel/k_code_memory.cpp
@@ -28,7 +28,8 @@ ResultCode KCodeMemory::Initialize(Core::DeviceMemory& device_memory, VAddr addr
     auto& page_table = m_owner->PageTable();
 
     // Construct the page group.
-    m_page_group = KPageLinkedList(addr, Common::DivideUp(size, PageSize));
+    m_page_group =
+        KPageLinkedList(page_table.GetPhysicalAddr(addr), Common::DivideUp(size, PageSize));
 
     // Lock the memory.
     R_TRY(page_table.LockForCodeMemory(addr, size))
diff --git a/src/core/hle/kernel/k_page_linked_list.h b/src/core/hle/kernel/k_page_linked_list.h
index 0e2ae582a..869228322 100644
--- a/src/core/hle/kernel/k_page_linked_list.h
+++ b/src/core/hle/kernel/k_page_linked_list.h
@@ -89,6 +89,10 @@ public:
         return ResultSuccess;
     }
 
+    bool Empty() const {
+        return nodes.empty();
+    }
+
 private:
     std::list<Node> nodes;
 };
diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp
index 02d93b12e..599013cf6 100644
--- a/src/core/hle/kernel/k_page_table.cpp
+++ b/src/core/hle/kernel/k_page_table.cpp
@@ -486,6 +486,58 @@ VAddr KPageTable::FindFreeArea(VAddr region_start, std::size_t region_num_pages,
     return address;
 }
 
+ResultCode KPageTable::MakePageGroup(KPageLinkedList& pg, VAddr addr, size_t num_pages) {
+    ASSERT(this->IsLockedByCurrentThread());
+
+    const size_t size = num_pages * PageSize;
+
+    // We're making a new group, not adding to an existing one.
+    R_UNLESS(pg.Empty(), ResultInvalidCurrentMemory);
+
+    // Begin traversal.
+    Common::PageTable::TraversalContext context;
+    Common::PageTable::TraversalEntry next_entry;
+    R_UNLESS(page_table_impl.BeginTraversal(next_entry, context, addr), ResultInvalidCurrentMemory);
+
+    // Prepare tracking variables.
+    PAddr cur_addr = next_entry.phys_addr;
+    size_t cur_size = next_entry.block_size - (cur_addr & (next_entry.block_size - 1));
+    size_t tot_size = cur_size;
+
+    // Iterate, adding to group as we go.
+    const auto& memory_layout = system.Kernel().MemoryLayout();
+    while (tot_size < size) {
+        R_UNLESS(page_table_impl.ContinueTraversal(next_entry, context),
+                 ResultInvalidCurrentMemory);
+
+        if (next_entry.phys_addr != (cur_addr + cur_size)) {
+            const size_t cur_pages = cur_size / PageSize;
+
+            R_UNLESS(IsHeapPhysicalAddress(memory_layout, cur_addr), ResultInvalidCurrentMemory);
+            R_TRY(pg.AddBlock(cur_addr, cur_pages));
+
+            cur_addr = next_entry.phys_addr;
+            cur_size = next_entry.block_size;
+        } else {
+            cur_size += next_entry.block_size;
+        }
+
+        tot_size += next_entry.block_size;
+    }
+
+    // Ensure we add the right amount for the last block.
+    if (tot_size > size) {
+        cur_size -= (tot_size - size);
+    }
+
+    // Add the last block.
+    const size_t cur_pages = cur_size / PageSize;
+    R_UNLESS(IsHeapPhysicalAddress(memory_layout, cur_addr), ResultInvalidCurrentMemory);
+    R_TRY(pg.AddBlock(cur_addr, cur_pages));
+
+    return ResultSuccess;
+}
+
 ResultCode KPageTable::UnmapProcessMemory(VAddr dst_addr, std::size_t size,
                                           KPageTable& src_page_table, VAddr src_addr) {
     KScopedLightLock lk(general_lock);
@@ -1223,6 +1275,31 @@ ResultCode KPageTable::UnmapPages(VAddr address, std::size_t num_pages, KMemoryS
     return ResultSuccess;
 }
 
+ResultCode KPageTable::MakeAndOpenPageGroup(KPageLinkedList* out, VAddr address, size_t num_pages,
+                                            KMemoryState state_mask, KMemoryState state,
+                                            KMemoryPermission perm_mask, KMemoryPermission perm,
+                                            KMemoryAttribute attr_mask, KMemoryAttribute attr) {
+    // Ensure that the page group isn't null.
+    ASSERT(out != nullptr);
+
+    // Make sure that the region we're mapping is valid for the table.
+    const size_t size = num_pages * PageSize;
+    R_UNLESS(this->Contains(address, size), ResultInvalidCurrentMemory);
+
+    // Lock the table.
+    KScopedLightLock lk(general_lock);
+
+    // Check if state allows us to create the group.
+    R_TRY(this->CheckMemoryState(address, size, state_mask | KMemoryState::FlagReferenceCounted,
+                                 state | KMemoryState::FlagReferenceCounted, perm_mask, perm,
+                                 attr_mask, attr));
+
+    // Create a new page group for the region.
+    R_TRY(this->MakePageGroup(*out, address, num_pages));
+
+    return ResultSuccess;
+}
+
 ResultCode KPageTable::SetProcessMemoryPermission(VAddr addr, std::size_t size,
                                                   Svc::MemoryPermission svc_perm) {
     const size_t num_pages = size / PageSize;
@@ -1605,57 +1682,21 @@ ResultCode KPageTable::UnlockForDeviceAddressSpace(VAddr addr, std::size_t size)
 }
 
 ResultCode KPageTable::LockForCodeMemory(VAddr addr, std::size_t size) {
-    KScopedLightLock lk(general_lock);
-
-    KMemoryPermission new_perm = KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite;
-
-    KMemoryPermission old_perm{};
-
-    if (const ResultCode result{CheckMemoryState(
-            nullptr, &old_perm, nullptr, nullptr, addr, size, KMemoryState::FlagCanCodeMemory,
-            KMemoryState::FlagCanCodeMemory, KMemoryPermission::All,
-            KMemoryPermission::UserReadWrite, KMemoryAttribute::All, KMemoryAttribute::None)};
-        result.IsError()) {
-        return result;
-    }
-
-    new_perm = (new_perm != KMemoryPermission::None) ? new_perm : old_perm;
-
-    block_manager->UpdateLock(
-        addr, size / PageSize,
-        [](KMemoryBlockManager::iterator block, KMemoryPermission permission) {
-            block->ShareToDevice(permission);
-        },
-        new_perm);
-
-    return ResultSuccess;
+    return this->LockMemoryAndOpen(
+        nullptr, nullptr, addr, size, KMemoryState::FlagCanCodeMemory,
+        KMemoryState::FlagCanCodeMemory, KMemoryPermission::All, KMemoryPermission::UserReadWrite,
+        KMemoryAttribute::All, KMemoryAttribute::None,
+        static_cast<KMemoryPermission>(KMemoryPermission::NotMapped |
+                                       KMemoryPermission::KernelReadWrite),
+        KMemoryAttribute::Locked);
 }
 
 ResultCode KPageTable::UnlockForCodeMemory(VAddr addr, std::size_t size) {
-    KScopedLightLock lk(general_lock);
-
-    KMemoryPermission new_perm = KMemoryPermission::UserReadWrite;
-
-    KMemoryPermission old_perm{};
-
-    if (const ResultCode result{CheckMemoryState(
-            nullptr, &old_perm, nullptr, nullptr, addr, size, KMemoryState::FlagCanCodeMemory,
-            KMemoryState::FlagCanCodeMemory, KMemoryPermission::None, KMemoryPermission::None,
-            KMemoryAttribute::All, KMemoryAttribute::Locked)};
-        result.IsError()) {
-        return result;
-    }
-
-    new_perm = (new_perm != KMemoryPermission::None) ? new_perm : old_perm;
-
-    block_manager->UpdateLock(
-        addr, size / PageSize,
-        [](KMemoryBlockManager::iterator block, KMemoryPermission permission) {
-            block->UnshareToDevice(permission);
-        },
-        new_perm);
-
-    return ResultSuccess;
+    return this->UnlockMemory(addr, size, KMemoryState::FlagCanCodeMemory,
+                              KMemoryState::FlagCanCodeMemory, KMemoryPermission::None,
+                              KMemoryPermission::None, KMemoryAttribute::All,
+                              KMemoryAttribute::Locked, KMemoryPermission::UserReadWrite,
+                              KMemoryAttribute::Locked, nullptr);
 }
 
 ResultCode KPageTable::InitializeMemoryLayout(VAddr start, VAddr end) {
@@ -1991,4 +2032,109 @@ ResultCode KPageTable::CheckMemoryState(KMemoryState* out_state, KMemoryPermissi
     return ResultSuccess;
 }
 
+ResultCode KPageTable::LockMemoryAndOpen(KPageLinkedList* out_pg, PAddr* out_paddr, VAddr addr,
+                                         size_t size, KMemoryState state_mask, KMemoryState state,
+                                         KMemoryPermission perm_mask, KMemoryPermission perm,
+                                         KMemoryAttribute attr_mask, KMemoryAttribute attr,
+                                         KMemoryPermission new_perm, KMemoryAttribute lock_attr) {
+    // Preconditions: lock_attr is disjoint from attr and is neither IpcLocked nor DeviceShared.
+    ASSERT((lock_attr & attr) == KMemoryAttribute::None);
+    ASSERT((lock_attr & (KMemoryAttribute::IpcLocked | KMemoryAttribute::DeviceShared)) ==
+           KMemoryAttribute::None);
+
+    // Validate the lock request.
+    const size_t num_pages = size / PageSize;
+    R_UNLESS(this->Contains(addr, size), ResultInvalidCurrentMemory);
+
+    // Lock the table.
+    KScopedLightLock lk(general_lock);
+
+    // Check that the output page group is empty, if one was provided.
+    if (out_pg != nullptr) {
+        ASSERT(out_pg->GetNumPages() == 0);
+    }
+
+    // Check the state.
+    KMemoryState old_state{};
+    KMemoryPermission old_perm{};
+    KMemoryAttribute old_attr{};
+    size_t num_allocator_blocks{};
+    R_TRY(this->CheckMemoryState(std::addressof(old_state), std::addressof(old_perm),
+                                 std::addressof(old_attr), std::addressof(num_allocator_blocks),
+                                 addr, size, state_mask | KMemoryState::FlagReferenceCounted,
+                                 state | KMemoryState::FlagReferenceCounted, perm_mask, perm,
+                                 attr_mask, attr));
+
+    // Get the physical address, if requested; the ASSERT operand itself performs the lookup.
+    if (out_paddr != nullptr) {
+        ASSERT(this->GetPhysicalAddressLocked(out_paddr, addr));
+    }
+
+    // Make the page group, if we're supposed to.
+    if (out_pg != nullptr) {
+        R_TRY(this->MakePageGroup(*out_pg, addr, num_pages));
+    }
+
+    // Decide on new perm and attr (new_perm of None keeps old_perm; lock_attr is OR'd in).
+    new_perm = (new_perm != KMemoryPermission::None) ? new_perm : old_perm;
+    KMemoryAttribute new_attr = static_cast<KMemoryAttribute>(old_attr | lock_attr);
+
+    // Update permission, if we need to.
+    if (new_perm != old_perm) {
+        R_TRY(Operate(addr, num_pages, new_perm, OperationType::ChangePermissions));
+    }
+
+    // Apply the memory block updates.
+    block_manager->Update(addr, num_pages, old_state, new_perm, new_attr);
+
+    return ResultSuccess;
+}
+
+ResultCode KPageTable::UnlockMemory(VAddr addr, size_t size, KMemoryState state_mask,
+                                    KMemoryState state, KMemoryPermission perm_mask,
+                                    KMemoryPermission perm, KMemoryAttribute attr_mask,
+                                    KMemoryAttribute attr, KMemoryPermission new_perm,
+                                    KMemoryAttribute lock_attr, const KPageLinkedList* pg) {
+    // Validate basic preconditions.
+    ASSERT((attr_mask & lock_attr) == lock_attr);
+    ASSERT((attr & lock_attr) == lock_attr);
+
+    // Validate the unlock request.
+    const size_t num_pages = size / PageSize;
+    R_UNLESS(this->Contains(addr, size), ResultInvalidCurrentMemory);
+
+    // Lock the table.
+    KScopedLightLock lk(general_lock);
+
+    // Check the state.
+    KMemoryState old_state{};
+    KMemoryPermission old_perm{};
+    KMemoryAttribute old_attr{};
+    size_t num_allocator_blocks{};
+    R_TRY(this->CheckMemoryState(std::addressof(old_state), std::addressof(old_perm),
+                                 std::addressof(old_attr), std::addressof(num_allocator_blocks),
+                                 addr, size, state_mask | KMemoryState::FlagReferenceCounted,
+                                 state | KMemoryState::FlagReferenceCounted, perm_mask, perm,
+                                 attr_mask, attr));
+
+    // Check the page group.
+    if (pg != nullptr) {
+        UNIMPLEMENTED_MSG("PageGroup support is unimplemented!");
+    }
+
+    // Decide on new perm and attr.
+    new_perm = (new_perm != KMemoryPermission::None) ? new_perm : old_perm;
+    KMemoryAttribute new_attr = static_cast<KMemoryAttribute>(old_attr & ~lock_attr);
+
+    // Update permission, if we need to.
+    if (new_perm != old_perm) {
+        R_TRY(Operate(addr, num_pages, new_perm, OperationType::ChangePermissions));
+    }
+
+    // Apply the memory block updates.
+    block_manager->Update(addr, num_pages, old_state, new_perm, new_attr);
+
+    return ResultSuccess;
+}
+
 } // namespace Kernel
diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h
index 54c6adf8d..bfabdf38c 100644
--- a/src/core/hle/kernel/k_page_table.h
+++ b/src/core/hle/kernel/k_page_table.h
@@ -12,6 +12,7 @@
 #include "core/file_sys/program_metadata.h"
 #include "core/hle/kernel/k_light_lock.h"
 #include "core/hle/kernel/k_memory_block.h"
+#include "core/hle/kernel/k_memory_layout.h"
 #include "core/hle/kernel/k_memory_manager.h"
 #include "core/hle/result.h"
 
@@ -71,6 +72,10 @@ public:
     ResultCode UnlockForDeviceAddressSpace(VAddr addr, std::size_t size);
     ResultCode LockForCodeMemory(VAddr addr, std::size_t size);
     ResultCode UnlockForCodeMemory(VAddr addr, std::size_t size);
+    ResultCode MakeAndOpenPageGroup(KPageLinkedList* out, VAddr address, size_t num_pages,
+                                    KMemoryState state_mask, KMemoryState state,
+                                    KMemoryPermission perm_mask, KMemoryPermission perm,
+                                    KMemoryAttribute attr_mask, KMemoryAttribute attr);
 
     Common::PageTable& PageTableImpl() {
         return page_table_impl;
@@ -159,10 +164,37 @@ private:
                                       attr_mask, attr, ignore_attr);
     }
 
+    ResultCode LockMemoryAndOpen(KPageLinkedList* out_pg, PAddr* out_paddr, VAddr addr, size_t size,
+                                 KMemoryState state_mask, KMemoryState state,
+                                 KMemoryPermission perm_mask, KMemoryPermission perm,
+                                 KMemoryAttribute attr_mask, KMemoryAttribute attr,
+                                 KMemoryPermission new_perm, KMemoryAttribute lock_attr);
+    ResultCode UnlockMemory(VAddr addr, size_t size, KMemoryState state_mask, KMemoryState state,
+                            KMemoryPermission perm_mask, KMemoryPermission perm,
+                            KMemoryAttribute attr_mask, KMemoryAttribute attr,
+                            KMemoryPermission new_perm, KMemoryAttribute lock_attr,
+                            const KPageLinkedList* pg);
+
+    ResultCode MakePageGroup(KPageLinkedList& pg, VAddr addr, size_t num_pages);
+
     bool IsLockedByCurrentThread() const {
         return general_lock.IsLockedByCurrentThread();
     }
 
+    bool IsHeapPhysicalAddress(const KMemoryLayout& layout, PAddr phys_addr) {
+        ASSERT(this->IsLockedByCurrentThread());
+
+        return layout.IsHeapPhysicalAddress(cached_physical_heap_region, phys_addr);
+    }
+
+    bool GetPhysicalAddressLocked(PAddr* out, VAddr virt_addr) const {
+        ASSERT(this->IsLockedByCurrentThread());
+
+        *out = GetPhysicalAddr(virt_addr);
+
+        return *out != 0;
+    }
+
     mutable KLightLock general_lock;
     mutable KLightLock map_physical_memory_lock;
 
@@ -322,6 +354,7 @@ private:
     bool is_aslr_enabled{};
 
     u32 heap_fill_value{};
+    const KMemoryRegion* cached_physical_heap_region{};
 
     KMemoryManager::Pool memory_pool{KMemoryManager::Pool::Application};
     KMemoryManager::Direction allocation_option{KMemoryManager::Direction::FromFront};
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 839171e85..976d63234 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1362,8 +1362,11 @@ static ResultCode MapProcessMemory(Core::System& system, VAddr dst_address, Hand
              ResultInvalidMemoryRegion);
 
     // Create a new page group.
-    KMemoryInfo kBlockInfo = dst_pt.QueryInfo(dst_address);
-    KPageLinkedList pg(kBlockInfo.GetAddress(), kBlockInfo.GetNumPages());
+    KPageLinkedList pg;
+    R_TRY(src_pt.MakeAndOpenPageGroup(
+        std::addressof(pg), src_address, size / PageSize, KMemoryState::FlagCanMapProcess,
+        KMemoryState::FlagCanMapProcess, KMemoryPermission::None, KMemoryPermission::None,
+        KMemoryAttribute::All, KMemoryAttribute::None));
 
     // Map the group.
     R_TRY(dst_pt.MapPages(dst_address, pg, KMemoryState::SharedCode,
@@ -1408,8 +1411,8 @@ static ResultCode UnmapProcessMemory(Core::System& system, VAddr dst_address, Ha
 }
 
 static ResultCode CreateCodeMemory(Core::System& system, Handle* out, VAddr address, size_t size) {
-    LOG_TRACE(Kernel_SVC, "called, handle_out={}, address=0x{:X}, size=0x{:X}",
-              static_cast<void*>(out), address, size);
+    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, size=0x{:X}", address, size);
+
     // Get kernel instance.
     auto& kernel = system.Kernel();
 
@@ -1664,7 +1667,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
         return ResultInvalidAddress;
     }
 
-    if (size == 0 || Common::Is4KBAligned(size)) {
+    if (size == 0 || !Common::Is4KBAligned(size)) {
         LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size);
         return ResultInvalidSize;
     }
diff --git a/src/core/hle/service/am/applets/applet_web_browser.cpp b/src/core/hle/service/am/applets/applet_web_browser.cpp
index bb5cb61be..a4b3fb187 100644
--- a/src/core/hle/service/am/applets/applet_web_browser.cpp
+++ b/src/core/hle/service/am/applets/applet_web_browser.cpp
@@ -446,6 +446,14 @@ void WebBrowser::ExecuteLogin() {
 }
 
 void WebBrowser::ExecuteOffline() {
+    // TODO (Morph): This is a hack for WebSession foreground web applets such as those used by
+    //               Super Mario 3D All-Stars.
+    // TODO (Morph): Implement WebSession.
+    if (applet_mode == LibraryAppletMode::AllForegroundInitiallyHidden) {
+        LOG_WARNING(Service_AM, "WebSession is not implemented");
+        return;
+    }
+
     const auto main_url = GetMainURL(Common::FS::PathToUTF8String(offline_document));
 
     if (!Common::FS::Exists(main_url)) {
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index 3703ca4c6..4208337db 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -174,7 +174,7 @@ ResultCode VfsDirectoryServiceWrapper::RenameFile(const std::string& src_path_,
     ASSERT_MSG(dest != nullptr, "Newly created file with success cannot be found.");
 
     ASSERT_MSG(dest->WriteBytes(src->ReadAllBytes()) == src->GetSize(),
-               "Could not write all of the bytes but everything else has succeded.");
+               "Could not write all of the bytes but everything else has succeeded.");
 
     if (!src->GetContainingDirectory()->DeleteFile(Common::FS::GetFilename(src_path))) {
         // TODO(DarkLordZach): Find a better error code for this
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index e5c951e06..aa6cb34b7 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -262,11 +262,6 @@ void Controller_NPad::OnInit() {
             service_context.CreateEvent(fmt::format("npad:NpadStyleSetChanged_{}", i));
     }
 
-    if (hid_core.GetSupportedStyleTag().raw == Core::HID::NpadStyleSet::None) {
-        // We want to support all controllers
-        hid_core.SetSupportedStyleTag({Core::HID::NpadStyleSet::All});
-    }
-
     supported_npad_id_types.resize(npad_id_list.size());
     std::memcpy(supported_npad_id_types.data(), npad_id_list.data(),
                 npad_id_list.size() * sizeof(Core::HID::NpadIdType));
@@ -288,14 +283,6 @@ void Controller_NPad::OnInit() {
             WriteEmptyEntry(npad);
         }
     }
-
-    // Connect controllers
-    for (auto& controller : controller_data) {
-        const auto& device = controller.device;
-        if (device->IsConnected()) {
-            AddNewControllerAt(device->GetNpadStyleIndex(), device->GetNpadIdType());
-        }
-    }
 }
 
 void Controller_NPad::WriteEmptyEntry(NpadInternalState& npad) {
@@ -320,6 +307,7 @@ void Controller_NPad::WriteEmptyEntry(NpadInternalState& npad) {
 }
 
 void Controller_NPad::OnRelease() {
+    is_controller_initialized = false;
     for (std::size_t i = 0; i < controller_data.size(); ++i) {
         auto& controller = controller_data[i];
         service_context.CloseEvent(controller.styleset_changed_event);
@@ -651,9 +639,27 @@ void Controller_NPad::OnMotionUpdate(const Core::Timing::CoreTiming& core_timing
 
 void Controller_NPad::SetSupportedStyleSet(Core::HID::NpadStyleTag style_set) {
     hid_core.SetSupportedStyleTag(style_set);
+
+    if (is_controller_initialized) {
+        return;
+    }
+
+    // Once SetSupportedStyleSet is called, controllers are fully initialized
+    is_controller_initialized = true;
+
+    // Connect all active controllers
+    for (auto& controller : controller_data) {
+        const auto& device = controller.device;
+        if (device->IsConnected()) {
+            AddNewControllerAt(device->GetNpadStyleIndex(), device->GetNpadIdType());
+        }
+    }
 }
 
 Core::HID::NpadStyleTag Controller_NPad::GetSupportedStyleSet() const {
+    if (!is_controller_initialized) {
+        return {Core::HID::NpadStyleSet::None};
+    }
     return hid_core.GetSupportedStyleTag();
 }
 
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 3287cf435..967379f05 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -191,16 +191,16 @@ private:
 
     // This is nn::hid::detail::NpadFullKeyColorState
     struct NpadFullKeyColorState {
-        ColorAttribute attribute;
-        Core::HID::NpadControllerColor fullkey;
+        ColorAttribute attribute{ColorAttribute::NoController};
+        Core::HID::NpadControllerColor fullkey{};
     };
     static_assert(sizeof(NpadFullKeyColorState) == 0xC, "NpadFullKeyColorState is an invalid size");
 
     // This is nn::hid::detail::NpadJoyColorState
     struct NpadJoyColorState {
-        ColorAttribute attribute;
-        Core::HID::NpadControllerColor left;
-        Core::HID::NpadControllerColor right;
+        ColorAttribute attribute{ColorAttribute::NoController};
+        Core::HID::NpadControllerColor left{};
+        Core::HID::NpadControllerColor right{};
     };
     static_assert(sizeof(NpadJoyColorState) == 0x14, "NpadJoyColorState is an invalid size");
 
@@ -226,11 +226,11 @@ private:
     // This is nn::hid::NpadPalmaState
     // This is nn::hid::NpadSystemExtState
     struct NPadGenericState {
-        s64_le sampling_number;
-        Core::HID::NpadButtonState npad_buttons;
-        Core::HID::AnalogStickState l_stick;
-        Core::HID::AnalogStickState r_stick;
-        NpadAttribute connection_status;
+        s64_le sampling_number{};
+        Core::HID::NpadButtonState npad_buttons{};
+        Core::HID::AnalogStickState l_stick{};
+        Core::HID::AnalogStickState r_stick{};
+        NpadAttribute connection_status{};
         INSERT_PADDING_BYTES(4); // Reserved
     };
     static_assert(sizeof(NPadGenericState) == 0x28, "NPadGenericState is an invalid size");
@@ -253,7 +253,7 @@ private:
         Common::Vec3f gyro{};
         Common::Vec3f rotation{};
         std::array<Common::Vec3f, 3> orientation{};
-        SixAxisSensorAttribute attribute;
+        SixAxisSensorAttribute attribute{};
         INSERT_PADDING_BYTES(4); // Reserved
     };
     static_assert(sizeof(SixAxisSensorState) == 0x60, "SixAxisSensorState is an invalid size");
@@ -325,11 +325,11 @@ private:
 
     // This is nn::hid::detail::NfcXcdDeviceHandleStateImpl
     struct NfcXcdDeviceHandleStateImpl {
-        u64 handle;
-        bool is_available;
-        bool is_activated;
+        u64 handle{};
+        bool is_available{};
+        bool is_activated{};
         INSERT_PADDING_BYTES(0x6); // Reserved
-        u64 sampling_number;
+        u64 sampling_number{};
     };
     static_assert(sizeof(NfcXcdDeviceHandleStateImpl) == 0x18,
                   "NfcXcdDeviceHandleStateImpl is an invalid size");
@@ -366,8 +366,8 @@ private:
     };
 
     struct AppletFooterUi {
-        AppletFooterUiAttributes attributes;
-        AppletFooterUiType type;
+        AppletFooterUiAttributes attributes{};
+        AppletFooterUiType type{AppletFooterUiType::None};
         INSERT_PADDING_BYTES(0x5B); // Reserved
     };
     static_assert(sizeof(AppletFooterUi) == 0x60, "AppletFooterUi is an invalid size");
@@ -404,41 +404,41 @@ private:
 
     // This is nn::hid::detail::NpadInternalState
     struct NpadInternalState {
-        Core::HID::NpadStyleTag style_tag;
-        NpadJoyAssignmentMode assignment_mode;
-        NpadFullKeyColorState fullkey_color;
-        NpadJoyColorState joycon_color;
-        Lifo<NPadGenericState, hid_entry_count> fullkey_lifo;
-        Lifo<NPadGenericState, hid_entry_count> handheld_lifo;
-        Lifo<NPadGenericState, hid_entry_count> joy_dual_lifo;
-        Lifo<NPadGenericState, hid_entry_count> joy_left_lifo;
-        Lifo<NPadGenericState, hid_entry_count> joy_right_lifo;
-        Lifo<NPadGenericState, hid_entry_count> palma_lifo;
-        Lifo<NPadGenericState, hid_entry_count> system_ext_lifo;
-        Lifo<SixAxisSensorState, hid_entry_count> sixaxis_fullkey_lifo;
-        Lifo<SixAxisSensorState, hid_entry_count> sixaxis_handheld_lifo;
-        Lifo<SixAxisSensorState, hid_entry_count> sixaxis_dual_left_lifo;
-        Lifo<SixAxisSensorState, hid_entry_count> sixaxis_dual_right_lifo;
-        Lifo<SixAxisSensorState, hid_entry_count> sixaxis_left_lifo;
-        Lifo<SixAxisSensorState, hid_entry_count> sixaxis_right_lifo;
-        DeviceType device_type;
+        Core::HID::NpadStyleTag style_tag{Core::HID::NpadStyleSet::None};
+        NpadJoyAssignmentMode assignment_mode{NpadJoyAssignmentMode::Dual};
+        NpadFullKeyColorState fullkey_color{};
+        NpadJoyColorState joycon_color{};
+        Lifo<NPadGenericState, hid_entry_count> fullkey_lifo{};
+        Lifo<NPadGenericState, hid_entry_count> handheld_lifo{};
+        Lifo<NPadGenericState, hid_entry_count> joy_dual_lifo{};
+        Lifo<NPadGenericState, hid_entry_count> joy_left_lifo{};
+        Lifo<NPadGenericState, hid_entry_count> joy_right_lifo{};
+        Lifo<NPadGenericState, hid_entry_count> palma_lifo{};
+        Lifo<NPadGenericState, hid_entry_count> system_ext_lifo{};
+        Lifo<SixAxisSensorState, hid_entry_count> sixaxis_fullkey_lifo{};
+        Lifo<SixAxisSensorState, hid_entry_count> sixaxis_handheld_lifo{};
+        Lifo<SixAxisSensorState, hid_entry_count> sixaxis_dual_left_lifo{};
+        Lifo<SixAxisSensorState, hid_entry_count> sixaxis_dual_right_lifo{};
+        Lifo<SixAxisSensorState, hid_entry_count> sixaxis_left_lifo{};
+        Lifo<SixAxisSensorState, hid_entry_count> sixaxis_right_lifo{};
+        DeviceType device_type{};
         INSERT_PADDING_BYTES(0x4); // Reserved
-        NPadSystemProperties system_properties;
-        NpadSystemButtonProperties button_properties;
-        Core::HID::NpadBatteryLevel battery_level_dual;
-        Core::HID::NpadBatteryLevel battery_level_left;
-        Core::HID::NpadBatteryLevel battery_level_right;
+        NPadSystemProperties system_properties{};
+        NpadSystemButtonProperties button_properties{};
+        Core::HID::NpadBatteryLevel battery_level_dual{};
+        Core::HID::NpadBatteryLevel battery_level_left{};
+        Core::HID::NpadBatteryLevel battery_level_right{};
         union {
-            Lifo<NfcXcdDeviceHandleStateImpl, 0x2> nfc_xcd_device_lifo{};
-            AppletFooterUi applet_footer;
+            AppletFooterUi applet_footer{};
+            Lifo<NfcXcdDeviceHandleStateImpl, 0x2> nfc_xcd_device_lifo;
         };
         INSERT_PADDING_BYTES(0x20); // Unknown
-        Lifo<NpadGcTriggerState, hid_entry_count> gc_trigger_lifo;
-        NpadLarkType lark_type_l_and_main;
-        NpadLarkType lark_type_r;
-        NpadLuciaType lucia_type;
-        NpadLagonType lagon_type;
-        NpadLagerType lager_type;
+        Lifo<NpadGcTriggerState, hid_entry_count> gc_trigger_lifo{};
+        NpadLarkType lark_type_l_and_main{};
+        NpadLarkType lark_type_r{};
+        NpadLuciaType lucia_type{};
+        NpadLagonType lagon_type{};
+        NpadLagerType lager_type{};
         // FW 13.x Investigate there is some sort of bitflag related to joycons
         INSERT_PADDING_BYTES(0x4);
         INSERT_PADDING_BYTES(0xc08); // Unknown
@@ -511,7 +511,8 @@ private:
     NpadHandheldActivationMode handheld_activation_mode{NpadHandheldActivationMode::Dual};
     NpadCommunicationMode communication_mode{NpadCommunicationMode::Default};
     bool permit_vibration_session_enabled{false};
-    bool analog_stick_use_center_clamp{};
+    bool analog_stick_use_center_clamp{false};
     bool is_in_lr_assignment_mode{false};
+    bool is_controller_initialized{false};
 };
 } // namespace Service::HID
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index d9202ea6c..92e6bf889 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -63,10 +63,6 @@ IAppletResource::IAppletResource(Core::System& system_,
     MakeController<Controller_Gesture>(HidController::Gesture);
     MakeController<Controller_ConsoleSixAxis>(HidController::ConsoleSixAxisSensor);
 
-    // Homebrew doesn't try to activate some controllers, so we activate them by default
-    GetController<Controller_NPad>(HidController::NPad).ActivateController();
-    GetController<Controller_Touchscreen>(HidController::Touchscreen).ActivateController();
-
     GetController<Controller_Stubbed>(HidController::HomeButton).SetCommonHeaderOffset(0x4C00);
     GetController<Controller_Stubbed>(HidController::SleepButton).SetCommonHeaderOffset(0x4E00);
     GetController<Controller_Stubbed>(HidController::CaptureButton).SetCommonHeaderOffset(0x5000);
@@ -878,6 +874,10 @@ void Hid::AcquireNpadStyleSetUpdateEventHandle(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_HID, "called, npad_id={}, applet_resource_user_id={}, unknown={}",
               parameters.npad_id, parameters.applet_resource_user_id, parameters.unknown);
 
+    // Games expect this event to be signaled after calling this function
+    applet_resource->GetController<Controller_NPad>(HidController::NPad)
+        .SignalStyleSetChangedEvent(parameters.npad_id);
+
     IPC::ResponseBuilder rb{ctx, 2, 1};
     rb.Push(ResultSuccess);
     rb.PushCopyObjects(applet_resource->GetController<Controller_NPad>(HidController::NPad)
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index f9b82b504..44c54c665 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -134,7 +134,7 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
     }
 
     EventState status = events_interface.status[event_id];
-    const bool bad_parameter = status != EventState::Free && status != EventState::Registered;
+    const bool bad_parameter = status == EventState::Busy;
     if (bad_parameter) {
         std::memcpy(output.data(), &params, sizeof(params));
         return NvResult::BadParameter;
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
index c16babe14..1ce2a856b 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
@@ -26,7 +26,7 @@ void NVDRV::Open(Kernel::HLERequestContext& ctx) {
         rb.Push<DeviceFD>(0);
         rb.PushEnum(NvResult::NotInitialized);
 
-        LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
+        LOG_ERROR(Service_NVDRV, "NvServices is not initialized!");
         return;
     }
 
@@ -61,7 +61,7 @@ void NVDRV::Ioctl1(Kernel::HLERequestContext& ctx) {
 
     if (!is_initialized) {
         ServiceError(ctx, NvResult::NotInitialized);
-        LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
+        LOG_ERROR(Service_NVDRV, "NvServices is not initialized!");
         return;
     }
 
@@ -87,7 +87,7 @@ void NVDRV::Ioctl2(Kernel::HLERequestContext& ctx) {
 
     if (!is_initialized) {
         ServiceError(ctx, NvResult::NotInitialized);
-        LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
+        LOG_ERROR(Service_NVDRV, "NvServices is not initialized!");
         return;
     }
 
@@ -114,7 +114,7 @@ void NVDRV::Ioctl3(Kernel::HLERequestContext& ctx) {
 
     if (!is_initialized) {
         ServiceError(ctx, NvResult::NotInitialized);
-        LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
+        LOG_ERROR(Service_NVDRV, "NvServices is not initialized!");
         return;
     }
 
@@ -139,7 +139,7 @@ void NVDRV::Close(Kernel::HLERequestContext& ctx) {
 
     if (!is_initialized) {
         ServiceError(ctx, NvResult::NotInitialized);
-        LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
+        LOG_ERROR(Service_NVDRV, "NvServices is not initialized!");
         return;
     }
 
@@ -170,7 +170,7 @@ void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) {
 
     if (!is_initialized) {
         ServiceError(ctx, NvResult::NotInitialized);
-        LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
+        LOG_ERROR(Service_NVDRV, "NvServices is not initialized!");
         return;
     }
 
diff --git a/src/core/hle/service/nvflinger/buffer_item_consumer.cpp b/src/core/hle/service/nvflinger/buffer_item_consumer.cpp
index 7f32c0775..93fa1ec10 100644
--- a/src/core/hle/service/nvflinger/buffer_item_consumer.cpp
+++ b/src/core/hle/service/nvflinger/buffer_item_consumer.cpp
@@ -21,7 +21,7 @@ Status BufferItemConsumer::AcquireBuffer(BufferItem* item, std::chrono::nanoseco
         return Status::BadValue;
     }
 
-    std::unique_lock lock(mutex);
+    std::scoped_lock lock(mutex);
 
     if (const auto status = AcquireBufferLocked(item, present_when); status != Status::NoError) {
         if (status != Status::NoBufferAvailable) {
@@ -40,7 +40,7 @@ Status BufferItemConsumer::AcquireBuffer(BufferItem* item, std::chrono::nanoseco
 }
 
 Status BufferItemConsumer::ReleaseBuffer(const BufferItem& item, Fence& release_fence) {
-    std::unique_lock lock(mutex);
+    std::scoped_lock lock(mutex);
 
     if (const auto status = AddReleaseFenceLocked(item.buf, item.graphic_buffer, release_fence);
         status != Status::NoError) {
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
deleted file mode 100644
index 5fead6d1b..000000000
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "core/core.h"
-#include "core/hle/kernel/k_writable_event.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/service/kernel_helpers.h"
-#include "core/hle/service/nvflinger/buffer_queue.h"
-
-namespace Service::NVFlinger {
-
-BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_,
-                         KernelHelpers::ServiceContext& service_context_)
-    : id(id_), layer_id(layer_id_), service_context{service_context_} {
-    buffer_wait_event = service_context.CreateEvent("BufferQueue:WaitEvent");
-}
-
-BufferQueue::~BufferQueue() {
-    service_context.CloseEvent(buffer_wait_event);
-}
-
-void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) {
-    ASSERT(slot < buffer_slots);
-    LOG_WARNING(Service, "Adding graphics buffer {}", slot);
-
-    {
-        std::unique_lock lock{free_buffers_mutex};
-        free_buffers.push_back(slot);
-    }
-    free_buffers_condition.notify_one();
-
-    buffers[slot] = {
-        .slot = slot,
-        .status = Buffer::Status::Free,
-        .igbp_buffer = igbp_buffer,
-        .transform = {},
-        .crop_rect = {},
-        .swap_interval = 0,
-        .multi_fence = {},
-    };
-
-    buffer_wait_event->GetWritableEvent().Signal();
-}
-
-std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
-                                                                                       u32 height) {
-    // Wait for first request before trying to dequeue
-    {
-        std::unique_lock lock{free_buffers_mutex};
-        free_buffers_condition.wait(lock, [this] { return !free_buffers.empty() || !is_connect; });
-    }
-
-    if (!is_connect) {
-        // Buffer was disconnected while the thread was blocked, this is most likely due to
-        // emulation being stopped
-        return std::nullopt;
-    }
-
-    std::unique_lock lock{free_buffers_mutex};
-
-    auto f_itr = free_buffers.begin();
-    auto slot = buffers.size();
-
-    while (f_itr != free_buffers.end()) {
-        const Buffer& buffer = buffers[*f_itr];
-        if (buffer.status == Buffer::Status::Free && buffer.igbp_buffer.width == width &&
-            buffer.igbp_buffer.height == height) {
-            slot = *f_itr;
-            free_buffers.erase(f_itr);
-            break;
-        }
-        ++f_itr;
-    }
-    if (slot == buffers.size()) {
-        return std::nullopt;
-    }
-    buffers[slot].status = Buffer::Status::Dequeued;
-    return {{buffers[slot].slot, &buffers[slot].multi_fence}};
-}
-
-const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
-    ASSERT(slot < buffers.size());
-    ASSERT(buffers[slot].status == Buffer::Status::Dequeued);
-    ASSERT(buffers[slot].slot == slot);
-
-    return buffers[slot].igbp_buffer;
-}
-
-void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
-                              const Common::Rectangle<int>& crop_rect, u32 swap_interval,
-                              Service::Nvidia::MultiFence& multi_fence) {
-    ASSERT(slot < buffers.size());
-    ASSERT(buffers[slot].status == Buffer::Status::Dequeued);
-    ASSERT(buffers[slot].slot == slot);
-
-    buffers[slot].status = Buffer::Status::Queued;
-    buffers[slot].transform = transform;
-    buffers[slot].crop_rect = crop_rect;
-    buffers[slot].swap_interval = swap_interval;
-    buffers[slot].multi_fence = multi_fence;
-    std::unique_lock lock{queue_sequence_mutex};
-    queue_sequence.push_back(slot);
-}
-
-void BufferQueue::CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& multi_fence) {
-    ASSERT(slot < buffers.size());
-    ASSERT(buffers[slot].status != Buffer::Status::Free);
-    ASSERT(buffers[slot].slot == slot);
-
-    buffers[slot].status = Buffer::Status::Free;
-    buffers[slot].multi_fence = multi_fence;
-    buffers[slot].swap_interval = 0;
-
-    {
-        std::unique_lock lock{free_buffers_mutex};
-        free_buffers.push_back(slot);
-    }
-    free_buffers_condition.notify_one();
-
-    buffer_wait_event->GetWritableEvent().Signal();
-}
-
-std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() {
-    std::unique_lock lock{queue_sequence_mutex};
-    std::size_t buffer_slot = buffers.size();
-    // Iterate to find a queued buffer matching the requested slot.
-    while (buffer_slot == buffers.size() && !queue_sequence.empty()) {
-        const auto slot = static_cast<std::size_t>(queue_sequence.front());
-        ASSERT(slot < buffers.size());
-        if (buffers[slot].status == Buffer::Status::Queued) {
-            ASSERT(buffers[slot].slot == slot);
-            buffer_slot = slot;
-        }
-        queue_sequence.pop_front();
-    }
-    if (buffer_slot == buffers.size()) {
-        return std::nullopt;
-    }
-    buffers[buffer_slot].status = Buffer::Status::Acquired;
-    return {{buffers[buffer_slot]}};
-}
-
-void BufferQueue::ReleaseBuffer(u32 slot) {
-    ASSERT(slot < buffers.size());
-    ASSERT(buffers[slot].status == Buffer::Status::Acquired);
-    ASSERT(buffers[slot].slot == slot);
-
-    buffers[slot].status = Buffer::Status::Free;
-    {
-        std::unique_lock lock{free_buffers_mutex};
-        free_buffers.push_back(slot);
-    }
-    free_buffers_condition.notify_one();
-
-    buffer_wait_event->GetWritableEvent().Signal();
-}
-
-void BufferQueue::Connect() {
-    std::unique_lock lock{queue_sequence_mutex};
-    queue_sequence.clear();
-    is_connect = true;
-}
-
-void BufferQueue::Disconnect() {
-    buffers.fill({});
-    {
-        std::unique_lock lock{queue_sequence_mutex};
-        queue_sequence.clear();
-    }
-    buffer_wait_event->GetWritableEvent().Signal();
-    is_connect = false;
-    free_buffers_condition.notify_one();
-}
-
-u32 BufferQueue::Query(QueryType type) {
-    LOG_WARNING(Service, "(STUBBED) called type={}", type);
-
-    switch (type) {
-    case QueryType::NativeWindowFormat:
-        return static_cast<u32>(PixelFormat::RGBA8888);
-    case QueryType::NativeWindowWidth:
-    case QueryType::NativeWindowHeight:
-        break;
-    case QueryType::NativeWindowMinUndequeuedBuffers:
-        return 0;
-    case QueryType::NativeWindowConsumerUsageBits:
-        return 0;
-    }
-    UNIMPLEMENTED_MSG("Unimplemented query type={}", type);
-    return 0;
-}
-
-Kernel::KWritableEvent& BufferQueue::GetWritableBufferWaitEvent() {
-    return buffer_wait_event->GetWritableEvent();
-}
-
-Kernel::KReadableEvent& BufferQueue::GetBufferWaitEvent() {
-    return buffer_wait_event->GetReadableEvent();
-}
-
-} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
deleted file mode 100644
index f2a579133..000000000
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ /dev/null
@@ -1,154 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <condition_variable>
-#include <list>
-#include <mutex>
-#include <optional>
-
-#include "common/common_funcs.h"
-#include "common/math_util.h"
-#include "common/swap.h"
-#include "core/hle/kernel/k_event.h"
-#include "core/hle/kernel/k_readable_event.h"
-#include "core/hle/service/nvdrv/nvdata.h"
-
-namespace Kernel {
-class KernelCore;
-class KEvent;
-class KReadableEvent;
-class KWritableEvent;
-} // namespace Kernel
-
-namespace Service::KernelHelpers {
-class ServiceContext;
-} // namespace Service::KernelHelpers
-
-namespace Service::NVFlinger {
-
-constexpr u32 buffer_slots = 0x40;
-struct IGBPBuffer {
-    u32_le magic;
-    u32_le width;
-    u32_le height;
-    u32_le stride;
-    u32_le format;
-    u32_le usage;
-    INSERT_PADDING_WORDS(1);
-    u32_le index;
-    INSERT_PADDING_WORDS(3);
-    u32_le gpu_buffer_id;
-    INSERT_PADDING_WORDS(6);
-    u32_le external_format;
-    INSERT_PADDING_WORDS(10);
-    u32_le nvmap_handle;
-    u32_le offset;
-    INSERT_PADDING_WORDS(60);
-};
-
-static_assert(sizeof(IGBPBuffer) == 0x16C, "IGBPBuffer has wrong size");
-
-class BufferQueue final {
-public:
-    enum class QueryType {
-        NativeWindowWidth = 0,
-        NativeWindowHeight = 1,
-        NativeWindowFormat = 2,
-        /// The minimum number of buffers that must remain un-dequeued after a buffer has been
-        /// queued
-        NativeWindowMinUndequeuedBuffers = 3,
-        /// The consumer gralloc usage bits currently set by the consumer
-        NativeWindowConsumerUsageBits = 10,
-    };
-
-    explicit BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_,
-                         KernelHelpers::ServiceContext& service_context_);
-    ~BufferQueue();
-
-    enum class BufferTransformFlags : u32 {
-        /// No transform flags are set
-        Unset = 0x00,
-        /// Flip source image horizontally (around the vertical axis)
-        FlipH = 0x01,
-        /// Flip source image vertically (around the horizontal axis)
-        FlipV = 0x02,
-        /// Rotate source image 90 degrees clockwise
-        Rotate90 = 0x04,
-        /// Rotate source image 180 degrees
-        Rotate180 = 0x03,
-        /// Rotate source image 270 degrees clockwise
-        Rotate270 = 0x07,
-    };
-
-    enum class PixelFormat : u32 {
-        RGBA8888 = 1,
-        RGBX8888 = 2,
-        RGB888 = 3,
-        RGB565 = 4,
-        BGRA8888 = 5,
-        RGBA5551 = 6,
-        RRGBA4444 = 7,
-    };
-
-    struct Buffer {
-        enum class Status { Free = 0, Queued = 1, Dequeued = 2, Acquired = 3 };
-
-        u32 slot;
-        Status status = Status::Free;
-        IGBPBuffer igbp_buffer;
-        BufferTransformFlags transform;
-        Common::Rectangle<int> crop_rect;
-        u32 swap_interval;
-        Service::Nvidia::MultiFence multi_fence;
-    };
-
-    void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
-    std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> DequeueBuffer(u32 width,
-                                                                              u32 height);
-    const IGBPBuffer& RequestBuffer(u32 slot) const;
-    void QueueBuffer(u32 slot, BufferTransformFlags transform,
-                     const Common::Rectangle<int>& crop_rect, u32 swap_interval,
-                     Service::Nvidia::MultiFence& multi_fence);
-    void CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& multi_fence);
-    std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
-    void ReleaseBuffer(u32 slot);
-    void Connect();
-    void Disconnect();
-    u32 Query(QueryType type);
-
-    u32 GetId() const {
-        return id;
-    }
-
-    bool IsConnected() const {
-        return is_connect;
-    }
-
-    Kernel::KWritableEvent& GetWritableBufferWaitEvent();
-
-    Kernel::KReadableEvent& GetBufferWaitEvent();
-
-private:
-    BufferQueue(const BufferQueue&) = delete;
-
-    u32 id{};
-    u64 layer_id{};
-    std::atomic_bool is_connect{};
-
-    std::list<u32> free_buffers;
-    std::array<Buffer, buffer_slots> buffers;
-    std::list<u32> queue_sequence;
-    Kernel::KEvent* buffer_wait_event{};
-
-    std::mutex free_buffers_mutex;
-    std::condition_variable free_buffers_condition;
-
-    std::mutex queue_sequence_mutex;
-
-    KernelHelpers::ServiceContext& service_context;
-};
-
-} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/buffer_queue_consumer.cpp b/src/core/hle/service/nvflinger/buffer_queue_consumer.cpp
index 677bec932..41fbba219 100644
--- a/src/core/hle/service/nvflinger/buffer_queue_consumer.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue_consumer.cpp
@@ -20,122 +20,102 @@ BufferQueueConsumer::~BufferQueueConsumer() = default;
 Status BufferQueueConsumer::AcquireBuffer(BufferItem* out_buffer,
                                           std::chrono::nanoseconds expected_present,
                                           u64 max_frame_number) {
-    s32 num_dropped_buffers{};
+    std::scoped_lock lock(core->mutex);
+
+    // Check that the consumer doesn't currently have the maximum number of buffers acquired.
+    const s32 num_acquired_buffers{
+        static_cast<s32>(std::count_if(slots.begin(), slots.end(), [](const auto& slot) {
+            return slot.buffer_state == BufferState::Acquired;
+        }))};
+
+    if (num_acquired_buffers >= core->max_acquired_buffer_count + 1) {
+        LOG_ERROR(Service_NVFlinger, "max acquired buffer count reached: {} (max {})",
+                  num_acquired_buffers, core->max_acquired_buffer_count);
+        return Status::InvalidOperation;
+    }
 
-    std::shared_ptr<IProducerListener> listener;
-    {
-        std::unique_lock lock(core->mutex);
-
-        // Check that the consumer doesn't currently have the maximum number of buffers acquired.
-        const s32 num_acquired_buffers{
-            static_cast<s32>(std::count_if(slots.begin(), slots.end(), [](const auto& slot) {
-                return slot.buffer_state == BufferState::Acquired;
-            }))};
-
-        if (num_acquired_buffers >= core->max_acquired_buffer_count + 1) {
-            LOG_ERROR(Service_NVFlinger, "max acquired buffer count reached: {} (max {})",
-                      num_acquired_buffers, core->max_acquired_buffer_count);
-            return Status::InvalidOperation;
-        }
+    // Check if the queue is empty.
+    if (core->queue.empty()) {
+        return Status::NoBufferAvailable;
+    }
 
-        // Check if the queue is empty.
-        if (core->queue.empty()) {
-            return Status::NoBufferAvailable;
-        }
+    auto front(core->queue.begin());
 
-        auto front(core->queue.begin());
-
-        // If expected_present is specified, we may not want to return a buffer yet.
-        if (expected_present.count() != 0) {
-            constexpr auto MAX_REASONABLE_NSEC = 1000000000LL; // 1 second
-
-            // The expected_present argument indicates when the buffer is expected to be
-            // presented on-screen.
-            while (core->queue.size() > 1 && !core->queue[0].is_auto_timestamp) {
-                const auto& buffer_item{core->queue[1]};
-
-                // If dropping entry[0] would leave us with a buffer that the consumer is not yet
-                // ready for, don't drop it.
-                if (max_frame_number && buffer_item.frame_number > max_frame_number) {
-                    break;
-                }
-
-                // If entry[1] is timely, drop entry[0] (and repeat).
-                const auto desired_present = buffer_item.timestamp;
-                if (desired_present < expected_present.count() - MAX_REASONABLE_NSEC ||
-                    desired_present > expected_present.count()) {
-                    // This buffer is set to display in the near future, or desired_present is
-                    // garbage.
-                    LOG_DEBUG(Service_NVFlinger, "nodrop desire={} expect={}", desired_present,
-                              expected_present.count());
-                    break;
-                }
-
-                LOG_DEBUG(Service_NVFlinger, "drop desire={} expect={} size={}", desired_present,
-                          expected_present.count(), core->queue.size());
-
-                if (core->StillTracking(*front)) {
-                    // Front buffer is still in mSlots, so mark the slot as free
-                    slots[front->slot].buffer_state = BufferState::Free;
-                    core->free_buffers.push_back(front->slot);
-                    listener = core->connected_producer_listener;
-                    ++num_dropped_buffers;
-                }
-
-                core->queue.erase(front);
-                front = core->queue.begin();
-            }
+    // If expected_present is specified, we may not want to return a buffer yet.
+    if (expected_present.count() != 0) {
+        constexpr auto MAX_REASONABLE_NSEC = 1000000000LL; // 1 second
 
-            // See if the front buffer is ready to be acquired.
-            const auto desired_present = front->timestamp;
-            const auto buffer_is_due =
-                desired_present <= expected_present.count() ||
-                desired_present > expected_present.count() + MAX_REASONABLE_NSEC;
-            const auto consumer_is_ready =
-                max_frame_number > 0 ? front->frame_number <= max_frame_number : true;
+        // The expected_present argument indicates when the buffer is expected to be presented
+        // on-screen.
+        while (core->queue.size() > 1 && !core->queue[0].is_auto_timestamp) {
+            const auto& buffer_item{core->queue[1]};
 
-            if (!buffer_is_due || !consumer_is_ready) {
-                LOG_DEBUG(Service_NVFlinger, "defer desire={} expect={}", desired_present,
-                          expected_present.count());
-                return Status::PresentLater;
+            // If dropping entry[0] would leave us with a buffer that the consumer is not yet ready
+            // for, don't drop it.
+            if (max_frame_number && buffer_item.frame_number > max_frame_number) {
+                break;
             }
 
-            LOG_DEBUG(Service_NVFlinger, "accept desire={} expect={}", desired_present,
-                      expected_present.count());
-        }
+            // If entry[1] is timely, drop entry[0] (and repeat).
+            const auto desired_present = buffer_item.timestamp;
+            if (desired_present < expected_present.count() - MAX_REASONABLE_NSEC ||
+                desired_present > expected_present.count()) {
+                // This buffer is set to display in the near future, or desired_present is garbage.
+                LOG_DEBUG(Service_NVFlinger, "nodrop desire={} expect={}", desired_present,
+                          expected_present.count());
+                break;
+            }
 
-        const auto slot = front->slot;
-        *out_buffer = *front;
+            LOG_DEBUG(Service_NVFlinger, "drop desire={} expect={} size={}", desired_present,
+                      expected_present.count(), core->queue.size());
 
-        LOG_DEBUG(Service_NVFlinger, "acquiring slot={}", slot);
+            if (core->StillTracking(*front)) {
+                // Front buffer is still in mSlots, so mark the slot as free
+                slots[front->slot].buffer_state = BufferState::Free;
+            }
 
-        // If the front buffer is still being tracked, update its slot state
-        if (core->StillTracking(*front)) {
-            slots[slot].acquire_called = true;
-            slots[slot].needs_cleanup_on_release = false;
-            slots[slot].buffer_state = BufferState::Acquired;
-            slots[slot].fence = Fence::NoFence();
+            core->queue.erase(front);
+            front = core->queue.begin();
         }
 
-        // If the buffer has previously been acquired by the consumer, set graphic_buffer to nullptr
-        // to avoid unnecessarily remapping this buffer on the consumer side.
-        if (out_buffer->acquire_called) {
-            out_buffer->graphic_buffer = nullptr;
+        // See if the front buffer is ready to be acquired.
+        const auto desired_present = front->timestamp;
+        if (desired_present > expected_present.count() &&
+            desired_present < expected_present.count() + MAX_REASONABLE_NSEC) {
+            LOG_DEBUG(Service_NVFlinger, "defer desire={} expect={}", desired_present,
+                      expected_present.count());
+            return Status::PresentLater;
         }
 
-        core->queue.erase(front);
+        LOG_DEBUG(Service_NVFlinger, "accept desire={} expect={}", desired_present,
+                  expected_present.count());
+    }
+
+    const auto slot = front->slot;
+    *out_buffer = *front;
 
-        // We might have freed a slot while dropping old buffers, or the producer  may be blocked
-        // waiting for the number of buffers in the queue to decrease.
-        core->SignalDequeueCondition();
+    LOG_DEBUG(Service_NVFlinger, "acquiring slot={}", slot);
+
+    // If the front buffer is still being tracked, update its slot state
+    if (core->StillTracking(*front)) {
+        slots[slot].acquire_called = true;
+        slots[slot].needs_cleanup_on_release = false;
+        slots[slot].buffer_state = BufferState::Acquired;
+        slots[slot].fence = Fence::NoFence();
     }
 
-    if (listener != nullptr) {
-        for (s32 i = 0; i < num_dropped_buffers; ++i) {
-            listener->OnBufferReleased();
-        }
+    // If the buffer has previously been acquired by the consumer, set graphic_buffer to nullptr to
+    // avoid unnecessarily remapping this buffer on the consumer side.
+    if (out_buffer->acquire_called) {
+        out_buffer->graphic_buffer = nullptr;
     }
 
+    core->queue.erase(front);
+
+    // We might have freed a slot while dropping old buffers, or the producer may be blocked
+    // waiting for the number of buffers in the queue to decrease.
+    core->SignalDequeueCondition();
+
     return Status::NoError;
 }
 
@@ -147,7 +127,7 @@ Status BufferQueueConsumer::ReleaseBuffer(s32 slot, u64 frame_number, const Fenc
 
     std::shared_ptr<IProducerListener> listener;
     {
-        std::unique_lock lock(core->mutex);
+        std::scoped_lock lock(core->mutex);
 
         // If the frame number has changed because the buffer has been reallocated, we can ignore
         // this ReleaseBuffer for the old buffer.
@@ -170,8 +150,6 @@ Status BufferQueueConsumer::ReleaseBuffer(s32 slot, u64 frame_number, const Fenc
             slots[slot].fence = release_fence;
             slots[slot].buffer_state = BufferState::Free;
 
-            core->free_buffers.push_back(slot);
-
             listener = core->connected_producer_listener;
 
             LOG_DEBUG(Service_NVFlinger, "releasing slot {}", slot);
@@ -189,7 +167,7 @@ Status BufferQueueConsumer::ReleaseBuffer(s32 slot, u64 frame_number, const Fenc
             return Status::BadValue;
         }
 
-        core->dequeue_condition.notify_all();
+        core->SignalDequeueCondition();
     }
 
     // Call back without lock held
@@ -209,7 +187,7 @@ Status BufferQueueConsumer::Connect(std::shared_ptr<IConsumerListener> consumer_
 
     LOG_DEBUG(Service_NVFlinger, "controlled_by_app={}", controlled_by_app);
 
-    BufferQueueCore::AutoLock lock(core);
+    std::scoped_lock lock(core->mutex);
 
     if (core->is_abandoned) {
         LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
diff --git a/src/core/hle/service/nvflinger/buffer_queue_core.cpp b/src/core/hle/service/nvflinger/buffer_queue_core.cpp
index eb93b43ee..6082610e0 100644
--- a/src/core/hle/service/nvflinger/buffer_queue_core.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue_core.cpp
@@ -10,16 +10,12 @@
 
 namespace Service::android {
 
-BufferQueueCore::BufferQueueCore() : lock{mutex, std::defer_lock} {
-    for (s32 slot = 0; slot < BufferQueueDefs::NUM_BUFFER_SLOTS; ++slot) {
-        free_slots.insert(slot);
-    }
-}
+BufferQueueCore::BufferQueueCore() = default;
 
 BufferQueueCore::~BufferQueueCore() = default;
 
 void BufferQueueCore::NotifyShutdown() {
-    std::unique_lock lk(mutex);
+    std::scoped_lock lock(mutex);
 
     is_shutting_down = true;
 
@@ -35,7 +31,7 @@ bool BufferQueueCore::WaitForDequeueCondition() {
         return false;
     }
 
-    dequeue_condition.wait(lock);
+    dequeue_condition.wait(mutex);
 
     return true;
 }
@@ -86,26 +82,15 @@ s32 BufferQueueCore::GetPreallocatedBufferCountLocked() const {
 void BufferQueueCore::FreeBufferLocked(s32 slot) {
     LOG_DEBUG(Service_NVFlinger, "slot {}", slot);
 
-    const auto had_buffer = slots[slot].graphic_buffer != nullptr;
-
     slots[slot].graphic_buffer.reset();
 
     if (slots[slot].buffer_state == BufferState::Acquired) {
         slots[slot].needs_cleanup_on_release = true;
     }
 
-    if (slots[slot].buffer_state != BufferState::Free) {
-        free_slots.insert(slot);
-    } else if (had_buffer) {
-        // If the slot was FREE, but we had a buffer, we need to move this slot from the free
-        // buffers list to the the free slots list.
-        free_buffers.remove(slot);
-        free_slots.insert(slot);
-    }
-
     slots[slot].buffer_state = BufferState::Free;
+    slots[slot].frame_number = UINT32_MAX;
     slots[slot].acquire_called = false;
-    slots[slot].frame_number = 0;
     slots[slot].fence = Fence::NoFence();
 }
 
@@ -126,8 +111,7 @@ bool BufferQueueCore::StillTracking(const BufferItem& item) const {
 
 void BufferQueueCore::WaitWhileAllocatingLocked() const {
     while (is_allocating) {
-        std::unique_lock lk(mutex);
-        is_allocating_condition.wait(lk);
+        is_allocating_condition.wait(mutex);
     }
 }
 
diff --git a/src/core/hle/service/nvflinger/buffer_queue_core.h b/src/core/hle/service/nvflinger/buffer_queue_core.h
index a3cd89f1c..4dfd53387 100644
--- a/src/core/hle/service/nvflinger/buffer_queue_core.h
+++ b/src/core/hle/service/nvflinger/buffer_queue_core.h
@@ -50,23 +50,7 @@ private:
     void WaitWhileAllocatingLocked() const;
 
 private:
-    class AutoLock final {
-    public:
-        AutoLock(std::shared_ptr<BufferQueueCore>& core_) : core{core_} {
-            core->lock.lock();
-        }
-
-        ~AutoLock() {
-            core->lock.unlock();
-        }
-
-    private:
-        std::shared_ptr<BufferQueueCore>& core;
-    };
-
-private:
     mutable std::mutex mutex;
-    mutable std::unique_lock<std::mutex> lock;
     bool is_abandoned{};
     bool consumer_controlled_by_app{};
     std::shared_ptr<IConsumerListener> consumer_listener;
@@ -75,10 +59,8 @@ private:
     std::shared_ptr<IProducerListener> connected_producer_listener;
     BufferQueueDefs::SlotsType slots{};
     std::vector<BufferItem> queue;
-    std::set<s32> free_slots;
-    std::list<s32> free_buffers;
     s32 override_max_buffer_count{};
-    mutable std::condition_variable dequeue_condition;
+    mutable std::condition_variable_any dequeue_condition;
     const bool use_async_buffer{}; // This is always disabled on HOS
     bool dequeue_buffer_cannot_block{};
     PixelFormat default_buffer_format{PixelFormat::Rgba8888};
@@ -90,7 +72,7 @@ private:
     u64 frame_counter{};
     u32 transform_hint{};
     bool is_allocating{};
-    mutable std::condition_variable is_allocating_condition;
+    mutable std::condition_variable_any is_allocating_condition;
     bool allow_allocation{true};
     u64 buffer_age{};
     bool is_shutting_down{};
diff --git a/src/core/hle/service/nvflinger/buffer_queue_producer.cpp b/src/core/hle/service/nvflinger/buffer_queue_producer.cpp
index 078091904..0833be57a 100644
--- a/src/core/hle/service/nvflinger/buffer_queue_producer.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue_producer.cpp
@@ -38,7 +38,7 @@ BufferQueueProducer::~BufferQueueProducer() {
 Status BufferQueueProducer::RequestBuffer(s32 slot, std::shared_ptr<GraphicBuffer>* buf) {
     LOG_DEBUG(Service_NVFlinger, "slot {}", slot);
 
-    BufferQueueCore::AutoLock lock(core);
+    std::scoped_lock lock(core->mutex);
 
     if (core->is_abandoned) {
         LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
@@ -65,7 +65,7 @@ Status BufferQueueProducer::SetBufferCount(s32 buffer_count) {
     std::shared_ptr<IConsumerListener> listener;
 
     {
-        BufferQueueCore::AutoLock lock(core);
+        std::scoped_lock lock(core->mutex);
         core->WaitWhileAllocatingLocked();
         if (core->is_abandoned) {
             LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
@@ -156,6 +156,14 @@ Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found,
             case BufferState::Acquired:
                 ++acquired_count;
                 break;
+            case BufferState::Free:
+                // We return the oldest of the free buffers to avoid stalling the producer if
+                // possible, since the consumer may still have pending reads of in-flight buffers
+                if (*found == BufferQueueCore::INVALID_BUFFER_SLOT ||
+                    slots[s].frame_number < slots[*found].frame_number) {
+                    *found = s;
+                }
+                break;
             default:
                 break;
             }
@@ -183,27 +191,12 @@ Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found,
             }
         }
 
-        *found = BufferQueueCore::INVALID_BUFFER_SLOT;
-
         // If we disconnect and reconnect quickly, we can be in a state where our slots are empty
         // but we have many buffers in the queue. This can cause us to run out of memory if we
         // outrun the consumer. Wait here if it looks like we have too many buffers queued up.
         const bool too_many_buffers = core->queue.size() > static_cast<size_t>(max_buffer_count);
         if (too_many_buffers) {
             LOG_ERROR(Service_NVFlinger, "queue size is {}, waiting", core->queue.size());
-        } else {
-            if (!core->free_buffers.empty()) {
-                auto slot = core->free_buffers.begin();
-                *found = *slot;
-                core->free_buffers.erase(slot);
-            } else if (core->allow_allocation && !core->free_slots.empty()) {
-                auto slot = core->free_slots.begin();
-                // Only return free slots up to the max buffer count
-                if (*slot < max_buffer_count) {
-                    *found = *slot;
-                    core->free_slots.erase(slot);
-                }
-            }
         }
 
         // If no buffer is found, or if the queue has too many buffers outstanding, wait for a
@@ -240,7 +233,7 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
     Status return_flags = Status::NoError;
     bool attached_by_consumer = false;
     {
-        BufferQueueCore::AutoLock lock(core);
+        std::scoped_lock lock(core->mutex);
         core->WaitWhileAllocatingLocked();
         if (format == PixelFormat::NoFormat) {
             format = core->default_buffer_format;
@@ -317,12 +310,13 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
         }
 
         {
-            BufferQueueCore::AutoLock lock(core);
+            std::scoped_lock lock(core->mutex);
             if (core->is_abandoned) {
                 LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
                 return Status::NoInit;
             }
 
+            slots[*out_slot].frame_number = UINT32_MAX;
             slots[*out_slot].graphic_buffer = graphic_buffer;
         }
     }
@@ -339,7 +333,7 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
 Status BufferQueueProducer::DetachBuffer(s32 slot) {
     LOG_DEBUG(Service_NVFlinger, "slot {}", slot);
 
-    BufferQueueCore::AutoLock lock(core);
+    std::scoped_lock lock(core->mutex);
     if (core->is_abandoned) {
         LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
         return Status::NoInit;
@@ -374,7 +368,7 @@ Status BufferQueueProducer::DetachNextBuffer(std::shared_ptr<GraphicBuffer>* out
         return Status::BadValue;
     }
 
-    BufferQueueCore::AutoLock lock(core);
+    std::scoped_lock lock(core->mutex);
 
     core->WaitWhileAllocatingLocked();
 
@@ -382,12 +376,21 @@ Status BufferQueueProducer::DetachNextBuffer(std::shared_ptr<GraphicBuffer>* out
         LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
         return Status::NoInit;
     }
-    if (core->free_buffers.empty()) {
-        return Status::NoMemory;
+
+    // Find the oldest valid slot
+    int found = BufferQueueCore::INVALID_BUFFER_SLOT;
+    for (int s = 0; s < BufferQueueDefs::NUM_BUFFER_SLOTS; ++s) {
+        if (slots[s].buffer_state == BufferState::Free && slots[s].graphic_buffer != nullptr) {
+            if (found == BufferQueueCore::INVALID_BUFFER_SLOT ||
+                slots[s].frame_number < slots[found].frame_number) {
+                found = s;
+            }
+        }
     }
 
-    const s32 found = core->free_buffers.front();
-    core->free_buffers.remove(found);
+    if (found == BufferQueueCore::INVALID_BUFFER_SLOT) {
+        return Status::NoMemory;
+    }
 
     LOG_DEBUG(Service_NVFlinger, "Detached slot {}", found);
 
@@ -409,7 +412,7 @@ Status BufferQueueProducer::AttachBuffer(s32* out_slot,
         return Status::BadValue;
     }
 
-    BufferQueueCore::AutoLock lock(core);
+    std::scoped_lock lock(core->mutex);
     core->WaitWhileAllocatingLocked();
 
     Status return_flags = Status::NoError;
@@ -469,7 +472,7 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
     BufferItem item;
 
     {
-        BufferQueueCore::AutoLock lock(core);
+        std::scoped_lock lock(core->mutex);
 
         if (core->is_abandoned) {
             LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
@@ -554,7 +557,9 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
                 // mark it as freed
                 if (core->StillTracking(*front)) {
                     slots[front->slot].buffer_state = BufferState::Free;
-                    core->free_buffers.push_front(front->slot);
+                    // Reset the frame number of the freed buffer so that it is the first in line to
+                    // be dequeued again
+                    slots[front->slot].frame_number = 0;
                 }
                 // Overwrite the droppable buffer with the incoming one
                 *front = item;
@@ -582,10 +587,9 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
     // Call back without the main BufferQueue lock held, but with the callback lock held so we can
     // ensure that callbacks occur in order
     {
-        std::unique_lock lock(callback_mutex);
+        std::scoped_lock lock(callback_mutex);
         while (callback_ticket != current_callback_ticket) {
-            std::unique_lock<std::mutex> lk(callback_mutex);
-            callback_condition.wait(lk);
+            callback_condition.wait(callback_mutex);
         }
 
         if (frameAvailableListener != nullptr) {
@@ -604,7 +608,7 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
 void BufferQueueProducer::CancelBuffer(s32 slot, const Fence& fence) {
     LOG_DEBUG(Service_NVFlinger, "slot {}", slot);
 
-    BufferQueueCore::AutoLock lock(core);
+    std::scoped_lock lock(core->mutex);
 
     if (core->is_abandoned) {
         LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
@@ -621,8 +625,8 @@ void BufferQueueProducer::CancelBuffer(s32 slot, const Fence& fence) {
         return;
     }
 
-    core->free_buffers.push_front(slot);
     slots[slot].buffer_state = BufferState::Free;
+    slots[slot].frame_number = 0;
     slots[slot].fence = fence;
 
     core->SignalDequeueCondition();
@@ -630,7 +634,7 @@ void BufferQueueProducer::CancelBuffer(s32 slot, const Fence& fence) {
 }
 
 Status BufferQueueProducer::Query(NativeWindow what, s32* out_value) {
-    BufferQueueCore::AutoLock lock(core);
+    std::scoped_lock lock(core->mutex);
 
     if (out_value == nullptr) {
         LOG_ERROR(Service_NVFlinger, "outValue was nullptr");
@@ -687,7 +691,7 @@ Status BufferQueueProducer::Query(NativeWindow what, s32* out_value) {
 Status BufferQueueProducer::Connect(const std::shared_ptr<IProducerListener>& listener,
                                     NativeWindowApi api, bool producer_controlled_by_app,
                                     QueueBufferOutput* output) {
-    BufferQueueCore::AutoLock lock(core);
+    std::scoped_lock lock(core->mutex);
 
     LOG_DEBUG(Service_NVFlinger, "api = {} producer_controlled_by_app = {}", api,
               producer_controlled_by_app);
@@ -745,7 +749,7 @@ Status BufferQueueProducer::Disconnect(NativeWindowApi api) {
     std::shared_ptr<IConsumerListener> listener;
 
     {
-        BufferQueueCore::AutoLock lock(core);
+        std::scoped_lock lock(core->mutex);
 
         core->WaitWhileAllocatingLocked();
 
@@ -795,10 +799,11 @@ Status BufferQueueProducer::SetPreallocatedBuffer(s32 slot,
         return Status::BadValue;
     }
 
-    BufferQueueCore::AutoLock lock(core);
+    std::scoped_lock lock(core->mutex);
 
     slots[slot] = {};
     slots[slot].graphic_buffer = buffer;
+    slots[slot].frame_number = 0;
 
     // Most games preallocate a buffer and pass a valid buffer here. However, it is possible for
     // this to be called with an empty buffer, Naruto Ultimate Ninja Storm is a game that does this.
diff --git a/src/core/hle/service/nvflinger/buffer_queue_producer.h b/src/core/hle/service/nvflinger/buffer_queue_producer.h
index 5ddeebe0c..77fdcae8e 100644
--- a/src/core/hle/service/nvflinger/buffer_queue_producer.h
+++ b/src/core/hle/service/nvflinger/buffer_queue_producer.h
@@ -77,7 +77,7 @@ private:
     std::mutex callback_mutex;
     s32 next_callback_ticket{};
     s32 current_callback_ticket{};
-    std::condition_variable callback_condition;
+    std::condition_variable_any callback_condition;
 };
 
 } // namespace Service::android
diff --git a/src/core/hle/service/nvflinger/consumer_base.cpp b/src/core/hle/service/nvflinger/consumer_base.cpp
index 3ccbb7fb8..be65a3f88 100644
--- a/src/core/hle/service/nvflinger/consumer_base.cpp
+++ b/src/core/hle/service/nvflinger/consumer_base.cpp
@@ -18,7 +18,7 @@ ConsumerBase::ConsumerBase(std::unique_ptr<BufferQueueConsumer> consumer_)
     : consumer{std::move(consumer_)} {}
 
 ConsumerBase::~ConsumerBase() {
-    std::unique_lock lock(mutex);
+    std::scoped_lock lock(mutex);
 
     ASSERT_MSG(is_abandoned, "consumer is not abandoned!");
 }
@@ -36,17 +36,17 @@ void ConsumerBase::FreeBufferLocked(s32 slot_index) {
 }
 
 void ConsumerBase::OnFrameAvailable(const BufferItem& item) {
-    std::unique_lock lock(mutex);
+    std::scoped_lock lock(mutex);
     LOG_DEBUG(Service_NVFlinger, "called");
 }
 
 void ConsumerBase::OnFrameReplaced(const BufferItem& item) {
-    std::unique_lock lock(mutex);
+    std::scoped_lock lock(mutex);
     LOG_DEBUG(Service_NVFlinger, "called");
 }
 
 void ConsumerBase::OnBuffersReleased() {
-    std::unique_lock lock(mutex);
+    std::scoped_lock lock(mutex);
     LOG_DEBUG(Service_NVFlinger, "called");
 }
 
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 3fed51400..28d30eee2 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -322,7 +322,7 @@ struct Memory::Impl {
         }
 
         if (Settings::IsFastmemEnabled()) {
-            const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
+            const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
             system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
         }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index d3cbb14a9..cb47d253c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <bit>
+
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 3c2a5e16f..aa7082978 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <bit>
 #include <tuple>
 #include <utility>
 
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 53be98ced..28f6a6184 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -4,6 +4,7 @@
 
 #include <algorithm>
 #include <array>
+#include <bit>
 #include <climits>
 
 #include <boost/container/static_vector.hpp>
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
index 57b4f0eee..60732215b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
@@ -132,7 +132,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
         multisample = v.X(meta_reg++);
     }
     if (tld.clamp != 0) {
-        throw NotImplementedException("TLD.CL - CLAMP is not implmented");
+        throw NotImplementedException("TLD.CL - CLAMP is not implemented");
     }
     IR::TextureInstInfo info{};
     info.type.Assign(GetType(tld.type));
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
index 311a9e763..f89ce1b68 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
@@ -81,7 +81,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
     } const tmml{insn};
 
     if ((tmml.mask & 0b1100) != 0) {
-        throw NotImplementedException("TMML BA results are not implmented");
+        throw NotImplementedException("TMML BA results are not implemented");
     }
     const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)};
 
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index 81fac94bf..40f7755e8 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -56,6 +56,18 @@ AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pi
     av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT;
     return PREFERRED_CPU_FMT;
 }
+
+// List all the currently available hwcontext in ffmpeg
+std::vector<AVHWDeviceType> ListSupportedContexts() {
+    std::vector<AVHWDeviceType> contexts{};
+    AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
+    do {
+        current_device_type = av_hwdevice_iterate_types(current_device_type);
+        contexts.push_back(current_device_type);
+    } while (current_device_type != AV_HWDEVICE_TYPE_NONE);
+    return contexts;
+}
+
 } // namespace
 
 void AVFrameDeleter(AVFrame* ptr) {
@@ -76,17 +88,6 @@ Codec::~Codec() {
     av_buffer_unref(&av_gpu_decoder);
 }
 
-// List all the currently available hwcontext in ffmpeg
-static std::vector<AVHWDeviceType> ListSupportedContexts() {
-    std::vector<AVHWDeviceType> contexts{};
-    AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
-    do {
-        current_device_type = av_hwdevice_iterate_types(current_device_type);
-        contexts.push_back(current_device_type);
-    } while (current_device_type != AV_HWDEVICE_TYPE_NONE);
-    return contexts;
-}
-
 bool Codec::CreateGpuAvDevice() {
     static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
     static const auto supported_contexts = ListSupportedContexts();
@@ -96,6 +97,8 @@ bool Codec::CreateGpuAvDevice() {
             LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
             continue;
         }
+        // Release any context left by a previous av_hwdevice_ctx_create call to avoid leaking it
+        av_buffer_unref(&av_gpu_decoder);
         const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
         if (hwdevice_res < 0) {
             LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
@@ -127,15 +130,19 @@ bool Codec::CreateGpuAvDevice() {
                           av_codec->name, av_hwdevice_get_type_name(type));
                 break;
             }
-            if (config->methods & HW_CONFIG_METHOD && config->device_type == type) {
-                av_codec_ctx->pix_fmt = config->pix_fmt;
-                if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) {
+            if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) {
+#if defined(__unix__)
+                // Some Linux decoding backends are reported to crash with this config method
+                // TODO(ameerj): Properly support this method
+                if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) != 0) {
                     // skip zero-copy decoders, we don't currently support them
                     LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.",
                               av_hwdevice_get_type_name(type), config->methods);
                     continue;
                 }
+#endif
                 LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
+                av_codec_ctx->pix_fmt = config->pix_fmt;
                 return true;
             }
         }
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 656dd7eb0..597301eeb 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -282,7 +282,9 @@ void main() {
 
+// Returns the currently free dedicated video memory in bytes (the NVX query reports KiB).
+// DEDICATED_VIDMEM and TOTAL_AVAILABLE_MEMORY report totals rather than the amount currently free.
 u64 Device::GetCurrentDedicatedVideoMemory() const {
     GLint cur_avail_mem_kb = 0;
-    glGetIntegerv(GL_GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX, &cur_avail_mem_kb);
+    glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &cur_avail_mem_kb);
     return static_cast<u64>(cur_avail_mem_kb) * 1_KiB;
 }
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7e06d0069..e6f9ece8b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -15,8 +15,9 @@
 #include "common/logging/log.h"
 #include "common/math_util.h"
 #include "common/microprofile.h"
+#include "common/scope_exit.h"
 #include "common/settings.h"
-#include "core/memory.h"
+
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
@@ -210,6 +211,7 @@ void RasterizerOpenGL::Clear() {
 void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     MICROPROFILE_SCOPE(OpenGL_Drawing);
 
+    SCOPE_EXIT({ gpu.TickWork(); });
     query_cache.UpdateCounters();
 
     GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
@@ -265,8 +267,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
 
     ++num_queued_commands;
     has_written_global_memory |= pipeline->WritesGlobalMemory();
-
-    gpu.TickWork();
 }
 
 void RasterizerOpenGL::DispatchCompute() {
@@ -352,7 +352,7 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
     shader_cache.OnCPUWrite(addr, size);
     {
         std::scoped_lock lock{texture_cache.mutex};
-        texture_cache.CachedWriteMemory(addr, size);
+        texture_cache.WriteMemory(addr, size);
     }
     {
         std::scoped_lock lock{buffer_cache.mutex};
@@ -364,10 +364,6 @@ void RasterizerOpenGL::SyncGuestHost() {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
     shader_cache.SyncGuestHost();
     {
-        std::scoped_lock lock{texture_cache.mutex};
-        texture_cache.FlushCachedWrites();
-    }
-    {
         std::scoped_lock lock{buffer_cache.mutex};
         buffer_cache.FlushCachedWrites();
     }
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index ec03cca38..abda1c490 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -367,17 +367,14 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
           PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
       full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
       blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
+      blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),
       convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
       convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
       convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
       convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
       convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)),
       linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
-      nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
-    if (device.IsExtShaderStencilExportSupported()) {
-        blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV);
-    }
-}
+      nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {}
 
 BlitImageHelper::~BlitImageHelper() = default;
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index dd6e0027e..fa87d37f8 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -408,7 +408,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
     pipeline_cache.OnCPUWrite(addr, size);
     {
         std::scoped_lock lock{texture_cache.mutex};
-        texture_cache.CachedWriteMemory(addr, size);
+        texture_cache.WriteMemory(addr, size);
     }
     {
         std::scoped_lock lock{buffer_cache.mutex};
@@ -419,10 +419,6 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
 void RasterizerVulkan::SyncGuestHost() {
     pipeline_cache.SyncGuestHost();
     {
-        std::scoped_lock lock{texture_cache.mutex};
-        texture_cache.FlushCachedWrites();
-    }
-    {
         std::scoped_lock lock{buffer_cache.mutex};
         buffer_cache.FlushCachedWrites();
     }
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index f2890d263..2c2ccc7c6 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1451,8 +1451,7 @@ bool Image::BlitScaleHelper(bool scale_up) {
 
         runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), color_view, dst_region,
                                              src_region, operation, BLIT_OPERATION);
-    } else if (!runtime->device.IsBlitDepthStencilSupported() &&
-               aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+    } else if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
         if (!blit_framebuffer) {
             blit_framebuffer = std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent);
         }
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index cc7999027..dd0106432 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -39,9 +39,6 @@ enum class ImageFlagBits : u32 {
     Rescaled = 1 << 13,
     CheckingRescalable = 1 << 14,
     IsRescalable = 1 << 15,
-
-    // Cached CPU
-    CachedCpuModified = 1 << 16, ///< Contents have been modified from the CPU
 };
 DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
 
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 099b2ae1b..8fef74117 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -438,23 +438,6 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
 }
 
 template <class P>
-void TextureCache<P>::CachedWriteMemory(VAddr cpu_addr, size_t size) {
-    const VAddr new_cpu_addr = Common::AlignDown(cpu_addr, CPU_PAGE_SIZE);
-    const size_t new_size = Common::AlignUp(size + cpu_addr - new_cpu_addr, CPU_PAGE_SIZE);
-    ForEachImageInRegion(new_cpu_addr, new_size, [this](ImageId image_id, Image& image) {
-        if (True(image.flags & ImageFlagBits::CachedCpuModified)) {
-            return;
-        }
-        image.flags |= ImageFlagBits::CachedCpuModified;
-        cached_cpu_invalidate.insert(image_id);
-
-        if (True(image.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(image, image_id);
-        }
-    });
-}
-
-template <class P>
 void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
     std::vector<ImageId> images;
     ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
@@ -512,18 +495,6 @@ void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
 }
 
 template <class P>
-void TextureCache<P>::FlushCachedWrites() {
-    for (ImageId image_id : cached_cpu_invalidate) {
-        Image& image = slot_images[image_id];
-        if (True(image.flags & ImageFlagBits::CachedCpuModified)) {
-            image.flags &= ~ImageFlagBits::CachedCpuModified;
-            image.flags |= ImageFlagBits::CpuModified;
-        }
-    }
-    cached_cpu_invalidate.clear();
-}
-
-template <class P>
 void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
                                 const Tegra::Engines::Fermi2D::Surface& src,
                                 const Tegra::Engines::Fermi2D::Config& copy) {
@@ -1109,8 +1080,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
         Image& overlap = slot_images[overlap_id];
         if (True(overlap.flags & ImageFlagBits::GpuModified)) {
             new_image.flags |= ImageFlagBits::GpuModified;
-            new_image.modification_tick =
-                std::max(overlap.modification_tick, new_image.modification_tick);
         }
         if (overlap.info.num_samples != new_image.info.num_samples) {
             LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
@@ -1589,9 +1558,6 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
 template <class P>
 void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
     ASSERT(False(image.flags & ImageFlagBits::Tracked));
-    if (True(image.flags & ImageFlagBits::CachedCpuModified)) {
-        return;
-    }
     image.flags |= ImageFlagBits::Tracked;
     if (False(image.flags & ImageFlagBits::Sparse)) {
         rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
@@ -1648,9 +1614,6 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
         tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
     }
     total_used_memory -= Common::AlignUp(tentative_size, 1024);
-    if (True(image.flags & ImageFlagBits::CachedCpuModified)) {
-        cached_cpu_invalidate.erase(image_id);
-    }
     const GPUVAddr gpu_addr = image.gpu_addr;
     const auto alloc_it = image_allocs_table.find(gpu_addr);
     if (alloc_it == image_allocs_table.end()) {
@@ -1817,11 +1780,7 @@ template <class P>
 void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
     Image& image = slot_images[image_id];
     if (invalidate) {
-        if (True(image.flags & ImageFlagBits::CachedCpuModified)) {
-            cached_cpu_invalidate.erase(image_id);
-        }
-        image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified |
-                         ImageFlagBits::CachedCpuModified);
+        image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
         if (False(image.flags & ImageFlagBits::Tracked)) {
             TrackImage(image, image_id);
         }
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index ad5978a33..b1324edf3 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -8,7 +8,6 @@
 #include <span>
 #include <type_traits>
 #include <unordered_map>
-#include <unordered_set>
 #include <vector>
 #include <queue>
 
@@ -51,9 +50,6 @@ class TextureCache {
     /// Address shift for caching images into a hash table
     static constexpr u64 PAGE_BITS = 20;
 
-    static constexpr u64 CPU_PAGE_BITS = 12;
-    static constexpr u64 CPU_PAGE_SIZE = 1ULL << CPU_PAGE_BITS;
-
     /// Enables debugging features to the texture cache
     static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
     /// Implement blits as copies between framebuffers
@@ -140,9 +136,6 @@ public:
     /// Mark images in a range as modified from the CPU
     void WriteMemory(VAddr cpu_addr, size_t size);
 
-    /// Mark images in a range as modified from the CPU
-    void CachedWriteMemory(VAddr cpu_addr, size_t size);
-
     /// Download contents of host images to guest memory in a region
     void DownloadMemory(VAddr cpu_addr, size_t size);
 
@@ -152,8 +145,6 @@ public:
     /// Remove images in a region
     void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
 
-    void FlushCachedWrites();
-
     /// Blit an image with the given parameters
     void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
                    const Tegra::Engines::Fermi2D::Surface& src,
@@ -375,8 +366,6 @@ private:
 
     std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
 
-    std::unordered_set<ImageId> cached_cpu_invalidate;
-
     VAddr virtual_invalid_space{};
 
     bool has_deleted_images = false;
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index e142bee35..f3a05ada9 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -621,6 +621,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
             khr_push_descriptor = false;
             break;
         }
+        const u32 nv_major_version = (properties.driverVersion >> 22) & 0x3ff;
+        if (nv_major_version >= 510) {
+            LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits");
+            cant_blit_msaa = true;
+        }
     }
     const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV;
     if (ext_extended_dynamic_state && is_radv) {
@@ -731,7 +736,7 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
 }
 
 void Device::ReportLoss() const {
-    LOG_CRITICAL(Render_Vulkan, "Device loss occured!");
+    LOG_CRITICAL(Render_Vulkan, "Device loss occurred!");
 
     // Wait for the log to flush and for Nsight Aftermath to dump the results
     std::this_thread::sleep_for(std::chrono::seconds{15});
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 30902101d..b1467d016 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -293,7 +293,7 @@ if (YUZU_USE_QT_WEB_ENGINE)
 endif ()
 
 if(UNIX AND NOT APPLE)
-    install(TARGETS yuzu RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
+    install(TARGETS yuzu)
 endif()
 
 if (YUZU_USE_BUNDLED_QT)
diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui
index edb525e82..c1d90d588 100644
--- a/src/yuzu/configuration/configure_debug.ui
+++ b/src/yuzu/configuration/configure_debug.ui
@@ -214,14 +214,14 @@
       <item row="1" column="1">
        <widget class="QCheckBox" name="enable_all_controllers">
         <property name="text">
-         <string>Enable all Controller Types</string>
+         <string>Enable All Controller Types</string>
         </property>
        </widget>
       </item>
       <item row="2" column="1">
        <widget class="QCheckBox" name="disable_web_applet">
         <property name="text">
-         <string>Disable Web Applet**</string>
+         <string>Disable Web Applet</string>
         </property>
        </widget>
       </item>
diff --git a/src/yuzu/configuration/configure_hotkeys.cpp b/src/yuzu/configuration/configure_hotkeys.cpp
index 53e629a5e..6679e9c53 100644
--- a/src/yuzu/configuration/configure_hotkeys.cpp
+++ b/src/yuzu/configuration/configure_hotkeys.cpp
@@ -35,8 +35,9 @@ ConfigureHotkeys::ConfigureHotkeys(Core::HID::HIDCore& hid_core, QWidget* parent
     ui->hotkey_list->setContextMenuPolicy(Qt::CustomContextMenu);
     ui->hotkey_list->setModel(model);
 
-    ui->hotkey_list->setColumnWidth(name_column, 200);
-    ui->hotkey_list->resizeColumnToContents(hotkey_column);
+    ui->hotkey_list->header()->setStretchLastSection(false);
+    ui->hotkey_list->header()->setSectionResizeMode(name_column, QHeaderView::ResizeMode::Stretch);
+    ui->hotkey_list->header()->setMinimumSectionSize(150);
 
     connect(ui->button_restore_defaults, &QPushButton::clicked, this,
             &ConfigureHotkeys::RestoreDefaults);
@@ -76,8 +77,8 @@ void ConfigureHotkeys::Populate(const HotkeyRegistry& registry) {
     }
 
     ui->hotkey_list->expandAll();
-    ui->hotkey_list->resizeColumnToContents(name_column);
     ui->hotkey_list->resizeColumnToContents(hotkey_column);
+    ui->hotkey_list->resizeColumnToContents(controller_column);
 }
 
 void ConfigureHotkeys::changeEvent(QEvent* event) {
diff --git a/src/yuzu/configuration/configure_per_game_addons.cpp b/src/yuzu/configuration/configure_per_game_addons.cpp
index 21e51d749..7893a85bb 100644
--- a/src/yuzu/configuration/configure_per_game_addons.cpp
+++ b/src/yuzu/configuration/configure_per_game_addons.cpp
@@ -47,6 +47,10 @@ ConfigurePerGameAddons::ConfigurePerGameAddons(Core::System& system_, QWidget* p
     item_model->setHeaderData(0, Qt::Horizontal, tr("Patch Name"));
     item_model->setHeaderData(1, Qt::Horizontal, tr("Version"));
 
+    tree_view->header()->setStretchLastSection(false);
+    tree_view->header()->setSectionResizeMode(0, QHeaderView::ResizeMode::Stretch);
+    tree_view->header()->setMinimumSectionSize(150);
+
     // We must register all custom types with the Qt Automoc system so that we are able to use it
     // with signals/slots. In this case, QList falls under the umbrella of custom types.
     qRegisterMetaType<QList<QStandardItem*>>("QList<QStandardItem*>");
@@ -138,5 +142,5 @@ void ConfigurePerGameAddons::LoadConfiguration() {
         item_model->appendRow(list_items.back());
     }
 
-    tree_view->setColumnWidth(0, 5 * tree_view->width() / 16);
+    tree_view->resizeColumnToContents(1);
 }
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 3b7058a2b..62d15f8cd 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -293,8 +293,6 @@ GMainWindow::GMainWindow()
 
     MigrateConfigFiles();
 
-    ui->action_Fullscreen->setChecked(false);
-
 #if defined(HAVE_SDL2) && !defined(_WIN32)
     SDL_InitSubSystem(SDL_INIT_VIDEO);
     // SDL disables the screen saver by default, and setting the hint
@@ -312,17 +310,20 @@ GMainWindow::GMainWindow()
     }
 
     QString game_path;
+    bool has_gamepath = false;
+    bool is_fullscreen = false;
 
     for (int i = 1; i < args.size(); ++i) {
         // Preserves drag/drop functionality
         if (args.size() == 2 && !args[1].startsWith(QChar::fromLatin1('-'))) {
             game_path = args[1];
+            has_gamepath = true;
             break;
         }
 
         // Launch game in fullscreen mode
         if (args[i] == QStringLiteral("-f")) {
-            ui->action_Fullscreen->setChecked(true);
+            is_fullscreen = true;
             continue;
         }
 
@@ -365,9 +366,15 @@ GMainWindow::GMainWindow()
             }
 
             game_path = args[++i];
+            has_gamepath = true;
         }
     }
 
+    // Override fullscreen setting if gamepath or argument is provided
+    if (has_gamepath || is_fullscreen) {
+        ui->action_Fullscreen->setChecked(is_fullscreen);
+    }
+
     if (!game_path.isEmpty()) {
         BootGame(game_path);
     }
diff --git a/src/yuzu_cmd/CMakeLists.txt b/src/yuzu_cmd/CMakeLists.txt
index 74fc24972..c8901f2df 100644
--- a/src/yuzu_cmd/CMakeLists.txt
+++ b/src/yuzu_cmd/CMakeLists.txt
@@ -45,7 +45,7 @@ if (YUZU_USE_EXTERNAL_SDL2)
 endif()
 
 if(UNIX AND NOT APPLE)
-    install(TARGETS yuzu-cmd RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
+    install(TARGETS yuzu-cmd)
 endif()
 
 if (MSVC)
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 34782c378..f34d6b728 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -342,12 +342,6 @@ fps_cap =
 # null: No audio output
 output_engine =
 
-# Whether or not to enable the audio-stretching post-processing effect.
-# This effect adjusts audio speed to match emulation speed and helps prevent audio stutter,
-# at the cost of increasing audio latency.
-# 0: No, 1 (default): Yes
-enable_audio_stretching =
-
 # Which audio device to use.
 # auto (default): Auto-select
 output_device =
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 14bf82f39..ab12dd15d 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -74,6 +74,7 @@ static void PrintVersion() {
 int main(int argc, char** argv) {
     Common::Log::Initialize();
     Common::Log::SetColorConsoleBackendEnabled(true);
+    Common::Log::Start();
     Common::DetachedTasks detached_tasks;
 
     int option_index = 0;