107 files changed, 2005 insertions, 746 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f69d00a2b..6c99dd5e2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,18 +1,79 @@
 # Enable modules to include each other's files
 include_directories(.)
 
+# CMake seems to only define _DEBUG on Windows, so set _DEBUG / NDEBUG per configuration here
+set_property(DIRECTORY APPEND PROPERTY
+    COMPILE_DEFINITIONS $<$<CONFIG:Debug>:_DEBUG> $<$<NOT:$<CONFIG:Debug>>:NDEBUG>)
+
+# Set compilation flags (MSVC first, every other toolchain in the else() branch)
+if (MSVC)
+    set(CMAKE_CONFIGURATION_TYPES Debug Release CACHE STRING "" FORCE)
+
+    # Silence "deprecation" warnings
+    add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS)
+
+    # Keep windows.h from defining the min/max macros
+    add_definitions(-DNOMINMAX)
+
+    # Keep windows.h from pulling in rarely used headers such as winsock.h, which can cause redefinition errors.
+    add_definitions(-DWIN32_LEAN_AND_MEAN)
+
+    # /W3 - Level 3 warnings
+    # /MP - Multi-threaded compilation
+    # /Zi - Output debugging information
+    # /Zo - enhanced debug info for optimized builds
+    # /permissive- - enables stricter C++ standards conformance checks
+    # /EHsc - C++-only exception handling semantics
+    # /Zc:throwingNew - let codegen assume `operator new` will never return null
+    # /Zc:inline - let codegen omit inline functions in object files
+    add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline)
+
+    # /GS- - No stack buffer overflow checks (applied to Release builds only)
+    add_compile_options("$<$<CONFIG:Release>:/GS->")
+
+    set(CMAKE_EXE_LINKER_FLAGS_DEBUG   "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE)
+    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
+else()
+    add_compile_options("-Wno-attributes")
+
+    if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)
+        add_compile_options("-stdlib=libc++")
+    endif()
+
+    # Set file offset size to 64 bits.
+    #
+    # On modern Unixes, this is typically already the case. The lone exception is
+    # glibc, which may default to 32 bits. glibc allows this to be configured
+    # by setting _FILE_OFFSET_BITS.
+    if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR MINGW)
+        add_definitions(-D_FILE_OFFSET_BITS=64)
+    endif()
+
+    if (MINGW)
+        add_definitions(-DMINGW_HAS_SECURE_API)
+
+        if (MINGW_STATIC_BUILD)
+            add_definitions(-DQT_STATICPLUGIN)
+            add_compile_options("-static")
+        endif()
+    endif()
+endif()
+
 add_subdirectory(common)
 add_subdirectory(core)
 add_subdirectory(audio_core)
 add_subdirectory(video_core)
 add_subdirectory(input_common)
 add_subdirectory(tests)
+
 if (ENABLE_SDL2)
     add_subdirectory(yuzu_cmd)
 endif()
+
 if (ENABLE_QT)
     add_subdirectory(yuzu)
 endif()
+
 if (ENABLE_WEB_SERVICE)
     add_subdirectory(web_service)
 endif()
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 4b66a6786..22a3f8c84 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -38,7 +38,7 @@ Stream::Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format fo
       sink_stream{sink_stream}, core_timing{core_timing}, name{std::move(name_)} {
 
     release_event = core_timing.RegisterEvent(
-        name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); });
+        name, [this](u64 userdata, s64 cycles_late) { ReleaseActiveBuffer(); });
 }
 
 void Stream::Play() {
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 43ae8a9e7..850ce8006 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -98,6 +98,7 @@ add_library(common STATIC
     microprofile.h
     microprofileui.h
     misc.cpp
+    multi_level_queue.h
     page_table.cpp
     page_table.h
     param_package.cpp
diff --git a/src/common/bit_util.h b/src/common/bit_util.h
index 1eea17ba1..a4f9ed4aa 100644
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@@ -58,4 +58,43 @@ inline u64 CountLeadingZeroes64(u64 value) {
     return __builtin_clzll(value);
 }
 #endif
+
+#ifdef _MSC_VER
+/// Returns the number of trailing zero bits in a 32-bit value.
+/// A value without any set bit yields 32.
+inline u32 CountTrailingZeroes32(u32 value) {
+    unsigned long bit_index = 0;
+    const bool found = _BitScanForward(&bit_index, value) != 0;
+
+    // When no set bit was found, report the full width instead of bit_index.
+    return found ? bit_index : 32;
+}
+
+/// Returns the number of trailing zero bits in a 64-bit value.
+/// A value without any set bit yields 64.
+inline u64 CountTrailingZeroes64(u64 value) {
+    unsigned long bit_index = 0;
+    const bool found = _BitScanForward64(&bit_index, value) != 0;
+
+    // When no set bit was found, report the full width instead of bit_index.
+    return found ? bit_index : 64;
+}
+#else
+/// Returns the number of trailing zero bits in a 32-bit value.
+/// A value without any set bit yields 32.
+inline u32 CountTrailingZeroes32(u32 value) {
+    // The zero case is answered here and never reaches the builtin.
+    const bool has_set_bit = value != 0;
+    return has_set_bit ? __builtin_ctz(value) : 32;
+}
+
+/// Returns the number of trailing zero bits in a 64-bit value.
+/// A value without any set bit yields 64.
+inline u64 CountTrailingZeroes64(u64 value) {
+    // The zero case is answered here and never reaches the builtin.
+    const bool has_set_bit = value != 0;
+    return has_set_bit ? __builtin_ctzll(value) : 64;
+}
+#endif
+
 } // namespace Common
diff --git a/src/common/common_types.h b/src/common/common_types.h
index 6b1766dca..4cec89fbd 100644
--- a/src/common/common_types.h
+++ b/src/common/common_types.h
@@ -40,10 +40,9 @@ using s64 = std::int64_t; ///< 64-bit signed int
 using f32 = float;  ///< 32-bit floating point
 using f64 = double; ///< 64-bit floating point
 
-// TODO: It would be nice to eventually replace these with strong types that prevent accidental
-// conversion between each other.
-using VAddr = u64; ///< Represents a pointer in the userspace virtual address space.
-using PAddr = u64; ///< Represents a pointer in the ARM11 physical address space.
+using VAddr = u64;    ///< Represents a pointer in the userspace virtual address space.
+using PAddr = u64;    ///< Represents a pointer in the ARM11 physical address space.
+using GPUVAddr = u64; ///< Represents a pointer in the GPU virtual address space.
 
 using u128 = std::array<std::uint64_t, 2>;
 static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide");
diff --git a/src/common/detached_tasks.cpp b/src/common/detached_tasks.cpp
index a347d9e02..f268d6021 100644
--- a/src/common/detached_tasks.cpp
+++ b/src/common/detached_tasks.cpp
@@ -16,22 +16,22 @@ DetachedTasks::DetachedTasks() {
 }
 
 void DetachedTasks::WaitForAllTasks() {
-    std::unique_lock<std::mutex> lock(mutex);
+    std::unique_lock lock{mutex};
     cv.wait(lock, [this]() { return count == 0; });
 }
 
 DetachedTasks::~DetachedTasks() {
-    std::unique_lock<std::mutex> lock(mutex);
+    std::unique_lock lock{mutex};
     ASSERT(count == 0);
     instance = nullptr;
 }
 
 void DetachedTasks::AddTask(std::function<void()> task) {
-    std::unique_lock<std::mutex> lock(instance->mutex);
+    std::unique_lock lock{instance->mutex};
     ++instance->count;
     std::thread([task{std::move(task)}]() {
         task();
-        std::unique_lock<std::mutex> lock(instance->mutex);
+        std::unique_lock lock{instance->mutex};
         --instance->count;
         std::notify_all_at_thread_exit(instance->cv, std::move(lock));
     })
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 4462ff3fb..a03179520 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -46,12 +46,12 @@ public:
     }
 
     void AddBackend(std::unique_ptr<Backend> backend) {
-        std::lock_guard<std::mutex> lock(writing_mutex);
+        std::lock_guard lock{writing_mutex};
         backends.push_back(std::move(backend));
     }
 
     void RemoveBackend(std::string_view backend_name) {
-        std::lock_guard<std::mutex> lock(writing_mutex);
+        std::lock_guard lock{writing_mutex};
         const auto it =
             std::remove_if(backends.begin(), backends.end(),
                            [&backend_name](const auto& i) { return backend_name == i->GetName(); });
@@ -80,7 +80,7 @@ private:
         backend_thread = std::thread([&] {
             Entry entry;
             auto write_logs = [&](Entry& e) {
-                std::lock_guard<std::mutex> lock(writing_mutex);
+                std::lock_guard lock{writing_mutex};
                 for (const auto& backend : backends) {
                     backend->Write(e);
                 }
diff --git a/src/common/multi_level_queue.h b/src/common/multi_level_queue.h
new file mode 100644
index 000000000..2b61b91e0
--- /dev/null
+++ b/src/common/multi_level_queue.h
@@ -0,0 +1,401 @@
+// Copyright 2019 TuxSH
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <iterator>
+#include <list>
+#include <memory>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+#include "common/bit_util.h"
+#include "common/common_types.h"
+
+namespace Common {
+
+/**
+ * A MultiLevelQueue is a type of priority queue which has the following characteristics:
+ * - iterable through each of its elements.
+ * - back can be obtained.
+ * - O(1) add, lookup (both front and back)
+ * - discrete priorities and a max of 64 priorities (limited domain)
+ * This type of priority queue is normally used for managing threads within a scheduler.
+ *
+ * Every level is a std::list, and bit N of a 64-bit mask is set while level N holds elements.
+ * Numerically lower levels are visited first, so level 0 is the front of the queue.
+ */
+template <typename T, std::size_t Depth>
+class MultiLevelQueue {
+    static_assert(Depth > 0 && Depth <= 64, "Depth must fit the 64-bit priority mask");
+
+public:
+    using value_type = T;
+    using reference = value_type&;
+    using const_reference = const value_type&;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+
+    using difference_type = typename std::pointer_traits<pointer>::difference_type;
+    using size_type = std::size_t;
+
+    /// Bidirectional iterator over all elements, ordered by level and then by list position.
+    /// `is_constant` selects the read-only flavour.
+    template <bool is_constant>
+    class iterator_impl {
+    public:
+        using iterator_category = std::bidirectional_iterator_tag;
+        using value_type = T;
+        using pointer = std::conditional_t<is_constant, const T*, T*>;
+        using reference = std::conditional_t<is_constant, const T&, T&>;
+        using difference_type = typename std::pointer_traits<pointer>::difference_type;
+
+        /// End iterators always compare equal; otherwise level and list position must match.
+        friend bool operator==(const iterator_impl& lhs, const iterator_impl& rhs) {
+            if (lhs.IsEnd() && rhs.IsEnd())
+                return true;
+            return std::tie(lhs.current_priority, lhs.it) == std::tie(rhs.current_priority, rhs.it);
+        }
+
+        friend bool operator!=(const iterator_impl& lhs, const iterator_impl& rhs) {
+            return !operator==(lhs, rhs);
+        }
+
+        reference operator*() const {
+            return *it;
+        }
+
+        pointer operator->() const {
+            return it.operator->();
+        }
+
+        /// Advances to the next element, moving on to the next non-empty level when the current
+        /// one is exhausted. Incrementing the end iterator does nothing.
+        iterator_impl& operator++() {
+            if (IsEnd()) {
+                return *this;
+            }
+
+            ++it;
+
+            if (it == GetEndItForPrio()) {
+                u64 prios = mlq->used_priorities;
+                // Keep only the levels after the current one. `2ULL << p` replaces
+                // `1ULL << (p + 1)`, which would shift by 64 (undefined) for p == 63.
+                prios &= ~((2ULL << current_priority) - 1);
+                if (prios == 0) {
+                    current_priority = static_cast<u32>(Depth);
+                } else {
+                    current_priority = static_cast<u32>(CountTrailingZeroes64(prios));
+                    it = GetBeginItForPrio();
+                }
+            }
+            return *this;
+        }
+
+        /// Steps back to the previous element, moving to the previous non-empty level when
+        /// needed. Decrementing the first element (or the end of an empty queue) does nothing.
+        iterator_impl& operator--() {
+            if (IsEnd()) {
+                if (mlq->used_priorities != 0) {
+                    current_priority =
+                        static_cast<u32>(63 - CountLeadingZeroes64(mlq->used_priorities));
+                    it = GetEndItForPrio();
+                    --it;
+                }
+            } else if (it == GetBeginItForPrio()) {
+                u64 prios = mlq->used_priorities;
+                prios &= (1ULL << current_priority) - 1;
+                if (prios != 0) {
+                    // The predecessor sits in the nearest preceding level, i.e. the highest
+                    // bit left after masking (the lowest bit would jump to the first level).
+                    current_priority = static_cast<u32>(63 - CountLeadingZeroes64(prios));
+                    it = GetEndItForPrio();
+                    --it;
+                }
+            } else {
+                --it;
+            }
+            return *this;
+        }
+
+        iterator_impl operator++(int) {
+            const iterator_impl v{*this};
+            ++(*this);
+            return v;
+        }
+
+        iterator_impl operator--(int) {
+            const iterator_impl v{*this};
+            --(*this);
+            return v;
+        }
+
+        // allow implicit non-const->const
+        iterator_impl(const iterator_impl<false>& other)
+            : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {}
+
+        iterator_impl(const iterator_impl<true>& other)
+            : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {}
+
+        iterator_impl& operator=(const iterator_impl<false>& other) {
+            mlq = other.mlq;
+            it = other.it;
+            current_priority = other.current_priority;
+            return *this;
+        }
+
+        friend class iterator_impl<true>;
+        iterator_impl() = default;
+
+    private:
+        friend class MultiLevelQueue;
+        using container_ref =
+            std::conditional_t<is_constant, const MultiLevelQueue&, MultiLevelQueue&>;
+        using container_ptr =
+            std::conditional_t<is_constant, const MultiLevelQueue*, MultiLevelQueue*>;
+        using list_iterator = std::conditional_t<is_constant, typename std::list<T>::const_iterator,
+                                                 typename std::list<T>::iterator>;
+
+        explicit iterator_impl(container_ref queue, list_iterator it, u32 current_priority)
+            : mlq(&queue), it(it), current_priority(current_priority) {}
+        explicit iterator_impl(container_ref queue, u32 current_priority)
+            : mlq(&queue), it(), current_priority(current_priority) {}
+
+        bool IsEnd() const {
+            return current_priority == Depth;
+        }
+
+        list_iterator GetBeginItForPrio() const {
+            return mlq->levels[current_priority].begin();
+        }
+
+        list_iterator GetEndItForPrio() const {
+            return mlq->levels[current_priority].end();
+        }
+
+        // Stored as a pointer rather than a reference: assigning through a reference member
+        // would copy-assign the queue itself instead of rebinding the iterator, and a reference
+        // member also rules out the defaulted default constructor.
+        container_ptr mlq = nullptr;
+        list_iterator it{};
+        u32 current_priority = static_cast<u32>(Depth);
+    };
+
+    using iterator = iterator_impl<false>;
+    using const_iterator = iterator_impl<true>;
+
+    /// Inserts a copy of `element` into level `priority`; at the front if `send_back` is false.
+    void add(const T& element, u32 priority, bool send_back = true) {
+        if (send_back)
+            levels[priority].push_back(element);
+        else
+            levels[priority].push_front(element);
+        used_priorities |= 1ULL << priority;
+    }
+
+    /// Erases the first entry equal to `element` from level `priority`, if there is one.
+    void remove(const T& element, u32 priority) {
+        auto it = ListIterateTo(levels[priority], element);
+        if (it == levels[priority].end())
+            return;
+        levels[priority].erase(it);
+        if (levels[priority].empty()) {
+            used_priorities &= ~(1ULL << priority);
+        }
+    }
+
+    /// Moves the first entry equal to `element` from `old_priority` to `new_priority`; it lands
+    /// at the back unless `adjust_front` is set. The list node is spliced instead of being
+    /// erased and re-added, so `element` may refer to the queued entry itself (as it does in
+    /// the iterator overload). When no such entry exists, a copy is inserted all the same.
+    void adjust(const T& element, u32 old_priority, u32 new_priority, bool adjust_front = false) {
+        auto& source = levels[old_priority];
+        const auto position = ListIterateTo(source, element);
+        if (position == source.cend()) {
+            add(element, new_priority, !adjust_front);
+            return;
+        }
+
+        auto& target = levels[new_priority];
+        target.splice(adjust_front ? target.cbegin() : target.cend(), source, position);
+        used_priorities |= 1ULL << new_priority;
+        if (source.empty()) {
+            used_priorities &= ~(1ULL << old_priority);
+        }
+    }
+    void adjust(const_iterator it, u32 old_priority, u32 new_priority, bool adjust_front = false) {
+        adjust(*it, old_priority, new_priority, adjust_front);
+    }
+
+    /// Moves the first entry equal to `element` in level `priority` to the front of the same
+    /// level of `other`. Does nothing when this queue has no such entry.
+    void transfer_to_front(const T& element, u32 priority, MultiLevelQueue& other) {
+        const auto position = ListIterateTo(levels[priority], element);
+        if (position == levels[priority].cend()) {
+            return; // splicing the end iterator would be undefined behaviour
+        }
+
+        ListSplice(other.levels[priority], other.levels[priority].begin(), levels[priority],
+                   position);
+
+        other.used_priorities |= 1ULL << priority;
+
+        if (levels[priority].empty()) {
+            used_priorities &= ~(1ULL << priority);
+        }
+    }
+
+    void transfer_to_front(const_iterator it, u32 priority, MultiLevelQueue& other) {
+        transfer_to_front(*it, priority, other);
+    }
+
+    /// Moves the first entry equal to `element` in level `priority` to the back of the same
+    /// level of `other`. Does nothing when this queue has no such entry.
+    void transfer_to_back(const T& element, u32 priority, MultiLevelQueue& other) {
+        const auto position = ListIterateTo(levels[priority], element);
+        if (position == levels[priority].cend()) {
+            return; // splicing the end iterator would be undefined behaviour
+        }
+
+        ListSplice(other.levels[priority], other.levels[priority].end(), levels[priority],
+                   position);
+
+        other.used_priorities |= 1ULL << priority;
+
+        if (levels[priority].empty()) {
+            used_priorities &= ~(1ULL << priority);
+        }
+    }
+
+    void transfer_to_back(const_iterator it, u32 priority, MultiLevelQueue& other) {
+        transfer_to_back(*it, priority, other);
+    }
+
+    /// Rotates level `priority`: its first `n` entries are moved to the back. Nothing happens
+    /// when the level holds `n` entries or fewer.
+    void yield(u32 priority, std::size_t n = 1) {
+        ListShiftForward(levels[priority], n);
+    }
+
+    std::size_t depth() const {
+        return Depth;
+    }
+
+    std::size_t size(u32 priority) const {
+        return levels[priority].size();
+    }
+
+    /// Total number of queued elements, summed over the non-empty levels only.
+    std::size_t size() const {
+        u64 priorities = used_priorities;
+        std::size_t size = 0;
+        while (priorities != 0) {
+            const u64 current_priority = CountTrailingZeroes64(priorities);
+            size += levels[current_priority].size();
+            priorities &= ~(1ULL << current_priority);
+        }
+        return size;
+    }
+
+    bool empty() const {
+        return used_priorities == 0;
+    }
+
+    bool empty(u32 priority) const {
+        return (used_priorities & (1ULL << priority)) == 0;
+    }
+
+    /// First non-empty level at or after `max_priority`, or Depth when there is none.
+    u32 highest_priority_set(u32 max_priority = 0) const {
+        const u64 priorities =
+            max_priority == 0 ? used_priorities : (used_priorities & ~((1ULL << max_priority) - 1));
+        return priorities == 0 ? Depth : static_cast<u32>(CountTrailingZeroes64(priorities));
+    }
+
+    /// Last non-empty level at or before `min_priority`, or Depth when there is none.
+    u32 lowest_priority_set(u32 min_priority = Depth - 1) const {
+        const u64 priorities = min_priority >= Depth - 1
+                                   ? used_priorities
+                                   : (used_priorities & ((1ULL << (min_priority + 1)) - 1));
+        return static_cast<u32>(priorities == 0 ? Depth : 63 - CountLeadingZeroes64(priorities));
+    }
+
+    const_iterator cbegin(u32 max_prio = 0) const {
+        const u32 priority = highest_priority_set(max_prio);
+        return priority == Depth ? cend()
+                                 : const_iterator{*this, levels[priority].cbegin(), priority};
+    }
+    const_iterator begin(u32 max_prio = 0) const {
+        return cbegin(max_prio);
+    }
+    iterator begin(u32 max_prio = 0) {
+        const u32 priority = highest_priority_set(max_prio);
+        return priority == Depth ? end() : iterator{*this, levels[priority].begin(), priority};
+    }
+
+    const_iterator cend(u32 min_prio = Depth - 1) const {
+        return min_prio == Depth - 1 ? const_iterator{*this, Depth} : cbegin(min_prio + 1);
+    }
+    const_iterator end(u32 min_prio = Depth - 1) const {
+        return cend(min_prio);
+    }
+    iterator end(u32 min_prio = Depth - 1) {
+        return min_prio == Depth - 1 ? iterator{*this, Depth} : begin(min_prio + 1);
+    }
+
+    /// First element at or after level `max_priority`. At least one such element must exist.
+    T& front(u32 max_priority = 0) {
+        const u32 priority = highest_priority_set(max_priority);
+        return levels[priority == Depth ? 0 : priority].front();
+    }
+    const T& front(u32 max_priority = 0) const {
+        const u32 priority = highest_priority_set(max_priority);
+        return levels[priority == Depth ? 0 : priority].front();
+    }
+
+    /// Last element at or before level `min_priority`. At least one such element must exist.
+    /// The fallback index is Depth - 1 (not a fixed 63) so it stays in bounds for Depth < 64.
+    T back(u32 min_priority = Depth - 1) {
+        const u32 priority = lowest_priority_set(min_priority); // intended
+        return levels[priority == Depth ? Depth - 1 : priority].back();
+    }
+    const T& back(u32 min_priority = Depth - 1) const {
+        const u32 priority = lowest_priority_set(min_priority); // intended
+        return levels[priority == Depth ? Depth - 1 : priority].back();
+    }
+
+private:
+    using const_list_iterator = typename std::list<T>::const_iterator;
+
+    static void ListShiftForward(std::list<T>& list, const std::size_t shift = 1) {
+        if (shift >= list.size()) {
+            return;
+        }
+
+        const auto begin_range = list.begin();
+        const auto end_range = std::next(begin_range, shift);
+        list.splice(list.end(), list, begin_range, end_range);
+    }
+
+    static void ListSplice(std::list<T>& in_list, const_list_iterator position,
+                           std::list<T>& out_list, const_list_iterator element) {
+        in_list.splice(position, out_list, element);
+    }
+
+    static const_list_iterator ListIterateTo(const std::list<T>& list, const T& element) {
+        auto it = list.cbegin();
+        while (it != list.cend() && *it != element) {
+            ++it;
+        }
+        return it;
+    }
+
+    std::array<std::list<T>, Depth> levels;
+    u64 used_priorities = 0;
+};
+
+} // namespace Common
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
index 8eba1c3f1..69b7abc54 100644
--- a/src/common/page_table.cpp
+++ b/src/common/page_table.cpp
@@ -16,6 +16,7 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
 
     pointers.resize(num_page_table_entries);
     attributes.resize(num_page_table_entries);
+    backing_addr.resize(num_page_table_entries);
 
     // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
     // vector size is subsequently decreased (via resize), the vector might not automatically
@@ -24,6 +25,7 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
 
     pointers.shrink_to_fit();
     attributes.shrink_to_fit();
+    backing_addr.shrink_to_fit();
 }
 
 } // namespace Common
diff --git a/src/common/page_table.h b/src/common/page_table.h
index 8339f2890..8b8ff0bb8 100644
--- a/src/common/page_table.h
+++ b/src/common/page_table.h
@@ -21,6 +21,8 @@ enum class PageType : u8 {
     RasterizerCachedMemory,
     /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
     Special,
+    /// Page is allocated for use.
+    Allocated,
 };
 
 struct SpecialRegion {
@@ -66,7 +68,7 @@ struct PageTable {
      * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
      * of type `Special`.
      */
-    boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
+    boost::icl::interval_map<u64, std::set<SpecialRegion>> special_regions;
 
     /**
      * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
@@ -74,6 +76,8 @@ struct PageTable {
      */
     std::vector<PageType> attributes;
 
+    std::vector<u64> backing_addr;
+
     const std::size_t page_size_in_bits{};
 };
 
diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index 5144c0d9f..fe7a420cc 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -27,18 +27,6 @@ namespace Common {
 
 #ifdef _MSC_VER
 
-void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask) {
-    SetThreadAffinityMask(thread, mask);
-}
-
-void SetCurrentThreadAffinity(u32 mask) {
-    SetThreadAffinityMask(GetCurrentThread(), mask);
-}
-
-void SwitchCurrentThread() {
-    SwitchToThread();
-}
-
 // Sets the debugger-visible name of the current thread.
 // Uses undocumented (actually, it is now documented) trick.
 // http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vsdebug/html/vxtsksettingthreadname.asp
@@ -70,31 +58,6 @@ void SetCurrentThreadName(const char* name) {
 
 #else // !MSVC_VER, so must be POSIX threads
 
-void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask) {
-#ifdef __APPLE__
-    thread_policy_set(pthread_mach_thread_np(thread), THREAD_AFFINITY_POLICY, (integer_t*)&mask, 1);
-#elif (defined __linux__ || defined __FreeBSD__) && !(defined ANDROID)
-    cpu_set_t cpu_set;
-    CPU_ZERO(&cpu_set);
-
-    for (int i = 0; i != sizeof(mask) * 8; ++i)
-        if ((mask >> i) & 1)
-            CPU_SET(i, &cpu_set);
-
-    pthread_setaffinity_np(thread, sizeof(cpu_set), &cpu_set);
-#endif
-}
-
-void SetCurrentThreadAffinity(u32 mask) {
-    SetThreadAffinity(pthread_self(), mask);
-}
-
-#ifndef _WIN32
-void SwitchCurrentThread() {
-    usleep(1000 * 1);
-}
-#endif
-
 // MinGW with the POSIX threading model does not support pthread_setname_np
 #if !defined(_WIN32) || defined(_MSC_VER)
 void SetCurrentThreadName(const char* name) {
diff --git a/src/common/thread.h b/src/common/thread.h
index 2cf74452d..0cfd98be6 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -9,14 +9,13 @@
 #include <cstddef>
 #include <mutex>
 #include <thread>
-#include "common/common_types.h"
 
 namespace Common {
 
 class Event {
 public:
     void Set() {
-        std::lock_guard<std::mutex> lk(mutex);
+        std::lock_guard lk{mutex};
         if (!is_set) {
             is_set = true;
             condvar.notify_one();
@@ -24,14 +23,14 @@ public:
     }
 
     void Wait() {
-        std::unique_lock<std::mutex> lk(mutex);
+        std::unique_lock lk{mutex};
         condvar.wait(lk, [&] { return is_set; });
         is_set = false;
     }
 
     template <class Clock, class Duration>
     bool WaitUntil(const std::chrono::time_point<Clock, Duration>& time) {
-        std::unique_lock<std::mutex> lk(mutex);
+        std::unique_lock lk{mutex};
         if (!condvar.wait_until(lk, time, [this] { return is_set; }))
             return false;
         is_set = false;
@@ -39,7 +38,7 @@ public:
     }
 
     void Reset() {
-        std::unique_lock<std::mutex> lk(mutex);
+        std::unique_lock lk{mutex};
         // no other action required, since wait loops on the predicate and any lingering signal will
         // get cleared on the first iteration
         is_set = false;
@@ -57,7 +56,7 @@ public:
 
     /// Blocks until all "count" threads have called Sync()
     void Sync() {
-        std::unique_lock<std::mutex> lk(mutex);
+        std::unique_lock lk{mutex};
         const std::size_t current_generation = generation;
 
         if (++waiting == count) {
@@ -78,9 +77,6 @@ private:
     std::size_t generation = 0; // Incremented once each time the barrier is used
 };
 
-void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask);
-void SetCurrentThreadAffinity(u32 mask);
-void SwitchCurrentThread(); // On Linux, this is equal to sleep 1ms
 void SetCurrentThreadName(const char* name);
 
 } // namespace Common
diff --git a/src/common/threadsafe_queue.h b/src/common/threadsafe_queue.h
index 821e8536a..e714ba5b3 100644
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -78,7 +78,7 @@ public:
 
     T PopWait() {
         if (Empty()) {
-            std::unique_lock<std::mutex> lock(cv_mutex);
+            std::unique_lock lock{cv_mutex};
             cv.wait(lock, [this]() { return !Empty(); });
         }
         T t;
@@ -137,7 +137,7 @@ public:
 
     template <typename Arg>
     void Push(Arg&& t) {
-        std::lock_guard<std::mutex> lock(write_lock);
+        std::lock_guard lock{write_lock};
         spsc_queue.Push(t);
     }
 
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index bbbe60896..9e23afe85 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -70,6 +70,8 @@ add_library(core STATIC
     file_sys/system_archive/ng_word.h
     file_sys/system_archive/system_archive.cpp
     file_sys/system_archive/system_archive.h
+    file_sys/system_archive/system_version.cpp
+    file_sys/system_archive/system_version.h
     file_sys/vfs.cpp
     file_sys/vfs.h
     file_sys/vfs_concat.cpp
@@ -144,6 +146,8 @@ add_library(core STATIC
     hle/kernel/svc_wrap.h
     hle/kernel/thread.cpp
     hle/kernel/thread.h
+    hle/kernel/transfer_memory.cpp
+    hle/kernel/transfer_memory.h
     hle/kernel/vm_manager.cpp
     hle/kernel/vm_manager.h
     hle/kernel/wait_object.cpp
diff --git a/src/core/core.cpp b/src/core/core.cpp
index a88e332be..4fe77c25b 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -460,8 +460,8 @@ Tegra::DebugContext* System::GetGPUDebugContext() const {
 void System::RegisterCheatList(const std::vector<FileSys::CheatList>& list,
                                const std::string& build_id, VAddr code_region_start,
                                VAddr code_region_end) {
-    impl->cheat_engine =
-        std::make_unique<FileSys::CheatEngine>(list, build_id, code_region_start, code_region_end);
+    impl->cheat_engine = std::make_unique<FileSys::CheatEngine>(*this, list, build_id,
+                                                                code_region_start, code_region_end);
 }
 
 void System::SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs) {
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index 1eefed6d0..e75741db0 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -22,7 +22,7 @@
 namespace Core {
 
 void CpuBarrier::NotifyEnd() {
-    std::unique_lock<std::mutex> lock(mutex);
+    std::unique_lock lock{mutex};
     end = true;
     condition.notify_all();
 }
@@ -34,7 +34,7 @@ bool CpuBarrier::Rendezvous() {
     }
 
     if (!end) {
-        std::unique_lock<std::mutex> lock(mutex);
+        std::unique_lock lock{mutex};
 
         --cores_waiting;
         if (!cores_waiting) {
@@ -131,7 +131,7 @@ void Cpu::Reschedule() {
 
     reschedule_pending = false;
     // Lock the global kernel mutex when we manipulate the HLE state
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     scheduler->Reschedule();
 }
 
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index a0dd5db24..41adb2302 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -186,7 +186,7 @@ void CoreTiming::Advance() {
         Event evt = std::move(event_queue.front());
         std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
         event_queue.pop_back();
-        evt.type->callback(evt.userdata, static_cast<int>(global_timer - evt.time));
+        evt.type->callback(evt.userdata, global_timer - evt.time);
     }
 
     is_global_timer_sane = false;
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index 59163bae1..9d2efde37 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -15,7 +15,7 @@
 namespace Core::Timing {
 
 /// A callback that may be scheduled for a particular core timing event.
-using TimedCallback = std::function<void(u64 userdata, int cycles_late)>;
+using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>;
 
 /// Contains the characteristics of a particular event.
 struct EventType {
diff --git a/src/core/file_sys/cheat_engine.cpp b/src/core/file_sys/cheat_engine.cpp
index 09ca9d705..b06c2f20a 100644
--- a/src/core/file_sys/cheat_engine.cpp
+++ b/src/core/file_sys/cheat_engine.cpp
@@ -11,14 +11,13 @@
 #include "core/core_timing_util.h"
 #include "core/file_sys/cheat_engine.h"
 #include "core/hle/kernel/process.h"
-#include "core/hle/service/hid/controllers/controller_base.h"
 #include "core/hle/service/hid/controllers/npad.h"
 #include "core/hle/service/hid/hid.h"
 #include "core/hle/service/sm/sm.h"
 
 namespace FileSys {
 
-constexpr u64 CHEAT_ENGINE_TICKS = Core::Timing::BASE_CLOCK_RATE / 60;
+constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);
 constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF;
 
 u64 Cheat::Address() const {
@@ -77,8 +76,8 @@ void CheatList::Execute() {
     }
 }
 
-CheatList::CheatList(ProgramSegment master, ProgramSegment standard)
-    : master_list(master), standard_list(standard) {}
+CheatList::CheatList(const Core::System& system_, ProgramSegment master, ProgramSegment standard)
+    : master_list{std::move(master)}, standard_list{std::move(standard)}, system{&system_} {}
 
 bool CheatList::EvaluateConditional(const Cheat& cheat) const {
     using ComparisonFunction = bool (*)(u64, u64);
@@ -89,10 +88,8 @@ bool CheatList::EvaluateConditional(const Cheat& cheat) const {
     };
 
     if (cheat.type == CodeType::ConditionalInput) {
-        const auto applet_resource = Core::System::GetInstance()
-                                         .ServiceManager()
-                                         .GetService<Service::HID::Hid>("hid")
-                                         ->GetAppletResource();
+        const auto applet_resource =
+            system->ServiceManager().GetService<Service::HID::Hid>("hid")->GetAppletResource();
         if (applet_resource == nullptr) {
             LOG_WARNING(
                 Common_Filesystem,
@@ -188,8 +185,9 @@ void CheatList::Loop(const Cheat& cheat) {
     ASSERT(iter != block_pairs.end());
     ASSERT(iter->first < iter->second);
 
-    for (int i = cheat.Value(4, 4); i >= 0; --i) {
-        register_3 = i;
+    const s32 initial_value = static_cast<s32>(cheat.Value(4, sizeof(s32)));
+    for (s32 i = initial_value; i >= 0; --i) {
+        register_3 = static_cast<u64>(i);
         for (std::size_t c = iter->first + 1; c < iter->second; ++c) {
             current_index = c;
             ExecuteSingleCheat(
@@ -320,14 +318,14 @@ void CheatList::ExecuteBlock(const Block& block) {
 
 CheatParser::~CheatParser() = default;
 
-CheatList CheatParser::MakeCheatList(CheatList::ProgramSegment master,
+CheatList CheatParser::MakeCheatList(const Core::System& system, CheatList::ProgramSegment master,
                                      CheatList::ProgramSegment standard) const {
-    return {master, standard};
+    return {system, std::move(master), std::move(standard)};
 }
 
 TextCheatParser::~TextCheatParser() = default;
 
-CheatList TextCheatParser::Parse(const std::vector<u8>& data) const {
+CheatList TextCheatParser::Parse(const Core::System& system, const std::vector<u8>& data) const {
     std::stringstream ss;
     ss.write(reinterpret_cast<const char*>(data.data()), data.size());
 
@@ -375,7 +373,7 @@ CheatList TextCheatParser::Parse(const std::vector<u8>& data) const {
         }
     }
 
-    return MakeCheatList(master_list, standard_list);
+    return MakeCheatList(system, master_list, standard_list);
 }
 
 std::array<u8, 16> TextCheatParser::ParseSingleLineCheat(const std::string& line) const {
@@ -425,6 +423,7 @@ std::array<u8, 16> TextCheatParser::ParseSingleLineCheat(const std::string& line
     return out;
 }
 
+namespace {
 u64 MemoryReadImpl(u32 width, VAddr addr) {
     switch (width) {
     case 1:
@@ -459,17 +458,18 @@ void MemoryWriteImpl(u32 width, VAddr addr, u64 value) {
         UNREACHABLE();
     }
 }
+} // Anonymous namespace
 
-CheatEngine::CheatEngine(std::vector<CheatList> cheats, const std::string& build_id,
-                         VAddr code_region_start, VAddr code_region_end)
-    : cheats(std::move(cheats)) {
-    auto& core_timing{Core::System::GetInstance().CoreTiming()};
+CheatEngine::CheatEngine(Core::System& system, std::vector<CheatList> cheats_,
+                         const std::string& build_id, VAddr code_region_start,
+                         VAddr code_region_end)
+    : cheats{std::move(cheats_)}, core_timing{system.CoreTiming()} {
     event = core_timing.RegisterEvent(
         "CheatEngine::FrameCallback::" + build_id,
         [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); });
     core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event);
 
-    const auto& vm_manager = Core::System::GetInstance().CurrentProcess()->VMManager();
+    const auto& vm_manager = system.CurrentProcess()->VMManager();
     for (auto& list : this->cheats) {
         list.SetMemoryParameters(code_region_start, vm_manager.GetHeapRegionBaseAddress(),
                                  code_region_end, vm_manager.GetHeapRegionEndAddress(),
@@ -478,15 +478,14 @@ CheatEngine::CheatEngine(std::vector<CheatList> cheats, const std::string& build
 }
 
 CheatEngine::~CheatEngine() {
-    auto& core_timing{Core::System::GetInstance().CoreTiming()};
     core_timing.UnscheduleEvent(event, 0);
 }
 
-void CheatEngine::FrameCallback(u64 userdata, int cycles_late) {
-    for (auto& list : cheats)
+void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) {
+    for (auto& list : cheats) {
         list.Execute();
+    }
 
-    auto& core_timing{Core::System::GetInstance().CoreTiming()};
     core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - cycles_late, event);
 }
 
diff --git a/src/core/file_sys/cheat_engine.h b/src/core/file_sys/cheat_engine.h
index 7ed69a2c8..ac22a82cb 100644
--- a/src/core/file_sys/cheat_engine.h
+++ b/src/core/file_sys/cheat_engine.h
@@ -7,13 +7,17 @@
 #include <map>
 #include <set>
 #include <vector>
-#include <queue>
 #include "common/bit_field.h"
 #include "common/common_types.h"
 
+namespace Core {
+class System;
+}
+
 namespace Core::Timing {
+class CoreTiming;
 struct EventType;
-}
+} // namespace Core::Timing
 
 namespace FileSys {
 
@@ -133,7 +137,7 @@ public:
     void Execute();
 
 private:
-    CheatList(ProgramSegment master, ProgramSegment standard);
+    CheatList(const Core::System& system_, ProgramSegment master, ProgramSegment standard);
 
     void ProcessBlockPairs(const Block& block);
     void ExecuteSingleCheat(const Cheat& cheat);
@@ -183,6 +187,8 @@ private:
     std::map<u64, u64> block_pairs;
 
     std::set<u64> encountered_loops;
+
+    const Core::System* system;
 };
 
 // Intermediary class that parses a text file or other disk format for storing cheats into a
@@ -191,10 +197,10 @@ class CheatParser {
 public:
     virtual ~CheatParser();
 
-    virtual CheatList Parse(const std::vector<u8>& data) const = 0;
+    virtual CheatList Parse(const Core::System& system, const std::vector<u8>& data) const = 0;
 
 protected:
-    CheatList MakeCheatList(CheatList::ProgramSegment master,
+    CheatList MakeCheatList(const Core::System& system_, CheatList::ProgramSegment master,
                             CheatList::ProgramSegment standard) const;
 };
 
@@ -203,7 +209,7 @@ class TextCheatParser final : public CheatParser {
 public:
     ~TextCheatParser() override;
 
-    CheatList Parse(const std::vector<u8>& data) const override;
+    CheatList Parse(const Core::System& system, const std::vector<u8>& data) const override;
 
 private:
     std::array<u8, 16> ParseSingleLineCheat(const std::string& line) const;
@@ -212,16 +218,17 @@ private:
 // Class that encapsulates a CheatList and manages its interaction with memory and CoreTiming
 class CheatEngine final {
 public:
-    CheatEngine(std::vector<CheatList> cheats, const std::string& build_id, VAddr code_region_start,
-                VAddr code_region_end);
+    CheatEngine(Core::System& system_, std::vector<CheatList> cheats_, const std::string& build_id,
+                VAddr code_region_start, VAddr code_region_end);
     ~CheatEngine();
 
 private:
-    void FrameCallback(u64 userdata, int cycles_late);
-
-    Core::Timing::EventType* event;
+    void FrameCallback(u64 userdata, s64 cycles_late);
 
     std::vector<CheatList> cheats;
+
+    Core::Timing::EventType* event;
+    Core::Timing::CoreTiming& core_timing;
 };
 
 } // namespace FileSys
diff --git a/src/core/file_sys/errors.h b/src/core/file_sys/errors.h
index e4a4ee4ab..bb4654366 100644
--- a/src/core/file_sys/errors.h
+++ b/src/core/file_sys/errors.h
@@ -11,6 +11,9 @@ namespace FileSys {
 constexpr ResultCode ERROR_PATH_NOT_FOUND{ErrorModule::FS, 1};
 constexpr ResultCode ERROR_ENTITY_NOT_FOUND{ErrorModule::FS, 1002};
 constexpr ResultCode ERROR_SD_CARD_NOT_FOUND{ErrorModule::FS, 2001};
+constexpr ResultCode ERROR_OUT_OF_BOUNDS{ErrorModule::FS, 3005};
+constexpr ResultCode ERROR_FAILED_MOUNT_ARCHIVE{ErrorModule::FS, 3223};
+constexpr ResultCode ERROR_INVALID_ARGUMENT{ErrorModule::FS, 6001};
 constexpr ResultCode ERROR_INVALID_OFFSET{ErrorModule::FS, 6061};
 constexpr ResultCode ERROR_INVALID_SIZE{ErrorModule::FS, 6062};
 
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index 2b09e5d35..e11217708 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -20,6 +20,7 @@
 #include "core/file_sys/vfs_vector.h"
 #include "core/hle/service/filesystem/filesystem.h"
 #include "core/loader/loader.h"
+#include "core/loader/nso.h"
 #include "core/settings.h"
 
 namespace FileSys {
@@ -32,14 +33,6 @@ constexpr std::array<const char*, 14> EXEFS_FILE_NAMES{
     "subsdk3", "subsdk4",   "subsdk5", "subsdk6", "subsdk7", "subsdk8", "subsdk9",
 };
 
-struct NSOBuildHeader {
-    u32_le magic;
-    INSERT_PADDING_BYTES(0x3C);
-    std::array<u8, 0x20> build_id;
-    INSERT_PADDING_BYTES(0xA0);
-};
-static_assert(sizeof(NSOBuildHeader) == 0x100, "NSOBuildHeader has incorrect size.");
-
 std::string FormatTitleVersion(u32 version, TitleVersionFormat format) {
     std::array<u8, sizeof(u32)> bytes{};
     bytes[0] = version % SINGLE_BYTE_MODULUS;
@@ -163,14 +156,16 @@ std::vector<VirtualFile> PatchManager::CollectPatches(const std::vector<VirtualD
 }
 
 std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso) const {
-    if (nso.size() < 0x100)
+    if (nso.size() < sizeof(Loader::NSOHeader)) {
         return nso;
+    }
 
-    NSOBuildHeader header;
-    std::memcpy(&header, nso.data(), sizeof(NSOBuildHeader));
+    Loader::NSOHeader header;
+    std::memcpy(&header, nso.data(), sizeof(header));
 
-    if (header.magic != Common::MakeMagic('N', 'S', 'O', '0'))
+    if (header.magic != Common::MakeMagic('N', 'S', 'O', '0')) {
         return nso;
+    }
 
     const auto build_id_raw = Common::HexArrayToString(header.build_id);
     const auto build_id = build_id_raw.substr(0, build_id_raw.find_last_not_of('0') + 1);
@@ -213,9 +208,11 @@ std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso) const {
         }
     }
 
-    if (out.size() < 0x100)
+    if (out.size() < sizeof(Loader::NSOHeader)) {
         return nso;
-    std::memcpy(out.data(), &header, sizeof(NSOBuildHeader));
+    }
+
+    std::memcpy(out.data(), &header, sizeof(header));
     return out;
 }
 
@@ -233,7 +230,7 @@ bool PatchManager::HasNSOPatch(const std::array<u8, 32>& build_id_) const {
     return !CollectPatches(patch_dirs, build_id).empty();
 }
 
-static std::optional<CheatList> ReadCheatFileFromFolder(u64 title_id,
+static std::optional<CheatList> ReadCheatFileFromFolder(const Core::System& system, u64 title_id,
                                                         const std::array<u8, 0x20>& build_id_,
                                                         const VirtualDir& base_path, bool upper) {
     const auto build_id_raw = Common::HexArrayToString(build_id_, upper);
@@ -254,28 +251,28 @@ static std::optional<CheatList> ReadCheatFileFromFolder(u64 title_id,
     }
 
     TextCheatParser parser;
-    return parser.Parse(data);
+    return parser.Parse(system, data);
 }
 
-std::vector<CheatList> PatchManager::CreateCheatList(const std::array<u8, 32>& build_id_) const {
-    std::vector<CheatList> out;
-
+std::vector<CheatList> PatchManager::CreateCheatList(const Core::System& system,
+                                                     const std::array<u8, 32>& build_id_) const {
     const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
     auto patch_dirs = load_dir->GetSubdirectories();
     std::sort(patch_dirs.begin(), patch_dirs.end(),
               [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); });
 
+    std::vector<CheatList> out;
     out.reserve(patch_dirs.size());
     for (const auto& subdir : patch_dirs) {
         auto cheats_dir = subdir->GetSubdirectory("cheats");
         if (cheats_dir != nullptr) {
-            auto res = ReadCheatFileFromFolder(title_id, build_id_, cheats_dir, true);
+            auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true);
             if (res.has_value()) {
                 out.push_back(std::move(*res));
                 continue;
             }
 
-            res = ReadCheatFileFromFolder(title_id, build_id_, cheats_dir, false);
+            res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, false);
             if (res.has_value())
                 out.push_back(std::move(*res));
         }
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h
index 3e3ac6aca..de2672c76 100644
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -12,6 +12,10 @@
 #include "core/file_sys/nca_metadata.h"
 #include "core/file_sys/vfs.h"
 
+namespace Core {
+class System;
+}
+
 namespace FileSys {
 
 class NCA;
@@ -47,7 +51,8 @@ public:
     bool HasNSOPatch(const std::array<u8, 0x20>& build_id) const;
 
     // Creates a CheatList object with all
-    std::vector<CheatList> CreateCheatList(const std::array<u8, 0x20>& build_id) const;
+    std::vector<CheatList> CreateCheatList(const Core::System& system,
+                                           const std::array<u8, 0x20>& build_id) const;
 
     // Currently tracked RomFS patches:
     // - Game Updates
diff --git a/src/core/file_sys/system_archive/system_archive.cpp b/src/core/file_sys/system_archive/system_archive.cpp
index e3e79f40a..c9722ed77 100644
--- a/src/core/file_sys/system_archive/system_archive.cpp
+++ b/src/core/file_sys/system_archive/system_archive.cpp
@@ -6,6 +6,7 @@
 #include "core/file_sys/romfs.h"
 #include "core/file_sys/system_archive/ng_word.h"
 #include "core/file_sys/system_archive/system_archive.h"
+#include "core/file_sys/system_archive/system_version.h"
 
 namespace FileSys::SystemArchive {
 
@@ -30,7 +31,7 @@ constexpr std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHI
     {0x0100000000000806, "NgWord", &NgWord1},
     {0x0100000000000807, "SsidList", nullptr},
     {0x0100000000000808, "Dictionary", nullptr},
-    {0x0100000000000809, "SystemVersion", nullptr},
+    {0x0100000000000809, "SystemVersion", &SystemVersion},
     {0x010000000000080A, "AvatarImage", nullptr},
     {0x010000000000080B, "LocalNews", nullptr},
     {0x010000000000080C, "Eula", nullptr},
diff --git a/src/core/file_sys/system_archive/system_version.cpp b/src/core/file_sys/system_archive/system_version.cpp
new file mode 100644
index 000000000..6e22f97b0
--- /dev/null
+++ b/src/core/file_sys/system_archive/system_version.cpp
@@ -0,0 +1,61 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include "core/file_sys/system_archive/system_version.h"
+#include "core/file_sys/vfs_vector.h"
+
+namespace FileSys::SystemArchive {
+
+namespace SystemVersionData {
+
+// This section should reflect the best system version to describe yuzu's HLE API.
+// TODO(DarkLordZach): Update when HLE gets better.
+
+constexpr u8 VERSION_MAJOR = 5;
+constexpr u8 VERSION_MINOR = 1;
+constexpr u8 VERSION_MICRO = 0;
+
+constexpr u8 REVISION_MAJOR = 3;
+constexpr u8 REVISION_MINOR = 0;
+
+constexpr char PLATFORM_STRING[] = "NX";
+constexpr char VERSION_HASH[] = "23f9df53e25709d756e0c76effcb2473bd3447dd";
+constexpr char DISPLAY_VERSION[] = "5.1.0";
+constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 5.1.0-3.0";
+
+} // namespace SystemVersionData
+
+// Returns the long firmware display title (the DISPLAY_TITLE constant above).
+std::string GetLongDisplayVersion() {
+    return SystemVersionData::DISPLAY_TITLE;
+}
+
+// Builds the SystemVersion archive: a directory named "data" that holds one 0x100-byte file
+// named "file". Its backing vector is value-initialized, so bytes not written below stay zero.
+VirtualDir SystemVersion() {
+    VirtualFile file = std::make_shared<VectorVfsFile>(std::vector<u8>(0x100), "file");
+
+    // Version and revision numbers are single bytes at fixed offsets.
+    file->WriteObject(SystemVersionData::VERSION_MAJOR, 0);
+    file->WriteObject(SystemVersionData::VERSION_MINOR, 1);
+    file->WriteObject(SystemVersionData::VERSION_MICRO, 2);
+    file->WriteObject(SystemVersionData::REVISION_MAJOR, 4);
+    file->WriteObject(SystemVersionData::REVISION_MINOR, 5);
+
+    // Copies a string constant, null terminator included, into a fixed-size field. The length
+    // is clamped to the field size so an over-long constant cannot overrun the next field.
+    const auto write_string = [&file](const auto& text, u64 field_size, u64 offset) {
+        file->WriteArray(text, std::min<u64>(sizeof(text), field_size), offset);
+    };
+    write_string(SystemVersionData::PLATFORM_STRING, 0x20, 0x8);
+    write_string(SystemVersionData::VERSION_HASH, 0x40, 0x28);
+    write_string(SystemVersionData::DISPLAY_VERSION, 0x18, 0x68);
+    write_string(SystemVersionData::DISPLAY_TITLE, 0x80, 0x80);
+
+    return std::make_shared<VectorVfsDirectory>(std::vector<VirtualFile>{file},
+                                                std::vector<VirtualDir>{}, "data");
+}
+
+} // namespace FileSys::SystemArchive
diff --git a/src/core/file_sys/system_archive/system_version.h b/src/core/file_sys/system_archive/system_version.h
new file mode 100644
index 000000000..deed79b26
--- /dev/null
+++ b/src/core/file_sys/system_archive/system_version.h
@@ -0,0 +1,22 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include "core/file_sys/vfs_types.h"
+
+namespace FileSys::SystemArchive {
+
+/// Returns the long display title of the emulated firmware version, i.e. the same string that
+/// SystemVersion() stores in the display-title field of the generated file.
+std::string GetLongDisplayVersion();
+
+/// Synthesizes the contents of the SystemVersion system archive.
+///
+/// @returns A directory named "data" holding a single 0x100-byte file named "file" that encodes
+///          the version numbers, platform string, version hash and display strings.
+VirtualDir SystemVersion();
+
+} // namespace FileSys::SystemArchive
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index e29afd630..1320bbe77 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -30,7 +30,7 @@ private:
         explicit Device(std::weak_ptr<TouchState>&& touch_state) : touch_state(touch_state) {}
         std::tuple<float, float, bool> GetStatus() const override {
             if (auto state = touch_state.lock()) {
-                std::lock_guard<std::mutex> guard(state->mutex);
+                std::lock_guard guard{state->mutex};
                 return std::make_tuple(state->touch_x, state->touch_y, state->touch_pressed);
             }
             return std::make_tuple(0.0f, 0.0f, false);
@@ -81,7 +81,7 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) {
     if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y))
         return;
 
-    std::lock_guard<std::mutex> guard(touch_state->mutex);
+    std::lock_guard guard{touch_state->mutex};
     touch_state->touch_x = static_cast<float>(framebuffer_x - framebuffer_layout.screen.left) /
                            (framebuffer_layout.screen.right - framebuffer_layout.screen.left);
     touch_state->touch_y = static_cast<float>(framebuffer_y - framebuffer_layout.screen.top) /
@@ -91,7 +91,7 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) {
 }
 
 void EmuWindow::TouchReleased() {
-    std::lock_guard<std::mutex> guard(touch_state->mutex);
+    std::lock_guard guard{touch_state->mutex};
     touch_state->touch_pressed = false;
     touch_state->touch_x = 0;
     touch_state->touch_y = 0;
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 352190da8..c8842410b 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -26,7 +26,7 @@ void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_
     // them all.
     std::size_t last = waiting_threads.size();
     if (num_to_wake > 0) {
-        last = num_to_wake;
+        last = std::min(last, static_cast<std::size_t>(num_to_wake));
     }
 
     // Signal the waiting threads.
@@ -90,9 +90,9 @@ ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr a
     // Determine the modified value depending on the waiting count.
     s32 updated_value;
     if (waiting_threads.empty()) {
-        updated_value = value - 1;
-    } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
         updated_value = value + 1;
+    } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
+        updated_value = value - 1;
     } else {
         updated_value = value;
     }
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 4d224d01d..6baeb3494 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -29,12 +29,12 @@ namespace Kernel {
  * @param thread_handle The handle of the thread that's been awoken
  * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time
  */
-static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_late) {
+static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_late) {
     const auto proper_handle = static_cast<Handle>(thread_handle);
     const auto& system = Core::System::GetInstance();
 
     // Lock the global kernel mutex when we enter the kernel HLE.
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
 
     SharedPtr<Thread> thread =
         system.Kernel().RetrieveThreadFromWakeupCallbackHandleTable(proper_handle);
@@ -62,7 +62,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
 
     if (thread->GetMutexWaitAddress() != 0 || thread->GetCondVarWaitAddress() != 0 ||
         thread->GetWaitHandle() != 0) {
-        ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex);
+        ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex ||
+               thread->GetStatus() == ThreadStatus::WaitCondVar);
         thread->SetMutexWaitAddress(0);
         thread->SetCondVarWaitAddress(0);
         thread->SetWaitHandle(0);
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index ff17ff865..03ea5b659 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -8,9 +8,6 @@
 #include <unordered_map>
 #include "core/hle/kernel/object.h"
 
-template <typename T>
-class ResultVal;
-
 namespace Core {
 class System;
 }
diff --git a/src/core/hle/kernel/object.cpp b/src/core/hle/kernel/object.cpp
index 8870463d0..217144efc 100644
--- a/src/core/hle/kernel/object.cpp
+++ b/src/core/hle/kernel/object.cpp
@@ -23,6 +23,7 @@ bool Object::IsWaitable() const {
     case HandleType::Unknown:
     case HandleType::WritableEvent:
     case HandleType::SharedMemory:
+    case HandleType::TransferMemory:
     case HandleType::AddressArbiter:
     case HandleType::ResourceLimit:
     case HandleType::ClientPort:
diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h
index 4c2505908..3f6baa094 100644
--- a/src/core/hle/kernel/object.h
+++ b/src/core/hle/kernel/object.h
@@ -22,6 +22,7 @@ enum class HandleType : u32 {
     WritableEvent,
     ReadableEvent,
     SharedMemory,
+    TransferMemory,
     Thread,
     Process,
     AddressArbiter,
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 87779a71c..a5144b8ad 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <memory>
 #include <random>
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
@@ -75,6 +76,10 @@ SharedPtr<ResourceLimit> Process::GetResourceLimit() const {
     return resource_limit;
 }
 
+u64 Process::GetTotalPhysicalMemoryUsed() const {
+    return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size;
+}
+
 ResultCode Process::ClearSignalState() {
     if (status == ProcessStatus::Exited) {
         LOG_ERROR(Kernel, "called on a terminated process instance.");
@@ -107,14 +112,17 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
     return handle_table.SetSize(capabilities.GetHandleTableSize());
 }
 
-void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
+void Process::Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size) {
+    // The kernel always ensures that the given stack size is page aligned.
+    main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
+
     // Allocate and map the main thread stack
     // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
     // of the user address space.
+    const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
     vm_manager
-        .MapMemoryBlock(vm_manager.GetTLSIORegionEndAddress() - stack_size,
-                        std::make_shared<std::vector<u8>>(stack_size, 0), 0, stack_size,
-                        MemoryState::Stack)
+        .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
+                        0, main_thread_stack_size, MemoryState::Stack)
         .Unwrap();
 
     vm_manager.LogLayout();
@@ -226,6 +234,8 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
     MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData);
     MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
 
+    code_memory_size += module_.memory->size();
+
     // Clear instruction cache in CPU JIT
     system.InvalidateCpuInstructionCaches();
 }
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 1bd7bf5c1..732d12170 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -35,14 +35,6 @@ class Thread;
 
 struct CodeSet;
 
-struct AddressMapping {
-    // Address and size must be page-aligned
-    VAddr address;
-    u64 size;
-    bool read_only;
-    bool unk_flag;
-};
-
 enum class MemoryRegion : u16 {
     APPLICATION = 1,
     SYSTEM = 2,
@@ -194,6 +186,9 @@ public:
         return random_entropy.at(index);
     }
 
+    /// Retrieves the total physical memory used by this process in bytes.
+    u64 GetTotalPhysicalMemoryUsed() const;
+
     /// Clears the signaled state of the process if and only if it's signaled.
     ///
     /// @pre The process must not be already terminated. If this is called on a
@@ -218,7 +213,7 @@ public:
     /**
      * Applies address space changes and launches the process main thread.
      */
-    void Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size);
+    void Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size);
 
     /**
      * Prepares a process for termination by stopping all of its threads
@@ -255,6 +250,12 @@ private:
     /// Memory manager for this process.
     Kernel::VMManager vm_manager;
 
+    /// Size of the main thread's stack in bytes.
+    u64 main_thread_stack_size = 0;
+
+    /// Size of the loaded code memory in bytes.
+    u64 code_memory_size = 0;
+
     /// Current status of the process
     ProcessStatus status;
 
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index cc189cc64..ac501bf7f 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -29,8 +29,8 @@ Scheduler::~Scheduler() {
 }
 
 bool Scheduler::HaveReadyThreads() const {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
-    return ready_queue.get_first() != nullptr;
+    std::lock_guard lock{scheduler_mutex};
+    return !ready_queue.empty();
 }
 
 Thread* Scheduler::GetCurrentThread() const {
@@ -46,22 +46,27 @@ Thread* Scheduler::PopNextReadyThread() {
     Thread* thread = GetCurrentThread();
 
     if (thread && thread->GetStatus() == ThreadStatus::Running) {
+        if (ready_queue.empty()) {
+            return thread;
+        }
         // We have to do better than the current thread.
         // This call returns null when that's not possible.
-        next = ready_queue.pop_first_better(thread->GetPriority());
-        if (!next) {
-            // Otherwise just keep going with the current thread
+        next = ready_queue.front();
+        if (next == nullptr || next->GetPriority() >= thread->GetPriority()) {
             next = thread;
         }
     } else {
-        next = ready_queue.pop_first();
+        if (ready_queue.empty()) {
+            return nullptr;
+        }
+        next = ready_queue.front();
     }
 
     return next;
 }
 
 void Scheduler::SwitchContext(Thread* new_thread) {
-    Thread* const previous_thread = GetCurrentThread();
+    Thread* previous_thread = GetCurrentThread();
     Process* const previous_process = system.Kernel().CurrentProcess();
 
     UpdateLastContextSwitchTime(previous_thread, previous_process);
@@ -75,7 +80,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
         if (previous_thread->GetStatus() == ThreadStatus::Running) {
             // This is only the case when a reschedule is triggered without the current thread
             // yielding execution (i.e. an event triggered, system core time-sliced, etc)
-            ready_queue.push_front(previous_thread->GetPriority(), previous_thread);
+            ready_queue.add(previous_thread, previous_thread->GetPriority(), false);
             previous_thread->SetStatus(ThreadStatus::Ready);
         }
     }
@@ -90,7 +95,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
 
         current_thread = new_thread;
 
-        ready_queue.remove(new_thread->GetPriority(), new_thread);
+        ready_queue.remove(new_thread, new_thread->GetPriority());
         new_thread->SetStatus(ThreadStatus::Running);
 
         auto* const thread_owner_process = current_thread->GetOwnerProcess();
@@ -127,7 +132,7 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
 }
 
 void Scheduler::Reschedule() {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
 
     Thread* cur = GetCurrentThread();
     Thread* next = PopNextReadyThread();
@@ -143,51 +148,54 @@ void Scheduler::Reschedule() {
     SwitchContext(next);
 }
 
-void Scheduler::AddThread(SharedPtr<Thread> thread, u32 priority) {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+void Scheduler::AddThread(SharedPtr<Thread> thread) {
+    std::lock_guard lock{scheduler_mutex};
 
     thread_list.push_back(std::move(thread));
-    ready_queue.prepare(priority);
 }
 
 void Scheduler::RemoveThread(Thread* thread) {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
 
     thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
                       thread_list.end());
 }
 
 void Scheduler::ScheduleThread(Thread* thread, u32 priority) {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
 
     ASSERT(thread->GetStatus() == ThreadStatus::Ready);
-    ready_queue.push_back(priority, thread);
+    ready_queue.add(thread, priority);
 }
 
 void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
 
     ASSERT(thread->GetStatus() == ThreadStatus::Ready);
-    ready_queue.remove(priority, thread);
+    ready_queue.remove(thread, priority);
 }
 
 void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
+    if (thread->GetPriority() == priority) {
+        return;
+    }
 
     // If thread was ready, adjust queues
     if (thread->GetStatus() == ThreadStatus::Ready)
-        ready_queue.move(thread, thread->GetPriority(), priority);
-    else
-        ready_queue.prepare(priority);
+        ready_queue.adjust(thread, thread->GetPriority(), priority);
 }
 
 Thread* Scheduler::GetNextSuggestedThread(u32 core, u32 maximum_priority) const {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
 
     const u32 mask = 1U << core;
-    return ready_queue.get_first_filter([mask, maximum_priority](Thread const* thread) {
-        return (thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority;
-    });
+    for (auto* thread : ready_queue) {
+        if ((thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority) {
+            return thread;
+        }
+    }
+    return nullptr;
 }
 
 void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 1c5bf57d9..b29bf7be8 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -7,7 +7,7 @@
 #include <mutex>
 #include <vector>
 #include "common/common_types.h"
-#include "common/thread_queue_list.h"
+#include "common/multi_level_queue.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/thread.h"
 
@@ -38,7 +38,7 @@ public:
     u64 GetLastContextSwitchTicks() const;
 
     /// Adds a new thread to the scheduler
-    void AddThread(SharedPtr<Thread> thread, u32 priority);
+    void AddThread(SharedPtr<Thread> thread);
 
     /// Removes a thread from the scheduler
     void RemoveThread(Thread* thread);
@@ -156,7 +156,7 @@ private:
     std::vector<SharedPtr<Thread>> thread_list;
 
     /// Lists only ready thread ids.
-    Common::ThreadQueueList<Thread*, THREADPRIO_LOWEST + 1> ready_queue;
+    Common::MultiLevelQueue<Thread*, THREADPRIO_LOWEST + 1> ready_queue;
 
     SharedPtr<Thread> current_thread = nullptr;
 
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index a6a17efe7..76a8b0191 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -32,6 +32,7 @@
 #include "core/hle/kernel/svc.h"
 #include "core/hle/kernel/svc_wrap.h"
 #include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/transfer_memory.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/lock.h"
 #include "core/hle/result.h"
@@ -174,11 +175,8 @@ static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) {
         return ERR_INVALID_SIZE;
     }
 
-    auto& vm_manager = Core::CurrentProcess()->VMManager();
-    const VAddr heap_base = vm_manager.GetHeapRegionBaseAddress();
-    const auto alloc_result =
-        vm_manager.HeapAllocate(heap_base, heap_size, VMAPermission::ReadWrite);
-
+    auto& vm_manager = Core::System::GetInstance().Kernel().CurrentProcess()->VMManager();
+    const auto alloc_result = vm_manager.SetHeapSize(heap_size);
     if (alloc_result.Failed()) {
         return alloc_result.Code();
     }
@@ -711,7 +709,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
         HeapRegionBaseAddr = 4,
         HeapRegionSize = 5,
         TotalMemoryUsage = 6,
-        TotalHeapUsage = 7,
+        TotalPhysicalMemoryUsed = 7,
         IsCurrentProcessBeingDebugged = 8,
         RegisterResourceLimit = 9,
         IdleTickCount = 10,
@@ -747,7 +745,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
     case GetInfoType::NewMapRegionBaseAddr:
     case GetInfoType::NewMapRegionSize:
     case GetInfoType::TotalMemoryUsage:
-    case GetInfoType::TotalHeapUsage:
+    case GetInfoType::TotalPhysicalMemoryUsed:
     case GetInfoType::IsVirtualAddressMemoryEnabled:
     case GetInfoType::PersonalMmHeapUsage:
     case GetInfoType::TitleId:
@@ -807,8 +805,8 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
             *result = process->VMManager().GetTotalMemoryUsage();
             return RESULT_SUCCESS;
 
-        case GetInfoType::TotalHeapUsage:
-            *result = process->VMManager().GetTotalHeapUsage();
+        case GetInfoType::TotalPhysicalMemoryUsed:
+            *result = process->GetTotalPhysicalMemoryUsed();
             return RESULT_SUCCESS;
 
         case GetInfoType::IsVirtualAddressMemoryEnabled:
@@ -1355,7 +1353,7 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var
     current_thread->SetCondVarWaitAddress(condition_variable_addr);
     current_thread->SetMutexWaitAddress(mutex_addr);
     current_thread->SetWaitHandle(thread_handle);
-    current_thread->SetStatus(ThreadStatus::WaitMutex);
+    current_thread->SetStatus(ThreadStatus::WaitCondVar);
     current_thread->InvalidateWakeupCallback();
 
     current_thread->WakeAfterDelay(nano_seconds);
@@ -1399,10 +1397,10 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
     // them all.
     std::size_t last = waiting_threads.size();
     if (target != -1)
-        last = target;
+        last = std::min(waiting_threads.size(), static_cast<std::size_t>(target));
 
     // If there are no threads waiting on this condition variable, just exit
-    if (last > waiting_threads.size())
+    if (last == 0)
         return RESULT_SUCCESS;
 
     for (std::size_t index = 0; index < last; ++index) {
@@ -1410,6 +1408,9 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
 
         ASSERT(thread->GetCondVarWaitAddress() == condition_variable_addr);
 
+        // Release the thread from the condition variable it was waiting on.
+        thread->SetCondVarWaitAddress(0);
+
         std::size_t current_core = Core::System::GetInstance().CurrentCoreIndex();
 
         auto& monitor = Core::System::GetInstance().Monitor();
@@ -1428,10 +1429,9 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
             }
         } while (!monitor.ExclusiveWrite32(current_core, thread->GetMutexWaitAddress(),
                                            thread->GetWaitHandle()));
-
         if (mutex_val == 0) {
             // We were able to acquire the mutex, resume this thread.
-            ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex);
+            ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
             thread->ResumeFromWait();
 
             auto* const lock_owner = thread->GetLockOwner();
@@ -1441,8 +1441,8 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
 
             thread->SetLockOwner(nullptr);
             thread->SetMutexWaitAddress(0);
-            thread->SetCondVarWaitAddress(0);
             thread->SetWaitHandle(0);
+            Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
         } else {
             // Atomically signal that the mutex now has a waiting thread.
             do {
@@ -1461,12 +1461,11 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
             const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
             auto owner = handle_table.Get<Thread>(owner_handle);
             ASSERT(owner);
-            ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex);
+            ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
             thread->InvalidateWakeupCallback();
+            thread->SetStatus(ThreadStatus::WaitMutex);
 
             owner->AddMutexWaiter(thread);
-
-            Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
         }
     }
 
@@ -1586,14 +1585,121 @@ static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32
     }
 
     auto& kernel = Core::System::GetInstance().Kernel();
-    auto process = kernel.CurrentProcess();
-    auto& handle_table = process->GetHandleTable();
-    const auto shared_mem_handle = SharedMemory::Create(kernel, process, size, perms, perms, addr);
+    auto transfer_mem_handle = TransferMemory::Create(kernel, addr, size, perms);
 
-    CASCADE_RESULT(*handle, handle_table.Create(shared_mem_handle));
+    auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
+    const auto result = handle_table.Create(std::move(transfer_mem_handle));
+    if (result.Failed()) {
+        return result.Code();
+    }
+
+    *handle = *result;
     return RESULT_SUCCESS;
 }
 
+static ResultCode MapTransferMemory(Handle handle, VAddr address, u64 size, u32 permission_raw) {
+    LOG_DEBUG(Kernel_SVC,
+              "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}, permissions=0x{:08X}",
+              handle, address, size, permission_raw);
+
+    if (!Common::Is4KBAligned(address)) {
+        LOG_ERROR(Kernel_SVC, "Transfer memory addresses must be 4KB aligned (address=0x{:016X}).",
+                  address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Transfer memory sizes must be 4KB aligned and not be zero (size=0x{:016X}).",
+                  size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!IsValidAddressRange(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size overflows the 64-bit range (address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+    // Only None, Read and ReadWrite are accepted as transfer memory permissions.
+    const auto permissions = static_cast<MemoryPermission>(permission_raw);
+    if (permissions != MemoryPermission::None && permissions != MemoryPermission::Read &&
+        permissions != MemoryPermission::ReadWrite) {
+        LOG_ERROR(Kernel_SVC, "Invalid transfer memory permissions given (permissions=0x{:08X}).",
+                  permission_raw);
+        return ERR_INVALID_STATE;
+    }
+
+    const auto& kernel = Core::System::GetInstance().Kernel();
+    const auto* const current_process = kernel.CurrentProcess();
+    const auto& handle_table = current_process->GetHandleTable();
+
+    auto transfer_memory = handle_table.Get<TransferMemory>(handle);
+    if (!transfer_memory) {
+        LOG_ERROR(Kernel_SVC, "Nonexistent transfer memory handle given (handle=0x{:08X}).",
+                  handle);
+        return ERR_INVALID_HANDLE;
+    }
+    // The requested range must lie entirely inside the current process' ASLR region.
+    if (!current_process->VMManager().IsWithinASLRRegion(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size don't fully fit within the ASLR region "
+                  "(address=0x{:016X}, size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return transfer_memory->MapMemory(address, size, permissions);
+}
+
+static ResultCode UnmapTransferMemory(Handle handle, VAddr address, u64 size) {
+    LOG_DEBUG(Kernel_SVC, "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}", handle,
+              address, size);
+
+    if (!Common::Is4KBAligned(address)) {
+        LOG_ERROR(Kernel_SVC, "Transfer memory addresses must be 4KB aligned (address=0x{:016X}).",
+                  address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Transfer memory sizes must be 4KB aligned and not be zero (size=0x{:016X}).",
+                  size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!IsValidAddressRange(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size overflows the 64-bit range (address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const auto& kernel = Core::System::GetInstance().Kernel();
+    const auto* const current_process = kernel.CurrentProcess();
+    const auto& handle_table = current_process->GetHandleTable();
+
+    auto transfer_memory = handle_table.Get<TransferMemory>(handle);
+    if (!transfer_memory) {
+        LOG_ERROR(Kernel_SVC, "Nonexistent transfer memory handle given (handle=0x{:08X}).",
+                  handle);
+        return ERR_INVALID_HANDLE;
+    }
+    // The requested range must lie entirely inside the current process' ASLR region.
+    if (!current_process->VMManager().IsWithinASLRRegion(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size don't fully fit within the ASLR region "
+                  "(address=0x{:016X}, size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return transfer_memory->UnmapMemory(address, size);
+}
+
 static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask) {
     LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle);
 
@@ -1969,8 +2075,8 @@ static const FunctionDef SVC_Table[] = {
     {0x4E, nullptr, "ReadWriteRegister"},
     {0x4F, nullptr, "SetProcessActivity"},
     {0x50, SvcWrap<CreateSharedMemory>, "CreateSharedMemory"},
-    {0x51, nullptr, "MapTransferMemory"},
-    {0x52, nullptr, "UnmapTransferMemory"},
+    {0x51, SvcWrap<MapTransferMemory>, "MapTransferMemory"},
+    {0x52, SvcWrap<UnmapTransferMemory>, "UnmapTransferMemory"},
     {0x53, nullptr, "CreateInterruptEvent"},
     {0x54, nullptr, "QueryPhysicalAddress"},
     {0x55, nullptr, "QueryIoMapping"},
@@ -2032,7 +2138,7 @@ void CallSVC(u32 immediate) {
     MICROPROFILE_SCOPE(Kernel_SVC);
 
     // Lock the global kernel mutex when we enter the kernel HLE.
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
 
     const FunctionDef* info = GetSVCInfo(immediate);
     if (info) {
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 3b22e8e0d..fa3ac3abc 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -105,6 +105,7 @@ void Thread::ResumeFromWait() {
     case ThreadStatus::WaitSleep:
     case ThreadStatus::WaitIPC:
     case ThreadStatus::WaitMutex:
+    case ThreadStatus::WaitCondVar:
     case ThreadStatus::WaitArb:
         break;
 
@@ -198,7 +199,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
     thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
     thread->owner_process = &owner_process;
     thread->scheduler = &system.Scheduler(processor_id);
-    thread->scheduler->AddThread(thread, priority);
+    thread->scheduler->AddThread(thread);
     thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);
 
     // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
@@ -351,7 +352,7 @@ void Thread::ChangeScheduler() {
     if (*new_processor_id != processor_id) {
         // Remove thread from previous core's scheduler
         scheduler->RemoveThread(this);
-        next_scheduler.AddThread(this, current_priority);
+        next_scheduler.AddThread(this);
     }
 
     processor_id = *new_processor_id;
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index faad5f391..9c684758c 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -51,7 +51,8 @@ enum class ThreadStatus {
     WaitIPC,      ///< Waiting for the reply from an IPC request
     WaitSynchAny, ///< Waiting due to WaitSynch1 or WaitSynchN with wait_all = false
     WaitSynchAll, ///< Waiting due to WaitSynchronizationN with wait_all = true
-    WaitMutex,    ///< Waiting due to an ArbitrateLock/WaitProcessWideKey svc
+    WaitMutex,    ///< Waiting due to an ArbitrateLock svc
+    WaitCondVar,  ///< Waiting due to a WaitProcessWideKey svc
     WaitArb,      ///< Waiting due to a SignalToAddress/WaitForAddress svc
     Dormant,      ///< Created but not yet made ready
     Dead          ///< Run to completion, or forcefully terminated
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp
new file mode 100644
index 000000000..23228e1b5
--- /dev/null
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -0,0 +1,73 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/shared_memory.h"
+#include "core/hle/kernel/transfer_memory.h"
+#include "core/hle/result.h"
+
+namespace Kernel {
+
+TransferMemory::TransferMemory(KernelCore& kernel) : Object{kernel} {}
+TransferMemory::~TransferMemory() = default;
+
+SharedPtr<TransferMemory> TransferMemory::Create(KernelCore& kernel, VAddr base_address,
+                                                 size_t size, MemoryPermission permissions) {
+    SharedPtr<TransferMemory> transfer_memory{new TransferMemory(kernel)};
+
+    transfer_memory->base_address = base_address;
+    transfer_memory->memory_size = size;
+    transfer_memory->owner_permissions = permissions;
+    transfer_memory->owner_process = kernel.CurrentProcess();
+
+    return transfer_memory;
+}
+
+ResultCode TransferMemory::MapMemory(VAddr address, size_t size, MemoryPermission permissions) {
+    if (memory_size != size) {
+        return ERR_INVALID_SIZE;
+    }
+
+    if (owner_permissions != permissions) {
+        return ERR_INVALID_STATE;
+    }
+
+    if (is_mapped) {
+        return ERR_INVALID_STATE;
+    }
+    // NOTE(review): maps a newly allocated buffer; base_address is not consulted - confirm.
+    const auto map_state = owner_permissions == MemoryPermission::None
+                               ? MemoryState::TransferMemoryIsolated
+                               : MemoryState::TransferMemory;
+    auto& vm_manager = owner_process->VMManager();
+    const auto map_result = vm_manager.MapMemoryBlock(
+        address, std::make_shared<std::vector<u8>>(size), 0, size, map_state);
+
+    if (map_result.Failed()) {
+        return map_result.Code();
+    }
+
+    is_mapped = true;
+    return RESULT_SUCCESS;
+}
+
+ResultCode TransferMemory::UnmapMemory(VAddr address, size_t size) {
+    if (memory_size != size) {
+        return ERR_INVALID_SIZE;
+    }
+    // NOTE(review): is_mapped is not checked before unmapping, unlike in MapMemory - confirm.
+    auto& vm_manager = owner_process->VMManager();
+    const auto result = vm_manager.UnmapRange(address, size);
+
+    if (result.IsError()) {
+        return result;
+    }
+
+    is_mapped = false;
+    return RESULT_SUCCESS;
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h
new file mode 100644
index 000000000..ec294951e
--- /dev/null
+++ b/src/core/hle/kernel/transfer_memory.h
@@ -0,0 +1,91 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/hle/kernel/object.h"
+
+union ResultCode;
+
+namespace Kernel {
+
+class KernelCore;
+class Process;
+
+enum class MemoryPermission : u32;
+
+/// Defines the interface for transfer memory objects.
+///
+/// Transfer memory is typically used for the purpose of
+/// transferring memory between separate process instances,
+/// thus the name.
+///
+class TransferMemory final : public Object {
+public:
+    static constexpr HandleType HANDLE_TYPE = HandleType::TransferMemory;
+
+    static SharedPtr<TransferMemory> Create(KernelCore& kernel, VAddr base_address, size_t size,
+                                            MemoryPermission permissions);
+
+    TransferMemory(const TransferMemory&) = delete;
+    TransferMemory& operator=(const TransferMemory&) = delete;
+
+    TransferMemory(TransferMemory&&) = delete;
+    TransferMemory& operator=(TransferMemory&&) = delete;
+
+    std::string GetTypeName() const override {
+        return "TransferMemory";
+    }
+
+    std::string GetName() const override {
+        return GetTypeName();
+    }
+
+    HandleType GetHandleType() const override {
+        return HANDLE_TYPE;
+    }
+
+    /// Attempts to map transfer memory with the given range and memory permissions.
+    ///
+    /// @param address     The base address to begin mapping memory at.
+    /// @param size        The size of the memory to map, in bytes.
+    /// @param permissions The memory permissions to check against when mapping memory.
+    ///
+    /// @pre The given address, size, and memory permissions must all match
+    ///      the same values that were given when creating the transfer memory
+    ///      instance.
+    ///
+    ResultCode MapMemory(VAddr address, size_t size, MemoryPermission permissions);
+
+    /// Unmaps the transfer memory with the given range
+    ///
+    /// @param address The base address to begin unmapping memory at.
+    /// @param size    The size of the memory to unmap, in bytes.
+    ///
+    /// @pre The given address and size must be the same as the ones used
+    ///      to create the transfer memory instance.
+    ///
+    ResultCode UnmapMemory(VAddr address, size_t size);
+
+private:
+    explicit TransferMemory(KernelCore& kernel);
+    ~TransferMemory() override;
+
+    /// The base address for the memory managed by this instance.
+    VAddr base_address = 0;
+
+    /// Size of the memory, in bytes, that this instance manages.
+    size_t memory_size = 0;
+
+    /// The memory permissions that are applied to this instance.
+    MemoryPermission owner_permissions{};
+
+    /// The process that this transfer memory instance was created under.
+    Process* owner_process = nullptr;
+
+    /// Whether or not this transfer memory instance has mapped memory.
+    bool is_mapped = false;
+};
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 22bf55ce7..ec0a480ce 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -256,57 +256,50 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p
     return RESULT_SUCCESS;
 }
 
-ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) {
-    if (!IsWithinHeapRegion(target, size)) {
-        return ERR_INVALID_ADDRESS;
+ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
+    if (size > GetHeapRegionSize()) {
+        return ERR_OUT_OF_MEMORY;
+    }
+
+    // No need to do any additional work if the heap is already the given size.
+    if (size == GetCurrentHeapSize()) {
+        return MakeResult(heap_region_base);
     }
 
     if (heap_memory == nullptr) {
         // Initialize heap
-        heap_memory = std::make_shared<std::vector<u8>>();
-        heap_start = heap_end = target;
+        heap_memory = std::make_shared<std::vector<u8>>(size);
+        heap_end = heap_region_base + size;
     } else {
-        UnmapRange(heap_start, heap_end - heap_start);
-    }
-
-    // If necessary, expand backing vector to cover new heap extents.
-    if (target < heap_start) {
-        heap_memory->insert(begin(*heap_memory), heap_start - target, 0);
-        heap_start = target;
-        RefreshMemoryBlockMappings(heap_memory.get());
-    }
-    if (target + size > heap_end) {
-        heap_memory->insert(end(*heap_memory), (target + size) - heap_end, 0);
-        heap_end = target + size;
-        RefreshMemoryBlockMappings(heap_memory.get());
+        UnmapRange(heap_region_base, GetCurrentHeapSize());
     }
-    ASSERT(heap_end - heap_start == heap_memory->size());
 
-    CASCADE_RESULT(auto vma, MapMemoryBlock(target, heap_memory, target - heap_start, size,
-                                            MemoryState::Heap));
-    Reprotect(vma, perms);
+    // If necessary, expand backing vector to cover new heap extents in
+    // the case of allocating. Otherwise, shrink the backing memory,
+    // if a smaller heap has been requested.
+    const u64 old_heap_size = GetCurrentHeapSize();
+    if (size > old_heap_size) {
+        const u64 alloc_size = size - old_heap_size;
 
-    heap_used = size;
-
-    return MakeResult<VAddr>(heap_end - size);
-}
+        heap_memory->insert(heap_memory->end(), alloc_size, 0);
+        RefreshMemoryBlockMappings(heap_memory.get());
+    } else if (size < old_heap_size) {
+        heap_memory->resize(size);
+        heap_memory->shrink_to_fit();
 
-ResultCode VMManager::HeapFree(VAddr target, u64 size) {
-    if (!IsWithinHeapRegion(target, size)) {
-        return ERR_INVALID_ADDRESS;
+        RefreshMemoryBlockMappings(heap_memory.get());
     }
 
-    if (size == 0) {
-        return RESULT_SUCCESS;
-    }
+    heap_end = heap_region_base + size;
+    ASSERT(GetCurrentHeapSize() == heap_memory->size());
 
-    const ResultCode result = UnmapRange(target, size);
-    if (result.IsError()) {
-        return result;
+    const auto mapping_result =
+        MapMemoryBlock(heap_region_base, heap_memory, 0, size, MemoryState::Heap);
+    if (mapping_result.Failed()) {
+        return mapping_result.Code();
     }
 
-    heap_used -= size;
-    return RESULT_SUCCESS;
+    return MakeResult<VAddr>(heap_region_base);
 }
 
 MemoryInfo VMManager::QueryMemory(VAddr address) const {
@@ -598,6 +591,7 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty
 
     heap_region_base = map_region_end;
     heap_region_end = heap_region_base + heap_region_size;
+    heap_end = heap_region_base;
 
     new_map_region_base = heap_region_end;
     new_map_region_end = new_map_region_base + new_map_region_size;
@@ -692,10 +686,6 @@ u64 VMManager::GetTotalMemoryUsage() const {
     return 0xF8000000;
 }
 
-u64 VMManager::GetTotalHeapUsage() const {
-    return heap_used;
-}
-
 VAddr VMManager::GetAddressSpaceBaseAddress() const {
     return address_space_base;
 }
@@ -778,6 +768,10 @@ u64 VMManager::GetHeapRegionSize() const {
     return heap_region_end - heap_region_base;
 }
 
+u64 VMManager::GetCurrentHeapSize() const {
+    return heap_end - heap_region_base;
+}
+
 bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const {
     return IsInsideAddressRange(address, size, GetHeapRegionBaseAddress(),
                                 GetHeapRegionEndAddress());
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 7cdff6094..6f484b7bf 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -380,11 +380,41 @@ public:
     /// Changes the permissions of a range of addresses, splitting VMAs as necessary.
     ResultCode ReprotectRange(VAddr target, u64 size, VMAPermission new_perms);
 
-    ResultVal<VAddr> HeapAllocate(VAddr target, u64 size, VMAPermission perms);
-    ResultCode HeapFree(VAddr target, u64 size);
-
     ResultCode MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, MemoryState state);
 
+    /// Attempts to allocate a heap with the given size.
+    ///
+    /// @param size The size of the heap to allocate in bytes.
+    ///
+    /// @note If a heap is currently allocated, and this is called
+    ///       with a size that is equal to the size of the current heap,
+    ///       then this function will do nothing and return the current
+    ///       heap's starting address, as there's no need to perform
+    ///       any additional heap allocation work.
+    ///
+    /// @note If a heap is currently allocated, and this is called
+    ///       with a size less than the current heap's size, then
+    ///       this function will attempt to shrink the heap.
+    ///
+    /// @note If a heap is currently allocated, and this is called
+    ///       with a size larger than the current heap's size, then
+    ///       this function will attempt to extend the size of the heap.
+    ///
+    /// @returns A result indicating either success or failure.
+    ///          <p>
+    ///          If successful, this function will return a result
+    ///          containing the starting address to the allocated heap.
+    ///          <p>
+    ///          If unsuccessful, this function will return a result
+    ///          containing an error code.
+    ///
+    /// @pre The given size must lie within the allowable heap
+    ///      memory region managed by this VMManager instance.
+    ///      Failure to abide by this will result in ERR_OUT_OF_MEMORY
+    ///      being returned as the result.
+    ///
+    ResultVal<VAddr> SetHeapSize(u64 size);
+
     /// Queries the memory manager for information about the given address.
     ///
     /// @param address The address to query the memory manager about for information.
@@ -418,9 +448,6 @@ public:
     /// Gets the total memory usage, used by svcGetInfo
     u64 GetTotalMemoryUsage() const;
 
-    /// Gets the total heap usage, used by svcGetInfo
-    u64 GetTotalHeapUsage() const;
-
     /// Gets the address space base address
     VAddr GetAddressSpaceBaseAddress() const;
 
@@ -469,6 +496,13 @@ public:
     /// Gets the total size of the heap region in bytes.
     u64 GetHeapRegionSize() const;
 
+    /// Gets the total size of the current heap in bytes.
+    ///
+    /// @note This is the current allocated heap size, not the size
+    ///       of the region it's allowed to exist within.
+    ///
+    u64 GetCurrentHeapSize() const;
+
     /// Determines whether or not the specified range is within the heap region.
     bool IsWithinHeapRegion(VAddr address, u64 size) const;
 
@@ -617,9 +651,6 @@ private:
     VAddr new_map_region_base = 0;
     VAddr new_map_region_end = 0;
 
-    VAddr main_code_region_base = 0;
-    VAddr main_code_region_end = 0;
-
     VAddr tls_io_region_base = 0;
     VAddr tls_io_region_end = 0;
 
@@ -628,9 +659,9 @@ private:
     // This makes deallocation and reallocation of holes fast and keeps process memory contiguous
     // in the emulator address space, allowing Memory::GetPointer to be reasonably safe.
     std::shared_ptr<std::vector<u8>> heap_memory;
-    // The left/right bounds of the address space covered by heap_memory.
-    VAddr heap_start = 0;
+
+    // The end of the currently allocated heap. This is not an inclusive
+    // end of the range. This is essentially 'base_address + current_size'.
     VAddr heap_end = 0;
-    u64 heap_used = 0;
 };
 } // namespace Kernel
diff --git a/src/core/hle/service/fatal/fatal.cpp b/src/core/hle/service/fatal/fatal.cpp
index 770590d0b..2c229bcad 100644
--- a/src/core/hle/service/fatal/fatal.cpp
+++ b/src/core/hle/service/fatal/fatal.cpp
@@ -25,21 +25,34 @@ Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
 Module::Interface::~Interface() = default;
 
 struct FatalInfo {
-    std::array<u64_le, 31> registers{}; // TODO(ogniK): See if this actually is registers or
-                                        // not(find a game which has non zero valeus)
-    u64_le unk0{};
-    u64_le unk1{};
-    u64_le unk2{};
-    u64_le unk3{};
-    u64_le unk4{};
-    u64_le unk5{};
-    u64_le unk6{};
+    enum class Architecture : s32 {
+        AArch64,
+        AArch32,
+    };
+
+    const char* ArchAsString() const {
+        return arch == Architecture::AArch64 ? "AArch64" : "AArch32";
+    }
+
+    std::array<u64_le, 31> registers{};
+    u64_le sp{};
+    u64_le pc{};
+    u64_le pstate{};
+    u64_le afsr0{};
+    u64_le afsr1{};
+    u64_le esr{};
+    u64_le far{};
 
     std::array<u64_le, 32> backtrace{};
-    u64_le unk7{};
-    u64_le unk8{};
+    u64_le program_entry_point{};
+
+    // Bit flags that indicate which registers have been set with values
+    // for this context. The service itself uses these to determine which
+    // registers to specifically print out.
+    u64_le set_flags{};
+
     u32_le backtrace_size{};
-    u32_le unk9{};
+    Architecture arch{};
     u32_le unk10{}; // TODO(ogniK): Is this even used or is it just padding?
 };
 static_assert(sizeof(FatalInfo) == 0x250, "FatalInfo is an invalid size");
@@ -52,36 +65,36 @@ enum class FatalType : u32 {
 
 static void GenerateErrorReport(ResultCode error_code, const FatalInfo& info) {
     const auto title_id = Core::CurrentProcess()->GetTitleID();
-    std::string crash_report =
-        fmt::format("Yuzu {}-{} crash report\n"
-                    "Title ID:                        {:016x}\n"
-                    "Result:                          0x{:X} ({:04}-{:04d})\n"
-                    "\n",
-                    Common::g_scm_branch, Common::g_scm_desc, title_id, error_code.raw,
-                    2000 + static_cast<u32>(error_code.module.Value()),
-                    static_cast<u32>(error_code.description.Value()), info.unk8, info.unk7);
+    std::string crash_report = fmt::format(
+        "Yuzu {}-{} crash report\n"
+        "Title ID:                        {:016x}\n"
+        "Result:                          0x{:X} ({:04}-{:04d})\n"
+        "Set flags:                       0x{:016X}\n"
+        "Program entry point:             0x{:016X}\n"
+        "\n",
+        Common::g_scm_branch, Common::g_scm_desc, title_id, error_code.raw,
+        2000 + static_cast<u32>(error_code.module.Value()),
+        static_cast<u32>(error_code.description.Value()), info.set_flags, info.program_entry_point);
     if (info.backtrace_size != 0x0) {
         crash_report += "Registers:\n";
-        // TODO(ogniK): This is just a guess, find a game which actually has non zero values
         for (size_t i = 0; i < info.registers.size(); i++) {
             crash_report +=
                 fmt::format("    X[{:02d}]:                       {:016x}\n", i, info.registers[i]);
         }
-        crash_report += fmt::format("    Unknown 0:                   {:016x}\n", info.unk0);
-        crash_report += fmt::format("    Unknown 1:                   {:016x}\n", info.unk1);
-        crash_report += fmt::format("    Unknown 2:                   {:016x}\n", info.unk2);
-        crash_report += fmt::format("    Unknown 3:                   {:016x}\n", info.unk3);
-        crash_report += fmt::format("    Unknown 4:                   {:016x}\n", info.unk4);
-        crash_report += fmt::format("    Unknown 5:                   {:016x}\n", info.unk5);
-        crash_report += fmt::format("    Unknown 6:                   {:016x}\n", info.unk6);
+        crash_report += fmt::format("    SP:                          {:016x}\n", info.sp);
+        crash_report += fmt::format("    PC:                          {:016x}\n", info.pc);
+        crash_report += fmt::format("    PSTATE:                      {:016x}\n", info.pstate);
+        crash_report += fmt::format("    AFSR0:                       {:016x}\n", info.afsr0);
+        crash_report += fmt::format("    AFSR1:                       {:016x}\n", info.afsr1);
+        crash_report += fmt::format("    ESR:                         {:016x}\n", info.esr);
+        crash_report += fmt::format("    FAR:                         {:016x}\n", info.far);
         crash_report += "\nBacktrace:\n";
         for (size_t i = 0; i < info.backtrace_size; i++) {
             crash_report +=
                 fmt::format("    Backtrace[{:02d}]:               {:016x}\n", i, info.backtrace[i]);
         }
-        crash_report += fmt::format("\nUnknown 7:                       0x{:016x}\n", info.unk7);
-        crash_report += fmt::format("Unknown 8:                       0x{:016x}\n", info.unk8);
-        crash_report += fmt::format("Unknown 9:                       0x{:016x}\n", info.unk9);
+
+        crash_report += fmt::format("Architecture:                    {}\n", info.ArchAsString());
         crash_report += fmt::format("Unknown 10:                      0x{:016x}\n", info.unk10);
     }
 
@@ -125,13 +138,13 @@ static void ThrowFatalError(ResultCode error_code, FatalType fatal_type, const F
     case FatalType::ErrorReport:
         GenerateErrorReport(error_code, info);
         break;
-    };
+    }
 }
 
 void Module::Interface::ThrowFatal(Kernel::HLERequestContext& ctx) {
     LOG_ERROR(Service_Fatal, "called");
     IPC::RequestParser rp{ctx};
-    auto error_code = rp.Pop<ResultCode>();
+    const auto error_code = rp.Pop<ResultCode>();
 
     ThrowFatalError(error_code, FatalType::ErrorScreen, {});
     IPC::ResponseBuilder rb{ctx, 2};
@@ -141,8 +154,8 @@ void Module::Interface::ThrowFatal(Kernel::HLERequestContext& ctx) {
 void Module::Interface::ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx) {
     LOG_ERROR(Service_Fatal, "called");
     IPC::RequestParser rp(ctx);
-    auto error_code = rp.Pop<ResultCode>();
-    auto fatal_type = rp.PopEnum<FatalType>();
+    const auto error_code = rp.Pop<ResultCode>();
+    const auto fatal_type = rp.PopEnum<FatalType>();
 
     ThrowFatalError(error_code, fatal_type, {}); // No info is passed with ThrowFatalWithPolicy
     IPC::ResponseBuilder rb{ctx, 2};
@@ -152,9 +165,9 @@ void Module::Interface::ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx) {
 void Module::Interface::ThrowFatalWithCpuContext(Kernel::HLERequestContext& ctx) {
     LOG_ERROR(Service_Fatal, "called");
     IPC::RequestParser rp(ctx);
-    auto error_code = rp.Pop<ResultCode>();
-    auto fatal_type = rp.PopEnum<FatalType>();
-    auto fatal_info = ctx.ReadBuffer();
+    const auto error_code = rp.Pop<ResultCode>();
+    const auto fatal_type = rp.PopEnum<FatalType>();
+    const auto fatal_info = ctx.ReadBuffer();
     FatalInfo info{};
 
     ASSERT_MSG(fatal_info.size() == sizeof(FatalInfo), "Invalid fatal info buffer size!");
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 8a6de83a2..63b55758b 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -36,9 +36,9 @@ namespace Service::HID {
 
 // Updating period for each HID device.
 // TODO(ogniK): Find actual polling rate of hid
-constexpr u64 pad_update_ticks = Core::Timing::BASE_CLOCK_RATE / 66;
-constexpr u64 accelerometer_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100;
-constexpr u64 gyroscope_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100;
+constexpr s64 pad_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 66);
+constexpr s64 accelerometer_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
+constexpr s64 gyroscope_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
 constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;
 
 IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
@@ -75,7 +75,7 @@ IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
     // Register update callbacks
     auto& core_timing = Core::System::GetInstance().CoreTiming();
     pad_update_event =
-        core_timing.RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) {
+        core_timing.RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 cycles_late) {
             UpdateControllers(userdata, cycles_late);
         });
 
@@ -106,7 +106,7 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
     rb.PushCopyObjects(shared_mem);
 }
 
-void IAppletResource::UpdateControllers(u64 userdata, int cycles_late) {
+void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) {
     auto& core_timing = Core::System::GetInstance().CoreTiming();
 
     const bool should_reload = Settings::values.is_device_reload_pending.exchange(false);
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 498602de5..d3660cad2 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -65,7 +65,7 @@ private:
     }
 
     void GetSharedMemoryHandle(Kernel::HLERequestContext& ctx);
-    void UpdateControllers(u64 userdata, int cycles_late);
+    void UpdateControllers(u64 userdata, s64 cycles_late);
 
     Kernel::SharedPtr<Kernel::SharedMemory> shared_mem;
 
diff --git a/src/core/hle/service/nfc/nfc.cpp b/src/core/hle/service/nfc/nfc.cpp
index 5c62d42ba..ca88bf97f 100644
--- a/src/core/hle/service/nfc/nfc.cpp
+++ b/src/core/hle/service/nfc/nfc.cpp
@@ -150,7 +150,7 @@ private:
 
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
-        rb.PushRaw<u8>(Settings::values.enable_nfc);
+        rb.PushRaw<u8>(true);
     }
 
     void GetStateOld(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index 1c4482e47..c6babdd4d 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -335,7 +335,7 @@ void Module::Interface::CreateUserInterface(Kernel::HLERequestContext& ctx) {
 }
 
 bool Module::Interface::LoadAmiibo(const std::vector<u8>& buffer) {
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     if (buffer.size() < sizeof(AmiiboFile)) {
         return false;
     }
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index b031ebc66..af62d33d2 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -89,7 +89,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
     for (const auto& entry : entries) {
         LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
                     entry.offset, entry.nvmap_handle, entry.pages);
-        Tegra::GPUVAddr offset = static_cast<Tegra::GPUVAddr>(entry.offset) << 0x10;
+        GPUVAddr offset = static_cast<GPUVAddr>(entry.offset) << 0x10;
         auto object = nvmap_dev->GetObject(entry.nvmap_handle);
         if (!object) {
             LOG_CRITICAL(Service_NVDRV, "nvmap {} is an invalid handle!", entry.nvmap_handle);
@@ -102,7 +102,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
         u64 size = static_cast<u64>(entry.pages) << 0x10;
         ASSERT(size <= object->size);
 
-        Tegra::GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
+        GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
         ASSERT(returned == offset);
     }
     std::memcpy(output.data(), entries.data(), output.size());
@@ -173,16 +173,8 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
         return 0;
     }
 
-    auto& system_instance = Core::System::GetInstance();
-
-    // Remove this memory region from the rasterizer cache.
-    auto& gpu = system_instance.GPU();
-    auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
-    ASSERT(cpu_addr);
-    gpu.FlushAndInvalidateRegion(ToCacheAddr(Memory::GetPointer(*cpu_addr)), itr->second.size);
-
-    params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
-
+    params.offset = Core::System::GetInstance().GPU().MemoryManager().UnmapBuffer(params.offset,
+                                                                                  itr->second.size);
     buffer_mappings.erase(itr->second.offset);
 
     std::memcpy(output.data(), &params, output.size());
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index fc496b654..c7f5bbf28 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -26,7 +26,7 @@
 namespace Service::NVFlinger {
 
 constexpr std::size_t SCREEN_REFRESH_RATE = 60;
-constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
+constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
 
 NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
     displays.emplace_back(0, "Default");
@@ -37,7 +37,7 @@ NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_t
 
     // Schedule the screen composition events
     composition_event =
-        core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
+        core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) {
             Compose();
             this->core_timing.ScheduleEvent(frame_ticks - cycles_late, composition_event);
         });
diff --git a/src/core/hle/service/set/set_sys.cpp b/src/core/hle/service/set/set_sys.cpp
index c9b4da5b0..ecee554bf 100644
--- a/src/core/hle/service/set/set_sys.cpp
+++ b/src/core/hle/service/set/set_sys.cpp
@@ -2,13 +2,88 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/file_sys/errors.h"
+#include "core/file_sys/system_archive/system_version.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/client_port.h"
+#include "core/hle/service/filesystem/filesystem.h"
 #include "core/hle/service/set/set_sys.h"
 
 namespace Service::Set {
 
+namespace {
+constexpr u64 SYSTEM_VERSION_FILE_MINOR_REVISION_OFFSET = 0x05;
+
+enum class GetFirmwareVersionType {
+    Version1,
+    Version2,
+};
+
+void GetFirmwareVersionImpl(Kernel::HLERequestContext& ctx, GetFirmwareVersionType type) {
+    LOG_WARNING(Service_SET, "called - Using hardcoded firmware version '{}'",
+                FileSys::SystemArchive::GetLongDisplayVersion());
+
+    ASSERT_MSG(ctx.GetWriteBufferSize() == 0x100,
+               "FirmwareVersion output buffer must be 0x100 bytes in size!");
+
+    // The normal procedure would be to check for the real system archive and synthesize one
+    // only if it doesn't exist, but that could lead to strange bugs when a user has a really
+    // old or really new SystemVersion title installed. Always using the synthesized one
+    // ensures consistency (currently reports as 5.1.0-0.0)
+    const auto archive = FileSys::SystemArchive::SystemVersion();
+
+    const auto early_exit_failure = [&ctx](const std::string& desc, ResultCode code) {
+        LOG_ERROR(Service_SET, "General failure while attempting to resolve firmware version ({}).",
+                  desc.c_str());
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(code);
+    };
+
+    if (archive == nullptr) {
+        early_exit_failure("The system version archive couldn't be synthesized.",
+                           FileSys::ERROR_FAILED_MOUNT_ARCHIVE);
+        return;
+    }
+
+    const auto ver_file = archive->GetFile("file");
+    if (ver_file == nullptr) {
+        early_exit_failure("The system version archive didn't contain the file 'file'.",
+                           FileSys::ERROR_INVALID_ARGUMENT);
+        return;
+    }
+
+    auto data = ver_file->ReadAllBytes();
+    if (data.size() != 0x100) {
+        early_exit_failure("The system version file 'file' was not the correct size.",
+                           FileSys::ERROR_OUT_OF_BOUNDS);
+        return;
+    }
+
+    // If the command is GetFirmwareVersion (as opposed to GetFirmwareVersion2), hardware will
+    // zero out the REVISION_MINOR field.
+    if (type == GetFirmwareVersionType::Version1) {
+        data[SYSTEM_VERSION_FILE_MINOR_REVISION_OFFSET] = 0;
+    }
+
+    ctx.WriteBuffer(data);
+
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+}
+} // Anonymous namespace
+
+void SET_SYS::GetFirmwareVersion(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_SET, "called");
+    GetFirmwareVersionImpl(ctx, GetFirmwareVersionType::Version1);
+}
+
+void SET_SYS::GetFirmwareVersion2(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_SET, "called");
+    GetFirmwareVersionImpl(ctx, GetFirmwareVersionType::Version2);
+}
+
 void SET_SYS::GetColorSetId(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_SET, "called");
 
@@ -33,8 +108,8 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") {
         {0, nullptr, "SetLanguageCode"},
         {1, nullptr, "SetNetworkSettings"},
         {2, nullptr, "GetNetworkSettings"},
-        {3, nullptr, "GetFirmwareVersion"},
-        {4, nullptr, "GetFirmwareVersion2"},
+        {3, &SET_SYS::GetFirmwareVersion, "GetFirmwareVersion"},
+        {4, &SET_SYS::GetFirmwareVersion2, "GetFirmwareVersion2"},
         {5, nullptr, "GetFirmwareVersionDigest"},
         {7, nullptr, "GetLockScreenFlag"},
         {8, nullptr, "SetLockScreenFlag"},
diff --git a/src/core/hle/service/set/set_sys.h b/src/core/hle/service/set/set_sys.h
index f602f3c77..13ee2cf46 100644
--- a/src/core/hle/service/set/set_sys.h
+++ b/src/core/hle/service/set/set_sys.h
@@ -20,6 +20,8 @@ private:
         BasicBlack = 1,
     };
 
+    void GetFirmwareVersion(Kernel::HLERequestContext& ctx);
+    void GetFirmwareVersion2(Kernel::HLERequestContext& ctx);
     void GetColorSetId(Kernel::HLERequestContext& ctx);
     void SetColorSetId(Kernel::HLERequestContext& ctx);
 
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 5caceb267..babc7e646 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -21,36 +21,8 @@
 #include "core/settings.h"
 
 namespace Loader {
-
-struct NsoSegmentHeader {
-    u32_le offset;
-    u32_le location;
-    u32_le size;
-    union {
-        u32_le alignment;
-        u32_le bss_size;
-    };
-};
-static_assert(sizeof(NsoSegmentHeader) == 0x10, "NsoSegmentHeader has incorrect size.");
-
-struct NsoHeader {
-    u32_le magic;
-    u32_le version;
-    INSERT_PADDING_WORDS(1);
-    u8 flags;
-    std::array<NsoSegmentHeader, 3> segments; // Text, RoData, Data (in that order)
-    std::array<u8, 0x20> build_id;
-    std::array<u32_le, 3> segments_compressed_size;
-
-    bool IsSegmentCompressed(size_t segment_num) const {
-        ASSERT_MSG(segment_num < 3, "Invalid segment {}", segment_num);
-        return ((flags >> segment_num) & 1);
-    }
-};
-static_assert(sizeof(NsoHeader) == 0x6c, "NsoHeader has incorrect size.");
-static_assert(std::is_trivially_copyable_v<NsoHeader>, "NsoHeader isn't trivially copyable.");
-
-struct ModHeader {
+namespace {
+struct MODHeader {
     u32_le magic;
     u32_le dynamic_offset;
     u32_le bss_start_offset;
@@ -59,25 +31,10 @@ struct ModHeader {
     u32_le eh_frame_hdr_end_offset;
     u32_le module_offset; // Offset to runtime-generated module object. typically equal to .bss base
 };
-static_assert(sizeof(ModHeader) == 0x1c, "ModHeader has incorrect size.");
-
-AppLoader_NSO::AppLoader_NSO(FileSys::VirtualFile file) : AppLoader(std::move(file)) {}
-
-FileType AppLoader_NSO::IdentifyType(const FileSys::VirtualFile& file) {
-    u32 magic = 0;
-    if (file->ReadObject(&magic) != sizeof(magic)) {
-        return FileType::Error;
-    }
-
-    if (Common::MakeMagic('N', 'S', 'O', '0') != magic) {
-        return FileType::Error;
-    }
-
-    return FileType::NSO;
-}
+static_assert(sizeof(MODHeader) == 0x1c, "MODHeader has incorrect size.");
 
-static std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
-                                         const NsoSegmentHeader& header) {
+std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
+                                  const NSOSegmentHeader& header) {
     std::vector<u8> uncompressed_data(header.size);
     const int bytes_uncompressed =
         LZ4_decompress_safe(reinterpret_cast<const char*>(compressed_data.data()),
@@ -91,23 +48,47 @@ static std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
     return uncompressed_data;
 }
 
-static constexpr u32 PageAlignSize(u32 size) {
+constexpr u32 PageAlignSize(u32 size) {
     return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
 }
+} // Anonymous namespace
+
+bool NSOHeader::IsSegmentCompressed(size_t segment_num) const {
+    ASSERT_MSG(segment_num < 3, "Invalid segment {}", segment_num);
+    return ((flags >> segment_num) & 1) != 0;
+}
+
+AppLoader_NSO::AppLoader_NSO(FileSys::VirtualFile file) : AppLoader(std::move(file)) {}
+
+FileType AppLoader_NSO::IdentifyType(const FileSys::VirtualFile& file) {
+    u32 magic = 0;
+    if (file->ReadObject(&magic) != sizeof(magic)) {
+        return FileType::Error;
+    }
+
+    if (Common::MakeMagic('N', 'S', 'O', '0') != magic) {
+        return FileType::Error;
+    }
+
+    return FileType::NSO;
+}
 
 std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
                                                const FileSys::VfsFile& file, VAddr load_base,
                                                bool should_pass_arguments,
                                                std::optional<FileSys::PatchManager> pm) {
-    if (file.GetSize() < sizeof(NsoHeader))
+    if (file.GetSize() < sizeof(NSOHeader)) {
         return {};
+    }
 
-    NsoHeader nso_header{};
-    if (sizeof(NsoHeader) != file.ReadObject(&nso_header))
+    NSOHeader nso_header{};
+    if (sizeof(NSOHeader) != file.ReadObject(&nso_header)) {
         return {};
+    }
 
-    if (nso_header.magic != Common::MakeMagic('N', 'S', 'O', '0'))
+    if (nso_header.magic != Common::MakeMagic('N', 'S', 'O', '0')) {
         return {};
+    }
 
     // Build program image
     Kernel::CodeSet codeset;
@@ -143,10 +124,10 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
     std::memcpy(&module_offset, program_image.data() + 4, sizeof(u32));
 
     // Read MOD header
-    ModHeader mod_header{};
+    MODHeader mod_header{};
     // Default .bss to size in segment header if MOD0 section doesn't exist
     u32 bss_size{PageAlignSize(nso_header.segments[2].bss_size)};
-    std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(ModHeader));
+    std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(MODHeader));
     const bool has_mod_header{mod_header.magic == Common::MakeMagic('M', 'O', 'D', '0')};
     if (has_mod_header) {
         // Resize program image to include .bss section and page align each section
@@ -158,22 +139,24 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
 
     // Apply patches if necessary
     if (pm && (pm->HasNSOPatch(nso_header.build_id) || Settings::values.dump_nso)) {
-        std::vector<u8> pi_header(program_image.size() + 0x100);
-        std::memcpy(pi_header.data(), &nso_header, sizeof(NsoHeader));
-        std::memcpy(pi_header.data() + 0x100, program_image.data(), program_image.size());
+        // Fill a buffer of exactly header + image size; inserting would grow it past that.
+        std::vector<u8> pi_header(sizeof(NSOHeader) + program_image.size());
+        std::memcpy(pi_header.data(), &nso_header, sizeof(NSOHeader));
+        std::memcpy(pi_header.data() + sizeof(NSOHeader), program_image.data(),
+                    program_image.size());
 
         pi_header = pm->PatchNSO(pi_header);
 
-        std::memcpy(program_image.data(), pi_header.data() + 0x100, program_image.size());
+        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin());
     }
 
     // Apply cheats if they exist and the program has a valid title ID
     if (pm) {
-        const auto cheats = pm->CreateCheatList(nso_header.build_id);
+        auto& system = Core::System::GetInstance();
+        const auto cheats = pm->CreateCheatList(system, nso_header.build_id);
         if (!cheats.empty()) {
-            Core::System::GetInstance().RegisterCheatList(
-                cheats, Common::HexArrayToString(nso_header.build_id), load_base,
-                load_base + program_image.size());
+            system.RegisterCheatList(cheats, Common::HexArrayToString(nso_header.build_id),
+                                     load_base, load_base + program_image.size());
         }
     }
 
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h
index 167c8a694..4674c3724 100644
--- a/src/core/loader/nso.h
+++ b/src/core/loader/nso.h
@@ -4,7 +4,9 @@
 
 #pragma once
 
+#include <array>
 #include <optional>
+#include <type_traits>
 #include "common/common_types.h"
 #include "common/swap.h"
 #include "core/file_sys/patch_manager.h"
@@ -16,6 +18,43 @@ class Process;
 
 namespace Loader {
 
+struct NSOSegmentHeader {
+    u32_le offset;
+    u32_le location;
+    u32_le size;
+    union { // Both names alias the same 32-bit field.
+        u32_le alignment;
+        u32_le bss_size;
+    };
+};
+static_assert(sizeof(NSOSegmentHeader) == 0x10, "NSOSegmentHeader has incorrect size.");
+
+struct NSOHeader {
+    using SHA256Hash = std::array<u8, 0x20>;
+
+    struct RODataRelativeExtent {
+        u32_le data_offset;
+        u32_le size;
+    };
+
+    u32_le magic;
+    u32_le version;
+    u32 reserved;
+    u32_le flags;
+    std::array<NSOSegmentHeader, 3> segments; // Text, RoData, Data (in that order)
+    std::array<u8, 0x20> build_id;
+    std::array<u32_le, 3> segments_compressed_size;
+    std::array<u8, 0x1C> padding;
+    RODataRelativeExtent api_info_extent;
+    RODataRelativeExtent dynstr_extent;
+    RODataRelativeExtent dynsyn_extent;
+    std::array<SHA256Hash, 3> segment_hashes;
+
+    bool IsSegmentCompressed(size_t segment_num) const;
+};
+static_assert(sizeof(NSOHeader) == 0x100, "NSOHeader has incorrect size.");
+static_assert(std::is_trivially_copyable_v<NSOHeader>, "NSOHeader must be trivially copyable.");
+
 constexpr u64 NSO_ARGUMENT_DATA_ALLOCATION_SIZE = 0x9000;
 
 struct NSOArgumentHeader {
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 365ac82b4..332c1037c 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -48,7 +48,7 @@ static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* me
               (base + size) * PAGE_SIZE);
 
-    // During boot, current_page_table might not be set yet, in which case we need not flush
+    // The system may not be powered on yet (e.g. during boot), in which case we need not flush
-    if (current_page_table) {
+    if (Core::System::GetInstance().IsPoweredOn()) {
         Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS,
                                                                    size * PAGE_SIZE);
     }
diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp
index c716a462b..4afd6c8a3 100644
--- a/src/core/perf_stats.cpp
+++ b/src/core/perf_stats.cpp
@@ -18,13 +18,13 @@ using std::chrono::microseconds;
 namespace Core {
 
 void PerfStats::BeginSystemFrame() {
-    std::lock_guard<std::mutex> lock(object_mutex);
+    std::lock_guard lock{object_mutex};
 
     frame_begin = Clock::now();
 }
 
 void PerfStats::EndSystemFrame() {
-    std::lock_guard<std::mutex> lock(object_mutex);
+    std::lock_guard lock{object_mutex};
 
     auto frame_end = Clock::now();
     accumulated_frametime += frame_end - frame_begin;
@@ -35,13 +35,13 @@ void PerfStats::EndSystemFrame() {
 }
 
 void PerfStats::EndGameFrame() {
-    std::lock_guard<std::mutex> lock(object_mutex);
+    std::lock_guard lock{object_mutex};
 
     game_frames += 1;
 }
 
 PerfStatsResults PerfStats::GetAndResetStats(microseconds current_system_time_us) {
-    std::lock_guard<std::mutex> lock(object_mutex);
+    std::lock_guard lock{object_mutex};
 
     const auto now = Clock::now();
     // Walltime elapsed since stats were reset
@@ -67,7 +67,7 @@ PerfStatsResults PerfStats::GetAndResetStats(microseconds current_system_time_us
 }
 
 double PerfStats::GetLastFrameTimeScale() {
-    std::lock_guard<std::mutex> lock(object_mutex);
+    std::lock_guard lock{object_mutex};
 
     constexpr double FRAME_LENGTH = 1.0 / 60;
     return duration_cast<DoubleSecs>(previous_frame_length).count() / FRAME_LENGTH;
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 6dd3139cc..6d32ebea3 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -82,7 +82,6 @@ void LogSetting(const std::string& name, const T& value) {
 void LogSettings() {
     LOG_INFO(Config, "yuzu Configuration:");
     LogSetting("System_UseDockedMode", Settings::values.use_docked_mode);
-    LogSetting("System_EnableNfc", Settings::values.enable_nfc);
     LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0));
     LogSetting("System_CurrentUser", Settings::values.current_user);
     LogSetting("System_LanguageIndex", Settings::values.language_index);
diff --git a/src/core/settings.h b/src/core/settings.h
index cdfb2f742..d543eb32f 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -349,7 +349,6 @@ struct TouchscreenInput {
 struct Values {
     // System
     bool use_docked_mode;
-    bool enable_nfc;
     std::optional<u32> rng_seed;
     // Measured in seconds since epoch
     std::optional<std::chrono::seconds> custom_rtc;
diff --git a/src/input_common/keyboard.cpp b/src/input_common/keyboard.cpp
index 525fe6abc..078374be5 100644
--- a/src/input_common/keyboard.cpp
+++ b/src/input_common/keyboard.cpp
@@ -36,18 +36,18 @@ struct KeyButtonPair {
 class KeyButtonList {
 public:
     void AddKeyButton(int key_code, KeyButton* key_button) {
-        std::lock_guard<std::mutex> guard(mutex);
+        std::lock_guard guard{mutex};
         list.push_back(KeyButtonPair{key_code, key_button});
     }
 
     void RemoveKeyButton(const KeyButton* key_button) {
-        std::lock_guard<std::mutex> guard(mutex);
+        std::lock_guard guard{mutex};
         list.remove_if(
             [key_button](const KeyButtonPair& pair) { return pair.key_button == key_button; });
     }
 
     void ChangeKeyStatus(int key_code, bool pressed) {
-        std::lock_guard<std::mutex> guard(mutex);
+        std::lock_guard guard{mutex};
         for (const KeyButtonPair& pair : list) {
             if (pair.key_code == key_code)
                 pair.key_button->status.store(pressed);
@@ -55,7 +55,7 @@ public:
     }
 
     void ChangeAllKeyStatus(bool pressed) {
-        std::lock_guard<std::mutex> guard(mutex);
+        std::lock_guard guard{mutex};
         for (const KeyButtonPair& pair : list) {
             pair.key_button->status.store(pressed);
         }
diff --git a/src/input_common/motion_emu.cpp b/src/input_common/motion_emu.cpp
index 6d96d4019..868251628 100644
--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -39,7 +39,7 @@ public:
     void Tilt(int x, int y) {
         auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
         if (is_tilting) {
-            std::lock_guard<std::mutex> guard(tilt_mutex);
+            std::lock_guard guard{tilt_mutex};
             if (mouse_move.x == 0 && mouse_move.y == 0) {
                 tilt_angle = 0;
             } else {
@@ -51,13 +51,13 @@ public:
     }
 
     void EndTilt() {
-        std::lock_guard<std::mutex> guard(tilt_mutex);
+        std::lock_guard guard{tilt_mutex};
         tilt_angle = 0;
         is_tilting = false;
     }
 
     std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
-        std::lock_guard<std::mutex> guard(status_mutex);
+        std::lock_guard guard{status_mutex};
         return status;
     }
 
@@ -93,7 +93,7 @@ private:
             old_q = q;
 
             {
-                std::lock_guard<std::mutex> guard(tilt_mutex);
+                std::lock_guard guard{tilt_mutex};
 
                 // Find the quaternion describing current 3DS tilting
                 q = Common::MakeQuaternion(
@@ -115,7 +115,7 @@ private:
 
             // Update the sensor state
             {
-                std::lock_guard<std::mutex> guard(status_mutex);
+                std::lock_guard guard{status_mutex};
                 status = std::make_tuple(gravity, angular_rate);
             }
         }
diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp
index b132d77f5..5949ecbae 100644
--- a/src/input_common/sdl/sdl_impl.cpp
+++ b/src/input_common/sdl/sdl_impl.cpp
@@ -55,22 +55,22 @@ public:
         : guid{std::move(guid_)}, port{port_}, sdl_joystick{joystick, deleter} {}
 
     void SetButton(int button, bool value) {
-        std::lock_guard<std::mutex> lock(mutex);
+        std::lock_guard lock{mutex};
         state.buttons[button] = value;
     }
 
     bool GetButton(int button) const {
-        std::lock_guard<std::mutex> lock(mutex);
+        std::lock_guard lock{mutex};
         return state.buttons.at(button);
     }
 
     void SetAxis(int axis, Sint16 value) {
-        std::lock_guard<std::mutex> lock(mutex);
+        std::lock_guard lock{mutex};
         state.axes[axis] = value;
     }
 
     float GetAxis(int axis) const {
-        std::lock_guard<std::mutex> lock(mutex);
+        std::lock_guard lock{mutex};
         return state.axes.at(axis) / 32767.0f;
     }
 
@@ -92,12 +92,12 @@ public:
     }
 
     void SetHat(int hat, Uint8 direction) {
-        std::lock_guard<std::mutex> lock(mutex);
+        std::lock_guard lock{mutex};
         state.hats[hat] = direction;
     }
 
     bool GetHatDirection(int hat, Uint8 direction) const {
-        std::lock_guard<std::mutex> lock(mutex);
+        std::lock_guard lock{mutex};
         return (state.hats.at(hat) & direction) != 0;
     }
     /**
@@ -140,7 +140,7 @@ private:
  * Get the nth joystick with the corresponding GUID
  */
 std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickByGUID(const std::string& guid, int port) {
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
+    std::lock_guard lock{joystick_map_mutex};
     const auto it = joystick_map.find(guid);
     if (it != joystick_map.end()) {
         while (it->second.size() <= port) {
@@ -161,7 +161,8 @@ std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickByGUID(const std::string& g
 std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickBySDLID(SDL_JoystickID sdl_id) {
     auto sdl_joystick = SDL_JoystickFromInstanceID(sdl_id);
     const std::string guid = GetGUID(sdl_joystick);
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
+
+    std::lock_guard lock{joystick_map_mutex};
     auto map_it = joystick_map.find(guid);
     if (map_it != joystick_map.end()) {
         auto vec_it = std::find_if(map_it->second.begin(), map_it->second.end(),
@@ -198,8 +199,9 @@ void SDLState::InitJoystick(int joystick_index) {
         LOG_ERROR(Input, "failed to open joystick {}", joystick_index);
         return;
     }
-    std::string guid = GetGUID(sdl_joystick);
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
+    const std::string guid = GetGUID(sdl_joystick);
+
+    std::lock_guard lock{joystick_map_mutex};
     if (joystick_map.find(guid) == joystick_map.end()) {
         auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
         joystick_map[guid].emplace_back(std::move(joystick));
@@ -221,7 +223,7 @@ void SDLState::CloseJoystick(SDL_Joystick* sdl_joystick) {
     std::string guid = GetGUID(sdl_joystick);
     std::shared_ptr<SDLJoystick> joystick;
     {
-        std::lock_guard<std::mutex> lock(joystick_map_mutex);
+        std::lock_guard lock{joystick_map_mutex};
         // This call to guid is safe since the joystick is guaranteed to be in the map
         auto& joystick_guid_list = joystick_map[guid];
         const auto joystick_it =
@@ -274,7 +276,7 @@ void SDLState::HandleGameControllerEvent(const SDL_Event& event) {
 }
 
 void SDLState::CloseJoysticks() {
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
+    std::lock_guard lock{joystick_map_mutex};
     joystick_map.clear();
 }
 
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index d0284bdf4..c7038b217 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,5 +1,7 @@
 add_executable(tests
     common/bit_field.cpp
+    common/bit_utils.cpp
+    common/multi_level_queue.cpp
     common/param_package.cpp
     common/ring_buffer.cpp
     core/arm/arm_test_common.cpp
diff --git a/src/tests/common/bit_utils.cpp b/src/tests/common/bit_utils.cpp
new file mode 100644
index 000000000..479b5995a
--- /dev/null
+++ b/src/tests/common/bit_utils.cpp
@@ -0,0 +1,23 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <catch2/catch.hpp>
+#include <math.h>
+#include "common/bit_util.h"
+
+namespace Common {
+
+TEST_CASE("BitUtils::CountTrailingZeroes", "[common]") {
+    REQUIRE(CountTrailingZeroes32(0) == 32); // Zero input reports the full operand width
+    REQUIRE(CountTrailingZeroes64(0) == 64);
+    REQUIRE(CountTrailingZeroes32(9) == 0); // Bit 0 is set
+    REQUIRE(CountTrailingZeroes32(8) == 3);
+    REQUIRE(CountTrailingZeroes32(0x801000) == 12); // Only the lowest set bit counts
+    REQUIRE(CountTrailingZeroes64(9) == 0);
+    REQUIRE(CountTrailingZeroes64(8) == 3);
+    REQUIRE(CountTrailingZeroes64(0x801000) == 12);
+    REQUIRE(CountTrailingZeroes64(0x801000000000UL) == 36); // Lowest set bit is above bit 31
+}
+
+} // namespace Common
diff --git a/src/tests/common/multi_level_queue.cpp b/src/tests/common/multi_level_queue.cpp
new file mode 100644
index 000000000..cca7ec7da
--- /dev/null
+++ b/src/tests/common/multi_level_queue.cpp
@@ -0,0 +1,70 @@
+// Copyright 2019 Yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <catch2/catch.hpp>
+#include <math.h>
+#include "common/common_types.h"
+#include "common/multi_level_queue.h"
+
+namespace Common {
+
+TEST_CASE("MultiLevelQueue", "[common]") {
+    std::array<f32, 8> values = {0.0, 5.0, 1.0, 9.0, 8.0, 2.0, 6.0, 7.0};
+    Common::MultiLevelQueue<f32, 64> mlq;
+    REQUIRE(mlq.empty());
+
+    // Insert values[i] at priority i, in scrambled order.
+    mlq.add(values[2], 2);
+    mlq.add(values[7], 7);
+    mlq.add(values[3], 3);
+    mlq.add(values[4], 4);
+    mlq.add(values[0], 0);
+    mlq.add(values[5], 5);
+    mlq.add(values[6], 6);
+    mlq.add(values[1], 1);
+
+    // Iteration is expected to visit the elements in order of increasing priority number.
+    u32 index = 0;
+    bool all_set = true;
+    for (auto& f : mlq) {
+        // Guard the lookup so an iterator that yields too many elements fails the test instead
+        // of reading past the end of `values`.
+        all_set &= (index < values.size() && f == values[index]);
+        index++;
+    }
+    REQUIRE(all_set);
+    REQUIRE(!mlq.empty());
+
+    // With the third argument set to false, the new element becomes the front of its level.
+    f32 v = 8.0;
+    mlq.add(v, 2);
+    v = -7.0;
+    mlq.add(v, 2, false);
+    REQUIRE(mlq.front(2) == -7.0);
+
+    // yield() moves the front element of the level to the back.
+    mlq.yield(2);
+    REQUIRE(mlq.front(2) == values[2]);
+    REQUIRE(mlq.back(2) == -7.0);
+
+    // adjust() moves an element from one priority level to another.
+    REQUIRE(mlq.empty(8));
+    v = 10.0;
+    mlq.add(v, 8);
+    mlq.adjust(v, 8, 9);
+    REQUIRE(mlq.front(9) == v);
+    REQUIRE(mlq.empty(8));
+    REQUIRE(!mlq.empty(9));
+
+    // Emptying a level updates the reported highest and lowest occupied priorities.
+    mlq.adjust(values[0], 0, 9);
+    REQUIRE(mlq.highest_priority_set() == 1);
+    REQUIRE(mlq.lowest_priority_set() == 9);
+    mlq.remove(values[1], 1);
+    REQUIRE(mlq.highest_priority_set() == 2);
+    REQUIRE(mlq.empty(1));
+}
+
+} // namespace Common
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 5ffb492ea..f0ef67535 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -10,7 +10,7 @@ namespace Tegra {
 
 void DebugContext::DoOnEvent(Event event, void* data) {
     {
-        std::unique_lock<std::mutex> lock(breakpoint_mutex);
+        std::unique_lock lock{breakpoint_mutex};
 
         // TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will
         // show on debug widgets
@@ -32,7 +32,7 @@ void DebugContext::DoOnEvent(Event event, void* data) {
 
 void DebugContext::Resume() {
     {
-        std::lock_guard<std::mutex> lock(breakpoint_mutex);
+        std::lock_guard lock{breakpoint_mutex};
 
         // Tell all observers that we are about to resume
         for (auto& breakpoint_observer : breakpoint_observers) {
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index c235faf46..ac3a2eb01 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -40,7 +40,7 @@ public:
         /// Constructs the object such that it observes events of the given DebugContext.
         explicit BreakPointObserver(std::shared_ptr<DebugContext> debug_context)
             : context_weak(debug_context) {
-            std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex);
+            std::unique_lock lock{debug_context->breakpoint_mutex};
             debug_context->breakpoint_observers.push_back(this);
         }
 
@@ -48,7 +48,7 @@ public:
             auto context = context_weak.lock();
             if (context) {
                 {
-                    std::unique_lock<std::mutex> lock(context->breakpoint_mutex);
+                    std::unique_lock lock{context->breakpoint_mutex};
                     context->breakpoint_observers.remove(this);
                 }
 
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 27a36348c..6ab06518f 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -9,7 +9,6 @@
 
 #include "common/bit_field.h"
 #include "common/common_types.h"
-#include "video_core/memory_manager.h"
 
 namespace Tegra {
 
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 0931b9626..e259bf46b 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -46,7 +46,7 @@ void KeplerMemory::ProcessData(u32 data) {
     // contain a dirty surface that will have to be written back to memory.
     const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
     rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
-    memory_manager.Write32(address, data);
+    memory_manager.Write<u32>(address, data);
 
     system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
 
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index c5d5be4ef..defcfbd3f 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -307,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() {
             // Write the current query sequence to the sequence address.
             // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
             // query.
-            memory_manager.Write32(sequence_address, sequence);
+            memory_manager.Write<u32>(sequence_address, sequence);
         } else {
             // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
             // GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -395,7 +395,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
 
     u8* ptr{memory_manager.GetPointer(address)};
     rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
-    memory_manager.Write32(address, value);
+    memory_manager.Write<u32>(address, value);
 
     dirty_flags.OnMemoryWrite();
 
@@ -447,7 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
     for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
          current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
 
-        const Texture::TextureHandle tex_handle{memory_manager.Read32(current_texture)};
+        const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)};
 
         Texture::FullTextureInfo tex_info{};
         // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -482,7 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
 
     ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
 
-    const Texture::TextureHandle tex_handle{memory_manager.Read32(tex_info_address)};
+    const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
 
     Texture::FullTextureInfo tex_info{};
     tex_info.index = static_cast<u32>(offset);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index a0ded4c25..5cca5c29a 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -88,6 +88,16 @@ void MaxwellDMA::HandleCopy() {
     auto source_ptr{memory_manager.GetPointer(source)};
     auto dst_ptr{memory_manager.GetPointer(dest)};
 
+    if (!source_ptr) {
+        LOG_ERROR(HW_GPU, "source_ptr is invalid");
+        return;
+    }
+
+    if (!dst_ptr) {
+        LOG_ERROR(HW_GPU, "dst_ptr is invalid");
+        return;
+    }
+
     const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
         // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
         // copying.
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 66c690494..30b29e14d 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -12,6 +12,7 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_base.h"
 
 namespace Tegra {
@@ -285,9 +286,10 @@ void GPU::ProcessSemaphoreTriggerMethod() {
         // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
         // CoreTiming
         block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
-        memory_manager->WriteBlock(regs.smaphore_address.SmaphoreAddress(), &block, sizeof(block));
+        memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
+                                   sizeof(block));
     } else {
-        const u32 word{memory_manager->Read32(regs.smaphore_address.SmaphoreAddress())};
+        const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
         if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
             (op == GpuSemaphoreOperation::AcquireGequal &&
              static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
@@ -314,11 +316,11 @@ void GPU::ProcessSemaphoreTriggerMethod() {
 }
 
 void GPU::ProcessSemaphoreRelease() {
-    memory_manager->Write32(regs.smaphore_address.SmaphoreAddress(), regs.semaphore_release);
+    memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
 }
 
 void GPU::ProcessSemaphoreAcquire() {
-    const u32 word = memory_manager->Read32(regs.smaphore_address.SmaphoreAddress());
+    const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
     const auto value = regs.semaphore_acquire;
     if (word != value) {
         regs.acquire_active = true;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index a14b95c30..de30ea354 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -9,7 +9,6 @@
 #include "common/common_types.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "video_core/dma_pusher.h"
-#include "video_core/memory_manager.h"
 
 using CacheAddr = std::uintptr_t;
 inline CacheAddr ToCacheAddr(const void* host_ptr) {
@@ -124,6 +123,8 @@ enum class EngineID {
     MAXWELL_DMA_COPY_A = 0xB0B5,
 };
 
+class MemoryManager;
+
 class GPU {
 public:
     explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
@@ -176,11 +177,11 @@ public:
                     u32 address_high;
                     u32 address_low;
 
-                    GPUVAddr SmaphoreAddress() const {
+                    GPUVAddr SemaphoreAddress() const {
                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
                                                      address_low);
                     }
-                } smaphore_address;
+                } semaphore_address;
 
                 u32 semaphore_sequence;
                 u32 semaphore_trigger;
@@ -244,9 +245,8 @@ protected:
 private:
     std::unique_ptr<Tegra::MemoryManager> memory_manager;
 
-    /// Mapping of command subchannels to their bound engine ids.
+    /// Mapping of command subchannels to their bound engine ids
     std::array<EngineID, 8> bound_engines = {};
-
     /// 3D engine
     std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
     /// 2D engine
@@ -263,7 +263,7 @@ private:
     static_assert(offsetof(GPU::Regs, field_name) == position * 4,                                 \
                   "Field " #field_name " has invalid position")
 
-ASSERT_REG_POSITION(smaphore_address, 0x4);
+ASSERT_REG_POSITION(semaphore_address, 0x4);
 ASSERT_REG_POSITION(semaphore_sequence, 0x6);
 ASSERT_REG_POSITION(semaphore_trigger, 0x7);
 ASSERT_REG_POSITION(reference_count, 0x14);
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 086b2f625..c5dc199c5 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -52,8 +52,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
 }
 
 ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
-    : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
-                                                         std::ref(dma_pusher), std::ref(state)} {}
+    : renderer{renderer}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher),
+                                 std::ref(state)} {}
 
 ThreadManager::~ThreadManager() {
     // Notify GPU thread that a shutdown is pending
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 8cd7db1c6..70acb2e79 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -4,10 +4,8 @@
 
 #pragma once
 
-#include <array>
 #include <atomic>
 #include <condition_variable>
-#include <memory>
 #include <mutex>
 #include <optional>
 #include <thread>
@@ -97,13 +95,13 @@ struct SynchState final {
     std::condition_variable frames_condition;
 
     void IncrementFramesCounter() {
-        std::lock_guard<std::mutex> lock{frames_mutex};
+        std::lock_guard lock{frames_mutex};
         ++queued_frame_count;
     }
 
     void DecrementFramesCounter() {
         {
-            std::lock_guard<std::mutex> lock{frames_mutex};
+            std::lock_guard lock{frames_mutex};
             --queued_frame_count;
 
             if (queued_frame_count) {
@@ -115,7 +113,7 @@ struct SynchState final {
 
     void WaitForFrames() {
         {
-            std::lock_guard<std::mutex> lock{frames_mutex};
+            std::lock_guard lock{frames_mutex};
             if (!queued_frame_count) {
                 return;
             }
@@ -123,14 +121,14 @@ struct SynchState final {
 
         // Wait for the GPU to be idle (all commands to be executed)
         {
-            std::unique_lock<std::mutex> lock{frames_mutex};
+            std::unique_lock lock{frames_mutex};
             frames_condition.wait(lock, [this] { return !queued_frame_count; });
         }
     }
 
     void SignalCommands() {
         {
-            std::unique_lock<std::mutex> lock{commands_mutex};
+            std::unique_lock lock{commands_mutex};
             if (queue.Empty()) {
                 return;
             }
@@ -140,7 +138,7 @@ struct SynchState final {
     }
 
     void WaitForCommands() {
-        std::unique_lock<std::mutex> lock{commands_mutex};
+        std::unique_lock lock{commands_mutex};
         commands_condition.wait(lock, [this] { return !queue.Empty(); });
     }
 
@@ -177,7 +175,6 @@ private:
 private:
     SynchState state;
     VideoCore::RendererBase& renderer;
-    Tegra::DmaPusher& dma_pusher;
     std::thread thread;
     std::thread::id thread_id;
 };
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 8e8f36f28..e76b59842 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,218 +5,446 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/memory.h"
+#include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
 
 namespace Tegra {
 
 MemoryManager::MemoryManager() {
-    // Mark the first page as reserved, so that 0 is not a valid GPUVAddr. Otherwise, games might
-    // try to use 0 as a valid address, which is also used to mean nullptr. This fixes a bug with
-    // Undertale using 0 for a render target.
-    PageSlot(0) = static_cast<u64>(PageStatus::Reserved);
-}
-
-GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
-    const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)};
+    std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
+    std::fill(page_table.attributes.begin(), page_table.attributes.end(),
+              Common::PageType::Unmapped);
+    page_table.Resize(address_space_width);
 
-    ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
+    // Initialize the map with a single free region covering the entire managed space.
+    VirtualMemoryArea initial_vma;
+    initial_vma.size = address_space_end;
+    vma_map.emplace(initial_vma.base, initial_vma);
 
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(*gpu_addr + offset)};
+    UpdatePageTableForVMA(initial_vma);
+}
 
-        ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+    const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
 
-        slot = static_cast<u64>(PageStatus::Allocated);
-    }
+    AllocateMemory(gpu_addr, 0, aligned_size);
 
-    return *gpu_addr;
+    return gpu_addr;
 }
 
 GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(gpu_addr + offset)};
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
 
-        ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
-
-        slot = static_cast<u64>(PageStatus::Allocated);
-    }
+    AllocateMemory(gpu_addr, 0, aligned_size);
 
     return gpu_addr;
 }
 
 GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
-    const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, PAGE_SIZE, PageStatus::Unmapped)};
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+    const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
 
-    ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
+    MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
 
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(*gpu_addr + offset)};
+    return gpu_addr;
+}
 
-        ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
+    ASSERT((gpu_addr & page_mask) == 0);
 
-        slot = cpu_addr + offset;
-    }
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
 
-    const MappedRegion region{cpu_addr, *gpu_addr, size};
-    mapped_regions.push_back(region);
+    MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
 
-    return *gpu_addr;
+    return gpu_addr;
 }
 
-GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
-    ASSERT((gpu_addr & PAGE_MASK) == 0);
+GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
+    ASSERT((gpu_addr & page_mask) == 0);
 
-    if (PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Allocated)) {
-        // Page has been already mapped. In this case, we must find a new area of memory to use that
-        // is different than the specified one. Super Mario Odyssey hits this scenario when changing
-        // areas, but we do not want to overwrite the old pages.
-        // TODO(bunnei): We need to write a hardware test to confirm this behavior.
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+    const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
 
-        LOG_ERROR(HW_GPU, "attempting to map addr 0x{:016X}, which is not available!", gpu_addr);
+    Core::System::GetInstance().Renderer().Rasterizer().FlushAndInvalidateRegion(cache_addr,
+                                                                                 aligned_size);
+    UnmapRange(gpu_addr, aligned_size);
 
-        const std::optional<GPUVAddr> new_gpu_addr{
-            FindFreeBlock(gpu_addr, size, PAGE_SIZE, PageStatus::Allocated)};
+    return gpu_addr;
+}
 
-        ASSERT_MSG(new_gpu_addr, "unable to find available GPU memory");
+GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) {
+    // Find the first unmapped VMA that has `size` free bytes at or after region_start.
+    const VMAHandle vma_handle{std::find_if(vma_map.begin(), vma_map.end(), [&](const auto& vma) {
+        if (vma.second.type != VirtualMemoryArea::Type::Unmapped) {
+            return false;
+        }
 
-        gpu_addr = *new_gpu_addr;
+        const GPUVAddr vma_end{vma.second.base + vma.second.size};
+        return vma_end > region_start && vma_end - std::max(region_start, vma.second.base) >= size;
+    })};
+
+    if (vma_handle == vma_map.end()) {
+        return {};
     }
 
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(gpu_addr + offset)};
+    return std::max(region_start, vma_handle->second.base);
+}
 
-        ASSERT(slot == static_cast<u64>(PageStatus::Allocated));
+bool MemoryManager::IsAddressValid(GPUVAddr addr) const {
+    return (addr >> page_bits) < page_table.pointers.size();
+}
 
-        slot = cpu_addr + offset;
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) {
+    if (!IsAddressValid(addr)) {
+        return {};
     }
 
-    const MappedRegion region{cpu_addr, gpu_addr, size};
-    mapped_regions.push_back(region);
+    VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]};
+    if (cpu_addr) {
+        return cpu_addr + (addr & page_mask);
+    }
 
-    return gpu_addr;
+    return {};
 }
 
-GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
-    ASSERT((gpu_addr & PAGE_MASK) == 0);
+template <typename T>
+T MemoryManager::Read(GPUVAddr addr) {
+    if (!IsAddressValid(addr)) {
+        return {};
+    }
 
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(gpu_addr + offset)};
+    const u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    if (page_pointer) {
+        // NOTE: Avoid adding any extra logic to this fast-path block
+        T value;
+        std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
+        return value;
+    }
 
-        ASSERT(slot != static_cast<u64>(PageStatus::Allocated) &&
-               slot != static_cast<u64>(PageStatus::Unmapped));
+    switch (page_table.attributes[addr >> page_bits]) {
+    case Common::PageType::Unmapped:
+        LOG_ERROR(HW_GPU, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, addr);
+        return 0;
+    case Common::PageType::Memory:
+        ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
+        break;
+    default:
+        UNREACHABLE();
+    }
+    return {};
+}
 
-        slot = static_cast<u64>(PageStatus::Unmapped);
+template <typename T>
+void MemoryManager::Write(GPUVAddr addr, T data) {
+    if (!IsAddressValid(addr)) {
+        return;
     }
 
-    // Delete the region mappings that are contained within the unmapped region
-    mapped_regions.erase(std::remove_if(mapped_regions.begin(), mapped_regions.end(),
-                                        [&](const MappedRegion& region) {
-                                            return region.gpu_addr <= gpu_addr &&
-                                                   region.gpu_addr + region.size < gpu_addr + size;
-                                        }),
-                         mapped_regions.end());
-    return gpu_addr;
-}
+    u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    if (page_pointer) {
+        // NOTE: Avoid adding any extra logic to this fast-path block
+        std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
+        return;
+    }
 
-GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const {
-    for (const auto& region : mapped_regions) {
-        const GPUVAddr region_end{region.gpu_addr + region.size};
-        if (region_start >= region.gpu_addr && region_start < region_end) {
-            return region_end;
-        }
+    switch (page_table.attributes[addr >> page_bits]) {
+    case Common::PageType::Unmapped: // Widen to u64 so a 64-bit value is logged in full
+        LOG_ERROR(HW_GPU, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
+                  static_cast<u64>(data), addr);
+        return;
+    case Common::PageType::Memory:
+        ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
+        break;
+    default:
+        UNREACHABLE();
     }
-    return {};
 }
 
-std::optional<GPUVAddr> MemoryManager::FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
-                                                     PageStatus status) {
-    GPUVAddr gpu_addr{region_start};
-    u64 free_space{};
-    align = (align + PAGE_MASK) & ~PAGE_MASK;
-
-    while (gpu_addr + free_space < MAX_ADDRESS) {
-        if (PageSlot(gpu_addr + free_space) == static_cast<u64>(status)) {
-            free_space += PAGE_SIZE;
-            if (free_space >= size) {
-                return gpu_addr;
-            }
-        } else {
-            gpu_addr += free_space + PAGE_SIZE;
-            free_space = 0;
-            gpu_addr = Common::AlignUp(gpu_addr, align);
-        }
+template u8 MemoryManager::Read<u8>(GPUVAddr addr);
+template u16 MemoryManager::Read<u16>(GPUVAddr addr);
+template u32 MemoryManager::Read<u32>(GPUVAddr addr);
+template u64 MemoryManager::Read<u64>(GPUVAddr addr);
+template void MemoryManager::Write<u8>(GPUVAddr addr, u8 data);
+template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data);
+template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
+template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data);
+
+u8* MemoryManager::GetPointer(GPUVAddr addr) {
+    if (!IsAddressValid(addr)) {
+        return {};
     }
 
+    u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    if (page_pointer) {
+        return page_pointer + (addr & page_mask);
+    }
+
+    LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
     return {};
 }
 
-std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
-    const VAddr base_addr{PageSlot(gpu_addr)};
+void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) {
+    std::memcpy(dest_buffer, GetPointer(src_addr), size);
+}
+void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
+    std::memcpy(GetPointer(dest_addr), src_buffer, size);
+}
+
+void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
+    std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size);
+}
 
-    if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
-        base_addr == static_cast<u64>(PageStatus::Unmapped) ||
-        base_addr == static_cast<u64>(PageStatus::Reserved)) {
-        return {};
+void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
+                             VAddr backing_addr) {
+    LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
+              (base + size) * page_size);
+
+    const VAddr end{base + size};
+    ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
+               base + page_table.pointers.size());
+
+    std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
+
+    if (memory == nullptr) {
+        std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
+        std::fill(page_table.backing_addr.begin() + base, page_table.backing_addr.begin() + end,
+                  backing_addr);
+    } else {
+        while (base != end) {
+            page_table.pointers[base] = memory;
+            page_table.backing_addr[base] = backing_addr;
+
+            base += 1;
+            memory += page_size;
+            backing_addr += page_size;
+        }
     }
+}
 
-    return base_addr + (gpu_addr & PAGE_MASK);
+void MemoryManager::MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr) {
+    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
+    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
+    MapPages(base / page_size, size / page_size, target, Common::PageType::Memory, backing_addr);
 }
 
-u8 MemoryManager::Read8(GPUVAddr addr) {
-    return Memory::Read8(*GpuToCpuAddress(addr));
+void MemoryManager::UnmapRegion(GPUVAddr base, u64 size) {
+    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
+    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
+    MapPages(base / page_size, size / page_size, nullptr, Common::PageType::Unmapped);
 }
 
-u16 MemoryManager::Read16(GPUVAddr addr) {
-    return Memory::Read16(*GpuToCpuAddress(addr));
+bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
+    ASSERT(base + size == next.base);
+    if (type != next.type) {
+        return {};
+    }
+    if (type == VirtualMemoryArea::Type::Allocated && (offset + size != next.offset)) {
+        return {};
+    }
+    if (type == VirtualMemoryArea::Type::Mapped && backing_memory + size != next.backing_memory) {
+        return {};
+    }
+    return true;
 }
 
-u32 MemoryManager::Read32(GPUVAddr addr) {
-    return Memory::Read32(*GpuToCpuAddress(addr));
+MemoryManager::VMAHandle MemoryManager::FindVMA(GPUVAddr target) const {
+    if (target >= address_space_end) {
+        return vma_map.end();
+    } else {
+        return std::prev(vma_map.upper_bound(target));
+    }
 }
 
-u64 MemoryManager::Read64(GPUVAddr addr) {
-    return Memory::Read64(*GpuToCpuAddress(addr));
+MemoryManager::VMAIter MemoryManager::Allocate(VMAIter vma_handle) {
+    VirtualMemoryArea& vma{vma_handle->second};
+
+    vma.type = VirtualMemoryArea::Type::Allocated;
+    vma.backing_addr = 0;
+    vma.backing_memory = {};
+    UpdatePageTableForVMA(vma);
+
+    return MergeAdjacent(vma_handle);
 }
 
-void MemoryManager::Write8(GPUVAddr addr, u8 data) {
-    Memory::Write8(*GpuToCpuAddress(addr), data);
+MemoryManager::VMAHandle MemoryManager::AllocateMemory(GPUVAddr target, std::size_t offset,
+                                                       u64 size) {
+
+    // This is the appropriately sized VMA that will turn into our allocation.
+    VMAIter vma_handle{CarveVMA(target, size)};
+    VirtualMemoryArea& vma{vma_handle->second};
+
+    ASSERT(vma.size == size);
+
+    vma.offset = offset;
+
+    return Allocate(vma_handle);
 }
 
-void MemoryManager::Write16(GPUVAddr addr, u16 data) {
-    Memory::Write16(*GpuToCpuAddress(addr), data);
+MemoryManager::VMAHandle MemoryManager::MapBackingMemory(GPUVAddr target, u8* memory, u64 size,
+                                                         VAddr backing_addr) {
+    // This is the appropriately sized VMA that will turn into our mapping.
+    VMAIter vma_handle{CarveVMA(target, size)};
+    VirtualMemoryArea& vma{vma_handle->second};
+
+    ASSERT(vma.size == size);
+
+    vma.type = VirtualMemoryArea::Type::Mapped;
+    vma.backing_memory = memory;
+    vma.backing_addr = backing_addr;
+    UpdatePageTableForVMA(vma);
+
+    return MergeAdjacent(vma_handle);
 }
 
-void MemoryManager::Write32(GPUVAddr addr, u32 data) {
-    Memory::Write32(*GpuToCpuAddress(addr), data);
+void MemoryManager::UnmapRange(GPUVAddr target, u64 size) {
+    VMAIter vma{CarveVMARange(target, size)};
+    const VAddr target_end{target + size};
+    const VMAIter end{vma_map.end()};
+
+    // The comparison against the end of the range must be done using addresses since VMAs can be
+    // merged during this process, causing invalidation of the iterators.
+    while (vma != end && vma->second.base < target_end) {
+        // Unmapped ranges return to allocated state and can be reused
+        // This behavior is used by Super Mario Odyssey, Sonic Forces, and likely other games
+        vma = std::next(Allocate(vma));
+    }
+
+    ASSERT(FindVMA(target)->second.size >= size);
 }
 
-void MemoryManager::Write64(GPUVAddr addr, u64 data) {
-    Memory::Write64(*GpuToCpuAddress(addr), data);
+MemoryManager::VMAIter MemoryManager::StripIterConstness(const VMAHandle& iter) {
+    // This uses a neat C++ trick to convert a const_iterator to a regular iterator, given
+    // non-const access to its container.
+    return vma_map.erase(iter, iter); // Erases an empty range of elements
 }
 
-u8* MemoryManager::GetPointer(GPUVAddr addr) {
-    return Memory::GetPointer(*GpuToCpuAddress(addr));
+MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
+    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
+    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: 0x{:016X}", base);
+
+    VMAIter vma_handle{StripIterConstness(FindVMA(base))};
+    if (vma_handle == vma_map.end()) {
+        // Target address is outside the managed range
+        return {};
+    }
+
+    const VirtualMemoryArea& vma{vma_handle->second};
+    if (vma.type == VirtualMemoryArea::Type::Mapped) {
+        // Region is already mapped
+        return {};
+    }
+
+    const VAddr start_in_vma{base - vma.base};
+    const VAddr end_in_vma{start_in_vma + size};
+
+    ASSERT_MSG(end_in_vma <= vma.size, "region size 0x{:016X} is less than required size 0x{:016X}",
+               vma.size, end_in_vma);
+
+    if (end_in_vma < vma.size) {
+        // Split VMA at the end of the allocated region
+        SplitVMA(vma_handle, end_in_vma);
+    }
+    if (start_in_vma != 0) {
+        // Split VMA at the start of the allocated region
+        vma_handle = SplitVMA(vma_handle, start_in_vma);
+    }
+
+    return vma_handle;
 }
 
-void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) {
-    std::memcpy(dest_buffer, GetPointer(src_addr), size);
+MemoryManager::VMAIter MemoryManager::CarveVMARange(GPUVAddr target, u64 size) {
+    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
+    ASSERT_MSG((target & page_mask) == 0, "non-page aligned base: 0x{:016X}", target);
+
+    const VAddr target_end{target + size};
+    ASSERT(target_end >= target);
+    ASSERT(size > 0);
+
+    VMAIter begin_vma{StripIterConstness(FindVMA(target))};
+    const VMAIter i_end{vma_map.lower_bound(target_end)};
+    if (std::any_of(begin_vma, i_end, [](const auto& entry) {
+            return entry.second.type == VirtualMemoryArea::Type::Unmapped;
+        })) {
+        return {};
+    }
+
+    if (target != begin_vma->second.base) {
+        begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base);
+    }
+
+    VMAIter end_vma{StripIterConstness(FindVMA(target_end))};
+    if (end_vma != vma_map.end() && target_end != end_vma->second.base) {
+        end_vma = SplitVMA(end_vma, target_end - end_vma->second.base);
+    }
+
+    return begin_vma;
 }
-void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
-    std::memcpy(GetPointer(dest_addr), src_buffer, size);
+
+MemoryManager::VMAIter MemoryManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
+    VirtualMemoryArea& old_vma{vma_handle->second};
+    VirtualMemoryArea new_vma{old_vma}; // Make a copy of the VMA
+
+    // For now, don't allow no-op VMA splits (trying to split at a boundary) because it's probably
+    // a bug. This restriction might be removed later.
+    ASSERT(offset_in_vma < old_vma.size);
+    ASSERT(offset_in_vma > 0);
+
+    old_vma.size = offset_in_vma;
+    new_vma.base += offset_in_vma;
+    new_vma.size -= offset_in_vma;
+
+    switch (new_vma.type) {
+    case VirtualMemoryArea::Type::Unmapped:
+        break;
+    case VirtualMemoryArea::Type::Allocated:
+        new_vma.offset += offset_in_vma;
+        break;
+    case VirtualMemoryArea::Type::Mapped:
+        new_vma.backing_memory += offset_in_vma;
+        break;
+    }
+
+    ASSERT(old_vma.CanBeMergedWith(new_vma));
+
+    return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma);
 }
 
-void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
-    std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size);
+MemoryManager::VMAIter MemoryManager::MergeAdjacent(VMAIter iter) {
+    const VMAIter next_vma{std::next(iter)};
+    if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
+        iter->second.size += next_vma->second.size;
+        vma_map.erase(next_vma);
+    }
+
+    if (iter != vma_map.begin()) {
+        VMAIter prev_vma{std::prev(iter)};
+        if (prev_vma->second.CanBeMergedWith(iter->second)) {
+            prev_vma->second.size += iter->second.size;
+            vma_map.erase(iter);
+            iter = prev_vma;
+        }
+    }
+
+    return iter;
 }
 
-VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
-    auto& block{page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]};
-    if (!block) {
-        block = std::make_unique<PageBlock>();
-        block->fill(static_cast<VAddr>(PageStatus::Unmapped));
+void MemoryManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
+    switch (vma.type) {
+    case VirtualMemoryArea::Type::Unmapped:
+        UnmapRegion(vma.base, vma.size);
+        break;
+    case VirtualMemoryArea::Type::Allocated:
+        MapMemoryRegion(vma.base, vma.size, nullptr, vma.backing_addr);
+        break;
+    case VirtualMemoryArea::Type::Mapped:
+        MapMemoryRegion(vma.base, vma.size, vma.backing_memory, vma.backing_addr);
+        break;
     }
-    return (*block)[(gpu_addr >> PAGE_BITS) & PAGE_BLOCK_MASK];
 }
 
 } // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 425e2f31c..34744bb27 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -1,82 +1,148 @@
-// Copyright 2018 yuzu emulator team
+// Copyright 2018 yuzu emulator team
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
 #pragma once
 
-#include <array>
-#include <memory>
+#include <map>
 #include <optional>
-#include <vector>
 
 #include "common/common_types.h"
+#include "common/page_table.h"
 
 namespace Tegra {
 
-/// Virtual addresses in the GPU's memory map are 64 bit.
-using GPUVAddr = u64;
+/**
+ * Represents a VMA in an address space. A VMA is a contiguous region of virtual addressing space
+ * with homogeneous attributes across its extents. In this particular implementation each VMA is
+ * also backed by a single host memory allocation.
+ */
+struct VirtualMemoryArea {
+    enum class Type : u8 {
+        Unmapped,
+        Allocated,
+        Mapped,
+    };
+
+    /// Virtual base address of the region.
+    GPUVAddr base{};
+    /// Size of the region.
+    u64 size{};
+    /// Memory area mapping type.
+    Type type{Type::Unmapped};
+    /// CPU memory mapped address corresponding to this memory area.
+    VAddr backing_addr{};
+    /// Offset into the backing_memory the mapping starts from.
+    std::size_t offset{};
+    /// Pointer backing this VMA.
+    u8* backing_memory{};
+
+    /// Tests if this area can be merged to the right with `next`.
+    bool CanBeMergedWith(const VirtualMemoryArea& next) const;
+};
 
 class MemoryManager final {
 public:
     MemoryManager();
 
     GPUVAddr AllocateSpace(u64 size, u64 align);
-    GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align);
+    GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
     GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
-    GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size);
-    GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
-    GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
-    std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
-
-    static constexpr u64 PAGE_BITS = 16;
-    static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
-    static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
+    GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size);
+    GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size);
+    std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr);
 
-    u8 Read8(GPUVAddr addr);
-    u16 Read16(GPUVAddr addr);
-    u32 Read32(GPUVAddr addr);
-    u64 Read64(GPUVAddr addr);
+    template <typename T>
+    T Read(GPUVAddr addr);
 
-    void Write8(GPUVAddr addr, u8 data);
-    void Write16(GPUVAddr addr, u16 data);
-    void Write32(GPUVAddr addr, u32 data);
-    void Write64(GPUVAddr addr, u64 data);
+    template <typename T>
+    void Write(GPUVAddr addr, T data);
 
-    u8* GetPointer(GPUVAddr vaddr);
+    u8* GetPointer(GPUVAddr addr);
 
     void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size);
     void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
-    void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size);
+    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
 
 private:
-    enum class PageStatus : u64 {
-        Unmapped = 0xFFFFFFFFFFFFFFFFULL,
-        Allocated = 0xFFFFFFFFFFFFFFFEULL,
-        Reserved = 0xFFFFFFFFFFFFFFFDULL,
-    };
-
-    std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
-                                          PageStatus status);
-    VAddr& PageSlot(GPUVAddr gpu_addr);
-
-    static constexpr u64 MAX_ADDRESS{0x10000000000ULL};
-    static constexpr u64 PAGE_TABLE_BITS{10};
-    static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS};
-    static constexpr u64 PAGE_TABLE_MASK{PAGE_TABLE_SIZE - 1};
-    static constexpr u64 PAGE_BLOCK_BITS{14};
-    static constexpr u64 PAGE_BLOCK_SIZE{1 << PAGE_BLOCK_BITS};
-    static constexpr u64 PAGE_BLOCK_MASK{PAGE_BLOCK_SIZE - 1};
-
-    using PageBlock = std::array<VAddr, PAGE_BLOCK_SIZE>;
-    std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{};
-
-    struct MappedRegion {
-        VAddr cpu_addr;
-        GPUVAddr gpu_addr;
-        u64 size;
-    };
+    using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
+    using VMAHandle = VMAMap::const_iterator;
+    using VMAIter = VMAMap::iterator;
+
+    bool IsAddressValid(GPUVAddr addr) const;
+    void MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
+                  VAddr backing_addr = 0);
+    void MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr);
+    void UnmapRegion(GPUVAddr base, u64 size);
+
+    /// Finds the VMA that contains the given address, or returns `vma_map.end()`.
+    VMAHandle FindVMA(GPUVAddr target) const;
+
+    VMAHandle AllocateMemory(GPUVAddr target, std::size_t offset, u64 size);
+
+    /**
+     * Maps an unmanaged host memory pointer at a given address.
+     *
+     * @param target The guest address to start the mapping at.
+     * @param memory The memory to be mapped.
+     * @param size Size of the mapping.
+     * @param backing_addr CPU virtual address that corresponds to the mapped memory.
+     */
+    VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
+
+    /// Unmaps a range of addresses, splitting VMAs as necessary.
+    void UnmapRange(GPUVAddr target, u64 size);
+
+    /// Converts a VMAHandle to a mutable VMAIter.
+    VMAIter StripIterConstness(const VMAHandle& iter);
+
+    /// Marks the specified VMA as allocated.
+    VMAIter Allocate(VMAIter vma);
+
+    /**
+     * Carves a VMA of a specific size at the specified address by splitting non-Mapped VMAs while
+     * doing the appropriate error checking.
+     */
+    VMAIter CarveVMA(GPUVAddr base, u64 size);
+
+    /**
+     * Splits the edges of the given range of non-Unmapped VMAs so that there is a VMA split at
+     * each end of the range.
+     */
+    VMAIter CarveVMARange(GPUVAddr base, u64 size);
+
+    /**
+     * Splits a VMA in two, at the specified offset.
+     * @returns the right side of the split, with the original iterator becoming the left side.
+     */
+    VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma);
+
+    /**
+     * Checks for and merges the specified VMA with adjacent ones if possible.
+     * @returns the merged VMA or the original if no merging was possible.
+     */
+    VMAIter MergeAdjacent(VMAIter vma);
+
+    /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
+    void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
+
+    /// Finds a free (unmapped) region of the specified size starting at the specified address.
+    GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size);
 
-    std::vector<MappedRegion> mapped_regions;
+private:
+    static constexpr u64 page_bits{16};
+    static constexpr u64 page_size{1 << page_bits};
+    static constexpr u64 page_mask{page_size - 1};
+
+    /// Address space in bits, this is fairly arbitrary but sufficiently large.
+    static constexpr u32 address_space_width{39};
+    /// Start address for mapping, this is fairly arbitrary but must be non-zero.
+    static constexpr GPUVAddr address_space_base{0x100000};
+    /// End of address space, based on address space in bits.
+    static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
+
+    Common::PageTable page_table{page_bits};
+    VMAMap vma_map;
 };
 
 } // namespace Tegra
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index ecd9986a0..291772186 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -71,8 +71,8 @@ private:
     bool is_registered{};      ///< Whether the object is currently registered with the cache
     bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
     u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
-    CacheAddr cache_addr{};    ///< Cache address memory, unique from emulated virtual address space
     const u8* host_ptr{};      ///< Pointer to the memory backing this cached region
+    CacheAddr cache_addr{};    ///< Cache address memory, unique from emulated virtual address space
 };
 
 template <class T>
@@ -84,7 +84,7 @@ public:
 
     /// Write any cached resources overlapping the specified region back to memory
     void FlushRegion(CacheAddr addr, std::size_t size) {
-        std::lock_guard<std::recursive_mutex> lock{mutex};
+        std::lock_guard lock{mutex};
 
         const auto& objects{GetSortedObjectsFromRegion(addr, size)};
         for (auto& object : objects) {
@@ -94,7 +94,7 @@ public:
 
     /// Mark the specified region as being invalidated
     void InvalidateRegion(CacheAddr addr, u64 size) {
-        std::lock_guard<std::recursive_mutex> lock{mutex};
+        std::lock_guard lock{mutex};
 
         const auto& objects{GetSortedObjectsFromRegion(addr, size)};
         for (auto& object : objects) {
@@ -108,7 +108,7 @@ public:
 
     /// Invalidates everything in the cache
     void InvalidateAll() {
-        std::lock_guard<std::recursive_mutex> lock{mutex};
+        std::lock_guard lock{mutex};
 
         while (interval_cache.begin() != interval_cache.end()) {
             Unregister(*interval_cache.begin()->second.begin());
@@ -132,8 +132,8 @@ protected:
     }
 
     /// Register an object into the cache
-    void Register(const T& object) {
-        std::lock_guard<std::recursive_mutex> lock{mutex};
+    virtual void Register(const T& object) {
+        std::lock_guard lock{mutex};
 
         object->SetIsRegistered(true);
         interval_cache.add({GetInterval(object), ObjectSet{object}});
@@ -142,8 +142,8 @@ protected:
     }
 
     /// Unregisters an object from the cache
-    void Unregister(const T& object) {
-        std::lock_guard<std::recursive_mutex> lock{mutex};
+    virtual void Unregister(const T& object) {
+        std::lock_guard lock{mutex};
 
         object->SetIsRegistered(false);
         rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
@@ -153,14 +153,14 @@ protected:
 
     /// Returns a ticks counter used for tracking when cached objects were last modified
     u64 GetModifiedTicks() {
-        std::lock_guard<std::recursive_mutex> lock{mutex};
+        std::lock_guard lock{mutex};
 
         return ++modified_ticks;
     }
 
     /// Flushes the specified object, updating appropriate cache state as needed
     void FlushObject(const T& object) {
-        std::lock_guard<std::recursive_mutex> lock{mutex};
+        std::lock_guard lock{mutex};
 
         if (!object->IsDirty()) {
             return;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 76e292e87..d7b86df38 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -9,7 +9,6 @@
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
 
 namespace VideoCore {
 
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 5048ed6ce..fd091c84c 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -15,14 +15,14 @@ namespace OpenGL {
 
 CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
                                      std::size_t alignment, u8* host_ptr)
-    : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
-                                                                                host_ptr} {}
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
+      alignment{alignment} {}
 
 OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
     : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
 
-GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
-                                      std::size_t alignment, bool cache) {
+GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
+                                      bool cache) {
     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
 
     // Cache management is a big overhead, so only cache entries with a given size.
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 1de1f84ae..fc33aa433 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -58,7 +58,7 @@ public:
 
     /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
     /// allocated.
-    GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
+    GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
                           bool cache = true);
 
     /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index c8dbcacbd..da9326253 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -15,7 +15,7 @@
 namespace OpenGL {
 
 CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
-    : cpu_addr{cpu_addr}, size{size}, RasterizerCacheObject{host_ptr} {
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} {
     buffer.Create();
     // Bind and unbind the buffer so it gets allocated by the driver
     glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
@@ -46,7 +46,7 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr,
     return search->second;
 }
 
-GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size,
+GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size,
                                                               u8* host_ptr) {
     GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
     if (!region) {
@@ -76,8 +76,8 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
     const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
     const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
                     global_region.GetCbufOffset()};
-    const auto actual_addr{memory_manager.Read64(addr)};
-    const auto size{memory_manager.Read32(addr + 8)};
+    const auto actual_addr{memory_manager.Read<u64>(addr)};
+    const auto size{memory_manager.Read<u32>(addr + 8)};
 
     // Look up global region in the cache based on address
     const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index a840491f7..5a21ab66f 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -66,7 +66,7 @@ public:
 
 private:
     GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
-    GlobalRegion GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size, u8* host_ptr);
+    GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr);
     void ReserveGlobalRegion(GlobalRegion region);
 
     std::unordered_map<CacheAddr, GlobalRegion> reserve;
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
index 75d816795..2bcbd3da2 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -40,8 +40,7 @@ GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
     return index_offset;
 }
 
-GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size,
-                                             u32 count) {
+GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) {
     const std::size_t map_size{CalculateQuadSize(count)};
     auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
 
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h
index a8cb88eb5..0e2e7dc36 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.h
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h
@@ -24,7 +24,7 @@ public:
 
     GLintptr MakeQuadArray(u32 first, u32 count);
 
-    GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count);
+    GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count);
 
 private:
     OGLBufferCache& buffer_cache;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 198c54872..046fc935b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -100,11 +100,9 @@ struct FramebufferCacheKey {
     }
 };
 
-RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
-                                   ScreenInfo& info)
-    : res_cache{*this}, shader_cache{*this, system}, global_cache{*this},
-      emu_window{window}, system{system}, screen_info{info},
-      buffer_cache(*this, STREAM_BUFFER_SIZE) {
+RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
+    : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system},
+      screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
     // Create sampler objects
     for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
         texture_samplers[i].Create();
@@ -225,8 +223,8 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
         if (!vertex_array.IsEnabled())
             continue;
 
-        const Tegra::GPUVAddr start = vertex_array.StartAddress();
-        const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+        const GPUVAddr start = vertex_array.StartAddress();
+        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
 
         ASSERT(end > start);
         const u64 size = end - start + 1;
@@ -421,8 +419,8 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
         if (!regs.vertex_array[index].IsEnabled())
             continue;
 
-        const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress();
-        const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+        const GPUVAddr start = regs.vertex_array[index].StartAddress();
+        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
 
         ASSERT(end > start);
         size += end - start + 1;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 30f3e8acb..4de565321 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -50,8 +50,7 @@ struct FramebufferCacheKey;
 
 class RasterizerOpenGL : public VideoCore::RasterizerInterface {
 public:
-    explicit RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
-                              ScreenInfo& info);
+    explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info);
     ~RasterizerOpenGL() override;
 
     void DrawArrays() override;
@@ -214,7 +213,6 @@ private:
     ShaderCacheOpenGL shader_cache;
     GlobalRegionCacheOpenGL global_cache;
 
-    Core::Frontend::EmuWindow& emu_window;
     Core::System& system;
 
     ScreenInfo& screen_info;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 57329cd61..aba6ce731 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -55,7 +55,7 @@ static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
     }
 }
 
-void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
+void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) {
     auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
 
     gpu_addr = gpu_addr_;
@@ -222,7 +222,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
 }
 
 /*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
-    u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
+    u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
     u32 block_width, u32 block_height, u32 block_depth,
     Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
     SurfaceParams params{};
@@ -562,8 +562,14 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
 }
 
 CachedSurface::CachedSurface(const SurfaceParams& params)
-    : params{params}, gl_target{SurfaceTargetToGL(params.target)},
-      cached_size_in_bytes{params.size_in_bytes}, RasterizerCacheObject{params.host_ptr} {
+    : RasterizerCacheObject{params.host_ptr}, params{params},
+      gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} {
+
+    const auto optional_cpu_addr{
+        Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)};
+    ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid");
+    cpu_addr = *optional_cpu_addr;
+
     texture.Create(gl_target);
 
     // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
@@ -603,20 +609,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
     ApplyTextureDefaults(texture.handle, params.max_mip_level);
 
     OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
-
-    // Clamp size to mapped GPU memory region
-    // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
-    // R32F render buffer. We do not yet know if this is a game bug or something else, but this
-    // check is necessary to prevent flushing from overwriting unmapped memory.
-
-    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
-    const u64 max_size{memory_manager.GetRegionEnd(params.gpu_addr) - params.gpu_addr};
-    if (cached_size_in_bytes > max_size) {
-        LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
-        cached_size_in_bytes = max_size;
-    }
-
-    cpu_addr = *memory_manager.GpuToCpuAddress(params.gpu_addr);
 }
 
 MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
@@ -925,7 +917,7 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
 }
 
 Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
-    if (params.gpu_addr == 0 || params.height * params.width == 0) {
+    if (!params.IsValid()) {
         return {};
     }
 
@@ -941,7 +933,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
             // If surface parameters changed and we care about keeping the previous data, recreate
             // the surface from the old one
             Surface new_surface{RecreateSurface(surface, params)};
-            UnregisterSurface(surface);
+            Unregister(surface);
             Register(new_surface);
             if (new_surface->IsUploaded()) {
                 RegisterReinterpretSurface(new_surface);
@@ -949,7 +941,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
             return new_surface;
         } else {
             // Delete the old surface before creating a new one to prevent collisions.
-            UnregisterSurface(surface);
+            Unregister(surface);
         }
     }
 
@@ -980,11 +972,11 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
     const auto& init_params{src_surface->GetSurfaceParams()};
     const auto& dst_params{dst_surface->GetSurfaceParams()};
     auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
-    Tegra::GPUVAddr address{init_params.gpu_addr};
+    GPUVAddr address{init_params.gpu_addr};
     const std::size_t layer_size{dst_params.LayerMemorySize()};
     for (u32 layer = 0; layer < dst_params.depth; layer++) {
         for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
-            const Tegra::GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
+            const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
             const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
             if (!copy) {
                 continue;
@@ -1244,10 +1236,9 @@ static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfacePar
     return {};
 }
 
-static std::optional<u32> TryFindBestLayer(Tegra::GPUVAddr addr, const SurfaceParams params,
-                                           u32 mipmap) {
+static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) {
     const std::size_t size{params.LayerMemorySize()};
-    Tegra::GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
+    GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
     for (u32 i = 0; i < params.depth; i++) {
         if (start == addr) {
             return {i};
@@ -1304,12 +1295,12 @@ static bool IsReinterpretInvalidSecond(const Surface render_surface,
 bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
                                                       Surface intersect) {
     if (IsReinterpretInvalid(triggering_surface, intersect)) {
-        UnregisterSurface(intersect);
+        Unregister(intersect);
         return false;
     }
     if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
         if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
-            UnregisterSurface(intersect);
+            Unregister(intersect);
             return false;
         }
         FlushObject(intersect);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 9366f47f2..e8073579f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -109,6 +109,11 @@ struct SurfaceParams {
         return size;
     }
 
+    /// Returns true when gpu_addr, host_ptr, height and width are all non-zero / non-null.
+    bool IsValid() const {
+        return gpu_addr != 0 && host_ptr != nullptr && height != 0 && width != 0;
+    }
+
     /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
     /// mipmaps.
     std::size_t LayerMemorySize() const {
@@ -210,7 +215,7 @@ struct SurfaceParams {
 
     /// Creates SurfaceParams for a depth buffer configuration
     static SurfaceParams CreateForDepthBuffer(
-        u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
+        u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
         u32 block_width, u32 block_height, u32 block_depth,
         Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
 
@@ -232,7 +237,7 @@ struct SurfaceParams {
     }
 
     /// Initializes parameters for caching, should be called after everything has been initialized
-    void InitCacheParameters(Tegra::GPUVAddr gpu_addr);
+    void InitCacheParameters(GPUVAddr gpu_addr);
 
     std::string TargetName() const {
         switch (target) {
@@ -297,7 +302,7 @@ struct SurfaceParams {
     bool srgb_conversion;
     // Parameters used for caching
     u8* host_ptr;
-    Tegra::GPUVAddr gpu_addr;
+    GPUVAddr gpu_addr;
     std::size_t size_in_bytes;
     std::size_t size_in_bytes_gl;
 
@@ -533,13 +538,17 @@ private:
         return nullptr;
     }
 
+    void Register(const Surface& object) override {
+        RasterizerCache<Surface>::Register(object);
+    }
+
     /// Unregisters an object from the cache
-    void UnregisterSurface(const Surface& object) {
+    void Unregister(const Surface& object) override {
         if (object->IsReinterpreted()) {
             auto interval = GetReinterpretInterval(object);
             reinterpreted_surfaces.erase(interval);
         }
-        Unregister(object);
+        RasterizerCache<Surface>::Unregister(object);
     }
 };
 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 1ed740877..290e654bc 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -32,7 +32,7 @@ struct UnspecializedShader {
 namespace {
 
 /// Gets the address for the specified shader stage program
-Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
+GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
     const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
     const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
     return gpu.regs.code_address.CodeAddress() + shader_config.offset;
@@ -215,9 +215,9 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
                            Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
                            const PrecompiledPrograms& precompiled_programs,
                            ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
-    : host_ptr{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
-      program_type{program_type}, disk_cache{disk_cache},
-      precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} {
+    : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr},
+      unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache},
+      precompiled_programs{precompiled_programs} {
 
     const std::size_t code_size = CalculateProgramSize(program_code);
     const std::size_t code_size_b =
@@ -245,9 +245,9 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
                            Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
                            const PrecompiledPrograms& precompiled_programs,
                            GLShader::ProgramResult result, u8* host_ptr)
-    : cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, program_type{program_type},
-      disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{
-                                                                              host_ptr} {
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
+      program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{
+                                                              precompiled_programs} {
 
     code = std::move(result.first);
     entries = result.second;
@@ -486,7 +486,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
     }
 
     auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
-    const Tegra::GPUVAddr program_addr{GetShaderAddress(program)};
+    const GPUVAddr program_addr{GetShaderAddress(program)};
 
     // Look up shader in the cache based on address
     const auto& host_ptr{memory_manager.GetPointer(program_addr)};
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 5e3d862c6..a01efeb05 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -266,7 +266,7 @@ void RendererOpenGL::CreateRasterizer() {
     }
     // Initialize sRGB Usage
     OpenGLState::ClearsRGBUsed();
-    rasterizer = std::make_unique<RasterizerOpenGL>(render_window, system, screen_info);
+    rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info);
 }
 
 void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 95eab3fec..388b5ffd5 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -19,8 +19,8 @@ namespace Vulkan {
 
 CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
                                      std::size_t alignment, u8* host_ptr)
-    : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
-                                                                                host_ptr} {}
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
+      alignment{alignment} {}
 
 VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
                              VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
@@ -39,8 +39,7 @@ VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
 
 VKBufferCache::~VKBufferCache() = default;
 
-u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
-                                bool cache) {
+u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
     const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
     ASSERT_MSG(cpu_addr, "Invalid GPU address");
 
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 8b415744b..08b786aad 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -68,8 +68,7 @@ public:
 
     /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
     /// allocated.
-    u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4,
-                     bool cache = true);
+    u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);
 
     /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
     u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
index a1e117443..13c46e5b8 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -21,7 +21,7 @@ public:
     CommandBufferPool(const VKDevice& device)
         : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
 
-    void Allocate(std::size_t begin, std::size_t end) {
+    void Allocate(std::size_t begin, std::size_t end) override {
         const auto dev = device.GetLogical();
         const auto& dld = device.GetDispatchLoader();
         const u32 graphics_family = device.GetGraphicsFamily();
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
index 5bfe4cead..08ee86fa6 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -97,7 +97,7 @@ private:
 class VKFenceWatch final : public VKResource {
 public:
     explicit VKFenceWatch();
-    ~VKFenceWatch();
+    ~VKFenceWatch() override;
 
     /// Waits for the fence to be released.
     void Wait();
diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp
index 40da1a4e2..dc149d2ed 100644
--- a/src/web_service/web_backend.cpp
+++ b/src/web_service/web_backend.cpp
@@ -24,7 +24,7 @@ constexpr u32 TIMEOUT_SECONDS = 30;
 struct Client::Impl {
     Impl(std::string host, std::string username, std::string token)
         : host{std::move(host)}, username{std::move(username)}, token{std::move(token)} {
-        std::lock_guard<std::mutex> lock(jwt_cache.mutex);
+        std::lock_guard lock{jwt_cache.mutex};
         if (this->username == jwt_cache.username && this->token == jwt_cache.token) {
             jwt = jwt_cache.jwt;
         }
@@ -151,7 +151,7 @@ struct Client::Impl {
         if (result.result_code != Common::WebResult::Code::Success) {
             LOG_ERROR(WebService, "UpdateJWT failed");
         } else {
-            std::lock_guard<std::mutex> lock(jwt_cache.mutex);
+            std::lock_guard lock{jwt_cache.mutex};
             jwt_cache.username = username;
             jwt_cache.token = token;
             jwt_cache.jwt = jwt = result.returned_data;
diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp
index 5c1b65a2c..730426c16 100644
--- a/src/yuzu/applets/profile_select.cpp
+++ b/src/yuzu/applets/profile_select.cpp
@@ -163,6 +163,6 @@ void QtProfileSelector::SelectProfile(
 
 void QtProfileSelector::MainWindowFinishedSelection(std::optional<Service::Account::UUID> uuid) {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     callback(uuid);
 }
diff --git a/src/yuzu/applets/software_keyboard.cpp b/src/yuzu/applets/software_keyboard.cpp
index 8a26fdff1..eddc9c941 100644
--- a/src/yuzu/applets/software_keyboard.cpp
+++ b/src/yuzu/applets/software_keyboard.cpp
@@ -141,12 +141,12 @@ void QtSoftwareKeyboard::SendTextCheckDialog(std::u16string error_message,
 
 void QtSoftwareKeyboard::MainWindowFinishedText(std::optional<std::u16string> text) {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     text_output(text);
 }
 
 void QtSoftwareKeyboard::MainWindowFinishedCheckDialog() {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     finished_check();
 }
diff --git a/src/yuzu/applets/web_browser.cpp b/src/yuzu/applets/web_browser.cpp
index 979b9ec14..ac80b2fa2 100644
--- a/src/yuzu/applets/web_browser.cpp
+++ b/src/yuzu/applets/web_browser.cpp
@@ -104,12 +104,12 @@ void QtWebBrowser::OpenPage(std::string_view url, std::function<void()> unpack_r
 
 void QtWebBrowser::MainWindowUnpackRomFS() {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     unpack_romfs_callback();
 }
 
 void QtWebBrowser::MainWindowFinishedBrowsing() {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     finished_callback();
 }
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 05ad19e1d..7438fbc0a 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -67,7 +67,7 @@ void EmuThread::run() {
 
             was_active = false;
         } else {
-            std::unique_lock<std::mutex> lock(running_mutex);
+            std::unique_lock lock{running_mutex};
             running_cv.wait(lock, [this] { return IsRunning() || exec_step || stop_run; });
         }
     }
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index 7226e690e..3183621bc 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -53,7 +53,7 @@ public:
      * @note This function is thread-safe
      */
     void SetRunning(bool running) {
-        std::unique_lock<std::mutex> lock(running_mutex);
+        std::unique_lock lock{running_mutex};
         this->running = running;
         lock.unlock();
         running_cv.notify_all();
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 4650f96a3..dead9f807 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -413,7 +413,6 @@ void Config::ReadValues() {
 
     qt_config->beginGroup("System");
     Settings::values.use_docked_mode = ReadSetting("use_docked_mode", false).toBool();
-    Settings::values.enable_nfc = ReadSetting("enable_nfc", true).toBool();
 
     Settings::values.current_user =
         std::clamp<int>(ReadSetting("current_user", 0).toInt(), 0, Service::Account::MAX_USERS - 1);
@@ -675,7 +674,6 @@ void Config::SaveValues() {
 
     qt_config->beginGroup("System");
     WriteSetting("use_docked_mode", Settings::values.use_docked_mode, false);
-    WriteSetting("enable_nfc", Settings::values.enable_nfc, true);
     WriteSetting("current_user", Settings::values.current_user, 0);
     WriteSetting("language_index", Settings::values.language_index, 1);
 
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index 4116b6cd7..389fcf667 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -33,7 +33,6 @@ void ConfigureGeneral::setConfiguration() {
     ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot);
     ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme));
     ui->use_cpu_jit->setChecked(Settings::values.use_cpu_jit);
-    ui->enable_nfc->setChecked(Settings::values.enable_nfc);
 }
 
 void ConfigureGeneral::PopulateHotkeyList(const HotkeyRegistry& registry) {
@@ -48,5 +47,4 @@ void ConfigureGeneral::applyConfiguration() {
         ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString();
 
     Settings::values.use_cpu_jit = ui->use_cpu_jit->isChecked();
-    Settings::values.enable_nfc = ui->enable_nfc->isChecked();
 }
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui
index dff0ad5d0..01d1c0b8e 100644
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@@ -71,26 +71,6 @@
       </widget>
      </item>
      <item>
-      <widget class="QGroupBox" name="EmulationGroupBox">
-       <property name="title">
-        <string>Emulation</string>
-       </property>
-       <layout class="QHBoxLayout" name="EmulationHorizontalLayout">
-        <item>
-         <layout class="QVBoxLayout" name="EmulationVerticalLayout">
-          <item>
-           <widget class="QCheckBox" name="enable_nfc">
-            <property name="text">
-             <string>Enable NFC</string>
-            </property>
-           </widget>
-          </item>
-         </layout>
-        </item>
-       </layout>
-      </widget>
-     </item>
-     <item>
       <widget class="QGroupBox" name="theme_group_box">
        <property name="title">
         <string>Theme</string>
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp
index 29f01dfb2..11023ed63 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -261,7 +261,7 @@ void GraphicsSurfaceWidget::OnSurfaceSourceChanged(int new_value) {
 
 void GraphicsSurfaceWidget::OnSurfaceAddressChanged(qint64 new_value) {
     if (surface_address != new_value) {
-        surface_address = static_cast<Tegra::GPUVAddr>(new_value);
+        surface_address = static_cast<GPUVAddr>(new_value);
 
         surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom));
         emit Update();
diff --git a/src/yuzu/debugger/graphics/graphics_surface.h b/src/yuzu/debugger/graphics/graphics_surface.h
index 323e39d94..89445b18f 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.h
+++ b/src/yuzu/debugger/graphics/graphics_surface.h
@@ -87,7 +87,7 @@ private:
     QPushButton* save_surface;
 
     Source surface_source;
-    Tegra::GPUVAddr surface_address;
+    GPUVAddr surface_address;
     unsigned surface_width;
     unsigned surface_height;
     Tegra::Texture::TextureFormat surface_format;
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 06ad74ffe..593bb681f 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -234,6 +234,9 @@ QString WaitTreeThread::GetText() const {
     case Kernel::ThreadStatus::WaitMutex:
         status = tr("waiting for mutex");
         break;
+    case Kernel::ThreadStatus::WaitCondVar:
+        status = tr("waiting for condition variable");
+        break;
     case Kernel::ThreadStatus::WaitArb:
         status = tr("waiting for address arbiter");
         break;
@@ -269,6 +272,7 @@ QColor WaitTreeThread::GetColor() const {
     case Kernel::ThreadStatus::WaitSynchAll:
     case Kernel::ThreadStatus::WaitSynchAny:
     case Kernel::ThreadStatus::WaitMutex:
+    case Kernel::ThreadStatus::WaitCondVar:
     case Kernel::ThreadStatus::WaitArb:
         return QColor(Qt::GlobalColor::red);
     case Kernel::ThreadStatus::Dormant:
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 32e78049c..f24cc77fe 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -319,7 +319,6 @@ void Config::ReadValues() {
 
     // System
     Settings::values.use_docked_mode = sdl2_config->GetBoolean("System", "use_docked_mode", false);
-    Settings::values.enable_nfc = sdl2_config->GetBoolean("System", "enable_nfc", true);
     const auto size = sdl2_config->GetInteger("System", "users_size", 0);
 
     Settings::values.current_user = std::clamp<int>(