177 files changed, 4802 insertions, 2999 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8777df751..61adbef28 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -45,10 +45,15 @@ if (MSVC)
 
         # Warnings
         /W3
+        /we4062 # enumerator 'identifier' in a switch of enum 'enumeration' is not handled
+        /we4101 # 'identifier': unreferenced local variable
+        /we4265 # 'class': class has virtual functions, but destructor is not virtual
+        /we4388 # signed/unsigned mismatch
         /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect
         /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'?
         /we4555 # Expression has no effect; expected expression with side-effect
         /we4834 # Discarding return value of function with 'nodiscard' attribute
+        /we5038 # data member 'member1' will be initialized after data member 'member2'
     )
 
     # /GS- - No stack buffer overflow checks
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2c2bd2ee8..5d781cd77 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -123,6 +123,7 @@ add_library(common STATIC
     hash.h
     hex_util.cpp
     hex_util.h
+    intrusive_red_black_tree.h
     logging/backend.cpp
     logging/backend.h
     logging/filter.cpp
@@ -143,6 +144,7 @@ add_library(common STATIC
     page_table.h
     param_package.cpp
     param_package.h
+    parent_of_member.h
     quaternion.h
     ring_buffer.h
     scm_rev.cpp
@@ -167,6 +169,7 @@ add_library(common STATIC
     time_zone.h
     timer.cpp
     timer.h
+    tree.h
     uint128.cpp
     uint128.h
     uuid.cpp
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index 367b6bf6e..c90978f9c 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -93,6 +93,14 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
         return static_cast<T>(key) == 0;                                                           \
     }
 
+/// Evaluates a boolean expression, and returns a result unless that expression is true.
+#define R_UNLESS(expr, res)                                                                        \
+    {                                                                                              \
+        if (!(expr)) {                                                                             \
+            return res;                                                                            \
+        }                                                                                          \
+    }
+
 namespace Common {
 
 [[nodiscard]] constexpr u32 MakeMagic(char a, char b, char c, char d) {
diff --git a/src/common/div_ceil.h b/src/common/div_ceil.h
index 6b2c48f91..95e1489a9 100644
--- a/src/common/div_ceil.h
+++ b/src/common/div_ceil.h
@@ -11,16 +11,16 @@ namespace Common {
 
 /// Ceiled integer division.
 template <typename N, typename D>
-requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr auto DivCeil(
-    N number, D divisor) {
-    return (static_cast<D>(number) + divisor - 1) / divisor;
+requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr N DivCeil(N number,
+                                                                                        D divisor) {
+    return static_cast<N>((static_cast<D>(number) + divisor - 1) / divisor);
 }
 
 /// Ceiled integer division with logarithmic divisor in base 2
 template <typename N, typename D>
-requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr auto DivCeilLog2(
+requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr N DivCeilLog2(
     N value, D alignment_log2) {
-    return (static_cast<D>(value) + (D(1) << alignment_log2) - 1) >> alignment_log2;
+    return static_cast<N>((static_cast<D>(value) + (D(1) << alignment_log2) - 1) >> alignment_log2);
 }
 
 } // namespace Common
diff --git a/src/common/intrusive_red_black_tree.h b/src/common/intrusive_red_black_tree.h
new file mode 100644
index 000000000..fb55de94e
--- /dev/null
+++ b/src/common/intrusive_red_black_tree.h
@@ -0,0 +1,627 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/parent_of_member.h"
+#include "common/tree.h"
+
+namespace Common {
+
+namespace impl {
+
+class IntrusiveRedBlackTreeImpl;
+
+}
+
+struct IntrusiveRedBlackTreeNode {
+
+private:
+    RB_ENTRY(IntrusiveRedBlackTreeNode) entry{};
+
+    friend class impl::IntrusiveRedBlackTreeImpl;
+
+    template <class, class, class>
+    friend class IntrusiveRedBlackTree;
+
+public:
+    constexpr IntrusiveRedBlackTreeNode() = default;
+};
+
+template <class T, class Traits, class Comparator>
+class IntrusiveRedBlackTree;
+
+namespace impl {
+
+class IntrusiveRedBlackTreeImpl {
+
+private:
+    template <class, class, class>
+    friend class ::Common::IntrusiveRedBlackTree;
+
+private:
+    RB_HEAD(IntrusiveRedBlackTreeRoot, IntrusiveRedBlackTreeNode);
+    using RootType = IntrusiveRedBlackTreeRoot;
+
+private:
+    IntrusiveRedBlackTreeRoot root;
+
+public:
+    template <bool Const>
+    class Iterator;
+
+    using value_type = IntrusiveRedBlackTreeNode;
+    using size_type = size_t;
+    using difference_type = ptrdiff_t;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+    using reference = value_type&;
+    using const_reference = const value_type&;
+    using iterator = Iterator<false>;
+    using const_iterator = Iterator<true>;
+
+    template <bool Const>
+    class Iterator {
+    public:
+        using iterator_category = std::bidirectional_iterator_tag;
+        using value_type = typename IntrusiveRedBlackTreeImpl::value_type;
+        using difference_type = typename IntrusiveRedBlackTreeImpl::difference_type;
+        using pointer = std::conditional_t<Const, IntrusiveRedBlackTreeImpl::const_pointer,
+                                           IntrusiveRedBlackTreeImpl::pointer>;
+        using reference = std::conditional_t<Const, IntrusiveRedBlackTreeImpl::const_reference,
+                                             IntrusiveRedBlackTreeImpl::reference>;
+
+    private:
+        pointer node;
+
+    public:
+        explicit Iterator(pointer n) : node(n) {}
+
+        bool operator==(const Iterator& rhs) const {
+            return this->node == rhs.node;
+        }
+
+        bool operator!=(const Iterator& rhs) const {
+            return !(*this == rhs);
+        }
+
+        pointer operator->() const {
+            return this->node;
+        }
+
+        reference operator*() const {
+            return *this->node;
+        }
+
+        Iterator& operator++() {
+            this->node = GetNext(this->node);
+            return *this;
+        }
+
+        Iterator& operator--() {
+            this->node = GetPrev(this->node);
+            return *this;
+        }
+
+        Iterator operator++(int) {
+            const Iterator it{*this};
+            ++(*this);
+            return it;
+        }
+
+        Iterator operator--(int) {
+            const Iterator it{*this};
+            --(*this);
+            return it;
+        }
+
+        operator Iterator<true>() const {
+            return Iterator<true>(this->node);
+        }
+    };
+
+protected:
+    // Generate static implementations for non-comparison operations for IntrusiveRedBlackTreeRoot.
+    RB_GENERATE_WITHOUT_COMPARE_STATIC(IntrusiveRedBlackTreeRoot, IntrusiveRedBlackTreeNode, entry);
+
+private:
+    // Define accessors using RB_* functions.
+    constexpr void InitializeImpl() {
+        RB_INIT(&this->root);
+    }
+
+    bool EmptyImpl() const {
+        return RB_EMPTY(&this->root);
+    }
+
+    IntrusiveRedBlackTreeNode* GetMinImpl() const {
+        return RB_MIN(IntrusiveRedBlackTreeRoot,
+                      const_cast<IntrusiveRedBlackTreeRoot*>(&this->root));
+    }
+
+    IntrusiveRedBlackTreeNode* GetMaxImpl() const {
+        return RB_MAX(IntrusiveRedBlackTreeRoot,
+                      const_cast<IntrusiveRedBlackTreeRoot*>(&this->root));
+    }
+
+    IntrusiveRedBlackTreeNode* RemoveImpl(IntrusiveRedBlackTreeNode* node) {
+        return RB_REMOVE(IntrusiveRedBlackTreeRoot, &this->root, node);
+    }
+
+public:
+    static IntrusiveRedBlackTreeNode* GetNext(IntrusiveRedBlackTreeNode* node) {
+        return RB_NEXT(IntrusiveRedBlackTreeRoot, nullptr, node);
+    }
+
+    static IntrusiveRedBlackTreeNode* GetPrev(IntrusiveRedBlackTreeNode* node) {
+        return RB_PREV(IntrusiveRedBlackTreeRoot, nullptr, node);
+    }
+
+    static IntrusiveRedBlackTreeNode const* GetNext(const IntrusiveRedBlackTreeNode* node) {
+        return static_cast<const IntrusiveRedBlackTreeNode*>(
+            GetNext(const_cast<IntrusiveRedBlackTreeNode*>(node)));
+    }
+
+    static IntrusiveRedBlackTreeNode const* GetPrev(const IntrusiveRedBlackTreeNode* node) {
+        return static_cast<const IntrusiveRedBlackTreeNode*>(
+            GetPrev(const_cast<IntrusiveRedBlackTreeNode*>(node)));
+    }
+
+public:
+    constexpr IntrusiveRedBlackTreeImpl() : root() {
+        this->InitializeImpl();
+    }
+
+    // Iterator accessors.
+    iterator begin() {
+        return iterator(this->GetMinImpl());
+    }
+
+    const_iterator begin() const {
+        return const_iterator(this->GetMinImpl());
+    }
+
+    iterator end() {
+        return iterator(static_cast<IntrusiveRedBlackTreeNode*>(nullptr));
+    }
+
+    const_iterator end() const {
+        return const_iterator(static_cast<const IntrusiveRedBlackTreeNode*>(nullptr));
+    }
+
+    const_iterator cbegin() const {
+        return this->begin();
+    }
+
+    const_iterator cend() const {
+        return this->end();
+    }
+
+    iterator iterator_to(reference ref) {
+        return iterator(&ref);
+    }
+
+    const_iterator iterator_to(const_reference ref) const {
+        return const_iterator(&ref);
+    }
+
+    // Content management.
+    bool empty() const {
+        return this->EmptyImpl();
+    }
+
+    reference back() {
+        return *this->GetMaxImpl();
+    }
+
+    const_reference back() const {
+        return *this->GetMaxImpl();
+    }
+
+    reference front() {
+        return *this->GetMinImpl();
+    }
+
+    const_reference front() const {
+        return *this->GetMinImpl();
+    }
+
+    iterator erase(iterator it) {
+        auto cur = std::addressof(*it);
+        auto next = GetNext(cur);
+        this->RemoveImpl(cur);
+        return iterator(next);
+    }
+};
+
+} // namespace impl
+
+/// Satisfied exactly when T declares a nested type (or alias) named LightCompareType.
+template <typename T>
+concept HasLightCompareType = requires {
+    typename T::LightCompareType;
+};
+
+namespace impl {
+
+template <typename T, typename Default>
+consteval auto* GetLightCompareType() {
+    if constexpr (HasLightCompareType<T>) {
+        return static_cast<typename T::LightCompareType*>(nullptr);
+    } else {
+        return static_cast<Default*>(nullptr);
+    }
+}
+
+} // namespace impl
+
+template <typename T, typename Default>
+using LightCompareType = std::remove_pointer_t<decltype(impl::GetLightCompareType<T, Default>())>;
+
+template <class T, class Traits, class Comparator>
+class IntrusiveRedBlackTree {
+
+public:
+    using ImplType = impl::IntrusiveRedBlackTreeImpl;
+
+private:
+    ImplType impl{};
+
+public:
+    struct IntrusiveRedBlackTreeRootWithCompare : ImplType::IntrusiveRedBlackTreeRoot {};
+
+    template <bool Const>
+    class Iterator;
+
+    using value_type = T;
+    using size_type = size_t;
+    using difference_type = ptrdiff_t;
+    using pointer = T*;
+    using const_pointer = const T*;
+    using reference = T&;
+    using const_reference = const T&;
+    using iterator = Iterator<false>;
+    using const_iterator = Iterator<true>;
+
+    using light_value_type = LightCompareType<Comparator, value_type>;
+    using const_light_pointer = const light_value_type*;
+    using const_light_reference = const light_value_type&;
+
+    template <bool Const>
+    class Iterator {
+    public:
+        friend class IntrusiveRedBlackTree<T, Traits, Comparator>;
+
+        using ImplIterator =
+            std::conditional_t<Const, ImplType::const_iterator, ImplType::iterator>;
+
+        using iterator_category = std::bidirectional_iterator_tag;
+        using value_type = typename IntrusiveRedBlackTree::value_type;
+        using difference_type = typename IntrusiveRedBlackTree::difference_type;
+        using pointer = std::conditional_t<Const, IntrusiveRedBlackTree::const_pointer,
+                                           IntrusiveRedBlackTree::pointer>;
+        using reference = std::conditional_t<Const, IntrusiveRedBlackTree::const_reference,
+                                             IntrusiveRedBlackTree::reference>;
+
+    private:
+        ImplIterator iterator;
+
+    private:
+        explicit Iterator(ImplIterator it) : iterator(it) {}
+
+        explicit Iterator(typename std::conditional<Const, ImplType::const_iterator,
+                                                    ImplType::iterator>::type::pointer ptr)
+            : iterator(ptr) {}
+
+        ImplIterator GetImplIterator() const {
+            return this->iterator;
+        }
+
+    public:
+        bool operator==(const Iterator& rhs) const {
+            return this->iterator == rhs.iterator;
+        }
+
+        bool operator!=(const Iterator& rhs) const {
+            return !(*this == rhs);
+        }
+
+        pointer operator->() const {
+            return Traits::GetParent(std::addressof(*this->iterator));
+        }
+
+        reference operator*() const {
+            return *Traits::GetParent(std::addressof(*this->iterator));
+        }
+
+        Iterator& operator++() {
+            ++this->iterator;
+            return *this;
+        }
+
+        Iterator& operator--() {
+            --this->iterator;
+            return *this;
+        }
+
+        Iterator operator++(int) {
+            const Iterator it{*this};
+            ++this->iterator;
+            return it;
+        }
+
+        Iterator operator--(int) {
+            const Iterator it{*this};
+            --this->iterator;
+            return it;
+        }
+
+        operator Iterator<true>() const {
+            return Iterator<true>(this->iterator);
+        }
+    };
+
+private:
+    // Generate static implementations for comparison operations for IntrusiveRedBlackTreeRoot.
+    RB_GENERATE_WITH_COMPARE_STATIC(IntrusiveRedBlackTreeRootWithCompare, IntrusiveRedBlackTreeNode,
+                                    entry, CompareImpl, LightCompareImpl);
+
+private:
+    static int CompareImpl(const IntrusiveRedBlackTreeNode* lhs,
+                           const IntrusiveRedBlackTreeNode* rhs) {
+        return Comparator::Compare(*Traits::GetParent(lhs), *Traits::GetParent(rhs));
+    }
+
+    static int LightCompareImpl(const void* elm, const IntrusiveRedBlackTreeNode* rhs) {
+        return Comparator::Compare(*static_cast<const_light_pointer>(elm), *Traits::GetParent(rhs));
+    }
+
+    // Define accessors using RB_* functions.
+    IntrusiveRedBlackTreeNode* InsertImpl(IntrusiveRedBlackTreeNode* node) {
+        return RB_INSERT(IntrusiveRedBlackTreeRootWithCompare,
+                         static_cast<IntrusiveRedBlackTreeRootWithCompare*>(&this->impl.root),
+                         node);
+    }
+
+    IntrusiveRedBlackTreeNode* FindImpl(const IntrusiveRedBlackTreeNode* node) const {
+        return RB_FIND(
+            IntrusiveRedBlackTreeRootWithCompare,
+            const_cast<IntrusiveRedBlackTreeRootWithCompare*>(
+                static_cast<const IntrusiveRedBlackTreeRootWithCompare*>(&this->impl.root)),
+            const_cast<IntrusiveRedBlackTreeNode*>(node));
+    }
+
+    IntrusiveRedBlackTreeNode* NFindImpl(const IntrusiveRedBlackTreeNode* node) const {
+        return RB_NFIND(
+            IntrusiveRedBlackTreeRootWithCompare,
+            const_cast<IntrusiveRedBlackTreeRootWithCompare*>(
+                static_cast<const IntrusiveRedBlackTreeRootWithCompare*>(&this->impl.root)),
+            const_cast<IntrusiveRedBlackTreeNode*>(node));
+    }
+
+    IntrusiveRedBlackTreeNode* FindLightImpl(const_light_pointer lelm) const {
+        return RB_FIND_LIGHT(
+            IntrusiveRedBlackTreeRootWithCompare,
+            const_cast<IntrusiveRedBlackTreeRootWithCompare*>(
+                static_cast<const IntrusiveRedBlackTreeRootWithCompare*>(&this->impl.root)),
+            static_cast<const void*>(lelm));
+    }
+
+    IntrusiveRedBlackTreeNode* NFindLightImpl(const_light_pointer lelm) const {
+        return RB_NFIND_LIGHT(
+            IntrusiveRedBlackTreeRootWithCompare,
+            const_cast<IntrusiveRedBlackTreeRootWithCompare*>(
+                static_cast<const IntrusiveRedBlackTreeRootWithCompare*>(&this->impl.root)),
+            static_cast<const void*>(lelm));
+    }
+
+public:
+    constexpr IntrusiveRedBlackTree() = default;
+
+    // Iterator accessors.
+    iterator begin() {
+        return iterator(this->impl.begin());
+    }
+
+    const_iterator begin() const {
+        return const_iterator(this->impl.begin());
+    }
+
+    iterator end() {
+        return iterator(this->impl.end());
+    }
+
+    const_iterator end() const {
+        return const_iterator(this->impl.end());
+    }
+
+    const_iterator cbegin() const {
+        return this->begin();
+    }
+
+    const_iterator cend() const {
+        return this->end();
+    }
+
+    iterator iterator_to(reference ref) {
+        return iterator(this->impl.iterator_to(*Traits::GetNode(std::addressof(ref))));
+    }
+
+    const_iterator iterator_to(const_reference ref) const {
+        return const_iterator(this->impl.iterator_to(*Traits::GetNode(std::addressof(ref))));
+    }
+
+    // Content management.
+    bool empty() const {
+        return this->impl.empty();
+    }
+
+    reference back() {
+        return *Traits::GetParent(std::addressof(this->impl.back()));
+    }
+
+    const_reference back() const {
+        return *Traits::GetParent(std::addressof(this->impl.back()));
+    }
+
+    reference front() {
+        return *Traits::GetParent(std::addressof(this->impl.front()));
+    }
+
+    const_reference front() const {
+        return *Traits::GetParent(std::addressof(this->impl.front()));
+    }
+
+    iterator erase(iterator it) {
+        return iterator(this->impl.erase(it.GetImplIterator()));
+    }
+
+    iterator insert(reference ref) {
+        ImplType::pointer node = Traits::GetNode(std::addressof(ref));
+        this->InsertImpl(node);
+        return iterator(node);
+    }
+
+    iterator find(const_reference ref) const {
+        return iterator(this->FindImpl(Traits::GetNode(std::addressof(ref))));
+    }
+
+    iterator nfind(const_reference ref) const {
+        return iterator(this->NFindImpl(Traits::GetNode(std::addressof(ref))));
+    }
+
+    iterator find_light(const_light_reference ref) const {
+        return iterator(this->FindLightImpl(std::addressof(ref)));
+    }
+
+    iterator nfind_light(const_light_reference ref) const {
+        return iterator(this->NFindLightImpl(std::addressof(ref)));
+    }
+};
+
+template <auto T, class Derived = impl::GetParentType<T>>
+class IntrusiveRedBlackTreeMemberTraits;
+
+template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived>
+class IntrusiveRedBlackTreeMemberTraits<Member, Derived> {
+public:
+    template <class Comparator>
+    using TreeType = IntrusiveRedBlackTree<Derived, IntrusiveRedBlackTreeMemberTraits, Comparator>;
+    using TreeTypeImpl = impl::IntrusiveRedBlackTreeImpl;
+
+private:
+    template <class, class, class>
+    friend class IntrusiveRedBlackTree;
+
+    friend class impl::IntrusiveRedBlackTreeImpl;
+
+    static constexpr IntrusiveRedBlackTreeNode* GetNode(Derived* parent) {
+        return std::addressof(parent->*Member);
+    }
+
+    static constexpr IntrusiveRedBlackTreeNode const* GetNode(Derived const* parent) {
+        return std::addressof(parent->*Member);
+    }
+
+    static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
+        return GetParentPointer<Member, Derived>(node);
+    }
+
+    static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) {
+        return GetParentPointer<Member, Derived>(node);
+    }
+
+private:
+    static constexpr TypedStorage<Derived> DerivedStorage = {};
+    static_assert(GetParent(GetNode(GetPointer(DerivedStorage))) == GetPointer(DerivedStorage));
+};
+
+template <auto T, class Derived = impl::GetParentType<T>>
+class IntrusiveRedBlackTreeMemberTraitsDeferredAssert;
+
+template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived>
+class IntrusiveRedBlackTreeMemberTraitsDeferredAssert<Member, Derived> {
+public:
+    template <class Comparator>
+    using TreeType =
+        IntrusiveRedBlackTree<Derived, IntrusiveRedBlackTreeMemberTraitsDeferredAssert, Comparator>;
+    using TreeTypeImpl = impl::IntrusiveRedBlackTreeImpl;
+
+    static constexpr bool IsValid() {
+        TypedStorage<Derived> DerivedStorage = {};
+        return GetParent(GetNode(GetPointer(DerivedStorage))) == GetPointer(DerivedStorage);
+    }
+
+private:
+    template <class, class, class>
+    friend class IntrusiveRedBlackTree;
+
+    friend class impl::IntrusiveRedBlackTreeImpl;
+
+    static constexpr IntrusiveRedBlackTreeNode* GetNode(Derived* parent) {
+        return std::addressof(parent->*Member);
+    }
+
+    static constexpr IntrusiveRedBlackTreeNode const* GetNode(Derived const* parent) {
+        return std::addressof(parent->*Member);
+    }
+
+    static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
+        return GetParentPointer<Member, Derived>(node);
+    }
+
+    static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) {
+        return GetParentPointer<Member, Derived>(node);
+    }
+};
+
+template <class Derived>
+class IntrusiveRedBlackTreeBaseNode : public IntrusiveRedBlackTreeNode {
+public:
+    constexpr Derived* GetPrev() {
+        return static_cast<Derived*>(impl::IntrusiveRedBlackTreeImpl::GetPrev(this));
+    }
+    constexpr const Derived* GetPrev() const {
+        return static_cast<const Derived*>(impl::IntrusiveRedBlackTreeImpl::GetPrev(this));
+    }
+
+    constexpr Derived* GetNext() {
+        return static_cast<Derived*>(impl::IntrusiveRedBlackTreeImpl::GetNext(this));
+    }
+    constexpr const Derived* GetNext() const {
+        return static_cast<const Derived*>(impl::IntrusiveRedBlackTreeImpl::GetNext(this));
+    }
+};
+
+template <class Derived>
+class IntrusiveRedBlackTreeBaseTraits {
+public:
+    template <class Comparator>
+    using TreeType = IntrusiveRedBlackTree<Derived, IntrusiveRedBlackTreeBaseTraits, Comparator>;
+    using TreeTypeImpl = impl::IntrusiveRedBlackTreeImpl;
+
+private:
+    template <class, class, class>
+    friend class IntrusiveRedBlackTree;
+
+    friend class impl::IntrusiveRedBlackTreeImpl;
+
+    static constexpr IntrusiveRedBlackTreeNode* GetNode(Derived* parent) {
+        return static_cast<IntrusiveRedBlackTreeNode*>(parent);
+    }
+
+    static constexpr IntrusiveRedBlackTreeNode const* GetNode(Derived const* parent) {
+        return static_cast<const IntrusiveRedBlackTreeNode*>(parent);
+    }
+
+    static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
+        return static_cast<Derived*>(node);
+    }
+
+    static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) {
+        return static_cast<const Derived*>(node);
+    }
+};
+
+} // namespace Common
diff --git a/src/common/parent_of_member.h b/src/common/parent_of_member.h
new file mode 100644
index 000000000..d9a14529d
--- /dev/null
+++ b/src/common/parent_of_member.h
@@ -0,0 +1,191 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <type_traits>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+
+namespace Common {
+namespace detail {
+template <typename T, size_t Size, size_t Align>
+struct TypedStorageImpl {
+    std::aligned_storage_t<Size, Align> storage_;
+};
+} // namespace detail
+
+template <typename T>
+using TypedStorage = detail::TypedStorageImpl<T, sizeof(T), alignof(T)>;
+
+template <typename T>
+static constexpr T* GetPointer(TypedStorage<T>& ts) {
+    return static_cast<T*>(static_cast<void*>(std::addressof(ts.storage_)));
+}
+
+template <typename T>
+static constexpr const T* GetPointer(const TypedStorage<T>& ts) {
+    return static_cast<const T*>(static_cast<const void*>(std::addressof(ts.storage_)));
+}
+
+namespace impl {
+
+template <size_t MaxDepth>
+struct OffsetOfUnionHolder {
+    template <typename ParentType, typename MemberType, size_t Offset>
+    union UnionImpl {
+        using PaddingMember = char;
+        static constexpr size_t GetOffset() {
+            return Offset;
+        }
+
+#pragma pack(push, 1)
+        struct {
+            PaddingMember padding[Offset];
+            MemberType members[(sizeof(ParentType) / sizeof(MemberType)) + 1];
+        } data;
+#pragma pack(pop)
+        UnionImpl<ParentType, MemberType, Offset + 1> next_union;
+    };
+
+    template <typename ParentType, typename MemberType>
+    union UnionImpl<ParentType, MemberType, 0> {
+        static constexpr size_t GetOffset() {
+            return 0;
+        }
+
+        struct {
+            MemberType members[(sizeof(ParentType) / sizeof(MemberType)) + 1];
+        } data;
+        UnionImpl<ParentType, MemberType, 1> next_union;
+    };
+
+    template <typename ParentType, typename MemberType>
+    union UnionImpl<ParentType, MemberType, MaxDepth> {};
+};
+
+template <typename ParentType, typename MemberType>
+struct OffsetOfCalculator {
+    using UnionHolder =
+        typename OffsetOfUnionHolder<sizeof(MemberType)>::template UnionImpl<ParentType, MemberType,
+                                                                             0>;
+    union Union {
+        char c{};
+        UnionHolder first_union;
+        TypedStorage<ParentType> parent;
+
+        constexpr Union() : c() {}
+    };
+    static constexpr Union U = {};
+
+    static constexpr const MemberType* GetNextAddress(const MemberType* start,
+                                                      const MemberType* target) {
+        while (start < target) {
+            start++;
+        }
+        return start;
+    }
+
+    static constexpr std::ptrdiff_t GetDifference(const MemberType* start,
+                                                  const MemberType* target) {
+        return (target - start) * sizeof(MemberType);
+    }
+
+    template <typename CurUnion>
+    static constexpr std::ptrdiff_t OffsetOfImpl(MemberType ParentType::*member,
+                                                 CurUnion& cur_union) {
+        constexpr size_t Offset = CurUnion::GetOffset();
+        const auto target = std::addressof(GetPointer(U.parent)->*member);
+        const auto start = std::addressof(cur_union.data.members[0]);
+        const auto next = GetNextAddress(start, target);
+
+        if (next != target) {
+            if constexpr (Offset < sizeof(MemberType) - 1) {
+                return OffsetOfImpl(member, cur_union.next_union);
+            } else {
+                UNREACHABLE();
+            }
+        }
+
+        return (next - start) * sizeof(MemberType) + Offset;
+    }
+
+    static constexpr std::ptrdiff_t OffsetOf(MemberType ParentType::*member) {
+        return OffsetOfImpl(member, U.first_union);
+    }
+};
+
+template <typename T>
+struct GetMemberPointerTraits;
+
+template <typename P, typename M>
+struct GetMemberPointerTraits<M P::*> {
+    using Parent = P;
+    using Member = M;
+};
+
+template <auto MemberPtr>
+using GetParentType = typename GetMemberPointerTraits<decltype(MemberPtr)>::Parent;
+
+template <auto MemberPtr>
+using GetMemberType = typename GetMemberPointerTraits<decltype(MemberPtr)>::Member;
+
+template <auto MemberPtr, typename RealParentType = GetParentType<MemberPtr>>
+static inline const std::ptrdiff_t OffsetOf = [] {
+    using DeducedParentType = GetParentType<MemberPtr>;
+    using MemberType = GetMemberType<MemberPtr>;
+    static_assert(std::is_base_of_v<DeducedParentType, RealParentType> ||
+                  std::is_same_v<RealParentType, DeducedParentType>);
+    // Evaluated once when the variable is initialized; const so the cached offset stays read-only.
+    return OffsetOfCalculator<RealParentType, MemberType>::OffsetOf(MemberPtr);
+}();
+
+} // namespace impl
+
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType& GetParentReference(impl::GetMemberType<MemberPtr>* member) {
+    std::ptrdiff_t Offset = impl::OffsetOf<MemberPtr, RealParentType>;
+    return *static_cast<RealParentType*>(
+        static_cast<void*>(static_cast<uint8_t*>(static_cast<void*>(member)) - Offset));
+}
+
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType const& GetParentReference(impl::GetMemberType<MemberPtr> const* member) {
+    std::ptrdiff_t Offset = impl::OffsetOf<MemberPtr, RealParentType>;
+    return *static_cast<const RealParentType*>(static_cast<const void*>(
+        static_cast<const uint8_t*>(static_cast<const void*>(member)) - Offset));
+}
+
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType* GetParentPointer(impl::GetMemberType<MemberPtr>* member) {
+    return std::addressof(GetParentReference<MemberPtr, RealParentType>(member));
+}
+
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType const* GetParentPointer(impl::GetMemberType<MemberPtr> const* member) {
+    return std::addressof(GetParentReference<MemberPtr, RealParentType>(member));
+}
+
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType& GetParentReference(impl::GetMemberType<MemberPtr>& member) {
+    return GetParentReference<MemberPtr, RealParentType>(std::addressof(member));
+}
+
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType const& GetParentReference(impl::GetMemberType<MemberPtr> const& member) {
+    return GetParentReference<MemberPtr, RealParentType>(std::addressof(member));
+}
+
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType* GetParentPointer(impl::GetMemberType<MemberPtr>& member) {
+    return std::addressof(GetParentReference<MemberPtr, RealParentType>(member));
+}
+
+template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>>
+constexpr RealParentType const* GetParentPointer(impl::GetMemberType<MemberPtr> const& member) {
+    return std::addressof(GetParentReference<MemberPtr, RealParentType>(member));
+}
+
+} // namespace Common
diff --git a/src/common/tree.h b/src/common/tree.h
new file mode 100644
index 000000000..a6b636646
--- /dev/null
+++ b/src/common/tree.h
@@ -0,0 +1,822 @@
+/* $NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $ */
+/* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_TREE_H_
+#define _SYS_TREE_H_ /* NOTE(review): guard kept from BSD <sys/tree.h>; confirm they never mix */
+
+/* FreeBSD <sys/cdefs.h> has a lot of defines we don't really want. */
+/* tree.h only actually uses __inline and __unused, so we'll just define those. */
+
+/* #include <sys/cdefs.h> */
+
+#ifndef __inline
+#define __inline inline /* the generated helpers below are declared with __inline */
+#endif
+
+/*
+ * This file defines data structures for different types of trees:
+ * splay trees and red-black trees.
+ *
+ * A splay tree is a self-organizing data structure.  Every operation
+ * on the tree causes a splay to happen.  The splay moves the requested
+ * node to the root of the tree and partly rebalances it.
+ *
+ * This has the benefit that request locality causes faster lookups as
+ * the requested nodes move to the top of the tree.  On the other hand,
+ * every lookup causes memory writes.
+ *
+ * The Balance Theorem bounds the total access time for m operations
+ * and n inserts on an initially empty tree as O((m + n)lg n).  The
+ * amortized cost for a sequence of m accesses to a splay tree is O(lg n).
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute.  It fulfills a set of conditions:
+ * - every search path from the root to a leaf consists of the
+ *   same number of black nodes,
+ * - each red node (except for the root) has a black parent,
+ * - each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+#define SPLAY_HEAD(name, type) /* declares struct name: the handle of a splay tree of type */      \
+    struct name {                                                                                  \
+        struct type* sph_root; /* root of the tree */                                              \
+    }
+
+#define SPLAY_INITIALIZER(root)                                                                    \
+    { NULL } /* empty tree: sph_root starts out null; the root argument is unused */
+
+#define SPLAY_INIT(root)                                                                           \
+    do {                                                                                           \
+        (root)->sph_root = NULL; /* reset the head to the empty tree */                            \
+    } while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ENTRY(type) /* anonymous link struct to embed in each node of type */                \
+    struct {                                                                                       \
+        struct type* spe_left;  /* left element */                                                 \
+        struct type* spe_right; /* right element */                                                \
+    }
+
+#define SPLAY_LEFT(elm, field) (elm)->field.spe_left /* left child link of elm */
+#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right /* right child link of elm */
+#define SPLAY_ROOT(head) (head)->sph_root /* root pointer stored in the head */
+#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL) /* true when the tree has no root */
+
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp holds SPLAY_{RIGHT,LEFT} of the root */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field)                                                       \
+    do {                                                                                           \
+        SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field);                             \
+        SPLAY_RIGHT(tmp, field) = (head)->sph_root; /* old root becomes tmp's right child */       \
+        (head)->sph_root = tmp; /* tmp is the new root */                                          \
+    } while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ROTATE_LEFT(head, tmp, field)                                                        \
+    do {                                                                                           \
+        SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field);                             \
+        SPLAY_LEFT(tmp, field) = (head)->sph_root; /* old root becomes tmp's left child */         \
+        (head)->sph_root = tmp; /* tmp is the new root */                                          \
+    } while (/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKLEFT(head, tmp, field)                                                           \
+    do {                                                                                           \
+        SPLAY_LEFT(tmp, field) = (head)->sph_root; /* hang the root off tmp's left link */         \
+        tmp = (head)->sph_root; /* tmp now tracks that node */                                     \
+        (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); /* descend left */                 \
+    } while (/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKRIGHT(head, tmp, field)                                                          \
+    do {                                                                                           \
+        SPLAY_RIGHT(tmp, field) = (head)->sph_root; /* hang the root off tmp's right link */       \
+        tmp = (head)->sph_root; /* tmp now tracks that node */                                     \
+        (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); /* descend right */               \
+    } while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ASSEMBLE(head, node, left, right, field) /* reattach left/right chains to root */    \
+    do {                                                                                           \
+        SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field);                            \
+        SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);                           \
+        SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field);                            \
+        SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field);                            \
+    } while (/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions */
+
+#define SPLAY_PROTOTYPE(name, type, field, cmp)                                                    \
+    void name##_SPLAY(struct name*, struct type*);                                                 \
+    void name##_SPLAY_MINMAX(struct name*, int);                                                   \
+    struct type* name##_SPLAY_INSERT(struct name*, struct type*);                                  \
+    struct type* name##_SPLAY_REMOVE(struct name*, struct type*);                                  \
+                                                                                                   \
+    /* Finds the node whose key compares equal to elm; NULL if absent or tree empty */             \
+    static __inline struct type* name##_SPLAY_FIND(struct name* head, struct type* elm) {          \
+        if (SPLAY_EMPTY(head))                                                                     \
+            return (NULL);                                                                         \
+        name##_SPLAY(head, elm);                                                                   \
+        if ((cmp)(elm, (head)->sph_root) == 0)                                                     \
+            return (head->sph_root);                                                               \
+        return (NULL);                                                                             \
+    }                                                                                              \
+    /* Splays on elm; returns the leftmost node of its right subtree, or NULL if none */           \
+    static __inline struct type* name##_SPLAY_NEXT(struct name* head, struct type* elm) {          \
+        name##_SPLAY(head, elm);                                                                   \
+        if (SPLAY_RIGHT(elm, field) != NULL) {                                                     \
+            elm = SPLAY_RIGHT(elm, field);                                                         \
+            while (SPLAY_LEFT(elm, field) != NULL) {                                               \
+                elm = SPLAY_LEFT(elm, field);                                                      \
+            }                                                                                      \
+        } else                                                                                     \
+            elm = NULL;                                                                            \
+        return (elm);                                                                              \
+    }                                                                                              \
+    /* Splays toward the minimum (val < 0) or maximum (val > 0) and returns the new root */        \
+    static __inline struct type* name##_SPLAY_MIN_MAX(struct name* head, int val) {                \
+        name##_SPLAY_MINMAX(head, val);                                                            \
+        return (SPLAY_ROOT(head));                                                                 \
+    }
+
+/* Main splay operation.
+ * Moves node close to the key of elm to top
+ */
+#define SPLAY_GENERATE(name, type, field, cmp) /* emits the non-inline function definitions */     \
+    struct type* name##_SPLAY_INSERT(struct name* head, struct type* elm) {                        \
+        if (SPLAY_EMPTY(head)) { /* first node: it has no children */                              \
+            SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL;                               \
+        } else {                                                                                   \
+            int __comp;                                                                            \
+            name##_SPLAY(head, elm);                                                               \
+            __comp = (cmp)(elm, (head)->sph_root);                                                 \
+            if (__comp < 0) {                                                                      \
+                SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);                      \
+                SPLAY_RIGHT(elm, field) = (head)->sph_root;                                        \
+                SPLAY_LEFT((head)->sph_root, field) = NULL;                                        \
+            } else if (__comp > 0) {                                                               \
+                SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);                    \
+                SPLAY_LEFT(elm, field) = (head)->sph_root;                                         \
+                SPLAY_RIGHT((head)->sph_root, field) = NULL;                                       \
+            } else                                                                                 \
+                return ((head)->sph_root); /* equal key already present */                         \
+        }                                                                                          \
+        (head)->sph_root = (elm); /* elm is the new root */                                        \
+        return (NULL); /* NULL reports a successful insert */                                      \
+    }                                                                                              \
+    /* Removes the node comparing equal to elm; returns elm, or NULL when none matched */          \
+    struct type* name##_SPLAY_REMOVE(struct name* head, struct type* elm) {                        \
+        struct type* __tmp;                                                                        \
+        if (SPLAY_EMPTY(head))                                                                     \
+            return (NULL);                                                                         \
+        name##_SPLAY(head, elm);                                                                   \
+        if ((cmp)(elm, (head)->sph_root) == 0) {                                                   \
+            if (SPLAY_LEFT((head)->sph_root, field) == NULL) {                                     \
+                (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);                           \
+            } else {                                                                               \
+                __tmp = SPLAY_RIGHT((head)->sph_root, field);                                      \
+                (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);                            \
+                name##_SPLAY(head, elm); /* re-splay within the left subtree */                    \
+                SPLAY_RIGHT((head)->sph_root, field) = __tmp; /* reattach the old right side */    \
+            }                                                                                      \
+            return (elm);                                                                          \
+        }                                                                                          \
+        return (NULL);                                                                             \
+    }                                                                                              \
+    /* Reshapes the tree so that the node nearest to elm's key ends up at the root */              \
+    void name##_SPLAY(struct name* head, struct type* elm) {                                       \
+        struct type __node, *__left, *__right, *__tmp;                                             \
+        int __comp;                                                                                \
+                                                                                                   \
+        SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;                           \
+        __left = __right = &__node; /* both chains start at the scratch node */                    \
+                                                                                                   \
+        while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) {                                     \
+            if (__comp < 0) {                                                                      \
+                __tmp = SPLAY_LEFT((head)->sph_root, field);                                       \
+                if (__tmp == NULL)                                                                 \
+                    break;                                                                         \
+                if ((cmp)(elm, __tmp) < 0) {                                                       \
+                    SPLAY_ROTATE_RIGHT(head, __tmp, field);                                        \
+                    if (SPLAY_LEFT((head)->sph_root, field) == NULL)                               \
+                        break;                                                                     \
+                }                                                                                  \
+                SPLAY_LINKLEFT(head, __right, field);                                              \
+            } else if (__comp > 0) {                                                               \
+                __tmp = SPLAY_RIGHT((head)->sph_root, field);                                      \
+                if (__tmp == NULL)                                                                 \
+                    break;                                                                         \
+                if ((cmp)(elm, __tmp) > 0) {                                                       \
+                    SPLAY_ROTATE_LEFT(head, __tmp, field);                                         \
+                    if (SPLAY_RIGHT((head)->sph_root, field) == NULL)                              \
+                        break;                                                                     \
+                }                                                                                  \
+                SPLAY_LINKRIGHT(head, __left, field);                                              \
+            }                                                                                      \
+        }                                                                                          \
+        SPLAY_ASSEMBLE(head, &__node, __left, __right, field);                                     \
+    }                                                                                              \
+                                                                                                   \
+    /* Splay toward the minimum (__comp < 0) or the maximum (__comp > 0) element.                  \
+     * Used to find minimum or maximum element in tree; __comp == 0 leaves it as is.               \
+     */                                                                                            \
+    void name##_SPLAY_MINMAX(struct name* head, int __comp) {                                      \
+        struct type __node, *__left, *__right, *__tmp;                                             \
+                                                                                                   \
+        SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;                           \
+        __left = __right = &__node;                                                                \
+                                                                                                   \
+        while (__comp != 0) { /* 0 picks no direction, so do not loop at all */                    \
+            if (__comp < 0) {                                                                      \
+                __tmp = SPLAY_LEFT((head)->sph_root, field);                                       \
+                if (__tmp == NULL)                                                                 \
+                    break;                                                                         \
+                if (__comp < 0) { /* always true in this branch */                                 \
+                    SPLAY_ROTATE_RIGHT(head, __tmp, field);                                        \
+                    if (SPLAY_LEFT((head)->sph_root, field) == NULL)                               \
+                        break;                                                                     \
+                }                                                                                  \
+                SPLAY_LINKLEFT(head, __right, field);                                              \
+            } else if (__comp > 0) {                                                               \
+                __tmp = SPLAY_RIGHT((head)->sph_root, field);                                      \
+                if (__tmp == NULL)                                                                 \
+                    break;                                                                         \
+                if (__comp > 0) { /* always true in this branch */                                 \
+                    SPLAY_ROTATE_LEFT(head, __tmp, field);                                         \
+                    if (SPLAY_RIGHT((head)->sph_root, field) == NULL)                              \
+                        break;                                                                     \
+                }                                                                                  \
+                SPLAY_LINKRIGHT(head, __left, field);                                              \
+            }                                                                                      \
+        }                                                                                          \
+        SPLAY_ASSEMBLE(head, &__node, __left, __right, field);                                     \
+    }
+
+#define SPLAY_NEGINF -1 /* direction argument: splay toward the minimum */
+#define SPLAY_INF 1 /* direction argument: splay toward the maximum */
+
+#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y) /* x: tree head, y: element */
+#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y)
+#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y)
+#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y)
+#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
+#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL : name##_SPLAY_MIN_MAX(x, SPLAY_INF))
+
+#define SPLAY_FOREACH(x, name, head) /* ascending walk; every step re-splays the tree */           \
+    for ((x) = SPLAY_MIN(name, head); (x) != NULL; (x) = SPLAY_NEXT(name, head, x))
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type) /* declares struct name: the handle of a red-black tree of type */     \
+    struct name {                                                                                  \
+        struct type* rbh_root; /* root of the tree */                                              \
+    }
+
+#define RB_INITIALIZER(root)                                                                       \
+    { NULL } /* empty tree: rbh_root starts out null; the root argument is unused */
+
+#define RB_INIT(root)                                                                              \
+    do {                                                                                           \
+        (root)->rbh_root = NULL; /* reset the head to the empty tree */                            \
+    } while (/*CONSTCOND*/ 0)
+
+#define RB_BLACK 0 /* RB_BLACK and RB_RED are the values stored in rbe_color */
+#define RB_RED 1
+#define RB_ENTRY(type) /* anonymous link struct to embed in each node of type */                   \
+    struct {                                                                                       \
+        struct type* rbe_left;   /* left element */                                                \
+        struct type* rbe_right;  /* right element */                                               \
+        struct type* rbe_parent; /* parent element */                                              \
+        int rbe_color;           /* node color */                                                  \
+    }
+
+#define RB_LEFT(elm, field) (elm)->field.rbe_left /* left child link of elm */
+#define RB_RIGHT(elm, field) (elm)->field.rbe_right /* right child link of elm */
+#define RB_PARENT(elm, field) (elm)->field.rbe_parent /* parent link of elm */
+#define RB_COLOR(elm, field) (elm)->field.rbe_color /* RB_BLACK or RB_RED */
+#define RB_ROOT(head) (head)->rbh_root /* root pointer stored in the head */
+#define RB_EMPTY(head) (RB_ROOT(head) == NULL) /* true when the tree has no root */
+
+#define RB_SET(elm, parent, field) /* resets elm: red, no children, parent link = parent */        \
+    do {                                                                                           \
+        RB_PARENT(elm, field) = parent;                                                            \
+        RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL;                                         \
+        RB_COLOR(elm, field) = RB_RED;                                                             \
+    } while (/*CONSTCOND*/ 0)
+
+#define RB_SET_BLACKRED(black, red, field) /* paints black RB_BLACK and red RB_RED */              \
+    do {                                                                                           \
+        RB_COLOR(black, field) = RB_BLACK;                                                         \
+        RB_COLOR(red, field) = RB_RED;                                                             \
+    } while (/*CONSTCOND*/ 0)
+
+#ifndef RB_AUGMENT
+#define RB_AUGMENT(x) /* no-op default; define RB_AUGMENT first to override */                     \
+    do {                                                                                           \
+    } while (0)
+#endif
+
+#define RB_ROTATE_LEFT(head, elm, tmp, field)                                                      \
+    do {                                                                                           \
+        (tmp) = RB_RIGHT(elm, field); /* tmp: right child, the new subtree root */                 \
+        if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != NULL) {                                \
+            RB_PARENT(RB_LEFT(tmp, field), field) = (elm); /* tmp's left subtree moves to elm */   \
+        }                                                                                          \
+        RB_AUGMENT(elm);                                                                           \
+        if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) {                             \
+            if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))                                    \
+                RB_LEFT(RB_PARENT(elm, field), field) = (tmp);                                     \
+            else                                                                                   \
+                RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);                                    \
+        } else                                                                                     \
+            (head)->rbh_root = (tmp); /* elm was the tree root */                                  \
+        RB_LEFT(tmp, field) = (elm); /* elm drops to tmp's left */                                 \
+        RB_PARENT(elm, field) = (tmp);                                                             \
+        RB_AUGMENT(tmp);                                                                           \
+        if ((RB_PARENT(tmp, field)))                                                               \
+            RB_AUGMENT(RB_PARENT(tmp, field));                                                     \
+    } while (/*CONSTCOND*/ 0)
+
+#define RB_ROTATE_RIGHT(head, elm, tmp, field)                                                     \
+    do {                                                                                           \
+        (tmp) = RB_LEFT(elm, field); /* tmp: left child, the new subtree root */                   \
+        if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != NULL) {                                \
+            RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); /* tmp's right subtree moves to elm */ \
+        }                                                                                          \
+        RB_AUGMENT(elm);                                                                           \
+        if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) {                             \
+            if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))                                    \
+                RB_LEFT(RB_PARENT(elm, field), field) = (tmp);                                     \
+            else                                                                                   \
+                RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);                                    \
+        } else                                                                                     \
+            (head)->rbh_root = (tmp); /* elm was the tree root */                                  \
+        RB_RIGHT(tmp, field) = (elm); /* elm drops to tmp's right */                               \
+        RB_PARENT(elm, field) = (tmp);                                                             \
+        RB_AUGMENT(tmp);                                                                           \
+        if ((RB_PARENT(tmp, field)))                                                               \
+            RB_AUGMENT(RB_PARENT(tmp, field));                                                     \
+    } while (/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions */
+#define RB_PROTOTYPE(name, type, field, cmp) RB_PROTOTYPE_INTERNAL(name, type, field, cmp, )
+#define RB_PROTOTYPE_STATIC(name, type, field, cmp) /* same declarations, prefixed with static */  \
+    RB_PROTOTYPE_INTERNAL(name, type, field, cmp, static)
+#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) /* attr prefixes every prototype */    \
+    RB_PROTOTYPE_INSERT_COLOR(name, type, attr);                                                   \
+    RB_PROTOTYPE_REMOVE_COLOR(name, type, attr);                                                   \
+    RB_PROTOTYPE_INSERT(name, type, attr);                                                         \
+    RB_PROTOTYPE_REMOVE(name, type, attr);                                                         \
+    RB_PROTOTYPE_FIND(name, type, attr);                                                           \
+    RB_PROTOTYPE_NFIND(name, type, attr);                                                          \
+    RB_PROTOTYPE_FIND_LIGHT(name, type, attr);                                                     \
+    RB_PROTOTYPE_NFIND_LIGHT(name, type, attr);                                                    \
+    RB_PROTOTYPE_NEXT(name, type, attr);                                                           \
+    RB_PROTOTYPE_PREV(name, type, attr);                                                           \
+    RB_PROTOTYPE_MINMAX(name, type, attr);
+#define RB_PROTOTYPE_INSERT_COLOR(name, type, attr) /* one prototype per generated function */     \
+    attr void name##_RB_INSERT_COLOR(struct name*, struct type*)
+#define RB_PROTOTYPE_REMOVE_COLOR(name, type, attr)                                                \
+    attr void name##_RB_REMOVE_COLOR(struct name*, struct type*, struct type*)
+#define RB_PROTOTYPE_REMOVE(name, type, attr)                                                      \
+    attr struct type* name##_RB_REMOVE(struct name*, struct type*)
+#define RB_PROTOTYPE_INSERT(name, type, attr)                                                      \
+    attr struct type* name##_RB_INSERT(struct name*, struct type*)
+#define RB_PROTOTYPE_FIND(name, type, attr)                                                        \
+    attr struct type* name##_RB_FIND(struct name*, struct type*)
+#define RB_PROTOTYPE_NFIND(name, type, attr)                                                       \
+    attr struct type* name##_RB_NFIND(struct name*, struct type*)
+#define RB_PROTOTYPE_FIND_LIGHT(name, type, attr) /* takes a const void* in place of a node */     \
+    attr struct type* name##_RB_FIND_LIGHT(struct name*, const void*)
+#define RB_PROTOTYPE_NFIND_LIGHT(name, type, attr) /* takes a const void* in place of a node */    \
+    attr struct type* name##_RB_NFIND_LIGHT(struct name*, const void*)
+#define RB_PROTOTYPE_NEXT(name, type, attr) attr struct type* name##_RB_NEXT(struct type*)
+#define RB_PROTOTYPE_PREV(name, type, attr) attr struct type* name##_RB_PREV(struct type*)
+#define RB_PROTOTYPE_MINMAX(name, type, attr) attr struct type* name##_RB_MINMAX(struct name*, int)
+
+/* Function generators.
+ * Each RB_GENERATE_* macro below expands to the definition of the matching name##_RB_* function.
+ */
+#define RB_GENERATE_WITHOUT_COMPARE(name, type, field)                                             \
+    RB_GENERATE_WITHOUT_COMPARE_INTERNAL(name, type, field, ) /* attr left empty */
+#define RB_GENERATE_WITHOUT_COMPARE_STATIC(name, type, field)                                      \
+    RB_GENERATE_WITHOUT_COMPARE_INTERNAL(name, type, field, static) /* attr = static */
+#define RB_GENERATE_WITHOUT_COMPARE_INTERNAL(name, type, field, attr)                              \
+    RB_GENERATE_REMOVE_COLOR(name, type, field, attr) /* functions that need no comparator */      \
+    RB_GENERATE_REMOVE(name, type, field, attr)                                                    \
+    RB_GENERATE_NEXT(name, type, field, attr)                                                      \
+    RB_GENERATE_PREV(name, type, field, attr)                                                      \
+    RB_GENERATE_MINMAX(name, type, field, attr)
+
+#define RB_GENERATE_WITH_COMPARE(name, type, field, cmp, lcmp)                                     \
+    RB_GENERATE_WITH_COMPARE_INTERNAL(name, type, field, cmp, lcmp, ) /* attr left empty */
+#define RB_GENERATE_WITH_COMPARE_STATIC(name, type, field, cmp, lcmp)                              \
+    RB_GENERATE_WITH_COMPARE_INTERNAL(name, type, field, cmp, lcmp, static) /* attr = static */
+#define RB_GENERATE_WITH_COMPARE_INTERNAL(name, type, field, cmp, lcmp, attr)                      \
+    RB_GENERATE_INSERT_COLOR(name, type, field, attr) /* fixup called by name##_RB_INSERT */       \
+    RB_GENERATE_INSERT(name, type, field, cmp, attr) /* cmp(elm, node) returns <0, 0 or >0 */      \
+    RB_GENERATE_FIND(name, type, field, cmp, attr)                                                 \
+    RB_GENERATE_NFIND(name, type, field, cmp, attr)                                                \
+    RB_GENERATE_FIND_LIGHT(name, type, field, lcmp, attr) /* lcmp(const void* key, node) */        \
+    RB_GENERATE_NFIND_LIGHT(name, type, field, lcmp, attr)
+
+/* RB_GENERATE_ALL*: compare-free functions plus the cmp-based INSERT_COLOR, INSERT, FIND and NFIND.
+ * The parameter list carries no light comparator, so FIND_LIGHT/NFIND_LIGHT are not generated here;
+ * use RB_GENERATE_WITH_COMPARE* when they are needed. The generators are listed one by one because
+ * RB_GENERATE_WITH_COMPARE_INTERNAL takes six arguments (cmp and lcmp) and cannot be called with
+ * the five that are available in this macro.
+ */
+#define RB_GENERATE_ALL(name, type, field, cmp) RB_GENERATE_ALL_INTERNAL(name, type, field, cmp, )
+#define RB_GENERATE_ALL_STATIC(name, type, field, cmp)                                             \
+    RB_GENERATE_ALL_INTERNAL(name, type, field, cmp, static)
+#define RB_GENERATE_ALL_INTERNAL(name, type, field, cmp, attr)                                     \
+    RB_GENERATE_WITHOUT_COMPARE_INTERNAL(name, type, field, attr)                                  \
+    RB_GENERATE_INSERT_COLOR(name, type, field, attr)                                              \
+    RB_GENERATE_INSERT(name, type, field, cmp, attr)                                               \
+    RB_GENERATE_FIND(name, type, field, cmp, attr)                                                 \
+    RB_GENERATE_NFIND(name, type, field, cmp, attr)
+
+#define RB_GENERATE_INSERT_COLOR(name, type, field, attr) /* recolor/rotate after linking elm */   \
+    attr void name##_RB_INSERT_COLOR(struct name* head, struct type* elm) {                        \
+        struct type *parent, *gparent, *tmp;                                                       \
+        while ((parent = RB_PARENT(elm, field)) != NULL && RB_COLOR(parent, field) == RB_RED) {    \
+            gparent = RB_PARENT(parent, field); /* loop runs while the parent of elm is red */     \
+            if (parent == RB_LEFT(gparent, field)) {                                               \
+                tmp = RB_RIGHT(gparent, field); /* tmp: sibling of parent */                       \
+                if (tmp && RB_COLOR(tmp, field) == RB_RED) { /* red sibling: recolor, move up */   \
+                    RB_COLOR(tmp, field) = RB_BLACK;                                               \
+                    RB_SET_BLACKRED(parent, gparent, field);                                       \
+                    elm = gparent;                                                                 \
+                    continue;                                                                      \
+                }                                                                                  \
+                if (RB_RIGHT(parent, field) == elm) { /* inner child: rotate at parent */          \
+                    RB_ROTATE_LEFT(head, parent, tmp, field);                                      \
+                    tmp = parent;                                                                  \
+                    parent = elm;                                                                  \
+                    elm = tmp;                                                                     \
+                }                                                                                  \
+                RB_SET_BLACKRED(parent, gparent, field);                                           \
+                RB_ROTATE_RIGHT(head, gparent, tmp, field);                                        \
+            } else { /* mirror of the branch above */                                              \
+                tmp = RB_LEFT(gparent, field);                                                     \
+                if (tmp && RB_COLOR(tmp, field) == RB_RED) {                                       \
+                    RB_COLOR(tmp, field) = RB_BLACK;                                               \
+                    RB_SET_BLACKRED(parent, gparent, field);                                       \
+                    elm = gparent;                                                                 \
+                    continue;                                                                      \
+                }                                                                                  \
+                if (RB_LEFT(parent, field) == elm) {                                               \
+                    RB_ROTATE_RIGHT(head, parent, tmp, field);                                     \
+                    tmp = parent;                                                                  \
+                    parent = elm;                                                                  \
+                    elm = tmp;                                                                     \
+                }                                                                                  \
+                RB_SET_BLACKRED(parent, gparent, field);                                           \
+                RB_ROTATE_LEFT(head, gparent, tmp, field);                                         \
+            }                                                                                      \
+        }                                                                                          \
+        RB_COLOR(head->rbh_root, field) = RB_BLACK; /* the root always ends up black */            \
+    }
+
+#define RB_GENERATE_REMOVE_COLOR(name, type, field, attr) /* fixup after a black unlink */         \
+    attr void name##_RB_REMOVE_COLOR(struct name* head, struct type* parent, struct type* elm) {   \
+        struct type* tmp;                                                                          \
+        while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && elm != RB_ROOT(head)) {        \
+            if (RB_LEFT(parent, field) == elm) {                                                   \
+                tmp = RB_RIGHT(parent, field); /* tmp: sibling of elm */                           \
+                if (RB_COLOR(tmp, field) == RB_RED) { /* tmp is used without a NULL check */       \
+                    RB_SET_BLACKRED(tmp, parent, field);                                           \
+                    RB_ROTATE_LEFT(head, parent, tmp, field);                                      \
+                    tmp = RB_RIGHT(parent, field);                                                 \
+                }                                                                                  \
+                if ((RB_LEFT(tmp, field) == NULL ||                                                \
+                     RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&                          \
+                    (RB_RIGHT(tmp, field) == NULL ||                                               \
+                     RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {                         \
+                    RB_COLOR(tmp, field) = RB_RED; /* tmp has no red child: move up */             \
+                    elm = parent;                                                                  \
+                    parent = RB_PARENT(elm, field);                                                \
+                } else {                                                                           \
+                    if (RB_RIGHT(tmp, field) == NULL ||                                            \
+                        RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {                       \
+                        struct type* oleft;                                                        \
+                        if ((oleft = RB_LEFT(tmp, field)) != NULL)                                 \
+                            RB_COLOR(oleft, field) = RB_BLACK;                                     \
+                        RB_COLOR(tmp, field) = RB_RED;                                             \
+                        RB_ROTATE_RIGHT(head, tmp, oleft, field);                                  \
+                        tmp = RB_RIGHT(parent, field);                                             \
+                    }                                                                              \
+                    RB_COLOR(tmp, field) = RB_COLOR(parent, field);                                \
+                    RB_COLOR(parent, field) = RB_BLACK;                                            \
+                    if (RB_RIGHT(tmp, field))                                                      \
+                        RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;                          \
+                    RB_ROTATE_LEFT(head, parent, tmp, field);                                      \
+                    elm = RB_ROOT(head); /* final step then blackens the root */                   \
+                    break;                                                                         \
+                }                                                                                  \
+            } else { /* mirror of the branch above */                                              \
+                tmp = RB_LEFT(parent, field);                                                      \
+                if (RB_COLOR(tmp, field) == RB_RED) {                                              \
+                    RB_SET_BLACKRED(tmp, parent, field);                                           \
+                    RB_ROTATE_RIGHT(head, parent, tmp, field);                                     \
+                    tmp = RB_LEFT(parent, field);                                                  \
+                }                                                                                  \
+                if ((RB_LEFT(tmp, field) == NULL ||                                                \
+                     RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&                          \
+                    (RB_RIGHT(tmp, field) == NULL ||                                               \
+                     RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {                         \
+                    RB_COLOR(tmp, field) = RB_RED;                                                 \
+                    elm = parent;                                                                  \
+                    parent = RB_PARENT(elm, field);                                                \
+                } else {                                                                           \
+                    if (RB_LEFT(tmp, field) == NULL ||                                             \
+                        RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {                        \
+                        struct type* oright;                                                       \
+                        if ((oright = RB_RIGHT(tmp, field)) != NULL)                               \
+                            RB_COLOR(oright, field) = RB_BLACK;                                    \
+                        RB_COLOR(tmp, field) = RB_RED;                                             \
+                        RB_ROTATE_LEFT(head, tmp, oright, field);                                  \
+                        tmp = RB_LEFT(parent, field);                                              \
+                    }                                                                              \
+                    RB_COLOR(tmp, field) = RB_COLOR(parent, field);                                \
+                    RB_COLOR(parent, field) = RB_BLACK;                                            \
+                    if (RB_LEFT(tmp, field))                                                       \
+                        RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;                           \
+                    RB_ROTATE_RIGHT(head, parent, tmp, field);                                     \
+                    elm = RB_ROOT(head);                                                           \
+                    break;                                                                         \
+                }                                                                                  \
+            }                                                                                      \
+        }                                                                                          \
+        if (elm)                                                                                   \
+            RB_COLOR(elm, field) = RB_BLACK;                                                       \
+    }
+
+#define RB_GENERATE_REMOVE(name, type, field, attr) /* unlinks elm and returns it */               \
+    attr struct type* name##_RB_REMOVE(struct name* head, struct type* elm) {                      \
+        struct type *child, *parent, *old = elm;                                                   \
+        int color;                                                                                 \
+        if (RB_LEFT(elm, field) == NULL)                                                           \
+            child = RB_RIGHT(elm, field);                                                          \
+        else if (RB_RIGHT(elm, field) == NULL)                                                     \
+            child = RB_LEFT(elm, field);                                                           \
+        else { /* two children: splice in the successor */                                         \
+            struct type* left;                                                                     \
+            elm = RB_RIGHT(elm, field);                                                            \
+            while ((left = RB_LEFT(elm, field)) != NULL)                                           \
+                elm = left;                                                                        \
+            child = RB_RIGHT(elm, field);                                                          \
+            parent = RB_PARENT(elm, field);                                                        \
+            color = RB_COLOR(elm, field);                                                          \
+            if (child)                                                                             \
+                RB_PARENT(child, field) = parent;                                                  \
+            if (parent) {                                                                          \
+                if (RB_LEFT(parent, field) == elm)                                                 \
+                    RB_LEFT(parent, field) = child;                                                \
+                else                                                                               \
+                    RB_RIGHT(parent, field) = child;                                               \
+                RB_AUGMENT(parent);                                                                \
+            } else                                                                                 \
+                RB_ROOT(head) = child;                                                             \
+            if (RB_PARENT(elm, field) == old) /* successor was a direct child of old */            \
+                parent = elm;                                                                      \
+            (elm)->field = (old)->field; /* copies the whole tree entry of old */                  \
+            if (RB_PARENT(old, field)) {                                                           \
+                if (RB_LEFT(RB_PARENT(old, field), field) == old)                                  \
+                    RB_LEFT(RB_PARENT(old, field), field) = elm;                                   \
+                else                                                                               \
+                    RB_RIGHT(RB_PARENT(old, field), field) = elm;                                  \
+                RB_AUGMENT(RB_PARENT(old, field));                                                 \
+            } else                                                                                 \
+                RB_ROOT(head) = elm;                                                               \
+            RB_PARENT(RB_LEFT(old, field), field) = elm; /* left child is non-NULL here */         \
+            if (RB_RIGHT(old, field))                                                              \
+                RB_PARENT(RB_RIGHT(old, field), field) = elm;                                      \
+            if (parent) {                                                                          \
+                left = parent;                                                                     \
+                do {                                                                               \
+                    RB_AUGMENT(left);                                                              \
+                } while ((left = RB_PARENT(left, field)) != NULL);                                 \
+            }                                                                                      \
+            goto color;                                                                            \
+        }                                                                                          \
+        parent = RB_PARENT(elm, field);                                                            \
+        color = RB_COLOR(elm, field);                                                              \
+        if (child)                                                                                 \
+            RB_PARENT(child, field) = parent;                                                      \
+        if (parent) {                                                                              \
+            if (RB_LEFT(parent, field) == elm)                                                     \
+                RB_LEFT(parent, field) = child;                                                    \
+            else                                                                                   \
+                RB_RIGHT(parent, field) = child;                                                   \
+            RB_AUGMENT(parent);                                                                    \
+        } else                                                                                     \
+            RB_ROOT(head) = child;                                                                 \
+    color:                                                                                         \
+        if (color == RB_BLACK) /* fixup only after a black unlink */                               \
+            name##_RB_REMOVE_COLOR(head, parent, child);                                           \
+        return (old);                                                                              \
+    }
+
+#define RB_GENERATE_INSERT(name, type, field, cmp, attr)                                           \
+    /* Links elm into the tree: returns NULL, or the node that already holds an equal key */       \
+    attr struct type* name##_RB_INSERT(struct name* head, struct type* elm) {                      \
+        struct type* parent = NULL;                                                                \
+        struct type* cursor = RB_ROOT(head);                                                       \
+        int order = 0;                                                                             \
+        while (cursor != NULL) {                                                                   \
+            parent = cursor;                                                                       \
+            order = (cmp)(elm, parent);                                                            \
+            if (order == 0)                                                                        \
+                return (cursor);                                                                   \
+            cursor = (order < 0) ? RB_LEFT(cursor, field) : RB_RIGHT(cursor, field);               \
+        }                                                                                          \
+        RB_SET(elm, parent, field);                                                                \
+        if (parent == NULL) {                                                                      \
+            RB_ROOT(head) = elm;                                                                   \
+        } else {                                                                                   \
+            if (order < 0)                                                                         \
+                RB_LEFT(parent, field) = elm;                                                      \
+            else                                                                                   \
+                RB_RIGHT(parent, field) = elm;                                                     \
+            RB_AUGMENT(parent);                                                                    \
+        }                                                                                          \
+        name##_RB_INSERT_COLOR(head, elm);                                                         \
+        return (NULL);                                                                             \
+    }
+
+#define RB_GENERATE_FIND(name, type, field, cmp, attr)                                             \
+    /* Exact-match lookup: the node comparing equal to elm, or NULL if there is none */            \
+    attr struct type* name##_RB_FIND(struct name* head, struct type* elm) {                        \
+        struct type* node = RB_ROOT(head);                                                         \
+        while (node != NULL) {                                                                     \
+            const int order = cmp(elm, node);                                                      \
+            if (order == 0)                                                                        \
+                return (node);                                                                     \
+            node = (order < 0) ? RB_LEFT(node, field) : RB_RIGHT(node, field);                     \
+        }                                                                                          \
+        return (NULL);                                                                             \
+    }
+
+#define RB_GENERATE_NFIND(name, type, field, cmp, attr)                                            \
+    /* Lower-bound lookup: the first node that is not ordered before elm, or NULL */               \
+    attr struct type* name##_RB_NFIND(struct name* head, struct type* elm) {                       \
+        struct type* best = NULL;                                                                  \
+        struct type* node = RB_ROOT(head);                                                         \
+        while (node != NULL) {                                                                     \
+            const int order = cmp(elm, node);                                                      \
+            if (order == 0)                                                                        \
+                return (node);                                                                     \
+            if (order > 0) {                                                                       \
+                node = RB_RIGHT(node, field);                                                      \
+            } else {                                                                               \
+                best = node;                                                                       \
+                node = RB_LEFT(node, field);                                                       \
+            }                                                                                      \
+        }                                                                                          \
+        return (best);                                                                             \
+    }
+
+#define RB_GENERATE_FIND_LIGHT(name, type, field, lcmp, attr)                                      \
+    /* Exact-match lookup by light key: the node lcmp reports equal to lelm, or NULL */            \
+    attr struct type* name##_RB_FIND_LIGHT(struct name* head, const void* lelm) {                  \
+        struct type* node = RB_ROOT(head);                                                         \
+        while (node != NULL) {                                                                     \
+            const int order = lcmp(lelm, node);                                                    \
+            if (order == 0)                                                                        \
+                return (node);                                                                     \
+            node = (order < 0) ? RB_LEFT(node, field) : RB_RIGHT(node, field);                     \
+        }                                                                                          \
+        return (NULL);                                                                             \
+    }
+
+#define RB_GENERATE_NFIND_LIGHT(name, type, field, lcmp, attr)                                     \
+    /* Lower-bound lookup by light key: the first node not ordered before lelm, or NULL */         \
+    attr struct type* name##_RB_NFIND_LIGHT(struct name* head, const void* lelm) {                 \
+        struct type* best = NULL;                                                                  \
+        struct type* node = RB_ROOT(head);                                                         \
+        while (node != NULL) {                                                                     \
+            const int order = lcmp(lelm, node);                                                    \
+            if (order == 0)                                                                        \
+                return (node);                                                                     \
+            if (order > 0) {                                                                       \
+                node = RB_RIGHT(node, field);                                                      \
+            } else {                                                                               \
+                best = node;                                                                       \
+                node = RB_LEFT(node, field);                                                       \
+            }                                                                                      \
+        }                                                                                          \
+        return (best);                                                                             \
+    }
+
+#define RB_GENERATE_NEXT(name, type, field, attr)                                                  \
+    /* In-order successor of elm, or NULL when elm is the last node */                             \
+    attr struct type* name##_RB_NEXT(struct type* elm) {                                           \
+        struct type* up;                                                                           \
+        if (RB_RIGHT(elm, field) != NULL) {                                                        \
+            for (elm = RB_RIGHT(elm, field); RB_LEFT(elm, field) != NULL;)                         \
+                elm = RB_LEFT(elm, field);                                                         \
+            return (elm);                                                                          \
+        }                                                                                          \
+        up = RB_PARENT(elm, field);                                                                \
+        if (up != NULL && elm == RB_LEFT(up, field))                                               \
+            return (up);                                                                           \
+        while (up != NULL && elm == RB_RIGHT(up, field)) {                                         \
+            elm = up;                                                                              \
+            up = RB_PARENT(elm, field);                                                            \
+        }                                                                                          \
+        return (up);                                                                               \
+    }
+
+#define RB_GENERATE_PREV(name, type, field, attr)                                                  \
+    /* In-order predecessor of elm, or NULL when elm is the first node */                          \
+    attr struct type* name##_RB_PREV(struct type* elm) {                                           \
+        struct type* up;                                                                           \
+        if (RB_LEFT(elm, field) != NULL) {                                                         \
+            for (elm = RB_LEFT(elm, field); RB_RIGHT(elm, field) != NULL;)                         \
+                elm = RB_RIGHT(elm, field);                                                        \
+            return (elm);                                                                          \
+        }                                                                                          \
+        up = RB_PARENT(elm, field);                                                                \
+        if (up != NULL && elm == RB_RIGHT(up, field))                                              \
+            return (up);                                                                           \
+        while (up != NULL && elm == RB_LEFT(up, field)) {                                          \
+            elm = up;                                                                              \
+            up = RB_PARENT(elm, field);                                                            \
+        }                                                                                          \
+        return (up);                                                                               \
+    }
+
+#define RB_GENERATE_MINMAX(name, type, field, attr)                                                \
+    attr struct type* name##_RB_MINMAX(struct name* head, int val) {                               \
+        struct type* tmp = RB_ROOT(head);                                                          \
+        struct type* parent = NULL; /* last node seen; stays NULL if the tree is empty */          \
+        while (tmp) {                                                                              \
+            parent = tmp;                                                                          \
+            if (val < 0) /* negative val walks left (minimum), otherwise right (maximum) */        \
+                tmp = RB_LEFT(tmp, field);                                                         \
+            else                                                                                   \
+                tmp = RB_RIGHT(tmp, field);                                                        \
+        }                                                                                          \
+        return (parent);                                                                           \
+    }
+
+#define RB_NEGINF -1 /* RB_MINMAX direction: negative, so the walk goes left (minimum) */
+#define RB_INF 1 /* RB_MINMAX direction: non-negative, so the walk goes right (maximum) */
+
+#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y) name##_RB_FIND(x, y)
+#define RB_NFIND(name, x, y) name##_RB_NFIND(x, y)
+#define RB_FIND_LIGHT(name, x, y) name##_RB_FIND_LIGHT(x, y)
+#define RB_NFIND_LIGHT(name, x, y) name##_RB_NFIND_LIGHT(x, y)
+#define RB_NEXT(name, x, y) name##_RB_NEXT(y) /* x is accepted but not used */
+#define RB_PREV(name, x, y) name##_RB_PREV(y) /* x is accepted but not used */
+#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF)
+
+#define RB_FOREACH(x, name, head) /* ascending walk; the step reads (x) after the body runs */     \
+    for ((x) = RB_MIN(name, head); (x) != NULL; (x) = name##_RB_NEXT(x))
+
+#define RB_FOREACH_FROM(x, name, y) /* starts at (y); (y) then holds each successor */             \
+    for ((x) = (y); ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); (x) = (y))
+
+#define RB_FOREACH_SAFE(x, name, head, y) /* successor saved in (y) before the body runs */        \
+    for ((x) = RB_MIN(name, head); ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL);        \
+         (x) = (y))
+
+#define RB_FOREACH_REVERSE(x, name, head) /* descending walk; the step reads (x) after the body */ \
+    for ((x) = RB_MAX(name, head); (x) != NULL; (x) = name##_RB_PREV(x))
+
+#define RB_FOREACH_REVERSE_FROM(x, name, y) /* starts at (y); (y) then holds each predecessor */   \
+    for ((x) = (y); ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); (x) = (y))
+
+#define RB_FOREACH_REVERSE_SAFE(x, name, head, y) /* predecessor saved in (y) before the body */   \
+    for ((x) = RB_MAX(name, head); ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL);        \
+         (x) = (y))
+
+#endif /* _SYS_TREE_H_ */
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 01f3e9419..1b8ad476e 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -142,8 +142,6 @@ add_library(core STATIC
     hardware_interrupt_manager.h
     hle/ipc.h
     hle/ipc_helpers.h
-    hle/kernel/address_arbiter.cpp
-    hle/kernel/address_arbiter.h
     hle/kernel/client_port.cpp
     hle/kernel/client_port.h
     hle/kernel/client_session.cpp
@@ -157,13 +155,19 @@ add_library(core STATIC
     hle/kernel/handle_table.h
     hle/kernel/hle_ipc.cpp
     hle/kernel/hle_ipc.h
+    hle/kernel/k_address_arbiter.cpp
+    hle/kernel/k_address_arbiter.h
     hle/kernel/k_affinity_mask.h
+    hle/kernel/k_condition_variable.cpp
+    hle/kernel/k_condition_variable.h
     hle/kernel/k_priority_queue.h
     hle/kernel/k_scheduler.cpp
     hle/kernel/k_scheduler.h
     hle/kernel/k_scheduler_lock.h
     hle/kernel/k_scoped_lock.h
     hle/kernel/k_scoped_scheduler_lock_and_sleep.h
+    hle/kernel/k_synchronization_object.cpp
+    hle/kernel/k_synchronization_object.h
     hle/kernel/kernel.cpp
     hle/kernel/kernel.h
     hle/kernel/memory/address_space_info.cpp
@@ -183,8 +187,6 @@ add_library(core STATIC
     hle/kernel/memory/slab_heap.h
     hle/kernel/memory/system_control.cpp
     hle/kernel/memory/system_control.h
-    hle/kernel/mutex.cpp
-    hle/kernel/mutex.h
     hle/kernel/object.cpp
     hle/kernel/object.h
     hle/kernel/physical_core.cpp
@@ -210,12 +212,10 @@ add_library(core STATIC
     hle/kernel/shared_memory.h
     hle/kernel/svc.cpp
     hle/kernel/svc.h
+    hle/kernel/svc_common.h
+    hle/kernel/svc_results.h
     hle/kernel/svc_types.h
     hle/kernel/svc_wrap.h
-    hle/kernel/synchronization_object.cpp
-    hle/kernel/synchronization_object.h
-    hle/kernel/synchronization.cpp
-    hle/kernel/synchronization.h
     hle/kernel/thread.cpp
     hle/kernel/thread.h
     hle/kernel/time_manager.cpp
@@ -635,6 +635,8 @@ if (MSVC)
         /we4267
         # 'context' : truncation from 'type1' to 'type2'
         /we4305
+        # 'function' : not all control paths return a value
+        /we4715
     )
 else()
     target_compile_options(core PRIVATE
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 70098c526..9a0151736 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -26,9 +26,10 @@ using CPUInterrupts = std::array<CPUInterruptHandler, Core::Hardware::NUM_CPU_CO
 /// Generic ARMv8 CPU interface
 class ARM_Interface : NonCopyable {
 public:
-    explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers, bool uses_wall_clock)
-        : system{system_}, interrupt_handlers{interrupt_handlers}, uses_wall_clock{
-                                                                       uses_wall_clock} {}
+    explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers_,
+                           bool uses_wall_clock_)
+        : system{system_}, interrupt_handlers{interrupt_handlers_}, uses_wall_clock{
+                                                                        uses_wall_clock_} {}
     virtual ~ARM_Interface() = default;
 
     struct ThreadContext32 {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 8aaf11eee..6c4c8e9e4 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -71,15 +71,8 @@ public:
     }
 
     void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
-        switch (exception) {
-        case Dynarmic::A32::Exception::UndefinedInstruction:
-        case Dynarmic::A32::Exception::UnpredictableInstruction:
-            break;
-        case Dynarmic::A32::Exception::Breakpoint:
-            break;
-        }
         LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
-                     static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
+                     exception, pc, MemoryReadCode(pc));
         UNIMPLEMENTED();
     }
 
@@ -181,6 +174,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
         if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
             config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
         }
+        if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
+            config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
+        }
     }
 
     return std::make_unique<Dynarmic::A32::Jit>(config);
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index d2e1dc724..4c5ebca22 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -212,6 +212,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
         if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
             config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
         }
+        if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
+            config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
+        }
     }
 
     return std::make_shared<Dynarmic::A64::Jit>(config);
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index e6c8461a5..874b5673a 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -49,6 +49,7 @@ void CoreTiming::ThreadEntry(CoreTiming& instance) {
     Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh);
     instance.on_thread_init();
     instance.ThreadLoop();
+    MicroProfileOnThreadExit();
 }
 
 void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp
index da15f764a..cebe2ce37 100644
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -143,6 +143,7 @@ u64 GetSignatureTypeDataSize(SignatureType type) {
         return 0x3C;
     }
     UNREACHABLE();
+    return 0;
 }
 
 u64 GetSignatureTypePaddingSize(SignatureType type) {
@@ -157,6 +158,7 @@ u64 GetSignatureTypePaddingSize(SignatureType type) {
         return 0x40;
     }
     UNREACHABLE();
+    return 0;
 }
 
 SignatureType Ticket::GetSignatureType() const {
@@ -169,8 +171,7 @@ SignatureType Ticket::GetSignatureType() const {
     if (const auto* ticket = std::get_if<ECDSATicket>(&data)) {
         return ticket->sig_type;
     }
-
-    UNREACHABLE();
+    throw std::bad_variant_access{};
 }
 
 TicketData& Ticket::GetData() {
@@ -183,8 +184,7 @@ TicketData& Ticket::GetData() {
     if (auto* ticket = std::get_if<ECDSATicket>(&data)) {
         return ticket->data;
     }
-
-    UNREACHABLE();
+    throw std::bad_variant_access{};
 }
 
 const TicketData& Ticket::GetData() const {
@@ -197,8 +197,7 @@ const TicketData& Ticket::GetData() const {
     if (const auto* ticket = std::get_if<ECDSATicket>(&data)) {
         return ticket->data;
     }
-
-    UNREACHABLE();
+    throw std::bad_variant_access{};
 }
 
 u64 Ticket::GetSize() const {
diff --git a/src/core/file_sys/nca_patch.cpp b/src/core/file_sys/nca_patch.cpp
index adcf0732f..a65ec6798 100644
--- a/src/core/file_sys/nca_patch.cpp
+++ b/src/core/file_sys/nca_patch.cpp
@@ -51,8 +51,8 @@ std::pair<std::size_t, std::size_t> SearchBucketEntry(u64 offset, const BlockTyp
             low = mid + 1;
         }
     }
-
     UNREACHABLE_MSG("Offset could not be found in BKTR block.");
+    return {0, 0};
 }
 } // Anonymous namespace
 
diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp
index da01002d5..431302f55 100644
--- a/src/core/file_sys/registered_cache.cpp
+++ b/src/core/file_sys/registered_cache.cpp
@@ -105,7 +105,8 @@ ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
         // TODO(DarkLordZach): Peek at NCA contents to differentiate Manual and Legal.
         return ContentRecordType::HtmlDocument;
     default:
-        UNREACHABLE_MSG("Invalid NCAContentType={:02X}", static_cast<u8>(type));
+        UNREACHABLE_MSG("Invalid NCAContentType={:02X}", type);
+        return ContentRecordType{};
     }
 }
 
diff --git a/src/core/file_sys/registered_cache.h b/src/core/file_sys/registered_cache.h
index 5b414b0f0..b08a1687a 100644
--- a/src/core/file_sys/registered_cache.h
+++ b/src/core/file_sys/registered_cache.h
@@ -67,18 +67,18 @@ public:
     virtual void Refresh() = 0;
 
     virtual bool HasEntry(u64 title_id, ContentRecordType type) const = 0;
-    virtual bool HasEntry(ContentProviderEntry entry) const;
+    bool HasEntry(ContentProviderEntry entry) const;
 
     virtual std::optional<u32> GetEntryVersion(u64 title_id) const = 0;
 
     virtual VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const = 0;
-    virtual VirtualFile GetEntryUnparsed(ContentProviderEntry entry) const;
+    VirtualFile GetEntryUnparsed(ContentProviderEntry entry) const;
 
     virtual VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const = 0;
-    virtual VirtualFile GetEntryRaw(ContentProviderEntry entry) const;
+    VirtualFile GetEntryRaw(ContentProviderEntry entry) const;
 
     virtual std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const = 0;
-    virtual std::unique_ptr<NCA> GetEntry(ContentProviderEntry entry) const;
+    std::unique_ptr<NCA> GetEntry(ContentProviderEntry entry) const;
 
     virtual std::vector<ContentProviderEntry> ListEntries() const;
 
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
deleted file mode 100644
index 20ffa7d47..000000000
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ /dev/null
@@ -1,317 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <vector>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "core/arm/exclusive_monitor.h"
-#include "core/core.h"
-#include "core/hle/kernel/address_arbiter.h"
-#include "core/hle/kernel/errors.h"
-#include "core/hle/kernel/handle_table.h"
-#include "core/hle/kernel/k_scheduler.h"
-#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/thread.h"
-#include "core/hle/kernel/time_manager.h"
-#include "core/hle/result.h"
-#include "core/memory.h"
-
-namespace Kernel {
-
-// Wake up num_to_wake (or all) threads in a vector.
-void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads,
-                                 s32 num_to_wake) {
-    // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
-    // them all.
-    std::size_t last = waiting_threads.size();
-    if (num_to_wake > 0) {
-        last = std::min(last, static_cast<std::size_t>(num_to_wake));
-    }
-
-    // Signal the waiting threads.
-    for (std::size_t i = 0; i < last; i++) {
-        waiting_threads[i]->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
-        RemoveThread(waiting_threads[i]);
-        waiting_threads[i]->WaitForArbitration(false);
-        waiting_threads[i]->ResumeFromWait();
-    }
-}
-
-AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
-AddressArbiter::~AddressArbiter() = default;
-
-ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 value,
-                                           s32 num_to_wake) {
-    switch (type) {
-    case SignalType::Signal:
-        return SignalToAddressOnly(address, num_to_wake);
-    case SignalType::IncrementAndSignalIfEqual:
-        return IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
-    case SignalType::ModifyByWaitingCountAndSignalIfEqual:
-        return ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, num_to_wake);
-    default:
-        return ERR_INVALID_ENUM_VALUE;
-    }
-}
-
-ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
-    KScopedSchedulerLock lock(system.Kernel());
-    const std::vector<std::shared_ptr<Thread>> waiting_threads =
-        GetThreadsWaitingOnAddress(address);
-    WakeThreads(waiting_threads, num_to_wake);
-    return RESULT_SUCCESS;
-}
-
-ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
-                                                              s32 num_to_wake) {
-    KScopedSchedulerLock lock(system.Kernel());
-    auto& memory = system.Memory();
-
-    // Ensure that we can write to the address.
-    if (!memory.IsValidVirtualAddress(address)) {
-        return ERR_INVALID_ADDRESS_STATE;
-    }
-
-    const std::size_t current_core = system.CurrentCoreIndex();
-    auto& monitor = system.Monitor();
-    u32 current_value;
-    do {
-        current_value = monitor.ExclusiveRead32(current_core, address);
-
-        if (current_value != static_cast<u32>(value)) {
-            return ERR_INVALID_STATE;
-        }
-        current_value++;
-    } while (!monitor.ExclusiveWrite32(current_core, address, current_value));
-
-    return SignalToAddressOnly(address, num_to_wake);
-}
-
-ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
-                                                                         s32 num_to_wake) {
-    KScopedSchedulerLock lock(system.Kernel());
-    auto& memory = system.Memory();
-
-    // Ensure that we can write to the address.
-    if (!memory.IsValidVirtualAddress(address)) {
-        return ERR_INVALID_ADDRESS_STATE;
-    }
-
-    // Get threads waiting on the address.
-    const std::vector<std::shared_ptr<Thread>> waiting_threads =
-        GetThreadsWaitingOnAddress(address);
-
-    const std::size_t current_core = system.CurrentCoreIndex();
-    auto& monitor = system.Monitor();
-    s32 updated_value;
-    do {
-        updated_value = monitor.ExclusiveRead32(current_core, address);
-
-        if (updated_value != value) {
-            return ERR_INVALID_STATE;
-        }
-        // Determine the modified value depending on the waiting count.
-        if (num_to_wake <= 0) {
-            if (waiting_threads.empty()) {
-                updated_value = value + 1;
-            } else {
-                updated_value = value - 1;
-            }
-        } else {
-            if (waiting_threads.empty()) {
-                updated_value = value + 1;
-            } else if (waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
-                updated_value = value - 1;
-            } else {
-                updated_value = value;
-            }
-        }
-    } while (!monitor.ExclusiveWrite32(current_core, address, updated_value));
-
-    WakeThreads(waiting_threads, num_to_wake);
-    return RESULT_SUCCESS;
-}
-
-ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s32 value,
-                                          s64 timeout_ns) {
-    switch (type) {
-    case ArbitrationType::WaitIfLessThan:
-        return WaitForAddressIfLessThan(address, value, timeout_ns, false);
-    case ArbitrationType::DecrementAndWaitIfLessThan:
-        return WaitForAddressIfLessThan(address, value, timeout_ns, true);
-    case ArbitrationType::WaitIfEqual:
-        return WaitForAddressIfEqual(address, value, timeout_ns);
-    default:
-        return ERR_INVALID_ENUM_VALUE;
-    }
-}
-
-ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
-                                                    bool should_decrement) {
-    auto& memory = system.Memory();
-    auto& kernel = system.Kernel();
-    Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread();
-
-    Handle event_handle = InvalidHandle;
-    {
-        KScopedSchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout);
-
-        if (current_thread->IsPendingTermination()) {
-            lock.CancelSleep();
-            return ERR_THREAD_TERMINATING;
-        }
-
-        // Ensure that we can read the address.
-        if (!memory.IsValidVirtualAddress(address)) {
-            lock.CancelSleep();
-            return ERR_INVALID_ADDRESS_STATE;
-        }
-
-        s32 current_value = static_cast<s32>(memory.Read32(address));
-        if (current_value >= value) {
-            lock.CancelSleep();
-            return ERR_INVALID_STATE;
-        }
-
-        current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
-
-        s32 decrement_value;
-
-        const std::size_t current_core = system.CurrentCoreIndex();
-        auto& monitor = system.Monitor();
-        do {
-            current_value = static_cast<s32>(monitor.ExclusiveRead32(current_core, address));
-            if (should_decrement) {
-                decrement_value = current_value - 1;
-            } else {
-                decrement_value = current_value;
-            }
-        } while (
-            !monitor.ExclusiveWrite32(current_core, address, static_cast<u32>(decrement_value)));
-
-        // Short-circuit without rescheduling, if timeout is zero.
-        if (timeout == 0) {
-            lock.CancelSleep();
-            return RESULT_TIMEOUT;
-        }
-
-        current_thread->SetArbiterWaitAddress(address);
-        InsertThread(SharedFrom(current_thread));
-        current_thread->SetStatus(ThreadStatus::WaitArb);
-        current_thread->WaitForArbitration(true);
-    }
-
-    if (event_handle != InvalidHandle) {
-        auto& time_manager = kernel.TimeManager();
-        time_manager.UnscheduleTimeEvent(event_handle);
-    }
-
-    {
-        KScopedSchedulerLock lock(kernel);
-        if (current_thread->IsWaitingForArbitration()) {
-            RemoveThread(SharedFrom(current_thread));
-            current_thread->WaitForArbitration(false);
-        }
-    }
-
-    return current_thread->GetSignalingResult();
-}
-
-ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
-    auto& memory = system.Memory();
-    auto& kernel = system.Kernel();
-    Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread();
-
-    Handle event_handle = InvalidHandle;
-    {
-        KScopedSchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout);
-
-        if (current_thread->IsPendingTermination()) {
-            lock.CancelSleep();
-            return ERR_THREAD_TERMINATING;
-        }
-
-        // Ensure that we can read the address.
-        if (!memory.IsValidVirtualAddress(address)) {
-            lock.CancelSleep();
-            return ERR_INVALID_ADDRESS_STATE;
-        }
-
-        s32 current_value = static_cast<s32>(memory.Read32(address));
-        if (current_value != value) {
-            lock.CancelSleep();
-            return ERR_INVALID_STATE;
-        }
-
-        // Short-circuit without rescheduling, if timeout is zero.
-        if (timeout == 0) {
-            lock.CancelSleep();
-            return RESULT_TIMEOUT;
-        }
-
-        current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
-        current_thread->SetArbiterWaitAddress(address);
-        InsertThread(SharedFrom(current_thread));
-        current_thread->SetStatus(ThreadStatus::WaitArb);
-        current_thread->WaitForArbitration(true);
-    }
-
-    if (event_handle != InvalidHandle) {
-        auto& time_manager = kernel.TimeManager();
-        time_manager.UnscheduleTimeEvent(event_handle);
-    }
-
-    {
-        KScopedSchedulerLock lock(kernel);
-        if (current_thread->IsWaitingForArbitration()) {
-            RemoveThread(SharedFrom(current_thread));
-            current_thread->WaitForArbitration(false);
-        }
-    }
-
-    return current_thread->GetSignalingResult();
-}
-
-void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) {
-    const VAddr arb_addr = thread->GetArbiterWaitAddress();
-    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
-
-    const auto iter =
-        std::find_if(thread_list.cbegin(), thread_list.cend(), [&thread](const auto& entry) {
-            return entry->GetPriority() >= thread->GetPriority();
-        });
-
-    if (iter == thread_list.cend()) {
-        thread_list.push_back(std::move(thread));
-    } else {
-        thread_list.insert(iter, std::move(thread));
-    }
-}
-
-void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
-    const VAddr arb_addr = thread->GetArbiterWaitAddress();
-    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
-
-    const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(),
-                                   [&thread](const auto& entry) { return thread == entry; });
-
-    if (iter != thread_list.cend()) {
-        thread_list.erase(iter);
-    }
-}
-
-std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
-    VAddr address) const {
-    const auto iter = arb_threads.find(address);
-    if (iter == arb_threads.cend()) {
-        return {};
-    }
-
-    const std::list<std::shared_ptr<Thread>>& thread_list = iter->second;
-    return {thread_list.cbegin(), thread_list.cend()};
-}
-} // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
deleted file mode 100644
index b91edc67d..000000000
--- a/src/core/hle/kernel/address_arbiter.h
+++ /dev/null
@@ -1,91 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <list>
-#include <memory>
-#include <unordered_map>
-#include <vector>
-
-#include "common/common_types.h"
-
-union ResultCode;
-
-namespace Core {
-class System;
-}
-
-namespace Kernel {
-
-class Thread;
-
-class AddressArbiter {
-public:
-    enum class ArbitrationType {
-        WaitIfLessThan = 0,
-        DecrementAndWaitIfLessThan = 1,
-        WaitIfEqual = 2,
-    };
-
-    enum class SignalType {
-        Signal = 0,
-        IncrementAndSignalIfEqual = 1,
-        ModifyByWaitingCountAndSignalIfEqual = 2,
-    };
-
-    explicit AddressArbiter(Core::System& system);
-    ~AddressArbiter();
-
-    AddressArbiter(const AddressArbiter&) = delete;
-    AddressArbiter& operator=(const AddressArbiter&) = delete;
-
-    AddressArbiter(AddressArbiter&&) = default;
-    AddressArbiter& operator=(AddressArbiter&&) = delete;
-
-    /// Signals an address being waited on with a particular signaling type.
-    ResultCode SignalToAddress(VAddr address, SignalType type, s32 value, s32 num_to_wake);
-
-    /// Waits on an address with a particular arbitration type.
-    ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);
-
-private:
-    /// Signals an address being waited on.
-    ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
-
-    /// Signals an address being waited on and increments its value if equal to the value argument.
-    ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
-
-    /// Signals an address being waited on and modifies its value based on waiting thread count if
-    /// equal to the value argument.
-    ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
-                                                             s32 num_to_wake);
-
-    /// Waits on an address if the value passed is less than the argument value,
-    /// optionally decrementing.
-    ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
-                                        bool should_decrement);
-
-    /// Waits on an address if the value passed is equal to the argument value.
-    ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
-
-    /// Wake up num_to_wake (or all) threads in a vector.
-    void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake);
-
-    /// Insert a thread into the address arbiter container
-    void InsertThread(std::shared_ptr<Thread> thread);
-
-    /// Removes a thread from the address arbiter container
-    void RemoveThread(std::shared_ptr<Thread> thread);
-
-    // Gets the threads waiting on an address.
-    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
-
-    /// List of threads waiting for a address arbiter
-    std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads;
-
-    Core::System& system;
-};
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index 8aff2227a..f8f005f15 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -33,9 +33,6 @@ ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() {
         server_port->AppendPendingSession(std::move(server));
     }
 
-    // Wake the threads waiting on the ServerPort
-    server_port->Signal();
-
     return MakeResult(std::move(client));
 }
 
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index be9eba519..e8e52900d 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -12,7 +12,7 @@
 
 namespace Kernel {
 
-ClientSession::ClientSession(KernelCore& kernel) : SynchronizationObject{kernel} {}
+ClientSession::ClientSession(KernelCore& kernel) : KSynchronizationObject{kernel} {}
 
 ClientSession::~ClientSession() {
     // This destructor will be called automatically when the last ClientSession handle is closed by
@@ -22,15 +22,6 @@ ClientSession::~ClientSession() {
     }
 }
 
-bool ClientSession::ShouldWait(const Thread* thread) const {
-    UNIMPLEMENTED();
-    return {};
-}
-
-void ClientSession::Acquire(Thread* thread) {
-    UNIMPLEMENTED();
-}
-
 bool ClientSession::IsSignaled() const {
     UNIMPLEMENTED();
     return true;
diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h
index e5e0690c2..d5c9ebee8 100644
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -7,7 +7,7 @@
 #include <memory>
 #include <string>
 
-#include "core/hle/kernel/synchronization_object.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/result.h"
 
 union ResultCode;
@@ -26,7 +26,7 @@ class KernelCore;
 class Session;
 class Thread;
 
-class ClientSession final : public SynchronizationObject {
+class ClientSession final : public KSynchronizationObject {
 public:
     explicit ClientSession(KernelCore& kernel);
     ~ClientSession() override;
@@ -49,10 +49,6 @@ public:
     ResultCode SendSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory,
                                Core::Timing::CoreTiming& core_timing);
 
-    bool ShouldWait(const Thread* thread) const override;
-
-    void Acquire(Thread* thread) override;
-
     bool IsSignaled() const override;
 
 private:
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index d4e5d88cf..7d32a39f0 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -13,12 +13,14 @@ namespace Kernel {
 constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
 constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
 constexpr ResultCode ERR_THREAD_TERMINATING{ErrorModule::Kernel, 59};
+constexpr ResultCode ERR_TERMINATION_REQUESTED{ErrorModule::Kernel, 59};
 constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
 constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
 constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103};
 constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
 constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
 constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
+constexpr ResultCode ERR_INVALID_CURRENT_MEMORY{ErrorModule::Kernel, 106};
 constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
 constexpr ResultCode ERR_INVALID_MEMORY_RANGE{ErrorModule::Kernel, 110};
 constexpr ResultCode ERR_INVALID_PROCESSOR_ID{ErrorModule::Kernel, 113};
@@ -28,6 +30,7 @@ constexpr ResultCode ERR_INVALID_POINTER{ErrorModule::Kernel, 115};
 constexpr ResultCode ERR_INVALID_COMBINATION{ErrorModule::Kernel, 116};
 constexpr ResultCode RESULT_TIMEOUT{ErrorModule::Kernel, 117};
 constexpr ResultCode ERR_SYNCHRONIZATION_CANCELED{ErrorModule::Kernel, 118};
+constexpr ResultCode ERR_CANCELLED{ErrorModule::Kernel, 118};
 constexpr ResultCode ERR_OUT_OF_RANGE{ErrorModule::Kernel, 119};
 constexpr ResultCode ERR_INVALID_ENUM_VALUE{ErrorModule::Kernel, 120};
 constexpr ResultCode ERR_NOT_FOUND{ErrorModule::Kernel, 121};
diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp
new file mode 100644
index 000000000..d9e702f13
--- /dev/null
+++ b/src/core/hle/kernel/k_address_arbiter.cpp
@@ -0,0 +1,367 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/arm/exclusive_monitor.h"
+#include "core/core.h"
+#include "core/hle/kernel/k_address_arbiter.h"
+#include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/svc_results.h"
+#include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/time_manager.h"
+#include "core/memory.h"
+
+namespace Kernel {
+
+KAddressArbiter::KAddressArbiter(Core::System& system_)
+    : system{system_}, kernel{system.Kernel()} {}
+KAddressArbiter::~KAddressArbiter() = default;
+
+namespace {
+
+bool ReadFromUser(Core::System& system, s32* out, VAddr address) {
+    *out = system.Memory().Read32(address);
+    return true;
+}
+
+bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 value) {
+    auto& monitor = system.Monitor();
+    const auto current_core = system.CurrentCoreIndex();
+
+    // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable.
+    // TODO(bunnei): We should call CanAccessAtomic(..) here.
+
+    // Exclusively load the current value from the address.
+    const s32 current_value = static_cast<s32>(monitor.ExclusiveRead32(current_core, address));
+
+    // Compare it to the desired one.
+    if (current_value < value) {
+        // If less than, we want to try to decrement.
+        const s32 decrement_value = current_value - 1;
+
+        // Decrement and try to store.
+        if (!monitor.ExclusiveWrite32(current_core, address, static_cast<u32>(decrement_value))) {
+            // The store failed: retry, and let the retry report the value it observed via out.
+            return DecrementIfLessThan(system, out, address, value);
+        }
+    } else {
+        // Otherwise, clear our exclusive hold and finish.
+        monitor.ClearExclusive();
+    }
+
+    // We're done: report the value that was actually compared (and decremented, if less).
+    *out = current_value;
+    return true;
+}
+
+bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32 new_value) {
+    auto& monitor = system.Monitor();
+    const auto current_core = system.CurrentCoreIndex();
+
+    // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable.
+    // TODO(bunnei): We should call CanAccessAtomic(..) here.
+
+    // Exclusively load the current value from the address.
+    const s32 current_value = static_cast<s32>(monitor.ExclusiveRead32(current_core, address));
+
+    // Compare it to the desired one.
+    if (current_value == value) {
+        // If equal, we want to try to write the new value.
+
+        // Try to store.
+        if (!monitor.ExclusiveWrite32(current_core, address, static_cast<u32>(new_value))) {
+            // The store failed: retry, and let the retry report the value it observed via out.
+            return UpdateIfEqual(system, out, address, value, new_value);
+        }
+    } else {
+        // Otherwise, clear our exclusive hold and finish.
+        monitor.ClearExclusive();
+    }
+
+    // We're done: report the value that was actually compared (and replaced, if equal).
+    *out = current_value;
+    return true;
+}
+
+} // namespace
+
+ResultCode KAddressArbiter::Signal(VAddr addr, s32 count) {
+    // Perform signaling.
+    s32 num_waiters{};
+    {
+        KScopedSchedulerLock sl(kernel);
+
+        auto it = thread_tree.nfind_light({addr, -1});
+        while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
+               (it->GetAddressArbiterKey() == addr)) {
+            Thread* target_thread = std::addressof(*it);
+            target_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
+
+            ASSERT(target_thread->IsWaitingForAddressArbiter());
+            target_thread->Wakeup();
+
+            it = thread_tree.erase(it);
+            target_thread->ClearAddressArbiter();
+            ++num_waiters;
+        }
+    }
+    return RESULT_SUCCESS;
+}
+
+ResultCode KAddressArbiter::SignalAndIncrementIfEqual(VAddr addr, s32 value, s32 count) {
+    // Perform signaling.
+    s32 num_waiters{};
+    {
+        KScopedSchedulerLock sl(kernel);
+
+        // Check the userspace value.
+        s32 user_value{};
+        R_UNLESS(UpdateIfEqual(system, std::addressof(user_value), addr, value, value + 1),
+                 Svc::ResultInvalidCurrentMemory);
+        R_UNLESS(user_value == value, Svc::ResultInvalidState);
+
+        auto it = thread_tree.nfind_light({addr, -1});
+        while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
+               (it->GetAddressArbiterKey() == addr)) {
+            Thread* target_thread = std::addressof(*it);
+            target_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
+
+            ASSERT(target_thread->IsWaitingForAddressArbiter());
+            target_thread->Wakeup();
+
+            it = thread_tree.erase(it);
+            target_thread->ClearAddressArbiter();
+            ++num_waiters;
+        }
+    }
+    return RESULT_SUCCESS;
+}
+
+ResultCode KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32 value, s32 count) {
+    // Perform signaling.
+    s32 num_waiters{};
+    {
+        KScopedSchedulerLock sl(kernel);
+
+        auto it = thread_tree.nfind_light({addr, -1});
+        // Determine the updated value.
+        s32 new_value{};
+        if (/*GetTargetFirmware() >= TargetFirmware_7_0_0*/ true) {
+            if (count <= 0) {
+                if ((it != thread_tree.end()) && (it->GetAddressArbiterKey() == addr)) {
+                    new_value = value - 2;
+                } else {
+                    new_value = value + 1;
+                }
+            } else {
+                if ((it != thread_tree.end()) && (it->GetAddressArbiterKey() == addr)) {
+                    auto tmp_it = it;
+                    s32 tmp_num_waiters{};
+                    while ((++tmp_it != thread_tree.end()) &&
+                           (tmp_it->GetAddressArbiterKey() == addr)) {
+                        if ((tmp_num_waiters++) >= count) {
+                            break;
+                        }
+                    }
+
+                    if (tmp_num_waiters < count) {
+                        new_value = value - 1;
+                    } else {
+                        new_value = value;
+                    }
+                } else {
+                    new_value = value + 1;
+                }
+            }
+        } else {
+            if (count <= 0) {
+                if ((it != thread_tree.end()) && (it->GetAddressArbiterKey() == addr)) {
+                    new_value = value - 1;
+                } else {
+                    new_value = value + 1;
+                }
+            } else {
+                auto tmp_it = it;
+                s32 tmp_num_waiters{};
+                while ((tmp_it != thread_tree.end()) && (tmp_it->GetAddressArbiterKey() == addr) &&
+                       (tmp_num_waiters < count + 1)) {
+                    ++tmp_num_waiters;
+                    ++tmp_it;
+                }
+
+                if (tmp_num_waiters == 0) {
+                    new_value = value + 1;
+                } else if (tmp_num_waiters <= count) {
+                    new_value = value - 1;
+                } else {
+                    new_value = value;
+                }
+            }
+        }
+
+        // Check the userspace value.
+        s32 user_value{};
+        bool succeeded{};
+        if (value != new_value) {
+            succeeded = UpdateIfEqual(system, std::addressof(user_value), addr, value, new_value);
+        } else {
+            succeeded = ReadFromUser(system, std::addressof(user_value), addr);
+        }
+
+        R_UNLESS(succeeded, Svc::ResultInvalidCurrentMemory);
+        R_UNLESS(user_value == value, Svc::ResultInvalidState);
+
+        while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
+               (it->GetAddressArbiterKey() == addr)) {
+            Thread* target_thread = std::addressof(*it);
+            target_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
+
+            ASSERT(target_thread->IsWaitingForAddressArbiter());
+            target_thread->Wakeup();
+
+            it = thread_tree.erase(it);
+            target_thread->ClearAddressArbiter();
+            ++num_waiters;
+        }
+    }
+    return RESULT_SUCCESS;
+}
+
+ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement, s64 timeout) {
+    // Prepare to wait.
+    Thread* cur_thread = kernel.CurrentScheduler()->GetCurrentThread();
+    Handle timer = InvalidHandle;
+
+    {
+        KScopedSchedulerLockAndSleep slp(kernel, timer, cur_thread, timeout);
+
+        // Check that the thread isn't terminating.
+        if (cur_thread->IsTerminationRequested()) {
+            slp.CancelSleep();
+            return Svc::ResultTerminationRequested;
+        }
+
+        // Set the synced object.
+        cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+
+        // Read the value from userspace.
+        s32 user_value{};
+        bool succeeded{};
+        if (decrement) {
+            succeeded = DecrementIfLessThan(system, std::addressof(user_value), addr, value);
+        } else {
+            succeeded = ReadFromUser(system, std::addressof(user_value), addr);
+        }
+
+        if (!succeeded) {
+            slp.CancelSleep();
+            return Svc::ResultInvalidCurrentMemory;
+        }
+
+        // Check that the value is less than the specified one.
+        if (user_value >= value) {
+            slp.CancelSleep();
+            return Svc::ResultInvalidState;
+        }
+
+        // Check that the timeout is non-zero.
+        if (timeout == 0) {
+            slp.CancelSleep();
+            return Svc::ResultTimedOut;
+        }
+
+        // Set the arbiter.
+        cur_thread->SetAddressArbiter(std::addressof(thread_tree), addr);
+        thread_tree.insert(*cur_thread);
+        cur_thread->SetState(ThreadState::Waiting);
+        cur_thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Arbitration);
+    }
+
+    // Cancel the timer wait.
+    if (timer != InvalidHandle) {
+        auto& time_manager = kernel.TimeManager();
+        time_manager.UnscheduleTimeEvent(timer);
+    }
+
+    // Remove from the address arbiter.
+    {
+        KScopedSchedulerLock sl(kernel);
+
+        if (cur_thread->IsWaitingForAddressArbiter()) {
+            thread_tree.erase(thread_tree.iterator_to(*cur_thread));
+            cur_thread->ClearAddressArbiter();
+        }
+    }
+
+    // Get the result.
+    KSynchronizationObject* dummy{};
+    return cur_thread->GetWaitResult(std::addressof(dummy));
+}
+
+ResultCode KAddressArbiter::WaitIfEqual(VAddr addr, s32 value, s64 timeout) {
+    // Prepare to wait.
+    Thread* cur_thread = kernel.CurrentScheduler()->GetCurrentThread();
+    Handle timer = InvalidHandle;
+
+    {
+        KScopedSchedulerLockAndSleep slp(kernel, timer, cur_thread, timeout);
+
+        // Check that the thread isn't terminating.
+        if (cur_thread->IsTerminationRequested()) {
+            slp.CancelSleep();
+            return Svc::ResultTerminationRequested;
+        }
+
+        // Set the synced object.
+        cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+
+        // Read the value from userspace.
+        s32 user_value{};
+        if (!ReadFromUser(system, std::addressof(user_value), addr)) {
+            slp.CancelSleep();
+            return Svc::ResultInvalidCurrentMemory;
+        }
+
+        // Check that the value is equal.
+        if (value != user_value) {
+            slp.CancelSleep();
+            return Svc::ResultInvalidState;
+        }
+
+        // Check that the timeout is non-zero.
+        if (timeout == 0) {
+            slp.CancelSleep();
+            return Svc::ResultTimedOut;
+        }
+
+        // Set the arbiter.
+        cur_thread->SetAddressArbiter(std::addressof(thread_tree), addr);
+        thread_tree.insert(*cur_thread);
+        cur_thread->SetState(ThreadState::Waiting);
+        cur_thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Arbitration);
+    }
+
+    // Cancel the timer wait.
+    if (timer != InvalidHandle) {
+        auto& time_manager = kernel.TimeManager();
+        time_manager.UnscheduleTimeEvent(timer);
+    }
+
+    // Remove from the address arbiter.
+    {
+        KScopedSchedulerLock sl(kernel);
+
+        if (cur_thread->IsWaitingForAddressArbiter()) {
+            thread_tree.erase(thread_tree.iterator_to(*cur_thread));
+            cur_thread->ClearAddressArbiter();
+        }
+    }
+
+    // Get the result.
+    KSynchronizationObject* dummy{};
+    return cur_thread->GetWaitResult(std::addressof(dummy));
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_address_arbiter.h b/src/core/hle/kernel/k_address_arbiter.h
new file mode 100644
index 000000000..8d379b524
--- /dev/null
+++ b/src/core/hle/kernel/k_address_arbiter.h
@@ -0,0 +1,70 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/hle/kernel/k_condition_variable.h"
+#include "core/hle/kernel/svc_types.h"
+
+union ResultCode;
+
+namespace Core {
+class System;
+}
+
+namespace Kernel {
+
+class KernelCore;
+
+class KAddressArbiter {
+public:
+    using ThreadTree = KConditionVariable::ThreadTree;
+
+    explicit KAddressArbiter(Core::System& system_);
+    ~KAddressArbiter();
+
+    [[nodiscard]] ResultCode SignalToAddress(VAddr addr, Svc::SignalType type, s32 value,
+                                             s32 count) {
+        switch (type) {
+        case Svc::SignalType::Signal:
+            return Signal(addr, count);
+        case Svc::SignalType::SignalAndIncrementIfEqual:
+            return SignalAndIncrementIfEqual(addr, value, count);
+        case Svc::SignalType::SignalAndModifyByWaitingCountIfEqual:
+            return SignalAndModifyByWaitingCountIfEqual(addr, value, count);
+        }
+        UNREACHABLE();
+        return RESULT_UNKNOWN;
+    }
+
+    [[nodiscard]] ResultCode WaitForAddress(VAddr addr, Svc::ArbitrationType type, s32 value,
+                                            s64 timeout) {
+        switch (type) {
+        case Svc::ArbitrationType::WaitIfLessThan:
+            return WaitIfLessThan(addr, value, false, timeout);
+        case Svc::ArbitrationType::DecrementAndWaitIfLessThan:
+            return WaitIfLessThan(addr, value, true, timeout);
+        case Svc::ArbitrationType::WaitIfEqual:
+            return WaitIfEqual(addr, value, timeout);
+        }
+        UNREACHABLE();
+        return RESULT_UNKNOWN;
+    }
+
+private:
+    [[nodiscard]] ResultCode Signal(VAddr addr, s32 count);
+    [[nodiscard]] ResultCode SignalAndIncrementIfEqual(VAddr addr, s32 value, s32 count);
+    [[nodiscard]] ResultCode SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32 value, s32 count);
+    [[nodiscard]] ResultCode WaitIfLessThan(VAddr addr, s32 value, bool decrement, s64 timeout);
+    [[nodiscard]] ResultCode WaitIfEqual(VAddr addr, s32 value, s64 timeout);
+
+    ThreadTree thread_tree;
+
+    Core::System& system;
+    KernelCore& kernel;
+};
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp
new file mode 100644
index 000000000..49a068310
--- /dev/null
+++ b/src/core/hle/kernel/k_condition_variable.cpp
@@ -0,0 +1,349 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <vector>
+
+#include "core/arm/exclusive_monitor.h"
+#include "core/core.h"
+#include "core/hle/kernel/k_condition_variable.h"
+#include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
+#include "core/hle/kernel/k_synchronization_object.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/svc_common.h"
+#include "core/hle/kernel/svc_results.h"
+#include "core/hle/kernel/thread.h"
+#include "core/memory.h"
+
+namespace Kernel {
+
+namespace {
+
+bool ReadFromUser(Core::System& system, u32* out, VAddr address) {
+    *out = system.Memory().Read32(address);
+    return true;
+}
+
+bool WriteToUser(Core::System& system, VAddr address, const u32* p) {
+    system.Memory().Write32(address, *p);
+    return true;
+}
+
+bool UpdateLockAtomic(Core::System& system, u32* out, VAddr address, u32 if_zero,
+                      u32 new_orr_mask) {
+    auto& monitor = system.Monitor();
+    const auto current_core = system.CurrentCoreIndex();
+
+    // Load the value from the address.
+    const auto expected = monitor.ExclusiveRead32(current_core, address);
+
+    // Orr in the new mask.
+    u32 value = expected | new_orr_mask;
+
+    // If the value is zero, use the if_zero value, otherwise use the newly orr'd value.
+    if (!expected) {
+        value = if_zero;
+    }
+
+    // Try to store.
+    if (!monitor.ExclusiveWrite32(current_core, address, value)) {
+        // If we failed to store, try again.
+        return UpdateLockAtomic(system, out, address, if_zero, new_orr_mask);
+    }
+
+    // We're done.
+    *out = expected;
+    return true;
+}
+
+} // namespace
+
+KConditionVariable::KConditionVariable(Core::System& system_)
+    : system{system_}, kernel{system.Kernel()} {}
+
+KConditionVariable::~KConditionVariable() = default;
+
+ResultCode KConditionVariable::SignalToAddress(VAddr addr) {
+    Thread* owner_thread = kernel.CurrentScheduler()->GetCurrentThread();
+
+    // Signal the address.
+    {
+        KScopedSchedulerLock sl(kernel);
+
+        // Remove waiter thread.
+        s32 num_waiters{};
+        Thread* next_owner_thread =
+            owner_thread->RemoveWaiterByKey(std::addressof(num_waiters), addr);
+
+        // Determine the next tag.
+        u32 next_value{};
+        if (next_owner_thread) {
+            next_value = next_owner_thread->GetAddressKeyValue();
+            if (num_waiters > 1) {
+                next_value |= Svc::HandleWaitMask;
+            }
+
+            next_owner_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
+            next_owner_thread->Wakeup();
+        }
+
+        // Write the value to userspace.
+        if (!WriteToUser(system, addr, std::addressof(next_value))) {
+            if (next_owner_thread) {
+                next_owner_thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory);
+            }
+
+            return Svc::ResultInvalidCurrentMemory;
+        }
+    }
+
+    return RESULT_SUCCESS;
+}
+
+ResultCode KConditionVariable::WaitForAddress(Handle handle, VAddr addr, u32 value) {
+    Thread* cur_thread = kernel.CurrentScheduler()->GetCurrentThread();
+
+    // Wait for the address.
+    {
+        std::shared_ptr<Thread> owner_thread;
+        ASSERT(!owner_thread);
+        {
+            KScopedSchedulerLock sl(kernel);
+            cur_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
+
+            // Check if the thread should terminate.
+            R_UNLESS(!cur_thread->IsTerminationRequested(), Svc::ResultTerminationRequested);
+
+            {
+                // Read the tag from userspace.
+                u32 test_tag{};
+                R_UNLESS(ReadFromUser(system, std::addressof(test_tag), addr),
+                         Svc::ResultInvalidCurrentMemory);
+
+                // If the tag isn't the handle (with wait mask), we're done.
+                R_UNLESS(test_tag == (handle | Svc::HandleWaitMask), RESULT_SUCCESS);
+
+                // Get the lock owner thread.
+                owner_thread = kernel.CurrentProcess()->GetHandleTable().Get<Thread>(handle);
+                R_UNLESS(owner_thread, Svc::ResultInvalidHandle);
+
+                // Update the lock.
+                cur_thread->SetAddressKey(addr, value);
+                owner_thread->AddWaiter(cur_thread);
+                cur_thread->SetState(ThreadState::Waiting);
+                cur_thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::ConditionVar);
+                cur_thread->SetMutexWaitAddressForDebugging(addr);
+            }
+        }
+        ASSERT(owner_thread);
+    }
+
+    // Remove the thread as a waiter from the lock owner.
+    {
+        KScopedSchedulerLock sl(kernel);
+        Thread* owner_thread = cur_thread->GetLockOwner();
+        if (owner_thread != nullptr) {
+            owner_thread->RemoveWaiter(cur_thread);
+        }
+    }
+
+    // Get the wait result.
+    KSynchronizationObject* dummy{};
+    return cur_thread->GetWaitResult(std::addressof(dummy));
+}
+
+Thread* KConditionVariable::SignalImpl(Thread* thread) {
+    // Check pre-conditions.
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
+
+    // Update the tag.
+    VAddr address = thread->GetAddressKey();
+    u32 own_tag = thread->GetAddressKeyValue();
+
+    u32 prev_tag{};
+    bool can_access{};
+    {
+        // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable.
+        // TODO(bunnei): We should call CanAccessAtomic(..) here.
+        can_access = true;
+        if (can_access) {
+            UpdateLockAtomic(system, std::addressof(prev_tag), address, own_tag,
+                             Svc::HandleWaitMask);
+        }
+    }
+
+    Thread* thread_to_close = nullptr;
+    if (can_access) {
+        if (prev_tag == InvalidHandle) {
+            // If nobody held the lock previously, we're all good.
+            thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
+            thread->Wakeup();
+        } else {
+            // Get the previous owner.
+            auto owner_thread = kernel.CurrentProcess()->GetHandleTable().Get<Thread>(
+                prev_tag & ~Svc::HandleWaitMask);
+
+            if (owner_thread) {
+                // Add the thread as a waiter on the owner.
+                owner_thread->AddWaiter(thread);
+                thread_to_close = owner_thread.get();
+            } else {
+                // The lock was tagged with a thread that doesn't exist.
+                thread->SetSyncedObject(nullptr, Svc::ResultInvalidState);
+                thread->Wakeup();
+            }
+        }
+    } else {
+        // If the address wasn't accessible, note so.
+        thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory);
+        thread->Wakeup();
+    }
+
+    return thread_to_close;
+}
+
+void KConditionVariable::Signal(u64 cv_key, s32 count) {
+    // Prepare for signaling: the first MaxThreads threads to close go in a fixed array.
+    constexpr int MaxThreads = 16;
+
+    // TODO(bunnei): This should just be Thread once we implement KAutoObject instead of using
+    // std::shared_ptr.
+    std::vector<std::shared_ptr<Thread>> thread_list;
+    std::array<Thread*, MaxThreads> thread_array;
+    s32 num_to_close{};
+
+    // Perform signaling; a count <= 0 signals every waiter on cv_key.
+    s32 num_waiters{};
+    {
+        KScopedSchedulerLock sl(kernel);
+
+        auto it = thread_tree.nfind_light({cv_key, -1});
+        while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
+               (it->GetConditionVariableKey() == cv_key)) {
+            Thread* target_thread = std::addressof(*it);
+
+            if (Thread* thread = SignalImpl(target_thread); thread != nullptr) {
+                if (num_to_close < MaxThreads) {
+                    thread_array[num_to_close++] = thread;
+                } else {
+                    thread_list.push_back(SharedFrom(thread));
+                }
+            }
+
+            it = thread_tree.erase(it);
+            target_thread->ClearConditionVariable();
+            ++num_waiters;
+        }
+
+        // If we have no waiters, clear the has waiter flag.
+        if (it == thread_tree.end() || it->GetConditionVariableKey() != cv_key) {
+            const u32 has_waiter_flag{};
+            WriteToUser(system, cv_key, std::addressof(has_waiter_flag));
+        }
+    }
+
+    // Close threads in the array.
+    for (auto i = 0; i < num_to_close; ++i) {
+        thread_array[i]->Close();
+    }
+
+    // Close threads in the list; the vector drops its references when it goes out of scope.
+    for (const auto& thread : thread_list) {
+        thread->Close();
+    }
+}
+
+ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout) {
+    // Prepare to wait.
+    Thread* cur_thread = kernel.CurrentScheduler()->GetCurrentThread();
+    Handle timer = InvalidHandle;
+
+    {
+        KScopedSchedulerLockAndSleep slp(kernel, timer, cur_thread, timeout);
+
+        // Set the synced object.
+        cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+
+        // Check that the thread isn't terminating.
+        if (cur_thread->IsTerminationRequested()) {
+            slp.CancelSleep();
+            return Svc::ResultTerminationRequested;
+        }
+
+        // Update the value and process for the next owner.
+        {
+            // Remove waiter thread.
+            s32 num_waiters{};
+            Thread* next_owner_thread =
+                cur_thread->RemoveWaiterByKey(std::addressof(num_waiters), addr);
+
+            // Update for the next owner thread.
+            u32 next_value{};
+            if (next_owner_thread != nullptr) {
+                // Get the next tag value.
+                next_value = next_owner_thread->GetAddressKeyValue();
+                if (num_waiters > 1) {
+                    next_value |= Svc::HandleWaitMask;
+                }
+
+                // Wake up the next owner.
+                next_owner_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
+                next_owner_thread->Wakeup();
+            }
+
+            // Write to the cv key.
+            {
+                const u32 has_waiter_flag = 1;
+                WriteToUser(system, key, std::addressof(has_waiter_flag));
+                // TODO(bunnei): We should call DataMemoryBarrier(..) here.
+            }
+
+            // Write the value to userspace.
+            if (!WriteToUser(system, addr, std::addressof(next_value))) {
+                slp.CancelSleep();
+                return Svc::ResultInvalidCurrentMemory;
+            }
+        }
+
+        // Update condition variable tracking.
+        {
+            cur_thread->SetConditionVariable(std::addressof(thread_tree), addr, key, value);
+            thread_tree.insert(*cur_thread);
+        }
+
+        // If the timeout is non-zero, set the thread as waiting.
+        if (timeout != 0) {
+            cur_thread->SetState(ThreadState::Waiting);
+            cur_thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::ConditionVar);
+            cur_thread->SetMutexWaitAddressForDebugging(addr);
+        }
+    }
+
+    // Cancel the timer wait.
+    if (timer != InvalidHandle) {
+        auto& time_manager = kernel.TimeManager();
+        time_manager.UnscheduleTimeEvent(timer);
+    }
+
+    // Remove from the condition variable.
+    {
+        KScopedSchedulerLock sl(kernel);
+
+        if (Thread* owner = cur_thread->GetLockOwner(); owner != nullptr) {
+            owner->RemoveWaiter(cur_thread);
+        }
+
+        if (cur_thread->IsWaitingForConditionVariable()) {
+            thread_tree.erase(thread_tree.iterator_to(*cur_thread));
+            cur_thread->ClearConditionVariable();
+        }
+    }
+
+    // Get the result.
+    KSynchronizationObject* dummy{};
+    return cur_thread->GetWaitResult(std::addressof(dummy));
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_condition_variable.h b/src/core/hle/kernel/k_condition_variable.h
new file mode 100644
index 000000000..98ed5b323
--- /dev/null
+++ b/src/core/hle/kernel/k_condition_variable.h
@@ -0,0 +1,59 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/assert.h"
+#include "common/common_types.h"
+
+#include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/thread.h"
+#include "core/hle/result.h"
+
+namespace Core {
+class System;
+}
+
+namespace Kernel {
+
+class KConditionVariable {
+public:
+    using ThreadTree = typename Thread::ConditionVariableThreadTreeType;
+
+    explicit KConditionVariable(Core::System& system_);
+    ~KConditionVariable();
+
+    // Arbitration
+    [[nodiscard]] ResultCode SignalToAddress(VAddr addr);
+    [[nodiscard]] ResultCode WaitForAddress(Handle handle, VAddr addr, u32 value);
+
+    // Condition variable
+    void Signal(u64 cv_key, s32 count);
+    [[nodiscard]] ResultCode Wait(VAddr addr, u64 key, u32 value, s64 timeout);
+
+private:
+    [[nodiscard]] Thread* SignalImpl(Thread* thread);
+
+    ThreadTree thread_tree;
+
+    Core::System& system;
+    KernelCore& kernel;
+};
+
+inline void BeforeUpdatePriority(const KernelCore& kernel, KConditionVariable::ThreadTree* tree,
+                                 Thread* thread) {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
+
+    tree->erase(tree->iterator_to(*thread));
+}
+
+inline void AfterUpdatePriority(const KernelCore& kernel, KConditionVariable::ThreadTree* tree,
+                                Thread* thread) {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
+
+    tree->insert(*thread);
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp
index c5fd82a6b..42f0ea483 100644
--- a/src/core/hle/kernel/k_scheduler.cpp
+++ b/src/core/hle/kernel/k_scheduler.cpp
@@ -180,22 +180,22 @@ u64 KScheduler::UpdateHighestPriorityThreadsImpl(KernelCore& kernel) {
     return cores_needing_scheduling;
 }
 
-void KScheduler::OnThreadStateChanged(KernelCore& kernel, Thread* thread, u32 old_state) {
+void KScheduler::OnThreadStateChanged(KernelCore& kernel, Thread* thread, ThreadState old_state) {
     ASSERT(kernel.GlobalSchedulerContext().IsLocked());
 
     // Check if the state has changed, because if it hasn't there's nothing to do.
-    const auto cur_state = thread->scheduling_state;
+    const auto cur_state = thread->GetRawState();
     if (cur_state == old_state) {
         return;
     }
 
     // Update the priority queues.
-    if (old_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+    if (old_state == ThreadState::Runnable) {
         // If we were previously runnable, then we're not runnable now, and we should remove.
         GetPriorityQueue(kernel).Remove(thread);
         IncrementScheduledCount(thread);
         SetSchedulerUpdateNeeded(kernel);
-    } else if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+    } else if (cur_state == ThreadState::Runnable) {
         // If we're now runnable, then we weren't previously, and we should add.
         GetPriorityQueue(kernel).PushBack(thread);
         IncrementScheduledCount(thread);
@@ -203,13 +203,11 @@ void KScheduler::OnThreadStateChanged(KernelCore& kernel, Thread* thread, u32 ol
     }
 }
 
-void KScheduler::OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, Thread* current_thread,
-                                         u32 old_priority) {
-
+void KScheduler::OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, s32 old_priority) {
     ASSERT(kernel.GlobalSchedulerContext().IsLocked());
 
     // If the thread is runnable, we want to change its priority in the queue.
-    if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+    if (thread->GetRawState() == ThreadState::Runnable) {
         GetPriorityQueue(kernel).ChangePriority(
             old_priority, thread == kernel.CurrentScheduler()->GetCurrentThread(), thread);
         IncrementScheduledCount(thread);
@@ -222,7 +220,7 @@ void KScheduler::OnThreadAffinityMaskChanged(KernelCore& kernel, Thread* thread,
     ASSERT(kernel.GlobalSchedulerContext().IsLocked());
 
     // If the thread is runnable, we want to change its affinity in the queue.
-    if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+    if (thread->GetRawState() == ThreadState::Runnable) {
         GetPriorityQueue(kernel).ChangeAffinityMask(old_core, old_affinity, thread);
         IncrementScheduledCount(thread);
         SetSchedulerUpdateNeeded(kernel);
@@ -292,7 +290,7 @@ void KScheduler::RotateScheduledQueue(s32 core_id, s32 priority) {
 
         // If the best thread we can choose has a priority the same or worse than ours, try to
         // migrate a higher priority thread.
-        if (best_thread != nullptr && best_thread->GetPriority() >= static_cast<u32>(priority)) {
+        if (best_thread != nullptr && best_thread->GetPriority() >= priority) {
             Thread* suggested = priority_queue.GetSuggestedFront(core_id);
             while (suggested != nullptr) {
                 // If the suggestion's priority is the same as ours, don't bother.
@@ -395,8 +393,8 @@ void KScheduler::YieldWithoutCoreMigration() {
     {
         KScopedSchedulerLock lock(kernel);
 
-        const auto cur_state = cur_thread.scheduling_state;
-        if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+        const auto cur_state = cur_thread.GetRawState();
+        if (cur_state == ThreadState::Runnable) {
             // Put the current thread at the back of the queue.
             Thread* next_thread = priority_queue.MoveToScheduledBack(std::addressof(cur_thread));
             IncrementScheduledCount(std::addressof(cur_thread));
@@ -436,8 +434,8 @@ void KScheduler::YieldWithCoreMigration() {
     {
         KScopedSchedulerLock lock(kernel);
 
-        const auto cur_state = cur_thread.scheduling_state;
-        if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+        const auto cur_state = cur_thread.GetRawState();
+        if (cur_state == ThreadState::Runnable) {
             // Get the current active core.
             const s32 core_id = cur_thread.GetActiveCore();
 
@@ -526,8 +524,8 @@ void KScheduler::YieldToAnyThread() {
     {
         KScopedSchedulerLock lock(kernel);
 
-        const auto cur_state = cur_thread.scheduling_state;
-        if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
+        const auto cur_state = cur_thread.GetRawState();
+        if (cur_state == ThreadState::Runnable) {
             // Get the current active core.
             const s32 core_id = cur_thread.GetActiveCore();
 
@@ -645,8 +643,7 @@ void KScheduler::Unload(Thread* thread) {
 
 void KScheduler::Reload(Thread* thread) {
     if (thread) {
-        ASSERT_MSG(thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
-                   "Thread must be runnable.");
+        ASSERT_MSG(thread->GetState() == ThreadState::Runnable, "Thread must be runnable.");
 
         // Cancel any outstanding wakeup events for this thread
         thread->SetIsRunning(true);
@@ -725,7 +722,7 @@ void KScheduler::SwitchToCurrent() {
         do {
             if (current_thread != nullptr && !current_thread->IsHLEThread()) {
                 current_thread->context_guard.lock();
-                if (!current_thread->IsRunnable()) {
+                if (current_thread->GetRawState() != ThreadState::Runnable) {
                     current_thread->context_guard.unlock();
                     break;
                 }
@@ -772,7 +769,7 @@ void KScheduler::Initialize() {
 
     {
         KScopedSchedulerLock lock{system.Kernel()};
-        idle_thread->SetStatus(ThreadStatus::Ready);
+        idle_thread->SetState(ThreadState::Runnable);
     }
 }
 
diff --git a/src/core/hle/kernel/k_scheduler.h b/src/core/hle/kernel/k_scheduler.h
index e84abc84c..783665123 100644
--- a/src/core/hle/kernel/k_scheduler.h
+++ b/src/core/hle/kernel/k_scheduler.h
@@ -100,11 +100,10 @@ public:
     void YieldToAnyThread();
 
     /// Notify the scheduler a thread's status has changed.
-    static void OnThreadStateChanged(KernelCore& kernel, Thread* thread, u32 old_state);
+    static void OnThreadStateChanged(KernelCore& kernel, Thread* thread, ThreadState old_state);
 
     /// Notify the scheduler a thread's priority has changed.
-    static void OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, Thread* current_thread,
-                                        u32 old_priority);
+    static void OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, s32 old_priority);
 
     /// Notify the scheduler a thread's core and/or affinity mask has changed.
     static void OnThreadAffinityMaskChanged(KernelCore& kernel, Thread* thread,
diff --git a/src/core/hle/kernel/k_scheduler_lock.h b/src/core/hle/kernel/k_scheduler_lock.h
index 2f1c1f691..9b40bd22c 100644
--- a/src/core/hle/kernel/k_scheduler_lock.h
+++ b/src/core/hle/kernel/k_scheduler_lock.h
@@ -19,7 +19,7 @@ class KernelCore;
 template <typename SchedulerType>
 class KAbstractSchedulerLock {
 public:
-    explicit KAbstractSchedulerLock(KernelCore& kernel) : kernel{kernel} {}
+    explicit KAbstractSchedulerLock(KernelCore& kernel_) : kernel{kernel_} {}
 
     bool IsLockedByCurrentThread() const {
         return this->owner_thread == kernel.GetCurrentEmuThreadID();
diff --git a/src/core/hle/kernel/k_synchronization_object.cpp b/src/core/hle/kernel/k_synchronization_object.cpp
new file mode 100644
index 000000000..1c508cb55
--- /dev/null
+++ b/src/core/hle/kernel/k_synchronization_object.cpp
@@ -0,0 +1,172 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
+#include "core/hle/kernel/k_synchronization_object.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/svc_results.h"
+#include "core/hle/kernel/thread.h"
+
+namespace Kernel {
+
+ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
+                                        KSynchronizationObject** objects, const s32 num_objects,
+                                        s64 timeout) {
+    // One waiter-list node per object; std::vector storage, so these are not on the stack.
+    std::vector<ThreadListNode> thread_nodes(num_objects);
+
+    // Prepare for wait.
+    Thread* thread = kernel.CurrentScheduler()->GetCurrentThread();
+    Handle timer = InvalidHandle;
+
+    {
+        // Set up the scheduling lock and sleep.
+        KScopedSchedulerLockAndSleep slp(kernel, timer, thread, timeout);
+
+        // Check if any of the objects are already signaled.
+        for (auto i = 0; i < num_objects; ++i) {
+            ASSERT(objects[i] != nullptr);
+
+            if (objects[i]->IsSignaled()) {
+                *out_index = i;
+                slp.CancelSleep();
+                return RESULT_SUCCESS;
+            }
+        }
+
+        // A zero timeout is a poll: nothing was signaled above, so report a timeout.
+        if (timeout == 0) {
+            slp.CancelSleep();
+            return Svc::ResultTimedOut;
+        }
+
+        // Check if the thread should terminate.
+        if (thread->IsTerminationRequested()) {
+            slp.CancelSleep();
+            return Svc::ResultTerminationRequested;
+        }
+
+        // Check if waiting was canceled.
+        if (thread->IsWaitCancelled()) {
+            slp.CancelSleep();
+            thread->ClearWaitCancelled();
+            return Svc::ResultCancelled;
+        }
+
+        // Append a node for this thread to the tail of each object's waiter list.
+        for (auto i = 0; i < num_objects; ++i) {
+            thread_nodes[i].thread = thread;
+            thread_nodes[i].next = nullptr;
+
+            if (objects[i]->thread_list_tail == nullptr) {
+                objects[i]->thread_list_head = std::addressof(thread_nodes[i]);
+            } else {
+                objects[i]->thread_list_tail->next = std::addressof(thread_nodes[i]);
+            }
+
+            objects[i]->thread_list_tail = std::addressof(thread_nodes[i]);
+        }
+
+        // For debugging only
+        thread->SetWaitObjectsForDebugging({objects, static_cast<std::size_t>(num_objects)});
+
+        // Mark the thread as waiting.
+        thread->SetCancellable();
+        thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+        thread->SetState(ThreadState::Waiting);
+        thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Synchronization);
+    }
+
+    // The lock/sleep is done, so we should be able to get our result.
+
+    // Thread is no longer cancellable.
+    thread->ClearCancellable();
+
+    // For debugging only
+    thread->SetWaitObjectsForDebugging({});
+
+    // Cancel the timer as needed.
+    if (timer != InvalidHandle) {
+        auto& time_manager = kernel.TimeManager();
+        time_manager.UnscheduleTimeEvent(timer);
+    }
+
+    // Get the wait result and unlink this thread's node from each object's waiter list.
+    ResultCode wait_result{RESULT_SUCCESS};
+    s32 sync_index = -1;
+    {
+        KScopedSchedulerLock lock(kernel);
+        KSynchronizationObject* synced_obj;
+        wait_result = thread->GetWaitResult(std::addressof(synced_obj));
+
+        for (auto i = 0; i < num_objects; ++i) {
+            // Unlink our node; head slot is aliased as a node (needs `next` as the first member).
+            ThreadListNode* prev_ptr =
+                reinterpret_cast<ThreadListNode*>(std::addressof(objects[i]->thread_list_head));
+            ThreadListNode* prev_val = nullptr;
+            ThreadListNode *prev, *tail_prev;
+
+            do {
+                prev = prev_ptr;
+                prev_ptr = prev_ptr->next;
+                tail_prev = prev_val;
+                prev_val = prev_ptr;
+            } while (prev_ptr != std::addressof(thread_nodes[i]));
+
+            if (objects[i]->thread_list_tail == std::addressof(thread_nodes[i])) {
+                objects[i]->thread_list_tail = tail_prev; // Predecessor; nullptr if sole node.
+            }
+
+            prev->next = thread_nodes[i].next;
+
+            if (objects[i] == synced_obj) {
+                sync_index = i;
+            }
+        }
+    }
+
+    // Set output; the index stays -1 if none of the objects is the synced object.
+    *out_index = sync_index;
+    return wait_result;
+}
+
+KSynchronizationObject::KSynchronizationObject(KernelCore& kernel) : Object{kernel} {}
+
+KSynchronizationObject ::~KSynchronizationObject() = default;
+
+void KSynchronizationObject::NotifyAvailable(ResultCode result) {
+    KScopedSchedulerLock lock(kernel);
+
+    // Nothing to do unless the object is signaled (checked under the scheduler lock).
+    if (!this->IsSignaled()) {
+        return;
+    }
+
+    // Walk the waiter list; threads still Waiting get this object/result and become Runnable.
+    for (auto* cur_node = thread_list_head; cur_node != nullptr; cur_node = cur_node->next) {
+        Thread* thread = cur_node->thread;
+        if (thread->GetState() == ThreadState::Waiting) {
+            thread->SetSyncedObject(this, result);
+            thread->SetState(ThreadState::Runnable);
+        }
+    }
+}
+
+std::vector<Thread*> KSynchronizationObject::GetWaitingThreadsForDebugging() const {
+    std::vector<Thread*> threads;
+
+    // Copy the waiter list into the vector while holding the scheduler lock.
+    {
+        KScopedSchedulerLock lock(kernel);
+        for (auto* cur_node = thread_list_head; cur_node != nullptr; cur_node = cur_node->next) {
+            threads.emplace_back(cur_node->thread);
+        }
+    }
+
+    return threads;
+}
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_synchronization_object.h b/src/core/hle/kernel/k_synchronization_object.h
new file mode 100644
index 000000000..14d80ebf1
--- /dev/null
+++ b/src/core/hle/kernel/k_synchronization_object.h
@@ -0,0 +1,58 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "core/hle/kernel/object.h"
+#include "core/hle/result.h"
+
+namespace Kernel {
+
+class KernelCore;
+class Synchronization;
+class Thread;
+
+/// Base class for kernel objects that threads can wait on; waiters form a singly linked list.
+class KSynchronizationObject : public Object {
+public:
+    struct ThreadListNode {
+        ThreadListNode* next{}; // Keep first: Wait() aliases the list head as a node.
+        Thread* thread{};
+    };
+
+    [[nodiscard]] static ResultCode Wait(KernelCore& kernel, s32* out_index,
+                                         KSynchronizationObject** objects, const s32 num_objects,
+                                         s64 timeout);
+
+    [[nodiscard]] virtual bool IsSignaled() const = 0;
+
+    [[nodiscard]] std::vector<Thread*> GetWaitingThreadsForDebugging() const;
+
+protected:
+    explicit KSynchronizationObject(KernelCore& kernel);
+    virtual ~KSynchronizationObject();
+
+    void NotifyAvailable(ResultCode result);
+    void NotifyAvailable() {
+        return this->NotifyAvailable(RESULT_SUCCESS);
+    }
+
+private:
+    ThreadListNode* thread_list_head{}; // Oldest waiter node; nullptr when nothing waits.
+    ThreadListNode* thread_list_tail{}; // Newest waiter node; Wait() appends here.
+};
+
+// DynamicObjectCast specialization: non-null + IsWaitable() => static cast; else nullptr.
+template <>
+inline std::shared_ptr<KSynchronizationObject> DynamicObjectCast<KSynchronizationObject>(
+    std::shared_ptr<Object> object) {
+    if (object != nullptr && object->IsWaitable()) {
+        return std::static_pointer_cast<KSynchronizationObject>(object);
+    }
+    return nullptr;
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index e8ece8164..c0ff287a6 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -38,7 +38,6 @@
 #include "core/hle/kernel/resource_limit.h"
 #include "core/hle/kernel/service_thread.h"
 #include "core/hle/kernel/shared_memory.h"
-#include "core/hle/kernel/synchronization.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/hle/lock.h"
@@ -51,8 +50,7 @@ namespace Kernel {
 
 struct KernelCore::Impl {
     explicit Impl(Core::System& system, KernelCore& kernel)
-        : synchronization{system}, time_manager{system}, global_handle_table{kernel}, system{
-                                                                                          system} {}
+        : time_manager{system}, global_handle_table{kernel}, system{system} {}
 
     void SetMulticore(bool is_multicore) {
         this->is_multicore = is_multicore;
@@ -307,7 +305,6 @@ struct KernelCore::Impl {
     std::vector<std::shared_ptr<Process>> process_list;
     Process* current_process = nullptr;
     std::unique_ptr<Kernel::GlobalSchedulerContext> global_scheduler_context;
-    Kernel::Synchronization synchronization;
     Kernel::TimeManager time_manager;
 
     std::shared_ptr<ResourceLimit> system_resource_limit;
@@ -461,14 +458,6 @@ const std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& Kern
     return impl->interrupts;
 }
 
-Kernel::Synchronization& KernelCore::Synchronization() {
-    return impl->synchronization;
-}
-
-const Kernel::Synchronization& KernelCore::Synchronization() const {
-    return impl->synchronization;
-}
-
 Kernel::TimeManager& KernelCore::TimeManager() {
     return impl->time_manager;
 }
@@ -613,9 +602,11 @@ void KernelCore::Suspend(bool in_suspention) {
     const bool should_suspend = exception_exited || in_suspention;
     {
         KScopedSchedulerLock lock(*this);
-        ThreadStatus status = should_suspend ? ThreadStatus::Ready : ThreadStatus::WaitSleep;
+        const auto state = should_suspend ? ThreadState::Runnable : ThreadState::Waiting;
         for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
-            impl->suspend_threads[i]->SetStatus(status);
+            impl->suspend_threads[i]->SetState(state);
+            impl->suspend_threads[i]->SetWaitReasonForDebugging(
+                ThreadWaitReasonForDebugging::Suspended);
         }
     }
 }
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index e3169f5a7..933d9a7d6 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -33,7 +33,6 @@ template <typename T>
 class SlabHeap;
 } // namespace Memory
 
-class AddressArbiter;
 class ClientPort;
 class GlobalSchedulerContext;
 class HandleTable;
@@ -129,12 +128,6 @@ public:
     /// Gets the an instance of the current physical CPU core.
     const Kernel::PhysicalCore& CurrentPhysicalCore() const;
 
-    /// Gets the an instance of the Synchronization Interface.
-    Kernel::Synchronization& Synchronization();
-
-    /// Gets the an instance of the Synchronization Interface.
-    const Kernel::Synchronization& Synchronization() const;
-
     /// Gets the an instance of the TimeManager Interface.
     Kernel::TimeManager& TimeManager();
 
diff --git a/src/core/hle/kernel/memory/address_space_info.cpp b/src/core/hle/kernel/memory/address_space_info.cpp
index e4288cab4..6cf43ba24 100644
--- a/src/core/hle/kernel/memory/address_space_info.cpp
+++ b/src/core/hle/kernel/memory/address_space_info.cpp
@@ -96,6 +96,7 @@ u64 AddressSpaceInfo::GetAddressSpaceStart(std::size_t width, Type type) {
         return AddressSpaceInfos[AddressSpaceIndices39Bit[index]].address;
     }
     UNREACHABLE();
+    return 0;
 }
 
 std::size_t AddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type) {
@@ -112,6 +113,7 @@ std::size_t AddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type)
         return AddressSpaceInfos[AddressSpaceIndices39Bit[index]].size;
     }
     UNREACHABLE();
+    return 0;
 }
 
 } // namespace Kernel::Memory
diff --git a/src/core/hle/kernel/memory/memory_layout.h b/src/core/hle/kernel/memory/memory_layout.h
index 9b3d6267a..c7c0b2f49 100644
--- a/src/core/hle/kernel/memory/memory_layout.h
+++ b/src/core/hle/kernel/memory/memory_layout.h
@@ -5,9 +5,28 @@
 #pragma once
 
 #include "common/common_types.h"
+#include "core/device_memory.h"
 
 namespace Kernel::Memory {
 
+constexpr std::size_t KernelAslrAlignment = 2 * 1024 * 1024;
+constexpr std::size_t KernelVirtualAddressSpaceWidth = 1ULL << 39;
+constexpr std::size_t KernelPhysicalAddressSpaceWidth = 1ULL << 48;
+constexpr std::size_t KernelVirtualAddressSpaceBase = 0ULL - KernelVirtualAddressSpaceWidth;
+constexpr std::size_t KernelVirtualAddressSpaceEnd =
+    KernelVirtualAddressSpaceBase + (KernelVirtualAddressSpaceWidth - KernelAslrAlignment);
+constexpr std::size_t KernelVirtualAddressSpaceLast = KernelVirtualAddressSpaceEnd - 1;
+constexpr std::size_t KernelVirtualAddressSpaceSize =
+    KernelVirtualAddressSpaceEnd - KernelVirtualAddressSpaceBase;
+
+constexpr bool IsKernelAddressKey(VAddr key) { // Inclusive [Base, Last]; Last == End - 1.
+    return KernelVirtualAddressSpaceBase <= key && key <= KernelVirtualAddressSpaceLast;
+}
+
+constexpr bool IsKernelAddress(VAddr address) { // Half-open [Base, End).
+    return KernelVirtualAddressSpaceBase <= address && address < KernelVirtualAddressSpaceEnd;
+}
+
 class MemoryRegion final {
     friend class MemoryLayout;
 
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
deleted file mode 100644
index 4f8075e0e..000000000
--- a/src/core/hle/kernel/mutex.cpp
+++ /dev/null
@@ -1,170 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <memory>
-#include <utility>
-#include <vector>
-
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "core/core.h"
-#include "core/hle/kernel/errors.h"
-#include "core/hle/kernel/handle_table.h"
-#include "core/hle/kernel/k_scheduler.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/mutex.h"
-#include "core/hle/kernel/object.h"
-#include "core/hle/kernel/process.h"
-#include "core/hle/kernel/thread.h"
-#include "core/hle/result.h"
-#include "core/memory.h"
-
-namespace Kernel {
-
-/// Returns the number of threads that are waiting for a mutex, and the highest priority one among
-/// those.
-static std::pair<std::shared_ptr<Thread>, u32> GetHighestPriorityMutexWaitingThread(
-    const std::shared_ptr<Thread>& current_thread, VAddr mutex_addr) {
-
-    std::shared_ptr<Thread> highest_priority_thread;
-    u32 num_waiters = 0;
-
-    for (const auto& thread : current_thread->GetMutexWaitingThreads()) {
-        if (thread->GetMutexWaitAddress() != mutex_addr)
-            continue;
-
-        ++num_waiters;
-        if (highest_priority_thread == nullptr ||
-            thread->GetPriority() < highest_priority_thread->GetPriority()) {
-            highest_priority_thread = thread;
-        }
-    }
-
-    return {highest_priority_thread, num_waiters};
-}
-
-/// Update the mutex owner field of all threads waiting on the mutex to point to the new owner.
-static void TransferMutexOwnership(VAddr mutex_addr, std::shared_ptr<Thread> current_thread,
-                                   std::shared_ptr<Thread> new_owner) {
-    current_thread->RemoveMutexWaiter(new_owner);
-    const auto threads = current_thread->GetMutexWaitingThreads();
-    for (const auto& thread : threads) {
-        if (thread->GetMutexWaitAddress() != mutex_addr)
-            continue;
-
-        ASSERT(thread->GetLockOwner() == current_thread.get());
-        current_thread->RemoveMutexWaiter(thread);
-        if (new_owner != thread)
-            new_owner->AddMutexWaiter(thread);
-    }
-}
-
-Mutex::Mutex(Core::System& system) : system{system} {}
-Mutex::~Mutex() = default;
-
-ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
-                             Handle requesting_thread_handle) {
-    // The mutex address must be 4-byte aligned
-    if ((address % sizeof(u32)) != 0) {
-        LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address);
-        return ERR_INVALID_ADDRESS;
-    }
-
-    auto& kernel = system.Kernel();
-    std::shared_ptr<Thread> current_thread =
-        SharedFrom(kernel.CurrentScheduler()->GetCurrentThread());
-    {
-        KScopedSchedulerLock lock(kernel);
-        // The mutex address must be 4-byte aligned
-        if ((address % sizeof(u32)) != 0) {
-            return ERR_INVALID_ADDRESS;
-        }
-
-        const auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
-        std::shared_ptr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle);
-        std::shared_ptr<Thread> requesting_thread =
-            handle_table.Get<Thread>(requesting_thread_handle);
-
-        // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of
-        // another thread.
-        ASSERT(requesting_thread == current_thread);
-
-        current_thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
-
-        const u32 addr_value = system.Memory().Read32(address);
-
-        // If the mutex isn't being held, just return success.
-        if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
-            return RESULT_SUCCESS;
-        }
-
-        if (holding_thread == nullptr) {
-            return ERR_INVALID_HANDLE;
-        }
-
-        // Wait until the mutex is released
-        current_thread->SetMutexWaitAddress(address);
-        current_thread->SetWaitHandle(requesting_thread_handle);
-
-        current_thread->SetStatus(ThreadStatus::WaitMutex);
-
-        // Update the lock holder thread's priority to prevent priority inversion.
-        holding_thread->AddMutexWaiter(current_thread);
-    }
-
-    {
-        KScopedSchedulerLock lock(kernel);
-        auto* owner = current_thread->GetLockOwner();
-        if (owner != nullptr) {
-            owner->RemoveMutexWaiter(current_thread);
-        }
-    }
-    return current_thread->GetSignalingResult();
-}
-
-std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thread> owner,
-                                                             VAddr address) {
-    // The mutex address must be 4-byte aligned
-    if ((address % sizeof(u32)) != 0) {
-        LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address);
-        return {ERR_INVALID_ADDRESS, nullptr};
-    }
-
-    auto [new_owner, num_waiters] = GetHighestPriorityMutexWaitingThread(owner, address);
-    if (new_owner == nullptr) {
-        system.Memory().Write32(address, 0);
-        return {RESULT_SUCCESS, nullptr};
-    }
-    // Transfer the ownership of the mutex from the previous owner to the new one.
-    TransferMutexOwnership(address, owner, new_owner);
-    u32 mutex_value = new_owner->GetWaitHandle();
-    if (num_waiters >= 2) {
-        // Notify the guest that there are still some threads waiting for the mutex
-        mutex_value |= Mutex::MutexHasWaitersFlag;
-    }
-    new_owner->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
-    new_owner->SetLockOwner(nullptr);
-    new_owner->ResumeFromWait();
-
-    system.Memory().Write32(address, mutex_value);
-    return {RESULT_SUCCESS, new_owner};
-}
-
-ResultCode Mutex::Release(VAddr address) {
-    auto& kernel = system.Kernel();
-    KScopedSchedulerLock lock(kernel);
-
-    std::shared_ptr<Thread> current_thread =
-        SharedFrom(kernel.CurrentScheduler()->GetCurrentThread());
-
-    auto [result, new_owner] = Unlock(current_thread, address);
-
-    if (result != RESULT_SUCCESS && new_owner != nullptr) {
-        new_owner->SetSynchronizationResults(nullptr, result);
-    }
-
-    return result;
-}
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h
deleted file mode 100644
index 3b81dc3df..000000000
--- a/src/core/hle/kernel/mutex.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-union ResultCode;
-
-namespace Core {
-class System;
-}
-
-namespace Kernel {
-
-class Mutex final {
-public:
-    explicit Mutex(Core::System& system);
-    ~Mutex();
-
-    /// Flag that indicates that a mutex still has threads waiting for it.
-    static constexpr u32 MutexHasWaitersFlag = 0x40000000;
-    /// Mask of the bits in a mutex address value that contain the mutex owner.
-    static constexpr u32 MutexOwnerMask = 0xBFFFFFFF;
-
-    /// Attempts to acquire a mutex at the specified address.
-    ResultCode TryAcquire(VAddr address, Handle holding_thread_handle,
-                          Handle requesting_thread_handle);
-
-    /// Unlocks a mutex for owner at address
-    std::pair<ResultCode, std::shared_ptr<Thread>> Unlock(std::shared_ptr<Thread> owner,
-                                                          VAddr address);
-
-    /// Releases the mutex at the specified address.
-    ResultCode Release(VAddr address);
-
-private:
-    Core::System& system;
-};
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h
index e3391e2af..27124ef67 100644
--- a/src/core/hle/kernel/object.h
+++ b/src/core/hle/kernel/object.h
@@ -50,6 +50,11 @@ public:
     }
     virtual HandleType GetHandleType() const = 0;
 
+    void Close() {
+        // TODO(bunnei): Currently a no-op. Placeholder for decrementing the reference count once
+        // KAutoObject is implemented in place of shared_ptr.
+    }
+
     /**
      * Check if a thread can wait on the object
      * @return True if a thread can wait on the object, otherwise false
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index b905b486a..37b77fa6e 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -55,7 +55,7 @@ void SetupMainThread(Core::System& system, Process& owner_process, u32 priority,
     // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires
     {
         KScopedSchedulerLock lock{kernel};
-        thread->SetStatus(ThreadStatus::Ready);
+        thread->SetState(ThreadState::Runnable);
     }
 }
 } // Anonymous namespace
@@ -162,48 +162,6 @@ u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
     return GetTotalPhysicalMemoryUsed() - GetSystemResourceUsage();
 }
 
-void Process::InsertConditionVariableThread(std::shared_ptr<Thread> thread) {
-    VAddr cond_var_addr = thread->GetCondVarWaitAddress();
-    std::list<std::shared_ptr<Thread>>& thread_list = cond_var_threads[cond_var_addr];
-    auto it = thread_list.begin();
-    while (it != thread_list.end()) {
-        const std::shared_ptr<Thread> current_thread = *it;
-        if (current_thread->GetPriority() > thread->GetPriority()) {
-            thread_list.insert(it, thread);
-            return;
-        }
-        ++it;
-    }
-    thread_list.push_back(thread);
-}
-
-void Process::RemoveConditionVariableThread(std::shared_ptr<Thread> thread) {
-    VAddr cond_var_addr = thread->GetCondVarWaitAddress();
-    std::list<std::shared_ptr<Thread>>& thread_list = cond_var_threads[cond_var_addr];
-    auto it = thread_list.begin();
-    while (it != thread_list.end()) {
-        const std::shared_ptr<Thread> current_thread = *it;
-        if (current_thread.get() == thread.get()) {
-            thread_list.erase(it);
-            return;
-        }
-        ++it;
-    }
-}
-
-std::vector<std::shared_ptr<Thread>> Process::GetConditionVariableThreads(
-    const VAddr cond_var_addr) {
-    std::vector<std::shared_ptr<Thread>> result{};
-    std::list<std::shared_ptr<Thread>>& thread_list = cond_var_threads[cond_var_addr];
-    auto it = thread_list.begin();
-    while (it != thread_list.end()) {
-        std::shared_ptr<Thread> current_thread = *it;
-        result.push_back(current_thread);
-        ++it;
-    }
-    return result;
-}
-
 void Process::RegisterThread(const Thread* thread) {
     thread_list.push_back(thread);
 }
@@ -318,7 +276,7 @@ void Process::PrepareForTermination() {
                 continue;
 
             // TODO(Subv): When are the other running/ready threads terminated?
-            ASSERT_MSG(thread->GetStatus() == ThreadStatus::WaitSynch,
+            ASSERT_MSG(thread->GetState() == ThreadState::Waiting,
                        "Exiting processes with non-waiting threads is currently unimplemented");
 
             thread->Stop();
@@ -406,21 +364,18 @@ void Process::LoadModule(CodeSet code_set, VAddr base_addr) {
     ReprotectSegment(code_set.DataSegment(), Memory::MemoryPermission::ReadAndWrite);
 }
 
+bool Process::IsSignaled() const {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
+    return is_signaled;
+}
+
 Process::Process(Core::System& system)
-    : SynchronizationObject{system.Kernel()}, page_table{std::make_unique<Memory::PageTable>(
-                                                  system)},
-      handle_table{system.Kernel()}, address_arbiter{system}, mutex{system}, system{system} {}
+    : KSynchronizationObject{system.Kernel()},
+      page_table{std::make_unique<Memory::PageTable>(system)}, handle_table{system.Kernel()},
+      address_arbiter{system}, condition_var{system}, system{system} {}
 
 Process::~Process() = default;
 
-void Process::Acquire(Thread* thread) {
-    ASSERT_MSG(!ShouldWait(thread), "Object unavailable!");
-}
-
-bool Process::ShouldWait(const Thread* thread) const {
-    return !is_signaled;
-}
-
 void Process::ChangeStatus(ProcessStatus new_status) {
     if (status == new_status) {
         return;
@@ -428,7 +383,7 @@ void Process::ChangeStatus(ProcessStatus new_status) {
 
     status = new_status;
     is_signaled = true;
-    Signal();
+    NotifyAvailable();
 }
 
 ResultCode Process::AllocateMainThreadStack(std::size_t stack_size) {
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index e412e58aa..564e1f27d 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -11,11 +11,11 @@
 #include <unordered_map>
 #include <vector>
 #include "common/common_types.h"
-#include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/handle_table.h"
-#include "core/hle/kernel/mutex.h"
+#include "core/hle/kernel/k_address_arbiter.h"
+#include "core/hle/kernel/k_condition_variable.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/process_capability.h"
-#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/result.h"
 
 namespace Core {
@@ -63,7 +63,7 @@ enum class ProcessStatus {
     DebugBreak,
 };
 
-class Process final : public SynchronizationObject {
+class Process final : public KSynchronizationObject {
 public:
     explicit Process(Core::System& system);
     ~Process() override;
@@ -123,24 +123,30 @@ public:
         return handle_table;
     }
 
-    /// Gets a reference to the process' address arbiter.
-    AddressArbiter& GetAddressArbiter() {
-        return address_arbiter;
+    ResultCode SignalToAddress(VAddr address) {
+        return condition_var.SignalToAddress(address);
     }
 
-    /// Gets a const reference to the process' address arbiter.
-    const AddressArbiter& GetAddressArbiter() const {
-        return address_arbiter;
+    ResultCode WaitForAddress(Handle handle, VAddr address, u32 tag) {
+        return condition_var.WaitForAddress(handle, address, tag);
     }
 
-    /// Gets a reference to the process' mutex lock.
-    Mutex& GetMutex() {
-        return mutex;
+    void SignalConditionVariable(u64 cv_key, int32_t count) {
+        return condition_var.Signal(cv_key, count);
     }
 
-    /// Gets a const reference to the process' mutex lock
-    const Mutex& GetMutex() const {
-        return mutex;
+    ResultCode WaitConditionVariable(VAddr address, u64 cv_key, u32 tag, s64 ns) {
+        return condition_var.Wait(address, cv_key, tag, ns);
+    }
+
+    ResultCode SignalAddressArbiter(VAddr address, Svc::SignalType signal_type, s32 value,
+                                    s32 count) {
+        return address_arbiter.SignalToAddress(address, signal_type, value, count);
+    }
+
+    ResultCode WaitAddressArbiter(VAddr address, Svc::ArbitrationType arb_type, s32 value,
+                                  s64 timeout) {
+        return address_arbiter.WaitForAddress(address, arb_type, value, timeout);
     }
 
     /// Gets the address to the process' dedicated TLS region.
@@ -250,15 +256,6 @@ public:
         return thread_list;
     }
 
-    /// Insert a thread into the condition variable wait container
-    void InsertConditionVariableThread(std::shared_ptr<Thread> thread);
-
-    /// Remove a thread from the condition variable wait container
-    void RemoveConditionVariableThread(std::shared_ptr<Thread> thread);
-
-    /// Obtain all condition variable threads waiting for some address
-    std::vector<std::shared_ptr<Thread>> GetConditionVariableThreads(VAddr cond_var_addr);
-
     /// Registers a thread as being created under this process,
     /// adding it to this process' thread list.
     void RegisterThread(const Thread* thread);
@@ -304,6 +301,8 @@ public:
 
     void LoadModule(CodeSet code_set, VAddr base_addr);
 
+    bool IsSignaled() const override;
+
     ///////////////////////////////////////////////////////////////////////////////////////////////
     // Thread-local storage management
 
@@ -314,12 +313,6 @@ public:
     void FreeTLSRegion(VAddr tls_address);
 
 private:
-    /// Checks if the specified thread should wait until this process is available.
-    bool ShouldWait(const Thread* thread) const override;
-
-    /// Acquires/locks this process for the specified thread if it's available.
-    void Acquire(Thread* thread) override;
-
     /// Changes the process status. If the status is different
     /// from the current process status, then this will trigger
     /// a process signal.
@@ -373,12 +366,12 @@ private:
     HandleTable handle_table;
 
     /// Per-process address arbiter.
-    AddressArbiter address_arbiter;
+    KAddressArbiter address_arbiter;
 
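-    /// The per-process mutex lock instance used for handling various
-    /// forms of services, such as lock arbitration, and condition
-    /// variable related facilities.
-    Mutex mutex;
+    /// The per-process condition variable instance used for handling
+    /// lock arbitration (WaitForAddress/SignalToAddress) and condition
+    /// variable signaling and waiting.
+    KConditionVariable condition_var;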
 
     /// Address indicating the location of the process' dedicated TLS region.
     VAddr tls_region_address = 0;
@@ -389,9 +382,6 @@ private:
     /// List of threads that are running with this process as their owner.
     std::list<const Thread*> thread_list;
 
-    /// List of threads waiting for a condition variable
-    std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> cond_var_threads;
-
     /// Address of the top of the main thread's stack
     VAddr main_thread_stack_top{};
 
@@ -410,6 +400,8 @@ private:
     /// Schedule count of this process
     s64 schedule_count{};
 
+    bool is_signaled{};
+
     /// System context
     Core::System& system;
 };
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index cea262ce0..99ed0857e 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -14,24 +14,22 @@
 
 namespace Kernel {
 
-ReadableEvent::ReadableEvent(KernelCore& kernel) : SynchronizationObject{kernel} {}
+ReadableEvent::ReadableEvent(KernelCore& kernel) : KSynchronizationObject{kernel} {}
 ReadableEvent::~ReadableEvent() = default;
 
-bool ReadableEvent::ShouldWait(const Thread* thread) const {
-    return !is_signaled;
-}
-
-void ReadableEvent::Acquire(Thread* thread) {
-    ASSERT_MSG(IsSignaled(), "object unavailable!");
-}
-
 void ReadableEvent::Signal() {
     if (is_signaled) {
         return;
     }
 
     is_signaled = true;
-    SynchronizationObject::Signal();
+    NotifyAvailable();
+}
+
+bool ReadableEvent::IsSignaled() const {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
+
+    return is_signaled;
 }
 
 void ReadableEvent::Clear() {
diff --git a/src/core/hle/kernel/readable_event.h b/src/core/hle/kernel/readable_event.h
index 3264dd066..34e477274 100644
--- a/src/core/hle/kernel/readable_event.h
+++ b/src/core/hle/kernel/readable_event.h
@@ -4,8 +4,8 @@
 
 #pragma once
 
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/object.h"
-#include "core/hle/kernel/synchronization_object.h"
 
 union ResultCode;
 
@@ -14,7 +14,7 @@ namespace Kernel {
 class KernelCore;
 class WritableEvent;
 
-class ReadableEvent final : public SynchronizationObject {
+class ReadableEvent final : public KSynchronizationObject {
     friend class WritableEvent;
 
 public:
@@ -32,9 +32,6 @@ public:
         return HANDLE_TYPE;
     }
 
-    bool ShouldWait(const Thread* thread) const override;
-    void Acquire(Thread* thread) override;
-
     /// Unconditionally clears the readable event's state.
     void Clear();
 
@@ -46,11 +43,14 @@ public:
     ///      then ERR_INVALID_STATE will be returned.
     ResultCode Reset();
 
-    void Signal() override;
+    void Signal();
+
+    bool IsSignaled() const override;
 
 private:
     explicit ReadableEvent(KernelCore& kernel);
 
+    bool is_signaled{};
     std::string name; ///< Name of event (optional)
 };
 
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp
index a549ae9d7..82857f93b 100644
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -13,7 +13,7 @@
 
 namespace Kernel {
 
-ServerPort::ServerPort(KernelCore& kernel) : SynchronizationObject{kernel} {}
+ServerPort::ServerPort(KernelCore& kernel) : KSynchronizationObject{kernel} {}
 ServerPort::~ServerPort() = default;
 
 ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() {
@@ -28,15 +28,9 @@ ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() {
 
 void ServerPort::AppendPendingSession(std::shared_ptr<ServerSession> pending_session) {
     pending_sessions.push_back(std::move(pending_session));
-}
-
-bool ServerPort::ShouldWait(const Thread* thread) const {
-    // If there are no pending sessions, we wait until a new one is added.
-    return pending_sessions.empty();
-}
-
-void ServerPort::Acquire(Thread* thread) {
-    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
+    if (pending_sessions.size() == 1) {
+        NotifyAvailable();
+    }
 }
 
 bool ServerPort::IsSignaled() const {
diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h
index 41b191b86..6470df993 100644
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -9,8 +9,8 @@
 #include <utility>
 #include <vector>
 #include "common/common_types.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/object.h"
-#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/result.h"
 
 namespace Kernel {
@@ -20,7 +20,7 @@ class KernelCore;
 class ServerSession;
 class SessionRequestHandler;
 
-class ServerPort final : public SynchronizationObject {
+class ServerPort final : public KSynchronizationObject {
 public:
     explicit ServerPort(KernelCore& kernel);
     ~ServerPort() override;
@@ -79,9 +79,6 @@ public:
     /// waiting to be accepted by this port.
     void AppendPendingSession(std::shared_ptr<ServerSession> pending_session);
 
-    bool ShouldWait(const Thread* thread) const override;
-    void Acquire(Thread* thread) override;
-
     bool IsSignaled() const override;
 
 private:
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index b40fe3916..4f2bb7822 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -24,7 +24,7 @@
 
 namespace Kernel {
 
-ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {}
+ServerSession::ServerSession(KernelCore& kernel) : KSynchronizationObject{kernel} {}
 
 ServerSession::~ServerSession() {
     kernel.ReleaseServiceThread(service_thread);
@@ -42,16 +42,6 @@ ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kern
     return MakeResult(std::move(session));
 }
 
-bool ServerSession::ShouldWait(const Thread* thread) const {
-    // Closed sessions should never wait, an error will be returned from svcReplyAndReceive.
-    if (!parent->Client()) {
-        return false;
-    }
-
-    // Wait if we have no pending requests, or if we're currently handling a request.
-    return pending_requesting_threads.empty() || currently_handling != nullptr;
-}
-
 bool ServerSession::IsSignaled() const {
     // Closed sessions should never wait, an error will be returned from svcReplyAndReceive.
     if (!parent->Client()) {
@@ -62,15 +52,6 @@ bool ServerSession::IsSignaled() const {
     return !pending_requesting_threads.empty() && currently_handling == nullptr;
 }
 
-void ServerSession::Acquire(Thread* thread) {
-    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
-    // We are now handling a request, pop it from the stack.
-    // TODO(Subv): What happens if the client endpoint is closed before any requests are made?
-    ASSERT(!pending_requesting_threads.empty());
-    currently_handling = pending_requesting_threads.back();
-    pending_requesting_threads.pop_back();
-}
-
 void ServerSession::ClientDisconnected() {
     // We keep a shared pointer to the hle handler to keep it alive throughout
     // the call to ClientDisconnected, as ClientDisconnected invalidates the
@@ -172,7 +153,7 @@ ResultCode ServerSession::CompleteSyncRequest(HLERequestContext& context) {
     {
         KScopedSchedulerLock lock(kernel);
         if (!context.IsThreadWaiting()) {
-            context.GetThread().ResumeFromWait();
+            context.GetThread().Wakeup();
             context.GetThread().SetSynchronizationResults(nullptr, result);
         }
     }
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index e8d1d99ea..9155cf7f5 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -10,8 +10,8 @@
 #include <vector>
 
 #include "common/threadsafe_queue.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/service_thread.h"
-#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/result.h"
 
 namespace Core::Memory {
@@ -43,7 +43,7 @@ class Thread;
  * After the server replies to the request, the response is marshalled back to the caller's
  * TLS buffer and control is transferred back to it.
  */
-class ServerSession final : public SynchronizationObject {
+class ServerSession final : public KSynchronizationObject {
     friend class ServiceThread;
 
 public:
@@ -77,8 +77,6 @@ public:
         return parent.get();
     }
 
-    bool IsSignaled() const override;
-
     /**
      * Sets the HLE handler for the session. This handler will be called to service IPC requests
      * instead of the regular IPC machinery. (The regular IPC machinery is currently not
@@ -100,10 +98,6 @@ public:
     ResultCode HandleSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory,
                                  Core::Timing::CoreTiming& core_timing);
 
-    bool ShouldWait(const Thread* thread) const override;
-
-    void Acquire(Thread* thread) override;
-
     /// Called when a client disconnection occurs.
     void ClientDisconnected();
 
@@ -130,6 +124,8 @@ public:
         convert_to_domain = true;
     }
 
+    bool IsSignaled() const override;
+
 private:
     /// Queues a sync request from the emulated application.
     ResultCode QueueSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory);
diff --git a/src/core/hle/kernel/session.cpp b/src/core/hle/kernel/session.cpp
index e4dd53e24..75304b961 100644
--- a/src/core/hle/kernel/session.cpp
+++ b/src/core/hle/kernel/session.cpp
@@ -9,7 +9,7 @@
 
 namespace Kernel {
 
-Session::Session(KernelCore& kernel) : SynchronizationObject{kernel} {}
+Session::Session(KernelCore& kernel) : KSynchronizationObject{kernel} {}
 Session::~Session() = default;
 
 Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
@@ -24,18 +24,9 @@ Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
     return std::make_pair(std::move(client_session), std::move(server_session));
 }
 
-bool Session::ShouldWait(const Thread* thread) const {
-    UNIMPLEMENTED();
-    return {};
-}
-
 bool Session::IsSignaled() const {
     UNIMPLEMENTED();
     return true;
 }
 
-void Session::Acquire(Thread* thread) {
-    UNIMPLEMENTED();
-}
-
 } // namespace Kernel
diff --git a/src/core/hle/kernel/session.h b/src/core/hle/kernel/session.h
index 7cd9c0d77..f6dd2c1d2 100644
--- a/src/core/hle/kernel/session.h
+++ b/src/core/hle/kernel/session.h
@@ -8,7 +8,7 @@
 #include <string>
 #include <utility>
 
-#include "core/hle/kernel/synchronization_object.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 
 namespace Kernel {
 
@@ -19,7 +19,7 @@ class ServerSession;
  * Parent structure to link the client and server endpoints of a session with their associated
  * client port.
  */
-class Session final : public SynchronizationObject {
+class Session final : public KSynchronizationObject {
 public:
     explicit Session(KernelCore& kernel);
     ~Session() override;
@@ -37,12 +37,8 @@ public:
         return HANDLE_TYPE;
     }
 
-    bool ShouldWait(const Thread* thread) const override;
-
     bool IsSignaled() const override;
 
-    void Acquire(Thread* thread) override;
-
     std::shared_ptr<ClientSession> Client() {
         if (auto result{client.lock()}) {
             return result;
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index de3ed25da..cc8b661af 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -10,6 +10,7 @@
 
 #include "common/alignment.h"
 #include "common/assert.h"
+#include "common/common_funcs.h"
 #include "common/fiber.h"
 #include "common/logging/log.h"
 #include "common/microprofile.h"
@@ -19,26 +20,28 @@
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
 #include "core/cpu_manager.h"
-#include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/k_address_arbiter.h"
+#include "core/hle/kernel/k_condition_variable.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/memory/memory_block.h"
+#include "core/hle/kernel/memory/memory_layout.h"
 #include "core/hle/kernel/memory/page_table.h"
-#include "core/hle/kernel/mutex.h"
 #include "core/hle/kernel/physical_core.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/kernel/resource_limit.h"
 #include "core/hle/kernel/shared_memory.h"
 #include "core/hle/kernel/svc.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/kernel/svc_types.h"
 #include "core/hle/kernel/svc_wrap.h"
-#include "core/hle/kernel/synchronization.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/hle/kernel/transfer_memory.h"
@@ -343,27 +346,11 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
     auto thread = kernel.CurrentScheduler()->GetCurrentThread();
     {
         KScopedSchedulerLock lock(kernel);
-        thread->InvalidateHLECallback();
-        thread->SetStatus(ThreadStatus::WaitIPC);
+        thread->SetState(ThreadState::Waiting);
+        thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::IPC);
         session->SendSyncRequest(SharedFrom(thread), system.Memory(), system.CoreTiming());
     }
 
-    if (thread->HasHLECallback()) {
-        Handle event_handle = thread->GetHLETimeEvent();
-        if (event_handle != InvalidHandle) {
-            auto& time_manager = kernel.TimeManager();
-            time_manager.UnscheduleTimeEvent(event_handle);
-        }
-
-        {
-            KScopedSchedulerLock lock(kernel);
-            auto* sync_object = thread->GetHLESyncObject();
-            sync_object->RemoveWaitingThread(SharedFrom(thread));
-        }
-
-        thread->InvokeHLECallback(SharedFrom(thread));
-    }
-
     return thread->GetSignalingResult();
 }
 
@@ -436,7 +423,7 @@ static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32*
 }
 
 /// Wait for the given handles to synchronize, timeout after the specified nanoseconds
-static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address,
+static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr handles_address,
                                       u64 handle_count, s64 nano_seconds) {
     LOG_TRACE(Kernel_SVC, "called handles_address=0x{:X}, handle_count={}, nano_seconds={}",
               handles_address, handle_count, nano_seconds);
@@ -458,28 +445,26 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr
     }
 
     auto& kernel = system.Kernel();
-    Thread::ThreadSynchronizationObjects objects(handle_count);
+    std::vector<KSynchronizationObject*> objects(handle_count);
     const auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
 
     for (u64 i = 0; i < handle_count; ++i) {
         const Handle handle = memory.Read32(handles_address + i * sizeof(Handle));
-        const auto object = handle_table.Get<SynchronizationObject>(handle);
+        const auto object = handle_table.Get<KSynchronizationObject>(handle);
 
         if (object == nullptr) {
             LOG_ERROR(Kernel_SVC, "Object is a nullptr");
             return ERR_INVALID_HANDLE;
         }
 
-        objects[i] = object;
+        objects[i] = object.get();
     }
-    auto& synchronization = kernel.Synchronization();
-    const auto [result, handle_result] = synchronization.WaitFor(objects, nano_seconds);
-    *index = handle_result;
-    return result;
+    return KSynchronizationObject::Wait(kernel, index, objects.data(),
+                                        static_cast<s32>(objects.size()), nano_seconds);
 }
 
 static ResultCode WaitSynchronization32(Core::System& system, u32 timeout_low, u32 handles_address,
-                                        s32 handle_count, u32 timeout_high, Handle* index) {
+                                        s32 handle_count, u32 timeout_high, s32* index) {
     const s64 nano_seconds{(static_cast<s64>(timeout_high) << 32) | static_cast<s64>(timeout_low)};
     return WaitSynchronization(system, index, handles_address, handle_count, nano_seconds);
 }
@@ -504,56 +489,37 @@ static ResultCode CancelSynchronization32(Core::System& system, Handle thread_ha
     return CancelSynchronization(system, thread_handle);
 }
 
-/// Attempts to locks a mutex, creating it if it does not already exist
-static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_handle,
-                                VAddr mutex_addr, Handle requesting_thread_handle) {
-    LOG_TRACE(Kernel_SVC,
-              "called holding_thread_handle=0x{:08X}, mutex_addr=0x{:X}, "
-              "requesting_current_thread_handle=0x{:08X}",
-              holding_thread_handle, mutex_addr, requesting_thread_handle);
-
-    if (Core::Memory::IsKernelVirtualAddress(mutex_addr)) {
-        LOG_ERROR(Kernel_SVC, "Mutex Address is a kernel virtual address, mutex_addr={:016X}",
-                  mutex_addr);
-        return ERR_INVALID_ADDRESS_STATE;
-    }
+/// Attempts to lock a mutex
+static ResultCode ArbitrateLock(Core::System& system, Handle thread_handle, VAddr address,
+                                u32 tag) {
+    LOG_TRACE(Kernel_SVC, "called thread_handle=0x{:08X}, address=0x{:X}, tag=0x{:08X}",
+              thread_handle, address, tag);
 
-    if (!Common::IsWordAligned(mutex_addr)) {
-        LOG_ERROR(Kernel_SVC, "Mutex Address is not word aligned, mutex_addr={:016X}", mutex_addr);
-        return ERR_INVALID_ADDRESS;
-    }
+    // Validate the input address.
+    R_UNLESS(!Memory::IsKernelAddress(address), Svc::ResultInvalidCurrentMemory);
+    R_UNLESS(Common::IsAligned(address, sizeof(u32)), Svc::ResultInvalidAddress);
 
-    auto* const current_process = system.Kernel().CurrentProcess();
-    return current_process->GetMutex().TryAcquire(mutex_addr, holding_thread_handle,
-                                                  requesting_thread_handle);
+    return system.Kernel().CurrentProcess()->WaitForAddress(thread_handle, address, tag);
 }
 
-static ResultCode ArbitrateLock32(Core::System& system, Handle holding_thread_handle,
-                                  u32 mutex_addr, Handle requesting_thread_handle) {
-    return ArbitrateLock(system, holding_thread_handle, mutex_addr, requesting_thread_handle);
+static ResultCode ArbitrateLock32(Core::System& system, Handle thread_handle, u32 address,
+                                  u32 tag) {
+    return ArbitrateLock(system, thread_handle, address, tag);
 }
 
 /// Unlock a mutex
-static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) {
-    LOG_TRACE(Kernel_SVC, "called mutex_addr=0x{:X}", mutex_addr);
-
-    if (Core::Memory::IsKernelVirtualAddress(mutex_addr)) {
-        LOG_ERROR(Kernel_SVC, "Mutex Address is a kernel virtual address, mutex_addr={:016X}",
-                  mutex_addr);
-        return ERR_INVALID_ADDRESS_STATE;
-    }
+static ResultCode ArbitrateUnlock(Core::System& system, VAddr address) {
+    LOG_TRACE(Kernel_SVC, "called address=0x{:X}", address);
 
-    if (!Common::IsWordAligned(mutex_addr)) {
-        LOG_ERROR(Kernel_SVC, "Mutex Address is not word aligned, mutex_addr={:016X}", mutex_addr);
-        return ERR_INVALID_ADDRESS;
-    }
+    // Validate the input address.
+    R_UNLESS(!Memory::IsKernelAddress(address), Svc::ResultInvalidCurrentMemory);
+    R_UNLESS(Common::IsAligned(address, sizeof(u32)), Svc::ResultInvalidAddress);
 
-    auto* const current_process = system.Kernel().CurrentProcess();
-    return current_process->GetMutex().Release(mutex_addr);
+    return system.Kernel().CurrentProcess()->SignalToAddress(address);
 }
 
-static ResultCode ArbitrateUnlock32(Core::System& system, u32 mutex_addr) {
-    return ArbitrateUnlock(system, mutex_addr);
+static ResultCode ArbitrateUnlock32(Core::System& system, u32 address) {
+    return ArbitrateUnlock(system, address);
 }
 
 enum class BreakType : u32 {
@@ -1180,7 +1146,7 @@ static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 pri
         return ERR_INVALID_HANDLE;
     }
 
-    thread->SetPriority(priority);
+    thread->SetBasePriority(priority);
 
     return RESULT_SUCCESS;
 }
@@ -1559,7 +1525,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) {
         return ERR_INVALID_HANDLE;
     }
 
-    ASSERT(thread->GetStatus() == ThreadStatus::Dormant);
+    ASSERT(thread->GetState() == ThreadState::Initialized);
 
     return thread->Start();
 }
@@ -1620,224 +1586,135 @@ static void SleepThread32(Core::System& system, u32 nanoseconds_low, u32 nanosec
 }
 
 /// Wait process wide key atomic
-static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_addr,
-                                           VAddr condition_variable_addr, Handle thread_handle,
-                                           s64 nano_seconds) {
-    LOG_TRACE(
-        Kernel_SVC,
-        "called mutex_addr={:X}, condition_variable_addr={:X}, thread_handle=0x{:08X}, timeout={}",
-        mutex_addr, condition_variable_addr, thread_handle, nano_seconds);
-
-    if (Core::Memory::IsKernelVirtualAddress(mutex_addr)) {
-        LOG_ERROR(
-            Kernel_SVC,
-            "Given mutex address must not be within the kernel address space. address=0x{:016X}",
-            mutex_addr);
-        return ERR_INVALID_ADDRESS_STATE;
-    }
-
-    if (!Common::IsWordAligned(mutex_addr)) {
-        LOG_ERROR(Kernel_SVC, "Given mutex address must be word-aligned. address=0x{:016X}",
-                  mutex_addr);
-        return ERR_INVALID_ADDRESS;
-    }
-
-    ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
-    auto& kernel = system.Kernel();
-    Handle event_handle;
-    Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread();
-    auto* const current_process = kernel.CurrentProcess();
-    {
-        KScopedSchedulerLockAndSleep lock(kernel, event_handle, current_thread, nano_seconds);
-        const auto& handle_table = current_process->GetHandleTable();
-        std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle);
-        ASSERT(thread);
-
-        current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
-
-        if (thread->IsPendingTermination()) {
-            lock.CancelSleep();
-            return ERR_THREAD_TERMINATING;
-        }
-
-        const auto release_result = current_process->GetMutex().Release(mutex_addr);
-        if (release_result.IsError()) {
-            lock.CancelSleep();
-            return release_result;
-        }
-
-        if (nano_seconds == 0) {
-            lock.CancelSleep();
-            return RESULT_TIMEOUT;
-        }
-
-        current_thread->SetCondVarWaitAddress(condition_variable_addr);
-        current_thread->SetMutexWaitAddress(mutex_addr);
-        current_thread->SetWaitHandle(thread_handle);
-        current_thread->SetStatus(ThreadStatus::WaitCondVar);
-        current_process->InsertConditionVariableThread(SharedFrom(current_thread));
-    }
-
-    if (event_handle != InvalidHandle) {
-        auto& time_manager = kernel.TimeManager();
-        time_manager.UnscheduleTimeEvent(event_handle);
-    }
-
-    {
-        KScopedSchedulerLock lock(kernel);
-
-        auto* owner = current_thread->GetLockOwner();
-        if (owner != nullptr) {
-            owner->RemoveMutexWaiter(SharedFrom(current_thread));
+static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr address, VAddr cv_key,
+                                           u32 tag, s64 timeout_ns) {
+    LOG_TRACE(Kernel_SVC, "called address={:X}, cv_key={:X}, tag=0x{:08X}, timeout_ns={}", address,
+              cv_key, tag, timeout_ns);
+
+    // Validate input: the address must be outside kernel space and int32_t-aligned.
+    R_UNLESS(!Memory::IsKernelAddress(address), Svc::ResultInvalidCurrentMemory);
+    R_UNLESS(Common::IsAligned(address, sizeof(int32_t)), Svc::ResultInvalidAddress);
+
+    // Convert timeout from nanoseconds to ticks (1:1 here); non-positive values pass through.
+    s64 timeout{};
+    if (timeout_ns > 0) {
+        const s64 offset_tick(timeout_ns);
+        if (offset_tick > 0) {
+            // Add the two-tick margin with saturation. The bound is tested before adding
+            // because signed overflow is undefined and a post-add check may be optimised out.
+            constexpr s64 max_tick = std::numeric_limits<s64>::max();
+            timeout = offset_tick > max_tick - 2 ? max_tick : offset_tick + 2;
+        } else {
+            timeout = std::numeric_limits<s64>::max();
         }
-
-        current_process->RemoveConditionVariableThread(SharedFrom(current_thread));
+    } else {
+        timeout = timeout_ns;
     }
-    // Note: Deliberately don't attempt to inherit the lock owner's priority.
 
-    return current_thread->GetSignalingResult();
+    // Wait on the condition variable; the key is aligned down to a u32 boundary first.
+    return system.Kernel().CurrentProcess()->WaitConditionVariable(
+        address, Common::AlignDown(cv_key, sizeof(u32)), tag, timeout);
 }
 
-static ResultCode WaitProcessWideKeyAtomic32(Core::System& system, u32 mutex_addr,
-                                             u32 condition_variable_addr, Handle thread_handle,
-                                             u32 nanoseconds_low, u32 nanoseconds_high) {
-    const auto nanoseconds = static_cast<s64>(nanoseconds_low | (u64{nanoseconds_high} << 32));
-    return WaitProcessWideKeyAtomic(system, mutex_addr, condition_variable_addr, thread_handle,
-                                    nanoseconds);
+static ResultCode WaitProcessWideKeyAtomic32(Core::System& system, u32 address, u32 cv_key, u32 tag,
+                                             u32 timeout_ns_low, u32 timeout_ns_high) {
+    const u64 packed_ns = (u64{timeout_ns_high} << 32) | u64{timeout_ns_low}; // rejoin halves
+    return WaitProcessWideKeyAtomic(system, address, cv_key, tag, static_cast<s64>(packed_ns));
 }
 
 /// Signal process wide key
-static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr, s32 target) {
-    LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
-              condition_variable_addr, target);
+static void SignalProcessWideKey(Core::System& system, VAddr cv_key, s32 count) {
+    LOG_TRACE(Kernel_SVC, "called, cv_key=0x{:X}, count=0x{:08X}", cv_key, count);
 
-    ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
+    // Signal the condition variable; a misaligned key is rounded down to a u32 boundary.
+    return system.Kernel().CurrentProcess()->SignalConditionVariable(
+        Common::AlignDown(cv_key, sizeof(u32)), count);
+}
 
-    // Retrieve a list of all threads that are waiting for this condition variable.
-    auto& kernel = system.Kernel();
-    KScopedSchedulerLock lock(kernel);
-    auto* const current_process = kernel.CurrentProcess();
-    std::vector<std::shared_ptr<Thread>> waiting_threads =
-        current_process->GetConditionVariableThreads(condition_variable_addr);
-
-    // Only process up to 'target' threads, unless 'target' is less equal 0, in which case process
-    // them all.
-    std::size_t last = waiting_threads.size();
-    if (target > 0) {
-        last = std::min(waiting_threads.size(), static_cast<std::size_t>(target));
-    }
-    for (std::size_t index = 0; index < last; ++index) {
-        auto& thread = waiting_threads[index];
-
-        ASSERT(thread->GetCondVarWaitAddress() == condition_variable_addr);
-
-        // liberate Cond Var Thread.
-        current_process->RemoveConditionVariableThread(thread);
-
-        const std::size_t current_core = system.CurrentCoreIndex();
-        auto& monitor = system.Monitor();
-
-        // Atomically read the value of the mutex.
-        u32 mutex_val = 0;
-        u32 update_val = 0;
-        const VAddr mutex_address = thread->GetMutexWaitAddress();
-        do {
-            // If the mutex is not yet acquired, acquire it.
-            mutex_val = monitor.ExclusiveRead32(current_core, mutex_address);
-
-            if (mutex_val != 0) {
-                update_val = mutex_val | Mutex::MutexHasWaitersFlag;
-            } else {
-                update_val = thread->GetWaitHandle();
-            }
-        } while (!monitor.ExclusiveWrite32(current_core, mutex_address, update_val));
-        monitor.ClearExclusive();
-        if (mutex_val == 0) {
-            // We were able to acquire the mutex, resume this thread.
-            auto* const lock_owner = thread->GetLockOwner();
-            if (lock_owner != nullptr) {
-                lock_owner->RemoveMutexWaiter(thread);
-            }
+static void SignalProcessWideKey32(Core::System& system, u32 cv_key, s32 count) {
+    SignalProcessWideKey(system, cv_key, count);
+}
 
-            thread->SetLockOwner(nullptr);
-            thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
-            thread->ResumeFromWait();
-        } else {
-            // The mutex is already owned by some other thread, make this thread wait on it.
-            const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
-            const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
-            auto owner = handle_table.Get<Thread>(owner_handle);
-            ASSERT(owner);
-            if (thread->GetStatus() == ThreadStatus::WaitCondVar) {
-                thread->SetStatus(ThreadStatus::WaitMutex);
-            }
+namespace {
 
-            owner->AddMutexWaiter(thread);
-        }
+constexpr bool IsValidSignalType(Svc::SignalType type) {
+    switch (type) { // Only enumerators declared in Svc::SignalType are accepted.
+    case Svc::SignalType::Signal:
+    case Svc::SignalType::SignalAndIncrementIfEqual:
+    case Svc::SignalType::SignalAndModifyByWaitingCountIfEqual:
+        return true;
+    default: // e.g. an undeclared raw value cast into the enum by an SVC wrapper
+        return false;
     }
 }
 
-static void SignalProcessWideKey32(Core::System& system, u32 condition_variable_addr, s32 target) {
-    SignalProcessWideKey(system, condition_variable_addr, target);
+constexpr bool IsValidArbitrationType(Svc::ArbitrationType type) {
+    switch (type) { // Only enumerators declared in Svc::ArbitrationType are accepted.
+    case Svc::ArbitrationType::WaitIfLessThan:
+    case Svc::ArbitrationType::DecrementAndWaitIfLessThan:
+    case Svc::ArbitrationType::WaitIfEqual:
+        return true;
+    default: // e.g. an undeclared raw value cast into the enum by an SVC wrapper
+        return false;
+    }
 }
 
-// Wait for an address (via Address Arbiter)
-static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
-                                 s64 timeout) {
-    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address,
-              type, value, timeout);
-
-    // If the passed address is a kernel virtual address, return invalid memory state.
-    if (Core::Memory::IsKernelVirtualAddress(address)) {
-        LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address);
-        return ERR_INVALID_ADDRESS_STATE;
-    }
+} // namespace
 
-    // If the address is not properly aligned to 4 bytes, return invalid address.
-    if (!Common::IsWordAligned(address)) {
-        LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address);
-        return ERR_INVALID_ADDRESS;
+// Wait for an address (via Address Arbiter)
+static ResultCode WaitForAddress(Core::System& system, VAddr address, Svc::ArbitrationType arb_type,
+                                 s32 value, s64 timeout_ns) {
+    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, arb_type=0x{:X}, value=0x{:X}, timeout_ns={}",
+              address, arb_type, value, timeout_ns);
+
+    // Validate input: non-kernel address, int32_t alignment, and a known arbitration type.
+    R_UNLESS(!Memory::IsKernelAddress(address), Svc::ResultInvalidCurrentMemory);
+    R_UNLESS(Common::IsAligned(address, sizeof(int32_t)), Svc::ResultInvalidAddress);
+    R_UNLESS(IsValidArbitrationType(arb_type), Svc::ResultInvalidEnumValue);
+
+    // Convert timeout from nanoseconds to ticks (1:1 here); non-positive values pass through.
+    s64 timeout{};
+    if (timeout_ns > 0) {
+        const s64 offset_tick(timeout_ns);
+        if (offset_tick > 0) {
+            // Add the two-tick margin with saturation. The bound is tested before adding
+            // because signed overflow is undefined and a post-add check may be optimised out.
+            constexpr s64 max_tick = std::numeric_limits<s64>::max();
+            timeout = offset_tick > max_tick - 2 ? max_tick : offset_tick + 2;
+        } else {
+            timeout = std::numeric_limits<s64>::max();
+        }
+    } else {
+        timeout = timeout_ns;
     }
 
-    const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type);
-    auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();
-    const ResultCode result =
-        address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
-    return result;
+    return system.Kernel().CurrentProcess()->WaitAddressArbiter(address, arb_type, value, timeout);
 }
 
-static ResultCode WaitForAddress32(Core::System& system, u32 address, u32 type, s32 value,
-                                   u32 timeout_low, u32 timeout_high) {
-    const auto timeout = static_cast<s64>(timeout_low | (u64{timeout_high} << 32));
-    return WaitForAddress(system, address, type, value, timeout);
+static ResultCode WaitForAddress32(Core::System& system, u32 address, Svc::ArbitrationType arb_type,
+                                   s32 value, u32 timeout_ns_low, u32 timeout_ns_high) {
+    const u64 packed_ns = (u64{timeout_ns_high} << 32) | u64{timeout_ns_low}; // rejoin halves
+    return WaitForAddress(system, address, arb_type, value, static_cast<s64>(packed_ns));
 }
 
 // Signals to an address (via Address Arbiter)
-static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
-                                  s32 num_to_wake) {
-    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
-              address, type, value, num_to_wake);
-
-    // If the passed address is a kernel virtual address, return invalid memory state.
-    if (Core::Memory::IsKernelVirtualAddress(address)) {
-        LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address);
-        return ERR_INVALID_ADDRESS_STATE;
-    }
+static ResultCode SignalToAddress(Core::System& system, VAddr address, Svc::SignalType signal_type,
+                                  s32 value, s32 count) {
+    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, signal_type=0x{:X}, value=0x{:X}, count=0x{:X}",
+              address, signal_type, value, count);
 
-    // If the address is not properly aligned to 4 bytes, return invalid address.
-    if (!Common::IsWordAligned(address)) {
-        LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address);
-        return ERR_INVALID_ADDRESS;
-    }
+    // Validate input: non-kernel address, s32 alignment, and a known signal type.
+    R_UNLESS(!Memory::IsKernelAddress(address), Svc::ResultInvalidCurrentMemory);
+    R_UNLESS(Common::IsAligned(address, sizeof(s32)), Svc::ResultInvalidAddress);
+    R_UNLESS(IsValidSignalType(signal_type), Svc::ResultInvalidEnumValue);
 
-    const auto signal_type = static_cast<AddressArbiter::SignalType>(type);
-    auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();
-    return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
+    return system.Kernel().CurrentProcess()->SignalAddressArbiter(address, signal_type, value,
+                                                                  count);
 }
 
-static ResultCode SignalToAddress32(Core::System& system, u32 address, u32 type, s32 value,
-                                    s32 num_to_wake) {
-    return SignalToAddress(system, address, type, value, num_to_wake);
+static ResultCode SignalToAddress32(Core::System& system, u32 address, Svc::SignalType signal_type,
+                                    s32 value, s32 count) {
+    return SignalToAddress(system, address, signal_type, value, count);
 }
 
 static void KernelDebug([[maybe_unused]] Core::System& system,
diff --git a/src/core/hle/kernel/svc_common.h b/src/core/hle/kernel/svc_common.h
new file mode 100644
index 000000000..4af049551
--- /dev/null
+++ b/src/core/hle/kernel/svc_common.h
@@ -0,0 +1,14 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Kernel::Svc {
+
+constexpr s32 ArgumentHandleCountMax = 0x40; // 64; presumably max handles per SVC - TODO confirm
+constexpr u32 HandleWaitMask{1u << 30};      // bit 30; presumably a waiters flag - TODO confirm
+
+} // namespace Kernel::Svc
diff --git a/src/core/hle/kernel/svc_results.h b/src/core/hle/kernel/svc_results.h
new file mode 100644
index 000000000..78282f021
--- /dev/null
+++ b/src/core/hle/kernel/svc_results.h
@@ -0,0 +1,20 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/hle/result.h"
+
+namespace Kernel::Svc {
+
+constexpr ResultCode ResultTerminationRequested{ErrorModule::Kernel, 59};
+constexpr ResultCode ResultInvalidAddress{ErrorModule::Kernel, 102};       // misaligned address
+constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106}; // kernel-space address
+constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114};
+constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117};
+constexpr ResultCode ResultCancelled{ErrorModule::Kernel, 118};
+constexpr ResultCode ResultInvalidEnumValue{ErrorModule::Kernel, 120}; // unknown enum argument
+constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125};
+
+} // namespace Kernel::Svc
diff --git a/src/core/hle/kernel/svc_types.h b/src/core/hle/kernel/svc_types.h
index 11e1d8e2d..d623f7a50 100644
--- a/src/core/hle/kernel/svc_types.h
+++ b/src/core/hle/kernel/svc_types.h
@@ -65,4 +65,16 @@ struct MemoryInfo {
     u32 padding{};
 };
 
+enum class SignalType : u32 { // Signal modes accepted by the SignalToAddress SVC.
+    Signal = 0,
+    SignalAndIncrementIfEqual = 1,
+    SignalAndModifyByWaitingCountIfEqual = 2,
+};
+
+enum class ArbitrationType : u32 { // Wait modes accepted by the WaitForAddress SVC.
+    WaitIfLessThan = 0,
+    DecrementAndWaitIfLessThan = 1,
+    WaitIfEqual = 2,
+};
+
 } // namespace Kernel::Svc
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 0b6dd9df0..a32750ed7 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -7,6 +7,7 @@
 #include "common/common_types.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
+#include "core/hle/kernel/svc_types.h"
 #include "core/hle/result.h"
 
 namespace Kernel {
@@ -215,9 +216,10 @@ void SvcWrap64(Core::System& system) {
         func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw);
 }
 
-template <ResultCode func(Core::System&, u32*, u64, u64, s64)>
+// Used by WaitSynchronization
+template <ResultCode func(Core::System&, s32*, u64, u64, s64)>
 void SvcWrap64(Core::System& system) {
-    u32 param_1 = 0;
+    s32 param_1 = 0;
     const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)),
                             static_cast<s64>(Param(system, 3)))
                            .raw;
@@ -276,18 +278,22 @@ void SvcWrap64(Core::System& system) {
     FuncReturn(system, retval);
 }
 
-template <ResultCode func(Core::System&, u64, u32, s32, s64)>
+// Used by WaitForAddress
+template <ResultCode func(Core::System&, u64, Svc::ArbitrationType, s32, s64)>
 void SvcWrap64(Core::System& system) {
-    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
-                            static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
-                           .raw);
+    FuncReturn(system,
+               func(system, Param(system, 0), static_cast<Svc::ArbitrationType>(Param(system, 1)),
+                    static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
+                   .raw);
 }
 
-template <ResultCode func(Core::System&, u64, u32, s32, s32)>
+// Used by SignalToAddress
+template <ResultCode func(Core::System&, u64, Svc::SignalType, s32, s32)>
 void SvcWrap64(Core::System& system) {
-    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
-                            static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
-                           .raw);
+    FuncReturn(system,
+               func(system, Param(system, 0), static_cast<Svc::SignalType>(Param(system, 1)),
+                    static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
+                   .raw);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -503,22 +509,23 @@ void SvcWrap32(Core::System& system) {
 }
 
 // Used by WaitForAddress32
-template <ResultCode func(Core::System&, u32, u32, s32, u32, u32)>
+template <ResultCode func(Core::System&, u32, Svc::ArbitrationType, s32, u32, u32)>
 void SvcWrap32(Core::System& system) {
     const u32 retval = func(system, static_cast<u32>(Param(system, 0)),
-                            static_cast<u32>(Param(system, 1)), static_cast<s32>(Param(system, 2)),
-                            static_cast<u32>(Param(system, 3)), static_cast<u32>(Param(system, 4)))
+                            static_cast<Svc::ArbitrationType>(Param(system, 1)),
+                            static_cast<s32>(Param(system, 2)), static_cast<u32>(Param(system, 3)),
+                            static_cast<u32>(Param(system, 4)))
                            .raw;
     FuncReturn(system, retval);
 }
 
 // Used by SignalToAddress32
-template <ResultCode func(Core::System&, u32, u32, s32, s32)>
+template <ResultCode func(Core::System&, u32, Svc::SignalType, s32, s32)>
 void SvcWrap32(Core::System& system) {
-    const u32 retval =
-        func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1)),
-             static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
-            .raw;
+    const u32 retval = func(system, static_cast<u32>(Param(system, 0)),
+                            static_cast<Svc::SignalType>(Param(system, 1)),
+                            static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
+                           .raw;
     FuncReturn(system, retval);
 }
 
@@ -539,9 +546,9 @@ void SvcWrap32(Core::System& system) {
 }
 
 // Used by WaitSynchronization32
-template <ResultCode func(Core::System&, u32, u32, s32, u32, Handle*)>
+template <ResultCode func(Core::System&, u32, u32, s32, u32, s32*)>
 void SvcWrap32(Core::System& system) {
-    u32 param_1 = 0;
+    s32 param_1 = 0;
     const u32 retval = func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2),
                             Param32(system, 3), &param_1)
                            .raw;
diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp
deleted file mode 100644
index d3f520ea2..000000000
--- a/src/core/hle/kernel/synchronization.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "core/core.h"
-#include "core/hle/kernel/errors.h"
-#include "core/hle/kernel/handle_table.h"
-#include "core/hle/kernel/k_scheduler.h"
-#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/synchronization.h"
-#include "core/hle/kernel/synchronization_object.h"
-#include "core/hle/kernel/thread.h"
-#include "core/hle/kernel/time_manager.h"
-
-namespace Kernel {
-
-Synchronization::Synchronization(Core::System& system) : system{system} {}
-
-void Synchronization::SignalObject(SynchronizationObject& obj) const {
-    auto& kernel = system.Kernel();
-    KScopedSchedulerLock lock(kernel);
-    if (obj.IsSignaled()) {
-        for (auto thread : obj.GetWaitingThreads()) {
-            if (thread->GetSchedulingStatus() == ThreadSchedStatus::Paused) {
-                if (thread->GetStatus() != ThreadStatus::WaitHLEEvent) {
-                    ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
-                    ASSERT(thread->IsWaitingSync());
-                }
-                thread->SetSynchronizationResults(&obj, RESULT_SUCCESS);
-                thread->ResumeFromWait();
-            }
-        }
-        obj.ClearWaitingThreads();
-    }
-}
-
-std::pair<ResultCode, Handle> Synchronization::WaitFor(
-    std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) {
-    auto& kernel = system.Kernel();
-    auto* const thread = kernel.CurrentScheduler()->GetCurrentThread();
-    Handle event_handle = InvalidHandle;
-    {
-        KScopedSchedulerLockAndSleep lock(kernel, event_handle, thread, nano_seconds);
-        const auto itr =
-            std::find_if(sync_objects.begin(), sync_objects.end(),
-                         [thread](const std::shared_ptr<SynchronizationObject>& object) {
-                             return object->IsSignaled();
-                         });
-
-        if (itr != sync_objects.end()) {
-            // We found a ready object, acquire it and set the result value
-            SynchronizationObject* object = itr->get();
-            object->Acquire(thread);
-            const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
-            lock.CancelSleep();
-            return {RESULT_SUCCESS, index};
-        }
-
-        if (nano_seconds == 0) {
-            lock.CancelSleep();
-            return {RESULT_TIMEOUT, InvalidHandle};
-        }
-
-        if (thread->IsPendingTermination()) {
-            lock.CancelSleep();
-            return {ERR_THREAD_TERMINATING, InvalidHandle};
-        }
-
-        if (thread->IsSyncCancelled()) {
-            thread->SetSyncCancelled(false);
-            lock.CancelSleep();
-            return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle};
-        }
-
-        for (auto& object : sync_objects) {
-            object->AddWaitingThread(SharedFrom(thread));
-        }
-
-        thread->SetSynchronizationObjects(&sync_objects);
-        thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
-        thread->SetStatus(ThreadStatus::WaitSynch);
-        thread->SetWaitingSync(true);
-    }
-    thread->SetWaitingSync(false);
-
-    if (event_handle != InvalidHandle) {
-        auto& time_manager = kernel.TimeManager();
-        time_manager.UnscheduleTimeEvent(event_handle);
-    }
-
-    {
-        KScopedSchedulerLock lock(kernel);
-        ResultCode signaling_result = thread->GetSignalingResult();
-        SynchronizationObject* signaling_object = thread->GetSignalingObject();
-        thread->SetSynchronizationObjects(nullptr);
-        auto shared_thread = SharedFrom(thread);
-        for (auto& obj : sync_objects) {
-            obj->RemoveWaitingThread(shared_thread);
-        }
-        if (signaling_object != nullptr) {
-            const auto itr = std::find_if(
-                sync_objects.begin(), sync_objects.end(),
-                [signaling_object](const std::shared_ptr<SynchronizationObject>& object) {
-                    return object.get() == signaling_object;
-                });
-            ASSERT(itr != sync_objects.end());
-            signaling_object->Acquire(thread);
-            const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
-            return {signaling_result, index};
-        }
-        return {signaling_result, -1};
-    }
-}
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/synchronization.h b/src/core/hle/kernel/synchronization.h
deleted file mode 100644
index 379f4b1d3..000000000
--- a/src/core/hle/kernel/synchronization.h
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <utility>
-#include <vector>
-
-#include "core/hle/kernel/object.h"
-#include "core/hle/result.h"
-
-namespace Core {
-class System;
-} // namespace Core
-
-namespace Kernel {
-
-class SynchronizationObject;
-
-/**
- * The 'Synchronization' class is an interface for handling synchronization methods
- * used by Synchronization objects and synchronization SVCs. This centralizes processing of
- * such
- */
-class Synchronization {
-public:
-    explicit Synchronization(Core::System& system);
-
-    /// Signals a synchronization object, waking up all its waiting threads
-    void SignalObject(SynchronizationObject& obj) const;
-
-    /// Tries to see if waiting for any of the sync_objects is necessary, if not
-    /// it returns Success and the handle index of the signaled sync object. In
-    /// case not, the current thread will be locked and wait for nano_seconds or
-    /// for a synchronization object to signal.
-    std::pair<ResultCode, Handle> WaitFor(
-        std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds);
-
-private:
-    Core::System& system;
-};
-} // namespace Kernel
diff --git a/src/core/hle/kernel/synchronization_object.cpp b/src/core/hle/kernel/synchronization_object.cpp
deleted file mode 100644
index ba4d39157..000000000
--- a/src/core/hle/kernel/synchronization_object.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "core/core.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/object.h"
-#include "core/hle/kernel/process.h"
-#include "core/hle/kernel/synchronization.h"
-#include "core/hle/kernel/synchronization_object.h"
-#include "core/hle/kernel/thread.h"
-
-namespace Kernel {
-
-SynchronizationObject::SynchronizationObject(KernelCore& kernel) : Object{kernel} {}
-SynchronizationObject::~SynchronizationObject() = default;
-
-void SynchronizationObject::Signal() {
-    kernel.Synchronization().SignalObject(*this);
-}
-
-void SynchronizationObject::AddWaitingThread(std::shared_ptr<Thread> thread) {
-    auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread);
-    if (itr == waiting_threads.end())
-        waiting_threads.push_back(std::move(thread));
-}
-
-void SynchronizationObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) {
-    auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread);
-    // If a thread passed multiple handles to the same object,
-    // the kernel might attempt to remove the thread from the object's
-    // waiting threads list multiple times.
-    if (itr != waiting_threads.end())
-        waiting_threads.erase(itr);
-}
-
-void SynchronizationObject::ClearWaitingThreads() {
-    waiting_threads.clear();
-}
-
-const std::vector<std::shared_ptr<Thread>>& SynchronizationObject::GetWaitingThreads() const {
-    return waiting_threads;
-}
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/synchronization_object.h b/src/core/hle/kernel/synchronization_object.h
deleted file mode 100644
index 7408ed51f..000000000
--- a/src/core/hle/kernel/synchronization_object.h
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <atomic>
-#include <memory>
-#include <vector>
-
-#include "core/hle/kernel/object.h"
-
-namespace Kernel {
-
-class KernelCore;
-class Synchronization;
-class Thread;
-
-/// Class that represents a Kernel object that a thread can be waiting on
-class SynchronizationObject : public Object {
-public:
-    explicit SynchronizationObject(KernelCore& kernel);
-    ~SynchronizationObject() override;
-
-    /**
-     * Check if the specified thread should wait until the object is available
-     * @param thread The thread about which we're deciding.
-     * @return True if the current thread should wait due to this object being unavailable
-     */
-    virtual bool ShouldWait(const Thread* thread) const = 0;
-
-    /// Acquire/lock the object for the specified thread if it is available
-    virtual void Acquire(Thread* thread) = 0;
-
-    /// Signal this object
-    virtual void Signal();
-
-    virtual bool IsSignaled() const {
-        return is_signaled;
-    }
-
-    /**
-     * Add a thread to wait on this object
-     * @param thread Pointer to thread to add
-     */
-    void AddWaitingThread(std::shared_ptr<Thread> thread);
-
-    /**
-     * Removes a thread from waiting on this object (e.g. if it was resumed already)
-     * @param thread Pointer to thread to remove
-     */
-    void RemoveWaitingThread(std::shared_ptr<Thread> thread);
-
-    /// Get a const reference to the waiting threads list for debug use
-    const std::vector<std::shared_ptr<Thread>>& GetWaitingThreads() const;
-
-    void ClearWaitingThreads();
-
-protected:
-    std::atomic_bool is_signaled{}; // Tells if this sync object is signaled
-
-private:
-    /// Threads waiting for this object to become available
-    std::vector<std::shared_ptr<Thread>> waiting_threads;
-};
-
-// Specialization of DynamicObjectCast for SynchronizationObjects
-template <>
-inline std::shared_ptr<SynchronizationObject> DynamicObjectCast<SynchronizationObject>(
-    std::shared_ptr<Object> object) {
-    if (object != nullptr && object->IsWaitable()) {
-        return std::static_pointer_cast<SynchronizationObject>(object);
-    }
-    return nullptr;
-}
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index a4f9e0d97..d97323255 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -17,9 +17,11 @@
 #include "core/hardware_properties.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/k_condition_variable.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
 #include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/memory/memory_layout.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/thread.h"
@@ -34,26 +36,19 @@
 
 namespace Kernel {
 
-bool Thread::ShouldWait(const Thread* thread) const {
-    return status != ThreadStatus::Dead;
-}
-
 bool Thread::IsSignaled() const {
-    return status == ThreadStatus::Dead;
-}
-
-void Thread::Acquire(Thread* thread) {
-    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
+    return signaled;
 }
 
-Thread::Thread(KernelCore& kernel) : SynchronizationObject{kernel} {}
+Thread::Thread(KernelCore& kernel) : KSynchronizationObject{kernel} {}
 Thread::~Thread() = default;
 
 void Thread::Stop() {
     {
         KScopedSchedulerLock lock(kernel);
-        SetStatus(ThreadStatus::Dead);
-        Signal();
+        SetState(ThreadState::Terminated);
+        signaled = true;
+        NotifyAvailable();
         kernel.GlobalHandleTable().Close(global_handle);
 
         if (owner_process) {
@@ -67,59 +62,27 @@ void Thread::Stop() {
     global_handle = 0;
 }
 
-void Thread::ResumeFromWait() {
+void Thread::Wakeup() {
     KScopedSchedulerLock lock(kernel);
-    switch (status) {
-    case ThreadStatus::Paused:
-    case ThreadStatus::WaitSynch:
-    case ThreadStatus::WaitHLEEvent:
-    case ThreadStatus::WaitSleep:
-    case ThreadStatus::WaitIPC:
-    case ThreadStatus::WaitMutex:
-    case ThreadStatus::WaitCondVar:
-    case ThreadStatus::WaitArb:
-    case ThreadStatus::Dormant:
-        break;
-
-    case ThreadStatus::Ready:
-        // The thread's wakeup callback must have already been cleared when the thread was first
-        // awoken.
-        ASSERT(hle_callback == nullptr);
-        // If the thread is waiting on multiple wait objects, it might be awoken more than once
-        // before actually resuming. We can ignore subsequent wakeups if the thread status has
-        // already been set to ThreadStatus::Ready.
-        return;
-    case ThreadStatus::Dead:
-        // This should never happen, as threads must complete before being stopped.
-        DEBUG_ASSERT_MSG(false, "Thread with object id {} cannot be resumed because it's DEAD.",
-                         GetObjectId());
-        return;
-    }
-
-    SetStatus(ThreadStatus::Ready);
-}
-
-void Thread::OnWakeUp() {
-    KScopedSchedulerLock lock(kernel);
-    SetStatus(ThreadStatus::Ready);
+    SetState(ThreadState::Runnable);
 }
 
 ResultCode Thread::Start() {
     KScopedSchedulerLock lock(kernel);
-    SetStatus(ThreadStatus::Ready);
+    SetState(ThreadState::Runnable);
     return RESULT_SUCCESS;
 }
 
 void Thread::CancelWait() {
     KScopedSchedulerLock lock(kernel);
-    if (GetSchedulingStatus() != ThreadSchedStatus::Paused || !is_waiting_on_sync) {
+    if (GetState() != ThreadState::Waiting || !is_cancellable) {
         is_sync_cancelled = true;
         return;
     }
     // TODO(Blinkhawk): Implement cancel of server session
     is_sync_cancelled = false;
     SetSynchronizationResults(nullptr, ERR_SYNCHRONIZATION_CANCELED);
-    SetStatus(ThreadStatus::Ready);
+    SetState(ThreadState::Runnable);
 }
 
 static void ResetThreadContext32(Core::ARM_Interface::ThreadContext32& context, u32 stack_top,
@@ -183,25 +146,24 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
     std::shared_ptr<Thread> thread = std::make_shared<Thread>(kernel);
 
     thread->thread_id = kernel.CreateNewThreadID();
-    thread->status = ThreadStatus::Dormant;
+    thread->thread_state = ThreadState::Initialized;
     thread->entry_point = entry_point;
     thread->stack_top = stack_top;
     thread->disable_count = 1;
     thread->tpidr_el0 = 0;
-    thread->nominal_priority = thread->current_priority = priority;
+    thread->current_priority = priority;
+    thread->base_priority = priority;
+    thread->lock_owner = nullptr;
     thread->schedule_count = -1;
     thread->last_scheduled_tick = 0;
     thread->processor_id = processor_id;
     thread->ideal_core = processor_id;
     thread->affinity_mask.SetAffinity(processor_id, true);
-    thread->wait_objects = nullptr;
-    thread->mutex_wait_address = 0;
-    thread->condvar_wait_address = 0;
-    thread->wait_handle = 0;
     thread->name = std::move(name);
     thread->global_handle = kernel.GlobalHandleTable().Create(thread).Unwrap();
     thread->owner_process = owner_process;
     thread->type = type_flags;
+    thread->signaled = false;
     if ((type_flags & THREADTYPE_IDLE) == 0) {
         auto& scheduler = kernel.GlobalSchedulerContext();
         scheduler.AddThread(thread);
@@ -226,153 +188,185 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
     return MakeResult<std::shared_ptr<Thread>>(std::move(thread));
 }
 
-void Thread::SetPriority(u32 priority) {
-    KScopedSchedulerLock lock(kernel);
+void Thread::SetBasePriority(u32 priority) {
     ASSERT_MSG(priority <= THREADPRIO_LOWEST && priority >= THREADPRIO_HIGHEST,
                "Invalid priority value.");
-    nominal_priority = priority;
-    UpdatePriority();
+
+    KScopedSchedulerLock lock(kernel);
+
+    // Change our base priority.
+    base_priority = priority;
+
+    // Perform a priority restoration.
+    RestorePriority(kernel, this);
 }
 
-void Thread::SetSynchronizationResults(SynchronizationObject* object, ResultCode result) {
+void Thread::SetSynchronizationResults(KSynchronizationObject* object, ResultCode result) {
     signaling_object = object;
     signaling_result = result;
 }
 
-s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const {
-    ASSERT_MSG(!wait_objects->empty(), "Thread is not waiting for anything");
-    const auto match = std::find(wait_objects->rbegin(), wait_objects->rend(), object);
-    return static_cast<s32>(std::distance(match, wait_objects->rend()) - 1);
-}
-
 VAddr Thread::GetCommandBufferAddress() const {
     // Offset from the start of TLS at which the IPC command buffer begins.
     constexpr u64 command_header_offset = 0x80;
     return GetTLSAddress() + command_header_offset;
 }
 
-void Thread::SetStatus(ThreadStatus new_status) {
-    if (new_status == status) {
-        return;
-    }
+void Thread::SetState(ThreadState state) { // Replaces only the Mask bits of thread_state.
+    KScopedSchedulerLock sl(kernel);
 
-    switch (new_status) {
-    case ThreadStatus::Ready:
-        SetSchedulingStatus(ThreadSchedStatus::Runnable);
-        break;
-    case ThreadStatus::Dormant:
-        SetSchedulingStatus(ThreadSchedStatus::None);
-        break;
-    case ThreadStatus::Dead:
-        SetSchedulingStatus(ThreadSchedStatus::Exited);
-        break;
-    default:
-        SetSchedulingStatus(ThreadSchedStatus::Paused);
-        break;
-    }
+    // Reset the debug-only wait info; Sleep(), for example, sets its reason again afterwards.
+    SetMutexWaitAddressForDebugging({});
+    SetWaitReasonForDebugging({});
 
-    status = new_status;
+    const ThreadState old_state = thread_state; // raw value, bits above Mask included
+    thread_state =
+        static_cast<ThreadState>((old_state & ~ThreadState::Mask) | (state & ThreadState::Mask));
+    if (thread_state != old_state) { // notify the scheduler only on an actual change
+        KScheduler::OnThreadStateChanged(kernel, this, old_state);
+    }
 }
 
-void Thread::AddMutexWaiter(std::shared_ptr<Thread> thread) {
-    if (thread->lock_owner.get() == this) {
-        // If the thread is already waiting for this thread to release the mutex, ensure that the
-        // waiters list is consistent and return without doing anything.
-        const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
-        ASSERT(iter != wait_mutex_threads.end());
-        return;
+void Thread::AddWaiterImpl(Thread* thread) {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked()); // caller must hold the scheduler lock
+
+    // Find the first waiter whose priority value is greater; equal values keep arrival order.
+    auto it = waiter_list.begin();
+    while (it != waiter_list.end()) {
+        if (it->GetPriority() > thread->GetPriority()) {
+            break;
+        }
+        it++;
     }
 
-    // A thread can't wait on two different mutexes at the same time.
-    ASSERT(thread->lock_owner == nullptr);
+    // Keep track of how many kernel waiters we have.
+    if (Memory::IsKernelAddressKey(thread->GetAddressKey())) {
+        ASSERT((num_kernel_waiters++) >= 0); // NOTE(review): increment inside ASSERT; verify
+    }
 
-    // Ensure that the thread is not already in the list of mutex waiters
-    const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
-    ASSERT(iter == wait_mutex_threads.end());
+    // Link the waiter in front of `it` and record us as its owner; no priority update here.
+    waiter_list.insert(it, *thread);
+    thread->SetLockOwner(this);
+}
 
-    // Keep the list in an ordered fashion
-    const auto insertion_point = std::find_if(
-        wait_mutex_threads.begin(), wait_mutex_threads.end(),
-        [&thread](const auto& entry) { return entry->GetPriority() > thread->GetPriority(); });
-    wait_mutex_threads.insert(insertion_point, thread);
-    thread->lock_owner = SharedFrom(this);
+void Thread::RemoveWaiterImpl(Thread* thread) {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked()); // caller must hold the scheduler lock
 
-    UpdatePriority();
-}
+    // Keep track of how many kernel waiters we have.
+    if (Memory::IsKernelAddressKey(thread->GetAddressKey())) {
+        ASSERT((num_kernel_waiters--) > 0); // NOTE(review): decrement inside ASSERT; verify
+    }
 
-void Thread::RemoveMutexWaiter(std::shared_ptr<Thread> thread) {
-    ASSERT(thread->lock_owner.get() == this);
+    // Unlink the waiter from our list and clear its owner; priorities are not recomputed here.
+    waiter_list.erase(waiter_list.iterator_to(*thread));
+    thread->SetLockOwner(nullptr);
+}
 
-    // Ensure that the thread is in the list of mutex waiters
-    const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
-    ASSERT(iter != wait_mutex_threads.end());
+void Thread::RestorePriority(KernelCore& kernel, Thread* thread) {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
 
-    wait_mutex_threads.erase(iter);
+    while (true) { // each pass updates one thread, then moves on to its lock owner
+        // Take the smaller of our base priority and the front waiter's current priority.
+        s32 new_priority = thread->GetBasePriority();
+        if (thread->HasWaiters()) {
+            new_priority = std::min(new_priority, thread->waiter_list.front().GetPriority());
+        }
 
-    thread->lock_owner = nullptr;
-    UpdatePriority();
-}
+        // If the priority we would inherit is not different from ours, don't do anything.
+        if (new_priority == thread->GetPriority()) {
+            return;
+        }
 
-void Thread::UpdatePriority() {
-    // If any of the threads waiting on the mutex have a higher priority
-    // (taking into account priority inheritance), then this thread inherits
-    // that thread's priority.
-    u32 new_priority = nominal_priority;
-    if (!wait_mutex_threads.empty()) {
-        if (wait_mutex_threads.front()->current_priority < new_priority) {
-            new_priority = wait_mutex_threads.front()->current_priority;
+        // Ensure we don't violate condition variable red black tree invariants.
+        if (auto* cv_tree = thread->GetConditionVariableTree(); cv_tree != nullptr) {
+            BeforeUpdatePriority(kernel, cv_tree, thread);
         }
-    }
 
-    if (new_priority == current_priority) {
-        return;
-    }
+        // Change the priority, remembering the old value for the scheduler callback below.
+        const s32 old_priority = thread->GetPriority();
+        thread->SetPriority(new_priority);
 
-    if (GetStatus() == ThreadStatus::WaitCondVar) {
-        owner_process->RemoveConditionVariableThread(SharedFrom(this));
-    }
+        // Restore the condition variable, if relevant.
+        if (auto* cv_tree = thread->GetConditionVariableTree(); cv_tree != nullptr) {
+            AfterUpdatePriority(kernel, cv_tree, thread);
+        }
 
-    SetCurrentPriority(new_priority);
+        // Update the scheduler.
+        KScheduler::OnThreadPriorityChanged(kernel, thread, old_priority);
 
-    if (GetStatus() == ThreadStatus::WaitCondVar) {
-        owner_process->InsertConditionVariableThread(SharedFrom(this));
-    }
+        // Stop here unless this thread is itself waiting on a lock owner.
+        Thread* lock_owner = thread->GetLockOwner();
+        if (lock_owner == nullptr) {
+            return;
+        }
 
-    if (!lock_owner) {
-        return;
+        // Update the thread in the lock owner's sorted list, and continue inheriting.
+        lock_owner->RemoveWaiterImpl(thread);
+        lock_owner->AddWaiterImpl(thread);
+        thread = lock_owner;
     }
+}
 
-    // Ensure that the thread is within the correct location in the waiting list.
-    auto old_owner = lock_owner;
-    lock_owner->RemoveMutexWaiter(SharedFrom(this));
-    old_owner->AddMutexWaiter(SharedFrom(this));
-
-    // Recursively update the priority of the thread that depends on the priority of this one.
-    lock_owner->UpdatePriority();
+void Thread::AddWaiter(Thread* thread) {
+    AddWaiterImpl(thread);         // link `thread` into our sorted waiter list
+    RestorePriority(kernel, this); // then recompute our priority from the updated list
 }
 
-bool Thread::AllSynchronizationObjectsReady() const {
-    return std::none_of(wait_objects->begin(), wait_objects->end(),
-                        [this](const std::shared_ptr<SynchronizationObject>& object) {
-                            return object->ShouldWait(this);
-                        });
+void Thread::RemoveWaiter(Thread* thread) {
+    RemoveWaiterImpl(thread);      // unlink `thread` from our waiter list
+    RestorePriority(kernel, this); // then recompute our priority from the updated list
 }
 
-bool Thread::InvokeHLECallback(std::shared_ptr<Thread> thread) {
-    ASSERT(hle_callback);
-    return hle_callback(std::move(thread));
+Thread* Thread::RemoveWaiterByKey(s32* out_num_waiters, VAddr key) {
+    ASSERT(kernel.GlobalSchedulerContext().IsLocked());
+
+    // Detach every waiter whose address key equals `key`; the first match is the next owner.
+    s32 removed_count{};
+    Thread* new_owner{};
+    for (auto iter = waiter_list.begin(); iter != waiter_list.end();) {
+        if (iter->GetAddressKey() != key) {
+            ++iter;
+            continue;
+        }
+        Thread* const waiter = std::addressof(*iter);
+
+        // Keep track of how many kernel waiters we have.
+        if (Memory::IsKernelAddressKey(waiter->GetAddressKey())) {
+            ASSERT((num_kernel_waiters--) > 0);
+        }
+        iter = waiter_list.erase(iter);
+        ++removed_count;
+
+        if (new_owner != nullptr) {
+            // Later matches are queued on the new owner instead of on this thread.
+            new_owner->AddWaiterImpl(waiter);
+        } else {
+            new_owner = waiter;
+            new_owner->SetLockOwner(nullptr);
+        }
+    }
+
+    // Both waiter lists changed, so recompute the priorities of this thread and the new owner.
+    if (new_owner != nullptr) {
+        RestorePriority(kernel, this);
+        RestorePriority(kernel, new_owner);
+    }
+
+    // Report how many waiters were removed, and hand back the chosen owner (may be null).
+    *out_num_waiters = removed_count;
+    return new_owner;
 }
 
 ResultCode Thread::SetActivity(ThreadActivity value) {
     KScopedSchedulerLock lock(kernel);
 
-    auto sched_status = GetSchedulingStatus();
+    auto sched_status = GetState();
 
-    if (sched_status != ThreadSchedStatus::Runnable && sched_status != ThreadSchedStatus::Paused) {
+    if (sched_status != ThreadState::Runnable && sched_status != ThreadState::Waiting) {
         return ERR_INVALID_STATE;
     }
 
-    if (IsPendingTermination()) {
+    if (IsTerminationRequested()) {
         return RESULT_SUCCESS;
     }
 
@@ -394,7 +388,8 @@ ResultCode Thread::Sleep(s64 nanoseconds) {
     Handle event_handle{};
     {
         KScopedSchedulerLockAndSleep lock(kernel, event_handle, this, nanoseconds);
-        SetStatus(ThreadStatus::WaitSleep);
+        SetState(ThreadState::Waiting);
+        SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Sleep);
     }
 
     if (event_handle != InvalidHandle) {
@@ -405,34 +400,21 @@ ResultCode Thread::Sleep(s64 nanoseconds) {
 }
 
 void Thread::AddSchedulingFlag(ThreadSchedFlags flag) {
-    const u32 old_state = scheduling_state;
+    const auto old_state = GetRawState();
     pausing_state |= static_cast<u32>(flag);
-    const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus());
-    scheduling_state = base_scheduling | pausing_state;
+    const auto base_scheduling = GetState();
+    thread_state = base_scheduling | static_cast<ThreadState>(pausing_state);
     KScheduler::OnThreadStateChanged(kernel, this, old_state);
 }
 
 void Thread::RemoveSchedulingFlag(ThreadSchedFlags flag) {
-    const u32 old_state = scheduling_state;
+    const auto old_state = GetRawState();
     pausing_state &= ~static_cast<u32>(flag);
-    const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus());
-    scheduling_state = base_scheduling | pausing_state;
+    const auto base_scheduling = GetState();
+    thread_state = base_scheduling | static_cast<ThreadState>(pausing_state);
     KScheduler::OnThreadStateChanged(kernel, this, old_state);
 }
 
-void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) {
-    const u32 old_state = scheduling_state;
-    scheduling_state = (scheduling_state & static_cast<u32>(ThreadSchedMasks::HighMask)) |
-                       static_cast<u32>(new_status);
-    KScheduler::OnThreadStateChanged(kernel, this, old_state);
-}
-
-void Thread::SetCurrentPriority(u32 new_priority) {
-    const u32 old_priority = std::exchange(current_priority, new_priority);
-    KScheduler::OnThreadPriorityChanged(kernel, this, kernel.CurrentScheduler()->GetCurrentThread(),
-                                        old_priority);
-}
-
 ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
     KScopedSchedulerLock lock(kernel);
     const auto HighestSetCore = [](u64 mask, u32 max_cores) {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 11ef29888..6b66c9a0e 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -6,16 +6,21 @@
 
 #include <array>
 #include <functional>
+#include <span>
 #include <string>
 #include <utility>
 #include <vector>
 
+#include <boost/intrusive/list.hpp>
+
 #include "common/common_types.h"
+#include "common/intrusive_red_black_tree.h"
 #include "common/spin_lock.h"
 #include "core/arm/arm_interface.h"
 #include "core/hle/kernel/k_affinity_mask.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/object.h"
-#include "core/hle/kernel/synchronization_object.h"
+#include "core/hle/kernel/svc_common.h"
 #include "core/hle/result.h"
 
 namespace Common {
@@ -73,19 +78,24 @@ enum ThreadProcessorId : s32 {
                                      (1 << THREADPROCESSORID_2) | (1 << THREADPROCESSORID_3)
 };
 
-enum class ThreadStatus {
-    Ready,        ///< Ready to run
-    Paused,       ///< Paused by SetThreadActivity or debug
-    WaitHLEEvent, ///< Waiting for hle event to finish
-    WaitSleep,    ///< Waiting due to a SleepThread SVC
-    WaitIPC,      ///< Waiting for the reply from an IPC request
-    WaitSynch,    ///< Waiting due to WaitSynchronization
-    WaitMutex,    ///< Waiting due to an ArbitrateLock svc
-    WaitCondVar,  ///< Waiting due to an WaitProcessWideKey svc
-    WaitArb,      ///< Waiting due to a SignalToAddress/WaitForAddress svc
-    Dormant,      ///< Created but not yet made ready
-    Dead          ///< Run to completion, or forcefully terminated
+enum class ThreadState : u16 {
+    Initialized = 0, ///< Assigned by Thread::Create, before the thread is started
+    Waiting = 1,     ///< Not runnable, e.g. after Thread::Sleep
+    Runnable = 2,    ///< Assigned by Thread::Start, Wakeup and CancelWait
+    Terminated = 3,  ///< Assigned by Thread::Stop
+
+    SuspendShift = 4,               ///< Bit index of the first suspend flag
+    Mask = (1 << SuspendShift) - 1, ///< 0x000F: the bits that hold one of the states above
+
+    ProcessSuspended = (1 << (0 + SuspendShift)),   ///< 0x0010
+    ThreadSuspended = (1 << (1 + SuspendShift)),    ///< 0x0020
+    DebugSuspended = (1 << (2 + SuspendShift)),     ///< 0x0040
+    BacktraceSuspended = (1 << (3 + SuspendShift)), ///< 0x0080
+    InitSuspended = (1 << (4 + SuspendShift)),      ///< 0x0100
+
+    SuspendFlagMask = ((1 << 5) - 1) << SuspendShift, ///< 0x01F0: all five suspend flags
 };
+DECLARE_ENUM_FLAG_OPERATORS(ThreadState); // bitwise operators, used e.g. by Thread::GetState
 
 enum class ThreadWakeupReason {
     Signal, // The thread was woken up by WakeupAllWaitingThreads due to an object signal.
@@ -97,13 +107,6 @@ enum class ThreadActivity : u32 {
     Paused = 1,
 };
 
-enum class ThreadSchedStatus : u32 {
-    None = 0,
-    Paused = 1,
-    Runnable = 2,
-    Exited = 3,
-};
-
 enum class ThreadSchedFlags : u32 {
     ProcessPauseFlag = 1 << 4,
     ThreadPauseFlag = 1 << 5,
@@ -111,13 +114,20 @@ enum class ThreadSchedFlags : u32 {
     KernelInitPauseFlag = 1 << 8,
 };
 
-enum class ThreadSchedMasks : u32 {
-    LowMask = 0x000f,
-    HighMask = 0xfff0,
-    ForcePauseMask = 0x0070,
+enum class ThreadWaitReasonForDebugging : u32 {
+    None,            ///< Thread is not waiting
+    Sleep,           ///< Thread is waiting due to a SleepThread SVC
+    IPC,             ///< Thread is waiting for the reply from an IPC request
+    Synchronization, ///< Thread is waiting due to a WaitSynchronization SVC
+    ConditionVar,    ///< Thread is waiting due to a WaitProcessWideKey SVC
+    Arbitration,     ///< Thread is waiting due to a SignalToAddress/WaitForAddress SVC
+    Suspended,       ///< Thread is waiting due to process suspension
 };
 
-class Thread final : public SynchronizationObject {
+class Thread final : public KSynchronizationObject, public boost::intrusive::list_base_hook<> {
+    friend class KScheduler;
+    friend class Process;
+
 public:
     explicit Thread(KernelCore& kernel);
     ~Thread() override;
@@ -127,10 +137,6 @@ public:
     using ThreadContext32 = Core::ARM_Interface::ThreadContext32;
     using ThreadContext64 = Core::ARM_Interface::ThreadContext64;
 
-    using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>;
-
-    using HLECallback = std::function<bool(std::shared_ptr<Thread> thread)>;
-
     /**
      * Creates and returns a new thread. The new thread is immediately scheduled
      * @param system The instance of the whole system
@@ -186,59 +192,54 @@ public:
         return HANDLE_TYPE;
     }
 
-    bool ShouldWait(const Thread* thread) const override;
-    void Acquire(Thread* thread) override;
-    bool IsSignaled() const override;
-
     /**
      * Gets the thread's current priority
      * @return The current thread's priority
      */
-    u32 GetPriority() const {
+    [[nodiscard]] s32 GetPriority() const {
         return current_priority;
     }
 
     /**
+     * Stores a new current priority; this setter itself does not notify the scheduler.
+     * @param priority The new current priority.
+     */
+    void SetPriority(s32 priority) {
+        current_priority = priority; // plain store, nothing else is updated here
+    }
+
+    /**
      * Gets the thread's nominal priority.
      * @return The current thread's nominal priority.
      */
-    u32 GetNominalPriority() const {
-        return nominal_priority;
+    [[nodiscard]] s32 GetBasePriority() const {
+        return base_priority;
     }
 
     /**
-     * Sets the thread's current priority
-     * @param priority The new priority
+     * Sets the thread's base (nominal) priority and recomputes its inherited priority.
+     * @param priority The new base priority, within [THREADPRIO_HIGHEST, THREADPRIO_LOWEST].
      */
-    void SetPriority(u32 priority);
-
-    /// Adds a thread to the list of threads that are waiting for a lock held by this thread.
-    void AddMutexWaiter(std::shared_ptr<Thread> thread);
-
-    /// Removes a thread from the list of threads that are waiting for a lock held by this thread.
-    void RemoveMutexWaiter(std::shared_ptr<Thread> thread);
-
-    /// Recalculates the current priority taking into account priority inheritance.
-    void UpdatePriority();
+    void SetBasePriority(u32 priority);
 
     /// Changes the core that the thread is running or scheduled to run on.
-    ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask);
+    [[nodiscard]] ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask);
 
     /**
      * Gets the thread's thread ID
      * @return The thread's ID
      */
-    u64 GetThreadID() const {
+    [[nodiscard]] u64 GetThreadID() const {
         return thread_id;
     }
 
     /// Resumes a thread from waiting
-    void ResumeFromWait();
-
-    void OnWakeUp();
+    void Wakeup();
 
     ResultCode Start();
 
+    bool IsSignaled() const override; ///< Returns the flag that Thread::Stop() sets to true.
+
     /// Cancels a waiting operation that this thread may or may not be within.
     ///
     /// When the thread is within a waiting state, this will set the thread's
@@ -247,29 +248,20 @@ public:
     ///
     void CancelWait();
 
-    void SetSynchronizationResults(SynchronizationObject* object, ResultCode result);
+    void SetSynchronizationResults(KSynchronizationObject* object, ResultCode result);
 
-    SynchronizationObject* GetSignalingObject() const {
-        return signaling_object;
+    void SetSyncedObject(KSynchronizationObject* object, ResultCode result) {
+        SetSynchronizationResults(object, result);
     }
 
-    ResultCode GetSignalingResult() const {
+    ResultCode GetWaitResult(KSynchronizationObject** out) const {
+        *out = signaling_object;
         return signaling_result;
     }
 
-    /**
-     * Retrieves the index that this particular object occupies in the list of objects
-     * that the thread passed to WaitSynchronization, starting the search from the last element.
-     *
-     * It is used to set the output index of WaitSynchronization when the thread is awakened.
-     *
-     * When a thread wakes up due to an object signal, the kernel will use the index of the last
-     * matching object in the wait objects list in case of having multiple instances of the same
-     * object in the list.
-     *
-     * @param object Object to query the index of.
-     */
-    s32 GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const;
+    ResultCode GetSignalingResult() const {
+        return signaling_result;
+    }
 
     /**
      * Stops a thread, invalidating it from further use
@@ -341,18 +333,22 @@ public:
 
     std::shared_ptr<Common::Fiber>& GetHostContext();
 
-    ThreadStatus GetStatus() const {
-        return status;
+    ThreadState GetState() const {
+        return thread_state & ThreadState::Mask;
+    }
+
+    ThreadState GetRawState() const {
+        return thread_state;
     }
 
-    void SetStatus(ThreadStatus new_status);
+    void SetState(ThreadState state);
 
     s64 GetLastScheduledTick() const {
-        return this->last_scheduled_tick;
+        return last_scheduled_tick;
     }
 
     void SetLastScheduledTick(s64 tick) {
-        this->last_scheduled_tick = tick;
+        last_scheduled_tick = tick;
     }
 
     u64 GetTotalCPUTimeTicks() const {
@@ -387,98 +383,18 @@ public:
         return owner_process;
     }
 
-    const ThreadSynchronizationObjects& GetSynchronizationObjects() const {
-        return *wait_objects;
-    }
-
-    void SetSynchronizationObjects(ThreadSynchronizationObjects* objects) {
-        wait_objects = objects;
-    }
-
-    void ClearSynchronizationObjects() {
-        for (const auto& waiting_object : *wait_objects) {
-            waiting_object->RemoveWaitingThread(SharedFrom(this));
-        }
-        wait_objects->clear();
-    }
-
-    /// Determines whether all the objects this thread is waiting on are ready.
-    bool AllSynchronizationObjectsReady() const;
-
     const MutexWaitingThreads& GetMutexWaitingThreads() const {
         return wait_mutex_threads;
     }
 
     Thread* GetLockOwner() const {
-        return lock_owner.get();
-    }
-
-    void SetLockOwner(std::shared_ptr<Thread> owner) {
-        lock_owner = std::move(owner);
-    }
-
-    VAddr GetCondVarWaitAddress() const {
-        return condvar_wait_address;
-    }
-
-    void SetCondVarWaitAddress(VAddr address) {
-        condvar_wait_address = address;
-    }
-
-    VAddr GetMutexWaitAddress() const {
-        return mutex_wait_address;
-    }
-
-    void SetMutexWaitAddress(VAddr address) {
-        mutex_wait_address = address;
-    }
-
-    Handle GetWaitHandle() const {
-        return wait_handle;
-    }
-
-    void SetWaitHandle(Handle handle) {
-        wait_handle = handle;
-    }
-
-    VAddr GetArbiterWaitAddress() const {
-        return arb_wait_address;
-    }
-
-    void SetArbiterWaitAddress(VAddr address) {
-        arb_wait_address = address;
-    }
-
-    bool HasHLECallback() const {
-        return hle_callback != nullptr;
-    }
-
-    void SetHLECallback(HLECallback callback) {
-        hle_callback = std::move(callback);
-    }
-
-    void SetHLETimeEvent(Handle time_event) {
-        hle_time_event = time_event;
-    }
-
-    void SetHLESyncObject(SynchronizationObject* object) {
-        hle_object = object;
-    }
-
-    Handle GetHLETimeEvent() const {
-        return hle_time_event;
-    }
-
-    SynchronizationObject* GetHLESyncObject() const {
-        return hle_object;
+        return lock_owner;
     }
 
-    void InvalidateHLECallback() {
-        SetHLECallback(nullptr);
+    void SetLockOwner(Thread* owner) {
+        lock_owner = owner;
     }
 
-    bool InvokeHLECallback(std::shared_ptr<Thread> thread);
-
     u32 GetIdealCore() const {
         return ideal_core;
     }
@@ -493,20 +409,11 @@ public:
     ResultCode Sleep(s64 nanoseconds);
 
     s64 GetYieldScheduleCount() const {
-        return this->schedule_count;
+        return schedule_count;
     }
 
     void SetYieldScheduleCount(s64 count) {
-        this->schedule_count = count;
-    }
-
-    ThreadSchedStatus GetSchedulingStatus() const {
-        return static_cast<ThreadSchedStatus>(scheduling_state &
-                                              static_cast<u32>(ThreadSchedMasks::LowMask));
-    }
-
-    bool IsRunnable() const {
-        return scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable);
+        schedule_count = count;
     }
 
     bool IsRunning() const {
@@ -517,36 +424,32 @@ public:
         is_running = value;
     }
 
-    bool IsSyncCancelled() const {
+    bool IsWaitCancelled() const {
         return is_sync_cancelled;
     }
 
-    void SetSyncCancelled(bool value) {
-        is_sync_cancelled = value;
+    void ClearWaitCancelled() {
+        is_sync_cancelled = false;
     }
 
     Handle GetGlobalHandle() const {
         return global_handle;
     }
 
-    bool IsWaitingForArbitration() const {
-        return waiting_for_arbitration;
+    bool IsCancellable() const {
+        return is_cancellable;
     }
 
-    void WaitForArbitration(bool set) {
-        waiting_for_arbitration = set;
+    void SetCancellable() {
+        is_cancellable = true;
     }
 
-    bool IsWaitingSync() const {
-        return is_waiting_on_sync;
+    void ClearCancellable() {
+        is_cancellable = false;
     }
 
-    void SetWaitingSync(bool is_waiting) {
-        is_waiting_on_sync = is_waiting;
-    }
-
-    bool IsPendingTermination() const {
-        return will_be_terminated || GetSchedulingStatus() == ThreadSchedStatus::Exited;
+    bool IsTerminationRequested() const {
+        return will_be_terminated || GetRawState() == ThreadState::Terminated;
     }
 
     bool IsPaused() const {
@@ -578,21 +481,21 @@ public:
         constexpr QueueEntry() = default;
 
         constexpr void Initialize() {
-            this->prev = nullptr;
-            this->next = nullptr;
+            prev = nullptr;
+            next = nullptr;
         }
 
         constexpr Thread* GetPrev() const {
-            return this->prev;
+            return prev;
         }
         constexpr Thread* GetNext() const {
-            return this->next;
+            return next;
         }
         constexpr void SetPrev(Thread* thread) {
-            this->prev = thread;
+            prev = thread;
         }
         constexpr void SetNext(Thread* thread) {
-            this->next = thread;
+            next = thread;
         }
 
     private:
@@ -601,11 +504,11 @@ public:
     };
 
     QueueEntry& GetPriorityQueueEntry(s32 core) {
-        return this->per_core_priority_queue_entry[core];
+        return per_core_priority_queue_entry[core];
     }
 
     const QueueEntry& GetPriorityQueueEntry(s32 core) const {
-        return this->per_core_priority_queue_entry[core];
+        return per_core_priority_queue_entry[core];
     }
 
     s32 GetDisableDispatchCount() const {
@@ -622,24 +525,170 @@ public:
         disable_count--;
     }
 
+    void SetWaitReasonForDebugging(ThreadWaitReasonForDebugging reason) {
+        wait_reason_for_debugging = reason;
+    }
+
+    [[nodiscard]] ThreadWaitReasonForDebugging GetWaitReasonForDebugging() const {
+        return wait_reason_for_debugging;
+    }
+
+    /// Replaces the recorded wait objects with a copy of `objects`. This is used for debugging
+    /// only.
+    void SetWaitObjectsForDebugging(std::span<KSynchronizationObject*> objects) {
+        // std::span is a cheap non-owning view, so it is taken by value. assign() swaps out the
+        // previous contents in one call instead of clear() + reserve() + a manual copy loop.
+        wait_objects_for_debugging.assign(objects.begin(), objects.end());
+    }
+
+    [[nodiscard]] const std::vector<KSynchronizationObject*>& GetWaitObjectsForDebugging() const {
+        return wait_objects_for_debugging;
+    }
+
+    void SetMutexWaitAddressForDebugging(VAddr address) {
+        mutex_wait_address_for_debugging = address;
+    }
+
+    [[nodiscard]] VAddr GetMutexWaitAddressForDebugging() const {
+        return mutex_wait_address_for_debugging;
+    }
+
+    void AddWaiter(Thread* thread);
+
+    void RemoveWaiter(Thread* thread);
+
+    [[nodiscard]] Thread* RemoveWaiterByKey(s32* out_num_waiters, VAddr key);
+
+    [[nodiscard]] VAddr GetAddressKey() const {
+        return address_key;
+    }
+
+    [[nodiscard]] u32 GetAddressKeyValue() const {
+        return address_key_value;
+    }
+
+    void SetAddressKey(VAddr key) {
+        address_key = key;
+    }
+
+    void SetAddressKey(VAddr key, u32 val) {
+        address_key = key;
+        address_key_value = val;
+    }
+
 private:
-    friend class GlobalSchedulerContext;
-    friend class KScheduler;
-    friend class Process;
+    static constexpr size_t PriorityInheritanceCountMax = 10;
+    union SyncObjectBuffer {
+        std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> sync_objects{};
+        std::array<Handle,
+                   Svc::ArgumentHandleCountMax*(sizeof(KSynchronizationObject*) / sizeof(Handle))>
+            handles;
+        constexpr SyncObjectBuffer() {}
+    };
+    static_assert(sizeof(SyncObjectBuffer::sync_objects) == sizeof(SyncObjectBuffer::handles));
+
+    struct ConditionVariableComparator {
+        /// Key/priority pair that can be compared against a Thread in place of another Thread.
+        struct LightCompareType {
+            u64 cv_key{};
+            s32 priority{};
+
+            [[nodiscard]] constexpr u64 GetConditionVariableKey() const {
+                return cv_key;
+            }
+
+            [[nodiscard]] constexpr s32 GetPriority() const {
+                return priority;
+            }
+        };
+
+        /// Orders lhs relative to rhs by condition variable key, then by priority.
+        /// Yields -1 when lhs sorts strictly before rhs and 1 in every other case; 0 is never
+        /// produced, even when both key and priority are equal.
+        template <typename T>
+        requires(
+            std::same_as<T, Thread> ||
+            std::same_as<T, LightCompareType>) static constexpr int Compare(const T& lhs,
+                                                                            const Thread& rhs) {
+            const uintptr_t lhs_key = lhs.GetConditionVariableKey();
+            const uintptr_t rhs_key = rhs.GetConditionVariableKey();
+
+            // Sort first by key and then, among equal keys, by priority.
+            const bool sorts_before =
+                lhs_key < rhs_key ||
+                (lhs_key == rhs_key && lhs.GetPriority() < rhs.GetPriority());
+            return sorts_before ? -1 : 1;
+        }
+    };
+
+    Common::IntrusiveRedBlackTreeNode condvar_arbiter_tree_node{};
+
+    using ConditionVariableThreadTreeTraits =
+        Common::IntrusiveRedBlackTreeMemberTraitsDeferredAssert<&Thread::condvar_arbiter_tree_node>;
+    using ConditionVariableThreadTree =
+        ConditionVariableThreadTreeTraits::TreeType<ConditionVariableComparator>;
+
+public:
+    using ConditionVariableThreadTreeType = ConditionVariableThreadTree;
+
+    [[nodiscard]] uintptr_t GetConditionVariableKey() const {
+        return condvar_key;
+    }
+
+    [[nodiscard]] uintptr_t GetAddressArbiterKey() const {
+        return condvar_key;
+    }
 
-    void SetSchedulingStatus(ThreadSchedStatus new_status);
+    void SetConditionVariable(ConditionVariableThreadTree* tree, VAddr address, uintptr_t cv_key,
+                              u32 value) {
+        condvar_tree = tree;
+        condvar_key = cv_key;
+        address_key = address;
+        address_key_value = value;
+    }
+
+    void ClearConditionVariable() {
+        condvar_tree = nullptr;
+    }
+
+    [[nodiscard]] bool IsWaitingForConditionVariable() const {
+        return condvar_tree != nullptr;
+    }
+
+    void SetAddressArbiter(ConditionVariableThreadTree* tree, uintptr_t address) {
+        condvar_tree = tree;
+        condvar_key = address;
+    }
+
+    void ClearAddressArbiter() {
+        condvar_tree = nullptr;
+    }
+
+    [[nodiscard]] bool IsWaitingForAddressArbiter() const {
+        return condvar_tree != nullptr;
+    }
+
+    [[nodiscard]] ConditionVariableThreadTree* GetConditionVariableTree() const {
+        return condvar_tree;
+    }
+
+    [[nodiscard]] bool HasWaiters() const {
+        return !waiter_list.empty();
+    }
+
+private:
     void AddSchedulingFlag(ThreadSchedFlags flag);
     void RemoveSchedulingFlag(ThreadSchedFlags flag);
-
-    void SetCurrentPriority(u32 new_priority);
+    void AddWaiterImpl(Thread* thread);
+    void RemoveWaiterImpl(Thread* thread);
+    static void RestorePriority(KernelCore& kernel, Thread* thread);
 
     Common::SpinLock context_guard{};
     ThreadContext32 context_32{};
     ThreadContext64 context_64{};
     std::shared_ptr<Common::Fiber> host_context{};
 
-    ThreadStatus status = ThreadStatus::Dormant;
-    u32 scheduling_state = 0;
+    ThreadState thread_state = ThreadState::Initialized;
 
     u64 thread_id = 0;
 
@@ -652,11 +701,11 @@ private:
     /// Nominal thread priority, as set by the emulated application.
     /// The nominal priority is the thread priority without priority
     /// inheritance taken into account.
-    u32 nominal_priority = 0;
+    s32 base_priority{};
 
     /// Current thread priority. This may change over the course of the
     /// thread's lifetime in order to facilitate priority inheritance.
-    u32 current_priority = 0;
+    s32 current_priority{};
 
     u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
     s64 schedule_count{};
@@ -671,37 +720,27 @@ private:
     Process* owner_process;
 
     /// Objects that the thread is waiting on, in the same order as they were
-    /// passed to WaitSynchronization.
-    ThreadSynchronizationObjects* wait_objects;
+    /// passed to WaitSynchronization. This is used for debugging only.
+    std::vector<KSynchronizationObject*> wait_objects_for_debugging;
 
-    SynchronizationObject* signaling_object;
+    /// The current mutex wait address. This is used for debugging only.
+    VAddr mutex_wait_address_for_debugging{};
+
+    /// The reason the thread is waiting. This is used for debugging only.
+    ThreadWaitReasonForDebugging wait_reason_for_debugging{};
+
+    KSynchronizationObject* signaling_object;
     ResultCode signaling_result{RESULT_SUCCESS};
 
     /// List of threads that are waiting for a mutex that is held by this thread.
     MutexWaitingThreads wait_mutex_threads;
 
     /// Thread that owns the lock that this thread is waiting for.
-    std::shared_ptr<Thread> lock_owner;
-
-    /// If waiting on a ConditionVariable, this is the ConditionVariable address
-    VAddr condvar_wait_address = 0;
-    /// If waiting on a Mutex, this is the mutex address
-    VAddr mutex_wait_address = 0;
-    /// The handle used to wait for the mutex.
-    Handle wait_handle = 0;
-
-    /// If waiting for an AddressArbiter, this is the address being waited on.
-    VAddr arb_wait_address{0};
-    bool waiting_for_arbitration{};
+    Thread* lock_owner{};
 
     /// Handle used as userdata to reference this object when inserting into the CoreTiming queue.
     Handle global_handle = 0;
 
-    /// Callback for HLE Events
-    HLECallback hle_callback;
-    Handle hle_time_event;
-    SynchronizationObject* hle_object;
-
     KScheduler* scheduler = nullptr;
 
     std::array<QueueEntry, Core::Hardware::NUM_CPU_CORES> per_core_priority_queue_entry{};
@@ -714,7 +753,7 @@ private:
 
     u32 pausing_state = 0;
     bool is_running = false;
-    bool is_waiting_on_sync = false;
+    bool is_cancellable = false;
     bool is_sync_cancelled = false;
 
     bool is_continuous_on_svc = false;
@@ -725,6 +764,18 @@ private:
 
     bool was_running = false;
 
+    bool signaled{};
+
+    ConditionVariableThreadTree* condvar_tree{};
+    uintptr_t condvar_key{};
+    VAddr address_key{};
+    u32 address_key_value{};
+    s32 num_kernel_waiters{};
+
+    using WaiterList = boost::intrusive::list<Thread>;
+    WaiterList waiter_list{};
+    WaiterList pinned_waiter_list{};
+
     std::string name;
 };
 
diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp
index 79628e2b4..832edd629 100644
--- a/src/core/hle/kernel/time_manager.cpp
+++ b/src/core/hle/kernel/time_manager.cpp
@@ -18,12 +18,10 @@ TimeManager::TimeManager(Core::System& system_) : system{system_} {
     time_manager_event_type = Core::Timing::CreateEvent(
         "Kernel::TimeManagerCallback",
         [this](std::uintptr_t thread_handle, std::chrono::nanoseconds) {
-            const KScopedSchedulerLock lock(system.Kernel());
-            const auto proper_handle = static_cast<Handle>(thread_handle);
-
             std::shared_ptr<Thread> thread;
             {
                 std::lock_guard lock{mutex};
+                const auto proper_handle = static_cast<Handle>(thread_handle);
                 if (cancelled_events[proper_handle]) {
                     return;
                 }
@@ -32,7 +30,7 @@ TimeManager::TimeManager(Core::System& system_) : system{system_} {
 
             if (thread) {
                 // Thread can be null if process has exited
-                thread->OnWakeUp();
+                thread->Wakeup();
             }
         });
 }
@@ -42,8 +40,7 @@ void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64
     event_handle = timetask->GetGlobalHandle();
     if (nanoseconds > 0) {
         ASSERT(timetask);
-        ASSERT(timetask->GetStatus() != ThreadStatus::Ready);
-        ASSERT(timetask->GetStatus() != ThreadStatus::WaitMutex);
+        ASSERT(timetask->GetState() != ThreadState::Runnable);
         system.CoreTiming().ScheduleEvent(std::chrono::nanoseconds{nanoseconds},
                                           time_manager_event_type, event_handle);
     } else {
diff --git a/src/core/hle/service/apm/interface.cpp b/src/core/hle/service/apm/interface.cpp
index 298f6d520..0bff97a37 100644
--- a/src/core/hle/service/apm/interface.cpp
+++ b/src/core/hle/service/apm/interface.cpp
@@ -56,7 +56,7 @@ APM::APM(Core::System& system_, std::shared_ptr<Module> apm_, Controller& contro
     static const FunctionInfo functions[] = {
         {0, &APM::OpenSession, "OpenSession"},
         {1, &APM::GetPerformanceMode, "GetPerformanceMode"},
-        {6, nullptr, "IsCpuOverclockEnabled"},
+        {6, &APM::IsCpuOverclockEnabled, "IsCpuOverclockEnabled"},
     };
     RegisterHandlers(functions);
 }
@@ -78,6 +78,14 @@ void APM::GetPerformanceMode(Kernel::HLERequestContext& ctx) {
     rb.PushEnum(controller.GetCurrentPerformanceMode());
 }
 
+void APM::IsCpuOverclockEnabled(Kernel::HLERequestContext& ctx) {
+    LOG_WARNING(Service_APM, "(STUBBED) called");
+
+    IPC::ResponseBuilder rb{ctx, 3};
+    rb.Push(RESULT_SUCCESS);
+    rb.Push(false);
+}
+
 APM_Sys::APM_Sys(Core::System& system_, Controller& controller_)
     : ServiceFramework{system_, "apm:sys"}, controller{controller_} {
     // clang-format off
diff --git a/src/core/hle/service/apm/interface.h b/src/core/hle/service/apm/interface.h
index 7d57c4978..063ad5308 100644
--- a/src/core/hle/service/apm/interface.h
+++ b/src/core/hle/service/apm/interface.h
@@ -20,6 +20,7 @@ public:
 private:
     void OpenSession(Kernel::HLERequestContext& ctx);
     void GetPerformanceMode(Kernel::HLERequestContext& ctx);
+    void IsCpuOverclockEnabled(Kernel::HLERequestContext& ctx);
 
     std::shared_ptr<Module> apm;
     Controller& controller;
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index 5557da72e..641bcadea 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -190,12 +190,6 @@ private:
     void GetDeviceState(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_NFP, "called");
 
-        auto nfc_event = nfp_interface.GetNFCEvent();
-        if (!nfc_event->ShouldWait(&ctx.GetThread()) && !has_attached_handle) {
-            device_state = DeviceState::TagFound;
-            nfc_event->Clear();
-        }
-
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
         rb.Push<u32>(static_cast<u32>(device_state));
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index d8735491c..36970f828 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -11,8 +11,9 @@
 
 namespace Service::Nvidia::Devices {
 
-nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
-    : nvhost_nvdec_common(system, std::move(nvmap_dev)) {}
+nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+                           SyncpointManager& syncpoint_manager)
+    : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {}
 nvhost_nvdec::~nvhost_nvdec() = default;
 
 NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input,
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
index 79b8b6de1..77ef53cdd 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -11,7 +11,8 @@ namespace Service::Nvidia::Devices {
 
 class nvhost_nvdec final : public nvhost_nvdec_common {
 public:
-    explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
+    explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+                          SyncpointManager& syncpoint_manager);
     ~nvhost_nvdec() override;
 
     NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index b49cecb42..4898dc27a 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -11,6 +11,7 @@
 #include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
 #include "core/hle/service/nvdrv/devices/nvmap.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
 #include "core/memory.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_base.h"
@@ -36,8 +37,9 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s
 }
 } // Anonymous namespace
 
-nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
-    : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
+nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+                                         SyncpointManager& syncpoint_manager)
+    : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager(syncpoint_manager) {}
 nvhost_nvdec_common::~nvhost_nvdec_common() = default;
 
 NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
@@ -71,10 +73,15 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
     offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset);
     offset = SpliceVectors(input, fences, params.fence_count, offset);
 
-    // TODO(ameerj): For async gpu, utilize fences for syncpoint 'max' increment
-
     auto& gpu = system.GPU();
-
+    if (gpu.UseNvdec()) {
+        // fences holds params.fence_count entries; never index past it from this loop.
+        for (std::size_t i = 0; i < syncpt_increments.size() && i < fences.size(); i++) {
+            fences[i].id = syncpt_increments[i].id;
+            fences[i].value = syncpoint_manager.IncreaseSyncpoint(syncpt_increments[i].id,
+                                                                  syncpt_increments[i].increments);
+        }
+    }
     for (const auto& cmd_buffer : command_buffers) {
         auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
         ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
@@ -89,7 +96,13 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
                                       cmdlist.size() * sizeof(u32));
         gpu.PushCommandBuffer(cmdlist);
     }
+    if (gpu.UseNvdec() && !fences.empty()) {
 
+        fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1);
+        // fences[0] is only valid when the submission carried at least one fence (checked above).
+        Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}};
+        gpu.PushCommandBuffer(cmdlist);
+    }
     std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
     // Some games expect command_buffers to be written back
     offset = sizeof(IoctlSubmit);
@@ -98,6 +111,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
     offset = WriteVectors(output, reloc_shifts, offset);
     offset = WriteVectors(output, syncpt_increments, offset);
     offset = WriteVectors(output, wait_checks, offset);
+    offset = WriteVectors(output, fences, offset);
 
     return NvResult::Success;
 }
@@ -107,9 +121,10 @@ NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::ve
     std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
     LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param);
 
-    // We found that implementing this causes deadlocks with async gpu, along with degraded
-    // performance. TODO: RE the nvdec async implementation
-    params.value = 0;
+    if (device_syncpoints[params.param] == 0 && system.GPU().UseNvdec()) {
+        device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint();
+    }
+    params.value = device_syncpoints[params.param];
     std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
 
     return NvResult::Success;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
index d9f95ba58..4c9d4ba41 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -10,12 +10,16 @@
 #include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
 
-namespace Service::Nvidia::Devices {
+namespace Service::Nvidia {
+class SyncpointManager;
+
+namespace Devices {
 class nvmap;
 
 class nvhost_nvdec_common : public nvdevice {
 public:
-    explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
+    explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+                                 SyncpointManager& syncpoint_manager);
     ~nvhost_nvdec_common() override;
 
 protected:
@@ -157,8 +161,10 @@ protected:
     s32_le nvmap_fd{};
     u32_le submit_timeout{};
     std::shared_ptr<nvmap> nvmap_dev;
-
+    SyncpointManager& syncpoint_manager;
+    std::array<u32, MaxSyncPoints> device_syncpoints{};
     // This is expected to be ordered, therefore we must use a map, not unordered_map
     std::map<GPUVAddr, BufferMap> buffer_mappings;
 };
-}; // namespace Service::Nvidia::Devices
+} // namespace Devices
+} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index 805fe86ae..72499654c 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -10,8 +10,9 @@
 #include "video_core/renderer_base.h"
 
 namespace Service::Nvidia::Devices {
-nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
-    : nvhost_nvdec_common(system, std::move(nvmap_dev)) {}
+nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+                       SyncpointManager& syncpoint_manager)
+    : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {}
 
 nvhost_vic::~nvhost_vic() = default;
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
index b2e11f4d4..f401c61fa 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -7,11 +7,11 @@
 #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
 
 namespace Service::Nvidia::Devices {
-class nvmap;
 
 class nvhost_vic final : public nvhost_nvdec_common {
 public:
-    explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
+    explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+                        SyncpointManager& syncpoint_manager);
     ~nvhost_vic();
 
     NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index e03195afe..620c18728 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -55,9 +55,11 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
     devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
     devices["/dev/nvhost-ctrl"] =
         std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
-    devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev);
+    devices["/dev/nvhost-nvdec"] =
+        std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev, syncpoint_manager);
     devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
-    devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev);
+    devices["/dev/nvhost-vic"] =
+        std::make_shared<Devices::nvhost_vic>(system, nvmap_dev, syncpoint_manager);
 }
 
 Module::~Module() = default;
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index c8c6a4d64..5578181a4 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -26,10 +26,10 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
     LOG_WARNING(Service, "Adding graphics buffer {}", slot);
 
     {
-        std::unique_lock lock{queue_mutex};
+        std::unique_lock lock{free_buffers_mutex};
         free_buffers.push_back(slot);
     }
-    condition.notify_one();
+    free_buffers_condition.notify_one();
 
     buffers[slot] = {
         .slot = slot,
@@ -48,8 +48,8 @@ std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::Dequeue
                                                                                        u32 height) {
     // Wait for first request before trying to dequeue
     {
-        std::unique_lock lock{queue_mutex};
-        condition.wait(lock, [this] { return !free_buffers.empty() || !is_connect; });
+        std::unique_lock lock{free_buffers_mutex};
+        free_buffers_condition.wait(lock, [this] { return !free_buffers.empty() || !is_connect; });
     }
 
     if (!is_connect) {
@@ -58,7 +58,7 @@ std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::Dequeue
         return std::nullopt;
     }
 
-    std::unique_lock lock{queue_mutex};
+    std::unique_lock lock{free_buffers_mutex};
 
     auto f_itr = free_buffers.begin();
     auto slot = buffers.size();
@@ -100,6 +100,7 @@ void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
     buffers[slot].crop_rect = crop_rect;
     buffers[slot].swap_interval = swap_interval;
     buffers[slot].multi_fence = multi_fence;
+    std::unique_lock lock{queue_sequence_mutex};
     queue_sequence.push_back(slot);
 }
 
@@ -113,15 +114,16 @@ void BufferQueue::CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& mult
     buffers[slot].swap_interval = 0;
 
     {
-        std::unique_lock lock{queue_mutex};
+        std::unique_lock lock{free_buffers_mutex};
         free_buffers.push_back(slot);
     }
-    condition.notify_one();
+    free_buffers_condition.notify_one();
 
     buffer_wait_event.writable->Signal();
 }
 
 std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() {
+    std::unique_lock lock{queue_sequence_mutex};
     std::size_t buffer_slot = buffers.size();
     // Iterate to find a queued buffer matching the requested slot.
     while (buffer_slot == buffers.size() && !queue_sequence.empty()) {
@@ -147,27 +149,29 @@ void BufferQueue::ReleaseBuffer(u32 slot) {
 
     buffers[slot].status = Buffer::Status::Free;
     {
-        std::unique_lock lock{queue_mutex};
+        std::unique_lock lock{free_buffers_mutex};
         free_buffers.push_back(slot);
     }
-    condition.notify_one();
+    free_buffers_condition.notify_one();
 
     buffer_wait_event.writable->Signal();
 }
 
 void BufferQueue::Connect() {
+    std::unique_lock lock{queue_sequence_mutex};
     queue_sequence.clear();
-    id = 1;
-    layer_id = 1;
     is_connect = true;
 }
 
 void BufferQueue::Disconnect() {
     buffers.fill({});
-    queue_sequence.clear();
+    {
+        std::unique_lock lock{queue_sequence_mutex};
+        queue_sequence.clear();
+    }
     buffer_wait_event.writable->Signal();
     is_connect = false;
-    condition.notify_one();
+    free_buffers_condition.notify_one();
 }
 
 u32 BufferQueue::Query(QueryType type) {
@@ -176,9 +180,11 @@ u32 BufferQueue::Query(QueryType type) {
     switch (type) {
     case QueryType::NativeWindowFormat:
         return static_cast<u32>(PixelFormat::RGBA8888);
+    case QueryType::NativeWindowWidth:
+    case QueryType::NativeWindowHeight:
+        break;
     }
-
-    UNIMPLEMENTED();
+    UNIMPLEMENTED_MSG("Unimplemented query type={}", type);
     return 0;
 }
 
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index a2f60d9eb..ad7469277 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -129,8 +129,10 @@ private:
     std::list<u32> queue_sequence;
     Kernel::EventPair buffer_wait_event;
 
-    std::mutex queue_mutex;
-    std::condition_variable condition;
+    std::mutex free_buffers_mutex;
+    std::condition_variable free_buffers_condition;
+
+    std::mutex queue_sequence_mutex;
 };
 
 } // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 4b3581949..ceaa93d28 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -38,6 +38,10 @@ void NVFlinger::SplitVSync() {
     system.RegisterHostThread();
     std::string name = "yuzu:VSyncThread";
     MicroProfileOnThreadCreate(name.c_str());
+
+    // Cleanup
+    SCOPE_EXIT({ MicroProfileOnThreadExit(); });
+
     Common::SetCurrentThreadName(name.c_str());
     Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
     s64 delay = 0;
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index 4da69f503..2b91a89d1 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -139,9 +139,6 @@ void SM::GetService(Kernel::HLERequestContext& ctx) {
         server_port->AppendPendingSession(server);
     }
 
-    // Wake the threads waiting on the ServerPort
-    server_port->Signal();
-
     LOG_DEBUG(Service_SM, "called service={} -> session={}", name, client->GetObjectId());
     IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
     rb.Push(RESULT_SUCCESS);
diff --git a/src/core/hle/service/sockets/sockets_translate.cpp b/src/core/hle/service/sockets/sockets_translate.cpp
index c822d21b8..ca61d72ca 100644
--- a/src/core/hle/service/sockets/sockets_translate.cpp
+++ b/src/core/hle/service/sockets/sockets_translate.cpp
@@ -64,6 +64,7 @@ Network::Type Translate(Type type) {
         return Network::Type::DGRAM;
     default:
         UNIMPLEMENTED_MSG("Unimplemented type={}", type);
+        return Network::Type{};
     }
 }
 
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index d91c15561..e4f5fd40c 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -185,6 +185,10 @@ constexpr std::array<const char*, 66> RESULT_MESSAGES{
     "The INI file contains more than the maximum allowable number of KIP files.",
 };
 
+std::string GetResultStatusString(ResultStatus status) {
+    return RESULT_MESSAGES.at(static_cast<std::size_t>(status));
+}
+
 std::ostream& operator<<(std::ostream& os, ResultStatus status) {
     os << RESULT_MESSAGES.at(static_cast<std::size_t>(status));
     return os;
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index 36e79e71d..b2e5b13de 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -135,6 +135,7 @@ enum class ResultStatus : u16 {
     ErrorINITooManyKIPs,
 };
 
+std::string GetResultStatusString(ResultStatus status);
 std::ostream& operator<<(std::ostream& os, ResultStatus status);
 
 /// Interface for loading an application
diff --git a/src/core/settings.h b/src/core/settings.h
index 1cb7ff7f5..a324530bd 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -131,6 +131,7 @@ struct Values {
 
     bool cpuopt_unsafe_unfuse_fma;
     bool cpuopt_unsafe_reduce_fp_error;
+    bool cpuopt_unsafe_inaccurate_nan;
 
     // Renderer
     Setting<RendererBackend> renderer_backend;
diff --git a/src/tests/common/ring_buffer.cpp b/src/tests/common/ring_buffer.cpp
index c883c4d56..54def22da 100644
--- a/src/tests/common/ring_buffer.cpp
+++ b/src/tests/common/ring_buffer.cpp
@@ -20,60 +20,60 @@ TEST_CASE("RingBuffer: Basic Tests", "[common]") {
     for (std::size_t i = 0; i < 4; i++) {
         const char elem = static_cast<char>(i);
         const std::size_t count = buf.Push(&elem, 1);
-        REQUIRE(count == 1);
+        REQUIRE(count == 1U);
     }
 
-    REQUIRE(buf.Size() == 4);
+    REQUIRE(buf.Size() == 4U);
 
     // Pushing values into a full ring buffer should fail.
     {
         const char elem = static_cast<char>(42);
         const std::size_t count = buf.Push(&elem, 1);
-        REQUIRE(count == 0);
+        REQUIRE(count == 0U);
     }
 
-    REQUIRE(buf.Size() == 4);
+    REQUIRE(buf.Size() == 4U);
 
     // Popping multiple values from a ring buffer with values should succeed.
     {
         const std::vector<char> popped = buf.Pop(2);
-        REQUIRE(popped.size() == 2);
+        REQUIRE(popped.size() == 2U);
         REQUIRE(popped[0] == 0);
         REQUIRE(popped[1] == 1);
     }
 
-    REQUIRE(buf.Size() == 2);
+    REQUIRE(buf.Size() == 2U);
 
     // Popping a single value from a ring buffer with values should succeed.
     {
         const std::vector<char> popped = buf.Pop(1);
-        REQUIRE(popped.size() == 1);
+        REQUIRE(popped.size() == 1U);
         REQUIRE(popped[0] == 2);
     }
 
-    REQUIRE(buf.Size() == 1);
+    REQUIRE(buf.Size() == 1U);
 
     // Pushing more values than space available should partially suceed.
     {
         std::vector<char> to_push(6);
         std::iota(to_push.begin(), to_push.end(), 88);
         const std::size_t count = buf.Push(to_push);
-        REQUIRE(count == 3);
+        REQUIRE(count == 3U);
     }
 
-    REQUIRE(buf.Size() == 4);
+    REQUIRE(buf.Size() == 4U);
 
     // Doing an unlimited pop should pop all values.
     {
         const std::vector<char> popped = buf.Pop();
-        REQUIRE(popped.size() == 4);
+        REQUIRE(popped.size() == 4U);
         REQUIRE(popped[0] == 3);
         REQUIRE(popped[1] == 88);
         REQUIRE(popped[2] == 89);
         REQUIRE(popped[3] == 90);
     }
 
-    REQUIRE(buf.Size() == 0);
+    REQUIRE(buf.Size() == 0U);
 }
 
 TEST_CASE("RingBuffer: Threaded Test", "[common]") {
@@ -93,7 +93,7 @@ TEST_CASE("RingBuffer: Threaded Test", "[common]") {
         std::size_t i = 0;
         while (i < count) {
             if (const std::size_t c = buf.Push(&value[0], 1); c > 0) {
-                REQUIRE(c == 1);
+                REQUIRE(c == 1U);
                 i++;
                 next_value(value);
             } else {
@@ -108,7 +108,7 @@ TEST_CASE("RingBuffer: Threaded Test", "[common]") {
         std::size_t i = 0;
         while (i < count) {
             if (const std::vector<char> v = buf.Pop(1); v.size() > 0) {
-                REQUIRE(v.size() == 2);
+                REQUIRE(v.size() == 2U);
                 REQUIRE(v[0] == value[0]);
                 REQUIRE(v[1] == value[1]);
                 i++;
@@ -123,7 +123,7 @@ TEST_CASE("RingBuffer: Threaded Test", "[common]") {
     producer.join();
     consumer.join();
 
-    REQUIRE(buf.Size() == 0);
+    REQUIRE(buf.Size() == 0U);
     printf("RingBuffer: Threaded Test: full: %zu, empty: %zu\n", full, empty);
 }
 
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e050f9aed..f7b9d7f86 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -115,8 +115,6 @@ add_library(video_core STATIC
     renderer_vulkan/fixed_pipeline_state.h
     renderer_vulkan/maxwell_to_vk.cpp
     renderer_vulkan/maxwell_to_vk.h
-    renderer_vulkan/nsight_aftermath_tracker.cpp
-    renderer_vulkan/nsight_aftermath_tracker.h
     renderer_vulkan/renderer_vulkan.h
     renderer_vulkan/renderer_vulkan.cpp
     renderer_vulkan/vk_blit_screen.cpp
@@ -131,8 +129,6 @@ add_library(video_core STATIC
     renderer_vulkan/vk_compute_pipeline.h
     renderer_vulkan/vk_descriptor_pool.cpp
     renderer_vulkan/vk_descriptor_pool.h
-    renderer_vulkan/vk_device.cpp
-    renderer_vulkan/vk_device.h
     renderer_vulkan/vk_fence_manager.cpp
     renderer_vulkan/vk_fence_manager.h
     renderer_vulkan/vk_graphics_pipeline.cpp
@@ -167,8 +163,6 @@ add_library(video_core STATIC
     renderer_vulkan/vk_texture_cache.h
     renderer_vulkan/vk_update_descriptor.cpp
     renderer_vulkan/vk_update_descriptor.h
-    renderer_vulkan/wrapper.cpp
-    renderer_vulkan/wrapper.h
     shader_cache.h
     shader_notify.cpp
     shader_notify.h
@@ -257,6 +251,20 @@ add_library(video_core STATIC
     textures/texture.h
     video_core.cpp
     video_core.h
+    vulkan_common/vulkan_debug_callback.cpp
+    vulkan_common/vulkan_debug_callback.h
+    vulkan_common/vulkan_device.cpp
+    vulkan_common/vulkan_device.h
+    vulkan_common/vulkan_instance.cpp
+    vulkan_common/vulkan_instance.h
+    vulkan_common/vulkan_library.cpp
+    vulkan_common/vulkan_library.h
+    vulkan_common/vulkan_surface.cpp
+    vulkan_common/vulkan_surface.h
+    vulkan_common/vulkan_wrapper.cpp
+    vulkan_common/vulkan_wrapper.h
+    vulkan_common/nsight_aftermath_tracker.cpp
+    vulkan_common/nsight_aftermath_tracker.h
 )
 
 create_target_directory_groups(video_core)
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index e3e7432f7..94679d5d1 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -33,8 +33,7 @@ CDmaPusher::CDmaPusher(GPU& gpu_)
     : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)),
       vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
       host1x_processor(std::make_unique<Host1x>(gpu)),
-      nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)),
-      vic_sync(std::make_unique<SyncptIncrManager>(gpu)) {}
+      sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {}
 
 CDmaPusher::~CDmaPusher() = default;
 
@@ -110,10 +109,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
             const auto syncpoint_id = static_cast<u32>(data & 0xFF);
             const auto cond = static_cast<u32>((data >> 8) & 0xFF);
             if (cond == 0) {
-                nvdec_sync->Increment(syncpoint_id);
+                sync_manager->Increment(syncpoint_id);
             } else {
-                nvdec_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id);
-                nvdec_sync->SignalDone(syncpoint_id);
+                sync_manager->SignalDone(
+                    sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
             }
             break;
         }
@@ -135,10 +134,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
             const auto syncpoint_id = static_cast<u32>(data & 0xFF);
             const auto cond = static_cast<u32>((data >> 8) & 0xFF);
             if (cond == 0) {
-                vic_sync->Increment(syncpoint_id);
+                sync_manager->Increment(syncpoint_id);
             } else {
-                vic_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id);
-                vic_sync->SignalDone(syncpoint_id);
+                sync_manager->SignalDone(
+                    sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
             }
             break;
         }
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 0db1cd646..8ca70b6dd 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -116,12 +116,10 @@ private:
     void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments);
 
     GPU& gpu;
-
-    std::shared_ptr<Nvdec> nvdec_processor;
-    std::unique_ptr<Vic> vic_processor;
-    std::unique_ptr<Host1x> host1x_processor;
-    std::unique_ptr<SyncptIncrManager> nvdec_sync;
-    std::unique_ptr<SyncptIncrManager> vic_sync;
+    std::shared_ptr<Tegra::Nvdec> nvdec_processor;
+    std::unique_ptr<Tegra::Vic> vic_processor;
+    std::unique_ptr<Tegra::Host1x> host1x_processor;
+    std::unique_ptr<SyncptIncrManager> sync_manager;
     ChClassId current_class{};
     ThiRegisters vic_thi_state{};
     ThiRegisters nvdec_thi_state{};
diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp
index c4dd4881a..b12494528 100644
--- a/src/video_core/command_classes/host1x.cpp
+++ b/src/video_core/command_classes/host1x.cpp
@@ -10,22 +10,14 @@ Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {}
 
 Tegra::Host1x::~Host1x() = default;
 
-void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) {
-    u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u32);
-    std::memcpy(state_offset, &arguments, sizeof(u32));
-}
-
-void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& arguments) {
-    StateWrite(static_cast<u32>(method), arguments[0]);
+void Tegra::Host1x::ProcessMethod(Method method, u32 argument) {
     switch (method) {
-    case Method::WaitSyncpt:
-        Execute(arguments[0]);
-        break;
     case Method::LoadSyncptPayload32:
-        syncpoint_value = arguments[0];
+        syncpoint_value = argument;
         break;
+    case Method::WaitSyncpt:
     case Method::WaitSyncpt32:
-        Execute(arguments[0]);
+        Execute(argument);
         break;
     default:
         UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method));
@@ -34,6 +26,5 @@ void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& argumen
 }
 
 void Tegra::Host1x::Execute(u32 data) {
-    // This method waits on a valid syncpoint.
-    // TODO: Implement when proper Async is in place
+    gpu.WaitFence(data, syncpoint_value);
 }
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h
index 013eaa0c1..7e94799dd 100644
--- a/src/video_core/command_classes/host1x.h
+++ b/src/video_core/command_classes/host1x.h
@@ -14,64 +14,23 @@ class Nvdec;
 
 class Host1x {
 public:
-    struct Host1xClassRegisters {
-        u32 incr_syncpt{};
-        u32 incr_syncpt_ctrl{};
-        u32 incr_syncpt_error{};
-        INSERT_PADDING_WORDS(5);
-        u32 wait_syncpt{};
-        u32 wait_syncpt_base{};
-        u32 wait_syncpt_incr{};
-        u32 load_syncpt_base{};
-        u32 incr_syncpt_base{};
-        u32 clear{};
-        u32 wait{};
-        u32 wait_with_interrupt{};
-        u32 delay_use{};
-        u32 tick_count_high{};
-        u32 tick_count_low{};
-        u32 tick_ctrl{};
-        INSERT_PADDING_WORDS(23);
-        u32 ind_ctrl{};
-        u32 ind_off2{};
-        u32 ind_off{};
-        std::array<u32, 31> ind_data{};
-        INSERT_PADDING_WORDS(1);
-        u32 load_syncpoint_payload32{};
-        u32 stall_ctrl{};
-        u32 wait_syncpt32{};
-        u32 wait_syncpt_base32{};
-        u32 load_syncpt_base32{};
-        u32 incr_syncpt_base32{};
-        u32 stall_count_high{};
-        u32 stall_count_low{};
-        u32 xref_ctrl{};
-        u32 channel_xref_high{};
-        u32 channel_xref_low{};
-    };
-    static_assert(sizeof(Host1xClassRegisters) == 0x164, "Host1xClassRegisters is an invalid size");
-
     enum class Method : u32 {
-        WaitSyncpt = offsetof(Host1xClassRegisters, wait_syncpt) / 4,
-        LoadSyncptPayload32 = offsetof(Host1xClassRegisters, load_syncpoint_payload32) / 4,
-        WaitSyncpt32 = offsetof(Host1xClassRegisters, wait_syncpt32) / 4,
+        WaitSyncpt = 0x8,
+        LoadSyncptPayload32 = 0x4e,
+        WaitSyncpt32 = 0x50,
     };
 
     explicit Host1x(GPU& gpu);
     ~Host1x();
 
     /// Writes the method into the state, Invoke Execute() if encountered
-    void ProcessMethod(Method method, const std::vector<u32>& arguments);
+    void ProcessMethod(Method method, u32 argument);
 
 private:
     /// For Host1x, execute is waiting on a syncpoint previously written into the state
     void Execute(u32 data);
 
-    /// Write argument into the provided offset
-    void StateWrite(u32 offset, u32 arguments);
-
     u32 syncpoint_value{};
-    Host1xClassRegisters state{};
     GPU& gpu;
 };
 
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
index 1619d8664..acf2668dc 100644
--- a/src/video_core/compatible_formats.cpp
+++ b/src/video_core/compatible_formats.cpp
@@ -10,9 +10,7 @@
 #include "video_core/surface.h"
 
 namespace VideoCore::Surface {
-
 namespace {
-
 using Table = std::array<std::array<u64, 2>, MaxPixelFormat>;
 
 // Compatibility table taken from Table 3.X.2 in:
@@ -233,10 +231,13 @@ constexpr Table MakeCopyTable() {
     EnableRange(copy, COPY_CLASS_64_BITS);
     return copy;
 }
-
 } // Anonymous namespace
 
-bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b) {
+bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views) {
+    if (broken_views) {
+        // If format views are broken, only accept formats that are identical.
+        return format_a == format_b;
+    }
     static constexpr Table TABLE = MakeViewTable();
     return IsSupported(TABLE, format_a, format_b);
 }
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
index b5eb03bea..9a0522988 100644
--- a/src/video_core/compatible_formats.h
+++ b/src/video_core/compatible_formats.h
@@ -8,7 +8,7 @@
 
 namespace VideoCore::Surface {
 
-bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b);
+bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views);
 
 bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b);
 
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index b24179d59..81b71edfb 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -208,6 +208,7 @@ Device::Device()
 
     const bool is_nvidia = vendor == "NVIDIA Corporation";
     const bool is_amd = vendor == "ATI Technologies Inc.";
+    const bool is_intel = vendor == "Intel";
 
     bool disable_fast_buffer_sub_data = false;
     if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
@@ -231,6 +232,7 @@ Device::Device()
     has_variable_aoffi = TestVariableAoffi();
     has_component_indexing_bug = is_amd;
     has_precise_bug = TestPreciseBug();
+    has_broken_texture_view_formats = is_amd || is_intel;
     has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
     has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
     has_debugging_tool_attached = IsDebugToolAttached(extensions);
@@ -248,6 +250,8 @@ Device::Device()
     LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
     LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
     LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
+    LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}",
+             has_broken_texture_view_formats);
 
     if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) {
         LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 13e66846c..3e79d1e37 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -96,6 +96,10 @@ public:
         return has_precise_bug;
     }
 
+    bool HasBrokenTextureViewFormats() const {
+        return has_broken_texture_view_formats;
+    }
+
     bool HasFastBufferSubData() const {
         return has_fast_buffer_sub_data;
     }
@@ -137,6 +141,7 @@ private:
     bool has_variable_aoffi{};
     bool has_component_indexing_bug{};
     bool has_precise_bug{};
+    bool has_broken_texture_view_formats{};
     bool has_fast_buffer_sub_data{};
     bool has_nv_viewport_array2{};
     bool has_debugging_tool_attached{};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 4c690418c..546cb6d00 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -61,100 +61,99 @@ struct FormatTuple {
     GLenum internal_format;
     GLenum format = GL_NONE;
     GLenum type = GL_NONE;
-    GLenum store_format = internal_format;
 };
 
 constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},                  // A8B8G8R8_UNORM
-    {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE},                                // A8B8G8R8_SNORM
-    {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE},                             // A8B8G8R8_SINT
-    {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE},                   // A8B8G8R8_UINT
-    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},                      // R5G6B5_UNORM
-    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV},                  // B5G6R5_UNORM
-    {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},              // A1R5G5B5_UNORM
-    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV},            // A2B10G10R10_UNORM
-    {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV},  // A2B10G10R10_UINT
-    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV},              // A1B5G5R5_UNORM
-    {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                                 // R8_UNORM
-    {GL_R8_SNORM, GL_RED, GL_BYTE},                                    // R8_SNORM
-    {GL_R8I, GL_RED_INTEGER, GL_BYTE},                                 // R8_SINT
-    {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE},                       // R8_UINT
-    {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT},                              // R16G16B16A16_FLOAT
-    {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT},                           // R16G16B16A16_UNORM
-    {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT},                              // R16G16B16A16_SNORM
-    {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT},                           // R16G16B16A16_SINT
-    {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT},                 // R16G16B16A16_UINT
-    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV},      // B10G11R11_FLOAT
-    {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT},                   // R32G32B32A32_UINT
-    {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT},                                // BC1_RGBA_UNORM
-    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT},                                // BC2_UNORM
-    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT},                                // BC3_UNORM
-    {GL_COMPRESSED_RED_RGTC1},                                         // BC4_UNORM
-    {GL_COMPRESSED_SIGNED_RED_RGTC1},                                  // BC4_SNORM
-    {GL_COMPRESSED_RG_RGTC2},                                          // BC5_UNORM
-    {GL_COMPRESSED_SIGNED_RG_RGTC2},                                   // BC5_SNORM
-    {GL_COMPRESSED_RGBA_BPTC_UNORM},                                   // BC7_UNORM
-    {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT},                           // BC6H_UFLOAT
-    {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT},                             // BC6H_SFLOAT
-    {GL_COMPRESSED_RGBA_ASTC_4x4_KHR},                                 // ASTC_2D_4X4_UNORM
-    {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE},                             // B8G8R8A8_UNORM
-    {GL_RGBA32F, GL_RGBA, GL_FLOAT},                                   // R32G32B32A32_FLOAT
-    {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT},                             // R32G32B32A32_SINT
-    {GL_RG32F, GL_RG, GL_FLOAT},                                       // R32G32_FLOAT
-    {GL_RG32I, GL_RG_INTEGER, GL_INT},                                 // R32G32_SINT
-    {GL_R32F, GL_RED, GL_FLOAT},                                       // R32_FLOAT
-    {GL_R16F, GL_RED, GL_HALF_FLOAT},                                  // R16_FLOAT
-    {GL_R16, GL_RED, GL_UNSIGNED_SHORT},                               // R16_UNORM
-    {GL_R16_SNORM, GL_RED, GL_SHORT},                                  // R16_SNORM
-    {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT},                     // R16_UINT
-    {GL_R16I, GL_RED_INTEGER, GL_SHORT},                               // R16_SINT
-    {GL_RG16, GL_RG, GL_UNSIGNED_SHORT},                               // R16G16_UNORM
-    {GL_RG16F, GL_RG, GL_HALF_FLOAT},                                  // R16G16_FLOAT
-    {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT},                     // R16G16_UINT
-    {GL_RG16I, GL_RG_INTEGER, GL_SHORT},                               // R16G16_SINT
-    {GL_RG16_SNORM, GL_RG, GL_SHORT},                                  // R16G16_SNORM
-    {GL_RGB32F, GL_RGB, GL_FLOAT},                                     // R32G32B32_FLOAT
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_RGBA8}, // A8B8G8R8_SRGB
-    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE},                                 // R8G8_UNORM
-    {GL_RG8_SNORM, GL_RG, GL_BYTE},                                    // R8G8_SNORM
-    {GL_RG8I, GL_RG_INTEGER, GL_BYTE},                                 // R8G8_SINT
-    {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE},                       // R8G8_UINT
-    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT},                       // R32G32_UINT
-    {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT},                               // R16G16B16X16_FLOAT
-    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT},                       // R32_UINT
-    {GL_R32I, GL_RED_INTEGER, GL_INT},                                 // R32_SINT
-    {GL_COMPRESSED_RGBA_ASTC_8x8_KHR},                                 // ASTC_2D_8X8_UNORM
-    {GL_COMPRESSED_RGBA_ASTC_8x5_KHR},                                 // ASTC_2D_8X5_UNORM
-    {GL_COMPRESSED_RGBA_ASTC_5x4_KHR},                                 // ASTC_2D_5X4_UNORM
-    {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, GL_RGBA8},            // B8G8R8A8_UNORM
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},                          // BC1_RGBA_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},                          // BC2_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},                          // BC3_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM},                             // BC7_SRGB
-    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV},                // A4B4G4R4_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR},                         // ASTC_2D_4X4_SRGB
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR},                         // ASTC_2D_8X8_SRGB
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR},                         // ASTC_2D_8X5_SRGB
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR},                         // ASTC_2D_5X4_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_5x5_KHR},                                 // ASTC_2D_5X5_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR},                         // ASTC_2D_5X5_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_10x8_KHR},                                // ASTC_2D_10X8_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR},                        // ASTC_2D_10X8_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_6x6_KHR},                                 // ASTC_2D_6X6_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR},                         // ASTC_2D_6X6_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_10x10_KHR},                               // ASTC_2D_10X10_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR},                       // ASTC_2D_10X10_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_12x12_KHR},                               // ASTC_2D_12X12_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR},                       // ASTC_2D_12X12_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_8x6_KHR},                                 // ASTC_2D_8X6_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR},                         // ASTC_2D_8X6_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_6x5_KHR},                                 // ASTC_2D_6X5_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR},                         // ASTC_2D_6X5_SRGB
-    {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV},                 // E5B9G9R9_FLOAT
-    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},             // D32_FLOAT
-    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT},     // D16_UNORM
-    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},     // D24_UNORM_S8_UINT
-    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},     // S8_UINT_D24_UNORM
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},                 // A8B8G8R8_UNORM
+    {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE},                               // A8B8G8R8_SNORM
+    {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE},                            // A8B8G8R8_SINT
+    {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE},                  // A8B8G8R8_UINT
+    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},                     // R5G6B5_UNORM
+    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV},                 // B5G6R5_UNORM
+    {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1R5G5B5_UNORM
+    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV},           // A2B10G10R10_UNORM
+    {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
+    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1B5G5R5_UNORM
+    {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                                // R8_UNORM
+    {GL_R8_SNORM, GL_RED, GL_BYTE},                                   // R8_SNORM
+    {GL_R8I, GL_RED_INTEGER, GL_BYTE},                                // R8_SINT
+    {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE},                      // R8_UINT
+    {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT},                             // R16G16B16A16_FLOAT
+    {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT},                          // R16G16B16A16_UNORM
+    {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT},                             // R16G16B16A16_SNORM
+    {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT},                          // R16G16B16A16_SINT
+    {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT},                // R16G16B16A16_UINT
+    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV},     // B10G11R11_FLOAT
+    {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT},                  // R32G32B32A32_UINT
+    {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT},                               // BC1_RGBA_UNORM
+    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT},                               // BC2_UNORM
+    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT},                               // BC3_UNORM
+    {GL_COMPRESSED_RED_RGTC1},                                        // BC4_UNORM
+    {GL_COMPRESSED_SIGNED_RED_RGTC1},                                 // BC4_SNORM
+    {GL_COMPRESSED_RG_RGTC2},                                         // BC5_UNORM
+    {GL_COMPRESSED_SIGNED_RG_RGTC2},                                  // BC5_SNORM
+    {GL_COMPRESSED_RGBA_BPTC_UNORM},                                  // BC7_UNORM
+    {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT},                          // BC6H_UFLOAT
+    {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT},                            // BC6H_SFLOAT
+    {GL_COMPRESSED_RGBA_ASTC_4x4_KHR},                                // ASTC_2D_4X4_UNORM
+    {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE},                            // B8G8R8A8_UNORM
+    {GL_RGBA32F, GL_RGBA, GL_FLOAT},                                  // R32G32B32A32_FLOAT
+    {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT},                            // R32G32B32A32_SINT
+    {GL_RG32F, GL_RG, GL_FLOAT},                                      // R32G32_FLOAT
+    {GL_RG32I, GL_RG_INTEGER, GL_INT},                                // R32G32_SINT
+    {GL_R32F, GL_RED, GL_FLOAT},                                      // R32_FLOAT
+    {GL_R16F, GL_RED, GL_HALF_FLOAT},                                 // R16_FLOAT
+    {GL_R16, GL_RED, GL_UNSIGNED_SHORT},                              // R16_UNORM
+    {GL_R16_SNORM, GL_RED, GL_SHORT},                                 // R16_SNORM
+    {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT},                    // R16_UINT
+    {GL_R16I, GL_RED_INTEGER, GL_SHORT},                              // R16_SINT
+    {GL_RG16, GL_RG, GL_UNSIGNED_SHORT},                              // R16G16_UNORM
+    {GL_RG16F, GL_RG, GL_HALF_FLOAT},                                 // R16G16_FLOAT
+    {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT},                    // R16G16_UINT
+    {GL_RG16I, GL_RG_INTEGER, GL_SHORT},                              // R16G16_SINT
+    {GL_RG16_SNORM, GL_RG, GL_SHORT},                                 // R16G16_SNORM
+    {GL_RGB32F, GL_RGB, GL_FLOAT},                                    // R32G32B32_FLOAT
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},          // A8B8G8R8_SRGB
+    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE},                                // R8G8_UNORM
+    {GL_RG8_SNORM, GL_RG, GL_BYTE},                                   // R8G8_SNORM
+    {GL_RG8I, GL_RG_INTEGER, GL_BYTE},                                // R8G8_SINT
+    {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE},                      // R8G8_UINT
+    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT},                      // R32G32_UINT
+    {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT},                              // R16G16B16X16_FLOAT
+    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT},                      // R32_UINT
+    {GL_R32I, GL_RED_INTEGER, GL_INT},                                // R32_SINT
+    {GL_COMPRESSED_RGBA_ASTC_8x8_KHR},                                // ASTC_2D_8X8_UNORM
+    {GL_COMPRESSED_RGBA_ASTC_8x5_KHR},                                // ASTC_2D_8X5_UNORM
+    {GL_COMPRESSED_RGBA_ASTC_5x4_KHR},                                // ASTC_2D_5X4_UNORM
+    {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE},                     // B8G8R8A8_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},                         // BC1_RGBA_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},                         // BC2_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},                         // BC3_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM},                            // BC7_SRGB
+    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV},               // A4B4G4R4_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR},                        // ASTC_2D_4X4_SRGB
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR},                        // ASTC_2D_8X8_SRGB
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR},                        // ASTC_2D_8X5_SRGB
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR},                        // ASTC_2D_5X4_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_5x5_KHR},                                // ASTC_2D_5X5_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR},                        // ASTC_2D_5X5_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_10x8_KHR},                               // ASTC_2D_10X8_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR},                       // ASTC_2D_10X8_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_6x6_KHR},                                // ASTC_2D_6X6_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR},                        // ASTC_2D_6X6_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_10x10_KHR},                              // ASTC_2D_10X10_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR},                      // ASTC_2D_10X10_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_12x12_KHR},                              // ASTC_2D_12X12_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR},                      // ASTC_2D_12X12_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_8x6_KHR},                                // ASTC_2D_8X6_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR},                        // ASTC_2D_8X6_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_6x5_KHR},                                // ASTC_2D_6X5_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR},                        // ASTC_2D_6X5_SRGB
+    {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV},                // E5B9G9R9_FLOAT
+    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},            // D32_FLOAT
+    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT},    // D16_UNORM
+    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},    // D24_UNORM_S8_UINT
+    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},    // S8_UINT_D24_UNORM
     {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
      GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
 }};
@@ -431,6 +430,8 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
             format_properties[i].emplace(format, properties);
         }
     }
+    has_broken_texture_view_formats = device.HasBrokenTextureViewFormats();
+
     null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
     null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
     null_image_3d.Create(GL_TEXTURE_3D);
@@ -651,13 +652,11 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
     if (IsConverted(runtime.device, info.format, info.type)) {
         flags |= ImageFlagBits::Converted;
         gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
-        gl_store_format = GL_RGBA8;
         gl_format = GL_RGBA;
         gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
     } else {
         const auto& tuple = GetFormatTuple(info.format);
         gl_internal_format = tuple.internal_format;
-        gl_store_format = tuple.store_format;
         gl_format = tuple.format;
         gl_type = tuple.type;
     }
@@ -677,23 +676,23 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
     }
     switch (target) {
     case GL_TEXTURE_1D_ARRAY:
-        glTextureStorage2D(handle, num_levels, gl_store_format, width, num_layers);
+        glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers);
         break;
     case GL_TEXTURE_2D_ARRAY:
-        glTextureStorage3D(handle, num_levels, gl_store_format, width, height, num_layers);
+        glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers);
         break;
     case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
         // TODO: Where should 'fixedsamplelocations' come from?
         const auto [samples_x, samples_y] = SamplesLog2(info.num_samples);
-        glTextureStorage3DMultisample(handle, num_samples, gl_store_format, width >> samples_x,
+        glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x,
                                       height >> samples_y, num_layers, GL_FALSE);
         break;
     }
     case GL_TEXTURE_RECTANGLE:
-        glTextureStorage2D(handle, num_levels, gl_store_format, width, height);
+        glTextureStorage2D(handle, num_levels, gl_internal_format, width, height);
         break;
     case GL_TEXTURE_3D:
-        glTextureStorage3D(handle, num_levels, gl_store_format, width, height, depth);
+        glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
         break;
     case GL_TEXTURE_BUFFER:
         buffer.Create();
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 04193e31e..15b7c3676 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -96,6 +96,10 @@ public:
 
     FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
 
+    bool HasBrokenTextureViewFormats() const noexcept {
+        return has_broken_texture_view_formats;
+    }
+
 private:
     struct StagingBuffers {
         explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
@@ -120,6 +124,7 @@ private:
     UtilShaders util_shaders;
 
     std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
+    bool has_broken_texture_view_formats = false;
 
     StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
     StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT};
@@ -165,7 +170,6 @@ private:
     OGLTextureView store_view;
     OGLBuffer buffer;
     GLenum gl_internal_format = GL_NONE;
-    GLenum gl_store_format = GL_NONE;
     GLenum gl_format = GL_NONE;
     GLenum gl_type = GL_NONE;
 };
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 87c8e5693..1f6a169ae 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -11,14 +11,14 @@
 #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
 #include "video_core/renderer_vulkan/blit_image.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/renderer_vulkan/vk_state_tracker.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/surface.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -225,7 +225,7 @@ constexpr std::array<VkPipelineShaderStageCreateInfo, 2> MakeStages(
     };
 }
 
-void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set,
+void UpdateOneTextureDescriptorSet(const Device& device, VkDescriptorSet descriptor_set,
                                    VkSampler sampler, VkImageView image_view) {
     const VkDescriptorImageInfo image_info{
         .sampler = sampler,
@@ -247,7 +247,7 @@ void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descr
     device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr);
 }
 
-void UpdateTwoTexturesDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set,
+void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descriptor_set,
                                     VkSampler sampler, VkImageView image_view_0,
                                     VkImageView image_view_1) {
     const VkDescriptorImageInfo image_info_0{
@@ -326,7 +326,7 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout,
 
 } // Anonymous namespace
 
-BlitImageHelper::BlitImageHelper(const VKDevice& device_, VKScheduler& scheduler_,
+BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
                                  StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool)
     : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_},
       one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout(
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 2c2790bf9..43fd3d737 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -8,19 +8,18 @@
 
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/texture_cache/types.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
 using VideoCommon::Offset2D;
 
-class VKDevice;
-class VKScheduler;
-class StateTracker;
-
+class Device;
 class Framebuffer;
 class ImageView;
+class StateTracker;
+class VKScheduler;
 
 struct BlitImagePipelineKey {
     constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default;
@@ -31,7 +30,7 @@ struct BlitImagePipelineKey {
 
 class BlitImageHelper {
 public:
-    explicit BlitImageHelper(const VKDevice& device, VKScheduler& scheduler,
+    explicit BlitImageHelper(const Device& device, VKScheduler& scheduler,
                              StateTracker& state_tracker, VKDescriptorPool& descriptor_pool);
     ~BlitImageHelper();
 
@@ -67,7 +66,7 @@ private:
 
     void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
 
-    const VKDevice& device;
+    const Device& device;
     VKScheduler& scheduler;
     StateTracker& state_tracker;
 
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 67dd10500..5be6dabd9 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -76,7 +76,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
             regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0;
     }
 
-    for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+    for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
         const auto& input = regs.vertex_attrib_format[index];
         auto& attribute = attributes[index];
         attribute.raw = 0;
@@ -85,6 +85,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
         attribute.offset.Assign(input.offset);
         attribute.type.Assign(static_cast<u32>(input.type.Value()));
         attribute.size.Assign(static_cast<u32>(input.size.Value()));
+        attribute.binding_index_enabled.Assign(regs.vertex_array[index].IsEnabled() ? 1 : 0);
     }
 
     for (std::size_t index = 0; index < std::size(attachments); ++index) {
@@ -172,14 +173,9 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
     depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
     cull_face.Assign(PackCullFace(regs.cull_face));
     cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0);
-
-    for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        const auto& input = regs.vertex_array[index];
-        VertexBinding& binding = vertex_bindings[index];
-        binding.raw = 0;
-        binding.enabled.Assign(input.IsEnabled() ? 1 : 0);
-        binding.stride.Assign(static_cast<u16>(input.stride.Value()));
-    }
+    std::ranges::transform(regs.vertex_array, vertex_strides.begin(), [](const auto& array) {
+        return static_cast<u16>(array.stride.Value());
+    });
 }
 
 std::size_t FixedPipelineState::Hash() const noexcept {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 7e95e6fce..465a55fdb 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -96,6 +96,8 @@ struct FixedPipelineState {
         BitField<6, 14, u32> offset;
         BitField<20, 3, u32> type;
         BitField<23, 6, u32> size;
+        // Not really an element of a vertex attribute, but it can be packed here
+        BitField<29, 1, u32> binding_index_enabled;
 
         constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
             return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
@@ -130,12 +132,6 @@ struct FixedPipelineState {
         }
     };
 
-    union VertexBinding {
-        u16 raw;
-        BitField<0, 12, u16> stride;
-        BitField<12, 1, u16> enabled;
-    };
-
     struct DynamicState {
         union {
             u32 raw1;
@@ -153,7 +149,8 @@ struct FixedPipelineState {
             BitField<0, 2, u32> cull_face;
             BitField<2, 1, u32> cull_enable;
         };
-        std::array<VertexBinding, Maxwell::NumVertexArrays> vertex_bindings;
+        // Vertex stride is a 12-bit value, so each u16 element has 4 bits to spare
+        std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
 
         void Fill(const Maxwell& regs);
 
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 4c988429f..ca7c2c579 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -9,9 +9,9 @@
 #include "common/logging/log.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/surface.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan::MaxwellToVK {
 
@@ -47,7 +47,7 @@ VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter
     return {};
 }
 
-VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
+VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wrap_mode,
                               Tegra::Texture::TextureFilter filter) {
     switch (wrap_mode) {
     case Tegra::Texture::WrapMode::Wrap:
@@ -222,7 +222,7 @@ constexpr bool IsZetaFormat(PixelFormat pixel_format) {
 
 } // Anonymous namespace
 
-FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format) {
+FormatInfo SurfaceFormat(const Device& device, FormatType format_type, PixelFormat pixel_format) {
     ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples));
 
     auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)];
@@ -280,7 +280,7 @@ VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
     return {};
 }
 
-VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
+VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
                                       Maxwell::PrimitiveTopology topology) {
     switch (topology) {
     case Maxwell::PrimitiveTopology::Points:
@@ -526,7 +526,7 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
     return {};
 }
 
-VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) {
+VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) {
     switch (index_format) {
     case Maxwell::IndexFormat::UnsignedByte:
         if (!device.IsExtIndexTypeUint8Supported()) {
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 1a90f192e..537969840 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -6,10 +6,10 @@
 
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/surface.h"
 #include "video_core/textures/texture.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan::MaxwellToVK {
 
@@ -22,7 +22,7 @@ VkFilter Filter(Tegra::Texture::TextureFilter filter);
 
 VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
 
-VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
+VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wrap_mode,
                               Tegra::Texture::TextureFilter filter);
 
 VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
@@ -35,17 +35,17 @@ struct FormatInfo {
     bool storage;
 };
 
-FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format);
+FormatInfo SurfaceFormat(const Device& device, FormatType format_type, PixelFormat pixel_format);
 
 VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage);
 
-VkPrimitiveTopology PrimitiveTopology(const VKDevice& device, Maxwell::PrimitiveTopology topology);
+VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
 
 VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
 
 VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
 
-VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format);
+VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format);
 
 VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);
 
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 7f521cb9b..d7437e185 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -12,8 +12,6 @@
 
 #include <fmt/format.h>
 
-#include "common/dynamic_library.h"
-#include "common/file_util.h"
 #include "common/logging/log.h"
 #include "common/telemetry.h"
 #include "core/core.h"
@@ -24,182 +22,27 @@
 #include "video_core/gpu.h"
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
 #include "video_core/renderer_vulkan/vk_blit_screen.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_master_semaphore.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_state_tracker.h"
 #include "video_core/renderer_vulkan/vk_swapchain.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-
-// Include these late to avoid polluting previous headers
-#ifdef _WIN32
-#include <windows.h>
-// ensure include order
-#include <vulkan/vulkan_win32.h>
-#endif
-
-#if !defined(_WIN32) && !defined(__APPLE__)
-#include <X11/Xlib.h>
-#include <vulkan/vulkan_wayland.h>
-#include <vulkan/vulkan_xlib.h>
-#endif
+#include "video_core/vulkan_common/vulkan_debug_callback.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_instance.h"
+#include "video_core/vulkan_common/vulkan_library.h"
+#include "video_core/vulkan_common/vulkan_surface.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
-
 namespace {
-
-using Core::Frontend::WindowSystemType;
-
-VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
-                       VkDebugUtilsMessageTypeFlagsEXT type,
-                       const VkDebugUtilsMessengerCallbackDataEXT* data,
-                       [[maybe_unused]] void* user_data) {
-    const char* const message{data->pMessage};
-
-    if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
-        LOG_CRITICAL(Render_Vulkan, "{}", message);
-    } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) {
-        LOG_WARNING(Render_Vulkan, "{}", message);
-    } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
-        LOG_INFO(Render_Vulkan, "{}", message);
-    } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) {
-        LOG_DEBUG(Render_Vulkan, "{}", message);
-    }
-    return VK_FALSE;
-}
-
-Common::DynamicLibrary OpenVulkanLibrary() {
-    Common::DynamicLibrary library;
-#ifdef __APPLE__
-    // Check if a path to a specific Vulkan library has been specified.
-    char* libvulkan_env = getenv("LIBVULKAN_PATH");
-    if (!libvulkan_env || !library.Open(libvulkan_env)) {
-        // Use the libvulkan.dylib from the application bundle.
-        const std::string filename =
-            Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
-        library.Open(filename.c_str());
-    }
-#else
-    std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
-    if (!library.Open(filename.c_str())) {
-        // Android devices may not have libvulkan.so.1, only libvulkan.so.
-        filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
-        (void)library.Open(filename.c_str());
-    }
-#endif
-    return library;
-}
-
-std::pair<vk::Instance, u32> CreateInstance(Common::DynamicLibrary& library,
-                                            vk::InstanceDispatch& dld, WindowSystemType window_type,
-                                            bool enable_debug_utils, bool enable_layers) {
-    if (!library.IsOpen()) {
-        LOG_ERROR(Render_Vulkan, "Vulkan library not available");
-        return {};
-    }
-    if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) {
-        LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan");
-        return {};
-    }
-    if (!vk::Load(dld)) {
-        LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers");
-        return {};
-    }
-
-    std::vector<const char*> extensions;
-    extensions.reserve(6);
-    switch (window_type) {
-    case Core::Frontend::WindowSystemType::Headless:
-        break;
-#ifdef _WIN32
-    case Core::Frontend::WindowSystemType::Windows:
-        extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
-        break;
-#endif
-#if !defined(_WIN32) && !defined(__APPLE__)
-    case Core::Frontend::WindowSystemType::X11:
-        extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
-        break;
-    case Core::Frontend::WindowSystemType::Wayland:
-        extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
-        break;
-#endif
-    default:
-        LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
-        break;
-    }
-    if (window_type != Core::Frontend::WindowSystemType::Headless) {
-        extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
-    }
-    if (enable_debug_utils) {
-        extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
-    }
-    extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
-
-    const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld);
-    if (!properties) {
-        LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
-        return {};
-    }
-
-    for (const char* extension : extensions) {
-        const auto it =
-            std::find_if(properties->begin(), properties->end(), [extension](const auto& prop) {
-                return !std::strcmp(extension, prop.extensionName);
-            });
-        if (it == properties->end()) {
-            LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
-            return {};
-        }
-    }
-
-    std::vector<const char*> layers;
-    layers.reserve(1);
-    if (enable_layers) {
-        layers.push_back("VK_LAYER_KHRONOS_validation");
-    }
-
-    const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld);
-    if (!layer_properties) {
-        LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers");
-        layers.clear();
-    }
-
-    for (auto layer_it = layers.begin(); layer_it != layers.end();) {
-        const char* const layer = *layer_it;
-        const auto it = std::find_if(
-            layer_properties->begin(), layer_properties->end(),
-            [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); });
-        if (it == layer_properties->end()) {
-            LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer);
-            layer_it = layers.erase(layer_it);
-        } else {
-            ++layer_it;
-        }
-    }
-
-    // Limit the maximum version of Vulkan to avoid using untested version.
-    const u32 version = std::min(vk::AvailableVersion(dld), static_cast<u32>(VK_API_VERSION_1_1));
-
-    vk::Instance instance = vk::Instance::Create(version, layers, extensions, dld);
-    if (!instance) {
-        LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance");
-        return {};
-    }
-    if (!vk::Load(*instance, dld)) {
-        LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers");
-    }
-    return std::make_pair(std::move(instance), version);
-}
-
 std::string GetReadableVersion(u32 version) {
     return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
                        VK_VERSION_PATCH(version));
 }
 
-std::string GetDriverVersion(const VKDevice& device) {
+std::string GetDriverVersion(const Device& device) {
     // Extracted from
     // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314
     const u32 version = device.GetDriverVersion();
@@ -216,7 +59,6 @@ std::string GetDriverVersion(const VKDevice& device) {
         const u32 minor = version & 0x3fff;
         return fmt::format("{}.{}", major, minor);
     }
-
     return GetReadableVersion(version);
 }
 
@@ -255,7 +97,6 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     if (!framebuffer) {
         return;
     }
-
     const auto& layout = render_window.GetFramebufferLayout();
     if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
         const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
@@ -284,14 +125,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     render_window.OnFrameDisplayed();
 }
 
-bool RendererVulkan::Init() {
-    library = OpenVulkanLibrary();
-    std::tie(instance, instance_version) = CreateInstance(
-        library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug);
-    if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) {
-        return false;
+bool RendererVulkan::Init() try {
+    library = OpenLibrary();
+    instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
+                              true, Settings::values.renderer_debug);
+    if (Settings::values.renderer_debug) {
+        debug_callback = CreateDebugCallback(instance);
     }
+    surface = CreateSurface(instance, render_window);
 
+    InitializeDevice();
     Report();
 
     memory_manager = std::make_unique<VKMemoryManager>(*device);
@@ -311,8 +154,11 @@ bool RendererVulkan::Init() {
     blit_screen =
         std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
                                        *memory_manager, *swapchain, *scheduler, screen_info);
-
     return true;
+
+} catch (const vk::Exception& exception) {
+    LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
+    return false;
 }
 
 void RendererVulkan::ShutDown() {
@@ -322,7 +168,6 @@ void RendererVulkan::ShutDown() {
     if (const auto& dev = device->GetLogical()) {
         dev.WaitIdle();
     }
-
     rasterizer.reset();
     blit_screen.reset();
     scheduler.reset();
@@ -331,95 +176,15 @@ void RendererVulkan::ShutDown() {
     device.reset();
 }
 
-bool RendererVulkan::CreateDebugCallback() {
-    if (!Settings::values.renderer_debug) {
-        return true;
-    }
-    debug_callback = instance.TryCreateDebugCallback(DebugCallback);
-    if (!debug_callback) {
-        LOG_ERROR(Render_Vulkan, "Failed to create debug callback");
-        return false;
-    }
-    return true;
-}
-
-bool RendererVulkan::CreateSurface() {
-    [[maybe_unused]] const auto& window_info = render_window.GetWindowInfo();
-    VkSurfaceKHR unsafe_surface = nullptr;
-
-#ifdef _WIN32
-    if (window_info.type == Core::Frontend::WindowSystemType::Windows) {
-        const HWND hWnd = static_cast<HWND>(window_info.render_surface);
-        const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
-                                                   nullptr, 0, nullptr, hWnd};
-        const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>(
-            dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR"));
-        if (!vkCreateWin32SurfaceKHR ||
-            vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
-            LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface");
-            return false;
-        }
-    }
-#endif
-#if !defined(_WIN32) && !defined(__APPLE__)
-    if (window_info.type == Core::Frontend::WindowSystemType::X11) {
-        const VkXlibSurfaceCreateInfoKHR xlib_ci{
-            VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0,
-            static_cast<Display*>(window_info.display_connection),
-            reinterpret_cast<Window>(window_info.render_surface)};
-        const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>(
-            dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR"));
-        if (!vkCreateXlibSurfaceKHR ||
-            vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
-            LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
-            return false;
-        }
-    }
-    if (window_info.type == Core::Frontend::WindowSystemType::Wayland) {
-        const VkWaylandSurfaceCreateInfoKHR wayland_ci{
-            VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0,
-            static_cast<wl_display*>(window_info.display_connection),
-            static_cast<wl_surface*>(window_info.render_surface)};
-        const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>(
-            dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR"));
-        if (!vkCreateWaylandSurfaceKHR ||
-            vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) !=
-                VK_SUCCESS) {
-            LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
-            return false;
-        }
-    }
-#endif
-    if (!unsafe_surface) {
-        LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
-        return false;
-    }
-
-    surface = vk::SurfaceKHR(unsafe_surface, *instance, dld);
-    return true;
-}
-
-bool RendererVulkan::PickDevices() {
-    const auto devices = instance.EnumeratePhysicalDevices();
-    if (!devices) {
-        LOG_ERROR(Render_Vulkan, "Failed to enumerate physical devices");
-        return false;
-    }
-
+void RendererVulkan::InitializeDevice() {
+    const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
     const s32 device_index = Settings::values.vulkan_device.GetValue();
-    if (device_index < 0 || device_index >= static_cast<s32>(devices->size())) {
+    if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
         LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
-        return false;
-    }
-    const vk::PhysicalDevice physical_device((*devices)[static_cast<std::size_t>(device_index)],
-                                             dld);
-    if (!VKDevice::IsSuitable(physical_device, *surface)) {
-        return false;
+        throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
     }
-
-    device =
-        std::make_unique<VKDevice>(*instance, instance_version, physical_device, *surface, dld);
-    return device->Create();
+    const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld);
+    device = std::make_unique<Device>(*instance, physical_device, *surface, dld);
 }
 
 void RendererVulkan::Report() const {
@@ -444,26 +209,21 @@ void RendererVulkan::Report() const {
     telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
 }
 
-std::vector<std::string> RendererVulkan::EnumerateDevices() {
+std::vector<std::string> RendererVulkan::EnumerateDevices() try {
     vk::InstanceDispatch dld;
-    Common::DynamicLibrary library = OpenVulkanLibrary();
-    vk::Instance instance =
-        CreateInstance(library, dld, WindowSystemType::Headless, false, false).first;
-    if (!instance) {
-        return {};
-    }
-
-    const std::optional physical_devices = instance.EnumeratePhysicalDevices();
-    if (!physical_devices) {
-        return {};
-    }
-
+    const Common::DynamicLibrary library = OpenLibrary();
+    const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
+    const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
     std::vector<std::string> names;
-    names.reserve(physical_devices->size());
-    for (const auto& device : *physical_devices) {
+    names.reserve(physical_devices.size());
+    for (const VkPhysicalDevice device : physical_devices) {
         names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName);
     }
     return names;
+
+} catch (const vk::Exception& exception) {
+    LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what());
+    return {};
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 74642fba4..5575ffc54 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -11,7 +11,7 @@
 #include "common/dynamic_library.h"
 
 #include "video_core/renderer_base.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Core {
 class TelemetrySession;
@@ -27,9 +27,9 @@ class GPU;
 
 namespace Vulkan {
 
+class Device;
 class StateTracker;
 class VKBlitScreen;
-class VKDevice;
 class VKMemoryManager;
 class VKSwapchain;
 class VKScheduler;
@@ -56,11 +56,7 @@ public:
     static std::vector<std::string> EnumerateDevices();
 
 private:
-    bool CreateDebugCallback();
-
-    bool CreateSurface();
-
-    bool PickDevices();
+    void InitializeDevice();
 
     void Report() const;
 
@@ -72,14 +68,13 @@ private:
     vk::InstanceDispatch dld;
 
     vk::Instance instance;
-    u32 instance_version{};
 
     vk::SurfaceKHR surface;
 
     VKScreenInfo screen_info;
 
-    vk::DebugCallback debug_callback;
-    std::unique_ptr<VKDevice> device;
+    vk::DebugUtilsMessenger debug_callback;
+    std::unique_ptr<Device> device;
     std::unique_ptr<VKMemoryManager> memory_manager;
     std::unique_ptr<StateTracker> state_tracker;
     std::unique_ptr<VKScheduler> scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index d3a83f22f..5e184eb42 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -21,15 +21,15 @@
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
 #include "video_core/renderer_vulkan/vk_blit_screen.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_master_semaphore.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/renderer_vulkan/vk_swapchain.h"
-#include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/surface.h"
 #include "video_core/textures/decoders.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -114,7 +114,7 @@ struct VKBlitScreen::BufferData {
 
 VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
                            Core::Frontend::EmuWindow& render_window_,
-                           VideoCore::RasterizerInterface& rasterizer_, const VKDevice& device_,
+                           VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
                            VKMemoryManager& memory_manager_, VKSwapchain& swapchain_,
                            VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
     : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_},
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 2ee374247..69ed61770 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -7,7 +7,7 @@
 #include <memory>
 
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Core {
 class System;
@@ -33,8 +33,8 @@ namespace Vulkan {
 
 struct ScreenInfo;
 
+class Device;
 class RasterizerVulkan;
-class VKDevice;
 class VKScheduler;
 class VKSwapchain;
 
@@ -42,7 +42,7 @@ class VKBlitScreen final {
 public:
     explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
                           Core::Frontend::EmuWindow& render_window,
-                          VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
+                          VideoCore::RasterizerInterface& rasterizer, const Device& device,
                           VKMemoryManager& memory_manager, VKSwapchain& swapchain,
                           VKScheduler& scheduler, const VKScreenInfo& screen_info);
     ~VKBlitScreen();
@@ -85,7 +85,7 @@ private:
     Core::Memory::Memory& cpu_memory;
     Core::Frontend::EmuWindow& render_window;
     VideoCore::RasterizerInterface& rasterizer;
-    const VKDevice& device;
+    const Device& device;
     VKMemoryManager& memory_manager;
     VKSwapchain& swapchain;
     VKScheduler& scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 10d296c2f..4d517c547 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -9,10 +9,10 @@
 #include "core/core.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -34,13 +34,13 @@ constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
 constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS =
     VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
 
-std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
+std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const Device& device, VKScheduler& scheduler) {
     return std::make_unique<VKStreamBuffer>(device, scheduler);
 }
 
 } // Anonymous namespace
 
-Buffer::Buffer(const VKDevice& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
+Buffer::Buffer(const Device& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
                VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
     : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{
                                                                                  staging_pool_} {
@@ -168,7 +168,7 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
 
 VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
                              Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                             const VKDevice& device_, VKMemoryManager& memory_manager_,
+                             const Device& device_, VKMemoryManager& memory_manager_,
                              VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
                              VKStagingBufferPool& staging_pool_)
     : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index daf498222..1c39aed34 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -11,17 +11,17 @@
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
-class VKDevice;
+class Device;
 class VKMemoryManager;
 class VKScheduler;
 
 class Buffer final : public VideoCommon::BufferBlock {
 public:
-    explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
+    explicit Buffer(const Device& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
                     VKStagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
     ~Buffer();
 
@@ -41,7 +41,7 @@ public:
     }
 
 private:
-    const VKDevice& device;
+    const Device& device;
     VKScheduler& scheduler;
     VKStagingBufferPool& staging_pool;
 
@@ -52,7 +52,7 @@ class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VK
 public:
     explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
                            Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
-                           const VKDevice& device, VKMemoryManager& memory_manager,
+                           const Device& device, VKMemoryManager& memory_manager,
                            VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
                            VKStagingBufferPool& staging_pool);
     ~VKBufferCache();
@@ -63,7 +63,7 @@ protected:
     std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
 
 private:
-    const VKDevice& device;
+    const Device& device;
     VKMemoryManager& memory_manager;
     VKScheduler& scheduler;
     VKStagingBufferPool& staging_pool;
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp
index 8f7d6410e..a99df9323 100644
--- a/src/video_core/renderer_vulkan/vk_command_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp
@@ -5,8 +5,8 @@
 #include <cstddef>
 
 #include "video_core/renderer_vulkan/vk_command_pool.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -17,7 +17,7 @@ struct CommandPool::Pool {
     vk::CommandBuffers cmdbufs;
 };
 
-CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_)
+CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const Device& device_)
     : ResourcePool(master_semaphore_, COMMAND_BUFFER_POOL_SIZE), device{device_} {}
 
 CommandPool::~CommandPool() = default;
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h
index 62a7ce3f1..61c26a22a 100644
--- a/src/video_core/renderer_vulkan/vk_command_pool.h
+++ b/src/video_core/renderer_vulkan/vk_command_pool.h
@@ -8,16 +8,16 @@
 #include <vector>
 
 #include "video_core/renderer_vulkan/vk_resource_pool.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
+class Device;
 class MasterSemaphore;
-class VKDevice;
 
 class CommandPool final : public ResourcePool {
 public:
-    explicit CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_);
+    explicit CommandPool(MasterSemaphore& master_semaphore_, const Device& device_);
     ~CommandPool() override;
 
     void Allocate(size_t begin, size_t end) override;
@@ -27,7 +27,7 @@ public:
 private:
     struct Pool;
 
-    const VKDevice& device;
+    const Device& device;
     std::vector<Pool> pools;
 };
 
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 2c030e910..02a6d54b7 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -15,11 +15,11 @@
 #include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
 #include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -86,7 +86,7 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
 
 } // Anonymous namespace
 
-VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
+VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
                              vk::Span<VkDescriptorSetLayoutBinding> bindings,
                              vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
                              vk::Span<VkPushConstantRange> push_constants,
@@ -162,7 +162,7 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet(
     return set;
 }
 
-QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_,
+QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
                              VKDescriptorPool& descriptor_pool_,
                              VKStagingBufferPool& staging_buffer_pool_,
                              VKUpdateDescriptorQueue& update_descriptor_queue_)
@@ -211,7 +211,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32
     return {*buffer.handle, 0};
 }
 
-Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler_,
+Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
                      VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_,
                      VKUpdateDescriptorQueue& update_descriptor_queue_)
     : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
@@ -255,7 +255,7 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff
     return {*buffer.handle, 0};
 }
 
-QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_,
+QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
                                  VKDescriptorPool& descriptor_pool_,
                                  VKStagingBufferPool& staging_buffer_pool_,
                                  VKUpdateDescriptorQueue& update_descriptor_queue_)
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index abdf61e2c..7ddb09afb 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -11,18 +11,18 @@
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
-class VKDevice;
+class Device;
 class VKScheduler;
 class VKStagingBufferPool;
 class VKUpdateDescriptorQueue;
 
 class VKComputePass {
 public:
-    explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
+    explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
                            vk::Span<VkDescriptorSetLayoutBinding> bindings,
                            vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
                            vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
@@ -43,7 +43,7 @@ private:
 
 class QuadArrayPass final : public VKComputePass {
 public:
-    explicit QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_,
+    explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
                            VKDescriptorPool& descriptor_pool_,
                            VKStagingBufferPool& staging_buffer_pool_,
                            VKUpdateDescriptorQueue& update_descriptor_queue_);
@@ -59,7 +59,7 @@ private:
 
 class Uint8Pass final : public VKComputePass {
 public:
-    explicit Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_,
+    explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
                        VKDescriptorPool& descriptor_pool_,
                        VKStagingBufferPool& staging_buffer_pool_,
                        VKUpdateDescriptorQueue& update_descriptor_queue_);
@@ -75,7 +75,7 @@ private:
 
 class QuadIndexedPass final : public VKComputePass {
 public:
-    explicit QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_,
+    explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
                              VKDescriptorPool& descriptor_pool_,
                              VKStagingBufferPool& staging_buffer_pool_,
                              VKUpdateDescriptorQueue& update_descriptor_queue_);
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 62f44d6da..3a48219b7 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -6,16 +6,16 @@
 
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
-VKComputePipeline::VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_,
+VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_,
                                      VKDescriptorPool& descriptor_pool_,
                                      VKUpdateDescriptorQueue& update_descriptor_queue_,
                                      const SPIRVShader& shader_)
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 49e2113a2..7e16575ac 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -7,17 +7,17 @@
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
-class VKDevice;
+class Device;
 class VKScheduler;
 class VKUpdateDescriptorQueue;
 
 class VKComputePipeline final {
 public:
-    explicit VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_,
+    explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_,
                                VKDescriptorPool& descriptor_pool_,
                                VKUpdateDescriptorQueue& update_descriptor_queue_,
                                const SPIRVShader& shader_);
@@ -48,7 +48,7 @@ private:
 
     vk::Pipeline CreatePipeline() const;
 
-    const VKDevice& device;
+    const Device& device;
     VKScheduler& scheduler;
     ShaderEntries entries;
 
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index f38e089d5..ef9fb5910 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -6,10 +6,10 @@
 
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_resource_pool.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -32,7 +32,7 @@ void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
     descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin));
 }
 
-VKDescriptorPool::VKDescriptorPool(const VKDevice& device_, VKScheduler& scheduler)
+VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler)
     : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{
                                                                              AllocateNewPool()} {}
 
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
index 544f32a20..f892be7be 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -7,11 +7,11 @@
 #include <vector>
 
 #include "video_core/renderer_vulkan/vk_resource_pool.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
-class VKDevice;
+class Device;
 class VKDescriptorPool;
 class VKScheduler;
 
@@ -39,7 +39,7 @@ class VKDescriptorPool final {
     friend DescriptorAllocator;
 
 public:
-    explicit VKDescriptorPool(const VKDevice& device, VKScheduler& scheduler);
+    explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler);
     ~VKDescriptorPool();
 
     VKDescriptorPool(const VKDescriptorPool&) = delete;
@@ -50,7 +50,7 @@ private:
 
     vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count);
 
-    const VKDevice& device;
+    const Device& device;
     MasterSemaphore& master_semaphore;
 
     std::vector<vk::DescriptorPool> pools;
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 774a12a53..6cd00884d 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -3,24 +3,21 @@
 // Refer to the license.txt file included.
 
 #include <memory>
-#include <thread>
 
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_fence_manager.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
-InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_,
-                       bool is_stubbed_)
-    : FenceBase{payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {}
+InnerFence::InnerFence(VKScheduler& scheduler_, u32 payload_, bool is_stubbed_)
+    : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {}
 
-InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_,
-                       u32 payload_, bool is_stubbed_)
-    : FenceBase{address_, payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {}
+InnerFence::InnerFence(VKScheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_)
+    : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {}
 
 InnerFence::~InnerFence() = default;
 
@@ -28,63 +25,38 @@ void InnerFence::Queue() {
     if (is_stubbed) {
         return;
     }
-    ASSERT(!event);
-
-    event = device.GetLogical().CreateEvent();
-    ticks = scheduler.CurrentTick();
-
-    scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
-        cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
-    });
+    // Remember the scheduler tick current at queue time; IsSignaled() and Wait() use it
+    wait_tick = scheduler.CurrentTick();
+    scheduler.Flush(); // NOTE(review): presumably submits pending work; confirm
 }
 
 bool InnerFence::IsSignaled() const {
     if (is_stubbed) {
         return true;
     }
-    ASSERT(event);
-    return IsEventSignalled();
+    return scheduler.IsFree(wait_tick); // Asks the scheduler whether the queued tick is free
 }
 
 void InnerFence::Wait() {
     if (is_stubbed) {
         return;
     }
-    ASSERT(event);
-
-    if (ticks >= scheduler.CurrentTick()) {
-        scheduler.Flush();
-    }
-    while (!IsEventSignalled()) {
-        std::this_thread::yield();
-    }
-}
-
-bool InnerFence::IsEventSignalled() const {
-    switch (const VkResult result = event.GetStatus()) {
-    case VK_EVENT_SET:
-        return true;
-    case VK_EVENT_RESET:
-        return false;
-    default:
-        throw vk::Exception(result);
-    }
+    scheduler.Wait(wait_tick); // Hands the wait for the queued tick to the scheduler
 }
 
 VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
                                Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
                                VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
-                               const VKDevice& device_, VKScheduler& scheduler_)
+                               VKScheduler& scheduler_)
     : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
-      device{device_}, scheduler{scheduler_} {}
+      scheduler{scheduler_} {}
 
 Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
-    return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
+    return std::make_shared<InnerFence>(scheduler, value, is_stubbed);
 }
 
 Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
-    return std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed);
+    return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed);
 }
 
 void VKFenceManager::QueueFence(Fence& fence) {
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index c2869e8e3..9c5e5aa8f 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -9,7 +9,7 @@
 #include "video_core/fence_manager.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Core {
 class System;
@@ -21,17 +21,15 @@ class RasterizerInterface;
 
 namespace Vulkan {
 
+class Device;
 class VKBufferCache;
-class VKDevice;
 class VKQueryCache;
 class VKScheduler;
 
 class InnerFence : public VideoCommon::FenceBase {
 public:
-    explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_,
-                        bool is_stubbed_);
-    explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_,
-                        u32 payload_, bool is_stubbed_);
+    explicit InnerFence(VKScheduler& scheduler_, u32 payload_, bool is_stubbed_);
+    explicit InnerFence(VKScheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_);
     ~InnerFence();
 
     void Queue();
@@ -41,12 +39,8 @@ public:
     void Wait();
 
 private:
-    bool IsEventSignalled() const;
-
-    const VKDevice& device;
     VKScheduler& scheduler;
-    vk::Event event;
-    u64 ticks = 0;
+    u64 wait_tick = 0;
 };
 using Fence = std::shared_ptr<InnerFence>;
 
@@ -58,7 +52,7 @@ public:
     explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
                             Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
                             VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
-                            const VKDevice& device_, VKScheduler& scheduler_);
+                            VKScheduler& scheduler_);
 
 protected:
     Fence CreateFence(u32 value, bool is_stubbed) override;
@@ -68,7 +62,6 @@ protected:
     void WaitFence(Fence& fence) override;
 
 private:
-    const VKDevice& device;
     VKScheduler& scheduler;
 };
 
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 7979df3a8..a5214d0bc 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -12,12 +12,12 @@
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -94,7 +94,7 @@ VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
 
 } // Anonymous namespace
 
-VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_,
+VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_,
                                        VKDescriptorPool& descriptor_pool_,
                                        VKUpdateDescriptorQueue& update_descriptor_queue_,
                                        const GraphicsPipelineCacheKey& key,
@@ -212,11 +212,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
         // state is ignored
         dynamic.raw1 = 0;
         dynamic.raw2 = 0;
-        for (FixedPipelineState::VertexBinding& binding : dynamic.vertex_bindings) {
-            // Enable all vertex bindings
-            binding.raw = 0;
-            binding.enabled.Assign(1);
-        }
+        dynamic.vertex_strides.fill(0);
     } else {
         dynamic = state.dynamic_state;
     }
@@ -224,19 +220,16 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
     std::vector<VkVertexInputBindingDescription> vertex_bindings;
     std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
     for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        const auto& binding = dynamic.vertex_bindings[index];
-        if (!binding.enabled) {
+        if (state.attributes[index].binding_index_enabled == 0) {
             continue;
         }
         const bool instanced = state.binding_divisors[index] != 0;
         const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
-
         vertex_bindings.push_back({
             .binding = static_cast<u32>(index),
-            .stride = binding.stride,
+            .stride = dynamic.vertex_strides[index],
             .inputRate = rate,
         });
-
         if (instanced) {
             vertex_binding_divisors.push_back({
                 .binding = static_cast<u32>(index),
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 214d06b4c..8b6a98fe0 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -13,7 +13,7 @@
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -40,8 +40,8 @@ static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>
 static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
 static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
 
+class Device;
 class VKDescriptorPool;
-class VKDevice;
 class VKScheduler;
 class VKUpdateDescriptorQueue;
 
@@ -49,7 +49,7 @@ using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderSt
 
 class VKGraphicsPipeline final {
 public:
-    explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_,
+    explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_,
                                 VKDescriptorPool& descriptor_pool,
                                 VKUpdateDescriptorQueue& update_descriptor_queue_,
                                 const GraphicsPipelineCacheKey& key,
@@ -85,7 +85,7 @@ private:
     vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass,
                                 u32 num_color_buffers) const;
 
-    const VKDevice& device;
+    const Device& device;
     VKScheduler& scheduler;
     const GraphicsPipelineCacheKey cache_key;
     const u64 hash;
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index ae26e558d..56ec5e380 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -6,15 +6,15 @@
 #include <chrono>
 
 #include "core/settings.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_master_semaphore.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
 using namespace std::chrono_literals;
 
-MasterSemaphore::MasterSemaphore(const VKDevice& device) {
+MasterSemaphore::MasterSemaphore(const Device& device) {
     static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{
         .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
         .pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 0e93706d7..f336f1862 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -8,15 +8,15 @@
 #include <thread>
 
 #include "common/common_types.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
-class VKDevice;
+class Device;
 
 class MasterSemaphore {
 public:
-    explicit MasterSemaphore(const VKDevice& device);
+    explicit MasterSemaphore(const Device& device);
     ~MasterSemaphore();
 
     /// Returns the current logical tick.
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index 56b24b70f..a6abd0eee 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -11,9 +11,9 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -29,7 +29,7 @@ u64 GetAllocationChunkSize(u64 required_size) {
 
 class VKMemoryAllocation final {
 public:
-    explicit VKMemoryAllocation(const VKDevice& device_, vk::DeviceMemory memory_,
+    explicit VKMemoryAllocation(const Device& device_, vk::DeviceMemory memory_,
                                 VkMemoryPropertyFlags properties_, u64 allocation_size_, u32 type_)
         : device{device_}, memory{std::move(memory_)}, properties{properties_},
           allocation_size{allocation_size_}, shifted_type{ShiftType(type_)} {}
@@ -104,7 +104,7 @@ private:
         return std::nullopt;
     }
 
-    const VKDevice& device;                 ///< Vulkan device.
+    const Device& device;                   ///< Vulkan device.
     const vk::DeviceMemory memory;          ///< Vulkan memory allocation handler.
     const VkMemoryPropertyFlags properties; ///< Vulkan properties.
     const u64 allocation_size;              ///< Size of this allocation.
@@ -117,7 +117,7 @@ private:
     std::vector<const VKMemoryCommitImpl*> commits;
 };
 
-VKMemoryManager::VKMemoryManager(const VKDevice& device_)
+VKMemoryManager::VKMemoryManager(const Device& device_)
     : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {}
 
 VKMemoryManager::~VKMemoryManager() = default;
@@ -207,7 +207,7 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requi
     return {};
 }
 
-VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_,
+VKMemoryCommitImpl::VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_,
                                        const vk::DeviceMemory& memory_, u64 begin_, u64 end_)
     : device{device_}, memory{memory_}, interval{begin_, end_}, allocation{allocation_} {}
 
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
index 318f8b43e..2452bca4e 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -9,12 +9,12 @@
 #include <utility>
 #include <vector>
 #include "common/common_types.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
+class Device;
 class MemoryMap;
-class VKDevice;
 class VKMemoryAllocation;
 class VKMemoryCommitImpl;
 
@@ -22,7 +22,7 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
 
 class VKMemoryManager final {
 public:
-    explicit VKMemoryManager(const VKDevice& device_);
+    explicit VKMemoryManager(const Device& device_);
     VKMemoryManager(const VKMemoryManager&) = delete;
     ~VKMemoryManager();
 
@@ -49,7 +49,7 @@ private:
     VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements,
                                   VkMemoryPropertyFlags wanted_properties);
 
-    const VKDevice& device;                                       ///< Device handler.
+    const Device& device;                                         ///< Device handler.
     const VkPhysicalDeviceMemoryProperties properties;            ///< Physical device properties.
     std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
 };
@@ -59,7 +59,7 @@ class VKMemoryCommitImpl final {
     friend MemoryMap;
 
 public:
-    explicit VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_,
+    explicit VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_,
                                 const vk::DeviceMemory& memory_, u64 begin_, u64 end_);
     ~VKMemoryCommitImpl();
 
@@ -85,7 +85,7 @@ private:
     /// Unmaps memory.
     void Unmap() const;
 
-    const VKDevice& device;           ///< Vulkan device.
+    const Device& device;             ///< Vulkan device.
     const vk::DeviceMemory& memory;   ///< Vulkan device memory handler.
     std::pair<u64, u64> interval{};   ///< Interval where the commit exists.
     VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 083796d05..02282e36f 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -19,17 +19,17 @@
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/shader/compiler_settings.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader_cache.h"
 #include "video_core/shader_notify.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -149,7 +149,7 @@ Shader::~Shader() = default;
 VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
                                  Tegra::Engines::Maxwell3D& maxwell3d_,
                                  Tegra::Engines::KeplerCompute& kepler_compute_,
-                                 Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,
+                                 Tegra::MemoryManager& gpu_memory_, const Device& device_,
                                  VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
                                  VKUpdateDescriptorQueue& update_descriptor_queue_)
     : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index fbaa8257c..89d635a3d 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -20,12 +20,12 @@
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
-#include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/shader/async_shaders.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 #include "video_core/shader_cache.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Core {
 class System;
@@ -33,10 +33,10 @@ class System;
 
 namespace Vulkan {
 
+class Device;
 class RasterizerVulkan;
 class VKComputePipeline;
 class VKDescriptorPool;
-class VKDevice;
 class VKScheduler;
 class VKUpdateDescriptorQueue;
 
@@ -121,7 +121,7 @@ public:
     explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
                              Tegra::Engines::Maxwell3D& maxwell3d,
                              Tegra::Engines::KeplerCompute& kepler_compute,
-                             Tegra::MemoryManager& gpu_memory, const VKDevice& device,
+                             Tegra::MemoryManager& gpu_memory, const Device& device,
                              VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
                              VKUpdateDescriptorQueue& update_descriptor_queue);
     ~VKPipelineCache() override;
@@ -148,7 +148,7 @@ private:
     Tegra::Engines::KeplerCompute& kepler_compute;
     Tegra::MemoryManager& gpu_memory;
 
-    const VKDevice& device;
+    const Device& device;
     VKScheduler& scheduler;
     VKDescriptorPool& descriptor_pool;
     VKUpdateDescriptorQueue& update_descriptor_queue;
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 038760de3..7cadd5147 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -7,11 +7,11 @@
 #include <utility>
 #include <vector>
 
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
 #include "video_core/renderer_vulkan/vk_resource_pool.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -27,7 +27,7 @@ constexpr VkQueryType GetTarget(QueryType type) {
 
 } // Anonymous namespace
 
-QueryPool::QueryPool(const VKDevice& device_, VKScheduler& scheduler, QueryType type_)
+QueryPool::QueryPool(const Device& device_, VKScheduler& scheduler, QueryType type_)
     : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {}
 
 QueryPool::~QueryPool() = default;
@@ -68,7 +68,7 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
 
 VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer_,
                            Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
-                           const VKDevice& device_, VKScheduler& scheduler_)
+                           const Device& device_, VKScheduler& scheduler_)
     : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_},
       query_pools{
           QueryPool{device_, scheduler_, QueryType::SamplesPassed},
@@ -96,9 +96,9 @@ void VKQueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) {
 HostCounter::HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
                          QueryType type_)
     : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_},
-      query{cache_.AllocateQuery(type_)}, tick{cache_.Scheduler().CurrentTick()} {
-    const vk::Device* logical = &cache_.Device().GetLogical();
-    cache_.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
+      query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} {
+    const vk::Device* logical = &cache.GetDevice().GetLogical();
+    cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
         logical->ResetQueryPoolEXT(query.first, query.second, 1);
         cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
     });
@@ -109,17 +109,17 @@ HostCounter::~HostCounter() {
 }
 
 void HostCounter::EndQuery() {
-    cache.Scheduler().Record(
+    cache.GetScheduler().Record(
         [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); });
 }
 
 u64 HostCounter::BlockingQuery() const {
-    if (tick >= cache.Scheduler().CurrentTick()) {
-        cache.Scheduler().Flush();
+    if (tick >= cache.GetScheduler().CurrentTick()) {
+        cache.GetScheduler().Flush();
     }
 
     u64 data;
-    const VkResult query_result = cache.Device().GetLogical().GetQueryResults(
+    const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
         query.first, query.second, 1, sizeof(data), &data, sizeof(data),
         VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
 
@@ -127,7 +127,7 @@ u64 HostCounter::BlockingQuery() const {
     case VK_SUCCESS:
         return data;
     case VK_ERROR_DEVICE_LOST:
-        cache.Device().ReportLoss();
+        cache.GetDevice().ReportLoss();
         [[fallthrough]];
     default:
         throw vk::Exception(query_result);
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index 837fe9ebf..7190946b9 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -12,7 +12,7 @@
 #include "common/common_types.h"
 #include "video_core/query_cache.h"
 #include "video_core/renderer_vulkan/vk_resource_pool.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace VideoCore {
 class RasterizerInterface;
@@ -21,8 +21,8 @@ class RasterizerInterface;
 namespace Vulkan {
 
 class CachedQuery;
+class Device;
 class HostCounter;
-class VKDevice;
 class VKQueryCache;
 class VKScheduler;
 
@@ -30,7 +30,7 @@ using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
 
 class QueryPool final : public ResourcePool {
 public:
-    explicit QueryPool(const VKDevice& device, VKScheduler& scheduler, VideoCore::QueryType type);
+    explicit QueryPool(const Device& device, VKScheduler& scheduler, VideoCore::QueryType type);
     ~QueryPool() override;
 
     std::pair<VkQueryPool, u32> Commit();
@@ -43,7 +43,7 @@ protected:
 private:
     static constexpr std::size_t GROW_STEP = 512;
 
-    const VKDevice& device;
+    const Device& device;
     const VideoCore::QueryType type;
 
     std::vector<vk::QueryPool> pools;
@@ -55,23 +55,23 @@ class VKQueryCache final
 public:
     explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer_,
                           Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
-                          const VKDevice& device_, VKScheduler& scheduler_);
+                          const Device& device_, VKScheduler& scheduler_);
     ~VKQueryCache();
 
     std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
 
     void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query);
 
-    const VKDevice& Device() const noexcept {
+    const Device& GetDevice() const noexcept {
         return device;
     }
 
-    VKScheduler& Scheduler() const noexcept {
+    VKScheduler& GetScheduler() const noexcept {
         return scheduler;
     }
 
 private:
-    const VKDevice& device;
+    const Device& device;
     VKScheduler& scheduler;
     std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
 };
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 04c5c859c..ce3db49bd 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -27,7 +27,6 @@
 #include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_rasterizer.h"
@@ -36,9 +35,10 @@
 #include "video_core/renderer_vulkan/vk_state_tracker.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/shader_cache.h"
 #include "video_core/texture_cache/texture_cache.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -62,7 +62,7 @@ namespace {
 
 constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
 
-VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, size_t index) {
+VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) {
     const auto& src = regs.viewport_transform[index];
     const float width = src.scale_x * 2.0f;
     const float height = src.scale_y * 2.0f;
@@ -239,7 +239,7 @@ public:
         index.type = type;
     }
 
-    void Bind(const VKDevice& device, VKScheduler& scheduler) const {
+    void Bind(const Device& device, VKScheduler& scheduler) const {
         // Use this large switch case to avoid dispatching more memory in the record lambda than
         // what we need. It looks horrible, but it's the best we can do on standard C++.
         switch (vertex.num_buffers) {
@@ -330,7 +330,7 @@ private:
     } index;
 
     template <size_t N>
-    void BindStatic(const VKDevice& device, VKScheduler& scheduler) const {
+    void BindStatic(const Device& device, VKScheduler& scheduler) const {
         if (device.IsExtExtendedDynamicStateSupported()) {
             if (index.buffer) {
                 BindStatic<N, true, true>(scheduler);
@@ -409,7 +409,7 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
 RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
                                    Tegra::MemoryManager& gpu_memory_,
                                    Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_,
-                                   const VKDevice& device_, VKMemoryManager& memory_manager_,
+                                   const Device& device_, VKMemoryManager& memory_manager_,
                                    StateTracker& state_tracker_, VKScheduler& scheduler_)
     : RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
       gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
@@ -428,8 +428,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
       buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, stream_buffer,
                    staging_pool),
       query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
-      fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device,
-                    scheduler),
+      fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler),
       wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
     scheduler.SetQueryCache(query_cache);
     if (device.UseAsynchronousShaders()) {
@@ -628,8 +627,10 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
                       grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout,
                       descriptor_set](vk::CommandBuffer cmdbuf) {
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
-        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, DESCRIPTOR_SET,
-                                  descriptor_set, {});
+        if (descriptor_set) {
+            cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout,
+                                      DESCRIPTOR_SET, descriptor_set, nullptr);
+        }
         cmdbuf.Dispatch(grid_x, grid_y, grid_z);
     });
 }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 990f9e031..4695718e9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -29,8 +29,8 @@
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/shader/async_shaders.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Core {
 class System;
@@ -55,7 +55,7 @@ class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
 public:
     explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
                               Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                              VKScreenInfo& screen_info_, const VKDevice& device_,
+                              VKScreenInfo& screen_info_, const Device& device_,
                               VKMemoryManager& memory_manager_, StateTracker& state_tracker_,
                               VKScheduler& scheduler_);
     ~RasterizerVulkan() override;
@@ -212,7 +212,7 @@ private:
     Tegra::Engines::KeplerCompute& kepler_compute;
 
     VKScreenInfo& screen_info;
-    const VKDevice& device;
+    const Device& device;
     VKMemoryManager& memory_manager;
     StateTracker& state_tracker;
     VKScheduler& scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index c104c6fe3..66004f9c0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -11,13 +11,13 @@
 #include "common/microprofile.h"
 #include "common/thread.h"
 #include "video_core/renderer_vulkan/vk_command_pool.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_master_semaphore.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_state_tracker.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -37,7 +37,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
     last = nullptr;
 }
 
-VKScheduler::VKScheduler(const VKDevice& device_, StateTracker& state_tracker_)
+VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_)
     : device{device_}, state_tracker{state_tracker_},
       master_semaphore{std::make_unique<MasterSemaphore>(device)},
       command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 0a36c8fad..4cd43e425 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -12,22 +12,22 @@
 #include <utility>
 #include "common/common_types.h"
 #include "common/threadsafe_queue.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
 class CommandPool;
+class Device;
 class Framebuffer;
 class MasterSemaphore;
 class StateTracker;
-class VKDevice;
 class VKQueryCache;
 
 /// The scheduler abstracts command buffer and fence management with an interface that's able to do
 /// OpenGL-like operations on Vulkan command buffers.
 class VKScheduler {
 public:
-    explicit VKScheduler(const VKDevice& device, StateTracker& state_tracker);
+    explicit VKScheduler(const Device& device, StateTracker& state_tracker);
     ~VKScheduler();
 
     /// Returns the current command buffer tick.
@@ -179,7 +179,7 @@ private:
 
     void AcquireNewChunk();
 
-    const VKDevice& device;
+    const Device& device;
     StateTracker& state_tracker;
 
     std::unique_ptr<MasterSemaphore> master_semaphore;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 09d6f9f35..89cbe01ad 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -22,11 +22,11 @@
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/engines/shader_header.h"
 #include "video_core/engines/shader_type.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/shader/node.h"
 #include "video_core/shader/shader_ir.h"
 #include "video_core/shader/transform_feedback.h"
+#include "video_core/vulkan_common/vulkan_device.h"
 
 namespace Vulkan {
 
@@ -272,19 +272,15 @@ bool IsPrecise(Operation operand) {
     return false;
 }
 
-u32 ShaderVersion(const VKDevice& device) {
-    if (device.InstanceApiVersion() < VK_API_VERSION_1_1) {
-        return 0x00010000;
-    }
-    return 0x00010300;
-}
-
 class SPIRVDecompiler final : public Sirit::Module {
 public:
-    explicit SPIRVDecompiler(const VKDevice& device_, const ShaderIR& ir_, ShaderType stage_,
+    // Sirit::Module takes the SPIR-V version word: 0x00010300 encodes SPIR-V 1.3.
+    // NOTE(review): this is unconditional, so pre-Vulkan-1.1 instances (which cannot consume
+    // SPIR-V 1.3) are presumably rejected before reaching here - confirm.
+    explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_,
                              const Registry& registry_, const Specialization& specialization_)
-        : Module(ShaderVersion(device_)), device{device_}, ir{ir_}, stage{stage_},
-          header{ir_.GetHeader()}, registry{registry_}, specialization{specialization_} {
+        : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()},
+          registry{registry_}, specialization{specialization_} {
         if (stage_ != ShaderType::Compute) {
             transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo());
         }
@@ -2749,7 +2745,7 @@ private:
     };
     static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
 
-    const VKDevice& device;
+    const Device& device;
     const ShaderIR& ir;
     const ShaderType stage;
     const Tegra::Shader::Header header;
@@ -3137,7 +3133,7 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
     return entries;
 }
 
-std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
+std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
                            ShaderType stage, const VideoCommon::Shader::Registry& registry,
                            const Specialization& specialization) {
     return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index ad91ad5de..26381e444 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -15,10 +15,8 @@
 #include "video_core/shader/shader_ir.h"
 
 namespace Vulkan {
-class VKDevice;
-}
 
-namespace Vulkan {
+class Device;
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 using UniformTexelEntry = VideoCommon::Shader::SamplerEntry;
@@ -109,7 +107,7 @@ struct SPIRVShader {
 
 ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
 
-std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
+std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
                            Tegra::Engines::ShaderType stage,
                            const VideoCommon::Shader::Registry& registry,
                            const Specialization& specialization);
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp
index 38a0be7f2..aaad4f292 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -7,13 +7,13 @@
 
 #include "common/assert.h"
 #include "common/common_types.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
-vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code) {
+vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code) {
     return device.GetLogical().CreateShaderModule({
         .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
         .pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h
index dce34a140..9517cbe84 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.h
+++ b/src/video_core/renderer_vulkan/vk_shader_util.h
@@ -7,12 +7,12 @@
 #include <span>
 
 #include "common/common_types.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
-class VKDevice;
+class Device;
 
-vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code);
+vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code);
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 2fd3b7f39..1e0b8b922 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -9,17 +9,17 @@
 
 #include "common/bit_util.h"
 #include "common/common_types.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
 VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer_)
     : buffer{std::move(buffer_)} {}
 
-VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device_, VKMemoryManager& memory_manager_,
+VKStagingBufferPool::VKStagingBufferPool(const Device& device_, VKMemoryManager& memory_manager_,
                                          VKScheduler& scheduler_)
     : device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_} {}
 
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 2dd5049ac..90dadcbbe 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -10,11 +10,11 @@
 #include "common/common_types.h"
 
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
-class VKDevice;
+class Device;
 class VKScheduler;
 
 struct VKBuffer final {
@@ -24,7 +24,7 @@ struct VKBuffer final {
 
 class VKStagingBufferPool final {
 public:
-    explicit VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
+    explicit VKStagingBufferPool(const Device& device, VKMemoryManager& memory_manager,
                                  VKScheduler& scheduler);
     ~VKStagingBufferPool();
 
@@ -58,7 +58,7 @@ private:
 
     u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2);
 
-    const VKDevice& device;
+    const Device& device;
     VKMemoryManager& memory_manager;
     VKScheduler& scheduler;
 
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 419cb154d..a09fe084e 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -10,10 +10,10 @@
 
 #include "common/alignment.h"
 #include "common/assert.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -60,7 +60,7 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
 
 } // Anonymous namespace
 
-VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_)
+VKStreamBuffer::VKStreamBuffer(const Device& device_, VKScheduler& scheduler_)
     : device{device_}, scheduler{scheduler_} {
     CreateBuffers();
     ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 1428f77bf..2e9c8cb46 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -9,17 +9,17 @@
 #include <vector>
 
 #include "common/common_types.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
-class VKDevice;
+class Device;
 class VKFenceWatch;
 class VKScheduler;
 
 class VKStreamBuffer final {
 public:
-    explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler);
+    explicit VKStreamBuffer(const Device& device, VKScheduler& scheduler);
     ~VKStreamBuffer();
 
     /**
@@ -54,7 +54,7 @@ private:
 
     void WaitPendingOperations(u64 requested_upper_bound);
 
-    const VKDevice& device; ///< Vulkan device manager.
+    const Device& device;   ///< Vulkan device manager.
     VKScheduler& scheduler; ///< Command scheduler.
 
     vk::Buffer buffer;        ///< Mapped buffer.
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 9636a7c65..725a2a05d 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -11,10 +11,10 @@
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/frontend/framebuffer_layout.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_swapchain.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -56,7 +56,7 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
 
 } // Anonymous namespace
 
-VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const VKDevice& device_, VKScheduler& scheduler_)
+VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_)
     : surface{surface_}, device{device_}, scheduler{scheduler_} {}
 
 VKSwapchain::~VKSwapchain() = default;
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index 6b39befdf..2eadd62b3 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -7,7 +7,7 @@
 #include <vector>
 
 #include "common/common_types.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Layout {
 struct FramebufferLayout;
@@ -15,12 +15,12 @@ struct FramebufferLayout;
 
 namespace Vulkan {
 
-class VKDevice;
+class Device;
 class VKScheduler;
 
 class VKSwapchain {
 public:
-    explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device, VKScheduler& scheduler);
+    explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler);
     ~VKSwapchain();
 
     /// Creates (or recreates) the swapchain with a given size.
@@ -73,7 +73,7 @@ private:
     void Destroy();
 
     const VkSurfaceKHR surface;
-    const VKDevice& device;
+    const Device& device;
     VKScheduler& scheduler;
 
     vk::SwapchainKHR swapchain;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 261808391..bd11de012 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -10,11 +10,13 @@
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/renderer_vulkan/blit_image.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
-#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -93,7 +95,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
     }
 }
 
-[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const VKDevice& device, const ImageInfo& info) {
+[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const Device& device, const ImageInfo& info) {
     const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, info.format);
     VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
     if (info.type == ImageType::e2D && info.resources.layers >= 6 &&
@@ -146,14 +148,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
     };
 }
 
-[[nodiscard]] vk::Image MakeImage(const VKDevice& device, const ImageInfo& info) {
+[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) {
     if (info.type == ImageType::Buffer) {
         return vk::Image{};
     }
     return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info));
 }
 
-[[nodiscard]] vk::Buffer MakeBuffer(const VKDevice& device, const ImageInfo& info) {
+[[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) {
     if (info.type != ImageType::Buffer) {
         return vk::Buffer{};
     }
@@ -205,7 +207,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
     }
 }
 
-[[nodiscard]] VkAttachmentDescription AttachmentDescription(const VKDevice& device,
+[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device,
                                                             const ImageView* image_view) {
     const auto pixel_format = image_view->format;
     return VkAttachmentDescription{
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index edc3d80c0..92a7aad8b 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -8,8 +8,8 @@
 #include <span>
 
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/texture_cache/texture_cache.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -19,11 +19,11 @@ using VideoCommon::Offset2D;
 using VideoCommon::RenderTargets;
 using VideoCore::Surface::PixelFormat;
 
-class VKDevice;
 class VKScheduler;
 class VKStagingBufferPool;
 
 class BlitImageHelper;
+class Device;
 class Image;
 class ImageView;
 class Framebuffer;
@@ -68,7 +68,7 @@ struct ImageBufferMap {
 };
 
 struct TextureCacheRuntime {
-    const VKDevice& device;
+    const Device& device;
     VKScheduler& scheduler;
     VKMemoryManager& memory_manager;
     VKStagingBufferPool& staging_buffer_pool;
@@ -104,6 +104,11 @@ struct TextureCacheRuntime {
     }
 
     void InsertUploadMemoryBarrier() {}
+
+    bool HasBrokenTextureViewFormats() const noexcept {
+        // No known Vulkan driver has broken image views
+        return false;
+    }
 };
 
 class Image : public VideoCommon::ImageBase {
@@ -177,7 +182,7 @@ public:
 private:
     [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask);
 
-    const VKDevice* device = nullptr;
+    const Device* device = nullptr;
     std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views;
     vk::ImageView depth_view;
     vk::ImageView stencil_view;
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 8826da325..f99273c6a 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -7,14 +7,14 @@
 
 #include "common/assert.h"
 #include "common/logging/log.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
-VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_)
+VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_)
     : device{device_}, scheduler{scheduler_} {}
 
 VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index f098a8540..e214f7195 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -8,11 +8,11 @@
 #include <boost/container/static_vector.hpp>
 
 #include "common/common_types.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
-class VKDevice;
+class Device;
 class VKScheduler;
 
 struct DescriptorUpdateEntry {
@@ -31,7 +31,7 @@ struct DescriptorUpdateEntry {
 
 class VKUpdateDescriptorQueue final {
 public:
-    explicit VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_);
+    explicit VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_);
     ~VKUpdateDescriptorQueue();
 
     void TickFrame();
@@ -69,7 +69,7 @@ public:
     }
 
 private:
-    const VKDevice& device;
+    const Device& device;
     VKScheduler& scheduler;
 
     const DescriptorUpdateEntry* upload_start = nullptr;
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 09f93463b..9707136e9 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -134,7 +134,7 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
 }
 
 void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
-                                     const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
+                                     const Vulkan::Device& device, Vulkan::VKScheduler& scheduler,
                                      Vulkan::VKDescriptorPool& descriptor_pool,
                                      Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
                                      std::vector<VkDescriptorSetLayoutBinding> bindings,
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 004e214a8..0dbb1a31f 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -24,9 +24,9 @@
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/vulkan_common/vulkan_device.h"
 
 namespace Core::Frontend {
 class EmuWindow;
@@ -94,7 +94,7 @@ public:
                            CompilerSettings compiler_settings, const Registry& registry,
                            VAddr cpu_addr);
 
-    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
+    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device,
                            Vulkan::VKScheduler& scheduler,
                            Vulkan::VKDescriptorPool& descriptor_pool,
                            Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
@@ -123,7 +123,7 @@ private:
 
         // For Vulkan
         Vulkan::VKPipelineCache* pp_cache;
-        const Vulkan::VKDevice* vk_device;
+        const Vulkan::Device* vk_device;
         Vulkan::VKScheduler* scheduler;
         Vulkan::VKDescriptorPool* descriptor_pool;
         Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index 448a05fcc..959b3f115 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -120,7 +120,9 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
     if (lhs.info.type == ImageType::Linear) {
         base = SubresourceBase{.level = 0, .layer = 0};
     } else {
-        base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS);
+        // We are passing relaxed formats as an option, having broken views or not won't matter
+        static constexpr bool broken_views = false;
+        base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views);
     }
     if (!base) {
         LOG_ERROR(HW_GPU, "Image alias should have been flipped");
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index 076a4bcfd..18f72e508 100644
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -24,7 +24,7 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i
           .height = std::max(image_info.size.height >> range.base.level, 1u),
           .depth = std::max(image_info.size.depth >> range.base.level, 1u),
       } {
-    ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format),
+    ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false),
                "Image view format {} is incompatible with image format {}", info.format,
                image_info.format);
     const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 968059842..d1080300f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -61,7 +61,7 @@ using VideoCore::Surface::SurfaceType;
 template <class P>
 class TextureCache {
     /// Address shift for caching images into a hash table
-    static constexpr u64 PAGE_SHIFT = 20;
+    static constexpr u64 PAGE_BITS = 20;
 
     /// Enables debugging features to the texture cache
     static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
@@ -184,8 +184,8 @@ private:
     template <typename Func>
     static void ForEachPage(VAddr addr, size_t size, Func&& func) {
-        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
-        const u64 page_end = (addr + size - 1) >> PAGE_SHIFT;
-        for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
+        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
+        const u64 page_end = (addr + size - 1) >> PAGE_BITS;
+        for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
             if constexpr (RETURNS_BOOL) {
                 if (func(page)) {
                     break;
@@ -708,7 +708,7 @@ void TextureCache<P>::InvalidateDepthBuffer() {
 template <class P>
 typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
     // TODO: Properly implement this
-    const auto it = page_table.find(cpu_addr >> PAGE_SHIFT);
+    const auto it = page_table.find(cpu_addr >> PAGE_BITS);
     if (it == page_table.end()) {
         return nullptr;
     }
@@ -883,6 +883,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
     if (!cpu_addr) {
         return ImageId{};
     }
+    const bool broken_views = runtime.HasBrokenTextureViewFormats();
     ImageId image_id;
     const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
         if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
@@ -892,11 +893,11 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
             if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
                 existing.pitch == info.pitch &&
                 IsPitchLinearSameSize(existing, info, strict_size) &&
-                IsViewCompatible(existing.format, info.format)) {
+                IsViewCompatible(existing.format, info.format, broken_views)) {
                 image_id = existing_image_id;
                 return true;
             }
-        } else if (IsSubresource(info, existing_image, gpu_addr, options)) {
+        } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views)) {
             image_id = existing_image_id;
             return true;
         }
@@ -926,6 +927,7 @@ template <class P>
 ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) {
     ImageInfo new_info = info;
     const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
+    const bool broken_views = runtime.HasBrokenTextureViewFormats();
     std::vector<ImageId> overlap_ids;
     std::vector<ImageId> left_aliased_ids;
     std::vector<ImageId> right_aliased_ids;
@@ -940,7 +942,9 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             }
             return;
         }
-        const auto solution = ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, true);
+        static constexpr bool strict_size = true;
+        const std::optional<OverlapResult> solution =
+            ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views);
         if (solution) {
             gpu_addr = solution->gpu_addr;
             cpu_addr = solution->cpu_addr;
@@ -950,9 +954,10 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
         }
         static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
         const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
-        if (IsSubresource(new_info, overlap, gpu_addr, options)) {
+        if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views)) {
             left_aliased_ids.push_back(overlap_id);
-        } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options)) {
+        } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
+                                 broken_views)) {
             right_aliased_ids.push_back(overlap_id);
         }
     });
@@ -1165,13 +1170,13 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
     ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
         const auto page_it = page_table.find(page);
         if (page_it == page_table.end()) {
-            UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_SHIFT);
+            UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
             return;
         }
         std::vector<ImageId>& image_ids = page_it->second;
         const auto vector_it = std::ranges::find(image_ids, image_id);
         if (vector_it == image_ids.end()) {
-            UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_SHIFT);
+            UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS);
             return;
         }
         image_ids.erase(vector_it);
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 9ed1fc007..279932778 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -1069,13 +1069,13 @@ bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool stri
 
 std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr,
                                             VAddr cpu_addr, const ImageBase& overlap,
-                                            bool strict_size) {
+                                            bool strict_size, bool broken_views) {
     ASSERT(new_info.type != ImageType::Linear);
     ASSERT(overlap.info.type != ImageType::Linear);
     if (!IsLayerStrideCompatible(new_info, overlap.info)) {
         return std::nullopt;
     }
-    if (!IsViewCompatible(overlap.info.format, new_info.format)) {
+    if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views)) {
         return std::nullopt;
     }
     if (gpu_addr == overlap.gpu_addr) {
@@ -1118,14 +1118,15 @@ bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) {
 }
 
 std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image,
-                                               GPUVAddr candidate_addr, RelaxedOptions options) {
+                                               GPUVAddr candidate_addr, RelaxedOptions options,
+                                               bool broken_views) {
     const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr);
     if (!base) {
         return std::nullopt;
     }
     const ImageInfo& existing = image.info;
     if (False(options & RelaxedOptions::Format)) {
-        if (!IsViewCompatible(existing.format, candidate.format)) {
+        if (!IsViewCompatible(existing.format, candidate.format, broken_views)) {
             return std::nullopt;
         }
     }
@@ -1162,8 +1163,8 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const
 }
 
 bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr,
-                   RelaxedOptions options) {
-    return FindSubresource(candidate, image, candidate_addr, options).has_value();
+                   RelaxedOptions options, bool broken_views) {
+    return FindSubresource(candidate, image, candidate_addr, options, broken_views).has_value();
 }
 
 void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index dbbbd33cd..52a9207d6 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -87,17 +87,19 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima
 [[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info,
                                                           GPUVAddr gpu_addr, VAddr cpu_addr,
                                                           const ImageBase& overlap,
-                                                          bool strict_size);
+                                                          bool strict_size, bool broken_views);
 
 [[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs);
 
 [[nodiscard]] std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate,
                                                              const ImageBase& image,
                                                              GPUVAddr candidate_addr,
-                                                             RelaxedOptions options);
+                                                             RelaxedOptions options,
+                                                             bool broken_views);
 
 [[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image,
-                                 GPUVAddr candidate_addr, RelaxedOptions options);
+                                 GPUVAddr candidate_addr, RelaxedOptions options,
+                                 bool broken_views);
 
 void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
                       const ImageBase* src);
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
index 5b01020ec..8d10ac29e 100644
--- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
@@ -32,20 +32,11 @@ namespace Vulkan {
 
 static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll";
 
-NsightAftermathTracker::NsightAftermathTracker() = default;
-
-NsightAftermathTracker::~NsightAftermathTracker() {
-    if (initialized) {
-        (void)GFSDK_Aftermath_DisableGpuCrashDumps();
-    }
-}
-
-bool NsightAftermathTracker::Initialize() {
+NsightAftermathTracker::NsightAftermathTracker() {
     if (!dl.Open(AFTERMATH_LIB_NAME)) {
         LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL");
-        return false;
+        return;
     }
-
     if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps",
                       &GFSDK_Aftermath_DisableGpuCrashDumps) ||
         !dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps",
@@ -64,27 +55,29 @@ bool NsightAftermathTracker::Initialize() {
         LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers");
-        return false;
+        return;
     }
-
     dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash";
 
-    (void)Common::FS::DeleteDirRecursively(dump_dir);
+    void(Common::FS::DeleteDirRecursively(dump_dir));
     if (!Common::FS::CreateDir(dump_dir)) {
         LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory");
-        return false;
+        return;
     }
-
     if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps(
             GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan,
             GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback,
             ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) {
         LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed");
-        return false;
+        return;
     }
-
     LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir);
-
     initialized = true;
-    return true;
+}
+
+NsightAftermathTracker::~NsightAftermathTracker() {
+    // Crash dumps are only enabled when the constructor ran to completion
+    if (initialized) {
+        (void)GFSDK_Aftermath_DisableGpuCrashDumps();
+    }
 }
 
 void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
index afe7ae99e..cee3847fb 100644
--- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
@@ -34,8 +34,6 @@ public:
     NsightAftermathTracker(NsightAftermathTracker&&) = delete;
     NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete;
 
-    bool Initialize();
-
     void SaveShader(const std::vector<u32>& spirv) const;
 
 private:
@@ -78,9 +76,6 @@ private:
 #ifndef HAS_NSIGHT_AFTERMATH
 inline NsightAftermathTracker::NsightAftermathTracker() = default;
 inline NsightAftermathTracker::~NsightAftermathTracker() = default;
-inline bool NsightAftermathTracker::Initialize() {
-    return false;
-}
 inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {}
 #endif
 
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp
new file mode 100644
index 000000000..ea7af8ad4
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp
@@ -0,0 +1,63 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+#include "common/logging/log.h"
+#include "video_core/vulkan_common/vulkan_debug_callback.h"
+
+namespace Vulkan {
+namespace {
+/// Forwards a debug utils message to the emulator log, choosing the log level from the most
+/// severe bit set in `severity`. Always returns VK_FALSE.
+VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
+                  VkDebugUtilsMessageTypeFlagsEXT type,
+                  const VkDebugUtilsMessengerCallbackDataEXT* data,
+                  [[maybe_unused]] void* user_data) {
+    const auto has_severity = [severity](VkDebugUtilsMessageSeverityFlagBitsEXT bit) {
+        return (severity & bit) != 0;
+    };
+    const std::string_view text{data->pMessage};
+    if (has_severity(VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)) {
+        LOG_CRITICAL(Render_Vulkan, "{}", text);
+        return VK_FALSE;
+    }
+    if (has_severity(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)) {
+        LOG_WARNING(Render_Vulkan, "{}", text);
+        return VK_FALSE;
+    }
+    if (has_severity(VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT)) {
+        LOG_INFO(Render_Vulkan, "{}", text);
+        return VK_FALSE;
+    }
+    if (has_severity(VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT)) {
+        LOG_DEBUG(Render_Vulkan, "{}", text);
+    }
+    return VK_FALSE;
+}
+} // Anonymous namespace
+
+/// Creates a debug utils messenger on `instance` that sends general, validation and
+/// performance messages of error, warning, info and verbose severity to Callback.
+vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance) {
+    static constexpr VkDebugUtilsMessageSeverityFlagsEXT severities =
+        VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
+        VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
+        VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
+        VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT;
+    static constexpr VkDebugUtilsMessageTypeFlagsEXT types =
+        VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
+        VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
+        VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
+    const VkDebugUtilsMessengerCreateInfoEXT create_info{
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
+        .pNext = nullptr,
+        .flags = 0,
+        .messageSeverity = severities,
+        .messageType = types,
+        .pfnUserCallback = Callback,
+    };
+    return instance.CreateDebugUtilsMessenger(create_info);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.h b/src/video_core/vulkan_common/vulkan_debug_callback.h
new file mode 100644
index 000000000..2efcd244c
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_debug_callback.h
@@ -0,0 +1,14 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+/// Creates a debug messenger on the instance that forwards Vulkan messages to the logger.
+vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance);
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 85b4f0dff..75173324e 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -13,8 +13,9 @@
 
 #include "common/assert.h"
 #include "core/settings.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/nsight_aftermath_tracker.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -206,17 +207,14 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
 
 } // Anonymous namespace
 
-VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_,
-                   VkSurfaceKHR surface, const vk::InstanceDispatch& dld_)
+Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface,
+               const vk::InstanceDispatch& dld_)
     : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
-      instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} {
+      format_properties{GetFormatProperties(physical, dld)} {
+    CheckSuitability();
     SetupFamilies(surface);
     SetupFeatures();
-}
-
-VKDevice::~VKDevice() = default;
 
-bool VKDevice::Create() {
     const auto queue_cis = GetDeviceQueueCreateInfos();
     const std::vector extensions = LoadExtensions();
 
@@ -415,7 +413,7 @@ bool VKDevice::Create() {
 
     VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv;
     if (nv_device_diagnostics_config) {
-        nsight_aftermath_tracker.Initialize();
+        nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>();
 
         diagnostics_nv = {
             .sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV,
@@ -426,12 +424,7 @@ bool VKDevice::Create() {
         };
         first_next = &diagnostics_nv;
     }
-
     logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
-    if (!logical) {
-        LOG_ERROR(Render_Vulkan, "Failed to create logical device");
-        return false;
-    }
 
     CollectTelemetryParameters();
     CollectToolingInfo();
@@ -455,11 +448,12 @@ bool VKDevice::Create() {
     present_queue = logical.GetQueue(present_family);
 
     use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
-    return true;
 }
 
-VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
-                                      FormatType format_type) const {
+Device::~Device() = default;
+
+VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
+                                    FormatType format_type) const {
     if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
         return wanted_format;
     }
@@ -490,18 +484,20 @@ VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFla
     return wanted_format;
 }
 
-void VKDevice::ReportLoss() const {
+void Device::ReportLoss() const {
     LOG_CRITICAL(Render_Vulkan, "Device loss occurred!");
 
     // Wait for the log to flush and for Nsight Aftermath to dump the results
     std::this_thread::sleep_for(std::chrono::seconds{15});
 }
 
-void VKDevice::SaveShader(const std::vector<u32>& spirv) const {
-    nsight_aftermath_tracker.SaveShader(spirv);
+void Device::SaveShader(const std::vector<u32>& spirv) const {
+    if (NsightAftermathTracker* const tracker = nsight_aftermath_tracker.get()) {
+        tracker->SaveShader(spirv); // null unless nv_device_diagnostics_config is enabled
+    }
 }
 
-bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const {
+bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const {
     // Disable for now to avoid converting ASTC twice.
     static constexpr std::array astc_formats = {
         VK_FORMAT_ASTC_4x4_UNORM_BLOCK,   VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
@@ -535,7 +531,7 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features)
     return true;
 }
 
-bool VKDevice::TestDepthStencilBlits() const {
+bool Device::TestDepthStencilBlits() const {
     static constexpr VkFormatFeatureFlags required_features =
         VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
     const auto test_features = [](VkFormatProperties props) {
@@ -545,8 +541,8 @@ bool VKDevice::TestDepthStencilBlits() const {
            test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT));
 }
 
-bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
-                                 FormatType format_type) const {
+bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
+                               FormatType format_type) const {
     const auto it = format_properties.find(wanted_format);
     if (it == format_properties.end()) {
         UNIMPLEMENTED_MSG("Unimplemented format query={}", wanted_format);
@@ -556,64 +552,45 @@ bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wa
     return (supported_usage & wanted_usage) == wanted_usage;
 }
 
-bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
-    bool is_suitable = true;
+void Device::CheckSuitability() const {
     std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
-
-    for (const auto& prop : physical.EnumerateDeviceExtensionProperties()) {
+    for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) {
         for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
             if (available_extensions[i]) {
                 continue;
             }
-            const std::string_view name{prop.extensionName};
+            const std::string_view name{property.extensionName};
             available_extensions[i] = name == REQUIRED_EXTENSIONS[i];
         }
     }
-    if (!available_extensions.all()) {
-        for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
-            if (available_extensions[i]) {
-                continue;
-            }
-            LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]);
-            is_suitable = false;
-        }
-    }
-
-    bool has_graphics{}, has_present{};
-    const std::vector queue_family_properties = physical.GetQueueFamilyProperties();
-    for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
-        const auto& family = queue_family_properties[i];
-        if (family.queueCount == 0) {
+    for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
+        if (available_extensions[i]) {
             continue;
         }
-        has_graphics |= family.queueFlags & VK_QUEUE_GRAPHICS_BIT;
-        has_present |= physical.GetSurfaceSupportKHR(i, surface);
-    }
-    if (!has_graphics || !has_present) {
-        LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue");
-        is_suitable = false;
-    }
-
-    // TODO(Rodrigo): Check if the device matches all requeriments.
-    const auto properties{physical.GetProperties()};
-    const auto& limits{properties.limits};
-
-    constexpr u32 required_ubo_size = 65536;
-    if (limits.maxUniformBufferRange < required_ubo_size) {
-        LOG_ERROR(Render_Vulkan, "Device UBO size {} is too small, {} is required",
-                  limits.maxUniformBufferRange, required_ubo_size);
-        is_suitable = false;
+        LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]);
+        throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
     }
-
-    constexpr u32 required_num_viewports = 16;
-    if (limits.maxViewports < required_num_viewports) {
-        LOG_INFO(Render_Vulkan, "Device number of viewports {} is too small, {} is required",
-                 limits.maxViewports, required_num_viewports);
-        is_suitable = false;
+    struct LimitTuple {
+        u32 minimum;
+        u32 value;
+        const char* name;
+    };
+    const VkPhysicalDeviceLimits& limits{properties.limits};
+    const std::array limits_report{
+        LimitTuple{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"},
+        LimitTuple{16, limits.maxViewports, "maxViewports"},
+        LimitTuple{8, limits.maxColorAttachments, "maxColorAttachments"},
+        LimitTuple{8, limits.maxClipDistances, "maxClipDistances"},
+    };
+    for (const auto& tuple : limits_report) {
+        if (tuple.value < tuple.minimum) {
+            LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", tuple.name,
+                      tuple.minimum, tuple.value);
+            throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
+        }
     }
-
-    const auto features{physical.GetFeatures()};
-    const std::array feature_report = {
+    const VkPhysicalDeviceFeatures features{physical.GetFeatures()};
+    const std::array feature_report{
         std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
         std::make_pair(features.imageCubeArray, "imageCubeArray"),
         std::make_pair(features.independentBlend, "independentBlend"),
@@ -631,22 +608,16 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
         std::make_pair(features.shaderStorageImageWriteWithoutFormat,
                        "shaderStorageImageWriteWithoutFormat"),
     };
-    for (const auto& [supported, name] : feature_report) {
-        if (supported) {
+    for (const auto& [is_supported, name] : feature_report) {
+        if (is_supported) {
             continue;
         }
         LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name);
-        is_suitable = false;
+        throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
     }
-
-    if (!is_suitable) {
-        LOG_ERROR(Render_Vulkan, "{} is not suitable", properties.deviceName);
-    }
-
-    return is_suitable;
 }
 
-std::vector<const char*> VKDevice::LoadExtensions() {
+std::vector<const char*> Device::LoadExtensions() {
     std::vector<const char*> extensions;
     extensions.reserve(7 + REQUIRED_EXTENSIONS.size());
     extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end());
@@ -685,9 +656,7 @@ std::vector<const char*> VKDevice::LoadExtensions() {
         test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
         test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
         test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false);
-        if (instance_version >= VK_API_VERSION_1_1) {
-            test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
-        }
+        test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
         if (Settings::values.renderer_debug) {
             test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
                  true);
@@ -801,39 +770,45 @@ std::vector<const char*> VKDevice::LoadExtensions() {
     return extensions;
 }
 
-void VKDevice::SetupFamilies(VkSurfaceKHR surface) {
-    std::optional<u32> graphics_family_, present_family_;
-
+void Device::SetupFamilies(VkSurfaceKHR surface) { // surface may be null (headless)
     const std::vector queue_family_properties = physical.GetQueueFamilyProperties();
-    for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
-        if (graphics_family_ && present_family_)
+    std::optional<u32> graphics;
+    std::optional<u32> present;
+    for (u32 index = 0; index < static_cast<u32>(queue_family_properties.size()); ++index) {
+        if (graphics && (present || !surface)) { // every family that is needed was found
             break;
-
-        const auto& queue_family = queue_family_properties[i];
-        if (queue_family.queueCount == 0)
+        }
+        const VkQueueFamilyProperties& queue_family = queue_family_properties[index];
+        if (queue_family.queueCount == 0) {
             continue;
-
+        }
         if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) {
-            graphics_family_ = i;
+            graphics = index;
         }
-        if (physical.GetSurfaceSupportKHR(i, surface)) {
-            present_family_ = i;
+        if (surface && physical.GetSurfaceSupportKHR(index, surface)) {
+            present = index;
         }
     }
-    ASSERT(graphics_family_ && present_family_);
-
-    graphics_family = *graphics_family_;
-    present_family = *present_family_;
+    if (!graphics) {
+        LOG_ERROR(Render_Vulkan, "Device lacks a graphics queue");
+        throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
+    }
+    if (surface && !present) {
+        LOG_ERROR(Render_Vulkan, "Device lacks a present queue");
+        throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
+    }
+    graphics_family = *graphics;
+    present_family = present.value_or(*graphics); // no surface: fall back to graphics family
 }
 
-void VKDevice::SetupFeatures() {
+void Device::SetupFeatures() { // caches capability flags derived from the physical device
     const auto supported_features{physical.GetFeatures()};
     is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
     is_blit_depth_stencil_supported = TestDepthStencilBlits();
     is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
 }
 
-void VKDevice::CollectTelemetryParameters() {
+void Device::CollectTelemetryParameters() {
     VkPhysicalDeviceDriverPropertiesKHR driver{
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR,
         .pNext = nullptr,
@@ -860,7 +835,7 @@ void VKDevice::CollectTelemetryParameters() {
     }
 }
 
-void VKDevice::CollectToolingInfo() {
+void Device::CollectToolingInfo() {
     if (!ext_tooling_info) {
         return;
     }
@@ -886,7 +861,7 @@ void VKDevice::CollectToolingInfo() {
     }
 }
 
-std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
+std::vector<VkDeviceQueueCreateInfo> Device::GetDeviceQueueCreateInfos() const {
     static constexpr float QUEUE_PRIORITY = 1.0f;
 
     std::unordered_set<u32> unique_queue_families{graphics_family, present_family};
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 995dcfc0f..a973c3ce4 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -10,11 +10,12 @@
 #include <vector>
 
 #include "common/common_types.h"
-#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
+class NsightAftermathTracker;
+
 /// Format usage descriptor.
 enum class FormatType { Linear, Optimal, Buffer };
 
@@ -22,14 +23,11 @@ enum class FormatType { Linear, Optimal, Buffer };
 const u32 GuestWarpSize = 32;
 
 /// Handles data specific to a physical device.
-class VKDevice final {
+class Device final {
 public:
-    explicit VKDevice(VkInstance instance, u32 instance_version, vk::PhysicalDevice physical,
-                      VkSurfaceKHR surface, const vk::InstanceDispatch& dld);
-    ~VKDevice();
-
-    /// Initializes the device. Returns true on success.
-    bool Create();
+    explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface,
+                    const vk::InstanceDispatch& dld);
+    ~Device();
 
     /**
      * Returns a format supported by the device for the passed requeriments.
@@ -82,11 +80,6 @@ public:
         return present_family;
     }
 
-    /// Returns the current instance Vulkan API version in Vulkan-formatted version numbers.
-    u32 InstanceApiVersion() const {
-        return instance_version;
-    }
-
     /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
     u32 ApiVersion() const {
         return properties.apiVersion;
@@ -232,10 +225,10 @@ public:
         return use_asynchronous_shaders;
     }
 
+private:
     /// Checks if the physical device is suitable.
-    static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface);
+    void CheckSuitability() const;
 
-private:
     /// Loads extensions into a vector and stores available ones in this object.
     std::vector<const char*> LoadExtensions();
 
@@ -308,7 +301,7 @@ private:
     std::unordered_map<VkFormat, VkFormatProperties> format_properties;
 
     /// Nsight Aftermath GPU crash tracker
-    NsightAftermathTracker nsight_aftermath_tracker;
+    std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
new file mode 100644
index 000000000..889ecda0c
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -0,0 +1,152 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstring>
+#include <optional>
+#include <span>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+#include "common/dynamic_library.h"
+#include "common/logging/log.h"
+#include "core/frontend/emu_window.h"
+#include "video_core/vulkan_common/vulkan_instance.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+// Include these late to avoid polluting previous headers
+#ifdef _WIN32
+#include <windows.h>
+// ensure include order
+#include <vulkan/vulkan_win32.h>
+#endif
+
+#if !defined(_WIN32) && !defined(__APPLE__)
+#include <X11/Xlib.h>
+#include <vulkan/vulkan_wayland.h>
+#include <vulkan/vulkan_xlib.h>
+#endif
+
+namespace Vulkan {
+namespace {
+[[nodiscard]] std::vector<const char*> RequiredExtensions(
+    Core::Frontend::WindowSystemType window_type, bool enable_debug_utils) { // instance level
+    std::vector<const char*> extensions;
+    extensions.reserve(6);
+    switch (window_type) {
+    case Core::Frontend::WindowSystemType::Headless:
+        break; // no platform surface extension is added without a window
+#ifdef _WIN32
+    case Core::Frontend::WindowSystemType::Windows:
+        extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
+        break;
+#endif
+#if !defined(_WIN32) && !defined(__APPLE__)
+    case Core::Frontend::WindowSystemType::X11:
+        extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
+        break;
+    case Core::Frontend::WindowSystemType::Wayland:
+        extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
+        break;
+#endif
+    default: // window types not compiled in for this platform land here
+        LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
+        break;
+    }
+    if (window_type != Core::Frontend::WindowSystemType::Headless) {
+        extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); // added for every windowed type
+    }
+    if (enable_debug_utils) {
+        extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
+    }
+    extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); // always
+    return extensions;
+}
+
+[[nodiscard]] bool AreExtensionsSupported(const vk::InstanceDispatch& dld,
+                                          std::span<const char* const> extensions) {
+    const std::optional available = vk::EnumerateInstanceExtensionProperties(dld);
+    if (!available.has_value()) {
+        LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
+        return false;
+    }
+    for (const char* const wanted : extensions) { // stops at the first missing extension
+        const auto has_same_name = [wanted](const auto& prop) {
+            return std::strcmp(wanted, prop.extensionName) == 0;
+        };
+        if (std::ranges::none_of(*available, has_same_name)) {
+            LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", wanted);
+            return false;
+        }
+    }
+    return true;
+}
+
+[[nodiscard]] std::vector<const char*> Layers(bool enable_layers) {
+    // The Khronos validation layer is the only layer this function ever requests.
+    if (!enable_layers) {
+        return {};
+    }
+    return {"VK_LAYER_KHRONOS_validation"};
+}
+
+void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector<const char*>& layers) {
+    const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld);
+    if (!layer_properties) {
+        LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers");
+        layers.clear();
+        return; // nothing to compare against; the empty optional below is never touched
+    }
+    std::erase_if(layers, [&layer_properties](const char* layer) {
+        const auto comp = [layer](const VkLayerProperties& layer_property) {
+            return std::strcmp(layer, layer_property.layerName) == 0;
+        };
+        if (std::ranges::none_of(*layer_properties, comp)) {
+            LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer);
+            return true; // erase: no reported layer carries this name
+        }
+        return false;
+    });
+}
+} // Anonymous namespace
+
+vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld,
+                            u32 required_version, Core::Frontend::WindowSystemType window_type,
+                            bool enable_debug_utils, bool enable_layers) {
+    if (!library.IsOpen()) {
+        LOG_ERROR(Render_Vulkan, "Vulkan library not available");
+        throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+    }
+    if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) {
+        LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan");
+        throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+    }
+    if (!vk::Load(dld)) { // first Load() pass runs before any instance exists
+        LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers");
+        throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+    }
+    const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_debug_utils);
+    if (!AreExtensionsSupported(dld, extensions)) { // a missing extension is fatal
+        throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
+    }
+    std::vector<const char*> layers = Layers(enable_layers);
+    RemoveUnavailableLayers(dld, layers); // a missing layer is only dropped, never fatal
+    // Refuse to create the instance when the available version is below required_version.
+    const u32 available_version = vk::AvailableVersion(dld);
+    if (available_version < required_version) {
+        LOG_ERROR(Render_Vulkan, "Vulkan {}.{} is not supported, {}.{} is required",
+                  VK_VERSION_MAJOR(available_version), VK_VERSION_MINOR(available_version),
+                  VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version));
+        throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
+    }
+    vk::Instance instance = vk::Instance::Create(required_version, layers, extensions, dld);
+    if (!vk::Load(*instance, dld)) { // second Load() pass is given the created instance
+        LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers");
+        throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+    }
+    return instance;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_instance.h b/src/video_core/vulkan_common/vulkan_instance.h
new file mode 100644
index 000000000..e5e3a7144
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_instance.h
@@ -0,0 +1,32 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "common/dynamic_library.h"
+#include "core/frontend/emu_window.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+/**
+ * Create a Vulkan instance
+ *
+ * @param library            Dynamic library to load the Vulkan instance from
+ * @param dld                Dispatch table to load function pointers into
+ * @param required_version   Required Vulkan version (for example, VK_API_VERSION_1_1)
+ * @param window_type        Window system type, selects which surface extensions are enabled
+ * @param enable_debug_utils Whether to enable the VK_EXT_debug_utils extension or not
+ * @param enable_layers      Whether to enable Vulkan validation layers or not
+ *
+ * @return A new Vulkan instance
+ * @throw vk::Exception on failure
+ */
+[[nodiscard]] vk::Instance CreateInstance(
+    const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version,
+    Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless,
+    bool enable_debug_utils = false, bool enable_layers = false);
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_library.cpp b/src/video_core/vulkan_common/vulkan_library.cpp
new file mode 100644
index 000000000..557871d81
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_library.cpp
@@ -0,0 +1,36 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstdlib>
+#include <string>
+
+#include "common/dynamic_library.h"
+#include "common/file_util.h"
+#include "video_core/vulkan_common/vulkan_library.h"
+
+namespace Vulkan {
+
+Common::DynamicLibrary OpenLibrary() {
+    Common::DynamicLibrary vulkan;
+#ifdef __APPLE__
+    // LIBVULKAN_PATH, when set and loadable, wins over the copy in the application bundle.
+    const char* const override_path = std::getenv("LIBVULKAN_PATH");
+    const bool opened = override_path != nullptr && vulkan.Open(override_path);
+    if (!opened) {
+        const std::string bundled =
+            Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
+        static_cast<void>(vulkan.Open(bundled.c_str()));
+    }
+#else
+    // Versioned name first; Android devices may only have the unversioned libvulkan.so.
+    std::string soname = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
+    if (!vulkan.Open(soname.c_str())) {
+        soname = Common::DynamicLibrary::GetVersionedFilename("vulkan");
+        static_cast<void>(vulkan.Open(soname.c_str()));
+    }
+#endif
+    return vulkan;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_library.h b/src/video_core/vulkan_common/vulkan_library.h
new file mode 100644
index 000000000..8b28b0e17
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_library.h
@@ -0,0 +1,13 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/dynamic_library.h"
+
+namespace Vulkan {
+/// Opens the Vulkan loader library. The result may not be open; check IsOpen() before use.
+Common::DynamicLibrary OpenLibrary();
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_surface.cpp b/src/video_core/vulkan_common/vulkan_surface.cpp
new file mode 100644
index 000000000..3c3238f96
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_surface.cpp
@@ -0,0 +1,81 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/logging/log.h"
+#include "core/frontend/emu_window.h"
+#include "video_core/vulkan_common/vulkan_surface.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+// Include these late to avoid polluting previous headers
+#ifdef _WIN32
+#include <windows.h>
+// ensure include order
+#include <vulkan/vulkan_win32.h>
+#endif
+
+#if !defined(_WIN32) && !defined(__APPLE__)
+#include <X11/Xlib.h>
+#include <vulkan/vulkan_wayland.h>
+#include <vulkan/vulkan_xlib.h>
+#endif
+
+namespace Vulkan {
+
+vk::SurfaceKHR CreateSurface(const vk::Instance& instance,
+                             const Core::Frontend::EmuWindow& emu_window) {
+    [[maybe_unused]] const vk::InstanceDispatch& dld = instance.Dispatch();
+    [[maybe_unused]] const auto& window_info = emu_window.GetWindowInfo();
+    VkSurfaceKHR unsafe_surface = VK_NULL_HANDLE; // stays null if no platform branch matches
+
+#ifdef _WIN32
+    if (window_info.type == Core::Frontend::WindowSystemType::Windows) {
+        const HWND hWnd = static_cast<HWND>(window_info.render_surface);
+        const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
+                                                   nullptr, 0, nullptr, hWnd};
+        const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>(
+            dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR"));
+        if (!vkCreateWin32SurfaceKHR ||
+            vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
+            LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface");
+            throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+        }
+    }
+#endif
+#if !defined(_WIN32) && !defined(__APPLE__)
+    if (window_info.type == Core::Frontend::WindowSystemType::X11) {
+        const VkXlibSurfaceCreateInfoKHR xlib_ci{
+            VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0,
+            static_cast<Display*>(window_info.display_connection),
+            reinterpret_cast<Window>(window_info.render_surface)};
+        const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>(
+            dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR"));
+        if (!vkCreateXlibSurfaceKHR ||
+            vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
+            LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
+            throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+        }
+    }
+    if (window_info.type == Core::Frontend::WindowSystemType::Wayland) {
+        const VkWaylandSurfaceCreateInfoKHR wayland_ci{
+            VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0,
+            static_cast<wl_display*>(window_info.display_connection),
+            static_cast<wl_surface*>(window_info.render_surface)};
+        const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>(
+            dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR"));
+        if (!vkCreateWaylandSurfaceKHR ||
+            vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) !=
+                VK_SUCCESS) {
+            LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
+            throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+        }
+    }
+#endif
+    if (!unsafe_surface) { // no branch above ran for this window type
+        LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
+        throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+    }
+    return vk::SurfaceKHR(unsafe_surface, *instance, dld);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_surface.h b/src/video_core/vulkan_common/vulkan_surface.h
new file mode 100644
index 000000000..05a169e32
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_surface.h
@@ -0,0 +1,18 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Core::Frontend {
+class EmuWindow;
+}
+
+namespace Vulkan {
+/// Creates a surface for emu_window's native window; throws vk::Exception on failure.
+[[nodiscard]] vk::SurfaceKHR CreateSurface(const vk::Instance& instance,
+                                           const Core::Frontend::EmuWindow& emu_window);
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 2a21e850d..5e15ad607 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -13,7 +13,7 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan::vk {
 
@@ -435,7 +435,7 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe
 }
 
 Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char*> extensions,
-                          InstanceDispatch& dispatch) noexcept {
+                          InstanceDispatch& dispatch) {
     const VkApplicationInfo application_info{
         .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
         .pNext = nullptr,
@@ -455,55 +455,30 @@ Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char
         .enabledExtensionCount = extensions.size(),
         .ppEnabledExtensionNames = extensions.data(),
     };
-
     VkInstance instance;
-    if (dispatch.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) {
-        // Failed to create the instance.
-        return {};
-    }
+    Check(dispatch.vkCreateInstance(&ci, nullptr, &instance));
     if (!Proc(dispatch.vkDestroyInstance, dispatch, "vkDestroyInstance", instance)) {
         // We successfully created an instance but the destroy function couldn't be loaded.
         // This is a good moment to panic.
-        return {};
+        throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
     }
-
     return Instance(instance, dispatch);
 }
 
-std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices() {
+std::vector<VkPhysicalDevice> Instance::EnumeratePhysicalDevices() const {
     u32 num;
-    if (dld->vkEnumeratePhysicalDevices(handle, &num, nullptr) != VK_SUCCESS) {
-        return std::nullopt;
-    }
+    Check(dld->vkEnumeratePhysicalDevices(handle, &num, nullptr));
     std::vector<VkPhysicalDevice> physical_devices(num);
-    if (dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()) != VK_SUCCESS) {
-        return std::nullopt;
-    }
+    Check(dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()));
     SortPhysicalDevices(physical_devices, *dld);
-    return std::make_optional(std::move(physical_devices));
+    return physical_devices;
 }
 
-DebugCallback Instance::TryCreateDebugCallback(
-    PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept {
-    const VkDebugUtilsMessengerCreateInfoEXT ci{
-        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
-        .pNext = nullptr,
-        .flags = 0,
-        .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
-                           VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
-                           VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
-                           VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT,
-        .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
-                       VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT,
-        .pfnUserCallback = callback,
-        .pUserData = nullptr,
-    };
-
-    VkDebugUtilsMessengerEXT messenger;
-    if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) {
-        return {};
-    }
-    return DebugCallback(messenger, handle, *dld);
+DebugUtilsMessenger Instance::CreateDebugUtilsMessenger(
+    const VkDebugUtilsMessengerCreateInfoEXT& create_info) const {
+    VkDebugUtilsMessengerEXT object;
+    Check(dld->vkCreateDebugUtilsMessengerEXT(handle, &create_info, nullptr, &object));
+    return DebugUtilsMessenger(object, handle, *dld);
 }
 
 void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
@@ -605,7 +580,7 @@ void Semaphore::SetObjectNameEXT(const char* name) const {
 
 Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
                       Span<const char*> enabled_extensions, const void* next,
-                      DeviceDispatch& dispatch) noexcept {
+                      DeviceDispatch& dispatch) {
     const VkDeviceCreateInfo ci{
         .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
         .pNext = next,
@@ -618,11 +593,8 @@ Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreate
         .ppEnabledExtensionNames = enabled_extensions.data(),
         .pEnabledFeatures = nullptr,
     };
-
     VkDevice device;
-    if (dispatch.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) {
-        return {};
-    }
+    Check(dispatch.vkCreateDevice(physical_device, &ci, nullptr, &device));
     Load(device, dispatch);
     return Device(device, dispatch);
 }
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index f9a184e00..912cab46c 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -555,7 +555,7 @@ private:
     const DeviceDispatch* dld = nullptr;
 };
 
-using DebugCallback = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>;
+using DebugUtilsMessenger = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>;
 using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>;
 using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>;
 using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>;
@@ -573,16 +573,25 @@ class Instance : public Handle<VkInstance, NoOwner, InstanceDispatch> {
     using Handle<VkInstance, NoOwner, InstanceDispatch>::Handle;
 
 public:
-    /// Creates a Vulkan instance. Use "operator bool" for error handling.
+    /// Creates a Vulkan instance.
+    /// @throw Exception on initialization error.
     static Instance Create(u32 version, Span<const char*> layers, Span<const char*> extensions,
-                           InstanceDispatch& dispatch) noexcept;
+                           InstanceDispatch& dispatch);
 
     /// Enumerates physical devices.
-    /// @return Physical devices and an empty handle on failure.
+    /// @return Physical devices.
-    std::optional<std::vector<VkPhysicalDevice>> EnumeratePhysicalDevices();
+    /// @throw Exception on Vulkan error.
+    std::vector<VkPhysicalDevice> EnumeratePhysicalDevices() const;
 
-    /// Tries to create a debug callback messenger. Returns an empty handle on failure.
-    DebugCallback TryCreateDebugCallback(PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept;
+    /// Creates a debug callback messenger.
+    /// @throw Exception on creation failure.
+    DebugUtilsMessenger CreateDebugUtilsMessenger(
+        const VkDebugUtilsMessengerCreateInfoEXT& create_info) const;
+
+    /// Returns dispatch table.
+    const InstanceDispatch& Dispatch() const noexcept {
+        return *dld;
+    }
 };
 
 class Queue {
@@ -787,7 +796,7 @@ class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
 public:
     static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
                          Span<const char*> enabled_extensions, const void* next,
-                         DeviceDispatch& dispatch) noexcept;
+                         DeviceDispatch& dispatch);
 
     Queue GetQueue(u32 family_index) const noexcept;
 
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index e124836b5..85ee2577d 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -397,7 +397,7 @@ void GRenderWindow::mousePressEvent(QMouseEvent* event) {
         this->TouchPressed(x, y);
     }
 
-    QWidget::mousePressEvent(event);
+    emit MouseActivity();
 }
 
 void GRenderWindow::mouseMoveEvent(QMouseEvent* event) {
@@ -411,7 +411,7 @@ void GRenderWindow::mouseMoveEvent(QMouseEvent* event) {
     input_subsystem->GetMouse()->MouseMove(x, y);
     this->TouchMoved(x, y);
 
-    QWidget::mouseMoveEvent(event);
+    emit MouseActivity();
 }
 
 void GRenderWindow::mouseReleaseEvent(QMouseEvent* event) {
@@ -688,3 +688,13 @@ void GRenderWindow::showEvent(QShowEvent* event) {
     connect(windowHandle(), &QWindow::screenChanged, this, &GRenderWindow::OnFramebufferSizeChanged,
             Qt::UniqueConnection);
 }
+
+bool GRenderWindow::eventFilter(QObject* object, QEvent* event) {
+    // Hover movement over the watched object counts as mouse activity.
+    const bool hover_moved = event->type() == QEvent::HoverMove;
+    if (hover_moved) {
+        emit MouseActivity();
+    }
+    // Never consume the event: returning false lets normal delivery continue.
+    return false;
+}
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index ebe5cb965..339095509 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -184,6 +184,7 @@ signals:
     void Closed();
     void FirstFrameDisplayed();
     void ExecuteProgramSignal(std::size_t program_index);
+    void MouseActivity();
 
 private:
     void TouchBeginEvent(const QTouchEvent* event);
@@ -216,4 +217,5 @@ private:
 
 protected:
     void showEvent(QShowEvent* event) override;
+    bool eventFilter(QObject* object, QEvent* event) override;
 };
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 34c2a5f8b..cda448718 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -514,7 +514,7 @@ void Config::ReadControlValues() {
     Settings::values.emulate_analog_keyboard =
         ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool();
 
-    ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), false);
+    ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), true);
     ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"),
                       true);
     ReadSettingGlobal(Settings::values.enable_accurate_vibrations,
@@ -764,6 +764,8 @@ void Config::ReadCpuValues() {
             ReadSetting(QStringLiteral("cpuopt_unsafe_unfuse_fma"), true).toBool();
         Settings::values.cpuopt_unsafe_reduce_fp_error =
             ReadSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true).toBool();
+        Settings::values.cpuopt_unsafe_inaccurate_nan =
+            ReadSetting(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true).toBool();
     }
 
     qt_config->endGroup();
@@ -1174,7 +1176,7 @@ void Config::SaveControlValues() {
     SaveTouchscreenValues();
     SaveMotionTouchValues();
 
-    WriteSettingGlobal(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, false);
+    WriteSettingGlobal(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, true);
     WriteSettingGlobal(QStringLiteral("vibration_enabled"), Settings::values.vibration_enabled,
                        true);
     WriteSettingGlobal(QStringLiteral("enable_accurate_vibrations"),
@@ -1327,6 +1329,8 @@ void Config::SaveCpuValues() {
                      Settings::values.cpuopt_unsafe_unfuse_fma, true);
         WriteSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"),
                      Settings::values.cpuopt_unsafe_reduce_fp_error, true);
+        WriteSetting(QStringLiteral("cpuopt_unsafe_inaccurate_nan"),
+                     Settings::values.cpuopt_unsafe_inaccurate_nan, true);
     }
 
     qt_config->endGroup();
diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp
index 37fcd6adc..d055cbd60 100644
--- a/src/yuzu/configuration/configure_cpu.cpp
+++ b/src/yuzu/configuration/configure_cpu.cpp
@@ -36,6 +36,8 @@ void ConfigureCpu::SetConfiguration() {
     ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma);
     ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock);
     ui->cpuopt_unsafe_reduce_fp_error->setChecked(Settings::values.cpuopt_unsafe_reduce_fp_error);
+    ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock);
+    ui->cpuopt_unsafe_inaccurate_nan->setChecked(Settings::values.cpuopt_unsafe_inaccurate_nan);
 }
 
 void ConfigureCpu::AccuracyUpdated(int index) {
@@ -61,6 +63,7 @@ void ConfigureCpu::ApplyConfiguration() {
         static_cast<Settings::CPUAccuracy>(ui->accuracy->currentIndex());
     Settings::values.cpuopt_unsafe_unfuse_fma = ui->cpuopt_unsafe_unfuse_fma->isChecked();
     Settings::values.cpuopt_unsafe_reduce_fp_error = ui->cpuopt_unsafe_reduce_fp_error->isChecked();
+    Settings::values.cpuopt_unsafe_inaccurate_nan = ui->cpuopt_unsafe_inaccurate_nan->isChecked();
 }
 
 void ConfigureCpu::changeEvent(QEvent* event) {
diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui
index ebdd2e6e9..bcd0962e9 100644
--- a/src/yuzu/configuration/configure_cpu.ui
+++ b/src/yuzu/configuration/configure_cpu.ui
@@ -109,6 +109,18 @@
           </property>
          </widget>
         </item>
+        <item>
+         <widget class="QCheckBox" name="cpuopt_unsafe_inaccurate_nan">
+          <property name="text">
+           <string>Inaccurate NaN handling</string>
+          </property>
+          <property name="toolTip">
+           <string>
+            &lt;div&gt;This option improves speed by removing NaN checking. Please note this also reduces accuracy of certain floating-point instructions.&lt;/div&gt;
+           </string>
+          </property>
+         </widget>
+        </item>
        </layout>
       </widget>
      </item>
diff --git a/src/yuzu/configuration/configure_motion_touch.cpp b/src/yuzu/configuration/configure_motion_touch.cpp
index c2a7113da..eb8eacbf9 100644
--- a/src/yuzu/configuration/configure_motion_touch.cpp
+++ b/src/yuzu/configuration/configure_motion_touch.cpp
@@ -51,6 +51,8 @@ CalibrationConfigurationDialog::CalibrationConfigurationDialog(QWidget* parent,
             case CalibrationConfigurationJob::Status::Completed:
                 text = tr("Configuration completed!");
                 break;
+            default:
+                break;
             }
             QMetaObject::invokeMethod(this, "UpdateLabelText", Q_ARG(QString, text));
             if (status == CalibrationConfigurationJob::Status::Completed) {
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 0925c10b4..a93b5d3c2 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -14,10 +14,10 @@
 #include "core/core.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_scheduler.h"
-#include "core/hle/kernel/mutex.h"
+#include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/readable_event.h"
-#include "core/hle/kernel/synchronization_object.h"
+#include "core/hle/kernel/svc_common.h"
 #include "core/hle/kernel/thread.h"
 #include "core/memory.h"
 
@@ -116,7 +116,7 @@ QString WaitTreeText::GetText() const {
 WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table)
     : mutex_address(mutex_address) {
     mutex_value = Core::System::GetInstance().Memory().Read32(mutex_address);
-    owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask);
+    owner_handle = static_cast<Kernel::Handle>(mutex_value & ~Kernel::Svc::HandleWaitMask);
     owner = handle_table.Get<Kernel::Thread>(owner_handle);
 }
 
@@ -127,7 +127,7 @@ QString WaitTreeMutexInfo::GetText() const {
 }
 
 std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeMutexInfo::GetChildren() const {
-    const bool has_waiters = (mutex_value & Kernel::Mutex::MutexHasWaitersFlag) != 0;
+    const bool has_waiters = (mutex_value & Kernel::Svc::HandleWaitMask) != 0;
 
     std::vector<std::unique_ptr<WaitTreeItem>> list;
     list.push_back(std::make_unique<WaitTreeText>(tr("has waiters: %1").arg(has_waiters)));
@@ -169,7 +169,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons
     return list;
 }
 
-WaitTreeSynchronizationObject::WaitTreeSynchronizationObject(const Kernel::SynchronizationObject& o)
+WaitTreeSynchronizationObject::WaitTreeSynchronizationObject(
+    const Kernel::KSynchronizationObject& o)
     : object(o) {}
 WaitTreeSynchronizationObject::~WaitTreeSynchronizationObject() = default;
 
@@ -188,7 +189,7 @@ QString WaitTreeSynchronizationObject::GetText() const {
 }
 
 std::unique_ptr<WaitTreeSynchronizationObject> WaitTreeSynchronizationObject::make(
-    const Kernel::SynchronizationObject& object) {
+    const Kernel::KSynchronizationObject& object) {
     switch (object.GetHandleType()) {
     case Kernel::HandleType::ReadableEvent:
         return std::make_unique<WaitTreeEvent>(static_cast<const Kernel::ReadableEvent&>(object));
@@ -202,7 +203,7 @@ std::unique_ptr<WaitTreeSynchronizationObject> WaitTreeSynchronizationObject::ma
 std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeSynchronizationObject::GetChildren() const {
     std::vector<std::unique_ptr<WaitTreeItem>> list;
 
-    const auto& threads = object.GetWaitingThreads();
+    const auto& threads = object.GetWaitingThreadsForDebugging();
     if (threads.empty()) {
         list.push_back(std::make_unique<WaitTreeText>(tr("waited by no thread")));
     } else {
@@ -211,8 +212,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeSynchronizationObject::GetChi
     return list;
 }
 
-WaitTreeObjectList::WaitTreeObjectList(
-    const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& list, bool w_all)
+WaitTreeObjectList::WaitTreeObjectList(const std::vector<Kernel::KSynchronizationObject*>& list,
+                                       bool w_all)
     : object_list(list), wait_all(w_all) {}
 
 WaitTreeObjectList::~WaitTreeObjectList() = default;
@@ -237,8 +238,8 @@ WaitTreeThread::~WaitTreeThread() = default;
 QString WaitTreeThread::GetText() const {
     const auto& thread = static_cast<const Kernel::Thread&>(object);
     QString status;
-    switch (thread.GetStatus()) {
-    case Kernel::ThreadStatus::Ready:
+    switch (thread.GetState()) {
+    case Kernel::ThreadState::Runnable:
         if (!thread.IsPaused()) {
             if (thread.WasRunning()) {
                 status = tr("running");
@@ -249,35 +250,39 @@ QString WaitTreeThread::GetText() const {
             status = tr("paused");
         }
         break;
-    case Kernel::ThreadStatus::Paused:
-        status = tr("paused");
-        break;
-    case Kernel::ThreadStatus::WaitHLEEvent:
-        status = tr("waiting for HLE return");
-        break;
-    case Kernel::ThreadStatus::WaitSleep:
-        status = tr("sleeping");
-        break;
-    case Kernel::ThreadStatus::WaitIPC:
-        status = tr("waiting for IPC reply");
-        break;
-    case Kernel::ThreadStatus::WaitSynch:
-        status = tr("waiting for objects");
-        break;
-    case Kernel::ThreadStatus::WaitMutex:
-        status = tr("waiting for mutex");
-        break;
-    case Kernel::ThreadStatus::WaitCondVar:
-        status = tr("waiting for condition variable");
+    case Kernel::ThreadState::Waiting:
+        switch (thread.GetWaitReasonForDebugging()) {
+        case Kernel::ThreadWaitReasonForDebugging::Sleep:
+            status = tr("sleeping");
+            break;
+        case Kernel::ThreadWaitReasonForDebugging::IPC:
+            status = tr("waiting for IPC reply");
+            break;
+        case Kernel::ThreadWaitReasonForDebugging::Synchronization:
+            status = tr("waiting for objects");
+            break;
+        case Kernel::ThreadWaitReasonForDebugging::ConditionVar:
+            status = tr("waiting for condition variable");
+            break;
+        case Kernel::ThreadWaitReasonForDebugging::Arbitration:
+            status = tr("waiting for address arbiter");
+            break;
+        case Kernel::ThreadWaitReasonForDebugging::Suspended:
+            status = tr("waiting for suspend resume");
+            break;
+        default:
+            status = tr("waiting");
+            break;
+        }
         break;
-    case Kernel::ThreadStatus::WaitArb:
-        status = tr("waiting for address arbiter");
+    case Kernel::ThreadState::Initialized:
+        status = tr("initialized");
         break;
-    case Kernel::ThreadStatus::Dormant:
-        status = tr("dormant");
+    case Kernel::ThreadState::Terminated:
+        status = tr("terminated");
         break;
-    case Kernel::ThreadStatus::Dead:
-        status = tr("dead");
+    default:
+        status = tr("unknown");
         break;
     }
 
@@ -293,8 +298,8 @@ QColor WaitTreeThread::GetColor() const {
     const std::size_t color_index = IsDarkTheme() ? 1 : 0;
 
     const auto& thread = static_cast<const Kernel::Thread&>(object);
-    switch (thread.GetStatus()) {
-    case Kernel::ThreadStatus::Ready:
+    switch (thread.GetState()) {
+    case Kernel::ThreadState::Runnable:
         if (!thread.IsPaused()) {
             if (thread.WasRunning()) {
                 return QColor(WaitTreeColors[0][color_index]);
@@ -304,21 +309,24 @@ QColor WaitTreeThread::GetColor() const {
         } else {
             return QColor(WaitTreeColors[2][color_index]);
         }
-    case Kernel::ThreadStatus::Paused:
-        return QColor(WaitTreeColors[3][color_index]);
-    case Kernel::ThreadStatus::WaitHLEEvent:
-    case Kernel::ThreadStatus::WaitIPC:
-        return QColor(WaitTreeColors[4][color_index]);
-    case Kernel::ThreadStatus::WaitSleep:
-        return QColor(WaitTreeColors[5][color_index]);
-    case Kernel::ThreadStatus::WaitSynch:
-    case Kernel::ThreadStatus::WaitMutex:
-    case Kernel::ThreadStatus::WaitCondVar:
-    case Kernel::ThreadStatus::WaitArb:
-        return QColor(WaitTreeColors[6][color_index]);
-    case Kernel::ThreadStatus::Dormant:
+    case Kernel::ThreadState::Waiting:
+        switch (thread.GetWaitReasonForDebugging()) {
+        case Kernel::ThreadWaitReasonForDebugging::IPC:
+            return QColor(WaitTreeColors[4][color_index]);
+        case Kernel::ThreadWaitReasonForDebugging::Sleep:
+            return QColor(WaitTreeColors[5][color_index]);
+        case Kernel::ThreadWaitReasonForDebugging::Synchronization:
+        case Kernel::ThreadWaitReasonForDebugging::ConditionVar:
+        case Kernel::ThreadWaitReasonForDebugging::Arbitration:
+        case Kernel::ThreadWaitReasonForDebugging::Suspended:
+            return QColor(WaitTreeColors[6][color_index]);
+            break;
+        default:
+            return QColor(WaitTreeColors[3][color_index]);
+        }
+    case Kernel::ThreadState::Initialized:
         return QColor(WaitTreeColors[7][color_index]);
-    case Kernel::ThreadStatus::Dead:
+    case Kernel::ThreadState::Terminated:
         return QColor(WaitTreeColors[8][color_index]);
     default:
         return WaitTreeItem::GetColor();
@@ -354,11 +362,11 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
     list.push_back(std::make_unique<WaitTreeText>(tr("thread id = %1").arg(thread.GetThreadID())));
     list.push_back(std::make_unique<WaitTreeText>(tr("priority = %1(current) / %2(normal)")
                                                       .arg(thread.GetPriority())
-                                                      .arg(thread.GetNominalPriority())));
+                                                      .arg(thread.GetBasePriority())));
     list.push_back(std::make_unique<WaitTreeText>(
         tr("last running ticks = %1").arg(thread.GetLastScheduledTick())));
 
-    const VAddr mutex_wait_address = thread.GetMutexWaitAddress();
+    const VAddr mutex_wait_address = thread.GetMutexWaitAddressForDebugging();
     if (mutex_wait_address != 0) {
         const auto& handle_table = thread.GetOwnerProcess()->GetHandleTable();
         list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address, handle_table));
@@ -366,9 +374,11 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
         list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex")));
     }
 
-    if (thread.GetStatus() == Kernel::ThreadStatus::WaitSynch) {
-        list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetSynchronizationObjects(),
-                                                            thread.IsWaitingSync()));
+    if (thread.GetState() == Kernel::ThreadState::Waiting &&
+        thread.GetWaitReasonForDebugging() ==
+            Kernel::ThreadWaitReasonForDebugging::Synchronization) {
+        list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetWaitObjectsForDebugging(),
+                                                            thread.IsCancellable()));
     }
 
     list.push_back(std::make_unique<WaitTreeCallstack>(thread));
@@ -380,7 +390,7 @@ WaitTreeEvent::WaitTreeEvent(const Kernel::ReadableEvent& object)
     : WaitTreeSynchronizationObject(object) {}
 WaitTreeEvent::~WaitTreeEvent() = default;
 
-WaitTreeThreadList::WaitTreeThreadList(const std::vector<std::shared_ptr<Kernel::Thread>>& list)
+WaitTreeThreadList::WaitTreeThreadList(const std::vector<Kernel::Thread*>& list)
     : thread_list(list) {}
 WaitTreeThreadList::~WaitTreeThreadList() = default;
 
diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h
index 8e3bc4b24..cf96911ea 100644
--- a/src/yuzu/debugger/wait_tree.h
+++ b/src/yuzu/debugger/wait_tree.h
@@ -18,8 +18,8 @@ class EmuThread;
 
 namespace Kernel {
 class HandleTable;
+class KSynchronizationObject;
 class ReadableEvent;
-class SynchronizationObject;
 class Thread;
 } // namespace Kernel
 
@@ -102,30 +102,29 @@ private:
 class WaitTreeSynchronizationObject : public WaitTreeExpandableItem {
     Q_OBJECT
 public:
-    explicit WaitTreeSynchronizationObject(const Kernel::SynchronizationObject& object);
+    explicit WaitTreeSynchronizationObject(const Kernel::KSynchronizationObject& object);
     ~WaitTreeSynchronizationObject() override;
 
     static std::unique_ptr<WaitTreeSynchronizationObject> make(
-        const Kernel::SynchronizationObject& object);
+        const Kernel::KSynchronizationObject& object);
     QString GetText() const override;
     std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
 
 protected:
-    const Kernel::SynchronizationObject& object;
+    const Kernel::KSynchronizationObject& object;
 };
 
 class WaitTreeObjectList : public WaitTreeExpandableItem {
     Q_OBJECT
 public:
-    WaitTreeObjectList(const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& list,
-                       bool wait_all);
+    WaitTreeObjectList(const std::vector<Kernel::KSynchronizationObject*>& list, bool wait_all);
     ~WaitTreeObjectList() override;
 
     QString GetText() const override;
     std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
 
 private:
-    const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& object_list;
+    const std::vector<Kernel::KSynchronizationObject*>& object_list;
     bool wait_all;
 };
 
@@ -150,14 +149,14 @@ public:
 class WaitTreeThreadList : public WaitTreeExpandableItem {
     Q_OBJECT
 public:
-    explicit WaitTreeThreadList(const std::vector<std::shared_ptr<Kernel::Thread>>& list);
+    explicit WaitTreeThreadList(const std::vector<Kernel::Thread*>& list);
     ~WaitTreeThreadList() override;
 
     QString GetText() const override;
     std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
 
 private:
-    const std::vector<std::shared_ptr<Kernel::Thread>>& thread_list;
+    const std::vector<Kernel::Thread*>& thread_list;
 };
 
 class WaitTreeModel : public QAbstractItemModel {
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 43d64b708..2e74037d1 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -292,12 +292,48 @@ GMainWindow::GMainWindow()
     connect(&mouse_hide_timer, &QTimer::timeout, this, &GMainWindow::HideMouseCursor);
     connect(ui.menubar, &QMenuBar::hovered, this, &GMainWindow::ShowMouseCursor);
 
+    MigrateConfigFiles();
+
+    ui.action_Fullscreen->setChecked(false);
+
     QStringList args = QApplication::arguments();
-    if (args.length() >= 2) {
-        BootGame(args[1]);
+
+    if (args.size() < 2) {
+        return;
     }
 
-    MigrateConfigFiles();
+    QString game_path;
+
+    for (int i = 1; i < args.size(); ++i) {
+        // Preserves drag/drop functionality
+        if (args.size() == 2 && !args[1].startsWith(QChar::fromLatin1('-'))) {
+            game_path = args[1];
+            break;
+        }
+
+        // Launch game in fullscreen mode
+        if (args[i] == QStringLiteral("-f")) {
+            ui.action_Fullscreen->setChecked(true);
+            continue;
+        }
+
+        // Launch game at path
+        if (args[i] == QStringLiteral("-g")) {
+            if (i >= args.size() - 1) {
+                continue;
+            }
+
+            if (args[i + 1].startsWith(QChar::fromLatin1('-'))) {
+                continue;
+            }
+
+            game_path = args[++i];
+        }
+    }
+
+    if (!game_path.isEmpty()) {
+        BootGame(game_path);
+    }
 }
 
 GMainWindow::~GMainWindow() {
@@ -1058,8 +1094,9 @@ bool GMainWindow::LoadROM(const QString& filename, std::size_t program_index) {
                     tr("%1<br>Please follow <a href='https://yuzu-emu.org/help/quickstart/'>the "
                        "yuzu quickstart guide</a> to redump your files.<br>You can refer "
                        "to the yuzu wiki</a> or the yuzu Discord</a> for help.",
-                       "%1 signifies a numeric error ID.")
-                        .arg(error_id);
+                       "%1 signifies an error string.")
+                        .arg(QString::fromStdString(
+                            GetResultStatusString(static_cast<Loader::ResultStatus>(error_id))));
 
                 QMessageBox::critical(this, title, description);
             } else {
@@ -1133,6 +1170,7 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index) {
         [this](std::size_t program_index) { render_window->ExecuteProgram(program_index); });
 
     connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame);
+    connect(render_window, &GRenderWindow::MouseActivity, this, &GMainWindow::OnMouseActivity);
     // BlockingQueuedConnection is important here, it makes sure we've finished refreshing our views
     // before the CPU continues
     connect(emu_thread.get(), &EmuThread::DebugModeEntered, waitTreeWidget,
@@ -1156,8 +1194,8 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index) {
 
     if (UISettings::values.hide_mouse) {
         mouse_hide_timer.start();
-        setMouseTracking(true);
-        ui.centralwidget->setMouseTracking(true);
+        render_window->installEventFilter(render_window);
+        render_window->setAttribute(Qt::WA_Hover, true);
     }
 
     std::string title_name;
@@ -1234,8 +1272,8 @@ void GMainWindow::ShutdownGame() {
     }
     game_list->SetFilterFocus();
 
-    setMouseTracking(false);
-    ui.centralwidget->setMouseTracking(false);
+    render_window->removeEventFilter(render_window);
+    render_window->setAttribute(Qt::WA_Hover, false);
 
     UpdateWindowTitle();
 
@@ -2316,12 +2354,12 @@ void GMainWindow::OnConfigure() {
     config->Save();
 
     if (UISettings::values.hide_mouse && emulation_running) {
-        setMouseTracking(true);
-        ui.centralwidget->setMouseTracking(true);
+        render_window->installEventFilter(render_window);
+        render_window->setAttribute(Qt::WA_Hover, true);
         mouse_hide_timer.start();
     } else {
-        setMouseTracking(false);
-        ui.centralwidget->setMouseTracking(false);
+        render_window->removeEventFilter(render_window);
+        render_window->setAttribute(Qt::WA_Hover, false);
     }
 
     UpdateStatusButtons();
@@ -2561,21 +2599,17 @@ void GMainWindow::HideMouseCursor() {
         ShowMouseCursor();
         return;
     }
-    setCursor(QCursor(Qt::BlankCursor));
+    render_window->setCursor(QCursor(Qt::BlankCursor));
 }
 
 void GMainWindow::ShowMouseCursor() {
-    unsetCursor();
+    render_window->unsetCursor();
     if (emu_thread != nullptr && UISettings::values.hide_mouse) {
         mouse_hide_timer.start();
     }
 }
 
-void GMainWindow::mouseMoveEvent(QMouseEvent* event) {
-    ShowMouseCursor();
-}
-
-void GMainWindow::mousePressEvent(QMouseEvent* event) {
+void GMainWindow::OnMouseActivity() {
     ShowMouseCursor();
 }
 
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index ea6d2c30d..31788ea62 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -248,6 +248,7 @@ private slots:
     void OnCoreError(Core::System::ResultStatus, std::string);
     void OnReinitializeKeys(ReinitializeKeyBehavior behavior);
     void OnLanguageChanged(const QString& locale);
+    void OnMouseActivity();
 
 private:
     void RemoveBaseContent(u64 program_id, const QString& entry_type);
@@ -335,6 +336,4 @@ protected:
     void dropEvent(QDropEvent* event) override;
     void dragEnterEvent(QDragEnterEvent* event) override;
     void dragMoveEvent(QDragMoveEvent* event) override;
-    void mouseMoveEvent(QMouseEvent* event) override;
-    void mousePressEvent(QMouseEvent* event) override;
 };
diff --git a/src/yuzu/util/url_request_interceptor.cpp b/src/yuzu/util/url_request_interceptor.cpp
index 2d491d8c0..b637e771e 100644
--- a/src/yuzu/util/url_request_interceptor.cpp
+++ b/src/yuzu/util/url_request_interceptor.cpp
@@ -22,6 +22,8 @@ void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo& info) {
     case QWebEngineUrlRequestInfo::ResourceTypeXhr:
         emit FrameChanged();
         break;
+    default:
+        break;
     }
 }
 
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 38075c345..41ef6f6b8 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -344,7 +344,7 @@ void Config::ReadValues() {
 
     // System
     Settings::values.use_docked_mode.SetValue(
-        sdl2_config->GetBoolean("System", "use_docked_mode", false));
+        sdl2_config->GetBoolean("System", "use_docked_mode", true));
 
     Settings::values.current_user = std::clamp<int>(
         sdl2_config->GetInteger("System", "current_user", 0), 0, Service::Account::MAX_USERS - 1);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 2d4b98d9a..3ee0e037d 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -274,7 +274,7 @@ gamecard_path =
 
 [System]
 # Whether the system is docked
-# 1: Yes, 0 (default): No
+# 1 (default): Yes, 0: No
 use_docked_mode =
 
 # Allow the use of NFC in games
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 39e0d35aa..4faf62ede 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -95,8 +95,6 @@ int main(int argc, char** argv) {
     int option_index = 0;
 
     InitializeLogging();
-
-    char* endarg;
 #ifdef _WIN32
     int argc_w;
     auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w);
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index 91684e96e..0aa143e1f 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -86,7 +86,7 @@ void Config::ReadValues() {
     Settings::values.touchscreen.diameter_y = 15;
 
     Settings::values.use_docked_mode.SetValue(
-        sdl2_config->GetBoolean("Controls", "use_docked_mode", false));
+        sdl2_config->GetBoolean("System", "use_docked_mode", true));
 
     // Data Storage
     Settings::values.use_virtual_sd =
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
index 3eb64e9d7..779c3791b 100644
--- a/src/yuzu_tester/default_ini.h
+++ b/src/yuzu_tester/default_ini.h
@@ -116,7 +116,7 @@ use_virtual_sd =
 
 [System]
 # Whether the system is docked
-# 1: Yes, 0 (default): No
+# 1 (default): Yes, 0: No
 use_docked_mode =
 
 # Allow the use of NFC in games